cc: @cseed @dking (new users can only cc 2 others…)
Currently in pipeline:
p = self.pipeline()
t = p.new_task()
# `ofile` is never declared on the task beforehand; simply referencing
# `t.ofile` inside the f-string is what creates the resource.
t.command(f'echo "0" >> {t.ofile}')
Here, `t.ofile` does not exist before it is first used; it is created implicitly on access:
def _get_resource(self, item):
    """Return the resource stored under ``item``, creating it on first access.

    When ``item`` is not yet registered, a new task resource file is
    requested from the pipeline and recorded in both ``_resources``
    (name -> resource) and ``_resources_inverse`` (resource -> name).
    """
    registry = self._resources
    if item in registry:
        return registry[item]

    # First access under this name: create the resource and index it both ways.
    resource = self._pipeline._new_task_resource_file(self)
    registry[item] = resource
    self._resources_inverse[resource] = item
    return resource
def __getitem__(self, item):
    """Support ``task[item]`` by delegating to ``_get_resource``."""
    resource = self._get_resource(item)
    return resource
def __getattr__(self, item):
    """Support ``task.<name>`` by delegating to ``_get_resource``.

    Python only calls ``__getattr__`` after normal attribute lookup has
    failed, so attributes that really exist are never routed through here.

    Underscore-prefixed names are rejected with ``AttributeError`` instead
    of being turned into resources:

    * protocol probes such as ``__deepcopy__`` or ``__getstate__`` (issued
      by ``copy``, ``pickle``, ``hasattr`` and similar) would otherwise
      receive a freshly created resource instead of "not defined";
    * if ``_resources`` itself is missing (e.g. on an instance created
      without ``__init__``), looking it up from ``_get_resource`` would
      re-enter this method and recurse without bound.

    A resource whose name starts with an underscore can still be reached
    through ``task['_name']``.

    Raises:
        AttributeError: if ``item`` starts with an underscore.
    """
    if item.startswith('_'):
        raise AttributeError(
            f"'{type(self).__name__}' object has no attribute '{item}'")
    return self._get_resource(item)
This breaks the semantic promise of a getter and provides two different ways of adding attributes to a Task: one explicit, via `declare_resource_group`, and one implicit. It also requires us to maintain two very similar methods with subtle differences:
def declare_resource_group(self, **mappings):
    """Explicitly declare one or more named resource groups on this task.

    Each keyword argument maps a resource-group name to a dict, which is
    handed to the pipeline's ``_new_resource_group``. The resulting group
    is stored in ``_resources`` under that name and added to ``_mentioned``.

    Returns:
        ``self``, so that calls can be chained.

    Raises:
        ValueError: if a name is already registered on this task, or if a
            value is not a dict.
    """
    for name, d in mappings.items():
        # Raise rather than `assert`: assertions are stripped under
        # `python -O`, which would let a redeclaration silently replace
        # the existing resource.
        if name in self._resources:
            raise ValueError(f"resource '{name}' is already declared on this task.")
        if not isinstance(d, dict):
            raise ValueError(f"value for name '{name}' is not a dict. Found '{type(d)}' instead.")
        rg = self._pipeline._new_resource_group(self, d)
        self._resources[name] = rg
        self._mentioned.add(rg)
    return self
def _new_resource_group(self, source, mappings):
    """Create a ResourceGroup whose members are built from f-string templates.

    Every value in ``mappings`` is evaluated as an f-string inside this
    method's local scope, so a template may refer to ``root`` (the value
    returned by ``self._tmp_file()``) or to any other local name. For that
    reason the local variable names here are effectively part of the
    template contract and should not be renamed casually.

    NOTE(review): the templates go through ``eval`` and must therefore
    only ever come from trusted code.

    Args:
        source: forwarded to ``_new_task_resource_file`` and to
            ``ResourceGroup`` (presumably the owning task; confirm).
        mappings: dict of member name -> template string.

    Returns:
        The new ResourceGroup, also registered in ``self._resource_map``
        under its ``_uid``.

    Raises:
        ValueError: if a template is not a string.
    """
    assert isinstance(mappings, dict)
    root = self._tmp_file()
    d = {}
    new_resource_map = {}
    for name, code in mappings.items():
        if not isinstance(code, str):
            raise ValueError(f"value for name '{name}' is not a string. Found '{type(code)}' instead.")
        # Wrap the template in f"""...""" and evaluate it so that
        # placeholders resolve against the locals above.
        r = self._new_task_resource_file(source=source, value=eval(f'f"""{code}"""')) # pylint: disable=W0123
        d[name] = r
        new_resource_map[r._uid] = r
    # NOTE(review): indentation was lost in the pasted snippet; this update
    # is assumed to run once, after the loop. Confirm against the repository.
    self._resource_map.update(new_resource_map)
    rg = ResourceGroup(source, root, **d)
    self._resource_map.update({rg._uid: rg})
    return rg
These differences appear when we run a command:
def handler(match_obj):
    # Callback that receives a regex match for an object reference found in
    # `command`. `self` and `command` come from the enclosing scope, which
    # is not shown in this excerpt.
    groups = match_obj.groupdict()
    # Task and Pipeline objects may not be referenced inside a command.
    if groups['TASK']:
        raise ValueError(f"found a reference to a Task object in command '{command}'.")
    elif groups['PIPELINE']:
        raise ValueError(f"found a reference to a Pipeline object in command '{command}'.")
    else:
        assert groups['RESOURCE_FILE'] or groups['RESOURCE_GROUP']
        # The full matched text is used as the key into the pipeline's resource map.
        r_uid = match_obj.group()
        r = self._pipeline._resource_map.get(r_uid)
        if r is None:
            raise KeyError(f"undefined resource '{r_uid}' in command '{command}'.\n"
                           f"Hint: resources must be from the same pipeline as the current task.")
        if r._source != self:
            #... Here we check if in r._source._mentioned
            # NOTE(review): the body of this branch is elided in the post.
        else:
            # But allow implicit creation when r._source == self
            self._mentioned.add(r)
In the `r._source != self` case we don’t allow implicit creation, but in the `r._source == self` case we do. This kind of polymorphism is clever and, I suspect, error-prone.
Ref: https://github.com/hail-is/hail/pull/5455#discussion_r260869446