diff --git a/udapi/block/corefud/delete.py b/udapi/block/corefud/delete.py index 4e68e8dd..5aaf94e7 100644 --- a/udapi/block/corefud/delete.py +++ b/udapi/block/corefud/delete.py @@ -25,19 +25,15 @@ def is_root_reachable_by_deps(self, node, parents_to_ignore=None): proc_node, path = stack.pop() # root is reachable if proc_node == node.root: - break + return True # path forms a cycle, the root cannot be reached through this branch - if proc_node in path: - continue - for dep in proc_node.deps: - # the root cannot be reached through ignored nodes - if dep['parent'] in parents_to_ignore: - continue - # process the parent recursively - stack.append((dep['parent'], path + [proc_node])) - else: - return False - return True + if proc_node not in path: + for dep in proc_node.deps: + # the root cannot be reached through ignored nodes + if dep['parent'] not in parents_to_ignore: + # process the parent recursively + stack.append((dep['parent'], path + [proc_node])) + return False def _deps_ignore_nodes(self, node, parents_to_ignore): """ Retrieve deps from the node, recursively ignoring specified parents. @@ -46,18 +42,16 @@ def _deps_ignore_nodes(self, node, parents_to_ignore): stack = [(node, [])] while stack: proc_node, skipped_nodes = stack.pop() - # if there is a cycle of skipped nodes, ground the subtree to the root - if proc_node in skipped_nodes: - newdeps.append({'parent': node.root, 'deprel': 'root'}) - continue - for dep in proc_node.deps: - # keep deps with a parent that shouldn't be ignored - if not dep['parent'] in parents_to_ignore: - newdeps.append(dep) - continue - # process the ignored parent recursively - stack.append((dep['parent'], skipped_nodes + [proc_node])) - return newdeps + if proc_node not in skipped_nodes: + for dep in proc_node.deps: + if dep['parent'] in parents_to_ignore: + # process the ignored parent recursively + stack.append((dep['parent'], skipped_nodes + [proc_node])) + else: + # keep deps with a parent that shouldn't be ignored + newdeps.append(dep) + # If no newdeps were found (because of a cycle), return the root. + return newdeps if newdeps else [{'parent': node.root, 'deprel': 'root'}] def process_document(self, doc): # This block should work both with coreference loaded (deserialized) and not. @@ -67,17 +61,14 @@ def process_document(self, doc): if self.empty: for node in root.descendants: # process only the nodes dependent on empty nodes - if not '.' in node.raw_deps: - continue - # just remove empty parents if the root remains reachable - if self.is_root_reachable_by_deps(node, root.empty_nodes): - node.deps = [dep for dep in node.deps if not dep['parent'] in root.empty_nodes] - # otherwise propagate to non-empty ancestors - else: - newdeps = self._deps_ignore_nodes(node, root.empty_nodes) - newdeps_sorted = sorted(set((dep['parent'].ord, dep['deprel']) for dep in newdeps)) - node.raw_deps = '|'.join(f"{p}:{r}" for p, r in newdeps_sorted) - + if '.' in node.raw_deps: + # just remove empty parents if the root remains reachable + if self.is_root_reachable_by_deps(node, root.empty_nodes): + node.deps = [dep for dep in node.deps if not dep['parent'] in root.empty_nodes] + # otherwise propagate to non-empty ancestors + else: + node.deps = self._deps_ignore_nodes(node, root.empty_nodes) + # This needs to be done even if '.' not in node.raw_deps. if '.' in node.misc['Functor'].split(':')[0]: del node.misc['Functor'] root.empty_nodes = [] diff --git a/udapi/core/node.py b/udapi/core/node.py index 0e746c60..863ed90f 100644 --- a/udapi/core/node.py +++ b/udapi/core/node.py @@ -252,7 +252,7 @@ def raw_deps(self): #if self._raw_deps is not None: # return self._raw_deps if self._deps: - self._raw_deps = '|'.join(f"{dep['parent']._ord}:{dep['deprel']}" for dep in self._deps) + self._raw_deps = '|'.join(f"{p}:{r}" for p, r in sorted(set((d['parent'].ord, d['deprel']) for d in self._deps))) return self._raw_deps @raw_deps.setter diff --git a/udapi/core/tests/test_enhdeps.py b/udapi/core/tests/test_enhdeps.py index 3f473bf3..53a74389 100644 --- a/udapi/core/tests/test_enhdeps.py +++ b/udapi/core/tests/test_enhdeps.py @@ -57,7 +57,7 @@ def test_create_deps2empty(self): e.deps.append({'parent': h, 'deprel':'dep:e2h'}) d.deps.append({'parent': e, 'deprel': 'dep:d2e'}) self.assertEqual("2:dep:e2h", e.raw_deps, ) - self.assertEqual("5:conj|3.1:dep:d2e", d.raw_deps) + self.assertEqual("3.1:dep:d2e|5:conj", d.raw_deps) self.assertEqual(self.tree.descendants_and_empty, self.nodes[:3] + [e] + self.nodes[3:]) diff --git a/udapi/core/tests/test_node.py b/udapi/core/tests/test_node.py index 83348c67..f2b64a3d 100755 --- a/udapi/core/tests/test_node.py +++ b/udapi/core/tests/test_node.py @@ -245,5 +245,25 @@ def test_empty_nodes(self): self.assertEqual(root.descendants_and_empty, [e1, e2, e3, e4, e6, e7]) self.assertEqual([n.ord for n in root.descendants_and_empty], [0.1, 0.2, 0.3, 0.4, 0.5, 0.6]) + def test_enh_deps_and_reordering(self): + """Test reordering of node ord in enhanced deps when reorderin/removing nodes.""" + root = Root() + for i in range(3): + root.create_child(form=f'node{i+1}') + + n1, n2, n3 = root.descendants() + n1.raw_deps = '2:nsubj|3:obj' + self.assertEqual(n1.raw_deps, '2:nsubj|3:obj') + self.assertEqual(n1.deps, [{'parent': n2, 'deprel': 'nsubj'}, {'parent': n3, 'deprel': 'obj'}]) + n2.shift_after_node(n3) + self.assertEqual(n1.raw_deps, '2:obj|3:nsubj') + # TODO only node.raw_deps are currently guaranteed to return the deps sorted, not node.deps + #self.assertEqual(n1.deps, [{'parent': n3, 'deprel': 'obj'}, {'parent': n2, 'deprel': 'nsubj'}]) + # TODO: after removing a node, all deps should be updated + #n2.remove() + #self.assertEqual(n1.raw_deps, '2:nsubj') + #self.assertEqual(n1.deps, [{'parent': n3, 'deprel': 'obj'}]) + + if __name__ == "__main__": unittest.main()
Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.
Alternative Proxies: