From c28dfd34a6ad931fa752f73789db42f3d592a7f7 Mon Sep 17 00:00:00 2001 From: Martin Popel Date: Mon, 15 Apr 2024 20:11:37 +0200 Subject: [PATCH 1/3] refactor corefud.Delete It may be subjective, but I find the code easier to understand this way. --- udapi/block/corefud/delete.py | 42 +++++++++++++++-------------------- 1 file changed, 18 insertions(+), 24 deletions(-) diff --git a/udapi/block/corefud/delete.py b/udapi/block/corefud/delete.py index 4e68e8dd..fe525941 100644 --- a/udapi/block/corefud/delete.py +++ b/udapi/block/corefud/delete.py @@ -25,19 +25,15 @@ def is_root_reachable_by_deps(self, node, parents_to_ignore=None): proc_node, path = stack.pop() # root is reachable if proc_node == node.root: - break + return True # path forms a cycle, the root cannot be reached through this branch - if proc_node in path: - continue - for dep in proc_node.deps: - # the root cannot be reached through ignored nodes - if dep['parent'] in parents_to_ignore: - continue - # process the parent recursively - stack.append((dep['parent'], path + [proc_node])) - else: - return False - return True + if proc_node not in path: + for dep in proc_node.deps: + # the root cannot be reached through ignored nodes + if dep['parent'] not in parents_to_ignore: + # process the parent recursively + stack.append((dep['parent'], path + [proc_node])) + return False def _deps_ignore_nodes(self, node, parents_to_ignore): """ Retrieve deps from the node, recursively ignoring specified parents. 
@@ -46,18 +42,16 @@ def _deps_ignore_nodes(self, node, parents_to_ignore): stack = [(node, [])] while stack: proc_node, skipped_nodes = stack.pop() - # if there is a cycle of skipped nodes, ground the subtree to the root - if proc_node in skipped_nodes: - newdeps.append({'parent': node.root, 'deprel': 'root'}) - continue - for dep in proc_node.deps: - # keep deps with a parent that shouldn't be ignored - if not dep['parent'] in parents_to_ignore: - newdeps.append(dep) - continue - # process the ignored parent recursively - stack.append((dep['parent'], skipped_nodes + [proc_node])) - return newdeps + if proc_node not in skipped_nodes: + for dep in proc_node.deps: + if dep['parent'] in parents_to_ignore: + # process the ignored parent recursively + stack.append((dep['parent'], skipped_nodes + [proc_node])) + else: + # keep deps with a parent that shouldn't be ignored + newdeps.append(dep) + # If no newdeps were found (because of a cycle), return the root. + return newdeps if newdeps else [{'parent': node.root, 'deprel': 'root'}] def process_document(self, doc): # This block should work both with coreference loaded (deserialized) and not. From a2dc30114e9749a4b4c44fcca16d1faec6e825e4 Mon Sep 17 00:00:00 2001 From: Martin Popel Date: Mon, 15 Apr 2024 20:13:02 +0200 Subject: [PATCH 2/3] corefud.Delete bugfix: Functor may refer even to nodes outside DEPS --- udapi/block/corefud/delete.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/udapi/block/corefud/delete.py b/udapi/block/corefud/delete.py index fe525941..d2a6197d 100644 --- a/udapi/block/corefud/delete.py +++ b/udapi/block/corefud/delete.py @@ -61,17 +61,16 @@ def process_document(self, doc): if self.empty: for node in root.descendants: # process only the nodes dependent on empty nodes - if not '.' 
in node.raw_deps: - continue - # just remove empty parents if the root remains reachable - if self.is_root_reachable_by_deps(node, root.empty_nodes): - node.deps = [dep for dep in node.deps if not dep['parent'] in root.empty_nodes] - # otherwise propagate to non-empty ancestors - else: - newdeps = self._deps_ignore_nodes(node, root.empty_nodes) - newdeps_sorted = sorted(set((dep['parent'].ord, dep['deprel']) for dep in newdeps)) - node.raw_deps = '|'.join(f"{p}:{r}" for p, r in newdeps_sorted) - + if '.' in node.raw_deps: + # just remove empty parents if the root remains reachable + if self.is_root_reachable_by_deps(node, root.empty_nodes): + node.deps = [dep for dep in node.deps if not dep['parent'] in root.empty_nodes] + # otherwise propagate to non-empty ancestors + else: + newdeps = self._deps_ignore_nodes(node, root.empty_nodes) + newdeps_sorted = sorted(set((dep['parent'].ord, dep['deprel']) for dep in newdeps)) + node.raw_deps = '|'.join(f"{p}:{r}" for p, r in newdeps_sorted) + # This needs to be done even if '.' not in node.raw_deps. if '.' in node.misc['Functor'].split(':')[0]: del node.misc['Functor'] root.empty_nodes = [] From 07864f922ba436fa0213aea781210a77981a2cd9 Mon Sep 17 00:00:00 2001 From: Martin Popel Date: Mon, 15 Apr 2024 20:20:00 +0200 Subject: [PATCH 3/3] simplify the corefud.Delete code by using deps instead of raw_deps Fix node.raw_deps, so that it always sorts and removes duplicates, so that the output CoNLL-U files are valid. Add tests. 
--- udapi/block/corefud/delete.py | 4 +--- udapi/core/node.py | 2 +- udapi/core/tests/test_enhdeps.py | 2 +- udapi/core/tests/test_node.py | 20 ++++++++++++++++++++ 4 files changed, 23 insertions(+), 5 deletions(-) diff --git a/udapi/block/corefud/delete.py b/udapi/block/corefud/delete.py index d2a6197d..5aaf94e7 100644 --- a/udapi/block/corefud/delete.py +++ b/udapi/block/corefud/delete.py @@ -67,9 +67,7 @@ def process_document(self, doc): node.deps = [dep for dep in node.deps if not dep['parent'] in root.empty_nodes] # otherwise propagate to non-empty ancestors else: - newdeps = self._deps_ignore_nodes(node, root.empty_nodes) - newdeps_sorted = sorted(set((dep['parent'].ord, dep['deprel']) for dep in newdeps)) - node.raw_deps = '|'.join(f"{p}:{r}" for p, r in newdeps_sorted) + node.deps = self._deps_ignore_nodes(node, root.empty_nodes) # This needs to be done even if '.' not in node.raw_deps. if '.' in node.misc['Functor'].split(':')[0]: del node.misc['Functor'] diff --git a/udapi/core/node.py b/udapi/core/node.py index 0e746c60..863ed90f 100644 --- a/udapi/core/node.py +++ b/udapi/core/node.py @@ -252,7 +252,7 @@ def raw_deps(self): #if self._raw_deps is not None: # return self._raw_deps if self._deps: - self._raw_deps = '|'.join(f"{dep['parent']._ord}:{dep['deprel']}" for dep in self._deps) + self._raw_deps = '|'.join(f"{p}:{r}" for p, r in sorted(set((d['parent'].ord, d['deprel']) for d in self._deps))) return self._raw_deps @raw_deps.setter diff --git a/udapi/core/tests/test_enhdeps.py b/udapi/core/tests/test_enhdeps.py index 3f473bf3..53a74389 100644 --- a/udapi/core/tests/test_enhdeps.py +++ b/udapi/core/tests/test_enhdeps.py @@ -57,7 +57,7 @@ def test_create_deps2empty(self): e.deps.append({'parent': h, 'deprel':'dep:e2h'}) d.deps.append({'parent': e, 'deprel': 'dep:d2e'}) self.assertEqual("2:dep:e2h", e.raw_deps, ) - self.assertEqual("5:conj|3.1:dep:d2e", d.raw_deps) + self.assertEqual("3.1:dep:d2e|5:conj", d.raw_deps) 
self.assertEqual(self.tree.descendants_and_empty, self.nodes[:3] + [e] + self.nodes[3:]) diff --git a/udapi/core/tests/test_node.py b/udapi/core/tests/test_node.py index 83348c67..f2b64a3d 100755 --- a/udapi/core/tests/test_node.py +++ b/udapi/core/tests/test_node.py @@ -245,5 +245,25 @@ def test_empty_nodes(self): self.assertEqual(root.descendants_and_empty, [e1, e2, e3, e4, e6, e7]) self.assertEqual([n.ord for n in root.descendants_and_empty], [0.1, 0.2, 0.3, 0.4, 0.5, 0.6]) + def test_enh_deps_and_reordering(self): + """Test reordering of node ord in enhanced deps when reorderin/removing nodes.""" + root = Root() + for i in range(3): + root.create_child(form=f'node{i+1}') + + n1, n2, n3 = root.descendants() + n1.raw_deps = '2:nsubj|3:obj' + self.assertEqual(n1.raw_deps, '2:nsubj|3:obj') + self.assertEqual(n1.deps, [{'parent': n2, 'deprel': 'nsubj'}, {'parent': n3, 'deprel': 'obj'}]) + n2.shift_after_node(n3) + self.assertEqual(n1.raw_deps, '2:obj|3:nsubj') + # TODO only node.raw_deps are currently guaranteed to return the deps sorted, not node.deps + #self.assertEqual(n1.deps, [{'parent': n3, 'deprel': 'obj'}, {'parent': n2, 'deprel': 'nsubj'}]) + # TODO: after removing a node, all deps should be updated + #n2.remove() + #self.assertEqual(n1.raw_deps, '2:nsubj') + #self.assertEqual(n1.deps, [{'parent': n3, 'deprel': 'obj'}]) + + if __name__ == "__main__": unittest.main() pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy