Skip to content

Commit 42f37fe

Browse files
committed
first draft of support of enhanced dependencies
1 parent edbda80 commit 42f37fe

File tree

2 files changed

+140
-1
lines changed

2 files changed

+140
-1
lines changed

udapi/core/links.py

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
"""Links is a class for storing a set of links with the same source node."""
2+
import collections.abc
3+
import logging
4+
import re
5+
6+
# A single enhanced-dependency link: the node on the other end and the relation label.
Link = collections.namedtuple('Link', 'node relation')


class Links(list):
    """A `list` of `Link` named tuples that all share the same source node.

    Illustrative usage (``node`` and ``node_with_ord5`` stand for existing nodes
    of one tree):

    >>> links = Links(node, '4:nsubj|11:nsubj:pass')
    >>> [(link.node.ord, link.relation) for link in links]
    [(4, 'nsubj'), (11, 'nsubj:pass')]
    >>> links[0] = Link(node=node_with_ord5, relation='obj')
    >>> str(links)
    '5:obj|11:nsubj:pass'

    This class provides access to both
    * a structured (list of named tuples) representation and
    * a string (serialized) representation of the enhanced dependencies.

    Implementation details:
    Unlike `DualDict`
    * the structured internal storage is list, not dict
    * the string representation is always computed on the fly, it is not stored.
    """

    def __init__(self, src_node, string=None):
        """Create the links of `src_node`, optionally parsing a serialized `string`.

        Args:
            src_node: the node all stored links start from. Its `root` is only
                accessed when there is something to parse.
            string: serialization such as ``'4:nsubj|11:nsubj:pass'``.
                `None`, ``''`` and ``'_'`` all produce an empty list.

        Raises:
            ValueError: if an item of `string` has no ``':'`` separator
                or its target id is not an integer.
        """
        self.src_node = src_node
        super().__init__(self._parse(string))

    def _parse(self, string):
        """Return a plain list of `Link` tuples parsed from `string`."""
        # '_' is what `__str__` emits for "no links", so it must be accepted back.
        if string is None or string in ('', '_'):
            return []
        # NOTE(review): indexing by the integer id presumes that
        # `descendants(add_self=1)` returns the root followed by the nodes
        # in `ord` order -- confirm against the tree implementation.
        all_nodes = self.src_node.root.descendants(add_self=1)
        parsed = []
        for edge_str in string.split('|'):
            try:
                # Split on the first colon only: the relation itself may contain
                # colons (subtypes such as 'nsubj:pass').
                trg_node_id, relation = edge_str.split(':', 1)
            except ValueError:
                logging.error("<%s> contains <%s> which does not contain a ':' symbol.",
                              string, edge_str)
                raise
            # TODO allow `trg_node_id`s like 5.1, /zone#1, bundle/zone#1, bundle#1
            trg_node = all_nodes[int(trg_node_id)]
            parsed.append(Link(node=trg_node, relation=relation))
        return parsed

    def __str__(self):
        """Serialize as ``'ord:relation|ord:relation'`` or ``'_'`` when empty."""
        if not self:
            return '_'
        # TODO allow `trg_node_id`s like /zone#1, bundle/zone#1, bundle#1
        return '|'.join('%s:%s' % (link.node.ord, link.relation) for link in self)

    def set_links(self, value):
        """Replace the stored links with those given by a list of tuples or a string.

        If the `value` is None, an empty string or '_', an empty list of links is stored.
        If the `value` is a string, it is parsed as in `__init__`.
        If the `value` is a sequence of `Link` namedtuples, its copy is stored.
        Other types of `value` raise a `ValueError` exception.

        The source node is left untouched, and the previous content is kept
        when parsing fails.
        """
        if value is None:
            self.clear()
        elif isinstance(value, str):
            # Parse before assigning so a malformed string does not wipe the list.
            self[:] = self._parse(value)
        elif isinstance(value, collections.abc.Sequence):
            # Copy first: `value` may be this very object.
            self[:] = list(value)
        else:
            raise ValueError("Unsupported value type " + str(value))

    def __call__(self, following_only=False, preceding_only=False, relations=None):
        """Return a subset of links contained in this list as specified by the args.

        Args:
            following_only: keep only links whose node is preceded by the source node.
            preceding_only: keep only links whose node precedes the source node.
            relations: regular expression; keep only links whose relation
                matches it from the start (`re.match`).

        Returns:
            This very object when no filter is requested, otherwise a new
            `Links` instance with the same source node.
        """
        if not following_only and not preceding_only and relations is None:
            return self
        selected = list(self)
        if preceding_only:
            selected = [link for link in selected if link.node.precedes(self.src_node)]
        if following_only:
            selected = [link for link in selected if self.src_node.precedes(link.node)]
        if relations:
            pattern = re.compile(relations)
            selected = [link for link in selected if pattern.match(link.relation)]
        # The constructor parses strings only, so the structured result is
        # filled into an empty instance.
        subset = Links(self.src_node)
        subset.extend(selected)
        return subset

    @property
    def nodes(self):
        """Return a list of the target nodes (without relations)."""
        return [link.node for link in self]

    # TODO make sure backlinks are created and updated
    def TODO__setitem__(self, index, new_value):
        """Unfinished draft of item assignment that also maintains backlinks.

        The `TODO` name prefix means this does not override `list.__setitem__`.
        """
        old_value = self[index]
        old_value.node._enh_children = [
            link for link in old_value.node._enh_children if link != old_value]
        if new_value.node._enh_children is None:
            new_value.node._enh_children = Links(new_value.node, None)
        super().__setitem__(index, new_value)

udapi/core/node.py

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from udapi.block.write.textmodetrees import TextModeTrees
77
from udapi.core.dualdict import DualDict
88
from udapi.core.feats import Feats
9+
from udapi.core.links import Links
910

1011
# Pylint complains when we access e.g. node.parent._children or root._descendants
1112
# because it does not know that node.parent is the same class (Node)
@@ -69,14 +70,16 @@ class Node(object):
6970
'_misc', # Any other annotation as udapi.core.dualdict.DualDict object.
7071
'_raw_deps', # Enhanced dependencies (head-deprel pairs) in their original CoNLLU format.
7172
'_deps', # Deserialized enhanced dependencies in a list of {parent, deprel} dicts.
73+
'_enh_parents', # Enhanced dependencies (head-deprel pairs) as EnhDeps object.
74+
'_enh_children', # Enhanced dependencies (child-deprel pairs) as EnhDeps object.
7275
'_feats', # Morphological features as udapi.core.feats.Feats object.
7376
'_parent', # Parent node.
7477
'_children', # Ord-ordered list of child nodes.
7578
'_mwt', # multi-word token in which this word participates
7679
]
7780

7881
def __init__(self, form=None, lemma=None, upos=None, # pylint: disable=too-many-arguments
79-
xpos=None, feats=None, deprel=None, misc=None):
82+
xpos=None, feats=None, deprel=None, enh_parents=None, misc=None):
8083
"""Create a new node and initialize its attributes using the keyword arguments."""
8184
self.ord = None
8285
self.form = form
@@ -86,6 +89,10 @@ def __init__(self, form=None, lemma=None, upos=None, # pylint: disable=too-many-
8689
self._feats = Feats(string=feats)
8790
self.deprel = deprel
8891
self._misc = DualDict(string=misc)
92+
self._enh_parents = None
93+
if enh_parents is not None and enh_parents != '_':
94+
self._enh_parents = Links(self, enh_parents)
95+
self._enh_children = None
8996
self._raw_deps = '_'
9097
self._deps = None
9198
self._parent = None
@@ -151,6 +158,36 @@ def misc(self):
151158
def misc(self, value):
152159
self._misc.set_mapping(value)
153160

161+
@property
def enh_parents(self):
    """Return the enhanced-dependency links (parent, deprel) of this node.

    The `Links` container is created on first access when none is stored yet,
    so the returned value is never `None`.

    To get just the parent nodes (without deprels) use
    `enhanced_parents = node.enh_parents.nodes`
    """
    links = self._enh_parents
    if links is None:
        links = Links(self, None)
        self._enh_parents = links
    return links
171+
172+
@enh_parents.setter
def enh_parents(self, value):
    """Set the enhanced-dependency parents from a string or a sequence of links.

    `None` and the `'_'` placeholder both mean "no enhanced parents": nothing
    is allocated if no container exists yet, and an existing container is emptied.
    Any other value is handed to `Links.set_links`.
    """
    # Treat '_' as empty on every path, as `__init__` does.
    if isinstance(value, str) and value == '_':
        value = None
    if self._enh_parents is not None:
        self._enh_parents.set_links(value)
    elif value is not None:
        # Go through `set_links` so that sequences are accepted as well as strings;
        # store the container only once it has been filled successfully.
        links = Links(self, None)
        links.set_links(value)
        self._enh_parents = links
179+
180+
@property
def enh_children(self):
    """Return the enhanced-dependency links (child, deprel) of this node.

    The `Links` container is created on first access when none is stored yet,
    so the returned value is never `None`.

    To get just the child nodes (without deprels) use
    `enhanced_children = node.enh_children.nodes`
    """
    links = self._enh_children
    if links is None:
        links = Links(self, None)
        self._enh_children = links
    return links
190+
154191
@property
155192
def raw_deps(self):
156193
"""String serialization of enhanced dependencies as stored in CoNLL-U files.

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy