Skip to content

Commit 7459d35

Browse files
committed
Updated processing of Old Czech data.
1 parent 0abec00 commit 7459d35

File tree

3 files changed: 15 additions and 6 deletions.

udapi/block/ud/cs/addmwt.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ def multiword_analysis(self, node):
126126
# could be masculine or neuter. We pick Gender=Masc and Animacy=Anim
127127
# by default, unless the original token was annotated as Animacy=Inan
128128
# or Gender=Neut.
129-
m = re.match(r"^(na|o|pro|přěde|ski?rz[eě]|za)[nň](ž?)$", node.form.lower())
129+
m = re.match(r"^(na|nade|o|pro|přěde|ski?rz[eě]|za)[nň](ž?)$", node.form.lower())
130130
if m:
131131
node.misc['AddMwt'] = ''
132132
# Remove vocalization from 'přěde' (přěd něj) but keep it in 'skrze'

udapi/block/ud/cs/markfeatsbugs.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,8 @@ def process_node(self, node):
3737
'Gender': ['Neut'],
3838
'Number': ['Sing', 'Dual', 'Plur'],
3939
'Case': ['Nom', 'Gen', 'Dat', 'Acc', 'Voc', 'Loc', 'Ins'],
40-
'Foreign': ['Yes']
40+
'Foreign': ['Yes'],
41+
'Abbr': ['Yes']
4142
})
4243
elif node.feats['Gender'] == 'Masc':
4344
self.check_required_features(node, ['Animacy'])
@@ -46,13 +47,15 @@ def process_node(self, node):
4647
'Animacy': ['Anim', 'Inan'],
4748
'Number': ['Sing', 'Dual', 'Plur'],
4849
'Case': ['Nom', 'Gen', 'Dat', 'Acc', 'Voc', 'Loc', 'Ins'],
49-
'Foreign': ['Yes']})
50+
'Foreign': ['Yes'],
51+
'Abbr': ['Yes']})
5052
else:
5153
self.check_allowed_features(node, {
5254
'Gender': ['Masc', 'Fem', 'Neut'],
5355
'Number': ['Sing', 'Dual', 'Plur'],
5456
'Case': ['Nom', 'Gen', 'Dat', 'Acc', 'Voc', 'Loc', 'Ins'],
55-
'Foreign': ['Yes']})
57+
'Foreign': ['Yes'],
58+
'Abbr': ['Yes']})
5659
# PROPER NOUNS #########################################################
5760
elif node.upos == 'PROPN':
5861
self.check_required_features(node, ['Gender', 'Number', 'Case'])
@@ -64,14 +67,16 @@ def process_node(self, node):
6467
'Number': ['Sing', 'Dual', 'Plur'],
6568
'Case': ['Nom', 'Gen', 'Dat', 'Acc', 'Voc', 'Loc', 'Ins'],
6669
'NameType': ['Giv', 'Sur', 'Geo', 'Nat'],
67-
'Foreign': ['Yes']})
70+
'Foreign': ['Yes'],
71+
'Abbr': ['Yes']})
6872
else:
6973
self.check_allowed_features(node, {
7074
'Gender': ['Masc', 'Fem', 'Neut'],
7175
'Number': ['Sing', 'Dual', 'Plur'],
7276
'Case': ['Nom', 'Gen', 'Dat', 'Acc', 'Voc', 'Loc', 'Ins'],
7377
'NameType': ['Giv', 'Sur', 'Geo', 'Nat'],
74-
'Foreign': ['Yes']})
78+
'Foreign': ['Yes'],
79+
'Abbr': ['Yes']})
7580
# ADJECTIVES ###########################################################
7681
elif node.upos == 'ADJ':
7782
if node.feats['Poss'] == 'Yes': # possessive adjectives

udapi/block/ud/fixadvmodbyupos.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@ def process_node(self, node):
5151
elif node.udeprel == 'mark':
5252
if node.upos in ['PRON', 'DET']:
5353
node.deprel = 'nsubj' # it could be also obj, iobj, obl or nmod; just guessing what might be more probable
54+
elif node.upos == 'NOUN':
55+
node.deprel = 'obl'
5456
elif node.upos == 'INTJ':
5557
node.deprel = 'discourse'
5658
elif node.udeprel == 'cc':
@@ -71,6 +73,8 @@ def process_node(self, node):
7173
node.deprel = 'aux'
7274
elif node.upos == 'VERB':
7375
node.deprel = 'dep'
76+
elif node.upos == 'SCONJ':
77+
node.deprel = 'mark'
7478
elif node.upos == 'X':
7579
node.deprel = 'dep'
7680
elif node.udeprel == 'nummod':

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

pFad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy