@inproceedings{booth-etal-2020-penn,
title = "A {P}enn-style Treebank of {M}iddle {L}ow {G}erman",
author = "Booth, Hannah and
Breitbarth, Anne and
Ecay, Aaron and
Farasyn, Melissa",
editor = "Calzolari, Nicoletta and
B{\'e}chet, Fr{\'e}d{\'e}ric and
Blache, Philippe and
Choukri, Khalid and
Cieri, Christopher and
Declerck, Thierry and
Goggi, Sara and
Isahara, Hitoshi and
Maegaard, Bente and
Mariani, Joseph and
Mazo, H{\'e}l{\`e}ne and
Moreno, Asuncion and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Twelfth Language Resources and Evaluation Conference",
month = may,
year = "2020",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2020.lrec-1.96/",
pages = "766--775",
language = "eng",
ISBN = "979-10-95546-34-4",
abstract = "We outline the issues and decisions involved in creating a Penn-style treebank of Middle Low German (MLG, 1200-1650), which will form part of the Corpus of Historical Low German (CHLG). The attestation for MLG is rich, but the syntax of the language remains relatively understudied. The development of a syntactically annotated corpus for the language will facilitate future studies with a strong empirical basis, building on recent work which indicates that, syntactically, MLG occupies a position in its own right within West Germanic. In this paper, we describe the background for the corpus and the process by which texts were selected to be included. In particular, we focus on the decisions involved in the syntactic annotation of the corpus, specifically, the practical and linguistic reasons for adopting the Penn annotation scheme, the stages of the annotation process itself, and how we have adapted the Penn scheme for syntactic features specific to MLG. We also discuss the issue of data uncertainty, which is a major issue when building a corpus of an under-researched language stage like MLG, and some novel ways in which we capture this uncertainty in the annotation."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="booth-etal-2020-penn">
<titleInfo>
<title>A Penn-style Treebank of Middle Low German</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hannah</namePart>
<namePart type="family">Booth</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anne</namePart>
<namePart type="family">Breitbarth</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aaron</namePart>
<namePart type="family">Ecay</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Melissa</namePart>
<namePart type="family">Farasyn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Twelfth Language Resources and Evaluation Conference</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Frédéric</namePart>
<namePart type="family">Béchet</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Philippe</namePart>
<namePart type="family">Blache</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christopher</namePart>
<namePart type="family">Cieri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Goggi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hitoshi</namePart>
<namePart type="family">Isahara</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hélène</namePart>
<namePart type="family">Mazo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-10-95546-34-4</identifier>
</relatedItem>
<abstract>We outline the issues and decisions involved in creating a Penn-style treebank of Middle Low German (MLG, 1200-1650), which will form part of the Corpus of Historical Low German (CHLG). The attestation for MLG is rich, but the syntax of the language remains relatively understudied. The development of a syntactically annotated corpus for the language will facilitate future studies with a strong empirical basis, building on recent work which indicates that, syntactically, MLG occupies a position in its own right within West Germanic. In this paper, we describe the background for the corpus and the process by which texts were selected to be included. In particular, we focus on the decisions involved in the syntactic annotation of the corpus, specifically, the practical and linguistic reasons for adopting the Penn annotation scheme, the stages of the annotation process itself, and how we have adapted the Penn scheme for syntactic features specific to MLG. We also discuss the issue of data uncertainty, which is a major issue when building a corpus of an under-researched language stage like MLG, and some novel ways in which we capture this uncertainty in the annotation.</abstract>
<identifier type="citekey">booth-etal-2020-penn</identifier>
<location>
<url>https://aclanthology.org/2020.lrec-1.96/</url>
</location>
<part>
<date>2020-05</date>
<extent unit="page">
<start>766</start>
<end>775</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Penn-style Treebank of Middle Low German
%A Booth, Hannah
%A Breitbarth, Anne
%A Ecay, Aaron
%A Farasyn, Melissa
%Y Calzolari, Nicoletta
%Y Béchet, Frédéric
%Y Blache, Philippe
%Y Choukri, Khalid
%Y Cieri, Christopher
%Y Declerck, Thierry
%Y Goggi, Sara
%Y Isahara, Hitoshi
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Mazo, Hélène
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Twelfth Language Resources and Evaluation Conference
%D 2020
%8 May
%I European Language Resources Association
%C Marseille, France
%@ 979-10-95546-34-4
%G eng
%F booth-etal-2020-penn
%X We outline the issues and decisions involved in creating a Penn-style treebank of Middle Low German (MLG, 1200-1650), which will form part of the Corpus of Historical Low German (CHLG). The attestation for MLG is rich, but the syntax of the language remains relatively understudied. The development of a syntactically annotated corpus for the language will facilitate future studies with a strong empirical basis, building on recent work which indicates that, syntactically, MLG occupies a position in its own right within West Germanic. In this paper, we describe the background for the corpus and the process by which texts were selected to be included. In particular, we focus on the decisions involved in the syntactic annotation of the corpus, specifically, the practical and linguistic reasons for adopting the Penn annotation scheme, the stages of the annotation process itself, and how we have adapted the Penn scheme for syntactic features specific to MLG. We also discuss the issue of data uncertainty, which is a major issue when building a corpus of an under-researched language stage like MLG, and some novel ways in which we capture this uncertainty in the annotation.
%U https://aclanthology.org/2020.lrec-1.96/
%P 766-775
Markdown (Informal)
[A Penn-style Treebank of Middle Low German](https://aclanthology.org/2020.lrec-1.96/) (Booth et al., LREC 2020)
ACL
- Hannah Booth, Anne Breitbarth, Aaron Ecay, and Melissa Farasyn. 2020. A Penn-style Treebank of Middle Low German. In Proceedings of the Twelfth Language Resources and Evaluation Conference, pages 766–775, Marseille, France. European Language Resources Association.