@inproceedings{roussinov-sharoff-2023-bert,
title = "{BERT} Goes Off-Topic: Investigating the Domain Transfer Challenge using Genre Classification",
author = "Roussinov, Dmitri and
Sharoff, Serge",
editor = "Bouamor, Houda and
Pino, Juan and
Bali, Kalika",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2023",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.findings-emnlp.34/",
doi = "10.18653/v1/2023.findings-emnlp.34",
pages = "468--483",
abstract = "While performance of many text classification tasks has been recently improved due to Pretrained Language Models (PLMs), in this paper we show that they still suffer from a performance gap when the underlying distribution of topics changes. For example, a genre classifier trained on political topics often fails when tested on documents in the same genre, but about sport or medicine. In this work, we quantify this phenomenon empirically with a large corpus and a large set of topics. Thus, we verify that domain transfer remains challenging both for classic PLMs, such as BERT, and for modern large models (LLMs), such as GPT. We develop a data augmentation approach by generating texts in any desired genre and on any desired topic, even when there are no documents in the training corpus that are both in that particular genre and on that particular topic. When we augment the training dataset with the topically-controlled synthetic texts, F1 improves up to 50{\%} for some topics, approaching on-topic training, while showing no or next to no improvement for other topics. While our empirical results focus on genre classification, our methodology is applicable to other classification tasks such as gender, authorship, or sentiment classification."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="roussinov-sharoff-2023-bert">
<titleInfo>
<title>BERT Goes Off-Topic: Investigating the Domain Transfer Challenge using Genre Classification</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dmitri</namePart>
<namePart type="family">Roussinov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Serge</namePart>
<namePart type="family">Sharoff</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2023</title>
</titleInfo>
<name type="personal">
<namePart type="given">Houda</namePart>
<namePart type="family">Bouamor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Juan</namePart>
<namePart type="family">Pino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kalika</namePart>
<namePart type="family">Bali</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Singapore</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>While performance of many text classification tasks has been recently improved due to Pretrained Language Models (PLMs), in this paper we show that they still suffer from a performance gap when the underlying distribution of topics changes. For example, a genre classifier trained on political topics often fails when tested on documents in the same genre, but about sport or medicine. In this work, we quantify this phenomenon empirically with a large corpus and a large set of topics. Thus, we verify that domain transfer remains challenging both for classic PLMs, such as BERT, and for modern large models (LLMs), such as GPT. We develop a data augmentation approach by generating texts in any desired genre and on any desired topic, even when there are no documents in the training corpus that are both in that particular genre and on that particular topic. When we augment the training dataset with the topically-controlled synthetic texts, F1 improves up to 50% for some topics, approaching on-topic training, while showing no or next to no improvement for other topics. While our empirical results focus on genre classification, our methodology is applicable to other classification tasks such as gender, authorship, or sentiment classification.</abstract>
<identifier type="citekey">roussinov-sharoff-2023-bert</identifier>
<identifier type="doi">10.18653/v1/2023.findings-emnlp.34</identifier>
<location>
<url>https://aclanthology.org/2023.findings-emnlp.34/</url>
</location>
<part>
<date>2023-12</date>
<extent unit="page">
<start>468</start>
<end>483</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T BERT Goes Off-Topic: Investigating the Domain Transfer Challenge using Genre Classification
%A Roussinov, Dmitri
%A Sharoff, Serge
%Y Bouamor, Houda
%Y Pino, Juan
%Y Bali, Kalika
%S Findings of the Association for Computational Linguistics: EMNLP 2023
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F roussinov-sharoff-2023-bert
%X While performance of many text classification tasks has been recently improved due to Pretrained Language Models (PLMs), in this paper we show that they still suffer from a performance gap when the underlying distribution of topics changes. For example, a genre classifier trained on political topics often fails when tested on documents in the same genre, but about sport or medicine. In this work, we quantify this phenomenon empirically with a large corpus and a large set of topics. Thus, we verify that domain transfer remains challenging both for classic PLMs, such as BERT, and for modern large models (LLMs), such as GPT. We develop a data augmentation approach by generating texts in any desired genre and on any desired topic, even when there are no documents in the training corpus that are both in that particular genre and on that particular topic. When we augment the training dataset with the topically-controlled synthetic texts, F1 improves up to 50% for some topics, approaching on-topic training, while showing no or next to no improvement for other topics. While our empirical results focus on genre classification, our methodology is applicable to other classification tasks such as gender, authorship, or sentiment classification.
%R 10.18653/v1/2023.findings-emnlp.34
%U https://aclanthology.org/2023.findings-emnlp.34/
%U https://doi.org/10.18653/v1/2023.findings-emnlp.34
%P 468-483
Markdown (Informal)
[BERT Goes Off-Topic: Investigating the Domain Transfer Challenge using Genre Classification](https://aclanthology.org/2023.findings-emnlp.34/) (Roussinov & Sharoff, Findings 2023)
ACL
Dmitri Roussinov and Serge Sharoff. 2023. BERT Goes Off-Topic: Investigating the Domain Transfer Challenge using Genre Classification. In Findings of the Association for Computational Linguistics: EMNLP 2023, pages 468–483, Singapore. Association for Computational Linguistics.