% Conference paper in the LREC-COLING 2024 proceedings (record 2024.lrec-main.619 on aclanthology.org).
% "Transformer" in the title and the venue acronym in the booktitle are brace-protected so that
% sentence-casing bibliography styles keep their capitals.
% NOTE(review): address carries the conference location as exported; confirm whether the target
% style expects the publisher's city here instead.
@inproceedings{zhang-hollenstein-2024-eye,
  title     = {Eye-Tracking Features Masking {Transformer} Attention in Question-Answering Tasks},
  author    = {Zhang, Leran and
               Hollenstein, Nora},
  editor    = {Calzolari, Nicoletta and
               Kan, Min-Yen and
               Hoste, Veronique and
               Lenci, Alessandro and
               Sakti, Sakriani and
               Xue, Nianwen},
  booktitle = {Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation ({LREC-COLING} 2024)},
  month     = may,
  year      = {2024},
  address   = {Torino, Italia},
  publisher = {ELRA and ICCL},
  url       = {https://aclanthology.org/2024.lrec-main.619/},
  pages     = {7057--7070},
  abstract  = {Eye movement features are considered to be direct signals reflecting human attention distribution with a low cost to obtain, inspiring researchers to augment language models with eye-tracking (ET) data. In this study, we select first fixation duration (FFD) and total reading time (TRT) as the cognitive signals to guide Transformer attention in question-answering (QA) tasks. We design three different ET attention masks based on the two features, either collected from human reading events or generated by a gaze-predicting model. We augment BERT and ALBERT models with attention masks structured based on the ET data. We find that augmenting a model with ET data carries linguistic features complementing the information captured by the model. It improves the models' performance but compromises the stability. Different Transformer models benefit from different types of ET attention masks, while ALBERT performs better than BERT. Moreover, ET data collected from real-life reading events has better model augmenting ability than the model-predicted data.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single record, citekey zhang-hollenstein-2024-eye. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="zhang-hollenstein-2024-eye">
    <!-- Title of the paper itself -->
    <titleInfo>
      <title>Eye-Tracking Features Masking Transformer Attention in Question-Answering Tasks</title>
    </titleInfo>
    <!-- Authors, in listed order -->
    <name type="personal">
      <namePart type="given">Leran</namePart>
      <namePart type="family">Zhang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Nora</namePart>
      <namePart type="family">Hollenstein</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2024-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its editors, publisher and place -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Nicoletta</namePart>
        <namePart type="family">Calzolari</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Min-Yen</namePart>
        <namePart type="family">Kan</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Veronique</namePart>
        <namePart type="family">Hoste</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Alessandro</namePart>
        <namePart type="family">Lenci</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Sakriani</namePart>
        <namePart type="family">Sakti</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Nianwen</namePart>
        <namePart type="family">Xue</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>ELRA and ICCL</publisher>
        <place>
          <placeTerm type="text">Torino, Italia</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Eye movement features are considered to be direct signals reflecting human attention distribution with a low cost to obtain, inspiring researchers to augment language models with eye-tracking (ET) data. In this study, we select first fixation duration (FFD) and total reading time (TRT) as the cognitive signals to guide Transformer attention in question-answering (QA) tasks. We design three different ET attention masks based on the two features, either collected from human reading events or generated by a gaze-predicting model. We augment BERT and ALBERT models with attention masks structured based on the ET data. We find that augmenting a model with ET data carries linguistic features complementing the information captured by the model. It improves the models’ performance but compromises the stability. Different Transformer models benefit from different types of ET attention masks, while ALBERT performs better than BERT. Moreover, ET data collected from real-life reading events has better model augmenting ability than the model-predicted data.</abstract>
    <identifier type="citekey">zhang-hollenstein-2024-eye</identifier>
    <location>
      <url>https://aclanthology.org/2024.lrec-main.619/</url>
    </location>
    <!-- Issue date and page extent of the paper within the host volume -->
    <part>
      <date>2024-05</date>
      <extent unit="page">
        <start>7057</start>
        <end>7070</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Eye-Tracking Features Masking Transformer Attention in Question-Answering Tasks
%A Zhang, Leran
%A Hollenstein, Nora
%Y Calzolari, Nicoletta
%Y Kan, Min-Yen
%Y Hoste, Veronique
%Y Lenci, Alessandro
%Y Sakti, Sakriani
%Y Xue, Nianwen
%S Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F zhang-hollenstein-2024-eye
%X Eye movement features are considered to be direct signals reflecting human attention distribution with a low cost to obtain, inspiring researchers to augment language models with eye-tracking (ET) data. In this study, we select first fixation duration (FFD) and total reading time (TRT) as the cognitive signals to guide Transformer attention in question-answering (QA) tasks. We design three different ET attention masks based on the two features, either collected from human reading events or generated by a gaze-predicting model. We augment BERT and ALBERT models with attention masks structured based on the ET data. We find that augmenting a model with ET data carries linguistic features complementing the information captured by the model. It improves the models’ performance but compromises the stability. Different Transformer models benefit from different types of ET attention masks, while ALBERT performs better than BERT. Moreover, ET data collected from real-life reading events has better model augmenting ability than the model-predicted data.
%U https://aclanthology.org/2024.lrec-main.619/
%P 7057-7070
Markdown (Informal)
[Eye-Tracking Features Masking Transformer Attention in Question-Answering Tasks](https://aclanthology.org/2024.lrec-main.619/) (Zhang & Hollenstein, LREC-COLING 2024)
ACL