@inproceedings{weiss-etal-2024-share,
title = "To Share or Not to Share: What Risks Would Laypeople Accept to Give Sensitive Data to Differentially-Private {NLP} Systems?",
author = "Weiss, Christopher and
Kreuter, Frauke and
Habernal, Ivan",
editor = "Calzolari, Nicoletta and
Kan, Min-Yen and
Hoste, Veronique and
Lenci, Alessandro and
Sakti, Sakriani and
Xue, Nianwen",
booktitle = "Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.lrec-main.1419/",
pages = "16331--16342",
abstract = "Although the NLP community has adopted central differential privacy as a go-to framework for privacy-preserving model training or data sharing, the choice and interpretation of the key parameter, privacy budget $\varepsilon$ that governs the strength of privacy protection, remains largely arbitrary. We argue that determining the $\varepsilon$ value should not be solely in the hands of researchers or system developers, but must also take into account the actual people who share their potentially sensitive data. In other words: Would \textit{you} share your instant messages for $\varepsilon$ of 10? We address this research gap by designing, implementing, and conducting a behavioral experiment (311 lay participants) to study the behavior of people in uncertain decision-making situations with respect to privacy-threatening situations. Framing the risk perception in terms of two realistic NLP scenarios and using a vignette behavioral study help us determine what $\varepsilon$ thresholds would lead lay people to be willing to share sensitive textual data {--} to our knowledge, the first study of its kind."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="weiss-etal-2024-share">
<titleInfo>
<title>To Share or Not to Share: What Risks Would Laypeople Accept to Give Sensitive Data to Differentially-Private NLP Systems?</title>
</titleInfo>
<name type="personal">
<namePart type="given">Christopher</namePart>
<namePart type="family">Weiss</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Frauke</namePart>
<namePart type="family">Kreuter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ivan</namePart>
<namePart type="family">Habernal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min-Yen</namePart>
<namePart type="family">Kan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Veronique</namePart>
<namePart type="family">Hoste</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Lenci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sakriani</namePart>
<namePart type="family">Sakti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nianwen</namePart>
<namePart type="family">Xue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Although the NLP community has adopted central differential privacy as a go-to framework for privacy-preserving model training or data sharing, the choice and interpretation of the key parameter, privacy budget ǎrepsilon that governs the strength of privacy protection, remains largely arbitrary. We argue that determining the ǎrepsilon value should not be solely in the hands of researchers or system developers, but must also take into account the actual people who share their potentially sensitive data. In other words: Would you share your instant messages for ǎrepsilon of 10? We address this research gap by designing, implementing, and conducting a behavioral experiment (311 lay participants) to study the behavior of people in uncertain decision-making situations with respect to privacy-threatening situations. Framing the risk perception in terms of two realistic NLP scenarios and using a vignette behavioral study help us determine what ǎrepsilon thresholds would lead lay people to be willing to share sensitive textual data – to our knowledge, the first study of its kind.</abstract>
<identifier type="citekey">weiss-etal-2024-share</identifier>
<location>
<url>https://aclanthology.org/2024.lrec-main.1419/</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>16331</start>
<end>16342</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T To Share or Not to Share: What Risks Would Laypeople Accept to Give Sensitive Data to Differentially-Private NLP Systems?
%A Weiss, Christopher
%A Kreuter, Frauke
%A Habernal, Ivan
%Y Calzolari, Nicoletta
%Y Kan, Min-Yen
%Y Hoste, Veronique
%Y Lenci, Alessandro
%Y Sakti, Sakriani
%Y Xue, Nianwen
%S Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F weiss-etal-2024-share
%X Although the NLP community has adopted central differential privacy as a go-to framework for privacy-preserving model training or data sharing, the choice and interpretation of the key parameter, privacy budget ǎrepsilon that governs the strength of privacy protection, remains largely arbitrary. We argue that determining the ǎrepsilon value should not be solely in the hands of researchers or system developers, but must also take into account the actual people who share their potentially sensitive data. In other words: Would you share your instant messages for ǎrepsilon of 10? We address this research gap by designing, implementing, and conducting a behavioral experiment (311 lay participants) to study the behavior of people in uncertain decision-making situations with respect to privacy-threatening situations. Framing the risk perception in terms of two realistic NLP scenarios and using a vignette behavioral study help us determine what ǎrepsilon thresholds would lead lay people to be willing to share sensitive textual data – to our knowledge, the first study of its kind.
%U https://aclanthology.org/2024.lrec-main.1419/
%P 16331-16342
Markdown (Informal)
[To Share or Not to Share: What Risks Would Laypeople Accept to Give Sensitive Data to Differentially-Private NLP Systems?](https://aclanthology.org/2024.lrec-main.1419/) (Weiss et al., LREC-COLING 2024)
ACL