Vai al contenuto

Utente:GiacoBot/itwiki

Da Wikipedia, l'enciclopedia libera.
# -*- coding: utf-8  -*-
#
# (C) Daniel Herding, 2004
#
# Distributed under the terms of the MIT license.
#
__version__='$Id: replace.py,v 1.102 2006/03/12 16:35:54 wikipedian Exp $'

from __future__ import generators
import sys, re
import wikipedia, pagegenerators, catlib, config

# Edit-summary templates, keyed by language code. The %s placeholder is
# filled in by main() with a description of the replacements performed.
# NOTE: Predefined replacement tasks might use their own dictionary, see 'fixes'
# below.
msg = {
    'de': u'Bot: Automatisierte Textersetzung %s',
    'en': u'Robot: Automated text replacement %s',
    'es': u'Robot: Reemplazo automático de texto %s',
    'fr': u'Bot : Remplacement de texte automatisé %s',
    'hu': u'Robot: Automatikus szövegcsere %s',
    'ia': u'Robot: Reimplaciamento automatic de texto %s',
    'is': u'Vélmenni: breyti texta %s',
    'it': u'Bot: Sostituzione automatica del testo %s',
    'ka': u'რობოტი: ტექსტის ავტომატური შეცვლა %s',
    'lt': u'Botas: Automatinis teksto keitimas %s',
    'pt': u'Bot: Mudança automática %s',
    'sr': u'Бот: Аутоматска замена текста %s',
}

# Predefined replacements tasks.
fixes = {
    # per it.wikipedia
    'accenti': {
        'regex': True,
        'msg': {
               'it':u'Correzione di alcuni errori comuni contenuti in questa [[Discussioni_Wikipedia:Bot/Sostituzioni/Espressioni_regolari|lista]]',
              },
        'replacements': [
                #congiunzioni (e non) terminanti in -chè
                (u'\\b([Aa])bbenchè\\b', ur'\1bbenché'),
                (u'\\b([aA])cciocchè\\b', ur'\1cciocché'),
                (u'\\b([aA])ffinchè\\b', ur'\1ffinché'),
                (u'\\b([aA])lcunchè\\b', ur'\1lcunché'),
                (u'\\b([aA])llorchè\\b', ur'\1llorché'),
                (u'\\b([aA])ltrochè\\b', ur'\1ltroché'),
                (u'\\b([aA])ncorchè\\b', ur'\1ncorché'),
                (u'\\b([aA])nzichè\\b', ur'\1nziché'),
                (u'\\b([aA])ttesochè\\b', ur'\1ttesoché'),
                (u'\\b([aA])vvegnachè\\b', ur'\1vvegnaché'),
                (u"\\b([aA])vvegnadiochè\\b", ur"\1vvegnadioché"),
                (u"\\b([aA])vvengachè\\b", ur"\1vvengaché"),
                (u"\\b([aA])vvengadiochè\\b", ur"\1vvengadioché"),
                (u'\\b([bB])enchè\\b', ur'\1enché'),
                (u'\\b([cC])hecchè\\b', ur'\1hecché'),
                (u"\\b([cC])iocchè\\b", ur"\1iocché"),
                (u'\\b([cC])omecchè\\b', ur'\1omecché'),
                (u"\\b([cC])onciofossechè\\b", ur"\1onciofosseché"),
                (u'\\b([cC])ontuttochè\\b', ur'\1ontuttoché'),
                (u'\\b([cC])osicchè\\b', ur'\1osicché'),
                (u'\\b([cC])otalchè\\b', ur'\1otalché'),
                (u'\\b([dD])acchè\\b', ur'\1acché'),
                (u'\\b([dD])appoichè\\b', ur'\1appoiché'),
                (u'\\b([dD])imodochè\\b', ur'\1imodoché'),
                (u"\\b([dD])opochè\\b", ur"\1opoché"),
                (u"\\b([dD])opodichè\\b", ur"\1opodiché"),
                (u'\\b([eE])ssendochè\\b', ur'\1ssendoché'),
                (u'\\b([fF])inattantochè\\b', ur'\1inattantoché'),
                (u'\\b([fF])inchè\\b', ur'\1inché'),
                (u'\\b([fF])intantochè\\b', ur'\1intantoché'),
                (u"\\b([fF])inacchè\\b", ur"\1inacché"),
                (u"\\b([fF])inattantochè\\b", ur"\1inattantoché"),
                (u'\\b([fF])uorchè\\b', ur'\1uorché'),
                (u'\\b([gG])iacchè\\b', ur'\1iacché'),
                (u'\\b([gG])ranchè\\b', ur'\1ranché'),
                (u"\\b([gG])iafossechè\\b", ur"\1iafosseché"),
                (u"\\b([gG])iafossecosachè\\b", ur"\1iafossecosaché"),
                (u"\\b([iI])nfinattantochè\\b", ur"\1nfinattantoché"),
                (u'\\b([lL])orchè\\b', ur'\1orché'),
                (u"\\b([iI])nquantochè\\b", ur"\1nquantoché"),
                (u'\\b([mM])acchè\\b', ur'\1acché'),
                (u'\\b([nN])onchè\\b', ur'\1onché'),
                (u"\\b([nN])onsochè\\b", ur"\1onsoché"),
                (u'\\b([oO])ltrechè\\b', ur'\1ltreché'),
                (u"\\b([oO])ndechè\\b", ur"\1ndeché"),
                (u'\\b([pP])erchè\\b', ur'\1erché'),
                (u'\\b([pP])erciocchè\\b', ur'\1erciocché'),
                (u'\\b([pP])erlochè\\b', ur'\1erloché'),
                (u'\\b([pP])erocchè\\b', ur'\1erocché'),
                (u'\\b([pP])oichè\\b', ur'\1oiché'),
                (u"\\b([pP])osciachè\\b", ur"\1osciaché"),
                (u'\\b([pP])ressochè\\b', ur'\1ressoché'),
                (u'\\b([pP])urchè\\b', ur'\1urché'),
                (u"\\b([qQ])uantochè\\b", ur"\1uantoché"),
                (u"\\b([qQ])uasichè\\b", ur"\1uasiché"),
                (u"\\b([sS])econdochè\\b", ur"\1econdoché"),
                (u'\\b([sS])ennonchè\\b', ur'\1ennonché'),
                (u'\\b([sS])enonchè\\b', ur'\1enonché'),
                (u'\\b([sS])icchè\\b', ur'\1icché'),
                (u'\\b([sS])inattantochè\\b', ur'\1inattantoché'),
                (u'\\b([sS])inchè\\b', ur'\1inché'),
                (u'\\b([sS])intantochè\\b', ur'\1intantoché'),
                (u"\\b([sS])tantechè\\b", ur"\1tanteché"),
                (u'\\b([tT])alchè\\b', ur'\1alché'),
                (u"\\b([tT])almentechè\\b", ur"\1almenteché"),
                (u'\\b([tT])antochè\\b', ur'\1antoché'),
                (u"\\b([tT])rannechè\\b", ur"\1ranneché"),
                (u'\\b([tT])uttochè\\b', ur'\1uttoché'),
                #passati remoti
                (u"\\b([aA])bbattè\\b", ur"\1bbatté"),
                (u"\\b([aA])ccedè\\b", ur"\1ccedé"),
                (u"\\b([aA])ddissè\\b", ur"\1ddissé"),
                (u"\\b([aA])dempiè\\b", ur"\1dempié"),
                (u"\\b([aA])nnettè\\b", ur"\1nnetté"),
                (u"\\b([aA])ntiprendè\\b", ur"\1ntiprendé"),
                (u"\\b([aA])ppartenè\\b", ur"\1ppartené"),
                (u"\\b([aA])ppendè\\b", ur"\1ppendé"),
                (u"\\b([aA])pprendè\\b", ur"\1pprendé"),
                (u"\\b([aA])rrendè\\b", ur"\1rrendé"),
                (u"\\b([aA])rrompè\\b", ur"\1rrompé"),
                (u"\\b([aA])ssistè\\b", ur"\1ssisté"),
                (u"\\b([aA])ssolvè\\b", ur"\1ssolvé"),
                (u"\\b([aA])stenè\\b", ur"\1stené"),
                (u"\\b([aA])ttenè\\b", ur"\1ttené"),
                (u"\\b([aA])vvedè\\b", ur"\1vvedé"),
                (u"\\b([bB])attè\\b", ur"\1atté"),
                (u"\\b([cC])edè\\b", ur"\1edé"),
                (u"\\b([cC])ernè\\b", ur"\1erné"),
                (u"\\b([cC])hiedè\\b", ur"\1hiedé"),
                (u"\\b([cC])ombattè\\b", ur"\1ombatté"),
                (u"\\b([cC])ompetè\\b", ur"\1ompeté"),
                (u"\\b([cC])ompiè\\b", ur"\1ompié"),
                (u"\\b([cC])omprendè\\b", ur"\1omprendé"),
                (u"\\b([cC])oncedè\\b", ur"\1oncedé"),
                (u"\\b([cC])oncernè\\b", ur"\1oncerné"),
                (u"\\b([cC])onnettè\\b", ur"\1onnetté"),
                (u"\\b([cC])onsistè\\b", ur"\1onsisté"),
                (u"\\b([cC])ontenè\\b", ur"\1ontené"),
                (u"\\b([cC])ontrobattè\\b", ur"\1ontrobatté"),
                (u"\\b([cC])onvedè\\b", ur"\1onvedé"),
                (u"\\b([cC])onvergè\\b", ur"\1onvergé"),
                (u"\\b([cC])onvivè\\b", ur"\1onvivé"),
                (u"\\b([cC])orrompè\\b", ur"\1orrompé"),
                (u"\\b([cC])redè\\b", ur"\1redé"),
                (u"\\b([dD])ecedè\\b", ur"\1ecedé"),
                (u"\\b([dD])eflettè\\b", ur"\1efletté"),
                (u"\\b([dD])elinquè\\b", ur"\1elinqué"),
                (u"\\b([dD])esistè\\b", ur"\1esisté"),
                (u"\\b([dD])etenè\\b", ur"\1etené"),
                (u"\\b([dD])evolvè\\b", ur"\1evolvé"),
                (u"\\b([dD])ibattè\\b", ur"\1ibatté"),
                (u"\\b([dD])ipendè\\b", ur"\1ipendé"),
                (u"\\b([dD])iprendè\\b", ur"\1iprendé"),
                (u"\\b([dD])irimè\\b", ur"\1irimé"),
                (u"\\b([dD])iscernè\\b", ur"\1iscerné"),
                (u"\\b([dD])isottenè\\b", ur"\1isottené"),
                (u"\\b([dD])isperdè\\b", ur"\1isperdé"),
                (u"\\b([dD])isplendè\\b", ur"\1isplendé"),
                (u"\\b([dD])issolvè\\b", ur"\1issolvé"),
                (u"\\b([dD])issovvennè\\b", ur"\1issovvenné"),
                (u"\\b([dD])istemè\\b", ur"\1istemé"),
                (u"\\b([dD])isvolvè\\b", ur"\1isvolvé"),
                (u"\\b([eE])ccedè\\b", ur"\1ccedé"),
                (u"\\b([eE])lidè\\b", ur"\1lidé"),
                (u"\\b([eE])ludè\\b", ur"\1ludé"),
                (u"\\b([eE])rompè\\b", ur"\1rompé"),
                (u"\\b([eE])sigè\\b", ur"\1sigé"),
                (u"\\b([eE])simè\\b", ur"\1simé"),
                (u"\\b([eE])sistè\\b", ur"\1sisté"),
                (u"\\b([eE])spandè\\b", ur"\1spandé"),
                (u"\\b([eE])stroquè\\b", ur"\1stroqué"),
                (u"\\b([eE])volvè\\b", ur"\1volvé"),
                (u"\\b([fF])endè\\b", ur"\1endé"),
                (u"\\b([fF])ervè\\b", ur"\1ervé"),
                (u"\\b([fF])lettè\\b", ur"\1letté"),
                (u"\\b([fF])rapprendè\\b", ur"\1rapprendé"),
                (u"\\b([fF])remè\\b", ur"\1remé"),
                (u"\\b([gG])enuflettè\\b", ur"\1enufletté"),
                (u"\\b([iI])mbattè\\b", ur"\1mbatté"),
                (u"\\b([iI])mbevè\\b", ur"\1mbevé"),
                (u"\\b([iI])mpiè\\b", ur"\1mpié"),
                (u"\\b([iI])mprendè\\b", ur"\1mprendé"),
                (u"\\b([iI])ncedè\\b", ur"\1ncedé"),
                (u"\\b([iI])ncombè\\b", ur"\1ncombé"),
                (u"\\b([iI])nfremè\\b", ur"\1nfremé"),
                (u"\\b([iI])nsistè\\b", ur"\1nsisté"),
                (u"\\b([iI])ntercedè\\b", ur"\1ntercedé"),
                (u"\\b([iI])nterprendè\\b", ur"\1nterprendé"),
                (u"\\b([iI])nterrompè\\b", ur"\1nterrompé"),
                (u"\\b([iI])ntessè\\b", ur"\1ntessé"),
                (u"\\b([iI])ntraprendè\\b", ur"\1ntraprendé"),
                (u"\\b([iI])ntrarompè\\b", ur"\1ntrarompé"),
                (u"\\b([iI])ntratessè\\b", ur"\1ntratessé"),
                (u"\\b([iI])ntrattenè\\b", ur"\1ntrattené"),
                (u"\\b([iI])ntravedè\\b", ur"\1ntravedé"),
                (u"\\b([iI])ntroflettè\\b", ur"\1ntrofletté"),
                (u"\\b([iI])rrompè\\b", ur"\1rrompé"),
                (u"\\b([mM])antenè\\b", ur"\1antené"),
                (u"\\b([mM])ietè\\b", ur"\1ieté"),
                (u"\\b([oO])ttenè\\b", ur"\1ttené"),
                (u"\\b([pP])endè\\b", ur"\1endé"),
                (u"\\b([pP])erdè\\b", ur"\1erdé"),
                (u"\\b([pP])ersistè\\b", ur"\1ersisté"),
                (u"\\b([pP])iovè\\b", ur"\1iové"),
                (u"\\b([pP])ossedè\\b", ur"\1ossedé"),
                (u"\\b([pP])otè\\b", ur"\1oté"),
                (u"\\b([pP])recedè\\b", ur"\1recedé"),
                (u"\\b([pP])reesistè\\b", ur"\1reesisté"),
                (u"\\b([pP])remè\\b", ur"\1remé"),
                (u"\\b([pP])rendè\\b", ur"\1rendé"),
                (u"\\b([pP])rescindè\\b", ur"\1rescindé"),
                (u"\\b([pP])resiedè\\b", ur"\1resiedé"),
                (u"\\b([pP])revedè\\b", ur"\1revedé"),
                (u"\\b([pP])rocedè\\b", ur"\1rocedé"),
                (u"\\b([pP])ropendè\\b", ur"\1ropendé"),
                (u"\\b([pP])rorompè\\b", ur"\1rorompé"),
                (u"\\b([pP])rovolvè\\b", ur"\1rovolvé"),
                (u"\\b([rR])apprendè\\b", ur"\1apprendé"),
                (u"\\b([rR])attenè\\b", ur"\1attené"),
                (u"\\b([rR])avvedè\\b", ur"\1avvedé"),
                (u"\\b([rR])ecedè\\b", ur"\1ecedé"),
                (u"\\b([rR])edigè\\b", ur"\1edigé"),
                (u"\\b([rR])endè\\b", ur"\1endé"),
                (u"\\b([rR])esistè\\b", ur"\1esisté"),
                (u"\\b([rR])etrocedè\\b", ur"\1etrocedé"),
                (u"\\b([rR])iannettè\\b", ur"\1iannetté"),
                (u"\\b([rR])ibattè\\b", ur"\1ibatté"),
                (u"\\b([rR])icedè\\b", ur"\1icedé"),
                (u"\\b([rR])icevè\\b", ur"\1icevé"),
                (u"\\b([rR])ichiedè\\b", ur"\1ichiedé"),
                (u"\\b([rR])iconnettè\\b", ur"\1iconnetté"),
                (u"\\b([rR])iconverrè\\b", ur"\1iconverré"),
                (u"\\b([rR])icredè\\b", ur"\1icredé"),
                (u"\\b([rR])iedè\\b", ur"\1iedé"),
                (u"\\b([rR])iempiè\\b", ur"\1iempié"),
                (u"\\b([rR])iflettè\\b", ur"\1ifletté"),
                (u"\\b([rR])ingodè\\b", ur"\1ingodé"),
                (u"\\b([rR])ipentè\\b", ur"\1ipenté"),
                (u"\\b([rR])ipetè\\b", ur"\1ipeté"),
                (u"\\b([rR])iprendè\\b", ur"\1iprendé"),
                (u"\\b([rR])isedè\\b", ur"\1isedé"),
                (u"\\b([rR])isiedè\\b", ur"\1isiedé"),
                (u"\\b([rR])isolvè\\b", ur"\1isolvé"),
                (u"\\b([rR])isplendè\\b", ur"\1isplendé"),
                (u"\\b([rR])itenè\\b", ur"\1itené"),
                (u"\\b([rR])ivedè\\b", ur"\1ivedé"),
                (u"\\b([rR])ivendè\\b", ur"\1ivendé"),
                (u"\\b([rR])ivivè\\b", ur"\1ivivé"),
                (u"\\b([rR])ompè\\b", ur"\1ompé"),
                (u"\\b([sS])battè\\b", ur"\1batté"),
                (u"\\b([sS])candè\\b", ur"\1candé"),
                (u"\\b([sS])cernè\\b", ur"\1cerné"),
                (u"\\b([sS])connettè\\b", ur"\1connetté"),
                (u"\\b([sS])ecernè\\b", ur"\1ecerné"),
                (u"\\b([sS])fottè\\b", ur"\1fotté"),
                (u"\\b([sS])occombè\\b", ur"\1occombé"),
                (u"\\b([sS])oprassedè\\b", ur"\1oprassedé"),
                (u"\\b([sS])opravvivè\\b", ur"\1opravvivé"),
                (u"\\b([sS])orprendè\\b", ur"\1orprendé"),
                (u"\\b([sS])ostenè\\b", ur"\1ostené"),
                (u"\\b([sS])pandè\\b", ur"\1pandé"),
                (u"\\b([sS])perdè\\b", ur"\1perdé"),
                (u"\\b([sS])plendè\\b", ur"\1plendé"),
                (u"\\b([sS])premè\\b", ur"\1premé"),
                (u"\\b([sS])ternè\\b", ur"\1terné"),
                (u"\\b([sS])trafottè\\b", ur"\1trafotté"),
                (u"\\b([sS])travedè\\b", ur"\1travedé"),
                (u"\\b([sS])tridè\\b", ur"\1tridé"),
                (u"\\b([tT])emè\\b", ur"\1emé"),
                (u"\\b([tT])enè\\b", ur"\1ené"),
                (u"\\b([tT])essè\\b", ur"\1essé"),
                (u"\\b([tT])ralucè\\b", ur"\1ralucé"),
                (u"\\b([tT])ransigè\\b", ur"\1ransigé"),
                (u"\\b([tT])rattenè\\b", ur"\1rattené"),
                (u"\\b([tT])ravedè\\b", ur"\1ravedé"),
                (u"\\b([vV])edè\\b", ur"\1edé"),
                (u"\\b([vV])endè\\b", ur"\1endé"),
                (u"\\b([vV])ertè\\b", ur"\1erté"),
                #Termini d'origine francese (ed italiani come caffè)
                (u'\\b([aA])ntirè\\b', ur'\1ntiré'),
                (u'\\b([aA])utodafè\\b', ur'\1utodafé'),
                (u'\\b([cC])annetè\\b', ur'\1anneté'),
                (u'\\b([cC])apitonnè\\b', ur'\1apitonné'),
                (u'\\b([cC])lichè\\b', ur'\1liché'),
                (u'\\b([cC])loisonnè\\b', ur'\1loisonné'),
                (u'\\b([cC])onsommè\\b', ur'\1onsommé'),
                (u"\\b([cC])impanzè\\b", ur"\1impanzé"),
                (u'\\b([cC])oupè\\b', ur'\1oupé'),
                (u'\\b([cC])raquelè\\b', ur'\1raquelé'),
                (u'\\b([dD])ecolletè\\b', ur'\1ecolleté'),
                (u'\\b([dD])écolletè\\b', ur'\1écolleté'),
                (u'\\b([dD])efilè\\b', ur'\1efilé'),
                (u'\\b([dD])éfilè\\b', ur'\1éfilé'),
                (u'\\b([dD])egagè\\b', ur'\1egagé'),
                (u'\\b([dD])égagè\\b', ur'\1égagé'),
                (u'\\b([dD])elavè\\b', ur'\1elavé'),
                (u'\\b([dD])élavè\\b', ur'\1élavé'),
                (u'\\b([dD])emodè\\b', ur'\1emodé'),
                (u'\\b([dD])émodè\\b', ur'\1émodé'),
                (u'\\b([dD])eracinè\\b', ur'\1eraciné'),
                (u'\\b([dD])éracinè\\b', ur'\1éraciné'),
                (u'\\b([dD])eshabillè\\b', ur'\1eshabillé'),
                (u'\\b([dD])éshabillè\\b', ur'\1éshabillé'),
                (u'\\b([eE])cartè\\b', ur'\1carté'),
                (u'\\b([eE])nfant gƒtè\\b', ur'\1nfant gƒté'),
                (u'\\b([eE])ngagè\\b', ur'\1ngagé'),
                (u'\\b([fF])lambè\\b', ur'\1lambé'),
                (u'\\b([fF])oncè\\b', ur'\1oncé'),
                (u'\\b([fF])risè\\b', ur'\1risé'),
                (u'\\b([gG])aufrè\\b', ur'\1aufré'),
                (u'\\b([gG])lacè\\b', ur'\1lacé'),
                (u"\\b([gG])ranmercè\\b", ur"\1ranmercé"),
                (u'\\b([hH])abituè\\b', ur'\1abitué'),
                (u'\\b([hH])ôtel meublè\\b', ur'\1ôtel meublé'),
                (u'\\b([iI])mprimè\\b', ur'\1mprimé'),
                (u'\\b([iI])nterrè\\b', ur'\1nterré'),
                (u'\\b([kK])aritè\\b', ur'\1arité'),
                (u'\\b([mM])arron glacè\\b', ur'\1arron glacé'),
                (u'\\b([mM])atelassè\\b', ur'\1atelassé'),
                (u'\\bmercè\\b', ur'mercé'),
                (u'\\b([mM])erzè\\b', ur'\1erzé'),
                (u'\\b([mM])eublè\\b', ur'\1eublé'),
                (u'\\b([mM])oirè\\b', ur'\1oiré'),
                (u'\\b([mM])oulinè\\b', ur'\1ouliné'),
                (u'\\b([nN])egligè\\b', ur'\1egligé'),
                (u'\\b([nN])égligè\\b', ur'\1égligé'),
                (u"\\b([nN])ontiscordardimè\\b", ur"\1ontiscordardimé"),
                (u'\\b([pP])ancarrè\\b', ur'\1ancarré'),
                (u'\\b([pP])âtè\\b', ur'\1âté'),
                (u'\\b([sS])aint-honorè\\b', ur'\1aint-honoré'),
                (u'\\b([sS])cimpanz[eè]\\b', ur'\1cimpanzé'),
                (u'\\b([sS])eparè\\b', ur'\1eparé'),
                (u'\\b([sS])oufflè\\b', ur'\1oufflé'),
                (u'\\b([tT])amurè\\b', ur'\1amuré'),
                (u'\\b([tT])rentatrè\\b', ur'\1rentatré'),
                (u'\\b([tT])ruffè\\b', ur'\1ruffé'),
                (u'\\b([vV])arietè\\b', ur'\1arieté'),
                (u'\\b([vV])ariétè\\b', ur'\1ariété'),
                (u'\\b([vV])icerè\\b', ur'\1iceré'),
                (u'\\b([vV])entitrè\\b', ur'\1entitré'),
                (u'\\b([aA])himé\\b', ur'\1himè'),
                (u'\\b([aA])mmazzacaffé\\b', ur'\1mmazzacaffè'),
                (u'\\b([aA])ppié\\b', ur'\1ppiè'),
                (u'\\b([bB])igné\\b', ur'\1ignè'),
                (u'\\b([bB])uffé\\b', ur'\1uffè'),
                (u'\\b([cC])abaré\\b', ur'\1abarè'),
                (u'\\b([cC])abriolé\\b', ur'\1abriolè'),
                (u'\\b([cC])anapé\\b', ur'\1anapè'),
                (u'\\b([cC])arcadé\\b', ur'\1arcadè'),
                (u'\\b([cC])hedivé\\b', ur'\1hedivè'),
                (u'\\b([cC])ioé\\b', ur'\1ioè'),
                (u'\\b([cC])occodé\\b', ur'\1occodè'),
                (u'\\b([cC])ontrobuffé\\b', ur'\1ontrobuffè'),
                (u'\\b([cC])orvé\\b', ur'\1orvè'),
                (u'\\b([cC])roscé\\b', ur'\1roscè'),
                (u'\\b([cC])upé\\b', ur'\1upè'),
                (u'\\b([dD])appié\\b', ur'\1appiè'),
                (u'\\b([dD])osacaffé\\b', ur'\1osacaffè'),
                (u'\\b([eE])uhoé\\b', ur'\1uhoè'),
                (u'\\b([fF])orfé\\b', ur'\1orfè'),
                (u'\\b([kK])arkadé\\b', ur'\1arkadè'),
                (u'\\b([kK])edivé\\b', ur'\1edivè'),
                (u'\\b([lL])acché\\b', ur'\1acchè'),
                (u'\\b([mM])acinacaffé\\b', ur'\1acinacaffè'),
                (u'\\b([mM])acramé\\b', ur'\1acramè'),
                (u'\\b([mM])ordoré\\b', ur'\1ordorè'),
                (u'\\b([mM])usmé\\b', ur'\1usmè'),
                (u'\\b([nN])arghilé\\b', ur'\1arghilè'),
                (u'\\b([pP])arché\\b', ur'\1archè'),
                (u'\\b([pP])uré\\b', ur'\1urè'),
                (u'\\b([rR])adiorelé\\b', ur'\1adiorelè'),
                (u'\\b([rR])amié\\b', ur'\1amiè'),
                (u'\\b([sS])ufflé\\b', ur'\1ufflè'),
                (u'\\b([tT])oppé\\b', ur'\1oppè'),
                (u'\\b([tT])ostacaffé\\b', ur'\1ostacaffè'),
                (u'\\b([tT])uppé\\b', ur'\1uppè'),
                (u'\\b([vV])ahiné\\b', ur'\1ahinè'),
                (u'\\bGiosué\\b', ur'Giosuè'),
                (u'\\bMosé\\b', ur'Mosè'),
                # Altre sostituzioni
                (u"(?m)(== ?[Ll]Collegamenti Esterni ?==)", ur"== Collegamenti esterni =="),
                (u"(?m)(== ?[Ll]ink [Ee]sterni ?==)", ur"== Collegamenti esterni =="),
                (u"(?m)(== ?[Vv]edi [Aa]nche ?==)", ur"== Voci correlate =="),
            ]
    },
}

class XmlDumpReplacePageGenerator:
    """
    Iterable that yields a Page for every entry of a local XML dump file
    whose text matches at least one replacement pattern and none of the
    exception patterns.
    """
    def __init__(self, xmlFilename, replacements, exceptions):
        """
        Arguments:
            * xmlFilename  - Path of the dump, absolute or relative.
            * replacements - List of 2-tuples (compiled regular expression
                             for the original text, replacement string).
            * exceptions   - List of compiled regular expressions; an entry
                             whose text matches any of them is not yielded.
        """
        self.xmlFilename, self.replacements, self.exceptions = \
            xmlFilename, replacements, exceptions

    def __iter__(self):
        """
        Parses the dump and yields one Page per entry that needs a change.
        """
        # Imported here so the module is only required when a dump is used.
        import xmlreader
        site = wikipedia.getSite()
        reader = xmlreader.XmlDump(self.xmlFilename)
        for entry in reader.parse():
            text = entry.text
            for excluded in self.exceptions:
                if excluded.search(text):
                    # An exception applies: ignore this entry altogether.
                    break
            else:
                # TODO: leave out pages that only have old inside nowiki, comments, math
                for pattern, _replacement in self.replacements:
                    if pattern.search(text):
                        # One hit is enough; yield the page only once.
                        yield wikipedia.Page(site, entry.title)
                        break
    

class ReplaceRobot:
    """
    A bot that can do text replacements.
    """
    def __init__(self, generator, replacements, exceptions = None, acceptall = False):
        """
        Arguments:
            * generator    - A generator that yields Page objects.
            * replacements - A list of 2-tuples of original text (as a compiled
                             regular expression) and replacement text (as a
                             string).
            * exceptions   - A list of compiled regular expression; pages which
                             contain text that matches one of these won't be
                             changed. Defaults to an empty list.
            * acceptall    - If True, the user won't be prompted before changes
                             are made.
        """
        self.generator = generator
        self.replacements = replacements
        # Create the default list per instance: a literal [] in the signature
        # would be one object shared by every robot built without exceptions.
        if exceptions is None:
            exceptions = []
        self.exceptions = exceptions
        self.acceptall = acceptall

    def checkExceptions(self, original_text):
        """
        If one of the exceptions applies for the given text, returns the
        substring which matches the exception. Otherwise it returns None.
        """
        for exception in self.exceptions:
            hit = exception.search(original_text)
            if hit:
                return hit.group(0)
        return None

    def doReplacements(self, original_text):
        """
        Returns the text which is generated by applying all replacements to the
        given text. The replacements are applied in list order, each one to the
        result of the previous one.
        """
        new_text = original_text
        for old, new in self.replacements:
            new_text = wikipedia.replaceExceptMathNowikiAndComments(new_text, old, new)
        return new_text

    def run(self):
        """
        Starts the robot: loads every page from the generator, applies the
        replacements and, after confirmation (unless acceptall is set), saves
        the changed text.
        """
        # Run the generator which will yield Pages which might need to be
        # changed.
        for page in self.generator:
            try:
                # Load the page's text from the wiki
                original_text = page.get()
                if not page.canBeEdited():
                    wikipedia.output(u'Skipping locked page %s' % page.title())
                    continue
            except wikipedia.NoPage:
                wikipedia.output(u'Page %s not found' % page.title())
                continue
            except wikipedia.IsRedirectPage:
                # Redirect pages are processed too, using the redirect's own text.
                original_text = page.get(get_redirect=True)

            # skip all pages that contain certain texts
            match = self.checkExceptions(original_text)
            if match:
                wikipedia.output(u'Skipping %s because it contains %s' % (page.title(), match))
                continue

            new_text = self.doReplacements(original_text)
            if new_text == original_text:
                wikipedia.output(u'No changes were necessary in %s' % page.title())
                continue

            wikipedia.output(u'\n>>> %s <<<' % page.title())
            wikipedia.showDiff(original_text, new_text)
            # Bound up front so the test below never reads an undefined name
            # when the prompt is skipped.
            choice = None
            if not self.acceptall:
                choice = wikipedia.inputChoice(u'Do you want to accept these changes?',  ['Yes', 'No', 'All'], ['y', 'N', 'a'], 'N')
                if choice in ['a', 'A']:
                    # 'All': stop asking for the remaining pages.
                    self.acceptall = True
            if self.acceptall or choice in ['y', 'Y']:
                page.put(new_text)

def prepareRegexForMySQL(pattern):
    """
    Translates a Python regular expression into a form that can be embedded
    in a single-quoted MySQL RLIKE literal.

    The shorthands \\s, \\d and \\w are rewritten as POSIX character classes.
    A POSIX class is only valid inside a bracket expression, so outside of
    one the class is wrapped in its own brackets ('\\s' -> '[[:space:]]'),
    while inside of one only the class name is inserted
    ('[\\s,]' -> '[[:space:],]'). Any other backslash escape is copied
    through unchanged. Finally every single quote is backslash-escaped.

    Arguments:
        * pattern - The regular expression source text.

    Returns the converted pattern string.
    """
    # Shorthand letter -> text to insert inside a bracket expression.
    # \w also matches the underscore in Python, hence the trailing '_'.
    posix_classes = {
        's': '[:space:]',
        'd': '[:digit:]',
        'w': '[:alnum:]_',
    }
    pieces = []
    in_brackets = False
    length = len(pattern)
    i = 0
    while i < length:
        char = pattern[i]
        if char == '\\' and i + 1 < length:
            # Consume the escape as a pair so that an escaped backslash
            # followed by 's' is not mistaken for the \s shorthand.
            following = pattern[i + 1]
            if following in posix_classes:
                if in_brackets:
                    pieces.append(posix_classes[following])
                else:
                    pieces.append('[' + posix_classes[following] + ']')
            else:
                pieces.append(char + following)
            i += 2
            continue
        if char == '[' and not in_brackets:
            in_brackets = True
            pieces.append(char)
            i += 1
            # A '^' and/or ']' directly after the opening bracket belongs to
            # the set and does not close it.
            if i < length and pattern[i] == '^':
                pieces.append('^')
                i += 1
            if i < length and pattern[i] == ']':
                pieces.append(']')
                i += 1
            continue
        if char == ']' and in_brackets:
            in_brackets = False
        pieces.append(char)
        i += 1
    converted = ''.join(pieces)
    # Keep the pattern from terminating the surrounding SQL string literal.
    return converted.replace("'", "\\" + "'")
    
                        
def main():
    """
    Command line entry point.

    Reads the command line parameters, determines the replacements (given
    on the command line, entered interactively, or taken from a predefined
    entry of 'fixes'), compiles them, builds the page generator and runs a
    ReplaceRobot over the selected pages. Exits via sys.exit() when an
    unknown fix is requested or when no page source was specified.
    """
    gen = None
    # Array which will collect commandline parameters.
    # First element is original text, second element is replacement text.
    commandline_replacements = []
    # A list of 2-tuples of original text and replacement text.
    replacements = []
    # Don't edit pages which contain certain texts.
    exceptions = []
    # Should the elements of 'replacements' and 'exceptions' be interpreted
    # as regular expressions?
    regex = False
    # Predefined fixes from dictionary 'fixes' (see above).
    fix = None
    # the dump's path, either absolute or relative, which will be used when
    # the -xml parameter is used.
    xmlFilename = None
    useSql = False
    # the textfile's path, either absolute or relative, which will be used when
    # the -file parameter is used.
    textfilename = None
    # the category name which will be used when the -cat parameter is used.
    categoryname = None
    # pages which will be processed when the -page parameter is used
    PageTitles = []
    # a page whose referrers will be processed when the -ref parameter is used
    referredPageTitle = None
    # a page whose links will be processed when the -links parameter is used
    linkingPageTitle = None
    # will become True when the user presses a ('yes to all') or uses the -always
    # commandline parameter.
    acceptall = False
    # Which namespaces should be processed?
    # default to [] which means all namespaces will be processed
    namespaces = []
    # Google query
    googleQuery = None
    # Load default summary message.
    wikipedia.setAction(wikipedia.translate(wikipedia.getSite(), msg))

    # Read commandline parameters.
    for arg in wikipedia.handleArgs():
        if arg == '-regex':
            regex = True
        elif arg.startswith('-file'):
            if len(arg) >= 6:
                textfilename = arg[6:]
            gen = pagegenerators.TextfilePageGenerator(textfilename)
        elif arg.startswith('-cat'):
            if len(arg) == 4:
                categoryname = wikipedia.input(u'Please enter the category name:')
            else:
                categoryname = arg[5:]
            cat = catlib.Category(wikipedia.getSite(), 'Category:%s' % categoryname)
            gen = pagegenerators.CategorizedPageGenerator(cat)
        elif arg.startswith('-xml'):
            if len(arg) == 4:
                xmlFilename = wikipedia.input(u'Please enter the XML dump\'s filename:')
            else:
                xmlFilename = arg[5:]
        elif arg == '-sql':
            useSql = True
        elif arg.startswith('-page'):
            if len(arg) == 5:
                PageTitles.append(wikipedia.input(u'Which page do you want to change?'))
            else:
                PageTitles.append(arg[6:])
        elif arg.startswith('-ref'):
            if len(arg) == 4:
                referredPageTitle = wikipedia.input(u'Links to which page should be processed?')
            else:
                referredPageTitle = arg[5:]
            referredPage = wikipedia.Page(wikipedia.getSite(), referredPageTitle)
            gen = pagegenerators.ReferringPageGenerator(referredPage)
        elif arg.startswith('-links'):
            if len(arg) == 6:
                linkingPageTitle = wikipedia.input(u'Links from which page should be processed?')
            else:
                linkingPageTitle = arg[7:]
            linkingPage = wikipedia.Page(wikipedia.getSite(), linkingPageTitle)
            gen = pagegenerators.LinkedPageGenerator(linkingPage)
        elif arg.startswith('-start'):
            if len(arg) == 6:
                firstPageTitle = wikipedia.input(u'At which page do you want to start?')
            else:
                firstPageTitle = arg[7:]
            namespace = wikipedia.Page(wikipedia.getSite(), firstPageTitle).namespace()
            gen = pagegenerators.AllpagesPageGenerator(firstPageTitle, namespace)
        elif arg.startswith('-google'):
            if len(arg) >= 8:
                googleQuery = arg[8:]
            gen = pagegenerators.GoogleSearchPageGenerator(googleQuery)
        elif arg.startswith('-except:'):
            exceptions.append(arg[8:])
        elif arg.startswith('-fix:'):
            fix = arg[5:]
        elif arg == '-always':
            acceptall = True
        elif arg.startswith('-namespace:'):
            namespaces.append(int(arg[11:]))
        else:
            # Anything unrecognised is taken as replacement text.
            commandline_replacements.append(arg)

    if len(commandline_replacements) == 2 and fix is None:
        old, new = commandline_replacements
        replacements.append((old, new))
        # Build the whole description first and then format it into the
        # template; '%' binds tighter than '+', so formatting only the first
        # fragment would break for a template not ending in %s.
        change = ' (-' + old + ' +' + new + ')'
        wikipedia.setAction(wikipedia.translate(wikipedia.getSite(), msg) % change)
    elif fix is None:
        old = wikipedia.input(u'Please enter the text that should be replaced:')
        new = wikipedia.input(u'Please enter the new text:')
        change = '(-' + old + ' +' + new
        replacements.append((old, new))
        while True:
            old = wikipedia.input(u'Please enter another text that should be replaced, or press Enter to start:')
            if old == '':
                change = change + ')'
                break
            new = wikipedia.input(u'Please enter the new text:')
            change = change + ' & -' + old + ' +' + new
            replacements.append((old, new))
        default_summary_message = wikipedia.translate(wikipedia.getSite(), msg) % change
        wikipedia.output(u'The summary message will default to: %s' % default_summary_message)
        summary_message = wikipedia.input(u'Press Enter to use this default message, or enter a description of the changes your bot will make:')
        if summary_message == '':
            summary_message = default_summary_message
        wikipedia.setAction(summary_message)
    else:
        # Perform one of the predefined actions.
        try:
            fix = fixes[fix]
        except KeyError:
            wikipedia.output(u'Available predefined fixes are: %s' % fixes.keys())
            wikipedia.stopme()
            sys.exit()
        if 'regex' in fix:
            regex = fix['regex']
        if 'msg' in fix:
            wikipedia.setAction(wikipedia.translate(wikipedia.getSite(), fix['msg']))
        if 'exceptions' in fix:
            exceptions = fix['exceptions']
        replacements = fix['replacements']

    # already compile all regular expressions here to save time later.
    # New lists are built instead of overwriting the elements in place, so
    # the lists stored in the module-level 'fixes' dictionary stay untouched.
    compiled_replacements = []
    for old, new in replacements:
        if not regex:
            old = re.escape(old)
        compiled_replacements.append((re.compile(old, re.UNICODE), new))
    replacements = compiled_replacements

    compiled_exceptions = []
    for exception in exceptions:
        if not regex:
            exception = re.escape(exception)
        compiled_exceptions.append(re.compile(exception, re.UNICODE))
    exceptions = compiled_exceptions

    # -xml and -sql take precedence over -page; any generator set while
    # reading the arguments is replaced by them.
    if xmlFilename:
        gen = XmlDumpReplacePageGenerator(xmlFilename, replacements, exceptions)
    elif useSql:
        whereClause = 'WHERE (%s)' % ' OR '.join(["old_text RLIKE '%s'" % prepareRegexForMySQL(old.pattern) for (old, new) in replacements])
        if exceptions:
            exceptClause = 'AND NOT (%s)' % ' OR '.join(["old_text RLIKE '%s'" % prepareRegexForMySQL(exc.pattern) for exc in exceptions])
        else:
            exceptClause = ''
        query = u"""
SELECT page_namespace, page_title
FROM page
JOIN text ON (page_id = old_id)
%s
%s
LIMIT 200""" % (whereClause, exceptClause)
        gen = pagegenerators.MySQLPageGenerator(query)

    elif PageTitles:
        pages = [wikipedia.Page(wikipedia.getSite(), PageTitle) for PageTitle in PageTitles]
        gen = iter(pages)

    if not gen:
        # syntax error, show help text from the top of this file
        # NOTE(review): no module docstring is visible in this file, so
        # __doc__ may be None here -- confirm that output() copes with that.
        wikipedia.output(__doc__, 'utf-8')
        wikipedia.stopme()
        sys.exit()
    if namespaces:
        gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
    preloadingGen = pagegenerators.PreloadingGenerator(gen, pageNumber = 20)
    bot = ReplaceRobot(preloadingGen, replacements, exceptions, acceptall)
    bot.run()


if __name__ == "__main__":
    # Run the bot only when this file is executed as a script. The finally
    # clause makes sure wikipedia.stopme() is called even if main() raises
    # or exits through sys.exit().
    try:
        main()
    finally:
        wikipedia.stopme()
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy