Skip to content

Commit ddf590b

Browse files
committed
pycodestyle (PEP 8) cleanup in Python scripts
These are mainly whitespace changes. I didn't fix "E501 line too long", which would require more significant surgery.
1 parent e80a7a1 commit ddf590b

File tree

2 files changed

+40
-24
lines changed

2 files changed

+40
-24
lines changed

contrib/unaccent/generate_unaccent_rules.py

Lines changed: 34 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -38,10 +38,10 @@
3838
# For now we are being conservative by including only Latin and Greek. This
3939
# could be extended in future based on feedback from people with relevant
4040
# language knowledge.
41-
PLAIN_LETTER_RANGES = ((ord('a'), ord('z')), # Latin lower case
42-
(ord('A'), ord('Z')), # Latin upper case
43-
(0x03b1, 0x03c9), # GREEK SMALL LETTER ALPHA, GREEK SMALL LETTER OMEGA
44-
(0x0391, 0x03a9)) # GREEK CAPITAL LETTER ALPHA, GREEK CAPITAL LETTER OMEGA
41+
PLAIN_LETTER_RANGES = ((ord('a'), ord('z')), # Latin lower case
42+
(ord('A'), ord('Z')), # Latin upper case
43+
(0x03b1, 0x03c9), # GREEK SMALL LETTER ALPHA, GREEK SMALL LETTER OMEGA
44+
(0x0391, 0x03a9)) # GREEK CAPITAL LETTER ALPHA, GREEK CAPITAL LETTER OMEGA
4545

4646
# Combining marks follow a "base" character, and result in a composite
4747
# character. Example: "U&'A\0300'" produces "À". There are three types of
@@ -51,9 +51,10 @@
5151
# https://en.wikipedia.org/wiki/Combining_character
5252
# https://www.unicode.org/charts/PDF/U0300.pdf
5353
# https://www.unicode.org/charts/PDF/U20D0.pdf
54-
COMBINING_MARK_RANGES = ((0x0300, 0x0362), # Mn: Accents, IPA
55-
(0x20dd, 0x20E0), # Me: Symbols
56-
(0x20e2, 0x20e4),) # Me: Screen, keycap, triangle
54+
COMBINING_MARK_RANGES = ((0x0300, 0x0362), # Mn: Accents, IPA
55+
(0x20dd, 0x20E0), # Me: Symbols
56+
(0x20e2, 0x20e4),) # Me: Screen, keycap, triangle
57+
5758

5859
def print_record(codepoint, letter):
5960
if letter:
@@ -63,12 +64,14 @@ def print_record(codepoint, letter):
6364

6465
print(output)
6566

67+
6668
class Codepoint:
6769
def __init__(self, id, general_category, combining_ids):
6870
self.id = id
6971
self.general_category = general_category
7072
self.combining_ids = combining_ids
7173

74+
7275
def is_mark_to_remove(codepoint):
7376
"""Return true if this is a combining mark to remove."""
7477
if not is_mark(codepoint):
@@ -79,17 +82,20 @@ def is_mark_to_remove(codepoint):
7982
return True
8083
return False
8184

85+
8286
def is_plain_letter(codepoint):
8387
"""Return true if codepoint represents a "plain letter"."""
8488
for begin, end in PLAIN_LETTER_RANGES:
85-
if codepoint.id >= begin and codepoint.id <= end:
86-
return True
89+
if codepoint.id >= begin and codepoint.id <= end:
90+
return True
8791
return False
8892

93+
8994
def is_mark(codepoint):
9095
"""Returns true for diacritical marks (combining codepoints)."""
9196
return codepoint.general_category in ("Mn", "Me", "Mc")
9297

98+
9399
def is_letter_with_marks(codepoint, table):
94100
"""Returns true for letters combined with one or more marks."""
95101
# See https://www.unicode.org/reports/tr44/tr44-14.html#General_Category_Values
@@ -105,16 +111,18 @@ def is_letter_with_marks(codepoint, table):
105111

106112
# Check if the base letter of this letter has marks.
107113
codepoint_base = codepoint.combining_ids[0]
108-
if (is_plain_letter(table[codepoint_base]) is False and \
109-
is_letter_with_marks(table[codepoint_base], table) is False):
114+
if is_plain_letter(table[codepoint_base]) is False and \
115+
is_letter_with_marks(table[codepoint_base], table) is False:
110116
return False
111117

112118
return True
113119

120+
114121
def is_letter(codepoint, table):
115122
"""Return true for letter with or without diacritical marks."""
116123
return is_plain_letter(codepoint) or is_letter_with_marks(codepoint, table)
117124

125+
118126
def get_plain_letter(codepoint, table):
119127
"""Return the base codepoint without marks. If this codepoint has more
120128
than one combining character, do a recursive lookup on the table to
@@ -133,15 +141,18 @@ def get_plain_letter(codepoint, table):
133141
# Should not come here
134142
assert(False)
135143

144+
136145
def is_ligature(codepoint, table):
137146
"""Return true for letters combined with letters."""
138147
return all(is_letter(table[i], table) for i in codepoint.combining_ids)
139148

149+
140150
def get_plain_letters(codepoint, table):
141151
"""Return a list of plain letters from a ligature."""
142152
assert(is_ligature(codepoint, table))
143153
return [get_plain_letter(table[id], table) for id in codepoint.combining_ids]
144154

155+
145156
def parse_cldr_latin_ascii_transliterator(latinAsciiFilePath):
146157
"""Parse the XML file and return a set of tuples (src, trg), where "src"
147158
is the original character and "trg" the substitute."""
@@ -189,21 +200,23 @@ def parse_cldr_latin_ascii_transliterator(latinAsciiFilePath):
189200

190201
return charactersSet
191202

203+
192204
def special_cases():
193205
"""Returns the special cases which are not handled by other methods"""
194206
charactersSet = set()
195207

196208
# Cyrillic
197-
charactersSet.add((0x0401, "\u0415")) # CYRILLIC CAPITAL LETTER IO
198-
charactersSet.add((0x0451, "\u0435")) # CYRILLIC SMALL LETTER IO
209+
charactersSet.add((0x0401, "\u0415")) # CYRILLIC CAPITAL LETTER IO
210+
charactersSet.add((0x0451, "\u0435")) # CYRILLIC SMALL LETTER IO
199211

200212
# Symbols of "Letterlike Symbols" Unicode Block (U+2100 to U+214F)
201-
charactersSet.add((0x2103, "\xb0C")) # DEGREE CELSIUS
202-
charactersSet.add((0x2109, "\xb0F")) # DEGREE FAHRENHEIT
203-
charactersSet.add((0x2117, "(P)")) # SOUND RECORDING COPYRIGHT
213+
charactersSet.add((0x2103, "\xb0C")) # DEGREE CELSIUS
214+
charactersSet.add((0x2109, "\xb0F")) # DEGREE FAHRENHEIT
215+
charactersSet.add((0x2117, "(P)")) # SOUND RECORDING COPYRIGHT
204216

205217
return charactersSet
206218

219+
207220
def main(args):
208221
# https://www.unicode.org/reports/tr44/tr44-14.html#Character_Decomposition_Mappings
209222
decomposition_type_pattern = re.compile(" *<[^>]*> *")
@@ -238,12 +251,12 @@ def main(args):
238251
len(codepoint.combining_ids) > 1:
239252
if is_letter_with_marks(codepoint, table):
240253
charactersSet.add((codepoint.id,
241-
chr(get_plain_letter(codepoint, table).id)))
254+
chr(get_plain_letter(codepoint, table).id)))
242255
elif args.noLigaturesExpansion is False and is_ligature(codepoint, table):
243256
charactersSet.add((codepoint.id,
244-
"".join(chr(combining_codepoint.id)
245-
for combining_codepoint \
246-
in get_plain_letters(codepoint, table))))
257+
"".join(chr(combining_codepoint.id)
258+
for combining_codepoint
259+
in get_plain_letters(codepoint, table))))
247260
elif is_mark_to_remove(codepoint):
248261
charactersSet.add((codepoint.id, None))
249262

@@ -258,6 +271,7 @@ def main(args):
258271
for characterPair in charactersList:
259272
print_record(characterPair[0], characterPair[1])
260273

274+
261275
if __name__ == "__main__":
262276
parser = argparse.ArgumentParser(description='This script builds unaccent.rules on standard output when given the contents of UnicodeData.txt and Latin-ASCII.xml given as arguments.')
263277
parser.add_argument("--unicode-data-file", help="Path to formatted text file corresponding to UnicodeData.txt.", type=str, required=True, dest='unicodeDataFilePath')

src/test/locale/sort-test.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,20 @@
11
#! /usr/bin/env python
22

3-
import sys, string, locale
3+
import locale
4+
import sys
5+
46
locale.setlocale(locale.LC_ALL, "")
57

68
if len(sys.argv) != 2:
7-
sys.stderr.write("Usage: sort.py filename\n")
8-
sys.exit(1)
9+
sys.stderr.write("Usage: sort.py filename\n")
10+
sys.exit(1)
911

1012
infile = open(sys.argv[1], 'r')
1113
list = infile.readlines()
1214
infile.close()
1315

1416
for i in range(0, len(list)):
15-
list[i] = list[i][:-1] # chop!
17+
list[i] = list[i][:-1] # chop!
1618

1719
list.sort(key=locale.strxfrm)
1820
print('\n'.join(list))

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy