Skip to content

Commit 7cb6dca

Browse files
Manishearth
authored and committed
MAINT Fixes for Python scripts (#54)
* Fixes to python scripts
* Update src/testdata.rs
1 parent c7a6b6f commit 7cb6dca

File tree

3 files changed

+23
-23
lines changed

3 files changed

+23
-23
lines changed

scripts/unicode.py

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,7 @@
2020
# Since this should not require frequent updates, we just store this
2121
# out-of-line and check the unicode.rs file into git.
2222

23-
import fileinput, re, os, sys, operator
23+
import fileinput, re, os, sys
2424

2525
preamble = '''// Copyright 2012-2018 The Rust Project Developers. See the COPYRIGHT
2626
// file at the top-level directory of this distribution and at
@@ -59,7 +59,7 @@ def is_surrogate(n):
5959

6060
def fetch(f):
6161
if not os.path.exists(os.path.basename(f)):
62-
os.system("curl -O http://www.unicode.org/Public/UNIDATA/%s"
62+
os.system("curl -O http://www.unicode.org/Public/9.0.0/ucd/%s"
6363
% f)
6464

6565
if not os.path.exists(os.path.basename(f)):
@@ -80,7 +80,7 @@ def load_gencats(f):
8080
if is_surrogate(cp):
8181
continue
8282
if range_start >= 0:
83-
for i in xrange(range_start, cp):
83+
for i in range(range_start, cp):
8484
udict[i] = data;
8585
range_start = -1;
8686
if data[1].endswith(", First>"):
@@ -150,8 +150,8 @@ def format_table_content(f, content, indent):
150150
def load_properties(f, interestingprops):
151151
fetch(f)
152152
props = {}
153-
re1 = re.compile("^ *([0-9A-F]+) *; *(\w+)")
154-
re2 = re.compile("^ *([0-9A-F]+)\.\.([0-9A-F]+) *; *(\w+)")
153+
re1 = re.compile(r"^ *([0-9A-F]+) *; *(\w+)")
154+
re2 = re.compile(r"^ *([0-9A-F]+)\.\.([0-9A-F]+) *; *(\w+)")
155155

156156
for line in fileinput.input(os.path.basename(f)):
157157
prop = None
@@ -309,7 +309,7 @@ def emit_break_module(f, break_table, break_cats, name):
309309
# download and parse all the data
310310
fetch("ReadMe.txt")
311311
with open("ReadMe.txt") as readme:
312-
pattern = "for Version (\d+)\.(\d+)\.(\d+) of the Unicode"
312+
pattern = r"for Version (\d+)\.(\d+)\.(\d+) of the Unicode"
313313
unicode_version = re.search(pattern, readme.read()).groups()
314314
rf.write("""
315315
/// The version of [Unicode](http://www.unicode.org/)
@@ -342,19 +342,19 @@ def emit_break_module(f, break_table, break_cats, name):
342342
for cat in grapheme_cats:
343343
grapheme_table.extend([(x, y, cat) for (x, y) in grapheme_cats[cat]])
344344
grapheme_table.sort(key=lambda w: w[0])
345-
emit_break_module(rf, grapheme_table, grapheme_cats.keys(), "grapheme")
345+
emit_break_module(rf, grapheme_table, list(grapheme_cats.keys()), "grapheme")
346346
rf.write("\n")
347347

348348
word_cats = load_properties("auxiliary/WordBreakProperty.txt", [])
349349
word_table = []
350350
for cat in word_cats:
351351
word_table.extend([(x, y, cat) for (x, y) in word_cats[cat]])
352352
word_table.sort(key=lambda w: w[0])
353-
emit_break_module(rf, word_table, word_cats.keys(), "word")
353+
emit_break_module(rf, word_table, list(word_cats.keys()), "word")
354354

355355
sentence_cats = load_properties("auxiliary/SentenceBreakProperty.txt", [])
356356
sentence_table = []
357357
for cat in sentence_cats:
358358
sentence_table.extend([(x, y, cat) for (x, y) in sentence_cats[cat]])
359359
sentence_table.sort(key=lambda w: w[0])
360-
emit_break_module(rf, sentence_table, sentence_cats.keys(), "sentence")
360+
emit_break_module(rf, sentence_table, list(sentence_cats.keys()), "sentence")

scripts/unicode_gen_breaktests.py

Lines changed: 11 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -17,23 +17,23 @@
1717
#
1818
# Since this should not require frequent updates, we just store this
1919
# out-of-line and check the unicode.rs file into git.
20+
from __future__ import print_function
2021

2122
import unicode, re, os, fileinput
2223

2324
def load_test_data(f, optsplit=[]):
24-
outls = []
25-
testRe1 = re.compile("^÷\s+([^\s].*[^\s])\s+÷\s+#\s+÷\s+\[0.2\].*?([÷×].*)\s+÷\s+\[0.3\]\s*$")
25+
testRe1 = re.compile(r"^÷\s+([^\s].*[^\s])\s+÷\s+#\s+÷\s+\[0.2\].*?([÷×].*)\s+÷\s+\[0.3\]\s*$")
2626

2727
unicode.fetch(f)
2828
data = []
2929
for line in fileinput.input(os.path.basename(f)):
3030
# lines that include a test start with the ÷ character
31-
if len(line) < 2 or line[0:2] != '÷':
31+
if len(line) < 2 or not line.startswith('÷'):
3232
continue
3333

3434
m = testRe1.match(line)
3535
if not m:
36-
print "error: no match on line where test was expected: %s" % line
36+
print("error: no match on line where test was expected: %s" % line)
3737
continue
3838

3939
# process the characters in this test case
@@ -48,9 +48,9 @@ def load_test_data(f, optsplit=[]):
4848
# make sure that we have break info for each break!
4949
assert len(chars) - 1 == len(info)
5050

51-
outls.append((chars, info))
51+
data.append((chars, info))
5252

53-
return outls
53+
return data
5454

5555
def process_split_info(s, c, o):
5656
outcs = []
@@ -59,7 +59,7 @@ def process_split_info(s, c, o):
5959

6060
# are we on a × or a ÷?
6161
isX = False
62-
if s[0:2] == '×':
62+
if s.startswith('×'):
6363
isX = True
6464

6565
# find each instance of '(÷|×) [x.y] '
@@ -81,10 +81,10 @@ def process_split_info(s, c, o):
8181

8282
idx = 1
8383
while idx < len(s):
84-
if s[idx:idx+2] == '×':
84+
if s[idx:].startswith('×'):
8585
isX = True
8686
break
87-
if s[idx:idx+2] == '÷':
87+
if s[idx:].startswith('÷'):
8888
isX = False
8989
break
9090
idx += 1
@@ -172,7 +172,7 @@ def create_grapheme_data(f):
172172
stype = "&'static [(&'static str, &'static [&'static str])]"
173173
dtype = "&'static [(&'static str, &'static [&'static str], &'static [&'static str])]"
174174
f.write(" // official Unicode test data\n")
175-
f.write(" // http://www.unicode.org/Public/UNIDATA/auxiliary/GraphemeBreakTest.txt\n")
175+
f.write(" // http://www.unicode.org/Public/9.0.0/ucd/auxiliary/GraphemeBreakTest.txt\n")
176176
unicode.emit_table(f, "TEST_SAME", test_same, stype, True, showfun, True)
177177
unicode.emit_table(f, "TEST_DIFF", test_diff, dtype, True, showfun, True)
178178

@@ -187,7 +187,7 @@ def create_words_data(f):
187187

188188
wtype = "&'static [(&'static str, &'static [&'static str])]"
189189
f.write(" // official Unicode test data\n")
190-
f.write(" // http://www.unicode.org/Public/UNIDATA/auxiliary/WordBreakTest.txt\n")
190+
f.write(" // http://www.unicode.org/Public/9.0.0/ucd/auxiliary/WordBreakTest.txt\n")
191191
unicode.emit_table(f, "TEST_WORD", test, wtype, True, showfun, True)
192192

193193
def create_sentence_data(f):

src/testdata.rs

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
1+
// Copyright 2012-2018 The Rust Project Developers. See the COPYRIGHT
22
// file at the top-level directory of this distribution and at
33
// http://rust-lang.org/COPYRIGHT.
44
//
@@ -12,7 +12,7 @@
1212

1313
#![allow(missing_docs, non_upper_case_globals, non_snake_case)]
1414
// official Unicode test data
15-
// http://www.unicode.org/Public/UNIDATA/auxiliary/GraphemeBreakTest.txt
15+
// http://www.unicode.org/Public/9.0.0/ucd/auxiliary/GraphemeBreakTest.txt
1616
pub const TEST_SAME: &'static [(&'static str, &'static [&'static str])] = &[
1717
("\u{20}\u{20}", &["\u{20}", "\u{20}"]), ("\u{20}\u{308}\u{20}", &["\u{20}\u{308}",
1818
"\u{20}"]), ("\u{20}\u{d}", &["\u{20}", "\u{d}"]), ("\u{20}\u{308}\u{d}", &["\u{20}\u{308}",
@@ -516,7 +516,7 @@
516516
];
517517

518518
// official Unicode test data
519-
// http://www.unicode.org/Public/UNIDATA/auxiliary/WordBreakTest.txt
519+
// http://www.unicode.org/Public/9.0.0/ucd/auxiliary/WordBreakTest.txt
520520
pub const TEST_WORD: &'static [(&'static str, &'static [&'static str])] = &[
521521
("\u{1}\u{1}", &["\u{1}", "\u{1}"]), ("\u{1}\u{308}\u{1}", &["\u{1}\u{308}", "\u{1}"]),
522522
("\u{1}\u{d}", &["\u{1}", "\u{d}"]), ("\u{1}\u{308}\u{d}", &["\u{1}\u{308}", "\u{d}"]),

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy