Skip to content

Commit 5fa3b63

Browse files
committed
added prettify function
1 parent 6ab8e65 commit 5fa3b63

File tree

3 files changed

+297
-0
lines changed

3 files changed

+297
-0
lines changed

CHANGELOG.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,12 @@
11
# Python String Utils Changelog
22

3+
## v0.3.0
4+
5+
### Added
6+
7+
- contains_html
8+
- strip_html
9+
310

411
## v0.2.0
512

string_utils.py

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
'uuid',
2626
'shuffle',
2727
'strip_html',
28+
'prettify',
2829
]
2930

3031
# compiled regex
@@ -69,6 +70,74 @@
6970
r'(<([a-z]+:)?[a-z]+[^>]*/?>|</([a-z]+:)?[a-z]+>|<!--.*-->|<!doctype.*>)',
7071
re.IGNORECASE | re.MULTILINE | re.DOTALL
7172
)
73+
# Compiled patterns used by prettify(), keyed by the formatting rule they drive.
# Every pattern wraps its alternatives in one outer group, so callers can always
# read the matched text from group(1).
PRETTIFY_RE = {
    # Runs of two or more identical signs that should appear only once
    # (brackets, colon, comma, semicolon, plus, minus, whitespace, percent,
    # equal sign, double and single quotes).
    'DUPLICATES': re.compile(
        r'(\({2,}|\){2,}|\[{2,}|\]{2,}|\{{2,}|\}{2,}|:{2,}|,{2,}|;{2,}|\+{2,}|\-{2,}|\s{2,}|%{2,}|={2,}|"{2,}|\'{2,})',
        re.MULTILINE
    ),

    # Signs that must not be preceded by a space and must be followed by one.
    # Comma and dot alternatives exclude digits on both sides, so numbers
    # such as 5.5 or 5,5 are left alone.
    'RIGHT_SPACE': re.compile(
        r'('
        r'(?<=[^\s\d]),(?=[^\s\d])|\s,\s|\s,(?=[^\s\d])|\s,(?!.)|'  # comma (,)
        r'(?<=[^\s\d])\.(?=[^\s\d])|\s\.\s|\s\.(?=[^\s\d])|\s\.(?!.)|'  # dot (.)
        r'(?<=\S);(?=\S)|\s;\s|\s;(?=\S)|\s;(?!.)|'  # semicolon (;)
        r'(?<=\S):(?=\S)|\s:\s|\s:(?=\S)|\s:(?!.)|'  # colon (:)
        r'(?<=\S)!(?=\S)|\s!\s|\s!(?=\S)|\s!(?!.)|'  # exclamation (!)
        r'(?<=\S)\?(?=\S)|\s\?\s|\s\?(?=\S)|\s\?(?!.)|'  # question (?)
        r'\d%(?=\S)|(?<=\d)\s%\s|(?<=\d)\s%(?=\S)|(?<=\d)\s%(?!.)'  # percentage (%)
        r')',
        re.MULTILINE | re.DOTALL
    ),

    # Quoted or bracketed text that is glued to the preceding text and/or
    # directly followed by one of ? . : ! , ;
    'LEFT_SPACE': re.compile(
        r'('

        # quoted text ("hello world")
        r'\s"[^"]+"(?=[\?\.:!,;])|(?<=\S)"[^"]+"\s|(?<=\S)"[^"]+"(?=[\?\.:!,;])|'

        # text in round brackets
        r'\s\([^\)]+\)(?=[\?\.:!,;])|(?<=\S)\([^\)]+\)\s|(?<=\S)(\([^\)]+\))(?=[\?\.:!,;])'

        r')',
        re.MULTILINE | re.DOTALL
    ),

    # A ".", "?" or "!" followed by one whitespace char and one word char
    # (the word char is the one to be uppercased).
    'UPPERCASE_AFTER_SIGN': re.compile(
        r'([\.\?!]\s\w)',
        re.MULTILINE | re.UNICODE
    ),

    # Arithmetic signs, quoted text and bracketed text that lack a space on
    # at least one side.
    'SPACES_AROUND': re.compile(
        r'('
        r'(?<=\S)\+(?=\S)|(?<=\S)\+\s|\s\+(?=\S)|'  # plus (+)
        r'(?<=\S)\-(?=\S)|(?<=\S)\-\s|\s\-(?=\S)|'  # minus (-)
        r'(?<=\S)/(?=\S)|(?<=\S)/\s|\s/(?=\S)|'  # division (/)
        r'(?<=\S)\*(?=\S)|(?<=\S)\*\s|\s\*(?=\S)|'  # multiplication (*)
        r'(?<=\S)=(?=\S)|(?<=\S)=\s|\s=(?=\S)|'  # equal (=)

        # quoted text ("hello world")
        r'\s"[^"]+"(?=[^\s\?\.:!,;])|(?<=\S)"[^"]+"\s|(?<=\S)"[^"]+"(?=[^\s\?\.:!,;])|'

        # text in round brackets
        r'\s\([^\)]+\)(?=[^\s\?\.:!,;])|(?<=\S)\([^\)]+\)\s|(?<=\S)(\([^\)]+\))(?=[^\s\?\.:!,;])'

        r')',
        re.MULTILINE | re.DOTALL
    ),

    # The content found between a pair of double quotes or round brackets
    # (delimiters excluded thanks to the look-arounds).
    'SPACES_INSIDE': re.compile(
        r'('
        r'(?<=")[^"]+(?=")|'  # quoted text ("hello world")
        r'(?<=\()[^\)]+(?=\))'  # text in round brackets
        r')',
        re.MULTILINE | re.DOTALL
    ),

    # An apostrophe followed by a whitespace char and then an "s".
    'NO_SPACES': re.compile(
        r'('
        r'(?<=\w)\'\s(?=s)|\s\'\s(?=s)'  # saxon genitive
        r')',
        re.MULTILINE | re.UNICODE
    )
}
72141

73142

74143
# string checking functions
@@ -399,3 +468,22 @@ def strip_html(string, keep_tag_content=False):
399468
"""
400469
r = HTML_TAG_ONLY_RE if keep_tag_content else HTML_RE
401470
return r.sub('', string)
471+
472+
473+
def prettify(string):
    """
    Reformat a string by applying a fixed sequence of regex-driven fixes.

    The steps, in order (patterns come from the module-level PRETTIFY_RE):

    1. DUPLICATES: each run of a repeated sign is replaced by its first char.
    2. RIGHT_SPACE: the match is stripped and followed by a single space.
    3. LEFT_SPACE: the match is stripped and preceded by a single space.
    4. SPACES_AROUND: the match is stripped and surrounded by single spaces.
    5. SPACES_INSIDE: the match (text inside quotes/brackets) is stripped.
    6. UPPERCASE_AFTER_SIGN: the letter following ".", "?" or "!" is uppercased.
    7. NO_SPACES: the match (saxon genitive apostrophe) is stripped.

    Finally the whole result is stripped and its first character capitalized.

    The order matters: later steps rely on the spacing produced by earlier ones.
    No explicit type validation is done here; a non-string argument is rejected
    by the ``re`` module itself, which raises TypeError.

    :param string: String to prettify.
    :return: Prettified string (empty if the input is empty or whitespace only).
    """

    def uppercase_after_sign(regex_match):
        # The match is always "<sign><space><char>": keep the first two chars
        # untouched and uppercase the third one.
        match = regex_match.group(1)
        return match[:-1] + match[2].upper()

    p = PRETTIFY_RE['DUPLICATES'].sub(lambda m: m.group(1)[0], string)
    p = PRETTIFY_RE['RIGHT_SPACE'].sub(lambda m: m.group(1).strip() + ' ', p)
    p = PRETTIFY_RE['LEFT_SPACE'].sub(lambda m: ' ' + m.group(1).strip(), p)
    p = PRETTIFY_RE['SPACES_AROUND'].sub(lambda m: ' ' + m.group(1).strip() + ' ', p)
    p = PRETTIFY_RE['SPACES_INSIDE'].sub(lambda m: m.group(1).strip(), p)
    p = PRETTIFY_RE['UPPERCASE_AFTER_SIGN'].sub(uppercase_after_sign, p)
    p = PRETTIFY_RE['NO_SPACES'].sub(lambda m: m.group(1).strip(), p)
    p = p.strip()

    # Any non-empty result gets its first character capitalized, including a
    # one-character string (only the empty string has nothing to capitalize).
    if p:
        p = p[0].capitalize() + p[1:]

    return p

tests.py

Lines changed: 202 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -976,3 +976,205 @@ def test_should_keep_tag_content_if_specified(self):
976976
</html>
977977
'''
978978
self.assertEqual('content text!', strip_html(multiline_string, keep_tag_content=True).strip())
979+
980+
981+
class PrettifyTestCase(TestCase):
    """Behavioral tests for prettify()."""

    def test_cannot_handle_non_string_objects(self):
        # prettify() itself must reject every non-string input
        self.assertRaises(TypeError, lambda: prettify(None))
        self.assertRaises(TypeError, lambda: prettify(False))
        self.assertRaises(TypeError, lambda: prettify(0))
        self.assertRaises(TypeError, lambda: prettify([]))
        self.assertRaises(TypeError, lambda: prettify({'a': 1}))

    def test_should_return_empty_string_from_empty_string_or_space_only_string(self):
        self.assertEqual('', prettify(''))
        self.assertEqual('', prettify(' '))

    def test_should_uppercase_first_letter(self):
        self.assertEqual('Hello world', prettify('hello world'))

    def test_should_strip_string(self):
        self.assertEqual('Hello world', prettify(' hello world '))

    def test_should_strip_empty_lines(self):
        self.assertEqual('Hello world', prettify('''

hello world

'''))

    def test_should_replace_multiple_brackets_with_single_ones(self):
        self.assertEqual('(foo)', prettify('((foo)'))
        self.assertEqual('(foo)', prettify('(foo))'))
        self.assertEqual('(foo)', prettify('((foo))'))
        self.assertEqual('(foo)', prettify('((((((((foo)))'))
        self.assertEqual('[foo]', prettify('[[foo]'))
        self.assertEqual('[foo]', prettify('[foo]]'))
        self.assertEqual('[foo]', prettify('[[foo]]'))
        self.assertEqual('[foo]', prettify('[[[[[[[[foo]]]'))
        self.assertEqual('{foo}', prettify('{{foo}'))
        self.assertEqual('{foo}', prettify('{foo}}'))
        self.assertEqual('{foo}', prettify('{{foo}}'))
        self.assertEqual('{foo}', prettify('{{{{{{{{foo}}}'))

    def test_should_remove_internal_spaces_in_brackets(self):
        self.assertEqual('(foo)', prettify('( foo)'))
        self.assertEqual('(foo)', prettify('(foo )'))
        self.assertEqual('(foo)', prettify('( foo )'))

    def test_should_add_spaces_outside_brackets(self):
        self.assertEqual('Boo (bar) baz', prettify('boo(bar)baz'))

    def test_should_not_add_right_space_after_bracket_if_followed_by_punctuation(self):
        self.assertEqual('Foo (bar)? Yes!', prettify('Foo(bar)? Yes!'))
        self.assertEqual('Foo (bar): Yes!', prettify('Foo(bar): Yes!'))
        self.assertEqual('Foo (bar). Yes!', prettify('Foo(bar). Yes!'))
        self.assertEqual('Foo (bar); yes!', prettify('Foo(bar); yes!'))
        self.assertEqual('Foo (bar), yes!', prettify('Foo(bar), yes!'))

    def test_should_replace_multiple_commas_with_single_ones(self):
        self.assertEqual('Hello, world', prettify('Hello,,, world'))
        self.assertEqual('Hello, world, banana', prettify('Hello,,, world,, banana'))

    def test_should_replace_multiple_colons_with_single_ones(self):
        self.assertEqual('Hello: world', prettify('Hello::: world'))
        self.assertEqual('Hello: world: banana', prettify('Hello::: world:: banana'))

    def test_should_replace_multiple_semicolons_with_single_ones(self):
        self.assertEqual('Hello; world', prettify('Hello;;; world'))
        self.assertEqual('Hello; world; banana', prettify('Hello;;; world;; banana'))

    def test_should_replace_multiple_double_quotes_with_single_ones(self):
        self.assertEqual('"hello" world', prettify('""hello"" world'))
        self.assertEqual('"hello" world', prettify('""hello" world'))
        self.assertEqual('"hello" world', prettify('"hello"" world'))
        self.assertEqual('"hello" world', prettify('""""""hello""""" world'))

    def test_should_add_spaces_for_double_quotes(self):
        self.assertEqual('Foo "bar" baz', prettify('foo"bar"baz'))
        self.assertEqual('Foo "bar" baz', prettify('foo"bar" baz'))
        self.assertEqual('Foo "bar" baz', prettify('foo "bar"baz'))

    def test_should_trim_spaces_inside_double_quotes(self):
        self.assertEqual('Foo "bar" baz', prettify('foo " bar " baz'))
        self.assertEqual('Foo "bar" baz', prettify('foo "bar " baz'))
        self.assertEqual('Foo "bar" baz', prettify('foo " bar" baz'))

    def test_should_not_add_right_space_after_double_quotes_if_followed_by_punctuation(self):
        self.assertEqual('Foo "bar"? Yes!', prettify('Foo"bar"? Yes!'))
        self.assertEqual('Foo "bar": Yes!', prettify('Foo"bar": Yes!'))
        self.assertEqual('Foo "bar". Yes!', prettify('Foo"bar". Yes!'))
        self.assertEqual('Foo "bar"; yes!', prettify('Foo"bar"; yes!'))
        self.assertEqual('Foo "bar", yes!', prettify('Foo"bar", yes!'))

    def test_should_replace_multiple_single_quotes_with_single_ones(self):
        self.assertEqual('Dave\'s job', prettify("Dave''s job"))
        self.assertEqual("'destiny'", prettify("'''destiny'''"))

    # The "test_" prefix is required for unittest to collect this method.
    def test_should_fix_saxon_genitive_spaces(self):
        self.assertEqual('Dave\'s dog', prettify('Dave\' s dog'))

    def test_should_replace_multiple_percentage_with_single_ones(self):
        self.assertEqual('%', prettify('%%%'))
        self.assertEqual('A % b % c', prettify('a %% b %%%%%% c'))

    def test_should_put_space_after_comma_if_missing(self):
        self.assertEqual('One, two, three', prettify('one,two,three'))

    def test_should_remove_space_before_comma(self):
        self.assertEqual('One, two, three', prettify('one , two , three'))

    def test_should_uppercase_first_letter_after_period(self):
        self.assertEqual('Foo. Bar', prettify('Foo. bar'))

    def test_should_put_space_after_period_if_missing(self):
        self.assertEqual('One. Two. Three', prettify('one.two.three'))

    def test_should_remove_space_before_period(self):
        self.assertEqual('One. Two. Three', prettify('one . two . three'))

    def test_should_put_space_after_colon_if_missing(self):
        self.assertEqual('Test: this', prettify('Test:this'))

    def test_should_remove_space_before_colon(self):
        self.assertEqual('Test: this', prettify('Test :this'))
        self.assertEqual('Test:', prettify('Test :'))

    def test_should_put_space_after_semicolon_if_missing(self):
        self.assertEqual('Test; this', prettify('Test;this'))

    def test_should_remove_space_before_semicolon(self):
        self.assertEqual('Test; this', prettify('Test ;this'))
        self.assertEqual('Test;', prettify('Test ;'))

    def test_should_uppercase_first_letter_after_exclamation(self):
        self.assertEqual('Foo! Bar', prettify('Foo! bar'))

    def test_should_put_space_after_exclamation_if_missing(self):
        self.assertEqual('Test! This', prettify('Test!this'))

    def test_should_remove_space_before_exclamation(self):
        self.assertEqual('Test! This', prettify('Test !this'))
        self.assertEqual('Test!', prettify('Test !'))

    def test_should_uppercase_first_letter_after_question(self):
        self.assertEqual('Foo? Bar', prettify('Foo? bar'))

    def test_should_put_space_after_question_if_missing(self):
        self.assertEqual('Test? This', prettify('Test?this'))

    def test_should_remove_space_before_question(self):
        self.assertEqual('Test? This', prettify('Test ?this'))
        self.assertEqual('Test?', prettify('Test ?'))

    def test_should_remove_space_before_dot(self):
        self.assertEqual('Test. This', prettify('Test . This'))
        self.assertEqual('Test.', prettify('Test .'))

    def test_should_remove_space_after_number_if_followed_by_percentage(self):
        self.assertEqual('100% python', prettify('100 % python'))
        self.assertEqual('100%', prettify('100 %'))

    def test_should_add_space_after_percentage_if_missing(self):
        self.assertEqual('100% python code', prettify('100%python code'))

    def test_should_add_spaces_around_plus_if_missing(self):
        self.assertEqual('5 + 2', prettify('5 +2'))
        self.assertEqual('5 + 2', prettify('5+ 2'))
        self.assertEqual('5 + 2', prettify('5+2'))

    def test_should_add_spaces_around_minus_if_missing(self):
        self.assertEqual('5 - 2', prettify('5 -2'))
        self.assertEqual('5 - 2', prettify('5- 2'))
        self.assertEqual('5 - 2', prettify('5-2'))

    def test_should_add_spaces_around_equal_if_missing(self):
        self.assertEqual('5 - 2 = 3', prettify('5 - 2=3'))
        self.assertEqual('5 - 2 = 3', prettify('5 - 2 =3'))
        self.assertEqual('5 - 2 = 3', prettify('5 - 2= 3'))

    def test_should_add_spaces_around_division_if_missing(self):
        self.assertEqual('5 / 2 = 2.5', prettify('5/ 2 = 2.5'))
        self.assertEqual('5 / 2 = 2.5', prettify('5 /2 = 2.5'))
        # already pretty input must be left untouched
        self.assertEqual('5 / 2 = 2.5', prettify('5 / 2 = 2.5'))
        # space missing on both sides of the sign
        self.assertEqual('5 / 2 = 2.5', prettify('5/2 = 2.5'))

    def test_should_add_spaces_around_multiplication_if_missing(self):
        self.assertEqual('5 * 2 = 10', prettify('5* 2 = 10'))
        self.assertEqual('5 * 2 = 10', prettify('5 *2 = 10'))
        # already pretty input must be left untouched
        self.assertEqual('5 * 2 = 10', prettify('5 * 2 = 10'))
        # space missing on both sides of the sign
        self.assertEqual('5 * 2 = 10', prettify('5*2 = 10'))

    def test_should_prettify_string_as_expected(self):
        original = ' unprettified string ,, like this one,will be"prettified" .it\' s awesome!( like python)) '
        pretty = 'Unprettified string, like this one, will be "prettified". It\'s awesome! (like python)'
        self.assertEqual(pretty, prettify(original))

    def test_should_work_as_expected_for_multiple_lines_string(self):
        original = '''

unprettified string ,,
like this one,will be"prettified"
.it' s awesome!( like python))

'''
        pretty = 'Unprettified string, like this one, will be "prettified". It\'s awesome! (like python)'
        self.assertEqual(pretty, prettify(original))

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy