Skip to content

Commit 970e0a4

Browse files
committed
Merge pull request #7 from Oefenweb/between-quotes-pattern-doesnt-match-any-entries
BETWEEN_QUOTES_PATTERN doesn't match any entries
2 parents 4a9a421 + 87817a1 commit 970e0a4

10 files changed

+541
-36
lines changed

Makefile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,5 +31,6 @@ clean:
3131
python setup.py clean
3232
rm -rf build deb_dist debian dist MANIFEST *.egg-info deb_dist
3333
find . -name '*.pyc' -print0 | xargs --no-run-if-empty -0 rm
34-
find . -name '*.*coded' -print0 | xargs --no-run-if-empty -0 rm
34+
find . -name '*.decoded' -print0 | xargs --no-run-if-empty -0 rm
35+
find . -name '*.encode' -print0 | xargs --no-run-if-empty -0 rm
3536
find . -name '*.actual' -print0 | xargs --no-run-if-empty -0 rm

bin/locale-decode-category

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3,40 +3,41 @@
33
# -*- coding: utf-8 -*-
44

55
import sys
6-
import re
76
from glibc_locale_tools.glibc_locale_tools import *
87

9-
lines = ''
8+
lines = []
109
for line in sys.stdin:
11-
lines += line
10+
lines.append(line)
11+
lines_joined = ''.join(lines).decode('utf-8')
1212

13-
lines = lines.decode('utf-8')
13+
unsafe_spans = get_unsafe_spans(lines, lines_joined)
1414

1515
between_quotes_map = []
16-
for between_quotes in reversed(list(re.finditer(BETWEEN_QUOTES_PATTERN, lines))):
16+
for between_quotes in reversed(list(re.finditer(BETWEEN_QUOTES_PATTERN, lines_joined))):
1717
between_quotes_match = between_quotes.group(1)
1818
between_quotes_match_start = between_quotes.start(1)
1919
between_quotes_match_end = between_quotes.end(1)
2020

21+
if in_unsafe_spans(between_quotes_match_start, between_quotes_match_end, unsafe_spans):
22+
continue
23+
2124
replacement = between_quotes_match
2225
for unicode_matches in reversed(list(re.finditer(UNICODE_PATTERN, between_quotes_match))):
23-
unicode_match = unicode_matches.group(2)
24-
unicode_match_start = unicode_matches.start(1)
25-
unicode_match_end = unicode_matches.end(1)
26-
2726
replacement = replace_positional(replacement,
28-
unicode_match_start, unicode_decode(unicode_match), unicode_match_end)
27+
unicode_matches.start(1),
28+
unicode_decode(unicode_matches.group(2)),
29+
unicode_matches.end(1))
2930

3031
between_quotes_map.append({'start': between_quotes_match_start,
3132
'end': between_quotes_match_end,
3233
'replacement': replacement})
3334

3435
for between_quotes_map_items in between_quotes_map:
35-
lines = replace_positional(lines,
36-
between_quotes_map_items['start'],
37-
between_quotes_map_items['replacement'],
38-
between_quotes_map_items['end'])
36+
lines_joined = replace_positional(lines_joined,
37+
between_quotes_map_items['start'],
38+
between_quotes_map_items['replacement'],
39+
between_quotes_map_items['end'])
3940

40-
sys.stdout.write(lines.encode('utf-8'))
41+
sys.stdout.write(lines_joined.encode('utf-8'))
4142

4243
sys.exit(0)

bin/locale-encode-category

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3,40 +3,41 @@
33
# -*- coding: utf-8 -*-
44

55
import sys
6-
import re
76
from glibc_locale_tools.glibc_locale_tools import *
87

9-
lines = ''
8+
lines = []
109
for line in sys.stdin:
11-
lines += line
10+
lines.append(line)
11+
lines_joined = ''.join(lines).decode('utf-8')
1212

13-
lines = lines.decode('utf-8')
13+
unsafe_spans = get_unsafe_spans(lines, lines_joined)
1414

1515
between_quotes_map = []
16-
for between_quotes in reverse_iter(re.finditer(BETWEEN_QUOTES_PATTERN, lines)):
16+
for between_quotes in reverse_iter(re.finditer(BETWEEN_QUOTES_PATTERN, lines_joined)):
1717
between_quotes_match = between_quotes.group(1)
1818
between_quotes_match_start = between_quotes.start(1)
1919
between_quotes_match_end = between_quotes.end(1)
2020

21+
if in_unsafe_spans(between_quotes_match_start, between_quotes_match_end, unsafe_spans):
22+
continue
23+
2124
replacement = between_quotes_match
2225
for to_decode_matches in reversed(list(re.finditer(TO_DECODE_PATTERN, between_quotes_match))):
23-
to_decode_match = to_decode_matches.group(0)
24-
to_decode_match_start = to_decode_matches.start(0)
25-
to_decode_match_end = to_decode_matches.end(0)
26-
2726
replacement = replace_positional(replacement,
28-
to_decode_match_start, unicode_encode(to_decode_match), to_decode_match_end)
27+
to_decode_matches.start(0),
28+
unicode_encode(to_decode_matches.group(0)),
29+
to_decode_matches.end(0))
2930

3031
between_quotes_map.append({'start': between_quotes_match_start,
3132
'end': between_quotes_match_end,
3233
'replacement': replacement})
3334

3435
for between_quotes_map_items in between_quotes_map:
35-
lines = replace_positional(lines,
36-
between_quotes_map_items['start'],
37-
between_quotes_map_items['replacement'],
38-
between_quotes_map_items['end'])
36+
lines_joined = replace_positional(lines_joined,
37+
between_quotes_map_items['start'],
38+
between_quotes_map_items['replacement'],
39+
between_quotes_map_items['end'])
3940

40-
sys.stdout.write(lines.encode('utf-8'))
41+
sys.stdout.write(lines_joined.encode('utf-8'))
4142

4243
sys.exit(0)

glibc_locale_tools/glibc_locale_tools.py

Lines changed: 64 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
# -*- coding: utf-8 -*-
22

3-
BETWEEN_QUOTES_PATTERN = r'^[^%].*"([^"]*)"'
3+
import re
4+
5+
6+
BETWEEN_QUOTES_PATTERN = r'"([^"]*)"'
47
"""
58
A re pattern to match a between quotes section (comment lines are not excluded by this pattern; callers filter them out using get_unsafe_spans and in_unsafe_spans).
69
"""
@@ -10,7 +13,7 @@
1013
A re pattern to match a unicode char (e.g. <U002D>).
1114
"""
1215

13-
TO_DECODE_PATTERN = r'[^\/\n]{1}'
16+
TO_DECODE_PATTERN = r'(/(?!\n)|[^/\n]){1}'
1417
"""
1518
A re pattern to match a string section that needs decoding.
1619
"""
@@ -36,6 +39,11 @@
3639
A re pattern to match a escape_char line.
3740
"""
3841

42+
COMMENT_LINE_WITH_QUOTES_PATTERN = r'^%.*"[^"]*"'
43+
"""
44+
A re pattern to match a comment line with a between quotes section.
45+
"""
46+
3947

4048
def unicode_decode(unicode_char):
4149
"""
@@ -82,3 +90,57 @@ def reverse_iter(iterator):
8290
"""
8391

8492
return reversed(list(iterator))
93+
94+
95+
def between_range(range1, range2):
    """
    Tells whether range 2 lies entirely within range 1 (both bounds inclusive).

    :param range1: The enclosing range, a mapping with 'start' and 'end' keys
    :param range2: The candidate range, a mapping with 'start' and 'end' keys
    :return: True when both ends of range 2 fall inside range 1, False otherwise
    """

    lower = range1['start']
    upper = range1['end']

    # Guard clause: a start outside range 1 already rules the candidate out.
    if not lower <= range2['start'] <= upper:
        return False

    return lower <= range2['end'] <= upper
105+
106+
107+
def in_unsafe_spans(match_start, match_end, unsafe_spans):
    """
    Tells whether a match lies entirely inside at least one unsafe span.

    :param match_start: A match start position
    :param match_end: A match end position
    :param unsafe_spans: A list of unsafe spans (mappings with 'start' and 'end' keys)
    :return: True when some unsafe span contains the whole match, False otherwise
    """

    candidate = {'start': match_start, 'end': match_end}

    return any(between_range(span, candidate) for span in unsafe_spans)
122+
123+
124+
def get_unsafe_spans(lines, lines_joined):
    """
    Generates a list of unsafe spans.

    An unsafe span is the position range, within the joined text, of a comment line that contains a
    double quoted section (such a section should not be (en|de)coded).

    :param lines: A list of lines
    :param lines_joined: A string of lines
    :return: A list of unsafe spans, each a dict with 'start' and 'end' keys (empty when there are none)
    """

    unsafe_lines = [line for line in lines if re.search(COMMENT_LINE_WITH_QUOTES_PATTERN, line)]

    # Without this guard the alternation below would be empty, giving the pattern '()', which matches
    # the empty string at every position and yields one useless zero-width span per character.
    if not unsafe_lines:
        return []

    # NOTE(review): the command line scripts appear to pass the raw (undecoded) lines together with the
    # decoded joined text; a comment line holding non-ASCII characters may then fail to match -- confirm.
    unsafe_lines_pattern = '({0})'.format('|'.join(map(re.escape, unsafe_lines)))

    return [{'start': unsafe_line.start(0), 'end': unsafe_line.end(0)}
            for unsafe_line in re.finditer(unsafe_lines_pattern, lines_joined)]
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
comment_char %
2+
escape_char /
3+
abday "søn";"man";/
4+
"tir";"ons";/
5+
"tor";"fre";/
6+
"lør"
7+
day "søndag";/
8+
"mandag";/
9+
"tirsdag";/
10+
"onsdag";/
11+
"torsdag";/
12+
"fredag";/
13+
"lørdag"
14+
abmon "jan";"feb";/
15+
"mar";"apr";/
16+
"maj";"jun";/
17+
"jul";"aug";/
18+
"sep";"okt";/
19+
"nov";"dec"
20+
mon "januar";/
21+
"februar";/
22+
"marts";/
23+
"april";/
24+
"maj";/
25+
"juni";/
26+
"juli";/
27+
"august";/
28+
"september";/
29+
"oktober";/
30+
"november";/
31+
"december"
32+
d_t_fmt "%a %d %b %Y %T %Z"
33+
d_fmt "%d-%m-%Y"
34+
t_fmt "%T"
35+
am_pm "";""
36+
t_fmt_ampm ""
37+
date_fmt "%a %b %e/
38+
%H:%M:%S /
39+
%Z %Y"
40+
week 7;19971130;4
41+
first_weekday 2
42+
first_workday 2
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
comment_char %
2+
escape_char /
3+
abday "Sun";"Mon";/
4+
"Tue";"Wed";/
5+
"Thu";"Fri";/
6+
"Sat"
7+
day "Sunday";/
8+
"Monday";/
9+
"Tuesday";/
10+
"Wednesday";/
11+
"Thursday";/
12+
"Friday";/
13+
"Saturday"
14+
15+
week 7;19971130;7
16+
first_weekday 1
17+
first_workday 2
18+
abmon "Jan";"Feb";/
19+
"Mar";"Apr";/
20+
"May";"Jun";/
21+
"Jul";"Aug";/
22+
"Sep";"Oct";/
23+
"Nov";"Dec"
24+
mon "January";/
25+
"February";/
26+
"March";/
27+
"April";/
28+
"May";/
29+
"June";/
30+
"July";/
31+
"August";/
32+
"September";/
33+
"October";/
34+
"November";/
35+
"December"
36+
% Appropriate date and time representation (%c)
37+
% "%a %d %b %Y %r %Z"
38+
d_t_fmt "%a %d %b %Y %r %Z"
39+
%
40+
% Appropriate date representation (%x)
41+
% "%m/%d/%Y"
42+
d_fmt "%m/%d/%Y"
43+
%
44+
% Appropriate time representation (%X)
45+
% "%r"
46+
t_fmt "%r"
47+
%
48+
% Appropriate AM/PM time representation (%r)
49+
% "%I:%M:%S %p"
50+
t_fmt_ampm "%I:%M:%S /
51+
%p"
52+
%
53+
% Strings for AM/PM
54+
%
55+
am_pm "AM";"PM"
56+
%
57+
% Appropriate date representation (date(1)) "%a %b %e %H:%M:%S %Z %Y"
58+
date_fmt "%a %b %e/
59+
%H:%M:%S /
60+
%Z %Y"
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
comment_char %
2+
escape_char /
3+
abday "zo";"ma";"di";/
4+
"wo";"do";"vr";/
5+
"za"
6+
day "zondag";/
7+
"maandag";/
8+
"dinsdag";/
9+
"woensdag";/
10+
"donderdag";/
11+
"vrijdag";/
12+
"zaterdag"
13+
abmon "jan";"feb";/
14+
"mrt";"apr";/
15+
"mei";"jun";/
16+
"jul";"aug";/
17+
"sep";"okt";/
18+
"nov";"dec"
19+
mon "januari";/
20+
"februari";/
21+
"maart";/
22+
"april";/
23+
"mei";/
24+
"juni";/
25+
"juli";/
26+
"augustus";/
27+
"september";/
28+
"oktober";/
29+
"november";/
30+
"december"
31+
d_t_fmt "%a %d %b %Y %T %Z"
32+
d_fmt "%d-%m-%y"
33+
t_fmt "%T"
34+
am_pm "";""
35+
t_fmt_ampm ""
36+
date_fmt "%a %b %e/
37+
%H:%M:%S /
38+
%Z %Y"
39+
40+
week 7;19971130;4
41+
first_weekday 2
42+
first_workday 2

0 commit comments

Comments
 (0)