
Commit ade388b

Authored by qinzzz and hunterhector
Fix #347: only create object UnicodeRegex when used. (#349)
* fix #347: only create object UnicodeRegex when used.
* fix #347: only create object UnicodeRegex when used.
* use lru cache to generate object; update workflow
* update requirements
* Update setup.py

Co-authored-by: qinzzz <wangqinxin2007@126.com>
Co-authored-by: Hector <hunterhector@gmail.com>
1 parent d98c2c2 commit ade388b

File tree

3 files changed: +9 additions, -5 deletions

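The fix described in the commit message, "use lru cache to generate object", is the standard lazy-initialization idiom: wrap the expensive constructor in a zero-argument function decorated with functools.lru_cache, so the object is built on first use instead of at import time. Below is a minimal, self-contained sketch of that pattern; the names ExpensiveObject and get_expensive_object are illustrative only, and the actual change appears in the bleu_transformer.py diff further down.

import functools


class ExpensiveObject:
    """Stand-in for a class whose constructor is costly (e.g. building large Unicode regexes)."""

    def __init__(self):
        print("constructing ExpensiveObject")  # runs exactly once


@functools.lru_cache(maxsize=1)
def get_expensive_object() -> ExpensiveObject:
    # Nothing is built when the module is imported; the first call constructs
    # the object, and lru_cache returns the same instance on every later call.
    return ExpensiveObject()


first = get_expensive_object()   # prints "constructing ExpensiveObject"
second = get_expensive_object()  # served from the cache, no new construction
assert first is second

Because the getter takes no arguments there is only one cache key, so maxsize=1 never evicts and the function behaves like a lazily created module-level singleton.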

requirements.txt

Lines changed: 0 additions & 1 deletion
@@ -7,4 +7,3 @@ sentencepiece >= 0.1.8
 dill >= 0.3.3
 nni >= 2.0.0
 six >= 1.15
-

setup.py

Lines changed: 4 additions & 3 deletions
@@ -38,18 +38,19 @@
         'sentencepiece>=0.1.96',
         'mypy_extensions',
         'packaging>=19.0',
-        'asyml-utilities>=0.0.1.dev1'
+        'six',
+        'asyml-utilities>=0.0.1.dev1',
     ],
     extras_require={
         'torch': ['torch>=1.0.0'],
         'examples': [],
         'extras': ['Pillow>=3.0', 'tensorboardX>=1.8', 'six>=1.15'],
-        'dist': ['adaptdl>=0.2.4']
+        'dist': ['adaptdl>=0.2.4'],
     },
     package_data={
         "texar.torch": [
             "../../bin/utils/multi-bleu.perl",
-        ]
+        ],
     },
     classifiers=[
         'Intended Audience :: Developers',

texar/torch/evals/bleu_transformer.py

Lines changed: 5 additions & 1 deletion
@@ -23,6 +23,7 @@
 import unicodedata
 import collections
 import math
+import functools
 import numpy as np
 
 from texar.torch.evals.bleu import corpus_bleu
@@ -156,7 +157,9 @@ def property_chars(prefix):
     )
 
 
-uregex = UnicodeRegex()
+@functools.lru_cache(1)
+def _get_unicode_regex() -> UnicodeRegex:
+    return UnicodeRegex()
 
 
 def bleu_transformer_tokenize(string: str) -> List[str]:
@@ -188,6 +191,7 @@ def bleu_transformer_tokenize(string: str) -> List[str]:
     Returns:
         a list of tokens
     """
+    uregex = _get_unicode_regex()
     string = uregex.nondigit_punct_re.sub(r"\1 \2 ", string)
     string = uregex.punct_nondigit_re.sub(r" \1 \2", string)
     string = uregex.symbol_re.sub(r" \1 ", string)
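With this change, importing texar.torch.evals.bleu_transformer no longer constructs UnicodeRegex at module load; _get_unicode_regex() builds and caches the instance the first time the tokenizer is called. A small usage sketch, assuming a texar-pytorch installation that includes this commit (the example sentence is illustrative):

from texar.torch.evals.bleu_transformer import bleu_transformer_tokenize

# The import above is now cheap; UnicodeRegex is built lazily inside the
# first call below and reused (via functools.lru_cache) on subsequent calls.
tokens = bleu_transformer_tokenize("Lazy initialization, at last!")
print(tokens)  # punctuation is split off into separate tokens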
