Skip to content

Commit c2bff77

Browse files
committed
[IMP] spreadsheet: support array literals in formula tokenizer
The tokenizer now recognizes array literals (e.g. {1,2;3,4}) by handling braces and array row separators. This allows formulas using array literals to be properly tokenized. Task: 4735250
1 parent 09d18f5 commit c2bff77

File tree

2 files changed

+49
-0
lines changed

2 files changed

+49
-0
lines changed

src/spreadsheet/tests/test_spreadsheet_tokenizer.py

Lines changed: 31 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -184,3 +184,34 @@ def test_wrong_references(self):
184184
tokenize("=''!A1"),
185185
[("OPERATOR", "="), ("SYMBOL", "''!A1")],
186186
)
187+
188+
def test_literal_array(self):
    """Check the token streams produced for array literals.

    Covers a standalone two-row literal and a single-row literal passed
    as a function argument.
    """
    # Standalone literal with two rows.
    expected_two_rows = [
        ("OPERATOR", "="),
        ("LEFT_BRACE", "{"),
        ("NUMBER", "1"),
        ("ARG_SEPARATOR", ","),
        ("NUMBER", "2"),
        ("ARRAY_ROW_SEPARATOR", ";"),
        ("NUMBER", "3"),
        ("ARG_SEPARATOR", ","),
        ("NUMBER", "4"),
        ("RIGHT_BRACE", "}"),
    ]
    self.assertEqual(tokenize("={1,2;3,4}"), expected_two_rows)

    # Single-row literal used as a function argument.
    expected_as_argument = [
        ("OPERATOR", "="),
        ("SYMBOL", "SUM"),
        ("LEFT_PAREN", "("),
        ("LEFT_BRACE", "{"),
        ("NUMBER", "1"),
        ("ARG_SEPARATOR", ","),
        ("NUMBER", "2"),
        ("RIGHT_BRACE", "}"),
        ("RIGHT_PAREN", ")"),
    ]
    self.assertEqual(tokenize("=SUM({1,2})"), expected_as_argument)

src/util/spreadsheet/tokenizer.py

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -87,7 +87,9 @@ def tokenize(string, locale=DEFAULT_LOCALE):
8787
while not chars.is_over():
8888
token = (
8989
tokenize_space(chars)
90+
or tokenize_array_row_separator(chars, locale)
9091
or tokenize_args_separator(chars, locale)
92+
or tokenize_braces(chars)
9193
or tokenize_parenthesis(chars)
9294
or tokenize_operator(chars)
9395
or tokenize_string(chars)
@@ -122,6 +124,14 @@ def tokenize_parenthesis(chars):
122124
return parenthesis[value]
123125
return None
124126

127+
# Maps each brace character to its (token type, token value) pair.
braces = {
    "{": ("LEFT_BRACE", "{"),
    "}": ("RIGHT_BRACE", "}"),
}


def tokenize_braces(chars):
    """Tokenize a single array-literal brace at the current position.

    Returns the ``("LEFT_BRACE", "{")`` or ``("RIGHT_BRACE", "}")`` pair
    after calling ``chars.shift()``. Returns ``None``, without calling
    ``chars.shift()``, when ``chars.current`` is not a brace.
    """
    token = braces.get(chars.current)
    if token is None:
        return None
    chars.shift()
    return token
125135

126136
def tokenize_args_separator(chars, locale):
127137
if chars.current == locale["formulaArgSeparator"]:
@@ -140,6 +150,14 @@ def tokenize_operator(chars):
140150

141151
# ASCII digits. tokenize_number checks the current character against this set
# (and, separately, against the locale's decimal separator).
FIRST_POSSIBLE_NUMBER_CHARS = set("0123456789")
142152

153+
def tokenize_array_row_separator(chars, locale):
    """Tokenize the separator between two rows of an array literal.

    The row separator depends on the locale: when the locale's
    ``formulaArgSeparator`` is ``";"`` the row separator is a backslash,
    otherwise it is ``";"``.

    Returns ``("ARRAY_ROW_SEPARATOR", <separator>)`` after calling
    ``chars.shift()``. Returns ``None``, without calling ``chars.shift()``,
    when ``chars.current`` is not the row separator.
    """
    # Always one of two non-empty strings, so no emptiness guard is needed.
    row_separator = "\\" if locale["formulaArgSeparator"] == ";" else ";"
    if chars.current != row_separator:
        return None
    chars.shift()
    return "ARRAY_ROW_SEPARATOR", row_separator
143161

144162
def tokenize_number(chars, locale):
145163
if chars.current not in FIRST_POSSIBLE_NUMBER_CHARS and chars.current != locale["decimalSeparator"]:

0 commit comments

Comments
 (0)