Skip to content

Commit 598fa4a

Browse files
authored
Onion
1 parent 1180358 commit 598fa4a

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

58 files changed

+4981
-0
lines changed

SourceCode/Parser/Lexer.cs

Lines changed: 393 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,393 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.Linq;
4+
using System.Text;
5+
using System.Threading.Tasks;
6+
7+
namespace OwnLang
8+
{
9+
class Lexer
10+
{
11+
// Characters that route input to tokenizeOperator() when seen by tokenize().
private static string OPERATOR_CHARS = "+-*/()[]{}=<>!&|;";

// Source text handed to the constructor.
private string input;

// Tokens collected so far; returned by tokenize().
private List<Token> tokens;

// Maps an operator's spelling to the token type emitted for it.
private static Dictionary<string, TokenType> OPERATORS;

/// <summary>
/// Builds the operator lookup table used by tokenizeOperator().
/// </summary>
static Lexer()
{
    OPERATORS = new Dictionary<string, TokenType>
    {
        // increment / decrement
        { "++", TokenType.INC },
        { "--", TokenType.DEC },

        // compound assignment
        { "+=", TokenType.PLUSEQ },
        { "-=", TokenType.MINUSEQ },
        { "*=", TokenType.STAREQ },
        { "/=", TokenType.SLASHEQ },

        // arithmetic
        { "+", TokenType.PLUS },
        { "-", TokenType.MINUS },
        { "*", TokenType.STAR },
        { "/", TokenType.SLASH },

        // grouping, assignment, comparison, separator
        { "(", TokenType.LPAREN },
        { ")", TokenType.RPAREN },
        // NOTE(review): the 'C' in the next member name may be a Cyrillic look-alike rather than
        // a Latin letter; it is kept byte-for-byte - confirm against the TokenType declaration.
        { "[", TokenType.LBRAСKET },
        { "]", TokenType.RBRACKET },
        { "{", TokenType.LBRACE },
        { "}", TokenType.RBRACE },
        { "=", TokenType.EQ },
        { "<", TokenType.LT },
        { ">", TokenType.GT },
        // ';' is reported as COMMA (tokenizeNumber accepts ',' inside numbers).
        { ";", TokenType.COMMA },

        // single-character logical / bitwise symbols
        { "!", TokenType.EXCL },
        { "&", TokenType.AMP },
        { "|", TokenType.BAR },

        // two-character comparisons
        { "==", TokenType.EQEQ },
        { "!=", TokenType.EXCLEQ },
        { "<=", TokenType.LTEQ },
        { ">=", TokenType.GTEQ },

        // short-circuit logical operators
        { "&&", TokenType.AMPAMP },
        { "||", TokenType.BARBAR },
    };
}
55+
// Index of the character currently being examined in input.
private int pos;

// Cached input.Length; peek() returns '\0' for any index at or beyond it.
private int length;

/// <summary>
/// Creates a lexer over the given source text. Nothing is scanned until tokenize() is called.
/// </summary>
/// <param name="input">Source text to split into tokens.</param>
/// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
public Lexer(string input)
{
    // Fail with a descriptive error instead of a NullReferenceException on input.Length.
    if (input == null)
    {
        throw new ArgumentNullException("input");
    }

    this.input = input;
    length = input.Length;

    tokens = new List<Token>();
}
64+
65+
/// <summary>
/// Scans the input from the current position to its end and returns the collected tokens.
/// </summary>
/// <remarks>
/// The first character of each lexeme selects the scanner: a digit starts a number, a letter
/// starts a word, an operator character starts an operator (or comment), and a double quote
/// starts a text literal. Any other character, whitespace included, is skipped.
/// </remarks>
/// <returns>The lexer's token list (the same list instance the lexer appends to).</returns>
public List<Token> tokenize()
{
    while (pos < length)
    {
        char head = peek(0);

        if (Char.IsDigit(head))
        {
            tokenizeNumber();
            continue;
        }
        if (Char.IsLetter(head))
        {
            tokenizeWord();
            continue;
        }
        if (OPERATOR_CHARS.IndexOf(head) >= 0)
        {
            tokenizeOperator();
            continue;
        }
        if (head == '"')
        {
            tokenizeText();
            continue;
        }

        // Not the start of any lexeme: step over it.
        next();
    }
    return tokens;
}
87+
88+
/// <summary>
/// Reads a double-quoted text literal, starting on its opening quote, and emits a TEXT token
/// containing the decoded characters.
/// </summary>
/// <remarks>
/// Decoded escapes: \" \n \t \b \r \a \f \v. When any other character follows a backslash, the
/// backslash is copied literally and that character is then processed as ordinary input on the
/// next pass. A second backslash therefore starts a new escape rather than producing a single
/// backslash.
/// The '\0' character is treated as end of input, as in the comment scanners.
/// </remarks>
/// <exception cref="Exception">The input ends before the closing quote is found.</exception>
private void tokenizeText()
{
    next(); // step over the opening quote
    StringBuilder buffer = new StringBuilder();
    char current = peek(0);
    while (true)
    {
        // peek() keeps returning '\0' once the input is exhausted, so without this check an
        // unterminated literal would append '\0' forever instead of reporting the error.
        if (current == '\0')
        {
            throw new Exception("Reached end of file while parsing text string");
        }

        if (current == '\\')
        {
            current = next();
            char decoded;
            switch (current)
            {
                case '"': decoded = '"'; break;
                case 'n': decoded = '\n'; break;
                case 't': decoded = '\t'; break;
                case 'b': decoded = '\b'; break;
                case 'r': decoded = '\r'; break;
                case 'a': decoded = '\a'; break;
                case 'f': decoded = '\f'; break;
                case 'v': decoded = '\v'; break;
                default:
                    // Unknown escape: keep the backslash and let the loop handle the
                    // following character on its next pass.
                    buffer.Append('\\');
                    continue;
            }
            buffer.Append(decoded);
            current = next();
            continue;
        }

        if (current == '"')
        {
            break;
        }
        buffer.Append(current);
        current = next();
    }
    next(); // step over the closing quote

    addToken(TokenType.TEXT, buffer.ToString());
}
164+
165+
// Reserved words and the token type emitted for each; anything else becomes a WORD token.
private static Dictionary<string, TokenType> KEYWORDS = new Dictionary<string, TokenType>
{
    { "sout", TokenType.PRINT },
    { "var", TokenType.OBJ },
    { "if", TokenType.IF },
    { "else", TokenType.ELSE },
    { "try", TokenType.TRY },
    { "while", TokenType.WHILE },
    { "for", TokenType.FOR },
    { "foreach", TokenType.FOREACH },
    { "to", TokenType.TO },
    { "do", TokenType.DO },
    { "break", TokenType.BREAK },
    { "continue", TokenType.CONTINUE },
    { "def", TokenType.DEF },
    { "class", TokenType.CLASS },
    { "init", TokenType.RUN_CLASS },
    { "return", TokenType.RETURN },
    { "use", TokenType.USE },
    { "true", TokenType.TRUE },
    { "false", TokenType.FALSE },
};

/// <summary>
/// Reads a run of letters, digits, '_' and '$' starting at the current position and emits
/// either the matching keyword token (with empty text) or a WORD token carrying the word.
/// </summary>
private void tokenizeWord()
{
    StringBuilder letters = new StringBuilder();
    char c = peek(0);
    while (Char.IsLetterOrDigit(c) || c == '_' || c == '$')
    {
        letters.Append(c);
        c = next();
    }

    string word = letters.ToString();
    TokenType keyword;
    if (KEYWORDS.TryGetValue(word, out keyword))
    {
        addToken(keyword);
    }
    else
    {
        addToken(TokenType.WORD, word);
    }
}
286+
287+
/// <summary>
/// Handles input that starts with an operator character: skips a "//" or "/*" comment, or
/// emits the token for the longest operator in the OPERATORS table that matches at the
/// current position.
/// </summary>
/// <exception cref="Exception">
/// The matched text has no entry in the OPERATORS table, or (via tokenizeMultilineComment)
/// a block comment is not closed.
/// </exception>
private void tokenizeOperator()
{
    char current = peek(0);
    if (current == '/')
    {
        char following = peek(1);
        if (following == '/')
        {
            next();
            next();
            tokenizeComment();
            return;
        }
        if (following == '*')
        {
            next();
            next();
            tokenizeMultilineComment();
            return;
        }
    }

    // The first character is always taken. The original guarded this with "text != null",
    // which is always true for StringBuilder.ToString(); the intent is "nothing collected yet".
    StringBuilder buffer = new StringBuilder();
    buffer.Append(current);
    current = next();

    // Extend the match while the longer spelling is still a known operator.
    while (OPERATORS.ContainsKey(buffer.ToString() + current))
    {
        buffer.Append(current);
        current = next();
    }

    string text = buffer.ToString();
    if (!OPERATORS.ContainsKey(text))
    {
        // Only possible if a character is routed here without a table entry.
        throw new Exception("Unknown operator: " + text);
    }
    addToken(OPERATORS[text]);
}
321+
322+
/// <summary>
/// Skips the body of a block comment, starting just after its opening marker, up to and
/// including the closing "*/". No token is emitted.
/// </summary>
/// <exception cref="Exception">A '\0' (what peek returns at end of input) is reached first.</exception>
private void tokenizeMultilineComment()
{
    for (char c = peek(0); !(c == '*' && peek(1) == '/'); c = next())
    {
        if (c == '\0')
        {
            throw new Exception("Missing close tag");
        }
    }

    // Step over the two characters of the closing marker.
    next();
    next();
}
334+
335+
/// <summary>
/// Skips a line comment: advances until a carriage return, a line feed or '\0' (what peek
/// returns at end of input) is the current character. The terminator itself is not consumed.
/// </summary>
private void tokenizeComment()
{
    char c = peek(0);
    while (c != '\r' && c != '\n' && c != '\0')
    {
        c = next();
    }
}
343+
344+
/// <summary>
/// Reads a numeric literal made of digits and at most one ',' and emits it as a NUMBER token
/// whose text is the literal exactly as written.
/// </summary>
/// <exception cref="Exception">A second ',' appears inside the same literal.</exception>
private void tokenizeNumber()
{
    StringBuilder literal = new StringBuilder();
    bool separatorSeen = false;

    for (char c = peek(0); ; c = next())
    {
        if (c == ',')
        {
            if (separatorSeen)
            {
                throw new Exception("Invalid float number");
            }
            separatorSeen = true;
        }
        else if (!Char.IsDigit(c))
        {
            // First character that cannot belong to the number; leave it for the caller.
            break;
        }
        literal.Append(c);
    }

    addToken(TokenType.NUMBER, literal.ToString());
}
366+
367+
/// <summary>
/// Advances the cursor by one character.
/// </summary>
/// <returns>The character at the new position, or '\0' when that position is past the end.</returns>
private char next()
{
    ++pos;
    char upcoming = peek(0);
    return upcoming;
}
372+
373+
/// <summary>
/// Looks at a character relative to the cursor without moving the cursor.
/// </summary>
/// <param name="relativePosition">Offset from the current position (0 is the current character).</param>
/// <returns>The character at that offset, or '\0' when the offset is at or beyond the end of the input.</returns>
private char peek(int relativePosition)
{
    int index = pos + relativePosition;
    return index < length ? input[index] : '\0';
}
382+
383+
/// <summary>
/// Appends a token of the given type with empty text.
/// </summary>
/// <param name="type">Type of the token to append.</param>
private void addToken(TokenType type)
{
    tokens.Add(new Token(type, ""));
}

/// <summary>
/// Appends a token of the given type carrying the given text.
/// </summary>
/// <param name="type">Type of the token to append.</param>
/// <param name="text">Text stored in the token.</param>
private void addToken(TokenType type, string text)
{
    Token token = new Token(type, text);
    tokens.Add(token);
}
392+
}
393+
}

0 commit comments

Comments
 (0)