Line data Source code
1 : /* valageniescanner.vala
2 : *
3 : * Copyright (C) 2008-2012 Jamie McCracken, Jürg Billeter
4 : * Based on code by Jürg Billeter
5 : *
6 : * This library is free software; you can redistribute it and/or
7 : * modify it under the terms of the GNU Lesser General Public
8 : * License as published by the Free Software Foundation; either
9 : * version 2.1 of the License, or (at your option) any later version.
10 :
11 : * This library is distributed in the hope that it will be useful,
12 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 : * Lesser General Public License for more details.
15 :
16 : * You should have received a copy of the GNU Lesser General Public
17 : * License along with this library; if not, write to the Free Software
18 : * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 : *
20 : * Author:
21 : * Jamie McCracken jamiemcc gnome org
22 : */
23 :
24 : using GLib;
25 :
26 : /**
27 : * Lexical scanner for Genie source files.
28 : */
29 255 : public class Vala.Genie.Scanner {
// Source file being tokenized; assigned once in the constructor.
30 2515 : public SourceFile source_file { get; private set; }
31 :
// Initialised to 0 by the constructor. While it is 0, read_token () always
// strips leading whitespace/comments; otherwise it skips that step right
// after an EOL. NOTE(review): presumably the number of spaces that make up
// one indent level -- confirm against the indentation-counting code, which
// is outside this excerpt.
32 86 : public int indent_spaces { get; set;}
33 :
// begin/end delimit the mapped file contents (end = begin + mapped length,
// i.e. one past the last byte); current is the read cursor.
34 : char* begin;
35 : char* current;
36 : char* end;
37 :
// Position used for SourceLocation values; both start at 1.
38 : int line;
39 : int column;
40 :
// Indentation bookkeeping used by read_token () to emit INDENT/DEDENT:
// indent_level is the level already reported, current_indent_level the
// level measured on the newest line, and pending_dedents the number of
// DEDENT tokens still owed before normal scanning resumes.
41 : int current_indent_level;
42 : int indent_level;
43 : int pending_dedents;
44 :
45 : /* track open parens and braces for automatic line continuations */
46 : int open_parens_count;
47 : int open_brace_count;
48 :
// last_token starts as TokenType.NONE; the visible part of read_token ()
// updates it for EOL, INDENT and DEDENT and compares it against EOL and
// SEMICOLON. parse_started becomes true once read_token () reaches its
// first regular token.
49 : TokenType last_token;
50 : bool parse_started;
51 :
// NOTE(review): not referenced in this excerpt; presumably holds the most
// recently scanned comment -- confirm.
52 85 : Comment _comment;
53 :
// Stack of Conditional records; cleared by seek (). Its other users are
// outside this excerpt.
54 85 : Conditional[] conditional_stack;
55 :
// Element type of conditional_stack.
// NOTE(review): nothing in this excerpt reads these flags. Judging by the
// names they record whether a branch of a conditional section has already
// matched, whether its else branch was seen, and whether the section is
// being skipped -- confirm against the code that uses conditional_stack.
56 : struct Conditional {
57 : public bool matched;
58 : public bool else_found;
59 : public bool skip_section;
60 : }
61 :
62 85 : State[] state_stack;
63 :
// Kinds of nested context kept on state_stack; the top entry decides how
// read_token () treats the next input.
//  - PARENS, BRACE, BRACKET: pushed when '(', '{' or '[' is scanned; the
//    top of the stack is popped again when ')', '}' or ']' is scanned.
//  - REGEX_LITERAL: while on top, read_token () delegates to
//    read_regex_token ().
//  - TEMPLATE / VERBATIM_TEMPLATE: pushed after @" and @""" respectively;
//    while on top, read_token () delegates to read_template_token ().
//  - TEMPLATE_PART: pushed after each piece of a template; read_token ()
//    pops it and returns a zero-width COMMA.
64 : enum State {
65 : PARENS,
66 : BRACE,
67 : BRACKET,
68 : REGEX_LITERAL,
69 : TEMPLATE,
70 : TEMPLATE_PART,
71 : VERBATIM_TEMPLATE
72 : }
73 :
/**
 * Creates a scanner positioned at the start of the given source file.
 *
 * The file's mapped contents become the scan buffer, the position is set
 * to line 1, column 1, and all indentation and nesting counters are reset.
 *
 * @param source_file the file to tokenize
 */
74 170 : public Scanner (SourceFile source_file) {
75 85 : this.source_file = source_file;
76 :
// [begin, end) is the byte range of the mapped file
77 85 : begin = source_file.get_mapped_contents ();
78 85 : end = begin + source_file.get_mapped_length ();
79 :
80 85 : current = begin;
81 :
82 85 : _indent_spaces = 0;
83 85 : line = 1;
84 85 : column = 1;
85 85 : current_indent_level = 0;
86 85 : indent_level = 0;
87 85 : pending_dedents = 0;
88 :
89 85 : open_parens_count = 0;
90 85 : open_brace_count = 0;
91 :
92 85 : parse_started = false;
93 85 : last_token = TokenType.NONE;
94 :
95 : }
96 :
// True when the innermost open context on state_stack is a regular
// (non-verbatim) template string.
97 3198 : inline bool in_template () {
98 3198 : return (state_stack.length > 0 && state_stack[state_stack.length - 1] == State.TEMPLATE);
99 : }
100 :
// True when the innermost open context on state_stack is a verbatim
// (triple-quoted) template string.
101 3204 : inline bool in_verbatim_template () {
102 3204 : return (state_stack.length > 0 && state_stack[state_stack.length - 1] == State.VERBATIM_TEMPLATE);
103 : }
104 :
// True when a template piece has just been returned, i.e. TEMPLATE_PART is
// on top of state_stack and read_token () still has to emit the COMMA that
// separates it from the next piece.
105 2961 : inline bool in_template_part () {
106 2961 : return (state_stack.length > 0 && state_stack[state_stack.length - 1] == State.TEMPLATE_PART);
107 : }
108 :
// True for characters allowed inside an identifier: alphanumerics (as
// classified by char.isalnum) and the underscore.
109 5130 : inline bool is_ident_char (char c) {
110 5130 : return (c.isalnum () || c == '_');
111 : }
112 :
// True when the innermost open context on state_stack is a regular
// expression literal.
113 2941 : inline bool in_regex_literal () {
114 2941 : return (state_stack.length > 0 && state_stack[state_stack.length - 1] == State.REGEX_LITERAL);
115 : }
116 :
/**
 * Builds a SourceReference on the current line for use in diagnostics.
 *
 * The reference starts at the current column plus offset and ends length
 * columns further on; the byte positions are current and current + length.
 *
 * @param offset columns to add to the scanner's current column
 * @param length extent of the reference (default 0)
 * @return a new SourceReference into source_file
 */
117 0 : SourceReference get_source_reference (int offset, int length = 0) {
118 0 : return new SourceReference (source_file, SourceLocation (current, line, column + offset), SourceLocation (current + length, line, column + offset + length));
119 : }
120 :
/**
 * Reads the next token while a regular-expression literal is open.
 *
 * A leading '/' closes the literal: the REGEX_LITERAL state is popped and
 * any trailing modifier letters (i, s, m, x) are consumed, reporting an
 * error for a repeated modifier. Anything else is scanned as the literal
 * body up to the next unescaped '/', validating escape sequences and UTF-8
 * on the way. If the line or the input ends first, an error is reported,
 * the state is popped and scanning falls back to read_token ().
 *
 * @param token_begin set to the location where the token starts
 * @param token_end set to the end of the token (byte position just past
 *                  it, column of its last character)
 * @return CLOSE_REGEX_LITERAL, REGEX_LITERAL, EOF, or whatever
 *         read_token () yields after an unterminated literal
 */
121 2 : public TokenType read_regex_token (out SourceLocation token_begin, out SourceLocation token_end) {
122 : TokenType type;
123 2 : char* begin = current;
124 2 : token_begin = SourceLocation (begin, line, column);
125 :
// Number of characters consumed so far; it stays -1 when nothing was
// counted, in which case the column advances by the byte distance instead.
126 2 : int token_length_in_chars = -1;
127 :
128 2 : if (current >= end) {
129 : type = TokenType.EOF;
130 : } else {
131 2 : switch (current[0]) {
132 : case '/':
133 1 : type = TokenType.CLOSE_REGEX_LITERAL;
134 1 : current++;
// Count the '/' itself. Starting from -1 here made "/i" advance the column
// by 0 and pointed duplicate-modifier errors at the wrong column.
token_length_in_chars = 1;
135 1 : state_stack.length--;
136 1 : var fl_i = false;
137 1 : var fl_s = false;
138 1 : var fl_m = false;
139 1 : var fl_x = false;
// bounds check first: the closing '/' may be the last byte of the buffer
140 1 : while (current < end && (current[0] == 'i' || current[0] == 's' || current[0] == 'm' || current[0] == 'x')) {
141 0 : switch (current[0]) {
142 : case 'i':
143 0 : if (fl_i) {
144 0 : Report.error (get_source_reference (token_length_in_chars), "modifier 'i' used more than once");
145 : }
146 : fl_i = true;
147 : break;
148 : case 's':
149 0 : if (fl_s) {
150 0 : Report.error (get_source_reference (token_length_in_chars), "modifier 's' used more than once");
151 : }
152 : fl_s = true;
153 : break;
154 : case 'm':
155 0 : if (fl_m) {
156 0 : Report.error (get_source_reference (token_length_in_chars), "modifier 'm' used more than once");
157 : }
158 : fl_m = true;
159 : break;
160 : case 'x':
161 0 : if (fl_x) {
162 0 : Report.error (get_source_reference (token_length_in_chars), "modifier 'x' used more than once");
163 : }
164 : fl_x = true;
165 : break;
166 : }
167 0 : current++;
168 0 : token_length_in_chars++;
169 : }
170 : break;
171 : default:
172 6 : type = TokenType.REGEX_LITERAL;
173 : token_length_in_chars = 0;
174 6 : while (current < end && current[0] != '/') {
175 5 : if (current[0] == '\\') {
176 0 : current++;
177 0 : token_length_in_chars++;
178 0 : if (current >= end) {
179 : break;
180 : }
181 :
// single-character escapes accepted inside a regex literal
182 0 : switch (current[0]) {
183 : case '\'':
184 : case '"':
185 : case '\\':
186 : case '/':
187 : case '^':
188 : case '$':
189 : case '.':
190 : case '[':
191 : case ']':
192 : case '{':
193 : case '}':
194 : case '(':
195 : case ')':
196 : case '?':
197 : case '*':
198 : case '+':
199 : case '-':
200 : case '#':
201 : case '&':
202 : case '~':
203 : case ':':
204 : case ';':
205 : case '<':
206 : case '>':
207 : case '|':
208 : case '%':
209 : case '=':
210 : case '@':
211 : case '0':
212 : case 'b':
213 : case 'B':
214 : case 'f':
215 : case 'n':
216 : case 'N':
217 : case 'r':
218 : case 'R':
219 : case 't':
220 : case 'v':
221 : case 'a':
222 : case 'A':
223 : case 'p':
224 : case 'P':
225 : case 'e':
226 : case 'd':
227 : case 'D':
228 : case 's':
229 : case 'S':
230 : case 'w':
231 : case 'W':
232 : case 'G':
233 : case 'z':
234 : case 'Z':
235 0 : current++;
236 0 : token_length_in_chars++;
237 0 : break;
238 : case 'u':
239 : // u escape character has four hex digits
240 0 : current++;
241 0 : token_length_in_chars++;
242 : int digit_length;
243 0 : for (digit_length = 0; current < end && current[0].isxdigit (); digit_length++) {
244 0 : current++;
245 0 : token_length_in_chars++;
246 : }
247 0 : if (digit_length < 1) {
248 0 : Report.error (get_source_reference (token_length_in_chars), "\\u used with no following hex digits");
249 0 : } else if (digit_length < 4) {
250 0 : Report.error (get_source_reference (token_length_in_chars), "incomplete universal character name");
251 : }
252 : break;
253 : case 'x':
254 : // hexadecimal escape character requires two hex digits
255 0 : current++;
256 0 : token_length_in_chars++;
257 : int digit_length;
// digit_length deliberately ignores '0' digits (range check only), so a
// separate flag records whether any digit was present at all; without it
// "\x00" was rejected as having no hex digits.
bool saw_digit = false;
258 0 : for (digit_length = 0; current < end && current[0].isxdigit ();) {
saw_digit = true;
259 0 : if (current[0] != '0') {
260 0 : digit_length++;
261 : }
262 0 : current++;
263 0 : token_length_in_chars++;
264 : }
265 0 : if (!saw_digit) {
266 0 : Report.error (get_source_reference (token_length_in_chars), "\\x used with no following hex digits");
267 0 : } else if (digit_length > 2) {
268 0 : Report.error (get_source_reference (token_length_in_chars), "hex escape sequence out of range");
269 : }
270 : break;
271 : default:
272 : // back references \1 through \99
273 0 : if (current[0].isdigit ()) {
274 0 : current++;
275 0 : token_length_in_chars++;
// the first digit may have been the last byte of the buffer
276 0 : if (current < end && current[0].isdigit ()) {
277 0 : current++;
278 0 : token_length_in_chars++;
279 : }
280 : } else {
281 0 : Report.error (get_source_reference (token_length_in_chars), "invalid escape sequence");
282 : }
283 : break;
284 : }
285 5 : } else if (current[0] == '\n') {
// a regex literal may not span lines; handled as unterminated below
286 : break;
287 : } else {
// advance by one whole UTF-8 character so columns count characters
288 5 : unichar u = ((string) current).get_char_validated ((long) (end - current));
289 5 : if (u != (unichar) (-1)) {
290 5 : current += u.to_utf8 (null);
291 5 : token_length_in_chars++;
292 : } else {
293 0 : current++;
294 0 : Report.error (get_source_reference (token_length_in_chars), "invalid UTF-8 character");
295 : }
296 : }
297 : }
298 1 : if (current >= end || current[0] == '\n') {
// unterminated literal: the missing delimiter is '/', not a double quote
299 0 : Report.error (get_source_reference (token_length_in_chars), "syntax error, expected /");
300 0 : state_stack.length--;
301 0 : return read_token (out token_begin, out token_end);
302 : }
303 : break;
304 : }
305 : }
306 :
307 2 : if (token_length_in_chars < 0) {
308 1 : column += (int) (current - begin);
309 : } else {
310 1 : column += token_length_in_chars;
311 : }
312 :
313 2 : token_end = SourceLocation (current, line, column - 1);
314 :
315 2 : return type;
316 : }
317 :
318 :
/**
 * Repositions the scanner at a previously recorded location.
 *
 * Restores the byte position, line and column from location and discards
 * both the conditional stack and the nesting-state stack. The indentation
 * and paren/brace counters are not modified here.
 *
 * @param location position to continue scanning from
 */
319 0 : public void seek (SourceLocation location) {
320 0 : current = location.pos;
321 0 : line = location.line;
322 0 : column = location.column;
323 :
324 0 : conditional_stack = null;
325 0 : state_stack = null;
326 : }
327 :
/**
 * Classifies an identifier-shaped lexeme as a Genie keyword or a plain
 * identifier.
 *
 * Dispatches on the length first and then on the leading character(s), so
 * that at most two matches () calls are made per lexeme. The keywords
 * handled here are 2 to 10 characters long; every other input falls
 * through to IDENTIFIER.
 *
 * NOTE(review): matches () is defined outside this excerpt; presumably it
 * compares the lexeme at begin with the given keyword -- confirm.
 *
 * @param begin pointer to the first character of the lexeme
 * @param len number of characters in the lexeme
 * @return the keyword's TokenType, or TokenType.IDENTIFIER
 */
328 : public static TokenType get_identifier_or_keyword (char* begin, int len) {
329 970 : switch (len) {
330 : case 2:
331 24 : switch (begin[0]) {
332 : case 'a':
333 0 : if (matches (begin, "as")) return TokenType.AS;
334 : break;
335 : case 'd':
336 4 : if (matches (begin, "do")) return TokenType.DO;
337 : break;
338 : case 'i':
// with len == 2 the second character alone decides between if / in / is
339 18 : switch (begin[1]) {
340 : case 'f':
341 970 : return TokenType.IF;
342 : case 'n':
343 1 : return TokenType.IN;
344 : case 's':
345 0 : return TokenType.IS;
346 : }
347 : break;
348 : case 'o':
349 1 : if (matches (begin, "of")) return TokenType.OF;
350 :
351 1 : if (matches (begin, "or")) return TokenType.OP_OR;
352 : break;
353 : case 't':
354 1 : if (matches (begin, "to")) return TokenType.TO;
355 : break;
356 : }
357 : break;
358 : case 3:
359 140 : switch (begin[0]) {
360 : case 'a':
361 1 : if (matches (begin, "and")) return TokenType.OP_AND;
362 : break;
363 : case 'd':
364 14 : if (matches (begin, "def")) return TokenType.DEF;
365 : break;
366 : case 'f':
367 10 : if (matches (begin, "for")) return TokenType.FOR;
368 : break;
369 : case 'g':
370 2 : if (matches (begin, "get")) return TokenType.GET;
371 : break;
372 : case 'i':
373 47 : if (matches (begin, "isa")) return TokenType.ISA;
374 : break;
375 : case 'n':
376 11 : switch (begin[1]) {
377 : case 'e':
378 11 : if (matches (begin, "new")) return TokenType.NEW;
379 : break;
380 : case 'o':
381 0 : if (matches (begin, "not")) return TokenType.OP_NEG;
382 : break;
383 : }
384 : break;
385 : case 'o':
386 0 : if (matches (begin, "out")) return TokenType.OUT;
387 : break;
388 : case 'r':
389 0 : if (matches (begin, "ref")) return TokenType.REF;
390 : break;
391 : case 's':
392 1 : if (matches (begin, "set")) return TokenType.SET;
393 : break;
394 : case 't':
395 1 : if (matches (begin, "try")) return TokenType.TRY;
396 : break;
397 : case 'v':
398 38 : if (matches (begin, "var")) return TokenType.VAR;
399 : break;
400 : }
401 : break;
402 : case 4:
403 206 : switch (begin[0]) {
404 : case 'c':
405 2 : if (matches (begin, "case")) return TokenType.CASE;
406 : break;
407 : case 'd':
408 0 : if (matches (begin, "dict")) return TokenType.DICT;
409 : break;
410 : case 'e':
411 11 : switch (begin[1]) {
412 : case 'l':
413 9 : if (matches (begin, "else")) return TokenType.ELSE;
414 : break;
415 : case 'n':
416 2 : if (matches (begin, "enum")) return TokenType.ENUM;
417 : break;
418 : }
419 : break;
420 : case 'i':
421 88 : if (matches (begin, "init")) return TokenType.INIT;
422 : break;
423 : case 'l':
424 0 : switch (begin[1]) {
425 : case 'i':
426 0 : if (matches (begin, "list")) return TokenType.LIST;
427 : break;
428 : case 'o':
429 0 : if (matches (begin, "lock")) return TokenType.LOCK;
430 : break;
431 : }
432 : break;
433 :
434 : case 'n':
435 2 : if (matches (begin, "null")) return TokenType.NULL;
436 : break;
437 : case 'p':
438 6 : switch (begin[1]) {
439 : case 'a':
440 1 : if (matches (begin, "pass")) return TokenType.PASS;
441 : break;
442 : case 'r':
443 5 : if (matches (begin, "prop")) return TokenType.PROP;
444 : break;
445 : }
446 : break;
447 : case 's':
448 1 : if (matches (begin, "self")) return TokenType.SELF;
449 : break;
450 : case 't':
451 45 : if (matches (begin, "true")) return TokenType.TRUE;
452 : break;
453 : case 'u':
454 0 : if (matches (begin, "uses")) return TokenType.USES;
455 : break;
456 : case 'v':
457 0 : if (matches (begin, "void")) return TokenType.VOID;
458 : break;
459 : case 'w':
460 2 : switch (begin[1]) {
461 : case 'e':
462 0 : if (matches (begin, "weak")) return TokenType.WEAK;
463 : break;
464 : case 'h':
465 2 : if (matches (begin, "when")) return TokenType.WHEN;
466 : break;
467 : }
468 : break;
469 : }
470 : break;
471 : case 5:
472 93 : switch (begin[0]) {
473 : case 'a':
474 1 : switch (begin[1]) {
475 : case 'r':
476 0 : if (matches (begin, "array")) return TokenType.ARRAY;
477 : break;
478 : case 's':
479 1 : if (matches (begin, "async")) return TokenType.ASYNC;
480 : break;
481 : }
482 : break;
483 : case 'b':
484 2 : if (matches (begin, "break")) return TokenType.BREAK;
485 : break;
486 : case 'c':
487 24 : switch (begin[1]) {
488 : case 'l':
489 13 : if (matches (begin, "class")) return TokenType.CLASS;
490 : break;
491 : case 'o':
492 1 : if (matches (begin, "const")) return TokenType.CONST;
493 : break;
494 : }
495 : break;
496 : case 'e':
497 11 : if (matches (begin, "event")) return TokenType.EVENT;
498 : break;
499 : case 'f':
500 10 : switch (begin[1]) {
501 : case 'a':
502 9 : if (matches (begin, "false")) return TokenType.FALSE;
503 : break;
504 : case 'i':
505 1 : if (matches (begin, "final")) return TokenType.FINAL;
506 : break;
507 : }
508 : break;
509 : case 'o':
510 1 : if (matches (begin, "owned")) return TokenType.OWNED;
511 : break;
512 : case 'p':
513 4 : if (matches (begin, "print")) return TokenType.PRINT;
514 : break;
515 : case 's':
516 0 : if (matches (begin, "super")) return TokenType.SUPER;
517 : break;
518 : case 'r':
519 1 : if (matches (begin, "raise")) return TokenType.RAISE;
520 : break;
521 : case 'w':
522 4 : if (matches (begin, "while")) return TokenType.WHILE;
523 : break;
524 : case 'y':
525 0 : if (matches (begin, "yield")) return TokenType.YIELD;
526 : break;
527 : }
528 : break;
529 : case 6:
530 190 : switch (begin[0]) {
531 : case 'a':
532 112 : if (matches (begin, "assert")) return TokenType.ASSERT;
533 : break;
534 : case 'd':
535 3 : switch (begin[1]) {
536 : case 'e':
537 0 : if (matches (begin, "delete")) return TokenType.DELETE;
538 : break;
539 : case 'o':
540 3 : if (matches (begin, "downto")) return TokenType.DOWNTO;
541 : break;
542 : }
543 : break;
544 : case 'e':
545 1 : switch (begin[1]) {
546 : case 'x':
// "except" and "extern" share the prefix "ex"; the third character splits them
547 1 : switch (begin[2]) {
548 : case 'c':
549 1 : if (matches (begin, "except")) return TokenType.EXCEPT;
550 : break;
551 : case 't':
552 0 : if (matches (begin, "extern")) return TokenType.EXTERN;
553 : break;
554 : }
555 : break;
556 : }
557 : break;
558 : case 'i':
559 1 : if (matches (begin, "inline")) return TokenType.INLINE;
560 : break;
561 : case 'p':
562 0 : switch (begin[1]) {
563 : case 'a':
564 0 : if (matches (begin, "params")) return TokenType.PARAMS;
565 : break;
566 : case 'u':
567 0 : if (matches (begin, "public")) return TokenType.PUBLIC;
568 : break;
569 : }
570 : break;
571 : case 'r':
572 33 : switch (begin[1]) {
573 : case 'a':
574 1 : if (matches (begin, "raises")) return TokenType.RAISES;
575 : break;
576 : case 'e':
577 32 : if (matches (begin, "return")) return TokenType.RETURN;
578 : break;
579 : }
580 : break;
581 : case 's':
582 29 : switch (begin[1]) {
583 : case 'e':
584 0 : if (matches (begin, "sealed")) return TokenType.SEALED;
585 : break;
586 : case 'i':
587 1 : if (matches (begin, "sizeof")) return TokenType.SIZEOF;
588 : break;
589 : case 't':
590 28 : switch (begin[2]) {
591 : case 'a':
592 2 : if (matches (begin, "static")) return TokenType.STATIC;
593 : break;
594 : case 'r':
595 26 : if (matches (begin, "struct")) return TokenType.STRUCT;
596 : break;
597 : }
598 : break;
599 : }
600 : break;
601 : case 't':
602 1 : if (matches (begin, "typeof")) return TokenType.TYPEOF;
603 : break;
604 : }
605 : break;
606 : case 7:
607 10 : switch (begin[0]) {
608 : case 'd':
609 1 : switch (begin[1]) {
610 : case 'e':
611 1 : if (matches (begin, "default")) return TokenType.DEFAULT;
612 : break;
613 : case 'y':
614 0 : if (matches (begin, "dynamic")) return TokenType.DYNAMIC;
615 : break;
616 : }
617 : break;
618 : case 'e':
619 0 : if (matches (begin, "ensures")) return TokenType.ENSURES;
620 : break;
621 : case 'f':
622 1 : switch (begin[1]) {
623 : case 'i':
624 1 : if (matches (begin, "finally")) return TokenType.FINALLY;
625 : break;
626 : }
627 : break;
628 : case 'p':
629 0 : if (matches (begin, "private")) return TokenType.PRIVATE;
630 : break;
631 : case 'u':
632 0 : if (matches (begin, "unowned")) return TokenType.UNOWNED;
633 : break;
634 : case 'v':
635 0 : if (matches (begin, "virtual")) return TokenType.VIRTUAL;
636 : break;
637 : }
638 : break;
639 : case 8:
640 9 : switch (begin[0]) {
641 : case 'a':
642 3 : if (matches (begin, "abstract")) return TokenType.ABSTRACT;
643 : break;
644 : case 'c':
645 1 : if (matches (begin, "continue")) return TokenType.CONTINUE;
646 : break;
647 : case 'd':
648 1 : if (matches (begin, "delegate")) return TokenType.DELEGATE;
649 : break;
650 : case 'i':
651 0 : if (matches (begin, "internal")) return TokenType.INTERNAL;
652 : break;
653 : case 'o':
654 0 : if (matches (begin, "override")) return TokenType.OVERRIDE;
655 : break;
656 : case 'r':
// "readonly" and "requires" both start with "re"; the third character splits them
657 1 : switch (begin[2]) {
658 : case 'a':
659 1 : if (matches (begin, "readonly")) return TokenType.READONLY;
660 : break;
661 : case 'q':
662 0 : if (matches (begin, "requires")) return TokenType.REQUIRES;
663 : break;
664 : }
665 : break;
666 : case 'v':
667 0 : if (matches (begin, "volatile")) return TokenType.VOLATILE;
668 : break;
669 : }
670 : break;
671 : case 9:
672 14 : switch (begin[0]) {
673 : case 'c':
674 5 : if (matches (begin, "construct")) return TokenType.CONSTRUCT;
675 : break;
676 : case 'e':
677 1 : if (matches (begin, "exception")) return TokenType.EXCEPTION;
678 : break;
679 : case 'i':
680 1 : if (matches (begin, "interface")) return TokenType.INTERFACE;
681 : break;
682 : case 'n':
683 0 : if (matches (begin, "namespace")) return TokenType.NAMESPACE;
684 : break;
685 : case 'p':
686 2 : if (matches (begin, "protected")) return TokenType.PROTECTED;
687 : break;
688 : }
689 : break;
690 : case 10:
691 8 : switch (begin[0]) {
692 : case 'i':
693 1 : if (matches (begin, "implements")) return TokenType.IMPLEMENTS;
694 : break;
695 : }
696 : break;
697 : }
// no keyword matched
698 : return TokenType.IDENTIFIER;
699 : }
700 :
701 :
/**
 * Reads the next token while a template string (opened with @" or @""")
 * is on top of the state stack.
 *
 *  - A closing quote (three quotes in verbatim mode) yields CLOSE_TEMPLATE
 *    and pops the template state. In verbatim mode any other '"' is
 *    returned as a one-character literal piece.
 *  - $identifier yields IDENTIFIER; $( pushes State.PARENS and continues
 *    with read_token (); $$ yields a literal piece for the second '$'.
 *  - Any other text is returned as TEMPLATE_STRING_LITERAL (or
 *    VERBATIM_TEMPLATE_STRING_LITERAL) running up to the next '"' or '$'.
 *    Escape sequences are validated in non-verbatim mode only.
 *
 * Every piece other than the closing token pushes State.TEMPLATE_PART so
 * that read_token () emits a COMMA before the next piece.
 *
 * @param token_begin set to the location where the token starts
 * @param token_end set to the end of the token (byte position just past
 *                  it, column of its last character)
 * @return the token type described above, or EOF at the end of input
 */
702 35 : public TokenType read_template_token (out SourceLocation token_begin, out SourceLocation token_end) {
703 35 : bool is_verbatim = in_verbatim_template ();
704 : TokenType type;
705 35 : char* begin = current;
706 35 : token_begin = SourceLocation (begin, line, column);
707 :
// -1 means no character count was kept; the column is then advanced by the
// byte distance current - begin.
708 35 : int token_length_in_chars = -1;
709 :
710 35 : if (current >= end) {
711 : type = TokenType.EOF;
712 : } else {
713 35 : switch (current[0]) {
714 : case '"':
715 10 : if (is_verbatim) {
// current < end - 2 only guarantees that current[2] is readable, so the
// fourth byte is inspected only when it lies inside the buffer; running
// out of input counts as "not a fourth quote".
716 6 : if (current < end -2 && current[1] == '"' && current[2] == '"' && (current + 3 >= end || current[3] != '"')) {
717 3 : type = TokenType.CLOSE_TEMPLATE;
718 3 : current += 3;
719 3 : state_stack.length--;
720 : } else {
721 3 : type = TokenType.VERBATIM_TEMPLATE_STRING_LITERAL;
722 3 : current++;
723 3 : token_length_in_chars++;
724 3 : state_stack += State.TEMPLATE_PART;
725 : }
726 : } else {
727 4 : type = TokenType.CLOSE_TEMPLATE;
728 4 : current++;
729 4 : state_stack.length--;
730 : }
731 : break;
732 : case '$':
733 13 : token_begin.pos++; // $ is not part of following token
734 13 : current++;
// '$' may be the last byte of the buffer: check bounds before looking at
// the character that follows it. At end of input the final else branch
// reports the error and the recursive call returns EOF.
735 13 : if (current < end && (current[0].isalpha () || current[0] == '_')) {
736 : int len = 0;
737 6 : while (current < end && is_ident_char (current[0])) {
738 3 : current++;
739 3 : len++;
740 : }
741 3 : type = TokenType.IDENTIFIER;
742 3 : state_stack += State.TEMPLATE_PART;
743 10 : } else if (current < end && current[0] == '(') {
744 8 : current++;
745 8 : column += 2;
746 16 : state_stack += State.PARENS;
747 8 : return read_token (out token_begin, out token_end);
748 2 : } else if (current < end && current[0] == '$') {
749 2 : type = is_verbatim ? TokenType.VERBATIM_TEMPLATE_STRING_LITERAL : TokenType.TEMPLATE_STRING_LITERAL;
750 2 : current++;
751 2 : state_stack += State.TEMPLATE_PART;
752 : } else {
753 0 : Report.error (get_source_reference (1), "unexpected character");
754 0 : return read_template_token (out token_begin, out token_end);
755 : }
756 : break;
757 : default:
758 12 : type = is_verbatim ? TokenType.VERBATIM_TEMPLATE_STRING_LITERAL : TokenType.TEMPLATE_STRING_LITERAL;
759 12 : token_length_in_chars = 0;
760 44 : while (current < end && current[0] != '"' && current[0] != '$') {
761 32 : if (current[0] == '\\' && !is_verbatim) {
762 0 : current++;
763 0 : token_length_in_chars++;
764 0 : if (current >= end) {
765 : break;
766 : }
767 :
768 0 : switch (current[0]) {
769 : case '\'':
770 : case '"':
771 : case '\\':
772 : case '0':
773 : case 'b':
774 : case 'f':
775 : case 'n':
776 : case 'r':
777 : case 't':
778 : case 'v':
779 0 : current++;
780 0 : token_length_in_chars++;
781 0 : break;
782 : case 'u':
783 : // u escape character has four hex digits
784 0 : current++;
785 0 : token_length_in_chars++;
786 : int digit_length;
787 0 : for (digit_length = 0; current < end && current[0].isxdigit (); digit_length++) {
788 0 : current++;
789 0 : token_length_in_chars++;
790 : }
791 0 : if (digit_length < 1) {
792 0 : Report.error (get_source_reference (token_length_in_chars), "\\u used with no following hex digits");
793 0 : } else if (digit_length < 4) {
794 0 : Report.error (get_source_reference (token_length_in_chars), "incomplete universal character name");
795 : }
796 : break;
797 : case 'x':
798 : // hexadecimal escape character requires two hex digits
799 0 : current++;
800 0 : token_length_in_chars++;
801 : int digit_length;
// digit_length deliberately ignores '0' digits (range check only), so a
// separate flag records whether any digit was present at all; without it
// "\x00" was rejected as having no hex digits.
bool saw_digit = false;
802 0 : for (digit_length = 0; current < end && current[0].isxdigit ();) {
saw_digit = true;
803 0 : if (current[0] != '0') {
804 0 : digit_length++;
805 : }
806 0 : current++;
807 0 : token_length_in_chars++;
808 : }
809 0 : if (!saw_digit) {
810 0 : Report.error (get_source_reference (token_length_in_chars), "\\x used with no following hex digits");
811 0 : } else if (digit_length > 2) {
812 0 : Report.error (get_source_reference (token_length_in_chars), "hex escape sequence out of range");
813 : }
814 : break;
815 : default:
816 0 : Report.error (get_source_reference (token_length_in_chars), "invalid escape sequence");
817 : break;
818 : }
819 32 : } else if (current[0] == '\n') {
// template text may span lines: restart the column on the new line
820 2 : current++;
821 2 : line++;
822 2 : column = 1;
823 2 : token_length_in_chars = 1;
824 : } else {
// advance by one whole UTF-8 character so columns count characters
825 30 : unichar u = ((string) current).get_char_validated ((long) (end - current));
826 30 : if (u != (unichar) (-1)) {
827 30 : current += u.to_utf8 (null);
828 30 : token_length_in_chars++;
829 : } else {
830 0 : current++;
831 0 : Report.error (get_source_reference (token_length_in_chars), "invalid UTF-8 character");
832 : }
833 : }
834 : }
835 12 : if (current >= end) {
// input ended inside the template: drop the template state and let
// read_token () produce the final token
836 0 : Report.error (get_source_reference (token_length_in_chars), "syntax error, expected \"");
837 0 : state_stack.length--;
838 0 : return read_token (out token_begin, out token_end);
839 : }
840 24 : state_stack += State.TEMPLATE_PART;
841 12 : break;
842 : }
843 : }
844 :
845 27 : if (token_length_in_chars < 0) {
846 12 : column += (int) (current - begin);
847 : } else {
848 15 : column += token_length_in_chars;
849 : }
850 :
851 27 : token_end = SourceLocation (current, line, column - 1);
852 :
853 27 : return type;
854 : }
855 :
856 :
857 2996 : public TokenType read_token (out SourceLocation token_begin, out SourceLocation token_end) {
858 2996 : if (current == null) {
859 0 : token_begin = SourceLocation (current, line, column);
860 0 : token_end = SourceLocation (current, line, column);
861 0 : return TokenType.EOF;
862 : }
863 :
864 2996 : if (in_template () || in_verbatim_template ()) {
865 35 : return read_template_token (out token_begin, out token_end);
866 2961 : } else if (in_template_part ()) {
867 20 : state_stack.length--;
868 :
869 20 : token_begin = SourceLocation (current, line, column);
870 20 : token_end = SourceLocation (current, line, column - 1);
871 :
872 20 : return TokenType.COMMA;
873 2941 : } else if (in_regex_literal ()) {
874 2 : return read_regex_token (out token_begin, out token_end);
875 : }
876 :
877 :
878 :
879 : /* emit dedents if outstanding before checking any other chars */
880 :
881 2939 : if (pending_dedents > 0) {
882 24 : pending_dedents--;
883 24 : indent_level--;
884 :
885 24 : token_begin = SourceLocation (current, line, column);
886 24 : token_end = SourceLocation (current, line, column);
887 :
888 24 : last_token = TokenType.DEDENT;
889 :
890 24 : return TokenType.DEDENT;
891 : }
892 :
893 2915 : if ((_indent_spaces == 0 ) || (last_token != TokenType.EOL)) {
894 : /* scrub whitespace (excluding newlines) and comments */
895 2912 : space ();
896 : }
897 :
898 :
899 : /* handle explicit line continuation (lines ending with "\") */
900 2915 : while (current < end && current[0] == '\\' && current[1] == '\n') {
901 0 : current += 2;
902 0 : line++;
903 0 : skip_space_tabs ();
904 : }
905 :
906 : /* handle automatic line continuations (when inside parens or braces) */
907 2923 : while (current < end && current[0] == '\n' && (open_parens_count > 0 || open_brace_count > 0)) {
908 8 : current++;
909 8 : line++;
910 8 : skip_space_tabs ();
911 : }
912 :
913 :
914 : /* handle non-consecutive new line once parsing is underway - EOL */
915 2915 : if (newline () && parse_started && last_token != TokenType.EOL && last_token != TokenType.SEMICOLON) {
916 476 : token_begin = SourceLocation (current, line, column);
917 476 : token_end = SourceLocation (current, line, column);
918 :
919 476 : last_token = TokenType.EOL;
920 :
921 476 : return TokenType.EOL;
922 : }
923 :
924 :
925 2631 : while (skip_newlines ()) {
926 490 : token_begin = SourceLocation (current, line, column);
927 :
928 490 : current_indent_level = count_tabs ();
929 :
930 : /* if its an empty new line then ignore */
931 490 : if (current_indent_level == -1) {
932 14 : continue;
933 : }
934 :
935 476 : if (current_indent_level > indent_level) {
936 161 : indent_level = current_indent_level;
937 :
938 161 : token_end = SourceLocation (current, line, column);
939 :
940 161 : last_token = TokenType.INDENT;
941 :
942 161 : return TokenType.INDENT;
943 315 : } else if (current_indent_level < indent_level) {
944 137 : indent_level--;
945 :
946 137 : pending_dedents = (indent_level - current_indent_level);
947 137 : token_end = SourceLocation (current, line, column);
948 :
949 137 : last_token = TokenType.DEDENT;
950 :
951 137 : return TokenType.DEDENT;
952 : }
953 : }
954 :
955 : TokenType type;
956 2141 : char* begin = current;
957 2141 : token_begin = SourceLocation (begin, line, column);
958 :
959 2141 : int token_length_in_chars = -1;
960 :
961 2141 : parse_started = true;
962 :
963 2141 : if (current >= end) {
964 85 : if (indent_level > 0) {
965 0 : indent_level--;
966 :
967 0 : pending_dedents = indent_level;
968 :
969 0 : type = TokenType.DEDENT;
970 : } else {
971 : type = TokenType.EOF;
972 : }
973 2056 : } else if (current[0].isalpha () || current[0] == '_') {
974 : int len = 0;
975 4886 : while (current < end && is_ident_char (current[0])) {
976 3916 : current++;
977 3916 : len++;
978 : }
979 970 : type = get_identifier_or_keyword (begin, len);
980 1086 : } else if (current[0] == '@') {
981 7 : if (current < end - 1 && current[1] == '"') {
982 7 : current += 1;
983 7 : if (current < end - 5 && current[1] == '"' && current[2] == '"') {
984 3 : current += 3;
985 6 : state_stack += State.VERBATIM_TEMPLATE;
986 : } else {
987 4 : current += 1;
988 8 : state_stack += State.TEMPLATE;
989 : }
990 : type = TokenType.OPEN_TEMPLATE;
991 : } else {
992 0 : token_begin.pos++; // @ is not part of the identifier
993 0 : current++;
994 0 : int len = 0;
995 0 : while (current < end && is_ident_char (current[0])) {
996 0 : current++;
997 0 : len++;
998 : }
999 : type = TokenType.IDENTIFIER;
1000 : }
1001 1079 : } else if (current[0].isdigit ()) {
1002 395 : while (current < end && current[0].isdigit ()) {
1003 249 : current++;
1004 : }
1005 146 : type = TokenType.INTEGER_LITERAL;
1006 146 : if (current < end - 1 && current[0] == '.' && current[1].isdigit ()) {
1007 3 : current++;
1008 9 : while (current < end && current[0].isdigit ()) {
1009 6 : current++;
1010 : }
1011 3 : if (current < end && current[0].tolower () == 'e') {
1012 0 : current++;
1013 0 : if (current < end && (current[0] == '+' || current[0] == '-')) {
1014 0 : current++;
1015 : }
1016 0 : while (current < end && current[0].isdigit ()) {
1017 0 : current++;
1018 : }
1019 : }
1020 : type = TokenType.REAL_LITERAL;
1021 143 : } else if (current < end && current[0].tolower () == 'e') {
1022 0 : current++;
1023 0 : if (current < end && (current[0] == '+' || current[0] == '-')) {
1024 0 : current++;
1025 : }
1026 0 : while (current < end && current[0].isdigit ()) {
1027 0 : current++;
1028 : }
1029 : type = TokenType.REAL_LITERAL;
1030 143 : } else if (current < end && current == begin + 1
1031 97 : && begin[0] == '0'
1032 17 : && (begin[1] == 'x' || begin[1] == 'X')
1033 2 : && begin[2].isxdigit ()) {
1034 : // hexadecimal integer literal
1035 2 : current++;
1036 12 : while (current < end && current[0].isxdigit ()) {
1037 10 : current++;
1038 : }
1039 141 : } else if (current < end && current == begin + 1
1040 95 : && begin[0] == '0'
1041 15 : && (begin[1] == 'b' || begin[1] == 'B' || begin[1] == 'o' || begin[1] == 'O')
1042 3 : && begin[2].isdigit ()) {
1043 : // binary or octal integer literal
1044 3 : current++;
1045 27 : while (current < end && current[0].isdigit ()) {
1046 24 : current++;
1047 : }
1048 : }
1049 146 : if (current < end) {
1050 146 : bool real_literal = (type == TokenType.REAL_LITERAL);
1051 :
1052 146 : switch (current[0]) {
1053 : case 'l':
1054 : case 'L':
1055 3 : if (type == TokenType.INTEGER_LITERAL) {
1056 3 : current++;
1057 3 : if (current < end && current[0].tolower () == 'l') {
1058 3 : current++;
1059 : }
1060 : }
1061 : break;
1062 : case 'u':
1063 : case 'U':
1064 2 : if (type == TokenType.INTEGER_LITERAL) {
1065 2 : current++;
1066 2 : if (current < end && current[0].tolower () == 'l') {
1067 0 : current++;
1068 0 : if (current < end && current[0].tolower () == 'l') {
1069 0 : current++;
1070 : }
1071 : }
1072 : }
1073 : break;
1074 : case 'f':
1075 : case 'F':
1076 : case 'd':
1077 : case 'D':
1078 0 : type = TokenType.REAL_LITERAL;
1079 0 : current++;
1080 0 : break;
1081 : }
1082 :
1083 146 : if (!real_literal && is_ident_char (current[0])) {
1084 : // allow identifiers to start with a digit
1085 : // as long as they contain at least one char
1086 0 : while (current < end && is_ident_char (current[0])) {
1087 0 : current++;
1088 : }
1089 : type = TokenType.IDENTIFIER;
1090 : }
1091 : }
1092 : } else {
1093 933 : switch (current[0]) {
1094 : case '{':
1095 2 : type = TokenType.OPEN_BRACE;
1096 2 : open_brace_count++;
1097 4 : state_stack += State.BRACE;
1098 2 : current++;
1099 2 : break;
1100 : case '}':
1101 2 : type = TokenType.CLOSE_BRACE;
1102 2 : open_brace_count--;
1103 2 : if (state_stack.length > 0) {
1104 2 : state_stack.length--;
1105 : }
1106 2 : current++;
1107 2 : break;
1108 : case '(':
1109 194 : type = TokenType.OPEN_PARENS;
1110 194 : open_parens_count++;
1111 388 : state_stack += State.PARENS;
1112 194 : current++;
1113 194 : break;
1114 : case ')':
1115 202 : type = TokenType.CLOSE_PARENS;
1116 202 : open_parens_count--;
1117 202 : current++;
1118 202 : if (state_stack.length > 0) {
1119 202 : state_stack.length--;
1120 : }
1121 202 : if (in_template () || in_verbatim_template ()) {
1122 : type = TokenType.COMMA;
1123 : }
1124 : break;
1125 : case '[':
1126 16 : type = TokenType.OPEN_BRACKET;
1127 32 : state_stack += State.BRACKET;
1128 16 : current++;
1129 16 : break;
1130 : case ']':
1131 16 : type = TokenType.CLOSE_BRACKET;
1132 16 : if (state_stack.length > 0) {
1133 16 : state_stack.length--;
1134 : }
1135 16 : current++;
1136 16 : break;
1137 : case '.':
1138 41 : type = TokenType.DOT;
1139 41 : current++;
1140 41 : if (current < end - 1) {
1141 41 : if (current[0] == '.' && current[1] == '.') {
1142 0 : type = TokenType.ELLIPSIS;
1143 0 : current += 2;
1144 : }
1145 : }
1146 : break;
1147 : case ':':
1148 98 : type = TokenType.COLON;
1149 98 : current++;
1150 98 : break;
1151 : case ',':
1152 11 : type = TokenType.COMMA;
1153 11 : current++;
1154 11 : break;
1155 : case ';':
1156 1 : type = TokenType.SEMICOLON;
1157 1 : current++;
1158 1 : break;
1159 : case '#':
1160 0 : type = TokenType.HASH;
1161 0 : current++;
1162 0 : break;
1163 : case '?':
1164 2 : type = TokenType.INTERR;
1165 2 : current++;
1166 2 : break;
1167 : case '|':
1168 3 : type = TokenType.BITWISE_OR;
1169 3 : current++;
1170 3 : if (current < end) {
1171 3 : switch (current[0]) {
1172 : case '=':
1173 1 : type = TokenType.ASSIGN_BITWISE_OR;
1174 1 : current++;
1175 1 : break;
1176 : case '|':
1177 1 : type = TokenType.OP_OR;
1178 1 : current++;
1179 1 : break;
1180 : }
1181 : }
1182 : break;
1183 : case '&':
1184 3 : type = TokenType.BITWISE_AND;
1185 3 : current++;
1186 3 : if (current < end) {
1187 3 : switch (current[0]) {
1188 : case '=':
1189 1 : type = TokenType.ASSIGN_BITWISE_AND;
1190 1 : current++;
1191 1 : break;
1192 : case '&':
1193 1 : type = TokenType.OP_AND;
1194 1 : current++;
1195 1 : break;
1196 : }
1197 : }
1198 : break;
1199 : case '^':
1200 2 : type = TokenType.CARRET;
1201 2 : current++;
1202 2 : if (current < end && current[0] == '=') {
1203 1 : type = TokenType.ASSIGN_BITWISE_XOR;
1204 1 : current++;
1205 : }
1206 : break;
1207 : case '~':
1208 1 : type = TokenType.TILDE;
1209 1 : current++;
1210 1 : break;
1211 : case '=':
1212 209 : type = TokenType.ASSIGN;
1213 209 : current++;
1214 209 : if (current < end) {
1215 209 : switch (current[0]) {
1216 : case '=':
1217 83 : type = TokenType.OP_EQ;
1218 83 : current++;
1219 83 : break;
1220 : case '>':
1221 0 : type = TokenType.LAMBDA;
1222 0 : current++;
1223 0 : break;
1224 : }
1225 : }
1226 : break;
1227 : case '<':
1228 8 : type = TokenType.OP_LT;
1229 8 : current++;
1230 8 : if (current < end) {
1231 8 : switch (current[0]) {
1232 : case '=':
1233 2 : type = TokenType.OP_LE;
1234 2 : current++;
1235 2 : break;
1236 : case '<':
1237 2 : type = TokenType.OP_SHIFT_LEFT;
1238 2 : current++;
1239 2 : if (current < end && current[0] == '=') {
1240 1 : type = TokenType.ASSIGN_SHIFT_LEFT;
1241 1 : current++;
1242 : }
1243 : break;
1244 : }
1245 : }
1246 : break;
1247 : case '>':
1248 11 : type = TokenType.OP_GT;
1249 11 : current++;
1250 11 : if (current < end && current[0] == '=') {
1251 4 : type = TokenType.OP_GE;
1252 4 : current++;
1253 : }
1254 : break;
1255 : case '!':
1256 8 : type = TokenType.OP_NEG;
1257 8 : current++;
1258 8 : if (current < end && current[0] == '=') {
1259 4 : type = TokenType.OP_NE;
1260 4 : current++;
1261 : }
1262 : break;
1263 : case '+':
1264 12 : type = TokenType.PLUS;
1265 12 : current++;
1266 12 : if (current < end) {
1267 12 : switch (current[0]) {
1268 : case '=':
1269 2 : type = TokenType.ASSIGN_ADD;
1270 2 : current++;
1271 2 : break;
1272 : case '+':
1273 6 : type = TokenType.OP_INC;
1274 6 : current++;
1275 6 : break;
1276 : }
1277 : }
1278 : break;
1279 : case '-':
1280 9 : type = TokenType.MINUS;
1281 9 : current++;
1282 9 : if (current < end) {
1283 9 : switch (current[0]) {
1284 : case '=':
1285 1 : type = TokenType.ASSIGN_SUB;
1286 1 : current++;
1287 1 : break;
1288 : case '-':
1289 3 : type = TokenType.OP_DEC;
1290 3 : current++;
1291 3 : break;
1292 : case '>':
1293 0 : type = TokenType.OP_PTR;
1294 0 : current++;
1295 0 : break;
1296 : }
1297 : }
1298 : break;
1299 : case '*':
1300 2 : type = TokenType.STAR;
1301 2 : current++;
1302 2 : if (current < end && current[0] == '=') {
1303 1 : type = TokenType.ASSIGN_MUL;
1304 1 : current++;
1305 : }
1306 : break;
1307 : case '/':
1308 3 : switch (last_token) {
1309 : case TokenType.ASSIGN:
1310 : case TokenType.COMMA:
1311 : case TokenType.MINUS:
1312 : case TokenType.OP_AND:
1313 : case TokenType.OP_EQ:
1314 : case TokenType.OP_GE:
1315 : case TokenType.OP_GT:
1316 : case TokenType.OP_INC:
1317 : case TokenType.OP_LE:
1318 : case TokenType.OP_LT:
1319 : case TokenType.OP_NE:
1320 : case TokenType.OP_NEG:
1321 : case TokenType.OP_OR:
1322 : case TokenType.OPEN_BRACE:
1323 : case TokenType.OPEN_PARENS:
1324 : case TokenType.PLUS:
1325 : case TokenType.RETURN:
1326 1 : type = TokenType.OPEN_REGEX_LITERAL;
1327 2 : state_stack += State.REGEX_LITERAL;
1328 1 : current++;
1329 1 : break;
1330 : default:
1331 2 : type = TokenType.DIV;
1332 2 : current++;
1333 2 : if (current < end && current[0] == '=') {
1334 1 : type = TokenType.ASSIGN_DIV;
1335 1 : current++;
1336 : }
1337 : break;
1338 : }
1339 : break;
1340 :
1341 : case '%':
1342 2 : type = TokenType.PERCENT;
1343 2 : current++;
1344 2 : if (current < end && current[0] == '=') {
1345 1 : type = TokenType.ASSIGN_PERCENT;
1346 1 : current++;
1347 : }
1348 : break;
1349 : case '\'':
1350 : case '"':
1351 75 : if (begin[0] == '\'') {
1352 : type = TokenType.CHARACTER_LITERAL;
1353 73 : } else if (current < end - 6 && begin[1] == '"' && begin[2] == '"') {
1354 1 : type = TokenType.VERBATIM_STRING_LITERAL;
1355 1 : token_length_in_chars = 6;
1356 1 : current += 3;
1357 18 : while (current < end - 4) {
1358 18 : if (current[0] == '"' && current[1] == '"' && current[2] == '"' && current[3] != '"') {
1359 : break;
1360 17 : } else if (current[0] == '\n') {
1361 2 : current++;
1362 2 : line++;
1363 2 : column = 1;
1364 2 : token_length_in_chars = 3;
1365 : } else {
1366 15 : unichar u = ((string) current).get_char_validated ((long) (end - current));
1367 15 : if (u != (unichar) (-1)) {
1368 15 : current += u.to_utf8 (null);
1369 15 : token_length_in_chars++;
1370 : } else {
1371 0 : Report.error (get_source_reference (token_length_in_chars), "invalid UTF-8 character");
1372 : }
1373 : }
1374 : }
1375 1 : if (current[0] == '"' && current[1] == '"' && current[2] == '"') {
1376 1 : current += 3;
1377 : } else {
1378 0 : Report.error (get_source_reference (token_length_in_chars), "syntax error, expected \"\"\"");
1379 : }
1380 : break;
1381 : } else {
1382 : type = TokenType.STRING_LITERAL;
1383 : }
1384 74 : token_length_in_chars = 2;
1385 74 : current++;
1386 390 : while (current < end && current[0] != begin[0]) {
1387 316 : if (current[0] == '\\') {
1388 7 : current++;
1389 7 : token_length_in_chars++;
1390 7 : if (current >= end) {
1391 : break;
1392 : }
1393 :
1394 7 : switch (current[0]) {
1395 : case '\'':
1396 : case '"':
1397 : case '\\':
1398 : case '0':
1399 : case 'b':
1400 : case 'f':
1401 : case 'n':
1402 : case 'r':
1403 : case 't':
1404 : case 'v':
1405 7 : current++;
1406 7 : token_length_in_chars++;
1407 7 : break;
1408 : case 'u':
1409 : // u escape character has four hex digits
1410 0 : current++;
1411 0 : token_length_in_chars++;
1412 : int digit_length;
1413 0 : for (digit_length = 0; current < end && current[0].isxdigit (); digit_length++) {
1414 0 : current++;
1415 0 : token_length_in_chars++;
1416 : }
1417 0 : if (digit_length < 1) {
1418 0 : Report.error (get_source_reference (token_length_in_chars), "\\u used with no following hex digits");
1419 0 : } else if (digit_length < 4) {
1420 0 : Report.error (get_source_reference (token_length_in_chars), "incomplete universal character name");
1421 : }
1422 : break;
1423 : case 'x':
1424 : // hexadecimal escape character requires two hex digits
1425 0 : current++;
1426 0 : token_length_in_chars++;
1427 : int digit_length;
1428 0 : for (digit_length = 0; current < end && current[0].isxdigit ();) {
1429 0 : if (current[0] != '0') {
1430 0 : digit_length++;
1431 : }
1432 0 : current++;
1433 0 : token_length_in_chars++;
1434 : }
1435 0 : if (digit_length < 1) {
1436 0 : Report.error (get_source_reference (token_length_in_chars), "\\x used with no following hex digits");
1437 0 : } else if (digit_length > 2) {
1438 0 : Report.error (get_source_reference (token_length_in_chars), "hex escape sequence out of range");
1439 : }
1440 : break;
1441 : default:
1442 0 : Report.error (get_source_reference (token_length_in_chars), "invalid escape sequence");
1443 : break;
1444 : }
1445 309 : } else if (current[0] == '\n') {
1446 0 : current++;
1447 0 : line++;
1448 0 : column = 1;
1449 0 : token_length_in_chars = 1;
1450 : } else {
1451 309 : unichar u = ((string) current).get_char_validated ((long) (end - current));
1452 309 : if (u != (unichar) (-1)) {
1453 309 : current += u.to_utf8 (null);
1454 309 : token_length_in_chars++;
1455 : } else {
1456 0 : current++;
1457 0 : Report.error (get_source_reference (token_length_in_chars), "invalid UTF-8 character");
1458 : }
1459 : }
1460 316 : if (current < end && begin[0] == '\'' && current[0] != '\'') {
1461 : // multiple characters in single character literal
1462 0 : Report.error (get_source_reference (token_length_in_chars), "invalid character literal");
1463 : }
1464 : }
1465 74 : if (current < end) {
1466 74 : current++;
1467 : } else {
1468 0 : Report.error (get_source_reference (token_length_in_chars), "syntax error, expected %c", begin[0]);
1469 : }
1470 : break;
1471 : default:
1472 0 : unichar u = ((string) current).get_char_validated ((long) (end - current));
1473 0 : if (u != (unichar) (-1)) {
1474 0 : current += u.to_utf8 (null);
1475 0 : Report.error (get_source_reference (0), "syntax error, unexpected character");
1476 : } else {
1477 0 : current++;
1478 0 : Report.error (get_source_reference (0), "invalid UTF-8 character");
1479 : }
1480 0 : column++;
1481 0 : return read_token (out token_begin, out token_end);
1482 : }
1483 : }
1484 :
1485 2141 : if (token_length_in_chars < 0) {
1486 2066 : column += (int) (current - begin);
1487 : } else {
1488 75 : column += token_length_in_chars;
1489 : }
1490 :
1491 2141 : token_end = SourceLocation (current, line, column - 1);
1492 2141 : last_token = type;
1493 :
1494 2141 : return type;
1495 : }
1496 :
/**
 * Consumes the leading indentation of the current line and returns its depth.
 *
 * When indent_spaces is 0 the indentation is counted in tab characters,
 * otherwise in runs of spaces divided by indent_spaces. Trailing whitespace
 * and comments are skipped afterwards.
 *
 * @return the indentation level, or -1 if the scanner then sits on a newline
 *         (i.e. the line contained no code)
 */
int count_tabs () {
	// tabs are the indentation unit unless a spaces-per-level width is set
	char indent_char = (_indent_spaces == 0) ? '\t' : ' ';

	int run = 0;
	for (; current < end && current[0] == indent_char; current++) {
		column++;
		run++;
	}

	int levels = (_indent_spaces == 0) ? run : run / _indent_spaces;

	/* ignore comments and whitespace and other lines that contain no code */
	space ();

	bool blank_line = current < end && current[0] == '\n';
	return blank_line ? -1 : levels;
}
1529 :
/**
 * Checks whether the bytes at begin start with keyword.
 *
 * Exactly keyword.length bytes of begin are compared; the caller has to
 * make sure that many bytes are readable.
 *
 * @param begin   pointer to the text to test
 * @param keyword the expected text
 * @return true if every byte of keyword equals the byte at the same offset
 */
static bool matches (char* begin, string keyword) {
	char* expected = (char*) keyword;
	long remaining = keyword.length;

	int pos = 0;
	while (pos < remaining) {
		if (begin[pos] != expected[pos]) {
			return false;
		}
		pos++;
	}

	return true;
}
1540 :
/**
 * Skips whitespace other than newlines and, when the scanner then sits on
 * a '#' in column 1, processes the preprocessing directive found there.
 *
 * @return true if a directive was processed or at least one whitespace
 *         character was skipped
 */
bool whitespace () {
	char* start = current;

	while (current < end && current[0] != '\n' && current[0].isspace ()) {
		current++;
		column++;
	}

	// a '#' in the first column introduces a preprocessing directive
	bool at_directive = column == 1 && current < end && current[0] == '#';
	if (at_directive) {
		pp_directive ();
		return true;
	}

	return current != start;
}
1557 :
/**
 * Tells whether the scanner currently sits on a newline character.
 *
 * The position is checked against the end of the buffer first so that
 * no byte past the input is read once the scanner has reached the end.
 *
 * @return true if there is input left and the current character is '\n'
 */
inline bool newline () {
	return current < end && current[0] == '\n';
}
1565 :
/**
 * Consumes consecutive newline characters.
 *
 * For every newline the line counter is advanced, the column is reset to 1
 * and current_indent_level is reset to 0.
 *
 * @return true if at least one newline was consumed
 */
bool skip_newlines () {
	char* start = current;

	for (; newline (); current++) {
		line++;
		column = 1;
		current_indent_level = 0;
	}

	return current != start;
}
1581 :
/**
 * Skips one comment starting at the current position, if there is one.
 *
 * Both single-line comments and delimited comments are handled. Comments
 * for which a source reference is taken are handed to push_comment ().
 *
 * @param file_comment true while the comments at the top of the file are
 *                     being collected; a delimited comment whose third
 *                     character is '*' is then left untouched
 * @return true if a comment was consumed (also when an unterminated
 *         delimited comment was reported), false if there is no comment
 *         here or it was left for later
 */
bool comment (bool file_comment = false) {
	// at least two characters are needed to form a comment opener
	if (current == null
	    || current > end - 2
	    || current[0] != '/'
	    || (current[1] != '/' && current[1] != '*')) {
		return false;
	}

	if (current[1] == '/') {
		// single-line comment

		SourceReference source_reference = null;
		if (file_comment) {
			source_reference = get_source_reference (0);
		}

		current += 2;

		// skip until end of line or end of file
		while (current < end && current[0] != '\n') {
			current++;
		}

		/* do not ignore EOL if comment does not exclusively occupy the line */
		// the loop above may have stopped at the end of the buffer,
		// so check the bound before looking at the character
		if (current < end && current[0] == '\n' && last_token == TokenType.EOL) {
			current++;
			line++;
			column = 1;
			current_indent_level = 0;
		}

		if (source_reference != null) {
			// NOTE(review): `begin' here is the scanner field, not a pointer to
			// the start of this comment as in the delimited branch below;
			// verify that this is the intended comment text.
			push_comment (((string) begin).substring (0, (long) (current - begin)), source_reference, file_comment);
		}

	} else {
		// delimited comment

		// only look at the third character if it is inside the buffer
		bool third_is_star = current < end - 2 && current[2] == '*';

		SourceReference source_reference = null;
		if (file_comment && third_is_star) {
			return false;
		}

		if (third_is_star || file_comment) {
			source_reference = get_source_reference (0);
		}

		current += 2;
		char* begin = current;

		while (current < end - 1
		       && (current[0] != '*' || current[1] != '/')) {
			if (current[0] == '\n') {
				line++;
				column = 0;
			}
			current++;
			column++;
		}

		// no terminator found; `>=' also covers input that ends right
		// after the opener, where current is already equal to end
		if (current >= end - 1) {
			Report.error (get_source_reference (0), "syntax error, expected */");
			return true;
		}

		if (source_reference != null) {
			string comment = ((string) begin).substring (0, (long) (current - begin));
			push_comment (comment, source_reference, file_comment);
		}

		// step over the closing "*/"
		current += 2;
		column += 2;
	}

	return true;
}
1657 :
/**
 * Consumes consecutive tab characters, advancing the column for each.
 *
 * @return true if at least one tab was consumed
 */
bool skip_tabs () {
	char* start = current;

	for (; current < end && current[0] == '\t'; current++) {
		column++;
	}

	return current != start;
}
1668 :
/**
 * Repeatedly skips whitespace, tabs and comments until none of the three
 * consumes anything any more.
 */
void skip_space_tabs () {
	while (true) {
		if (whitespace ()) {
			continue;
		}
		if (skip_tabs ()) {
			continue;
		}
		if (comment ()) {
			continue;
		}
		break;
	}
}
1674 :
/**
 * Skips any mix of whitespace and comments at the current position.
 */
void space () {
	bool consumed;
	do {
		consumed = whitespace () || comment ();
	} while (consumed);
}
1679 :
/**
 * Skips whitespace and consumes comments in file-comment mode until
 * neither whitespace () nor comment (true) makes progress any more.
 */
public void parse_file_comments () {
	for (;;) {
		if (!whitespace () && !comment (true)) {
			break;
		}
	}
}
1685 :
/**
 * Records a scanned comment.
 *
 * A comment whose text starts with '*' becomes the pending comment; a
 * pending comment that is replaced this way is added to the source file.
 * File comments are always added to the source file and clear the
 * pending comment.
 *
 * @param comment_item     text of the comment
 * @param source_reference location of the comment
 * @param file_comment     true if the comment was scanned as a file comment
 */
void push_comment (string comment_item, SourceReference source_reference, bool file_comment) {
	bool starts_with_star = comment_item[0] == '*';

	if (starts_with_star) {
		// extra doc comment, add the previous one to source file comments
		if (_comment != null) {
			source_file.add_comment (_comment);
		}
		_comment = new Comment (comment_item, source_reference);
	}

	if (!file_comment) {
		return;
	}

	source_file.add_comment (new Comment (comment_item, source_reference));
	_comment = null;
}
1700 :
/**
 * Hands out the pending comment and forgets it.
 *
 * @return the saved comment, or null if none is pending
 */
public Comment? pop_comment () {
	Comment? pending = _comment;
	_comment = null;
	return pending;
}
1715 :
/**
 * Skips whitespace other than newlines inside a preprocessing directive.
 *
 * @return true if at least one character was skipped
 */
bool pp_whitespace () {
	char* start = current;

	for (; current < end && current[0] != '\n' && current[0].isspace (); current++) {
		column++;
	}

	return current != start;
}
1725 :
/**
 * Skips any mix of non-newline whitespace and comments inside a
 * preprocessing directive.
 */
void pp_space () {
	bool consumed;
	do {
		consumed = pp_whitespace () || comment ();
	} while (consumed);
}
1730 :
/**
 * Processes the preprocessing directive at the current position.
 *
 * The scanner must sit on the '#'. The directive name is read and
 * dispatched to the matching parse_pp_* method; unknown names are
 * reported. If the innermost conditional is then marked to be skipped,
 * input is consumed up to the start of the next line whose first
 * non-whitespace character is '#', or to the end of the input.
 */
void pp_directive () {
	// step over the hash sign
	current++;
	column++;

	pp_space ();

	// read the directive name
	char* name = current;
	int name_len = 0;
	for (; current < end && current[0].isalnum (); current++) {
		column++;
		name_len++;
	}

	if (name_len == 2 && matches (name, "if")) {
		parse_pp_if ();
	} else if (name_len == 4 && matches (name, "elif")) {
		parse_pp_elif ();
	} else if (name_len == 4 && matches (name, "else")) {
		parse_pp_else ();
	} else if (name_len == 5 && matches (name, "endif")) {
		parse_pp_endif ();
	} else {
		Report.error (get_source_reference (-name_len, name_len), "syntax error, invalid preprocessing directive");
	}

	bool skipping = conditional_stack.length > 0
	                && conditional_stack[conditional_stack.length - 1].skip_section;
	if (!skipping) {
		return;
	}

	// skip lines until next preprocessing directive
	bool only_blanks_on_line = false;
	for (; current < end; current++, column++) {
		char c = current[0];

		if (only_blanks_on_line && c == '#') {
			// go back to begin of line
			current -= (column - 1);
			column = 1;
			return;
		}

		if (c == '\n') {
			line++;
			// the loop increment turns this into column 1 for the next character
			column = 0;
			only_blanks_on_line = true;
		} else if (!c.isspace ()) {
			only_blanks_on_line = false;
		}
	}
}
1781 :
/**
 * Expects the end of a preprocessing directive line.
 *
 * Whitespace and comments are skipped; an error is reported unless the
 * scanner then sits on a newline character.
 */
void pp_eol () {
	pp_space ();

	bool at_newline = current < end && current[0] == '\n';
	if (!at_newline) {
		Report.error (get_source_reference (0), "syntax error, expected newline");
	}
}
1788 :
/**
 * Handles an #if directive: evaluates its condition and pushes a new
 * entry onto the conditional stack.
 *
 * The new section is processed only if the condition holds and the
 * enclosing conditional, if any, is not being skipped.
 */
void parse_pp_if () {
	pp_space ();

	bool condition = parse_pp_expression ();

	pp_eol ();

	conditional_stack += Conditional ();

	int top = conditional_stack.length - 1;
	bool parent_active = top == 0 || !conditional_stack[top - 1].skip_section;

	if (condition && parent_active) {
		// condition true => process code within if
		conditional_stack[top].matched = true;
	} else {
		// skip lines until next preprocessing directive
		conditional_stack[top].skip_section = true;
	}
}
1806 :
/**
 * Handles an #elif directive.
 *
 * An error is reported if there is no open conditional or its else_found
 * flag is set. Otherwise the section is processed only if the condition
 * holds, no earlier branch of this conditional matched and the enclosing
 * conditional, if any, is not being skipped.
 */
void parse_pp_elif () {
	pp_space ();

	bool condition = parse_pp_expression ();

	pp_eol ();

	int top = conditional_stack.length - 1;
	if (top < 0 || conditional_stack[top].else_found) {
		Report.error (get_source_reference (0), "syntax error, unexpected #elif");
		return;
	}

	bool parent_active = top == 0 || !conditional_stack[top - 1].skip_section;
	bool take_branch = condition && !conditional_stack[top].matched && parent_active;

	if (take_branch) {
		// condition true => process code within this branch
		conditional_stack[top].matched = true;
	}
	// otherwise skip lines until next preprocessing directive
	conditional_stack[top].skip_section = !take_branch;
}
1829 :
/**
 * Handles an #else directive.
 *
 * An error is reported if there is no open conditional or an #else was
 * already seen for it. Otherwise the #else is recorded in else_found, so
 * that a later #elif or second #else of the same conditional is rejected,
 * and the section is processed only if no earlier branch matched and the
 * enclosing conditional, if any, is not being skipped.
 */
void parse_pp_else () {
	pp_eol ();

	if (conditional_stack.length == 0 || conditional_stack[conditional_stack.length - 1].else_found) {
		Report.error (get_source_reference (0), "syntax error, unexpected #else");
		return;
	}

	int top = conditional_stack.length - 1;

	// without this the else_found checks here and in parse_pp_elif () can never fire
	conditional_stack[top].else_found = true;

	bool parent_active = top == 0 || !conditional_stack[top - 1].skip_section;

	if (!conditional_stack[top].matched && parent_active) {
		// no earlier branch matched => process code within else
		conditional_stack[top].matched = true;
		conditional_stack[top].skip_section = false;
	} else {
		// skip lines until next preprocessing directive
		conditional_stack[top].skip_section = true;
	}
}
1848 :
/**
 * Handles an #endif directive by dropping the innermost entry of the
 * conditional stack; reports an error if the stack is empty.
 */
void parse_pp_endif () {
	pp_eol ();

	if (conditional_stack.length > 0) {
		conditional_stack.length--;
		return;
	}

	Report.error (get_source_reference (0), "syntax error, unexpected #endif");
}
1859 :
/**
 * Reads an identifier in a preprocessor expression and evaluates it.
 *
 * @return true for "true", false for "false", otherwise whether the
 *         identifier is defined in the source file's context; false
 *         (after reporting an error) if no identifier is found
 */
bool parse_pp_symbol () {
	char* start = current;
	while (current < end && is_ident_char (current[0])) {
		current++;
		column++;
	}

	int len = (int) (current - start);
	if (len == 0) {
		Report.error (get_source_reference (0), "syntax error, expected identifier");
		return false;
	}

	string identifier = ((string) start).substring (0, len);

	switch (identifier) {
	case "true":
		return true;
	case "false":
		return false;
	default:
		return source_file.context.is_defined (identifier);
	}
}
1885 :
/**
 * Parses a primary preprocessor expression: either a symbol or a
 * parenthesized expression.
 *
 * @return the value of the expression, or false after reporting an error
 *         if neither form is found
 */
bool parse_pp_primary_expression () {
	if (current < end && is_ident_char (current[0])) {
		return parse_pp_symbol ();
	}

	if (current < end && current[0] == '(') {
		// step over '('
		current++;
		column++;

		pp_space ();
		bool result = parse_pp_expression ();
		pp_space ();

		bool closed = current < end && current[0] == ')';
		if (closed) {
			current++;
			column++;
		} else {
			Report.error (get_source_reference (0), "syntax error, expected `)'");
		}
		return result;
	}

	// end of input or any other character
	Report.error (get_source_reference (0), "syntax error, expected identifier");
	return false;
}
1909 :
/**
 * Parses a primary preprocessor expression preceded by any number of
 * '!' operators.
 *
 * @return the value of the primary expression, negated once per '!'
 */
bool parse_pp_unary_expression () {
	bool invert = false;

	// consume every leading '!' and remember the parity
	while (current < end && current[0] == '!') {
		current++;
		column++;
		pp_space ();
		invert = !invert;
	}

	bool operand = parse_pp_primary_expression ();
	return invert ? !operand : operand;
}
1920 :
/**
 * Parses a chain of unary preprocessor expressions joined by "==" or
 * "!=" and evaluates it from left to right.
 *
 * @return the value of the chain
 */
bool parse_pp_equality_expression () {
	bool result = parse_pp_unary_expression ();
	pp_space ();

	// both operators are two characters long and end in '='
	while (current < end - 1
	       && current[1] == '='
	       && (current[0] == '=' || current[0] == '!')) {
		bool test_equal = current[0] == '=';

		current += 2;
		column += 2;
		pp_space ();

		bool operand = parse_pp_unary_expression ();
		result = test_equal ? (result == operand) : (result != operand);
	}

	return result;
}
1943 :
/**
 * Parses a chain of equality expressions joined by "&&".
 *
 * Every operand is parsed, whatever the value of the ones before it.
 *
 * @return true if all operands are true
 */
bool parse_pp_and_expression () {
	bool result = parse_pp_equality_expression ();
	pp_space ();

	for (;;) {
		bool has_operator = current < end - 1 && current[0] == '&' && current[1] == '&';
		if (!has_operator) {
			break;
		}

		current += 2;
		column += 2;
		pp_space ();

		if (!parse_pp_equality_expression ()) {
			result = false;
		}
	}

	return result;
}
1956 :
/**
 * Parses a chain of and-expressions joined by "||".
 *
 * Every operand is parsed, whatever the value of the ones before it.
 *
 * @return true if at least one operand is true
 */
bool parse_pp_or_expression () {
	bool result = parse_pp_and_expression ();
	pp_space ();

	for (;;) {
		bool has_operator = current < end - 1 && current[0] == '|' && current[1] == '|';
		if (!has_operator) {
			break;
		}

		current += 2;
		column += 2;
		pp_space ();

		if (parse_pp_and_expression ()) {
			result = true;
		}
	}

	return result;
}
1969 :
/**
 * Parses a complete preprocessor expression, starting at the "||" level.
 *
 * @return the value of the expression
 */
bool parse_pp_expression () {
	bool result = parse_pp_or_expression ();
	return result;
}
1973 : }
1974 :
|