Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/modules/gdscript/gdscript_tokenizer.h
10277 views
1
/**************************************************************************/
2
/* gdscript_tokenizer.h */
3
/**************************************************************************/
4
/* This file is part of: */
5
/* GODOT ENGINE */
6
/* https://godotengine.org */
7
/**************************************************************************/
8
/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
9
/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
10
/* */
11
/* Permission is hereby granted, free of charge, to any person obtaining */
12
/* a copy of this software and associated documentation files (the */
13
/* "Software"), to deal in the Software without restriction, including */
14
/* without limitation the rights to use, copy, modify, merge, publish, */
15
/* distribute, sublicense, and/or sell copies of the Software, and to */
16
/* permit persons to whom the Software is furnished to do so, subject to */
17
/* the following conditions: */
18
/* */
19
/* The above copyright notice and this permission notice shall be */
20
/* included in all copies or substantial portions of the Software. */
21
/* */
22
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
23
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
24
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
25
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
26
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
27
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
28
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
29
/**************************************************************************/
30
31
#pragma once
32
33
#include "core/templates/hash_map.h"
34
#include "core/templates/list.h"
35
#include "core/templates/vector.h"
36
#include "core/variant/variant.h"
37
38
class GDScriptTokenizer {
39
public:
40
enum CursorPlace {
41
CURSOR_NONE,
42
CURSOR_BEGINNING,
43
CURSOR_MIDDLE,
44
CURSOR_END,
45
};
46
47
struct Token {
48
// If this enum changes, please increment the TOKENIZER_VERSION in gdscript_tokenizer_buffer.h
49
enum Type {
50
EMPTY,
51
// Basic
52
ANNOTATION,
53
IDENTIFIER,
54
LITERAL,
55
// Comparison
56
LESS,
57
LESS_EQUAL,
58
GREATER,
59
GREATER_EQUAL,
60
EQUAL_EQUAL,
61
BANG_EQUAL,
62
// Logical
63
AND,
64
OR,
65
NOT,
66
AMPERSAND_AMPERSAND,
67
PIPE_PIPE,
68
BANG,
69
// Bitwise
70
AMPERSAND,
71
PIPE,
72
TILDE,
73
CARET,
74
LESS_LESS,
75
GREATER_GREATER,
76
// Math
77
PLUS,
78
MINUS,
79
STAR,
80
STAR_STAR,
81
SLASH,
82
PERCENT,
83
// Assignment
84
EQUAL,
85
PLUS_EQUAL,
86
MINUS_EQUAL,
87
STAR_EQUAL,
88
STAR_STAR_EQUAL,
89
SLASH_EQUAL,
90
PERCENT_EQUAL,
91
LESS_LESS_EQUAL,
92
GREATER_GREATER_EQUAL,
93
AMPERSAND_EQUAL,
94
PIPE_EQUAL,
95
CARET_EQUAL,
96
// Control flow
97
IF,
98
ELIF,
99
ELSE,
100
FOR,
101
WHILE,
102
BREAK,
103
CONTINUE,
104
PASS,
105
RETURN,
106
MATCH,
107
WHEN,
108
// Keywords
109
AS,
110
ASSERT,
111
AWAIT,
112
BREAKPOINT,
113
CLASS,
114
CLASS_NAME,
115
TK_CONST, // Conflict with WinAPI.
116
ENUM,
117
EXTENDS,
118
FUNC,
119
TK_IN, // Conflict with WinAPI.
120
IS,
121
NAMESPACE,
122
PRELOAD,
123
SELF,
124
SIGNAL,
125
STATIC,
126
SUPER,
127
TRAIT,
128
VAR,
129
TK_VOID, // Conflict with WinAPI.
130
YIELD,
131
// Punctuation
132
BRACKET_OPEN,
133
BRACKET_CLOSE,
134
BRACE_OPEN,
135
BRACE_CLOSE,
136
PARENTHESIS_OPEN,
137
PARENTHESIS_CLOSE,
138
COMMA,
139
SEMICOLON,
140
PERIOD,
141
PERIOD_PERIOD,
142
PERIOD_PERIOD_PERIOD,
143
COLON,
144
DOLLAR,
145
FORWARD_ARROW,
146
UNDERSCORE,
147
// Whitespace
148
NEWLINE,
149
INDENT,
150
DEDENT,
151
// Constants
152
CONST_PI,
153
CONST_TAU,
154
CONST_INF,
155
CONST_NAN,
156
// Error message improvement
157
VCS_CONFLICT_MARKER,
158
BACKTICK,
159
QUESTION_MARK,
160
// Special
161
ERROR,
162
TK_EOF, // "EOF" is reserved
163
TK_MAX
164
};
165
166
Type type = EMPTY;
167
Variant literal;
168
int start_line = 0, end_line = 0, start_column = 0, end_column = 0;
169
int cursor_position = -1;
170
CursorPlace cursor_place = CURSOR_NONE;
171
String source;
172
173
const char *get_name() const;
174
String get_debug_name() const;
175
bool can_precede_bin_op() const;
176
bool is_identifier() const;
177
bool is_node_name() const;
178
StringName get_identifier() const { return literal; }
179
180
Token(Type p_type) {
181
type = p_type;
182
}
183
184
Token() {}
185
};
186
187
#ifdef TOOLS_ENABLED
188
struct CommentData {
189
String comment;
190
// true: Comment starts at beginning of line or after indentation.
191
// false: Inline comment (starts after some code).
192
bool new_line = false;
193
CommentData() {}
194
CommentData(const String &p_comment, bool p_new_line) {
195
comment = p_comment;
196
new_line = p_new_line;
197
}
198
};
199
virtual const HashMap<int, CommentData> &get_comments() const = 0;
200
#endif // TOOLS_ENABLED
201
202
static String get_token_name(Token::Type p_token_type);
203
204
#ifdef TOOLS_ENABLED
205
// This is a temporary solution, as Tokens are not able to store their position, only lines and columns.
206
virtual int get_current_position() const { return 0; }
207
virtual String get_source_code() const { return ""; }
208
#endif // TOOLS_ENABLED
209
210
virtual int get_cursor_line() const = 0;
211
virtual int get_cursor_column() const = 0;
212
virtual void set_cursor_position(int p_line, int p_column) = 0;
213
virtual void set_multiline_mode(bool p_state) = 0;
214
virtual bool is_past_cursor() const = 0;
215
virtual void push_expression_indented_block() = 0; // For lambdas, or blocks inside expressions.
216
virtual void pop_expression_indented_block() = 0; // For lambdas, or blocks inside expressions.
217
virtual bool is_text() = 0;
218
219
virtual Token scan() = 0;
220
221
virtual ~GDScriptTokenizer() {}
222
};
223
224
class GDScriptTokenizerText : public GDScriptTokenizer {
225
String source;
226
const char32_t *_source = nullptr;
227
const char32_t *_current = nullptr;
228
int line = -1, column = -1;
229
int cursor_line = -1, cursor_column = -1;
230
int tab_size = 4;
231
232
// Keep track of multichar tokens.
233
const char32_t *_start = nullptr;
234
int start_line = 0, start_column = 0;
235
236
// Info cache.
237
bool line_continuation = false; // Whether this line is a continuation of the previous, like when using '\'.
238
bool multiline_mode = false;
239
List<Token> error_stack;
240
bool pending_newline = false;
241
Token last_token;
242
Token last_newline;
243
int pending_indents = 0;
244
List<int> indent_stack;
245
List<List<int>> indent_stack_stack; // For lambdas, which require manipulating the indentation point.
246
List<char32_t> paren_stack;
247
char32_t indent_char = '\0';
248
int position = 0;
249
int length = 0;
250
Vector<int> continuation_lines;
251
#ifdef DEBUG_ENABLED
252
Vector<String> keyword_list;
253
#endif // DEBUG_ENABLED
254
255
#ifdef TOOLS_ENABLED
256
HashMap<int, CommentData> comments;
257
#endif // TOOLS_ENABLED
258
259
_FORCE_INLINE_ bool _is_at_end() { return position >= length; }
260
_FORCE_INLINE_ char32_t _peek(int p_offset = 0) { return position + p_offset >= 0 && position + p_offset < length ? _current[p_offset] : '\0'; }
261
int indent_level() const { return indent_stack.size(); }
262
bool has_error() const { return !error_stack.is_empty(); }
263
Token pop_error();
264
char32_t _advance();
265
String _get_indent_char_name(char32_t ch);
266
void _skip_whitespace();
267
void check_indent();
268
269
#ifdef DEBUG_ENABLED
270
void make_keyword_list();
271
#endif // DEBUG_ENABLED
272
273
Token make_error(const String &p_message);
274
void push_error(const String &p_message);
275
void push_error(const Token &p_error);
276
Token make_paren_error(char32_t p_paren);
277
Token make_token(Token::Type p_type);
278
Token make_literal(const Variant &p_literal);
279
Token make_identifier(const StringName &p_identifier);
280
Token check_vcs_marker(char32_t p_test, Token::Type p_double_type);
281
void push_paren(char32_t p_char);
282
bool pop_paren(char32_t p_expected);
283
284
void newline(bool p_make_token);
285
Token number();
286
Token potential_identifier();
287
Token string();
288
Token annotation();
289
290
public:
291
void set_source_code(const String &p_source_code);
292
293
const Vector<int> &get_continuation_lines() const { return continuation_lines; }
294
295
#ifdef TOOLS_ENABLED
296
virtual int get_current_position() const override { return position; }
297
virtual String get_source_code() const override { return source; }
298
#endif // TOOLS_ENABLED
299
300
virtual int get_cursor_line() const override;
301
virtual int get_cursor_column() const override;
302
virtual void set_cursor_position(int p_line, int p_column) override;
303
virtual void set_multiline_mode(bool p_state) override;
304
virtual bool is_past_cursor() const override;
305
virtual void push_expression_indented_block() override; // For lambdas, or blocks inside expressions.
306
virtual void pop_expression_indented_block() override; // For lambdas, or blocks inside expressions.
307
virtual bool is_text() override { return true; }
308
309
#ifdef TOOLS_ENABLED
310
virtual const HashMap<int, CommentData> &get_comments() const override {
311
return comments;
312
}
313
#endif // TOOLS_ENABLED
314
315
virtual Token scan() override;
316
317
GDScriptTokenizerText();
318
};
319
320