// Path: blob/master/modules/gdscript/gdscript_tokenizer.h
// 10277 views
/**************************************************************************/1/* gdscript_tokenizer.h */2/**************************************************************************/3/* This file is part of: */4/* GODOT ENGINE */5/* https://godotengine.org */6/**************************************************************************/7/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */8/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */9/* */10/* Permission is hereby granted, free of charge, to any person obtaining */11/* a copy of this software and associated documentation files (the */12/* "Software"), to deal in the Software without restriction, including */13/* without limitation the rights to use, copy, modify, merge, publish, */14/* distribute, sublicense, and/or sell copies of the Software, and to */15/* permit persons to whom the Software is furnished to do so, subject to */16/* the following conditions: */17/* */18/* The above copyright notice and this permission notice shall be */19/* included in all copies or substantial portions of the Software. */20/* */21/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */22/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */23/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */24/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */25/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */26/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */27/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/28/**************************************************************************/2930#pragma once3132#include "core/templates/hash_map.h"33#include "core/templates/list.h"34#include "core/templates/vector.h"35#include "core/variant/variant.h"3637class GDScriptTokenizer {38public:39enum CursorPlace {40CURSOR_NONE,41CURSOR_BEGINNING,42CURSOR_MIDDLE,43CURSOR_END,44};4546struct Token {47// If this enum changes, please increment the TOKENIZER_VERSION in gdscript_tokenizer_buffer.h48enum Type {49EMPTY,50// Basic51ANNOTATION,52IDENTIFIER,53LITERAL,54// Comparison55LESS,56LESS_EQUAL,57GREATER,58GREATER_EQUAL,59EQUAL_EQUAL,60BANG_EQUAL,61// Logical62AND,63OR,64NOT,65AMPERSAND_AMPERSAND,66PIPE_PIPE,67BANG,68// Bitwise69AMPERSAND,70PIPE,71TILDE,72CARET,73LESS_LESS,74GREATER_GREATER,75// Math76PLUS,77MINUS,78STAR,79STAR_STAR,80SLASH,81PERCENT,82// Assignment83EQUAL,84PLUS_EQUAL,85MINUS_EQUAL,86STAR_EQUAL,87STAR_STAR_EQUAL,88SLASH_EQUAL,89PERCENT_EQUAL,90LESS_LESS_EQUAL,91GREATER_GREATER_EQUAL,92AMPERSAND_EQUAL,93PIPE_EQUAL,94CARET_EQUAL,95// Control flow96IF,97ELIF,98ELSE,99FOR,100WHILE,101BREAK,102CONTINUE,103PASS,104RETURN,105MATCH,106WHEN,107// Keywords108AS,109ASSERT,110AWAIT,111BREAKPOINT,112CLASS,113CLASS_NAME,114TK_CONST, // Conflict with WinAPI.115ENUM,116EXTENDS,117FUNC,118TK_IN, // Conflict with WinAPI.119IS,120NAMESPACE,121PRELOAD,122SELF,123SIGNAL,124STATIC,125SUPER,126TRAIT,127VAR,128TK_VOID, // Conflict with WinAPI.129YIELD,130// Punctuation131BRACKET_OPEN,132BRACKET_CLOSE,133BRACE_OPEN,134BRACE_CLOSE,135PARENTHESIS_OPEN,136PARENTHESIS_CLOSE,137COMMA,138SEMICOLON,139PERIOD,140PERIOD_PERIOD,141PERIOD_PERIOD_PERIOD,142COLON,143DOLLAR,144FORWARD_ARROW,145UNDERSCORE,146// Whitespace147NEWLINE,148INDENT,149DEDENT,150// Constants151CONST_PI,152CONST_TAU,153CONST_INF,154CONST_NAN,155// Error message improvement156VCS_CONFLICT_MARKER,157BACKTICK,158QUESTION_MARK,159// Special160ERROR,161TK_EOF, // "EOF" is reserved162TK_MAX163};164165Type type = EMPTY;166Variant 
literal;167int start_line = 0, end_line = 0, start_column = 0, end_column = 0;168int cursor_position = -1;169CursorPlace cursor_place = CURSOR_NONE;170String source;171172const char *get_name() const;173String get_debug_name() const;174bool can_precede_bin_op() const;175bool is_identifier() const;176bool is_node_name() const;177StringName get_identifier() const { return literal; }178179Token(Type p_type) {180type = p_type;181}182183Token() {}184};185186#ifdef TOOLS_ENABLED187struct CommentData {188String comment;189// true: Comment starts at beginning of line or after indentation.190// false: Inline comment (starts after some code).191bool new_line = false;192CommentData() {}193CommentData(const String &p_comment, bool p_new_line) {194comment = p_comment;195new_line = p_new_line;196}197};198virtual const HashMap<int, CommentData> &get_comments() const = 0;199#endif // TOOLS_ENABLED200201static String get_token_name(Token::Type p_token_type);202203#ifdef TOOLS_ENABLED204// This is a temporary solution, as Tokens are not able to store their position, only lines and columns.205virtual int get_current_position() const { return 0; }206virtual String get_source_code() const { return ""; }207#endif // TOOLS_ENABLED208209virtual int get_cursor_line() const = 0;210virtual int get_cursor_column() const = 0;211virtual void set_cursor_position(int p_line, int p_column) = 0;212virtual void set_multiline_mode(bool p_state) = 0;213virtual bool is_past_cursor() const = 0;214virtual void push_expression_indented_block() = 0; // For lambdas, or blocks inside expressions.215virtual void pop_expression_indented_block() = 0; // For lambdas, or blocks inside expressions.216virtual bool is_text() = 0;217218virtual Token scan() = 0;219220virtual ~GDScriptTokenizer() {}221};222223class GDScriptTokenizerText : public GDScriptTokenizer {224String source;225const char32_t *_source = nullptr;226const char32_t *_current = nullptr;227int line = -1, column = -1;228int cursor_line = -1, 
cursor_column = -1;229int tab_size = 4;230231// Keep track of multichar tokens.232const char32_t *_start = nullptr;233int start_line = 0, start_column = 0;234235// Info cache.236bool line_continuation = false; // Whether this line is a continuation of the previous, like when using '\'.237bool multiline_mode = false;238List<Token> error_stack;239bool pending_newline = false;240Token last_token;241Token last_newline;242int pending_indents = 0;243List<int> indent_stack;244List<List<int>> indent_stack_stack; // For lambdas, which require manipulating the indentation point.245List<char32_t> paren_stack;246char32_t indent_char = '\0';247int position = 0;248int length = 0;249Vector<int> continuation_lines;250#ifdef DEBUG_ENABLED251Vector<String> keyword_list;252#endif // DEBUG_ENABLED253254#ifdef TOOLS_ENABLED255HashMap<int, CommentData> comments;256#endif // TOOLS_ENABLED257258_FORCE_INLINE_ bool _is_at_end() { return position >= length; }259_FORCE_INLINE_ char32_t _peek(int p_offset = 0) { return position + p_offset >= 0 && position + p_offset < length ? 
_current[p_offset] : '\0'; }260int indent_level() const { return indent_stack.size(); }261bool has_error() const { return !error_stack.is_empty(); }262Token pop_error();263char32_t _advance();264String _get_indent_char_name(char32_t ch);265void _skip_whitespace();266void check_indent();267268#ifdef DEBUG_ENABLED269void make_keyword_list();270#endif // DEBUG_ENABLED271272Token make_error(const String &p_message);273void push_error(const String &p_message);274void push_error(const Token &p_error);275Token make_paren_error(char32_t p_paren);276Token make_token(Token::Type p_type);277Token make_literal(const Variant &p_literal);278Token make_identifier(const StringName &p_identifier);279Token check_vcs_marker(char32_t p_test, Token::Type p_double_type);280void push_paren(char32_t p_char);281bool pop_paren(char32_t p_expected);282283void newline(bool p_make_token);284Token number();285Token potential_identifier();286Token string();287Token annotation();288289public:290void set_source_code(const String &p_source_code);291292const Vector<int> &get_continuation_lines() const { return continuation_lines; }293294#ifdef TOOLS_ENABLED295virtual int get_current_position() const override { return position; }296virtual String get_source_code() const override { return source; }297#endif // TOOLS_ENABLED298299virtual int get_cursor_line() const override;300virtual int get_cursor_column() const override;301virtual void set_cursor_position(int p_line, int p_column) override;302virtual void set_multiline_mode(bool p_state) override;303virtual bool is_past_cursor() const override;304virtual void push_expression_indented_block() override; // For lambdas, or blocks inside expressions.305virtual void pop_expression_indented_block() override; // For lambdas, or blocks inside expressions.306virtual bool is_text() override { return true; }307308#ifdef TOOLS_ENABLED309virtual const HashMap<int, CommentData> &get_comments() const override {310return comments;311}312#endif // 
TOOLS_ENABLED313314virtual Token scan() override;315316GDScriptTokenizerText();317};318319320