Path: blob/master/modules/gdscript/gdscript_tokenizer_buffer.cpp
10277 views
/**************************************************************************/1/* gdscript_tokenizer_buffer.cpp */2/**************************************************************************/3/* This file is part of: */4/* GODOT ENGINE */5/* https://godotengine.org */6/**************************************************************************/7/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */8/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */9/* */10/* Permission is hereby granted, free of charge, to any person obtaining */11/* a copy of this software and associated documentation files (the */12/* "Software"), to deal in the Software without restriction, including */13/* without limitation the rights to use, copy, modify, merge, publish, */14/* distribute, sublicense, and/or sell copies of the Software, and to */15/* permit persons to whom the Software is furnished to do so, subject to */16/* the following conditions: */17/* */18/* The above copyright notice and this permission notice shall be */19/* included in all copies or substantial portions of the Software. */20/* */21/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */22/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */23/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */24/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */25/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */26/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */27/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */28/**************************************************************************/2930#include "gdscript_tokenizer_buffer.h"3132#include "core/io/compression.h"33#include "core/io/marshalls.h"3435int GDScriptTokenizerBuffer::_token_to_binary(const Token &p_token, Vector<uint8_t> &r_buffer, int p_start, HashMap<StringName, uint32_t> &r_identifiers_map, HashMap<Variant, uint32_t, VariantHasher, VariantComparator> &r_constants_map) {36int pos = p_start;3738int token_type = p_token.type & TOKEN_MASK;3940switch (p_token.type) {41case GDScriptTokenizer::Token::ANNOTATION:42case GDScriptTokenizer::Token::IDENTIFIER: {43// Add identifier to map.44int identifier_pos;45StringName id = p_token.get_identifier();46if (r_identifiers_map.has(id)) {47identifier_pos = r_identifiers_map[id];48} else {49identifier_pos = r_identifiers_map.size();50r_identifiers_map[id] = identifier_pos;51}52token_type |= identifier_pos << TOKEN_BITS;53} break;54case GDScriptTokenizer::Token::ERROR:55case GDScriptTokenizer::Token::LITERAL: {56// Add literal to map.57int constant_pos;58if (r_constants_map.has(p_token.literal)) {59constant_pos = r_constants_map[p_token.literal];60} else {61constant_pos = r_constants_map.size();62r_constants_map[p_token.literal] = constant_pos;63}64token_type |= constant_pos << TOKEN_BITS;65} break;66default:67break;68}6970// Encode token.71int token_len;72if (token_type & TOKEN_MASK) {73token_len = 8;74r_buffer.resize(pos + token_len);75encode_uint32(token_type | TOKEN_BYTE_MASK, &r_buffer.write[pos]);76pos += 4;77} else {78token_len = 5;79r_buffer.resize(pos + token_len);80r_buffer.write[pos] = token_type;81pos++;82}83encode_uint32(p_token.start_line, &r_buffer.write[pos]);84return token_len;85}8687GDScriptTokenizer::Token GDScriptTokenizerBuffer::_binary_to_token(const uint8_t *p_buffer) {88Token token;89const uint8_t *b = p_buffer;9091uint32_t token_type = decode_uint32(b);92token.type = (Token::Type)(token_type & TOKEN_MASK);93if (token_type & TOKEN_BYTE_MASK) {94b += 4;95} else {96b++;97}98token.start_line = decode_uint32(b);99token.end_line = token.start_line;100101token.literal = token.get_name();102if (token.type == Token::CONST_NAN) {103token.literal = String("NAN"); // Special case since name and notation are different.104}105106switch (token.type) {107case GDScriptTokenizer::Token::ANNOTATION:108case GDScriptTokenizer::Token::IDENTIFIER: {109// Get name from map.110int identifier_pos = token_type >> TOKEN_BITS;111if (unlikely(identifier_pos >= identifiers.size())) {112Token error;113error.type = Token::ERROR;114error.literal = "Identifier index out of bounds.";115return error;116}117token.literal = identifiers[identifier_pos];118} break;119case GDScriptTokenizer::Token::ERROR:120case GDScriptTokenizer::Token::LITERAL: {121// Get literal from map.122int constant_pos = token_type >> TOKEN_BITS;123if (unlikely(constant_pos >= constants.size())) {124Token error;125error.type = Token::ERROR;126error.literal = "Constant index out of bounds.";127return error;128}129token.literal = constants[constant_pos];130} break;131default:132break;133}134135return token;136}137138Error GDScriptTokenizerBuffer::set_code_buffer(const Vector<uint8_t> &p_buffer) {139const uint8_t *buf = p_buffer.ptr();140ERR_FAIL_COND_V(p_buffer.size() < 12 || p_buffer[0] != 'G' || p_buffer[1] != 'D' || p_buffer[2] != 'S' || p_buffer[3] != 'C', ERR_INVALID_DATA);141142int version = decode_uint32(&buf[4]);143ERR_FAIL_COND_V_MSG(version != TOKENIZER_VERSION, ERR_INVALID_DATA, "Binary GDScript is not compatible with this engine version.");144145int decompressed_size = decode_uint32(&buf[8]);146147Vector<uint8_t> contents;148if (decompressed_size == 0) {149contents = p_buffer.slice(12);150} else {151contents.resize(decompressed_size);152const int64_t result = Compression::decompress(contents.ptrw(), contents.size(), &buf[12], p_buffer.size() - 12, Compression::MODE_ZSTD);153ERR_FAIL_COND_V_MSG(result != decompressed_size, ERR_INVALID_DATA, "Error decompressing GDScript tokenizer buffer.");154}155156int total_len = contents.size();157buf = contents.ptr();158uint32_t identifier_count = decode_uint32(&buf[0]);159uint32_t constant_count = decode_uint32(&buf[4]);160uint32_t token_line_count = decode_uint32(&buf[8]);161uint32_t token_count = decode_uint32(&buf[12]);162163const uint8_t *b = &buf[16];164total_len -= 16;165166identifiers.resize(identifier_count);167for (uint32_t i = 0; i < identifier_count; i++) {168uint32_t len = decode_uint32(b);169total_len -= 4;170ERR_FAIL_COND_V((len * 4u) > (uint32_t)total_len, ERR_INVALID_DATA);171b += 4;172Vector<uint32_t> cs;173cs.resize(len);174for (uint32_t j = 0; j < len; j++) {175uint8_t tmp[4];176for (uint32_t k = 0; k < 4; k++) {177tmp[k] = b[j * 4 + k] ^ 0xb6;178}179cs.write[j] = decode_uint32(tmp);180}181182String s = String::utf32(Span(reinterpret_cast<const char32_t *>(cs.ptr()), len));183b += len * 4;184total_len -= len * 4;185identifiers.write[i] = s;186}187188constants.resize(constant_count);189for (uint32_t i = 0; i < constant_count; i++) {190Variant v;191int len;192Error err = decode_variant(v, b, total_len, &len, false);193if (err) {194return err;195}196b += len;197total_len -= len;198constants.write[i] = v;199}200201for (uint32_t i = 0; i < token_line_count; i++) {202ERR_FAIL_COND_V(total_len < 8, ERR_INVALID_DATA);203uint32_t token_index = decode_uint32(b);204b += 4;205uint32_t line = decode_uint32(b);206b += 4;207total_len -= 8;208token_lines[token_index] = line;209}210for (uint32_t i = 0; i < token_line_count; i++) {211ERR_FAIL_COND_V(total_len < 8, ERR_INVALID_DATA);212uint32_t token_index = decode_uint32(b);213b += 4;214uint32_t column = decode_uint32(b);215b += 4;216total_len -= 8;217token_columns[token_index] = column;218}219220tokens.resize(token_count);221for (uint32_t i = 0; i < token_count; i++) {222int token_len = 5;223if ((*b) & TOKEN_BYTE_MASK) {224token_len = 8;225}226ERR_FAIL_COND_V(total_len < token_len, ERR_INVALID_DATA);227Token token = _binary_to_token(b);228b += token_len;229ERR_FAIL_INDEX_V(token.type, Token::TK_MAX, ERR_INVALID_DATA);230tokens.write[i] = token;231total_len -= token_len;232}233234ERR_FAIL_COND_V(total_len > 0, ERR_INVALID_DATA);235236return OK;237}238239Vector<uint8_t> GDScriptTokenizerBuffer::parse_code_string(const String &p_code, CompressMode p_compress_mode) {240HashMap<StringName, uint32_t> identifier_map;241HashMap<Variant, uint32_t, VariantHasher, VariantComparator> constant_map;242Vector<uint8_t> token_buffer;243HashMap<uint32_t, uint32_t> token_lines;244HashMap<uint32_t, uint32_t> token_columns;245246GDScriptTokenizerText tokenizer;247tokenizer.set_source_code(p_code);248tokenizer.set_multiline_mode(true); // Ignore whitespace tokens.249Token current = tokenizer.scan();250int token_pos = 0;251int last_token_line = 0;252int token_counter = 0;253254while (current.type != Token::TK_EOF) {255int token_len = _token_to_binary(current, token_buffer, token_pos, identifier_map, constant_map);256token_pos += token_len;257if (token_counter > 0 && current.start_line > last_token_line) {258token_lines[token_counter] = current.start_line;259token_columns[token_counter] = current.start_column;260}261last_token_line = current.end_line;262263current = tokenizer.scan();264token_counter++;265}266267// Reverse maps.268Vector<StringName> rev_identifier_map;269rev_identifier_map.resize(identifier_map.size());270for (const KeyValue<StringName, uint32_t> &E : identifier_map) {271rev_identifier_map.write[E.value] = E.key;272}273Vector<Variant> rev_constant_map;274rev_constant_map.resize(constant_map.size());275for (const KeyValue<Variant, uint32_t> &E : constant_map) {276rev_constant_map.write[E.value] = E.key;277}278HashMap<uint32_t, uint32_t> rev_token_lines;279for (const KeyValue<uint32_t, uint32_t> &E : token_lines) {280rev_token_lines[E.value] = E.key;281}282283// Remove continuation lines from map.284for (int line : tokenizer.get_continuation_lines()) {285if (rev_token_lines.has(line)) {286token_lines.erase(rev_token_lines[line]);287token_columns.erase(rev_token_lines[line]);288}289}290291Vector<uint8_t> contents;292contents.resize(16);293encode_uint32(identifier_map.size(), &contents.write[0]);294encode_uint32(constant_map.size(), &contents.write[4]);295encode_uint32(token_lines.size(), &contents.write[8]);296encode_uint32(token_counter, &contents.write[12]);297298int buf_pos = 16;299300// Save identifiers.301for (const StringName &id : rev_identifier_map) {302String s = id.operator String();303int len = s.length();304305contents.resize(buf_pos + (len + 1) * 4);306307encode_uint32(len, &contents.write[buf_pos]);308buf_pos += 4;309310for (int i = 0; i < len; i++) {311uint8_t tmp[4];312encode_uint32(s[i], tmp);313314for (int b = 0; b < 4; b++) {315contents.write[buf_pos + b] = tmp[b] ^ 0xb6;316}317318buf_pos += 4;319}320}321322// Save constants.323for (const Variant &v : rev_constant_map) {324int len;325// Objects cannot be constant, never encode objects.326Error err = encode_variant(v, nullptr, len, false);327ERR_FAIL_COND_V_MSG(err != OK, Vector<uint8_t>(), "Error when trying to encode Variant.");328contents.resize(buf_pos + len);329encode_variant(v, &contents.write[buf_pos], len, false);330buf_pos += len;331}332333// Save lines and columns.334contents.resize(buf_pos + token_lines.size() * 16);335for (const KeyValue<uint32_t, uint32_t> &e : token_lines) {336encode_uint32(e.key, &contents.write[buf_pos]);337buf_pos += 4;338encode_uint32(e.value, &contents.write[buf_pos]);339buf_pos += 4;340}341for (const KeyValue<uint32_t, uint32_t> &e : token_columns) {342encode_uint32(e.key, &contents.write[buf_pos]);343buf_pos += 4;344encode_uint32(e.value, &contents.write[buf_pos]);345buf_pos += 4;346}347348// Store tokens.349contents.append_array(token_buffer);350351Vector<uint8_t> buf;352353// Save header.354buf.resize(12);355buf.write[0] = 'G';356buf.write[1] = 'D';357buf.write[2] = 'S';358buf.write[3] = 'C';359encode_uint32(TOKENIZER_VERSION, &buf.write[4]);360361switch (p_compress_mode) {362case COMPRESS_NONE:363encode_uint32(0u, &buf.write[8]);364buf.append_array(contents);365break;366367case COMPRESS_ZSTD: {368encode_uint32(contents.size(), &buf.write[8]);369Vector<uint8_t> compressed;370const int64_t max_size = Compression::get_max_compressed_buffer_size(contents.size(), Compression::MODE_ZSTD);371compressed.resize(max_size);372373const int64_t compressed_size = Compression::compress(compressed.ptrw(), contents.ptr(), contents.size(), Compression::MODE_ZSTD);374ERR_FAIL_COND_V_MSG(compressed_size < 0, Vector<uint8_t>(), "Error compressing GDScript tokenizer buffer.");375compressed.resize(compressed_size);376377buf.append_array(compressed);378} break;379}380381return buf;382}383384int GDScriptTokenizerBuffer::get_cursor_line() const {385return 0;386}387388int GDScriptTokenizerBuffer::get_cursor_column() const {389return 0;390}391392void GDScriptTokenizerBuffer::set_cursor_position(int p_line, int p_column) {393}394395void GDScriptTokenizerBuffer::set_multiline_mode(bool p_state) {396multiline_mode = p_state;397}398399bool GDScriptTokenizerBuffer::is_past_cursor() const {400return false;401}402403void GDScriptTokenizerBuffer::push_expression_indented_block() {404indent_stack_stack.push_back(indent_stack);405}406407void GDScriptTokenizerBuffer::pop_expression_indented_block() {408ERR_FAIL_COND(indent_stack_stack.is_empty());409indent_stack = indent_stack_stack.back()->get();410indent_stack_stack.pop_back();411}412413GDScriptTokenizer::Token GDScriptTokenizerBuffer::scan() {414// Add final newline.415if (current >= tokens.size() && !last_token_was_newline) {416Token newline;417newline.type = Token::NEWLINE;418newline.start_line = current_line;419newline.end_line = current_line;420last_token_was_newline = true;421return newline;422}423424// Resolve pending indentation change.425if (pending_indents > 0) {426pending_indents--;427Token indent;428indent.type = Token::INDENT;429indent.start_line = current_line;430indent.end_line = current_line;431return indent;432} else if (pending_indents < 0) {433pending_indents++;434Token dedent;435dedent.type = Token::DEDENT;436dedent.start_line = current_line;437dedent.end_line = current_line;438return dedent;439}440441if (current >= tokens.size()) {442if (!indent_stack.is_empty()) {443pending_indents -= indent_stack.size();444indent_stack.clear();445return scan();446}447Token eof;448eof.type = Token::TK_EOF;449return eof;450};451452if (!last_token_was_newline && token_lines.has(current)) {453current_line = token_lines[current];454uint32_t current_column = token_columns[current];455456// Check if there's a need to indent/dedent.457if (!multiline_mode) {458uint32_t previous_indent = 0;459if (!indent_stack.is_empty()) {460previous_indent = indent_stack.back()->get();461}462if (current_column - 1 > previous_indent) {463pending_indents++;464indent_stack.push_back(current_column - 1);465} else {466while (current_column - 1 < previous_indent) {467pending_indents--;468indent_stack.pop_back();469if (indent_stack.is_empty()) {470break;471}472previous_indent = indent_stack.back()->get();473}474}475476Token newline;477newline.type = Token::NEWLINE;478newline.start_line = current_line;479newline.end_line = current_line;480last_token_was_newline = true;481482return newline;483}484}485486last_token_was_newline = false;487488Token token = tokens[current++];489return token;490}491492493