Path: blob/master/thirdparty/pcre2/src/pcre2_compile_cgroup.c
14710 views
/*************************************************1* Perl-Compatible Regular Expressions *2*************************************************/34/* PCRE is a library of functions to support regular expressions whose syntax5and semantics are as close as possible to those of the Perl 5 language.67Written by Philip Hazel8Original API code Copyright (c) 1997-2012 University of Cambridge9New API code Copyright (c) 2016-2024 University of Cambridge1011-----------------------------------------------------------------------------12Redistribution and use in source and binary forms, with or without13modification, are permitted provided that the following conditions are met:1415* Redistributions of source code must retain the above copyright notice,16this list of conditions and the following disclaimer.1718* Redistributions in binary form must reproduce the above copyright19notice, this list of conditions and the following disclaimer in the20documentation and/or other materials provided with the distribution.2122* Neither the name of the University of Cambridge nor the names of its23contributors may be used to endorse or promote products derived from24this software without specific prior written permission.2526THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"27AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE28IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE29ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE30LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR31CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF32SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS33INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN34CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)35ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE36POSSIBILITY OF SUCH DAMAGE.37-----------------------------------------------------------------------------38*/394041#include "pcre2_compile.h"4243/*************************************************44* Compute the hash code from a capture name *45*************************************************/4647/* This function returns with a simple hash code48computed from the name of a capture group.4950Arguments:51name name of the capture group52length the length of the name5354Returns: hash code55*/5657uint16_t58PRIV(compile_get_hash_from_name)(PCRE2_SPTR name, uint32_t length)59{60uint16_t hash;6162PCRE2_ASSERT(length > 0);6364hash = (uint16_t)((name[0] & 0x7f) | ((name[length - 1] & 0xff) << 7));65PCRE2_ASSERT(hash <= NAMED_GROUP_HASH_MASK);66return hash;67}686970/*************************************************71* Get the descriptor of a known named capture *72*************************************************/7374/* This function returns the descriptor in the75named group list of a known capture group.7677Arguments:78name name of the capture group79length the length of the name8081Returns: pointer to the descriptor when found,82NULL otherwise83*/8485named_group *86PRIV(compile_find_named_group)(PCRE2_SPTR name,87uint32_t length, compile_block *cb)88{89uint16_t hash = PRIV(compile_get_hash_from_name)(name, length);90named_group *ng;91named_group *end = cb->named_groups + cb->names_found;9293for (ng = cb->named_groups; ng < end; ng++)94if (length == ng->length && hash == NAMED_GROUP_GET_HASH(ng) &&95PRIV(strncmp)(name, ng->name, length) == 0) return ng;9697return NULL;98}99100101/*************************************************102* Add an entry to the name/number table *103*************************************************/104105/* This function is called between compiling passes to add an entry to the106name/number table, maintaining alphabetical order. Checking for permitted107and forbidden duplicates has already been done.108109Arguments:110cb the compile data block111nb named group entry112tablecount the count of names in the table so far113114Returns: new tablecount115*/116117uint32_t118PRIV(compile_add_name_to_table)(compile_block *cb,119named_group *ng, uint32_t tablecount)120{121uint32_t i;122PCRE2_SPTR name = ng->name;123int length = ng->length;124uint32_t duplicate_count = 1;125126PCRE2_UCHAR *slot = cb->name_table;127128PCRE2_ASSERT(length > 0);129130if ((ng->hash_dup & NAMED_GROUP_IS_DUPNAME) != 0)131{132named_group *ng_it;133named_group *end = cb->named_groups + cb->names_found;134135for (ng_it = ng + 1; ng_it < end; ng_it++)136if (ng_it->name == name) duplicate_count++;137}138139for (i = 0; i < tablecount; i++)140{141int crc = memcmp(name, slot + IMM2_SIZE, CU2BYTES(length));142if (crc == 0 && slot[IMM2_SIZE + length] != 0)143crc = -1; /* Current name is a substring */144145/* Make space in the table and break the loop for an earlier name. For a146duplicate or later name, carry on. We do this for duplicates so that in the147simple case (when ?(| is not used) they are in order of their numbers. In all148cases they are in the order in which they appear in the pattern. */149150if (crc < 0)151{152(void)memmove(slot + cb->name_entry_size * duplicate_count, slot,153CU2BYTES((tablecount - i) * cb->name_entry_size));154break;155}156157/* Continue the loop for a later or duplicate name */158159slot += cb->name_entry_size;160}161162tablecount += duplicate_count;163164while (TRUE)165{166PUT2(slot, 0, ng->number);167memcpy(slot + IMM2_SIZE, name, CU2BYTES(length));168169/* Add a terminating zero and fill the rest of the slot with zeroes so that170the memory is all initialized. Otherwise valgrind moans about uninitialized171memory when saving serialized compiled patterns. */172173memset(slot + IMM2_SIZE + length, 0,174CU2BYTES(cb->name_entry_size - length - IMM2_SIZE));175176if (--duplicate_count == 0) break;177178while (TRUE)179{180++ng;181if (ng->name == name) break;182}183184slot += cb->name_entry_size;185}186187return tablecount;188}189190191/*************************************************192* Find details of duplicate group names *193*************************************************/194195/* This is called from compile_branch() when it needs to know the index and196count of duplicates in the names table when processing named backreferences,197either directly, or as conditions.198199Arguments:200name points to the name201length the length of the name202indexptr where to put the index203countptr where to put the count of duplicates204errorcodeptr where to put an error code205cb the compile block206207Returns: TRUE if OK, FALSE if not, error code set208*/209210BOOL211PRIV(compile_find_dupname_details)(PCRE2_SPTR name, uint32_t length,212int *indexptr, int *countptr, int *errorcodeptr, compile_block *cb)213{214uint32_t i, groupnumber;215int count;216PCRE2_UCHAR *slot = cb->name_table;217218/* Find the first entry in the table */219220for (i = 0; i < cb->names_found; i++)221{222if (PRIV(strncmp)(name, slot + IMM2_SIZE, length) == 0 &&223slot[IMM2_SIZE + length] == 0) break;224slot += cb->name_entry_size;225}226227/* This should not occur, because this function is called only when we know we228have duplicate names. Give an internal error. */229230/* LCOV_EXCL_START */231if (i >= cb->names_found)232{233PCRE2_DEBUG_UNREACHABLE();234*errorcodeptr = ERR53;235cb->erroroffset = name - cb->start_pattern;236return FALSE;237}238/* LCOV_EXCL_STOP */239240/* Record the index and then see how many duplicates there are, updating the241backref map and maximum back reference as we do. */242243*indexptr = i;244count = 0;245246for (;;)247{248count++;249groupnumber = GET2(slot, 0);250cb->backref_map |= (groupnumber < 32)? (1u << groupnumber) : 1;251if (groupnumber > cb->top_backref) cb->top_backref = groupnumber;252if (++i >= cb->names_found) break;253slot += cb->name_entry_size;254if (PRIV(strncmp)(name, slot + IMM2_SIZE, length) != 0 ||255(slot + IMM2_SIZE)[length] != 0) break;256}257258*countptr = count;259return TRUE;260}261262263/* Process the capture list of scan substring and recurse264operations. Since at least one argument must be present,265a 0 return value represents error. */266267static size_t268PRIV(compile_process_capture_list)(uint32_t *pptr, PCRE2_SIZE offset,269int *errorcodeptr, compile_block *cb)270{271size_t i, size = 0;272named_group *ng;273PCRE2_SPTR name;274uint32_t length;275named_group *end = cb->named_groups + cb->names_found;276277while (TRUE)278{279++pptr;280281switch (META_CODE(*pptr))282{283case META_OFFSET:284GETPLUSOFFSET(offset, pptr);285continue;286287case META_CAPTURE_NAME:288offset += META_DATA(*pptr);289length = *(++pptr);290name = cb->start_pattern + offset;291292ng = PRIV(compile_find_named_group)(name, length, cb);293294if (ng == NULL)295{296*errorcodeptr = ERR15;297cb->erroroffset = offset;298return 0;299}300301if ((ng->hash_dup & NAMED_GROUP_IS_DUPNAME) == 0)302{303pptr[-1] = META_CAPTURE_NUMBER;304pptr[0] = ng->number;305size++;306continue;307}308309/* Remains only for duplicated names. */310pptr[-1] = META_CAPTURE_NAME;311pptr[0] = (uint32_t)(ng - cb->named_groups);312size++;313name = ng->name;314315while (++ng < end)316if (ng->name == name) size++;317continue;318319case META_CAPTURE_NUMBER:320offset += META_DATA(*pptr);321322i = *(++pptr);323if (i > cb->bracount)324{325*errorcodeptr = ERR15;326cb->erroroffset = offset;327return 0;328}329if (i > cb->top_backref) cb->top_backref = (uint16_t)i;330size++;331continue;332333default:334break;335}336337PCRE2_ASSERT(size > 0);338return size;339}340}341342343/*******************************************************344* Parse the arguments of scan substring operations *345********************************************************/346347/* This function parses the arguments of scan substring operations.348349Arguments:350pptr_start points to the current parsed pattern pointer351offset argument starting offset in the pattern352errorcodeptr where to put an error code353cb the compile block354lengthptr NULL during the real compile phase355points to length accumulator during pre-compile phase356357Returns: TRUE if OK, FALSE if not, error code set358*/359360uint32_t *361PRIV(compile_parse_scan_substr_args)(uint32_t *pptr,362int *errorcodeptr, compile_block *cb, PCRE2_SIZE *lengthptr)363{364uint8_t *captures;365uint8_t *capture_ptr;366uint8_t bit;367PCRE2_SPTR name;368named_group *ng;369named_group *end = cb->named_groups + cb->names_found;370BOOL all_found;371size_t size;372373PCRE2_ASSERT(*pptr == META_OFFSET);374if (PRIV(compile_process_capture_list)(pptr - 1, 0, errorcodeptr, cb) == 0)375return NULL;376377/* Align to bytes. Since the highest capture can378be equal to bracount, +1 is added before the aligning. */379size = (cb->bracount + 1 + 7) >> 3;380captures = (uint8_t*)cb->cx->memctl.malloc(size, cb->cx->memctl.memory_data);381if (captures == NULL)382{383*errorcodeptr = ERR21;384READPLUSOFFSET(cb->erroroffset, pptr);385return NULL;386}387388memset(captures, 0, size);389390while (TRUE)391{392switch (META_CODE(*pptr))393{394case META_OFFSET:395pptr++;396SKIPOFFSET(pptr);397continue;398399case META_CAPTURE_NAME:400ng = cb->named_groups + pptr[1];401PCRE2_ASSERT((ng->hash_dup & NAMED_GROUP_IS_DUPNAME) != 0);402pptr += 2;403name = ng->name;404405all_found = TRUE;406do407{408if (ng->name != name) continue;409410capture_ptr = captures + (ng->number >> 3);411PCRE2_ASSERT(capture_ptr < captures + size);412bit = (uint8_t)(1 << (ng->number & 0x7));413414if ((*capture_ptr & bit) == 0)415{416*capture_ptr |= bit;417all_found = FALSE;418}419}420while (++ng < end);421422if (!all_found)423{424*lengthptr += 1 + 2 * IMM2_SIZE;425continue;426}427428pptr[-2] = META_CAPTURE_NUMBER;429pptr[-1] = 0;430continue;431432case META_CAPTURE_NUMBER:433pptr += 2;434435capture_ptr = captures + (pptr[-1] >> 3);436PCRE2_ASSERT(capture_ptr < captures + size);437bit = (uint8_t)(1 << (pptr[-1] & 0x7));438439if ((*capture_ptr & bit) != 0)440{441pptr[-1] = 0;442continue;443}444445*capture_ptr |= bit;446*lengthptr += 1 + IMM2_SIZE;447continue;448449default:450break;451}452453break;454}455456cb->cx->memctl.free(captures, cb->cx->memctl.memory_data);457return pptr - 1;458}459460461/* Implement heapsort heapify algorithm. */462463static void do_heapify_u16(uint16_t *captures, size_t size, size_t i)464{465size_t max;466size_t left;467size_t right;468uint16_t tmp;469470while (TRUE)471{472max = i;473left = (i << 1) + 1;474right = left + 1;475476if (left < size && captures[left] > captures[max]) max = left;477if (right < size && captures[right] > captures[max]) max = right;478if (i == max) return;479480tmp = captures[i];481captures[i] = captures[max];482captures[max] = tmp;483i = max;484}485}486487488/*************************************************489* Parse the arguments of recurse operations *490*************************************************/491492/* This function parses the arguments of recurse operations.493494Arguments:495pptr_start the current parsed pattern pointer496offset argument starting offset in the pattern497errorcodeptr where to put an error code498cb the compile block499lengthptr NULL during the real compile phase500points to length accumulator during pre-compile phase501502Returns: TRUE if OK, FALSE if not, error code set503*/504505BOOL506PRIV(compile_parse_recurse_args)(uint32_t *pptr_start,507PCRE2_SIZE offset, int *errorcodeptr, compile_block *cb)508{509uint32_t *pptr = pptr_start;510size_t i, size;511PCRE2_SPTR name;512named_group *ng;513named_group *end = cb->named_groups + cb->names_found;514recurse_arguments *args;515uint16_t *captures;516uint16_t *current;517uint16_t *captures_end;518uint16_t tmp;519520/* Process all arguments, compute the required size. */521522size = PRIV(compile_process_capture_list)(pptr, offset, errorcodeptr, cb);523if (size == 0) return FALSE;524525args = cb->cx->memctl.malloc(526sizeof(recurse_arguments) + size * sizeof(uint16_t), cb->cx->memctl.memory_data);527528if (args == NULL)529{530*errorcodeptr = ERR21;531cb->erroroffset = offset;532return FALSE;533}534535args->header.next = NULL;536#ifdef PCRE2_DEBUG537args->header.type = CDATA_RECURSE_ARGS;538#endif539args->size = size;540541/* Caching the pre-processed capture list. */542if (cb->last_data != NULL)543cb->last_data->next = &args->header;544else545cb->first_data = &args->header;546547cb->last_data = &args->header;548549/* Create the capture list size. */550551captures = (uint16_t*)(args + 1);552553while (TRUE)554{555++pptr;556557switch (META_CODE(*pptr))558{559case META_OFFSET:560SKIPOFFSET(pptr);561continue;562563case META_CAPTURE_NAME:564ng = cb->named_groups + *(++pptr);565PCRE2_ASSERT((ng->hash_dup & NAMED_GROUP_IS_DUPNAME) != 0);566*captures++ = (uint16_t)(ng->number);567568name = ng->name;569570while (++ng < end)571if (ng->name == name) *captures++ = (uint16_t)(ng->number);572continue;573574case META_CAPTURE_NUMBER:575*captures++ = *(++pptr);576continue;577578default:579break;580}581582break;583}584585PCRE2_ASSERT(size == (size_t)(captures - (uint16_t*)(args + 1)));586args->skip_size = (size_t)(pptr - pptr_start) - 1;587588if (size == 1) return TRUE;589590/* Sort captures. */591592captures = (uint16_t*)(args + 1);593i = (size >> 1) - 1;594while (TRUE)595{596do_heapify_u16(captures, size, i);597if (i == 0) break;598i--;599}600601for (i = size - 1; i > 0; i--)602{603tmp = captures[0];604captures[0] = captures[i];605captures[i] = tmp;606607do_heapify_u16(captures, i, 0);608}609610/* Remove duplicates. */611612captures_end = captures + size;613tmp = *captures++;614current = captures;615616while (current < captures_end)617{618if (*current != tmp)619{620tmp = *current;621*captures++ = tmp;622}623624current++;625}626627args->size = (size_t)(captures - (uint16_t*)(args + 1));628return TRUE;629}630631/* End of pcre2_compile_cgroup.c */632633634