/* Path: blob/master/thirdparty/pcre2/src/pcre2_match_next.c
   14710 views */
/*************************************************1* Perl-Compatible Regular Expressions *2*************************************************/34/* PCRE is a library of functions to support regular expressions whose syntax5and semantics are as close as possible to those of the Perl 5 language.67Written by Philip Hazel8Original API code Copyright (c) 1997-2012 University of Cambridge9New API code Copyright (c) 2016-2024 University of Cambridge1011-----------------------------------------------------------------------------12Redistribution and use in source and binary forms, with or without13modification, are permitted provided that the following conditions are met:1415* Redistributions of source code must retain the above copyright notice,16this list of conditions and the following disclaimer.1718* Redistributions in binary form must reproduce the above copyright19notice, this list of conditions and the following disclaimer in the20documentation and/or other materials provided with the distribution.2122* Neither the name of the University of Cambridge nor the names of its23contributors may be used to endorse or promote products derived from24this software without specific prior written permission.2526THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"27AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE28IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE29ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE30LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR31CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF32SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS33INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN34CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)35ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE36POSSIBILITY OF SUCH DAMAGE.37-----------------------------------------------------------------------------38*/394041#include "pcre2_internal.h"42434445/* Advance the offset by one code unit, and return the new value.46It is only called when the offset is not at the end of the subject. */4748static PCRE2_SIZE do_bumpalong(pcre2_match_data *match_data,49PCRE2_SIZE offset)50{51PCRE2_SPTR subject = match_data->subject;52PCRE2_SIZE subject_length = match_data->subject_length;53#ifdef SUPPORT_UNICODE54BOOL utf = (match_data->code->overall_options & PCRE2_UTF) != 0;55#endif5657/* Skip over CRLF as an atomic sequence, if CRLF is configured as a newline58sequence. */5960if (subject[offset] == CHAR_CR && offset + 1 < subject_length &&61subject[offset + 1] == CHAR_LF)62{63switch(match_data->code->newline_convention)64{65case PCRE2_NEWLINE_CRLF:66case PCRE2_NEWLINE_ANY:67case PCRE2_NEWLINE_ANYCRLF:68return offset + 2;69}70}7172/* Advance by one full character if in UTF mode. 
*/7374#ifdef SUPPORT_UNICODE75if (utf)76{77PCRE2_SPTR next = subject + offset + 1;78PCRE2_SPTR subject_end = subject + subject_length;7980(void)subject_end; /* Suppress warning; 32-bit FORWARDCHARTEST ignores this */81FORWARDCHARTEST(next, subject_end);82return next - subject;83}84#endif8586return offset + 1;87}88899091/*************************************************92* Advance the match *93*************************************************/9495PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION96pcre2_next_match(pcre2_match_data *match_data, PCRE2_SIZE *pstart_offset,97uint32_t *poptions)98{99int rc = match_data->rc;100PCRE2_SIZE start_offset = match_data->start_offset;101PCRE2_SIZE *ovector = match_data->ovector;102103/* Match error, or no match: no further iteration possible. In previous versions104of PCRE2, we recommended that clients use a strategy which involved retrying in105certain cases after PCRE2_ERROR_NOMATCH, but this is no longer required. */106107if (rc < 0)108return FALSE;109110/* Match succeeded: get the start offset for the next match */111112/* Although \K can affect the position of ovector[0], there are no ways to do113anything surprising with ovector[1], which must always be >= start_offset. */114115PCRE2_ASSERT(ovector[1] >= start_offset);116117/* Special handling for patterns which contain \K in a lookaround, which enables118the match start to be pushed back to before the starting search offset119(ovector[0] < start_offset) or after the match ends (ovector[0] > ovector[1]).120This is not a problem if ovector[1] > start_offset, because in this case, we can121just attempt the next match at ovector[1]: we are making progress, which is all122that we require.123124However, if we have ovector[1] == start_offset, then we have a very rare case125which must be handled specially, because it's a non-empty match which126nonetheless fails to make progress through the subject. 
*/127128if (ovector[0] != start_offset && ovector[1] == start_offset)129{130/* If the match end is at the end of the subject, we are done. */131132if (start_offset >= match_data->subject_length)133return FALSE;134135/* Otherwise, bump along by one code unit, and do a normal search. */136137*pstart_offset = do_bumpalong(match_data, ovector[1]);138*poptions = 0;139return TRUE;140}141142/* If the previous match was for an empty string, we are finished if we are at143the end of the subject. Otherwise, arrange to run another match at the same144point to see if a non-empty match can be found. */145146if (ovector[0] == ovector[1])147{148/* If the match is at the end of the subject, we are done. */149150if (ovector[0] >= match_data->subject_length)151return FALSE;152153/* Otherwise, continue at this exact same point, but we must set the flag154which ensures that we don't return the exact same empty match again. */155156*pstart_offset = ovector[1];157*poptions = PCRE2_NOTEMPTY_ATSTART;158return TRUE;159}160161/* Finally, we must be in the happy state of a non-empty match, where the end of162the match is further on in the subject than start_offset, so we are easily able163to continue and make progress. */164165*pstart_offset = ovector[1];166*poptions = 0;167return TRUE;168}169170/* End of pcre2_match_next.c */171172173