Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/pcre2/src/pcre2_match_next.c
14710 views
1
/*************************************************
2
* Perl-Compatible Regular Expressions *
3
*************************************************/
4
5
/* PCRE is a library of functions to support regular expressions whose syntax
6
and semantics are as close as possible to those of the Perl 5 language.
7
8
Written by Philip Hazel
9
Original API code Copyright (c) 1997-2012 University of Cambridge
10
New API code Copyright (c) 2016-2024 University of Cambridge
11
12
-----------------------------------------------------------------------------
13
Redistribution and use in source and binary forms, with or without
14
modification, are permitted provided that the following conditions are met:
15
16
* Redistributions of source code must retain the above copyright notice,
17
this list of conditions and the following disclaimer.
18
19
* Redistributions in binary form must reproduce the above copyright
20
notice, this list of conditions and the following disclaimer in the
21
documentation and/or other materials provided with the distribution.
22
23
* Neither the name of the University of Cambridge nor the names of its
24
contributors may be used to endorse or promote products derived from
25
this software without specific prior written permission.
26
27
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37
POSSIBILITY OF SUCH DAMAGE.
38
-----------------------------------------------------------------------------
39
*/
40
41
42
#include "pcre2_internal.h"
43
44
45
46
/* Advance the offset by one code unit, and return the new value.
47
It is only called when the offset is not at the end of the subject. */
48
49
static PCRE2_SIZE do_bumpalong(pcre2_match_data *match_data,
50
PCRE2_SIZE offset)
51
{
52
PCRE2_SPTR subject = match_data->subject;
53
PCRE2_SIZE subject_length = match_data->subject_length;
54
#ifdef SUPPORT_UNICODE
55
BOOL utf = (match_data->code->overall_options & PCRE2_UTF) != 0;
56
#endif
57
58
/* Skip over CRLF as an atomic sequence, if CRLF is configured as a newline
59
sequence. */
60
61
if (subject[offset] == CHAR_CR && offset + 1 < subject_length &&
62
subject[offset + 1] == CHAR_LF)
63
{
64
switch(match_data->code->newline_convention)
65
{
66
case PCRE2_NEWLINE_CRLF:
67
case PCRE2_NEWLINE_ANY:
68
case PCRE2_NEWLINE_ANYCRLF:
69
return offset + 2;
70
}
71
}
72
73
/* Advance by one full character if in UTF mode. */
74
75
#ifdef SUPPORT_UNICODE
76
if (utf)
77
{
78
PCRE2_SPTR next = subject + offset + 1;
79
PCRE2_SPTR subject_end = subject + subject_length;
80
81
(void)subject_end; /* Suppress warning; 32-bit FORWARDCHARTEST ignores this */
82
FORWARDCHARTEST(next, subject_end);
83
return next - subject;
84
}
85
#endif
86
87
return offset + 1;
88
}
89
90
91
92
/*************************************************
93
* Advance the match *
94
*************************************************/
95
96
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
97
pcre2_next_match(pcre2_match_data *match_data, PCRE2_SIZE *pstart_offset,
98
uint32_t *poptions)
99
{
100
int rc = match_data->rc;
101
PCRE2_SIZE start_offset = match_data->start_offset;
102
PCRE2_SIZE *ovector = match_data->ovector;
103
104
/* Match error, or no match: no further iteration possible. In previous versions
105
of PCRE2, we recommended that clients use a strategy which involved retrying in
106
certain cases after PCRE2_ERROR_NOMATCH, but this is no longer required. */
107
108
if (rc < 0)
109
return FALSE;
110
111
/* Match succeeded: get the start offset for the next match */
112
113
/* Although \K can affect the position of ovector[0], there are no ways to do
114
anything surprising with ovector[1], which must always be >= start_offset. */
115
116
PCRE2_ASSERT(ovector[1] >= start_offset);
117
118
/* Special handling for patterns which contain \K in a lookaround, which enables
119
the match start to be pushed back to before the starting search offset
120
(ovector[0] < start_offset) or after the match ends (ovector[0] > ovector[1]).
121
This is not a problem if ovector[1] > start_offset, because in this case, we can
122
just attempt the next match at ovector[1]: we are making progress, which is all
123
that we require.
124
125
However, if we have ovector[1] == start_offset, then we have a very rare case
126
which must be handled specially, because it's a non-empty match which
127
nonetheless fails to make progress through the subject. */
128
129
if (ovector[0] != start_offset && ovector[1] == start_offset)
130
{
131
/* If the match end is at the end of the subject, we are done. */
132
133
if (start_offset >= match_data->subject_length)
134
return FALSE;
135
136
/* Otherwise, bump along by one code unit, and do a normal search. */
137
138
*pstart_offset = do_bumpalong(match_data, ovector[1]);
139
*poptions = 0;
140
return TRUE;
141
}
142
143
/* If the previous match was for an empty string, we are finished if we are at
144
the end of the subject. Otherwise, arrange to run another match at the same
145
point to see if a non-empty match can be found. */
146
147
if (ovector[0] == ovector[1])
148
{
149
/* If the match is at the end of the subject, we are done. */
150
151
if (ovector[0] >= match_data->subject_length)
152
return FALSE;
153
154
/* Otherwise, continue at this exact same point, but we must set the flag
155
which ensures that we don't return the exact same empty match again. */
156
157
*pstart_offset = ovector[1];
158
*poptions = PCRE2_NOTEMPTY_ATSTART;
159
return TRUE;
160
}
161
162
/* Finally, we must be in the happy state of a non-empty match, where the end of
163
the match is further on in the subject than start_offset, so we are easily able
164
to continue and make progress. */
165
166
*pstart_offset = ovector[1];
167
*poptions = 0;
168
return TRUE;
169
}
170
171
/* End of pcre2_match_next.c */
172
173