CoCalc -- BytecodeName.java

GitHub Repository: PojavLauncherTeam/mobile
Path: blob/master/src/java.base/share/classes/sun/invoke/util/BytecodeName.java
⁴¹¹⁵⁹ views
1
/*
2
 * Copyright (c) 2007, 2011, Oracle and/or its affiliates. All rights reserved.
3
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4
 *
5
 * This code is free software; you can redistribute it and/or modify it
6
 * under the terms of the GNU General Public License version 2 only, as
7
 * published by the Free Software Foundation.  Oracle designates this
8
 * particular file as subject to the "Classpath" exception as provided
9
 * by Oracle in the LICENSE file that accompanied this code.
10
 *
11
 * This code is distributed in the hope that it will be useful, but WITHOUT
12
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14
 * version 2 for more details (a copy is included in the LICENSE file that
15
 * accompanied this code).
16
 *
17
 * You should have received a copy of the GNU General Public License version
18
 * 2 along with this work; if not, write to the Free Software Foundation,
19
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20
 *
21
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22
 * or visit www.oracle.com if you need additional information or have any
23
 * questions.
24
 */
25

26
package sun.invoke.util;
27

28
/**
29
 * Utility routines for dealing with bytecode-level names.
30
 * Includes universal mangling rules for the JVM.
31
 *
32
 * <h3>Avoiding Dangerous Characters </h3>
33
 *
34
 * <p>
35
 * The JVM defines a very small set of characters which are illegal
36
 * in name spellings.  We will slightly extend and regularize this set
37
 * into a group of <cite>dangerous characters</cite>.
38
 * These characters will then be replaced, in mangled names, by escape sequences.
39
 * In addition, accidental escape sequences must be further escaped.
40
 * Finally, a special prefix will be applied if and only if
41
 * the mangling would otherwise fail to begin with the escape character.
42
 * This happens to cover the corner case of the null string,
43
 * and also clearly marks symbols which need demangling.
44
 * </p>
45
 * <p>
46
 * Dangerous characters are the union of all characters forbidden
47
 * or otherwise restricted by the JVM specification,
48
 * plus their mates, if they are brackets
49
 * (<code><big><b>[</b></big></code> and <code><big><b>]</b></big></code>,
50
 * <code><big><b>&lt;</b></big></code> and <code><big><b>&gt;</b></big></code>),
51
 * plus, arbitrarily, the colon character <code><big><b>:</b></big></code>.
52
 * There is no distinction between type, method, and field names.
53
 * This makes it easier to convert between mangled names of different
54
 * types, since they do not need to be decoded (demangled).
55
 * </p>
56
 * <p>
57
 * The escape character is backslash <code><big><b>\</b></big></code>
58
 * (also known as reverse solidus).
59
 * This character is, until now, unheard of in bytecode names,
60
 * but traditional in the proposed role.
61
 *
62
 * </p>
63
 * <h3> Replacement Characters </h3>
64
 *
65
 *
66
 * <p>
67
 * Every escape sequence is two characters
68
 * (in fact, two UTF8 bytes) beginning with
69
 * the escape character and followed by a
70
 * <cite>replacement character</cite>.
71
 * (Since the replacement character is never a backslash,
72
 * iterated manglings do not double in size.)
73
 * </p>
74
 * <p>
75
 * Each dangerous character has some rough visual similarity
76
 * to its corresponding replacement character.
77
 * This makes mangled symbols easier to recognize by sight.
78
 * </p>
79
 * <p>
80
 * The dangerous characters are
81
 * <code><big><b>/</b></big></code> (forward slash, used to delimit package components),
82
 * <code><big><b>.</b></big></code> (dot, also a package delimiter),
83
 * <code><big><b>;</b></big></code> (semicolon, used in signatures),
84
 * <code><big><b>$</b></big></code> (dollar, used in inner classes and synthetic members),
85
 * <code><big><b>&lt;</b></big></code> (left angle),
86
 * <code><big><b>&gt;</b></big></code> (right angle),
87
 * <code><big><b>[</b></big></code> (left square bracket, used in array types),
88
 * <code><big><b>]</b></big></code> (right square bracket, reserved in this scheme for language use),
89
 * and <code><big><b>:</b></big></code> (colon, reserved in this scheme for language use).
90
 * Their replacements are, respectively,
91
 * <code><big><b>|</b></big></code> (vertical bar),
92
 * <code><big><b>,</b></big></code> (comma),
93
 * <code><big><b>?</b></big></code> (question mark),
94
 * <code><big><b>%</b></big></code> (percent),
95
 * <code><big><b>^</b></big></code> (caret),
96
 * <code><big><b>_</b></big></code> (underscore), and
97
 * <code><big><b>{</b></big></code> (left curly bracket),
98
 * <code><big><b>}</b></big></code> (right curly bracket),
99
 * <code><big><b>!</b></big></code> (exclamation mark).
100
 * In addition, the replacement character for the escape character itself is
101
 * <code><big><b>-</b></big></code> (hyphen),
102
 * and the replacement character for the null prefix is
103
 * <code><big><b>=</b></big></code> (equal sign).
104
 * </p>
105
 * <p>
106
 * An escape character <code><big><b>\</b></big></code>
107
 * followed by any of these replacement characters
108
 * is an escape sequence, and there are no other escape sequences.
109
 * An equal sign is only part of an escape sequence
110
 * if it is the second character in the whole string, following a backslash.
111
 * Two consecutive backslashes do <em>not</em> form an escape sequence.
112
 * </p>
113
 * <p>
114
 * Each escape sequence replaces a so-called <cite>original character</cite>
115
 * which is either one of the dangerous characters or the escape character.
116
 * A null prefix replaces an initial null string, not a character.
117
 * </p>
118
 * <p>
119
 * All this implies that escape sequences cannot overlap and may be
120
 * determined all at once for a whole string.  Note that a spelling
121
 * string can contain <cite>accidental escapes</cite>, apparent escape
122
 * sequences which must not be interpreted as manglings.
123
 * These are disabled by replacing their leading backslash with an
124
 * escape sequence (<code><big><b>\-</b></big></code>).  To mangle a string, three logical steps
125
 * are required, though they may be carried out in one pass:
126
 * </p>
127
 * <ol>
128
 *   <li>In each accidental escape, replace the backslash with an escape sequence
129
 * (<code><big><b>\-</b></big></code>).</li>
130
 *   <li>Replace each dangerous character with an escape sequence
131
 * (<code><big><b>\|</b></big></code> for <code><big><b>/</b></big></code>, etc.).</li>
132
 *   <li>If the first two steps introduced any change, <em>and</em>
133
 * if the string does not already begin with a backslash, prepend a null prefix (<code><big><b>\=</b></big></code>).</li>
134
 * </ol>
135
 *
136
 * To demangle a mangled string that begins with an escape,
137
 * remove any null prefix, and then replace (in parallel)
138
 * each escape sequence by its original character.
139
 * <p>Spelling strings which contain accidental
140
 * escapes <em>must</em> have them replaced, even if those
141
 * strings do not contain dangerous characters.
142
 * This restriction means that mangling a string always
143
 * requires a scan of the string for escapes.
144
 * But then, a scan would be required anyway,
145
 * to check for dangerous characters.
146
 *
147
 * </p>
148
 * <h3> Nice Properties </h3>
149
 *
150
 * <p>
151
 * If a bytecode name does not contain any escape sequence,
152
 * demangling is a no-op:  The string demangles to itself.
153
 * Such a string is called <cite>self-mangling</cite>.
154
 * Almost all strings are self-mangling.
155
 * In practice, to demangle almost any name &ldquo;found in nature&rdquo;,
156
 * simply verify that it does not begin with a backslash.
157
 * </p>
158
 * <p>
159
 * Mangling is a one-to-one function, while demangling
160
 * is a many-to-one function.
161
 * A mangled string is defined as <cite>validly mangled</cite> if
162
 * it is in fact the unique mangling of its spelling string.
163
 * Three examples of invalidly mangled strings are <code><big><b>\=foo</b></big></code>,
164
 * <code><big><b>\-bar</b></big></code>, and <code><big><b>baz\!</b></big></code>, which demangle to <code><big><b>foo</b></big></code>, <code><big><b>\bar</b></big></code>, and
165
 * <code><big><b>baz\!</b></big></code>, but then remangle to <code><big><b>foo</b></big></code>, <code><big><b>\bar</b></big></code>, and <code><big><b>\=baz\-!</b></big></code>.
166
 * If a language back-end or runtime is using mangled names,
167
 * it should never present an invalidly mangled bytecode
168
 * name to the JVM.  If the runtime encounters one,
169
 * it should also report an error, since such an occurrence
170
 * probably indicates a bug in name encoding which
171
 * will lead to errors in linkage.
172
 * However, this note does not propose that the JVM verifier
173
 * detect invalidly mangled names.
174
 * </p>
175
 * <p>
176
 * As a result of these rules, it is a simple matter to
177
 * compute validly mangled substrings and concatenations
178
 * of validly mangled strings, and (with a little care)
179
 * these correspond to corresponding operations on their
180
 * spelling strings.
181
 * </p>
182
 * <ul>
183
 *   <li>Any prefix of a validly mangled string is also validly mangled,
184
 * although a null prefix may need to be removed.</li>
185
 *   <li>Any suffix of a validly mangled string is also validly mangled,
186
 * although a null prefix may need to be added.</li>
187
 *   <li>Two validly mangled strings, when concatenated,
188
 * are also validly mangled, although any null prefix
189
 * must be removed from the second string,
190
 * and a trailing backslash on the first string may need escaping,
191
 * if it would participate in an accidental escape when followed
192
 * by the first character of the second string.</li>
193
 * </ul>
194
 * <p>If languages that include non-Java symbol spellings use this
195
 * mangling convention, they will enjoy the following advantages:
196
 * </p>
197
 * <ul>
198
 *   <li>They can interoperate via symbols they share in common.</li>
199
 *   <li>Low-level tools, such as backtrace printers, will have readable displays.</li>
200
 *   <li>Future JVM and language extensions can safely use the dangerous characters
201
 * for structuring symbols, but will never interfere with valid spellings.</li>
202
 *   <li>Runtimes and compilers can use standard libraries for mangling and demangling.</li>
203
 *   <li>Occasional transliterations and name composition will be simple and regular,
204
 * for classes, methods, and fields.</li>
205
 *   <li>Bytecode names will continue to be compact.
206
 * When mangled, spellings will at most double in length, either in
207
 * UTF8 or UTF16 format, and most will not change at all.</li>
208
 * </ul>
209
 *
210
 *
211
 * <h3> Suggestions for Human Readable Presentations </h3>
212
 *
213
 *
214
 * <p>
215
 * For human readable displays of symbols,
216
 * it will be better to present a string-like quoted
217
 * representation of the spelling, because JVM users
218
 * are generally familiar with such tokens.
219
 * We suggest using single or double quotes before and after
220
 * mangled symbols which are not valid Java identifiers,
221
 * with quotes, backslashes, and non-printing characters
222
 * escaped as if for literals in the Java language.
223
 * </p>
224
 * <p>
225
 * For example, an HTML-like spelling
226
 * <code><big><b>&lt;pre&gt;</b></big></code> mangles to
227
 * <code><big><b>\^pre\_</b></big></code> and could
228
 * display more cleanly as
229
 * <code><big><b>'&lt;pre&gt;'</b></big></code>,
230
 * with the quotes included.
231
 * Such string-like conventions are <em>not</em> suitable
232
 * for mangled bytecode names, in part because
233
 * dangerous characters must be eliminated, rather
234
 * than just quoted.  Otherwise internally structured
235
 * strings like package prefixes and method signatures
236
 * could not be reliably parsed.
237
 * </p>
238
 * <p>
239
 * In such human-readable displays, invalidly mangled
240
 * names should <em>not</em> be demangled and quoted,
241
 * for this would be misleading.  Likewise, JVM symbols
242
 * which contain dangerous characters (like dots in field
243
 * names or brackets in method names) should not be
244
 * simply quoted.  The bytecode names
245
 * <code><big><b>\=phase\,1</b></big></code> and
246
 * <code><big><b>phase.1</b></big></code> are distinct,
247
 * and in demangled displays they should be presented as
248
 * <code><big><b>'phase.1'</b></big></code> and something like
249
 * <code><big><b>'phase'.1</b></big></code>, respectively.
250
 * </p>
251
 *
252
 * @author John Rose
253
 * @version 1.2, 02/06/2008
254
 * @see <a href="http://blogs.sun.com/jrose/entry/symbolic_freedom_in_the_vm">Symbolic Freedom in the VM</a>
255
 */
256
public class BytecodeName {
    private BytecodeName() { }  // static only class

    /** Given a source name, produce the corresponding bytecode name.
     * The source name should not be qualified, because any syntactic
     * markers (dots, slashes, dollar signs, colons, etc.) will be mangled.
     * @param s the source name
     * @return a valid bytecode name which represents the source name
     */
    public static String toBytecodeName(String s) {
        String bn = mangle(s);
        // Either nothing changed (same object), or the result carries the escape prefix.
        assert((Object)bn == s || looksMangled(bn)) : bn;
        // Mangling must be reversible.
        assert(s.equals(toSourceName(bn))) : s;
        return bn;
    }

    /** Given an unqualified bytecode name, produce the corresponding source name.
     * The bytecode name must not contain dangerous characters.
     * In particular, it must not be qualified or segmented by colon {@code ':'}.
     * @param s the bytecode name
     * @return the source name, which may possibly have unsafe characters
     * @throws IllegalArgumentException if the bytecode name is not {@link #isSafeBytecodeName safe}
     * @see #isSafeBytecodeName(java.lang.String)
     */
    public static String toSourceName(String s) {
        checkSafeBytecodeName(s);
        String sn = s;
        if (looksMangled(s)) {
            sn = demangle(s);
            // Only validly mangled names survive a demangle/mangle round trip.
            assert(s.equals(mangle(sn))) : s+" => "+sn+" => "+mangle(sn);
        }
        return sn;
    }

    /**
     * Given a bytecode name from a classfile, separate it into
     * components delimited by dangerous characters.
     * Each resulting array element will be either a dangerous character
     * (as a {@link Character}), or else the source name (a {@link String})
     * obtained by demangling a non-empty run of safe characters.
     * For example, the qualified class name {@code java/lang/String}
     * will be parsed into the array {@code {"java", '/', "lang", '/', "String"}}.
     * The name {@code <init>} will be parsed into {@code {'<', "init", '>'}}.
     * The name {@code foo/bar$:baz} will be parsed into
     * {@code {"foo", '/', "bar", '$', ':', "baz"}}.
     * The name {@code ::\=:foo:\=bar\!baz} will be parsed into
     * {@code {':', ':', "", ':', "foo", ':', "bar:baz"}}.
     * @param s the bytecode name (which may be qualified)
     * @return the components in order; an empty array for the empty string
     */
    public static Object[] parseBytecodeName(String s) {
        // The first scan only counts components so the array can be sized exactly;
        // the second scan fills it in.
        Object[] res = new Object[scanComponents(s, null)];
        if (res.length != 0) {
            scanComponents(s, res);
        }
        return res;
    }

    /**
     * Walk {@code s}, splitting it at dangerous characters.
     * @param s the bytecode name to scan
     * @param out array to receive the components, or null to only count them
     * @return the number of components found
     */
    private static int scanComponents(String s, Object[] out) {
        int slen = s.length();
        int fillp = 0;   // next free slot in out
        int lasti = 0;   // start of the current run of safe characters
        // Note the <=: the position one past the end terminates the last run.
        for (int i = 0; i <= slen; i++) {
            int whichDC = -1;
            if (i < slen) {
                whichDC = DANGEROUS_CHARS.indexOf(s.charAt(i));
                if (whichDC < DANGEROUS_CHAR_FIRST_INDEX) {
                    continue;  // ordinary character, or the escape character
                }
            }
            // got to end of string or next dangerous char
            if (lasti < i) {
                // normal component
                if (out != null) {
                    out[fillp] = toSourceName(s.substring(lasti, i));
                }
                fillp++;
            }
            if (whichDC >= DANGEROUS_CHAR_FIRST_INDEX) {
                if (out != null) {
                    out[fillp] = DANGEROUS_CHARS_CA[whichDC];
                }
                fillp++;
            }
            lasti = i + 1;
        }
        return fillp;
    }

    /**
     * Given a series of components, create a bytecode name for a classfile.
     * This is the inverse of {@link #parseBytecodeName(java.lang.String)}.
     * Each {@link String} component is treated as a source name and is mangled
     * with {@link #toBytecodeName(java.lang.String)}; {@link Character}
     * components (the delimiters produced by {@code parseBytecodeName})
     * are appended unchanged.  Any other non-null component is appended
     * using its string form.  The caller's array is never modified.
     * @param components a series of name components
     * @return the concatenation of all components, after mangling the strings
     * @throws NullPointerException if any component is null
     */
    public static String unparseBytecodeName(Object[] components) {
        Object[] mangled = components;
        for (int i = 0; i < components.length; i++) {
            Object c = components[i];
            if (c == null) {
                // A null would otherwise be spelled "null" in the resulting name.
                throw new NullPointerException("component " + i + " is null");
            }
            if (c instanceof String) {
                String mc = toBytecodeName((String) c);
                if (components.length == 1) {
                    return mc;  // usual case
                }
                if ((Object)mc != c) {
                    // copy on first change so the caller's array stays intact
                    if (mangled == components) {
                        mangled = components.clone();
                    }
                    mangled[i] = mc;
                }
            }
        }
        return appendAll(mangled);
    }

    /** Concatenate the string forms of all components. */
    private static String appendAll(Object[] components) {
        if (components.length <= 1) {
            if (components.length == 1) {
                return String.valueOf(components[0]);
            }
            return "";
        }
        // Estimate the length up front: strings by length, anything else as one char.
        int slen = 0;
        for (Object c : components) {
            if (c instanceof String) {
                slen += ((String) c).length();
            } else {
                slen += 1;
            }
        }
        StringBuilder sb = new StringBuilder(slen);
        for (Object c : components) {
            sb.append(c);
        }
        return sb.toString();
    }

    /**
     * Given a bytecode name, produce the corresponding display name.
     * This is the source name, plus quotes if needed.
     * If the bytecode name contains dangerous characters,
     * assume that they are being used as punctuation,
     * and pass them through unchanged.
     * Non-empty runs of non-dangerous characters are demangled
     * if necessary, and the resulting names are quoted if
     * they are not already valid Java identifiers, or if
     * they contain a dangerous character (i.e., dollar sign "$").
     * Single quotes are used when quoting.
     * Within quoted names, embedded single quotes and backslashes
     * are further escaped by prepended backslashes.
     *
     * @param s the original bytecode name (which may be qualified)
     * @return a human-readable presentation
     */
    public static String toDisplayName(String s) {
        Object[] components = parseBytecodeName(s);
        for (int i = 0; i < components.length; i++) {
            if (!(components[i] instanceof String)) {
                continue;  // a delimiter; shown as-is
            }
            // note that the name is already demangled by parseBytecodeName
            String sn = (String) components[i];
            if (!isJavaIdent(sn) || sn.indexOf('$') >= 0) {
                components[i] = quoteDisplay(sn);
            }
        }
        return appendAll(components);
    }

    /** Report whether {@code s} is non-empty and spelled like a Java identifier. */
    private static boolean isJavaIdent(String s) {
        int slen = s.length();
        if (slen == 0) {
            return false;
        }
        if (!Character.isJavaIdentifierStart(s.charAt(0))) {
            return false;
        }
        for (int i = 1; i < slen; i++) {
            if (!Character.isJavaIdentifierPart(s.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Wrap {@code s} in single quotes, putting a backslash in front of
     * every embedded single quote and backslash.
     */
    private static String quoteDisplay(String s) {
        // TO DO:  Replace weird characters in s by C-style escapes.
        StringBuilder sb = new StringBuilder(s.length() + 4);
        sb.append('\'');
        for (int i = 0, slen = s.length(); i < slen; i++) {
            char c = s.charAt(i);
            if (c == '\'' || c == ESCAPE_C) {
                sb.append(ESCAPE_C);
            }
            sb.append(c);
        }
        return sb.append('\'').toString();
    }

    /** Throw {@link IllegalArgumentException} unless {@code s} is a safe bytecode name. */
    private static void checkSafeBytecodeName(String s)
            throws IllegalArgumentException {
        if (!isSafeBytecodeName(s)) {
            throw new IllegalArgumentException(s);
        }
    }

    /**
     * Report whether a simple name is safe as a bytecode name.
     * Such names are acceptable in class files as class, method, and field names.
     * Additionally, they are free of "dangerous" characters, even if those
     * characters are legal in some (or all) names in class files.
     * The escape character (backslash) is not counted as dangerous here.
     * @param s the proposed bytecode name
     * @return true if the name is non-empty and all of its characters are safe
     */
    public static boolean isSafeBytecodeName(String s) {
        return !s.isEmpty() && indexOfDangerousChar(s, 0) < 0;
    }

    /**
     * Report whether a character is safe in a bytecode name.
     * This is true of any unicode character except the following
     * <em>dangerous characters</em>: {@code ".;:$[]<>/"}.
     * @param c the proposed character
     * @return true if the character is safe to use in classfiles
     */
    public static boolean isSafeBytecodeChar(char c) {
        return DANGEROUS_CHARS.indexOf(c) < DANGEROUS_CHAR_FIRST_INDEX;
    }

    /** Report whether {@code s} begins with the escape character. */
    private static boolean looksMangled(String s) {
        return !s.isEmpty() && s.charAt(0) == ESCAPE_C;
    }

    /**
     * Escape dangerous characters and accidental escape sequences in {@code s}.
     * Returns {@code s} itself (the same object) when nothing needs escaping.
     */
    private static String mangle(String s) {
        if (s.isEmpty()) {
            return NULL_ESCAPE;
        }

        // build this lazily, when we first need an escape:
        StringBuilder sb = null;

        for (int i = 0, slen = s.length(); i < slen; i++) {
            char c = s.charAt(i);

            boolean needEscape = false;
            if (c == ESCAPE_C) {
                // A backslash needs escaping only if, together with the next
                // character, it would read as an escape sequence.
                if (i+1 < slen) {
                    char c1 = s.charAt(i+1);
                    if ((i == 0 && c1 == NULL_ESCAPE_C)
                        || c1 != originalOfReplacement(c1)) {
                        // an accidental escape
                        needEscape = true;
                    }
                }
            } else {
                needEscape = isDangerous(c);
            }

            if (!needEscape) {
                if (sb != null) {
                    sb.append(c);
                }
                continue;
            }

            // build sb if this is the first escape
            if (sb == null) {
                sb = new StringBuilder(s.length()+10);
                // mangled names must begin with a backslash:
                if (s.charAt(0) != ESCAPE_C && i > 0) {
                    sb.append(NULL_ESCAPE);
                }
                // append the string so far, which is unremarkable:
                sb.append(s, 0, i);
            }

            // rewrite \ to \-, / to \|, etc.
            sb.append(ESCAPE_C);
            sb.append(replacementOf(c));
        }

        if (sb != null) {
            return sb.toString();
        }
        return s;
    }

    /**
     * Drop a leading null prefix, if any, and replace every escape sequence
     * in {@code s} by its original character.
     */
    private static String demangle(String s) {
        // build this lazily, when we first meet an escape:
        StringBuilder sb = null;

        int stringStart = 0;
        if (s.startsWith(NULL_ESCAPE)) {
            stringStart = 2;
        }

        for (int i = stringStart, slen = s.length(); i < slen; i++) {
            char c = s.charAt(i);

            if (c == ESCAPE_C && i+1 < slen) {
                // might be an escape sequence
                char rc = s.charAt(i+1);
                char oc = originalOfReplacement(rc);
                if (oc != rc) {
                    // build sb if this is the first escape
                    if (sb == null) {
                        sb = new StringBuilder(s.length());
                        // append the string so far, which is unremarkable:
                        sb.append(s, stringStart, i);
                    }
                    ++i;  // skip both characters
                    c = oc;
                }
            }

            if (sb != null) {
                sb.append(c);
            }
        }

        if (sb != null) {
            return sb.toString();
        }
        return s.substring(stringStart);
    }

    /** The escape character, backslash. */
    static final char ESCAPE_C = '\\';
    // empty escape sequence to avoid a null name or illegal prefix
    static final char NULL_ESCAPE_C = '=';
    static final String NULL_ESCAPE = ESCAPE_C+""+NULL_ESCAPE_C;

    // The two strings below are parallel: the character at index i of
    // REPLACEMENT_CHARS stands in for the character at index i of DANGEROUS_CHARS.
    static final String DANGEROUS_CHARS   = "\\/.;:$[]<>"; // \\ must be first
    static final String REPLACEMENT_CHARS =  "-|,?!%{}^_";
    static final int DANGEROUS_CHAR_FIRST_INDEX = 1; // index after \\
    static final char[] DANGEROUS_CHARS_A   = DANGEROUS_CHARS.toCharArray();
    static final char[] REPLACEMENT_CHARS_A = REPLACEMENT_CHARS.toCharArray();
    // boxed copies of DANGEROUS_CHARS, handed out by parseBytecodeName
    static final Character[] DANGEROUS_CHARS_CA;
    static {
        Character[] dcca = new Character[DANGEROUS_CHARS.length()];
        for (int i = 0; i < dcca.length; i++) {
            dcca[i] = Character.valueOf(DANGEROUS_CHARS.charAt(i));
        }
        DANGEROUS_CHARS_CA = dcca;
    }

    // One bit per character code below 128; set for every character that
    // appears in DANGEROUS_CHARS or REPLACEMENT_CHARS.
    static final long[] SPECIAL_BITMAP = new long[2];  // 128 bits
    static {
        String SPECIAL = DANGEROUS_CHARS + REPLACEMENT_CHARS;
        for (char c : SPECIAL.toCharArray()) {
            // the shift distance uses only the low six bits of c
            SPECIAL_BITMAP[c >>> 6] |= 1L << c;
        }
    }

    /** Quick bitmap test: is {@code c} a dangerous or replacement character? */
    static boolean isSpecial(char c) {
        if ((c >>> 6) < SPECIAL_BITMAP.length) {
            return ((SPECIAL_BITMAP[c >>> 6] >> c) & 1) != 0;
        } else {
            return false;
        }
    }

    /** Return the replacement for {@code c}, or {@code c} itself if it has none. */
    static char replacementOf(char c) {
        if (!isSpecial(c)) {
            return c;
        }
        int i = DANGEROUS_CHARS.indexOf(c);
        if (i < 0) {
            return c;
        }
        return REPLACEMENT_CHARS.charAt(i);
    }

    /** Return the character that {@code c} replaces, or {@code c} itself if it is not a replacement. */
    static char originalOfReplacement(char c) {
        if (!isSpecial(c)) {
            return c;
        }
        int i = REPLACEMENT_CHARS.indexOf(c);
        if (i < 0) {
            return c;
        }
        return DANGEROUS_CHARS.charAt(i);
    }

    /** Report whether {@code c} is a dangerous character; the escape character does not count. */
    static boolean isDangerous(char c) {
        if (!isSpecial(c)) {
            return false;
        }
        return (DANGEROUS_CHARS.indexOf(c) >= DANGEROUS_CHAR_FIRST_INDEX);
    }

    /** Return the index of the first dangerous character at or after {@code from}, or -1. */
    static int indexOfDangerousChar(String s, int from) {
        for (int i = from, slen = s.length(); i < slen; i++) {
            if (isDangerous(s.charAt(i))) {
                return i;
            }
        }
        return -1;
    }

    /** Return the index of the last dangerous character at or before {@code from}, or -1. */
    static int lastIndexOfDangerousChar(String s, int from) {
        for (int i = Math.min(from, s.length()-1); i >= 0; i--) {
            if (isDangerous(s.charAt(i))) {
                return i;
            }
        }
        return -1;
    }
}
628

629
Product

Resources

Company