Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mobile
Path: blob/master/src/java.base/share/classes/sun/net/www/ParseUtil.java
41159 views
1
/*
2
* Copyright (c) 1998, 2021, Oracle and/or its affiliates. All rights reserved.
3
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4
*
5
* This code is free software; you can redistribute it and/or modify it
6
* under the terms of the GNU General Public License version 2 only, as
7
* published by the Free Software Foundation. Oracle designates this
8
* particular file as subject to the "Classpath" exception as provided
9
* by Oracle in the LICENSE file that accompanied this code.
10
*
11
* This code is distributed in the hope that it will be useful, but WITHOUT
12
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14
* version 2 for more details (a copy is included in the LICENSE file that
15
* accompanied this code).
16
*
17
* You should have received a copy of the GNU General Public License version
18
* 2 along with this work; if not, write to the Free Software Foundation,
19
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20
*
21
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22
* or visit www.oracle.com if you need additional information or have any
23
* questions.
24
*/
25
26
package sun.net.www;
27
28
import java.io.File;
29
import java.net.MalformedURLException;
30
import java.net.URI;
31
import java.net.URISyntaxException;
32
import java.net.URL;
33
import java.nio.ByteBuffer;
34
import java.nio.CharBuffer;
35
import java.nio.charset.CharacterCodingException;
36
import java.nio.charset.CharsetDecoder;
37
import java.nio.charset.CharsetEncoder;
38
import java.nio.charset.CoderResult;
39
import java.nio.charset.CodingErrorAction;
40
import java.util.HexFormat;
41
42
import sun.nio.cs.UTF_8;
43
44
/**
45
* A class that contains useful routines common to sun.net.www
46
* @author Mike McCloskey
47
*/
48
49
public final class ParseUtil {
50
51
// Formatter that renders hexadecimal digits in upper case; appendEscape uses
// it to write the two digits that follow the '%' of an escape.
private static final HexFormat HEX_UPPERCASE = HexFormat.of().withUpperCase();
52
53
// Private constructor: the class is final and every member is static, so no
// instance is ever needed.
private ParseUtil() {}
54
55
/**
56
* Constructs an encoded version of the specified path string suitable
57
* for use in the construction of a URL.
58
*
59
* A path separator is replaced by a forward slash. The string is UTF8
60
* encoded. The % escape sequence is used for characters that are above
61
* 0x7F or those defined in RFC2396 as reserved or excluded in the path
62
* component of a URL.
63
*/
64
public static String encodePath(String path) {
65
return encodePath(path, true);
66
}
67
/*
68
* flag indicates whether path uses platform dependent
69
* File.separatorChar or not. True indicates path uses platform
70
* dependent File.separatorChar.
71
*/
72
public static String encodePath(String path, boolean flag) {
73
if (flag && File.separatorChar != '/') {
74
return encodePath(path, 0, File.separatorChar);
75
} else {
76
int index = firstEncodeIndex(path);
77
if (index > -1) {
78
return encodePath(path, index, '/');
79
} else {
80
return path;
81
}
82
}
83
}
84
85
private static int firstEncodeIndex(String path) {
86
int len = path.length();
87
for (int i = 0; i < len; i++) {
88
char c = path.charAt(i);
89
// Ordering in the following test is performance sensitive,
90
// and typically paths have most chars in the a-z range, then
91
// in the symbol range '&'-':' (includes '.', '/' and '0'-'9')
92
// and more rarely in the A-Z range.
93
if (c >= 'a' && c <= 'z' ||
94
c >= '&' && c <= ':' ||
95
c >= 'A' && c <= 'Z') {
96
continue;
97
} else if (c > 0x007F || match(c, L_ENCODED, H_ENCODED)) {
98
return i;
99
}
100
}
101
return -1;
102
}
103
104
private static String encodePath(String path, int index, char sep) {
105
char[] pathCC = path.toCharArray();
106
char[] retCC = new char[pathCC.length * 2 + 16 - index];
107
if (index > 0) {
108
System.arraycopy(pathCC, 0, retCC, 0, index);
109
}
110
int retLen = index;
111
112
for (int i = index; i < pathCC.length; i++) {
113
char c = pathCC[i];
114
if (c == sep)
115
retCC[retLen++] = '/';
116
else {
117
if (c <= 0x007F) {
118
if (c >= 'a' && c <= 'z' ||
119
c >= 'A' && c <= 'Z' ||
120
c >= '0' && c <= '9') {
121
retCC[retLen++] = c;
122
} else if (match(c, L_ENCODED, H_ENCODED)) {
123
retLen = escape(retCC, c, retLen);
124
} else {
125
retCC[retLen++] = c;
126
}
127
} else if (c > 0x07FF) {
128
retLen = escape(retCC, (char)(0xE0 | ((c >> 12) & 0x0F)), retLen);
129
retLen = escape(retCC, (char)(0x80 | ((c >> 6) & 0x3F)), retLen);
130
retLen = escape(retCC, (char)(0x80 | ((c >> 0) & 0x3F)), retLen);
131
} else {
132
retLen = escape(retCC, (char)(0xC0 | ((c >> 6) & 0x1F)), retLen);
133
retLen = escape(retCC, (char)(0x80 | ((c >> 0) & 0x3F)), retLen);
134
}
135
}
136
//worst case scenario for character [0x7ff-] every single
137
//character will be encoded into 9 characters.
138
if (retLen + 9 > retCC.length) {
139
int newLen = retCC.length * 2 + 16;
140
if (newLen < 0) {
141
newLen = Integer.MAX_VALUE;
142
}
143
char[] buf = new char[newLen];
144
System.arraycopy(retCC, 0, buf, 0, retLen);
145
retCC = buf;
146
}
147
}
148
return new String(retCC, 0, retLen);
149
}
150
151
/**
152
* Appends the URL escape sequence for the specified char to the
153
* specified character array.
154
*/
155
private static int escape(char[] cc, char c, int index) {
156
cc[index++] = '%';
157
cc[index++] = Character.forDigit((c >> 4) & 0xF, 16);
158
cc[index++] = Character.forDigit(c & 0xF, 16);
159
return index;
160
}
161
162
/**
163
* Un-escape and return the character at position i in string s.
164
*/
165
private static byte unescape(String s, int i) {
166
return (byte) Integer.parseInt(s, i + 1, i + 3, 16);
167
}
168
169
170
/**
171
* Returns a new String constructed from the specified String by replacing
172
* the URL escape sequences and UTF8 encoding with the characters they
173
* represent.
174
*/
175
public static String decode(String s) {
176
int n = s.length();
177
if ((n == 0) || (s.indexOf('%') < 0))
178
return s;
179
180
StringBuilder sb = new StringBuilder(n);
181
ByteBuffer bb = ByteBuffer.allocate(n);
182
CharBuffer cb = CharBuffer.allocate(n);
183
CharsetDecoder dec = UTF_8.INSTANCE.newDecoder()
184
.onMalformedInput(CodingErrorAction.REPORT)
185
.onUnmappableCharacter(CodingErrorAction.REPORT);
186
187
char c = s.charAt(0);
188
for (int i = 0; i < n;) {
189
assert c == s.charAt(i);
190
if (c != '%') {
191
sb.append(c);
192
if (++i >= n)
193
break;
194
c = s.charAt(i);
195
continue;
196
}
197
bb.clear();
198
int ui = i;
199
for (;;) {
200
assert (n - i >= 2);
201
try {
202
bb.put(unescape(s, i));
203
} catch (NumberFormatException e) {
204
throw new IllegalArgumentException();
205
}
206
i += 3;
207
if (i >= n)
208
break;
209
c = s.charAt(i);
210
if (c != '%')
211
break;
212
}
213
bb.flip();
214
cb.clear();
215
dec.reset();
216
CoderResult cr = dec.decode(bb, cb, true);
217
if (cr.isError())
218
throw new IllegalArgumentException("Error decoding percent encoded characters");
219
cr = dec.flush(cb);
220
if (cr.isError())
221
throw new IllegalArgumentException("Error decoding percent encoded characters");
222
sb.append(cb.flip().toString());
223
}
224
225
return sb.toString();
226
}
227
228
public static URL fileToEncodedURL(File file)
229
throws MalformedURLException
230
{
231
String path = file.getAbsolutePath();
232
path = ParseUtil.encodePath(path);
233
if (!path.startsWith("/")) {
234
path = "/" + path;
235
}
236
if (!path.endsWith("/") && file.isDirectory()) {
237
path = path + "/";
238
}
239
return new URL("file", "", path);
240
}
241
242
public static java.net.URI toURI(URL url) {
243
String protocol = url.getProtocol();
244
String auth = url.getAuthority();
245
String path = url.getPath();
246
String query = url.getQuery();
247
String ref = url.getRef();
248
if (path != null && !(path.startsWith("/")))
249
path = "/" + path;
250
251
//
252
// In java.net.URI class, a port number of -1 implies the default
253
// port number. So get it stripped off before creating URI instance.
254
//
255
if (auth != null && auth.endsWith(":-1"))
256
auth = auth.substring(0, auth.length() - 3);
257
258
java.net.URI uri;
259
try {
260
uri = createURI(protocol, auth, path, query, ref);
261
} catch (java.net.URISyntaxException e) {
262
uri = null;
263
}
264
return uri;
265
}
266
267
//
268
// createURI() and its auxiliary code are cloned from java.net.URI.
269
// Most of the code are just copy and paste, except that quote()
270
// has been modified to avoid double-escape.
271
//
272
// Usually it is unacceptable, but we're forced to do it because
273
// otherwise we need to change public API, namely java.net.URI's
274
// multi-argument constructors. It turns out that the changes cause
275
// incompatibilities so can't be done.
276
//
277
private static URI createURI(String scheme,
278
String authority,
279
String path,
280
String query,
281
String fragment) throws URISyntaxException
282
{
283
String s = toString(scheme, null,
284
authority, null, null, -1,
285
path, query, fragment);
286
checkPath(s, scheme, path);
287
return new URI(s);
288
}
289
290
private static String toString(String scheme,
291
String opaquePart,
292
String authority,
293
String userInfo,
294
String host,
295
int port,
296
String path,
297
String query,
298
String fragment)
299
{
300
StringBuilder sb = new StringBuilder();
301
if (scheme != null) {
302
sb.append(scheme);
303
sb.append(':');
304
}
305
appendSchemeSpecificPart(sb, opaquePart,
306
authority, userInfo, host, port,
307
path, query);
308
appendFragment(sb, fragment);
309
return sb.toString();
310
}
311
312
private static void appendSchemeSpecificPart(StringBuilder sb,
313
String opaquePart,
314
String authority,
315
String userInfo,
316
String host,
317
int port,
318
String path,
319
String query)
320
{
321
if (opaquePart != null) {
322
/* check if SSP begins with an IPv6 address
323
* because we must not quote a literal IPv6 address
324
*/
325
if (opaquePart.startsWith("//[")) {
326
int end = opaquePart.indexOf(']');
327
if (end != -1 && opaquePart.indexOf(':')!=-1) {
328
String doquote, dontquote;
329
if (end == opaquePart.length()) {
330
dontquote = opaquePart;
331
doquote = "";
332
} else {
333
dontquote = opaquePart.substring(0,end+1);
334
doquote = opaquePart.substring(end+1);
335
}
336
sb.append (dontquote);
337
sb.append(quote(doquote, L_URIC, H_URIC));
338
}
339
} else {
340
sb.append(quote(opaquePart, L_URIC, H_URIC));
341
}
342
} else {
343
appendAuthority(sb, authority, userInfo, host, port);
344
if (path != null)
345
sb.append(quote(path, L_PATH, H_PATH));
346
if (query != null) {
347
sb.append('?');
348
sb.append(quote(query, L_URIC, H_URIC));
349
}
350
}
351
}
352
353
private static void appendAuthority(StringBuilder sb,
354
String authority,
355
String userInfo,
356
String host,
357
int port)
358
{
359
if (host != null) {
360
sb.append("//");
361
if (userInfo != null) {
362
sb.append(quote(userInfo, L_USERINFO, H_USERINFO));
363
sb.append('@');
364
}
365
boolean needBrackets = ((host.indexOf(':') >= 0)
366
&& !host.startsWith("[")
367
&& !host.endsWith("]"));
368
if (needBrackets) sb.append('[');
369
sb.append(host);
370
if (needBrackets) sb.append(']');
371
if (port != -1) {
372
sb.append(':');
373
sb.append(port);
374
}
375
} else if (authority != null) {
376
sb.append("//");
377
if (authority.startsWith("[")) {
378
int end = authority.indexOf(']');
379
if (end != -1 && authority.indexOf(':')!=-1) {
380
String doquote, dontquote;
381
if (end == authority.length()) {
382
dontquote = authority;
383
doquote = "";
384
} else {
385
dontquote = authority.substring(0,end+1);
386
doquote = authority.substring(end+1);
387
}
388
sb.append (dontquote);
389
sb.append(quote(doquote,
390
L_REG_NAME | L_SERVER,
391
H_REG_NAME | H_SERVER));
392
}
393
} else {
394
sb.append(quote(authority,
395
L_REG_NAME | L_SERVER,
396
H_REG_NAME | H_SERVER));
397
}
398
}
399
}
400
401
private static void appendFragment(StringBuilder sb, String fragment) {
402
if (fragment != null) {
403
sb.append('#');
404
sb.append(quote(fragment, L_URIC, H_URIC));
405
}
406
}
407
408
// Quote any characters in s that are not permitted
409
// by the given mask pair
410
//
411
private static String quote(String s, long lowMask, long highMask) {
412
int n = s.length();
413
StringBuilder sb = null;
414
CharsetEncoder encoder = null;
415
boolean allowNonASCII = ((lowMask & L_ESCAPED) != 0);
416
for (int i = 0; i < s.length(); i++) {
417
char c = s.charAt(i);
418
if (c < '\u0080') {
419
if (!match(c, lowMask, highMask) && !isEscaped(s, i)) {
420
if (sb == null) {
421
sb = new StringBuilder();
422
sb.append(s, 0, i);
423
}
424
appendEscape(sb, (byte)c);
425
} else {
426
if (sb != null)
427
sb.append(c);
428
}
429
} else if (allowNonASCII
430
&& (Character.isSpaceChar(c)
431
|| Character.isISOControl(c))) {
432
if (encoder == null) {
433
encoder = UTF_8.INSTANCE.newEncoder();
434
}
435
if (sb == null) {
436
sb = new StringBuilder();
437
sb.append(s, 0, i);
438
}
439
appendEncoded(encoder, sb, c);
440
} else {
441
if (sb != null)
442
sb.append(c);
443
}
444
}
445
return (sb == null) ? s : sb.toString();
446
}
447
448
//
449
// To check if the given string has an escaped triplet
450
// at the given position
451
//
452
private static boolean isEscaped(String s, int pos) {
453
if (s == null || (s.length() <= (pos + 2)))
454
return false;
455
456
return s.charAt(pos) == '%'
457
&& match(s.charAt(pos + 1), L_HEX, H_HEX)
458
&& match(s.charAt(pos + 2), L_HEX, H_HEX);
459
}
460
461
private static void appendEncoded(CharsetEncoder encoder,
462
StringBuilder sb, char c) {
463
ByteBuffer bb = null;
464
try {
465
bb = encoder.encode(CharBuffer.wrap("" + c));
466
} catch (CharacterCodingException x) {
467
assert false;
468
}
469
while (bb.hasRemaining()) {
470
int b = bb.get() & 0xff;
471
if (b >= 0x80)
472
appendEscape(sb, (byte)b);
473
else
474
sb.append((char)b);
475
}
476
}
477
478
private static void appendEscape(StringBuilder sb, byte b) {
479
sb.append('%');
480
HEX_UPPERCASE.toHexDigits(sb, b);
481
}
482
483
// Tell whether the given character is permitted by the given mask pair
484
private static boolean match(char c, long lowMask, long highMask) {
485
if (c < 64)
486
return ((1L << c) & lowMask) != 0;
487
if (c < 128)
488
return ((1L << (c - 64)) & highMask) != 0;
489
return false;
490
}
491
492
// If a scheme is given then the path, if given, must be absolute
493
//
494
private static void checkPath(String s, String scheme, String path)
495
throws URISyntaxException
496
{
497
if (scheme != null) {
498
if (path != null && !path.isEmpty() && path.charAt(0) != '/')
499
throw new URISyntaxException(s,
500
"Relative path in absolute URI");
501
}
502
}
503
504
505
// -- Character classes for parsing --
506
507
// To save startup time, we manually calculate the low-/highMask constants.
508
// For reference, the following methods were used to calculate the values:
509
510
// Compute a low-order mask for the characters
511
// between first and last, inclusive
512
// private static long lowMask(char first, char last) {
513
// long m = 0;
514
// int f = Math.max(Math.min(first, 63), 0);
515
// int l = Math.max(Math.min(last, 63), 0);
516
// for (int i = f; i <= l; i++)
517
// m |= 1L << i;
518
// return m;
519
// }
520
521
// Compute the low-order mask for the characters in the given string
522
// private static long lowMask(String chars) {
523
// int n = chars.length();
524
// long m = 0;
525
// for (int i = 0; i < n; i++) {
526
// char c = chars.charAt(i);
527
// if (c < 64)
528
// m |= (1L << c);
529
// }
530
// return m;
531
// }
532
533
// Compute a high-order mask for the characters
534
// between first and last, inclusive
535
// private static long highMask(char first, char last) {
536
// long m = 0;
537
// int f = Math.max(Math.min(first, 127), 64) - 64;
538
// int l = Math.max(Math.min(last, 127), 64) - 64;
539
// for (int i = f; i <= l; i++)
540
// m |= 1L << i;
541
// return m;
542
// }
543
544
// Compute the high-order mask for the characters in the given string
545
// private static long highMask(String chars) {
546
// int n = chars.length();
547
// long m = 0;
548
// for (int i = 0; i < n; i++) {
549
// char c = chars.charAt(i);
550
// if ((c >= 64) && (c < 128))
551
// m |= (1L << (c - 64));
552
// }
553
// return m;
554
// }
555
556
557
// Character-class masks
//
// Every character class is described by a pair of 64-bit masks, as tested
// by match(): bit i of the L_ (low) mask stands for character i (0-63) and
// bit i of the H_ (high) mask stands for character 64 + i (64-127).
558
559
// digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |
560
// "8" | "9"
561
private static final long L_DIGIT = 0x3FF000000000000L; // lowMask('0', '9');
562
private static final long H_DIGIT = 0L;
563
564
// hex = digit | "A" | "B" | "C" | "D" | "E" | "F" |
565
// "a" | "b" | "c" | "d" | "e" | "f"
566
private static final long L_HEX = L_DIGIT;
567
private static final long H_HEX = 0x7E0000007EL; // highMask('A', 'F') | highMask('a', 'f');
568
569
// upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" |
570
// "J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" |
571
// "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z"
572
private static final long L_UPALPHA = 0L;
573
private static final long H_UPALPHA = 0x7FFFFFEL; // highMask('A', 'Z');
574
575
// lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" |
576
// "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" |
577
// "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z"
578
private static final long L_LOWALPHA = 0L;
579
private static final long H_LOWALPHA = 0x7FFFFFE00000000L; // highMask('a', 'z');
580
581
// alpha = lowalpha | upalpha
582
private static final long L_ALPHA = L_LOWALPHA | L_UPALPHA;
583
private static final long H_ALPHA = H_LOWALPHA | H_UPALPHA;
584
585
// alphanum = alpha | digit
586
private static final long L_ALPHANUM = L_DIGIT | L_ALPHA;
587
private static final long H_ALPHANUM = H_DIGIT | H_ALPHA;
588
589
// mark = "-" | "_" | "." | "!" | "~" | "*" | "'" |
590
// "(" | ")"
591
private static final long L_MARK = 0x678200000000L; // lowMask("-_.!~*'()");
592
private static final long H_MARK = 0x4000000080000000L; // highMask("-_.!~*'()");
593
594
// unreserved = alphanum | mark
595
private static final long L_UNRESERVED = L_ALPHANUM | L_MARK;
596
private static final long H_UNRESERVED = H_ALPHANUM | H_MARK;
597
598
// reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
599
// "$" | "," | "[" | "]"
600
// Added per RFC2732: "[", "]"
601
private static final long L_RESERVED = 0xAC00985000000000L; // lowMask(";/?:@&=+$,[]");
602
private static final long H_RESERVED = 0x28000001L; // highMask(";/?:@&=+$,[]");
603
604
// The zero'th bit is used to indicate that escape pairs and non-US-ASCII
605
// characters are allowed. There is no scanEscape method in this class (the
// original remark about one was presumably inherited from the java.net.URI
// code this section was cloned from). Here quote() reads the bit to decide
// whether non-ASCII space and ISO control characters get percent-encoded,
// and isEscaped() recognises existing escape triplets.
// Note that bit 0 of a low mask is also the bit match() tests for the
// character with value 0.
606
private static final long L_ESCAPED = 1L;
607
private static final long H_ESCAPED = 0L;
608
609
// uric = reserved | unreserved | escaped
610
private static final long L_URIC = L_RESERVED | L_UNRESERVED | L_ESCAPED;
611
private static final long H_URIC = H_RESERVED | H_UNRESERVED | H_ESCAPED;
612
613
// pchar = unreserved | escaped |
614
// ":" | "@" | "&" | "=" | "+" | "$" | ","
615
private static final long L_PCHAR
616
= L_UNRESERVED | L_ESCAPED | 0x2400185000000000L; // lowMask(":@&=+$,");
617
private static final long H_PCHAR
618
= H_UNRESERVED | H_ESCAPED | 0x1L; // highMask(":@&=+$,");
619
620
// All valid path characters
621
private static final long L_PATH = L_PCHAR | 0x800800000000000L; // lowMask(";/");
622
private static final long H_PATH = H_PCHAR; // highMask(";/") == 0x0L;
623
624
// Dash, for use in domainlabel and toplabel
625
private static final long L_DASH = 0x200000000000L; // lowMask("-");
626
private static final long H_DASH = 0x0L; // highMask("-");
627
628
// userinfo = *( unreserved | escaped |
629
// ";" | ":" | "&" | "=" | "+" | "$" | "," )
630
private static final long L_USERINFO
631
= L_UNRESERVED | L_ESCAPED | 0x2C00185000000000L; // lowMask(";:&=+$,");
632
private static final long H_USERINFO
633
= H_UNRESERVED | H_ESCAPED; // | highMask(";:&=+$,") == 0L;
634
635
// reg_name = 1*( unreserved | escaped | "$" | "," |
636
// ";" | ":" | "@" | "&" | "=" | "+" )
637
private static final long L_REG_NAME
638
= L_UNRESERVED | L_ESCAPED | 0x2C00185000000000L; // lowMask("$,;:@&=+");
639
private static final long H_REG_NAME
640
= H_UNRESERVED | H_ESCAPED | 0x1L; // highMask("$,;:@&=+");
641
642
// All valid characters for server-based authorities
643
private static final long L_SERVER
644
= L_USERINFO | L_ALPHANUM | L_DASH | 0x400400000000000L; // lowMask(".:@[]");
645
private static final long H_SERVER
646
= H_USERINFO | H_ALPHANUM | H_DASH | 0x28000001L; // highMask(".:@[]");
647
648
// Characters that are encoded in the path component of a URI.
// This mask pair is the one consulted by firstEncodeIndex() and
// encodePath(); unlike the pairs above, it selects the characters that
// must be escaped rather than the ones that are permitted.
649
//
650
// These characters are reserved in the path segment as described in
651
// RFC2396 section 3.3:
652
// "=" | ";" | "?" | "/"
653
//
654
// These characters are defined as excluded in RFC2396 section 2.4.3
655
// and must be escaped if they occur in the data part of a URI:
656
// "#" | " " | "<" | ">" | "%" | "\"" | "{" | "}" | "|" | "\\" | "^" |
657
// "[" | "]" | "`"
658
//
659
// Also US ASCII control characters 00-1F and 7F.
660
661
// lowMask((char)0, (char)31) | lowMask("=;?/# <>%\"{}|\\^[]`");
662
private static final long L_ENCODED = 0xF800802DFFFFFFFFL;
663
664
// highMask((char)0x7F, (char)0x7F) | highMask("=;?/# <>%\"{}|\\^[]`");
665
private static final long H_ENCODED = 0xB800000178000000L;
666
667
}
668
669