Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mobile
Path: blob/master/src/java.base/share/classes/java/net/IDN.java
41152 views
1
/*
2
* Copyright (c) 2005, 2021, Oracle and/or its affiliates. All rights reserved.
3
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4
*
5
* This code is free software; you can redistribute it and/or modify it
6
* under the terms of the GNU General Public License version 2 only, as
7
* published by the Free Software Foundation. Oracle designates this
8
* particular file as subject to the "Classpath" exception as provided
9
* by Oracle in the LICENSE file that accompanied this code.
10
*
11
* This code is distributed in the hope that it will be useful, but WITHOUT
12
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14
* version 2 for more details (a copy is included in the LICENSE file that
15
* accompanied this code).
16
*
17
* You should have received a copy of the GNU General Public License version
18
* 2 along with this work; if not, write to the Free Software Foundation,
19
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20
*
21
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22
* or visit www.oracle.com if you need additional information or have any
23
* questions.
24
*/
25
package java.net;
26
27
import java.io.InputStream;
28
import java.io.IOException;
29
import java.security.AccessController;
30
import java.security.PrivilegedAction;
31
32
import jdk.internal.icu.impl.Punycode;
33
import jdk.internal.icu.text.StringPrep;
34
import jdk.internal.icu.text.UCharacterIterator;
35
36
/**
 * Provides methods to convert internationalized domain names (IDNs) between
 * a normal Unicode representation and an ASCII Compatible Encoding (ACE) representation.
 * Internationalized domain names can use characters from the entire range of
 * Unicode, while traditional domain names are restricted to ASCII characters.
 * ACE is an encoding of Unicode strings that uses only ASCII characters and
 * can be used with software (such as the Domain Name System) that only
 * understands traditional domain names.
 *
 * <p>Internationalized domain names are defined in <a href="http://www.ietf.org/rfc/rfc3490.txt">RFC 3490</a>.
 * RFC 3490 defines two operations: ToASCII and ToUnicode. These two operations employ
 * the <a href="http://www.ietf.org/rfc/rfc3491.txt">Nameprep</a> algorithm, which is a
 * profile of <a href="http://www.ietf.org/rfc/rfc3454.txt">Stringprep</a>, and the
 * <a href="http://www.ietf.org/rfc/rfc3492.txt">Punycode</a> algorithm to convert
 * domain name strings back and forth.
 *
 * <p>The behavior of the aforementioned conversion process can be adjusted by various flags:
 *   <ul>
 *     <li>If the ALLOW_UNASSIGNED flag is used, the domain name string to be converted
 *         can contain code points that are unassigned in Unicode 3.2, which is the
 *         Unicode version on which IDN conversion is based. If the flag is not used,
 *         the presence of such unassigned code points is treated as an error.
 *     <li>If the USE_STD3_ASCII_RULES flag is used, ASCII strings are checked against <a href="http://www.ietf.org/rfc/rfc1122.txt">RFC 1122</a> and <a href="http://www.ietf.org/rfc/rfc1123.txt">RFC 1123</a>.
 *         It is an error if they don't meet the requirements.
 *   </ul>
 * These flags can be logically OR'ed together.
 *
 * <p>Security considerations are important with respect to internationalized
 * domain name support. For example, English domain names may be <i>homographed</i>
 * - maliciously misspelled by substitution of non-Latin letters.
 * <a href="http://www.unicode.org/reports/tr36/">Unicode Technical Report #36</a>
 * discusses security issues of IDN support as well as possible solutions.
 * Applications are responsible for taking adequate security measures when using
 * international domain names.
 *
 * @author Edward Wang
 * @since 1.6
 *
 */
75
@SuppressWarnings("removal")
76
public final class IDN {
77
/**
78
* Flag to allow processing of unassigned code points
79
*/
80
public static final int ALLOW_UNASSIGNED = 0x01;
81
82
/**
83
* Flag to turn on the check against STD-3 ASCII rules
84
*/
85
public static final int USE_STD3_ASCII_RULES = 0x02;
86
87
88
/**
89
* Translates a string from Unicode to ASCII Compatible Encoding (ACE),
90
* as defined by the ToASCII operation of <a href="http://www.ietf.org/rfc/rfc3490.txt">RFC 3490</a>.
91
*
92
* <p>ToASCII operation can fail. ToASCII fails if any step of it fails.
93
* If ToASCII operation fails, an IllegalArgumentException will be thrown.
94
* In this case, the input string should not be used in an internationalized domain name.
95
*
96
* <p> A label is an individual part of a domain name. The original ToASCII operation,
97
* as defined in RFC 3490, only operates on a single label. This method can handle
98
* both label and entire domain name, by assuming that labels in a domain name are
99
* always separated by dots. The following characters are recognized as dots:
100
* &#0092;u002E (full stop), &#0092;u3002 (ideographic full stop), &#0092;uFF0E (fullwidth full stop),
101
* and &#0092;uFF61 (halfwidth ideographic full stop). if dots are
102
* used as label separators, this method also changes all of them to &#0092;u002E (full stop)
103
* in output translated string.
104
*
105
* @param input the string to be processed
106
* @param flag process flag; can be 0 or any logical OR of possible flags
107
*
108
* @return the translated {@code String}
109
*
110
* @throws IllegalArgumentException if the input string doesn't conform to RFC 3490 specification
111
*/
112
public static String toASCII(String input, int flag)
113
{
114
int p = 0, q = 0;
115
StringBuilder out = new StringBuilder();
116
117
if (isRootLabel(input)) {
118
return ".";
119
}
120
121
while (p < input.length()) {
122
q = searchDots(input, p);
123
out.append(toASCIIInternal(input.substring(p, q), flag));
124
if (q != (input.length())) {
125
// has more labels, or keep the trailing dot as at present
126
out.append('.');
127
}
128
p = q + 1;
129
}
130
131
return out.toString();
132
}
133
134
135
/**
136
* Translates a string from Unicode to ASCII Compatible Encoding (ACE),
137
* as defined by the ToASCII operation of <a href="http://www.ietf.org/rfc/rfc3490.txt">RFC 3490</a>.
138
*
139
* <p> This convenience method works as if by invoking the
140
* two-argument counterpart as follows:
141
* <blockquote>
142
* {@link #toASCII(String, int) toASCII}(input,&nbsp;0);
143
* </blockquote>
144
*
145
* @param input the string to be processed
146
*
147
* @return the translated {@code String}
148
*
149
* @throws IllegalArgumentException if the input string doesn't conform to RFC 3490 specification
150
*/
151
public static String toASCII(String input) {
152
return toASCII(input, 0);
153
}
154
155
156
/**
157
* Translates a string from ASCII Compatible Encoding (ACE) to Unicode,
158
* as defined by the ToUnicode operation of <a href="http://www.ietf.org/rfc/rfc3490.txt">RFC 3490</a>.
159
*
160
* <p>ToUnicode never fails. In case of any error, the input string is returned unmodified.
161
*
162
* <p> A label is an individual part of a domain name. The original ToUnicode operation,
163
* as defined in RFC 3490, only operates on a single label. This method can handle
164
* both label and entire domain name, by assuming that labels in a domain name are
165
* always separated by dots. The following characters are recognized as dots:
166
* &#0092;u002E (full stop), &#0092;u3002 (ideographic full stop), &#0092;uFF0E (fullwidth full stop),
167
* and &#0092;uFF61 (halfwidth ideographic full stop).
168
*
169
* @param input the string to be processed
170
* @param flag process flag; can be 0 or any logical OR of possible flags
171
*
172
* @return the translated {@code String}
173
*/
174
public static String toUnicode(String input, int flag) {
175
int p = 0, q = 0;
176
StringBuilder out = new StringBuilder();
177
178
if (isRootLabel(input)) {
179
return ".";
180
}
181
182
while (p < input.length()) {
183
q = searchDots(input, p);
184
out.append(toUnicodeInternal(input.substring(p, q), flag));
185
if (q != (input.length())) {
186
// has more labels, or keep the trailing dot as at present
187
out.append('.');
188
}
189
p = q + 1;
190
}
191
192
return out.toString();
193
}
194
195
196
/**
197
* Translates a string from ASCII Compatible Encoding (ACE) to Unicode,
198
* as defined by the ToUnicode operation of <a href="http://www.ietf.org/rfc/rfc3490.txt">RFC 3490</a>.
199
*
200
* <p> This convenience method works as if by invoking the
201
* two-argument counterpart as follows:
202
* <blockquote>
203
* {@link #toUnicode(String, int) toUnicode}(input,&nbsp;0);
204
* </blockquote>
205
*
206
* @param input the string to be processed
207
*
208
* @return the translated {@code String}
209
*/
210
public static String toUnicode(String input) {
211
return toUnicode(input, 0);
212
}
213
214
215
/* ---------------- Private members -------------- */
216
217
// ACE Prefix is "xn--"
218
private static final String ACE_PREFIX = "xn--";
219
private static final int ACE_PREFIX_LENGTH = ACE_PREFIX.length();
220
221
private static final int MAX_LABEL_LENGTH = 63;
222
223
// single instance of nameprep
private static StringPrep namePrep = null;

// Loads the nameprep profile resource once, when the class is initialized.
static {
    final String IDN_PROFILE = "/sun/net/idn/uidna.spp";

    // Under a security manager the resource lookup runs as a privileged action.
    InputStream profile;
    if (System.getSecurityManager() != null) {
        profile = AccessController.doPrivileged(new PrivilegedAction<>() {
            @Override
            public InputStream run() {
                return StringPrep.class.getResourceAsStream(IDN_PROFILE);
            }
        });
    } else {
        profile = StringPrep.class.getResourceAsStream(IDN_PROFILE);
    }

    // try-with-resources closes the stream even when StringPrep construction throws
    try (InputStream stream = profile) {
        namePrep = new StringPrep(stream);
    } catch (IOException e) {
        // should never reach here
        assert false;
    }
}
248
249
250
/* ---------------- Private operations -------------- */
251
252
253
//
// to suppress the default zero-argument constructor
//
private IDN() {}
257
258
//
259
// toASCII operation; should only apply to a single label
260
//
261
private static String toASCIIInternal(String label, int flag)
262
{
263
// step 1
264
// Check if the string contains code points outside the ASCII range 0..0x7c.
265
boolean isASCII = isAllASCII(label);
266
StringBuffer dest;
267
268
// step 2
269
// perform the nameprep operation; flag ALLOW_UNASSIGNED is used here
270
if (!isASCII) {
271
UCharacterIterator iter = UCharacterIterator.getInstance(label);
272
try {
273
dest = namePrep.prepare(iter, flag);
274
} catch (java.text.ParseException e) {
275
throw new IllegalArgumentException(e);
276
}
277
} else {
278
dest = new StringBuffer(label);
279
}
280
281
// step 8, move forward to check the smallest number of the code points
282
// the length must be inside 1..63
283
if (dest.length() == 0) {
284
throw new IllegalArgumentException(
285
"Empty label is not a legal name");
286
}
287
288
// step 3
289
// Verify the absence of non-LDH ASCII code points
290
// 0..0x2c, 0x2e..0x2f, 0x3a..0x40, 0x5b..0x60, 0x7b..0x7f
291
// Verify the absence of leading and trailing hyphen
292
boolean useSTD3ASCIIRules = ((flag & USE_STD3_ASCII_RULES) != 0);
293
if (useSTD3ASCIIRules) {
294
for (int i = 0; i < dest.length(); i++) {
295
int c = dest.charAt(i);
296
if (isNonLDHAsciiCodePoint(c)) {
297
throw new IllegalArgumentException(
298
"Contains non-LDH ASCII characters");
299
}
300
}
301
302
if (dest.charAt(0) == '-' ||
303
dest.charAt(dest.length() - 1) == '-') {
304
305
throw new IllegalArgumentException(
306
"Has leading or trailing hyphen");
307
}
308
}
309
310
if (!isASCII) {
311
// step 4
312
// If all code points are inside 0..0x7f, skip to step 8
313
if (!isAllASCII(dest.toString())) {
314
// step 5
315
// verify the sequence does not begin with ACE prefix
316
if(!startsWithACEPrefix(dest)){
317
318
// step 6
319
// encode the sequence with punycode
320
try {
321
dest = Punycode.encode(dest, null);
322
} catch (java.text.ParseException e) {
323
throw new IllegalArgumentException(e);
324
}
325
326
dest = toASCIILower(dest);
327
328
// step 7
329
// prepend the ACE prefix
330
dest.insert(0, ACE_PREFIX);
331
} else {
332
throw new IllegalArgumentException("The input starts with the ACE Prefix");
333
}
334
335
}
336
}
337
338
// step 8
339
// the length must be inside 1..63
340
if (dest.length() > MAX_LABEL_LENGTH) {
341
throw new IllegalArgumentException("The label in the input is too long");
342
}
343
344
return dest.toString();
345
}
346
347
//
348
// toUnicode operation; should only apply to a single label
349
//
350
private static String toUnicodeInternal(String label, int flag) {
351
boolean[] caseFlags = null;
352
StringBuffer dest;
353
354
// step 1
355
// find out if all the codepoints in input are ASCII
356
boolean isASCII = isAllASCII(label);
357
358
if(!isASCII){
359
// step 2
360
// perform the nameprep operation; flag ALLOW_UNASSIGNED is used here
361
try {
362
UCharacterIterator iter = UCharacterIterator.getInstance(label);
363
dest = namePrep.prepare(iter, flag);
364
} catch (Exception e) {
365
// toUnicode never fails; if any step fails, return the input string
366
return label;
367
}
368
} else {
369
dest = new StringBuffer(label);
370
}
371
372
// step 3
373
// verify ACE Prefix
374
if(startsWithACEPrefix(dest)) {
375
376
// step 4
377
// Remove the ACE Prefix
378
String temp = dest.substring(ACE_PREFIX_LENGTH, dest.length());
379
380
try {
381
// step 5
382
// Decode using punycode
383
StringBuffer decodeOut = Punycode.decode(new StringBuffer(temp), null);
384
385
// step 6
386
// Apply toASCII
387
String toASCIIOut = toASCII(decodeOut.toString(), flag);
388
389
// step 7
390
// verify
391
if (toASCIIOut.equalsIgnoreCase(dest.toString())) {
392
// step 8
393
// return output of step 5
394
return decodeOut.toString();
395
}
396
} catch (Exception ignored) {
397
// no-op
398
}
399
}
400
401
// just return the input
402
return label;
403
}
404
405
406
//
407
// LDH stands for "letter/digit/hyphen", with characters restricted to the
408
// 26-letter Latin alphabet <A-Z a-z>, the digits <0-9>, and the hyphen
409
// <->.
410
// Non LDH refers to characters in the ASCII range, but which are not
411
// letters, digits or the hyphen.
412
//
413
// non-LDH = 0..0x2C, 0x2E..0x2F, 0x3A..0x40, 0x5B..0x60, 0x7B..0x7F
414
//
415
private static boolean isNonLDHAsciiCodePoint(int ch){
416
return (0x0000 <= ch && ch <= 0x002C) ||
417
(0x002E <= ch && ch <= 0x002F) ||
418
(0x003A <= ch && ch <= 0x0040) ||
419
(0x005B <= ch && ch <= 0x0060) ||
420
(0x007B <= ch && ch <= 0x007F);
421
}
422
423
//
424
// search dots in a string and return the index of that character;
425
// or if there is no dots, return the length of input string
426
// dots might be: \u002E (full stop), \u3002 (ideographic full stop), \uFF0E (fullwidth full stop),
427
// and \uFF61 (halfwidth ideographic full stop).
428
//
429
private static int searchDots(String s, int start) {
430
int i;
431
for (i = start; i < s.length(); i++) {
432
if (isLabelSeparator(s.charAt(i))) {
433
break;
434
}
435
}
436
437
return i;
438
}
439
440
//
441
// to check if a string is a root label, ".".
442
//
443
private static boolean isRootLabel(String s) {
444
return (s.length() == 1 && isLabelSeparator(s.charAt(0)));
445
}
446
447
//
448
// to check if a character is a label separator, i.e. a dot character.
449
//
450
private static boolean isLabelSeparator(char c) {
451
return (c == '.' || c == '\u3002' || c == '\uFF0E' || c == '\uFF61');
452
}
453
454
//
455
// to check if a string only contains US-ASCII code point
456
//
457
private static boolean isAllASCII(String input) {
458
boolean isASCII = true;
459
for (int i = 0; i < input.length(); i++) {
460
int c = input.charAt(i);
461
if (c > 0x7F) {
462
isASCII = false;
463
break;
464
}
465
}
466
return isASCII;
467
}
468
469
//
470
// to check if a string starts with ACE-prefix
471
//
472
private static boolean startsWithACEPrefix(StringBuffer input){
473
boolean startsWithPrefix = true;
474
475
if(input.length() < ACE_PREFIX_LENGTH){
476
return false;
477
}
478
for(int i = 0; i < ACE_PREFIX_LENGTH; i++){
479
if(toASCIILower(input.charAt(i)) != ACE_PREFIX.charAt(i)){
480
startsWithPrefix = false;
481
}
482
}
483
return startsWithPrefix;
484
}
485
486
private static char toASCIILower(char ch){
487
if('A' <= ch && ch <= 'Z'){
488
return (char)(ch + 'a' - 'A');
489
}
490
return ch;
491
}
492
493
private static StringBuffer toASCIILower(StringBuffer input){
494
StringBuffer dest = new StringBuffer();
495
for(int i = 0; i < input.length();i++){
496
dest.append(toASCIILower(input.charAt(i)));
497
}
498
return dest;
499
}
500
}
501
502