Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mobile
Path: blob/master/src/java.base/share/classes/jdk/internal/icu/impl/Punycode.java
41161 views
1
/*
2
* Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved.
3
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4
*
5
* This code is free software; you can redistribute it and/or modify it
6
* under the terms of the GNU General Public License version 2 only, as
7
* published by the Free Software Foundation. Oracle designates this
8
* particular file as subject to the "Classpath" exception as provided
9
* by Oracle in the LICENSE file that accompanied this code.
10
*
11
* This code is distributed in the hope that it will be useful, but WITHOUT
12
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14
* version 2 for more details (a copy is included in the LICENSE file that
15
* accompanied this code).
16
*
17
* You should have received a copy of the GNU General Public License version
18
* 2 along with this work; if not, write to the Free Software Foundation,
19
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20
*
21
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22
* or visit www.oracle.com if you need additional information or have any
23
* questions.
24
*/
25
/*
26
*******************************************************************************
27
* Copyright (C) 2003-2004, International Business Machines Corporation and *
28
* others. All Rights Reserved. *
29
*******************************************************************************
30
*/
31
//
32
// CHANGELOG
33
// 2005-05-19 Edward Wang
34
// - copy this file from icu4jsrc_3_2/src/com/ibm/icu/text/Punycode.java
35
// - move from package com.ibm.icu.text to package sun.net.idn
36
// - use ParseException instead of StringPrepParseException
37
// 2007-08-14 Martin Buchholz
38
// - remove redundant casts
39
//
40
package jdk.internal.icu.impl;
41
42
import java.text.ParseException;
43
import jdk.internal.icu.lang.UCharacter;
44
import jdk.internal.icu.text.UTF16;
45
46
/**
 * Punycode (Bootstring) encoder and decoder, ported from ICU punycode.c.
 *
 * <p>Both directions work on UTF-16 text held in a {@code StringBuffer}.
 * An optional {@code boolean[]} of case flags, indexed by UTF-16 code unit,
 * can be passed through: {@link #encode} reads it, {@link #decode} fills it.
 *
 * @author ram
 */
public final class Punycode {

    /* Punycode parameters for Bootstring */
    private static final int BASE           = 36;
    private static final int TMIN           = 1;
    private static final int TMAX           = 26;
    private static final int SKEW           = 38;
    private static final int DAMP           = 700;
    private static final int INITIAL_BIAS   = 72;
    private static final int INITIAL_N      = 0x80;

    /* "Basic" Unicode/ASCII code points */
    private static final int HYPHEN         = 0x2d;
    private static final int DELIMITER      = HYPHEN;

    private static final int ZERO           = 0x30;
    private static final int NINE           = 0x39;

    private static final int SMALL_A        = 0x61;
    private static final int SMALL_Z        = 0x7a;

    private static final int CAPITAL_A      = 0x41;
    private static final int CAPITAL_Z      = 0x5a;

    /* Maximum number of input code points accepted by encode(). */
    // TODO: eliminate the 256 limitation
    private static final int MAX_CP_COUNT   = 256;

    /* In cpBuffer the sign bit carries the case flag, the low 31 bits the code point. */
    private static final int CASE_FLAG       = 0x80000000;
    private static final int CODE_POINT_MASK = 0x7fffffff;

    /**
     * Bias adaptation function of the Bootstring algorithm.
     *
     * @param delta     the delta that was just encoded or decoded
     * @param length    number of code points handled so far, including the current one
     * @param firstTime true for the first adaptation (delta is damped by DAMP instead of halved)
     * @return the new bias
     */
    private static int adaptBias(int delta, int length, boolean firstTime) {
        if (firstTime) {
            delta /= DAMP;
        } else {
            delta /= 2;
        }
        delta += delta / length;

        int count = 0;
        for (; delta > ((BASE - TMIN) * TMAX) / 2; count += BASE) {
            delta /= (BASE - TMIN);
        }

        return count + (((BASE - TMIN + 1) * delta) / (delta + SKEW));
    }

    /**
     * Returns the digit threshold t for position k under the given bias:
     * k - bias clamped to the range TMIN..TMAX.
     */
    private static int threshold(int k, int bias) {
        int t = k - bias;
        if (t < TMIN) {
            return TMIN;
        }
        if (k >= (bias + TMAX)) {
            return TMAX;
        }
        return t;
    }

    /**
     * basicToDigit[] contains the numeric value of a basic code
     * point (for use in representing integers) in the range 0 to
     * BASE-1, or -1 if the code point does not represent a value.
     * Letters (either case) map to 0..25 and the digits '0'..'9' to 26..35.
     */
    static final int[] basicToDigit = new int[]{
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,

        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, -1, -1,

        -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
        15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,

        -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
        15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,

        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,

        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,

        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,

        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
    };

    /**
     * Forces the case of an ASCII letter; any other char is returned unchanged.
     *
     * @param b         the char to map
     * @param uppercase true to map a..z to A..Z, false to map A..Z to a..z
     */
    private static char asciiCaseMap(char b, boolean uppercase) {
        if (uppercase) {
            if (SMALL_A <= b && b <= SMALL_Z) {
                b -= (SMALL_A - CAPITAL_A);
            }
        } else {
            if (CAPITAL_A <= b && b <= CAPITAL_Z) {
                b += (SMALL_A - CAPITAL_A);
            }
        }
        return b;
    }

    /**
     * digitToBasic() returns the basic code point whose value
     * (when used for representing integers) is digit, which must be in the
     * range 0 to BASE-1. The lowercase form is used unless the uppercase flag
     * is true, in which case the uppercase form is used.
     */
    private static char digitToBasic(int digit, boolean uppercase) {
        /*  0..25 map to ASCII a..z or A..Z */
        /* 26..35 map to ASCII 0..9         */
        if (digit < 26) {
            return (char) ((uppercase ? CAPITAL_A : SMALL_A) + digit);
        }
        return (char) ((ZERO - 26) + digit);
    }

    /**
     * Converts Unicode to Punycode.
     * The input string must not contain single, unpaired surrogates.
     * The output consists of ASCII code points only.
     *
     * @param src       the UTF-16 text to encode; at most 256 code points
     * @param caseFlags optional (may be null) flags indexed by UTF-16 index of src.
     *                  For a basic code point the flag forces its case in the output;
     *                  for a non-basic code point a true flag makes the last digit of
     *                  its encoded delta uppercase.
     * @return a new buffer holding the Punycode form of src
     * @throws ParseException (error offset -1) if src has more than 256 code points
     *                        or contains an unpaired surrogate
     */
    public static StringBuffer encode(StringBuffer src, boolean[] caseFlags) throws ParseException {
        final int srcLength = src.length();
        final int[] cpBuffer = new int[MAX_CP_COUNT];
        /* Growable output: the encoded form may be longer than MAX_CP_COUNT chars. */
        final StringBuilder dest = new StringBuilder(MAX_CP_COUNT);

        /*
         * Handle the basic code points and
         * convert extended ones to UTF-32 in cpBuffer (caseFlag in sign bit):
         */
        int srcCPCount = 0;
        for (int j = 0; j < srcLength; ++j) {
            if (srcCPCount == MAX_CP_COUNT) {
                /* too many input code points */
                throw new ParseException("Too many input code points", -1);
            }
            final char c = src.charAt(j);
            if (isBasic(c)) {
                /* Basic code points are stored as 0 so the main loop always skips them. */
                cpBuffer[srcCPCount++] = 0;
                dest.append(caseFlags != null ? asciiCaseMap(c, caseFlags[j]) : c);
            } else {
                int cp = (caseFlags != null && caseFlags[j]) ? CASE_FLAG : 0;
                if (!Character.isSurrogate(c)) {
                    cp |= c;
                } else if (Character.isHighSurrogate(c) && (j + 1) < srcLength
                        && Character.isLowSurrogate(src.charAt(j + 1))) {
                    cp |= Character.toCodePoint(c, src.charAt(j + 1));
                    ++j;
                } else {
                    /* error: unmatched surrogate */
                    throw new ParseException("Illegal char found", -1);
                }
                cpBuffer[srcCPCount++] = cp;
            }
        }

        /* Finish the basic string - if it is not empty - with a delimiter. */
        final int basicLength = dest.length();
        if (basicLength > 0) {
            dest.append((char) DELIMITER);
        }

        /* Initialize the state: */
        int n = INITIAL_N;
        int delta = 0;
        int bias = INITIAL_BIAS;

        /*
         * Main encoding loop:
         * handledCPCount is the number of code points that have been handled,
         * basicLength is the number of basic code points.
         */
        int handledCPCount = basicLength;
        while (handledCPCount < srcCPCount) {
            /*
             * All non-basic code points < n have been handled already.
             * Find the next larger one:
             */
            int m = 0x7fffffff;
            for (int j = 0; j < srcCPCount; ++j) {
                final int q = cpBuffer[j] & CODE_POINT_MASK;
                if (n <= q && q < m) {
                    m = q;
                }
            }

            /*
             * Increase delta enough to advance the decoder's
             * <n,i> state to <m,0>, but guard against overflow:
             */
            if (m - n > (0x7fffffff - MAX_CP_COUNT - delta) / (handledCPCount + 1)) {
                throw new RuntimeException("Internal program error");
            }
            delta += (m - n) * (handledCPCount + 1);
            n = m;

            /* Encode a sequence of same code points n */
            for (int j = 0; j < srcCPCount; ++j) {
                final int cp = cpBuffer[j] & CODE_POINT_MASK;
                if (cp < n) {
                    ++delta;
                } else if (cp == n) {
                    /* Represent delta as a generalized variable-length integer: */
                    int q = delta;
                    for (int k = BASE; /* no condition */; k += BASE) {
                        final int t = threshold(k, bias);
                        if (q < t) {
                            break;
                        }
                        dest.append(digitToBasic(t + (q - t) % (BASE - t), false));
                        q = (q - t) / (BASE - t);
                    }

                    /* The case of the last digit carries the code point's case flag. */
                    dest.append(digitToBasic(q, (cpBuffer[j] < 0)));
                    bias = adaptBias(delta, handledCPCount + 1, (handledCPCount == basicLength));
                    delta = 0;
                    ++handledCPCount;
                }
            }

            ++delta;
            ++n;
        }

        return new StringBuffer(dest);
    }

    /** Returns true if ch is a basic (ASCII, below 0x80) code point. */
    private static boolean isBasic(int ch) {
        return (ch < INITIAL_N);
    }

    /** Returns true if ch is an ASCII uppercase letter A..Z. */
    private static boolean isBasicUpperCase(int ch) {
        return (CAPITAL_A <= ch && ch <= CAPITAL_Z);
    }

    /** Returns true if ch is in the surrogate range U+D800..U+DFFF. */
    private static boolean isSurrogate(int ch) {
        return (((ch) & 0xfffff800) == 0xd800);
    }

    /**
     * Converts Punycode to Unicode.
     *
     * @param src       the Punycode text to decode
     * @param caseFlags optional (may be null) output array, indexed by UTF-16 index of
     *                  the result. A basic code point's flag records whether it is an
     *                  uppercase ASCII letter; a decoded code point's flag records
     *                  whether the last digit of its delta was uppercase.
     *                  NOTE(review): when non-null the array is written at every result
     *                  index, so it must be at least as long as the result - confirm
     *                  against callers.
     * @return a new buffer holding the decoded UTF-16 text
     * @throws ParseException (error offset -1) if src has a non-ASCII char, an invalid
     *                        digit, a truncated or overflowing delta, or decodes to a
     *                        surrogate or to a value above U+10FFFF
     */
    public static StringBuffer decode(StringBuffer src, boolean[] caseFlags)
            throws ParseException {
        final int srcLength = src.length();

        /*
         * Handle the basic code points:
         * Let basicLength be the number of input code points
         * before the last delimiter, or 0 if there is none,
         * then copy the first basicLength code points to the output.
         *
         * The two following loops iterate backward.
         */
        int j = srcLength;
        while (j > 0) {
            if (src.charAt(--j) == DELIMITER) {
                break;
            }
        }
        final int basicLength = j;

        while (j > 0) {
            final char b = src.charAt(--j);
            if (!isBasic(b)) {
                throw new ParseException("Illegal char found", -1);
            }
            if (caseFlags != null) {
                caseFlags[j] = isBasicUpperCase(b);
            }
        }

        /* Growable output: the result is not limited to MAX_CP_COUNT chars. */
        final StringBuilder dest = new StringBuilder(Math.max(srcLength, 16));
        dest.append(src, 0, basicLength);

        /* Initialize the state: */
        int n = INITIAL_N;
        int i = 0;
        int bias = INITIAL_BIAS;
        int destCPCount = basicLength;
        /*
         * UTF-16 index of the first supplementary code point in dest. Before it,
         * code point indexes and code unit indexes coincide. Starts "infinite".
         */
        int firstSupplementaryIndex = 1000000000;

        /*
         * Main decoding loop:
         * Start just after the last delimiter if any
         * basic code points were copied; start at the beginning otherwise.
         */
        int in = basicLength > 0 ? basicLength + 1 : 0;
        while (in < srcLength) {
            /*
             * in is the index of the next character to be consumed, and
             * destCPCount is the number of code points in the output.
             *
             * Decode a generalized variable-length integer into delta,
             * which gets added to i. The overflow checking is easier
             * if we increase i as we go, then subtract off its starting
             * value at the end to obtain delta.
             */
            final int oldi = i;
            int w = 1;
            for (int k = BASE; /* no condition */; k += BASE) {
                if (in >= srcLength) {
                    throw new ParseException("Illegal char found", -1);
                }

                /*
                 * Only basic code points can be digits. Checking the char itself
                 * (rather than a narrowed copy of it) keeps the table index in range
                 * and stops non-ASCII chars from aliasing onto ASCII digits.
                 */
                final char c = src.charAt(in++);
                final int digit = isBasic(c) ? basicToDigit[c] : -1;
                if (digit < 0) {
                    throw new ParseException("Invalid char found", -1);
                }
                if (digit > (0x7fffffff - i) / w) {
                    /* integer overflow */
                    throw new ParseException("Illegal char found", -1);
                }

                i += digit * w;
                final int t = threshold(k, bias);
                if (digit < t) {
                    break;
                }

                if (w > 0x7fffffff / (BASE - t)) {
                    /* integer overflow */
                    throw new ParseException("Illegal char found", -1);
                }
                w *= BASE - t;
            }

            /*
             * Modification from sample code:
             * Increments destCPCount here,
             * where needed instead of in for() loop tail.
             */
            ++destCPCount;
            bias = adaptBias(i - oldi, destCPCount, (oldi == 0));

            /*
             * i was supposed to wrap around from (incremented) destCPCount to 0,
             * incrementing n each time, so we'll fix that now:
             */
            if (i / destCPCount > (0x7fffffff - n)) {
                /* integer overflow */
                throw new ParseException("Illegal char found", -1);
            }

            n += i / destCPCount;
            i %= destCPCount;

            if (n > 0x10ffff || isSurrogate(n)) {
                /* Unicode code point overflow */
                throw new ParseException("Illegal char found", -1);
            }

            /* Insert n at code point position i of the output: */
            final int cpLength = Character.charCount(n);
            final int destLength = dest.length();
            final int codeUnitIndex;

            /*
             * Handle indexes when supplementary code points are present.
             *
             * In almost all cases, there will be only BMP code points before i
             * and even in the entire string, so the code point index is also
             * the code unit index. Only insertions behind a supplementary code
             * point need the slower code point walk.
             */
            if (i <= firstSupplementaryIndex) {
                codeUnitIndex = i;
                if (cpLength > 1) {
                    firstSupplementaryIndex = codeUnitIndex;
                } else {
                    ++firstSupplementaryIndex;
                }
            } else {
                codeUnitIndex = dest.offsetByCodePoints(firstSupplementaryIndex,
                                                        i - firstSupplementaryIndex);
            }

            /* use the UTF-16 index codeUnitIndex instead of the code point index i */
            if (caseFlags != null) {
                if (codeUnitIndex < destLength) {
                    /* Shift the flags of the chars that move right. */
                    System.arraycopy(caseFlags, codeUnitIndex,
                                     caseFlags, codeUnitIndex + cpLength,
                                     destLength - codeUnitIndex);
                }
                /* Case of last character determines uppercase flag: */
                caseFlags[codeUnitIndex] = isBasicUpperCase(src.charAt(in - 1));
                if (cpLength == 2) {
                    caseFlags[codeUnitIndex + 1] = false;
                }
            }
            /* One code unit for BMP, a surrogate pair for a supplementary code point. */
            dest.insert(codeUnitIndex, Character.toChars(n));
            ++i;
        }

        return new StringBuffer(dest);
    }
}
513
514