Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mobile
Path: blob/master/src/java.base/share/classes/sun/nio/cs/CESU_8.java
41159 views
1
/*
2
* Copyright (c) 2011, 2021, Oracle and/or its affiliates. All rights reserved.
3
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4
*
5
* This code is free software; you can redistribute it and/or modify it
6
* under the terms of the GNU General Public License version 2 only, as
7
* published by the Free Software Foundation. Oracle designates this
8
* particular file as subject to the "Classpath" exception as provided
9
* by Oracle in the LICENSE file that accompanied this code.
10
*
11
* This code is distributed in the hope that it will be useful, but WITHOUT
12
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14
* version 2 for more details (a copy is included in the LICENSE file that
15
* accompanied this code).
16
*
17
* You should have received a copy of the GNU General Public License version
18
* 2 along with this work; if not, write to the Free Software Foundation,
19
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20
*
21
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22
* or visit www.oracle.com if you need additional information or have any
23
* questions.
24
*/
25
26
package sun.nio.cs;
27
28
import jdk.internal.access.JavaLangAccess;
29
import jdk.internal.access.SharedSecrets;
30
31
import java.nio.Buffer;
32
import java.nio.ByteBuffer;
33
import java.nio.CharBuffer;
34
import java.nio.charset.Charset;
35
import java.nio.charset.CharsetDecoder;
36
import java.nio.charset.CharsetEncoder;
37
import java.nio.charset.CoderResult;
38
import java.nio.charset.CodingErrorAction;
39
40
/* Legal CESU-8 Byte Sequences
 *
 * #    Code Points      Bits   Bit/Byte pattern
 * 1                     7      0xxxxxxx
 *      U+0000..U+007F          00..7F
 *
 * 2                     11     110xxxxx    10xxxxxx
 *      U+0080..U+07FF          C2..DF      80..BF
 *
 * 3                     16     1110xxxx    10xxxxxx    10xxxxxx
 *      U+0800..U+0FFF          E0          A0..BF      80..BF
 *      U+1000..U+FFFF          E1..EF      80..BF      80..BF
 *
 */
54
55
class CESU_8 extends Unicode
56
{
57
/**
 * Creates the charset by passing the name {@code "CESU-8"} and the alias
 * list returned by {@code StandardCharsets.aliases_CESU_8()} to the
 * superclass constructor.
 */
public CESU_8() {
    super("CESU-8", StandardCharsets.aliases_CESU_8());
}
60
61
public String historicalName() {
62
return "CESU8";
63
}
64
65
public CharsetDecoder newDecoder() {
66
return new Decoder(this);
67
}
68
69
public CharsetEncoder newEncoder() {
70
return new Encoder(this);
71
}
72
73
private static final void updatePositions(Buffer src, int sp,
74
Buffer dst, int dp) {
75
src.position(sp - src.arrayOffset());
76
dst.position(dp - dst.arrayOffset());
77
}
78
79
private static class Decoder extends CharsetDecoder
80
implements ArrayDecoder {
81
82
private static final JavaLangAccess JLA = SharedSecrets.getJavaLangAccess();
83
84
/**
 * Creates a decoder for the given charset.
 *
 * <p>Both float arguments of the {@code CharsetDecoder} constructor
 * (average and maximum chars produced per input byte, per the
 * {@code CharsetDecoder} API) are {@code 1.0f}.
 *
 * @param cs the charset this decoder belongs to
 */
private Decoder(Charset cs) {
    super(cs, 1.0f, 1.0f);
}
87
88
private static boolean isNotContinuation(int b) {
89
return (b & 0xc0) != 0x80;
90
}
91
92
// [E0] [A0..BF] [80..BF]
93
// [E1..EF] [80..BF] [80..BF]
94
private static boolean isMalformed3(int b1, int b2, int b3) {
95
return (b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) ||
96
(b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80;
97
}
98
99
// only used when there is only one byte left in src buffer
100
private static boolean isMalformed3_2(int b1, int b2) {
101
return (b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) ||
102
(b2 & 0xc0) != 0x80;
103
}
104
105
private static CoderResult malformedN(ByteBuffer src, int nb) {
106
switch (nb) {
107
case 1:
108
case 2: // always 1
109
return CoderResult.malformedForLength(1);
110
case 3:
111
int b1 = src.get();
112
int b2 = src.get(); // no need to lookup b3
113
return CoderResult.malformedForLength(
114
((b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) ||
115
isNotContinuation(b2)) ? 1 : 2);
116
case 4: // we don't care the speed here
117
b1 = src.get() & 0xff;
118
b2 = src.get() & 0xff;
119
if (b1 > 0xf4 ||
120
(b1 == 0xf0 && (b2 < 0x90 || b2 > 0xbf)) ||
121
(b1 == 0xf4 && (b2 & 0xf0) != 0x80) ||
122
isNotContinuation(b2))
123
return CoderResult.malformedForLength(1);
124
if (isNotContinuation(src.get()))
125
return CoderResult.malformedForLength(2);
126
return CoderResult.malformedForLength(3);
127
default:
128
assert false;
129
return null;
130
}
131
}
132
133
private static CoderResult malformed(ByteBuffer src, int sp,
134
CharBuffer dst, int dp,
135
int nb)
136
{
137
src.position(sp - src.arrayOffset());
138
CoderResult cr = malformedN(src, nb);
139
updatePositions(src, sp, dst, dp);
140
return cr;
141
}
142
143
144
private static CoderResult malformed(ByteBuffer src,
145
int mark, int nb)
146
{
147
src.position(mark);
148
CoderResult cr = malformedN(src, nb);
149
src.position(mark);
150
return cr;
151
}
152
153
private static CoderResult malformedForLength(ByteBuffer src,
154
int sp,
155
CharBuffer dst,
156
int dp,
157
int malformedNB)
158
{
159
updatePositions(src, sp, dst, dp);
160
return CoderResult.malformedForLength(malformedNB);
161
}
162
163
private static CoderResult malformedForLength(ByteBuffer src,
164
int mark,
165
int malformedNB)
166
{
167
src.position(mark);
168
return CoderResult.malformedForLength(malformedNB);
169
}
170
171
172
private static CoderResult xflow(Buffer src, int sp, int sl,
173
Buffer dst, int dp, int nb) {
174
updatePositions(src, sp, dst, dp);
175
return (nb == 0 || sl - sp < nb)
176
? CoderResult.UNDERFLOW : CoderResult.OVERFLOW;
177
}
178
179
private static CoderResult xflow(Buffer src, int mark, int nb) {
180
src.position(mark);
181
return (nb == 0 || src.remaining() < nb)
182
? CoderResult.UNDERFLOW : CoderResult.OVERFLOW;
183
}
184
185
private CoderResult decodeArrayLoop(ByteBuffer src,
186
CharBuffer dst)
187
{
188
// This method is optimized for ASCII input.
189
byte[] sa = src.array();
190
int soff = src.arrayOffset();
191
int sp = soff + src.position();
192
int sl = soff + src.limit();
193
194
char[] da = dst.array();
195
int doff = dst.arrayOffset();
196
int dp = doff + dst.position();
197
int dl = doff + dst.limit();
198
199
int n = JLA.decodeASCII(sa, sp, da, dp, Math.min(sl - sp, dl - dp));
200
sp += n;
201
dp += n;
202
203
while (sp < sl) {
204
int b1 = sa[sp];
205
if (b1 >= 0) {
206
// 1 byte, 7 bits: 0xxxxxxx
207
if (dp >= dl)
208
return xflow(src, sp, sl, dst, dp, 1);
209
da[dp++] = (char) b1;
210
sp++;
211
} else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) {
212
// 2 bytes, 11 bits: 110xxxxx 10xxxxxx
213
if (sl - sp < 2 || dp >= dl)
214
return xflow(src, sp, sl, dst, dp, 2);
215
int b2 = sa[sp + 1];
216
if (isNotContinuation(b2))
217
return malformedForLength(src, sp, dst, dp, 1);
218
da[dp++] = (char) (((b1 << 6) ^ b2)
219
^
220
(((byte) 0xC0 << 6) ^
221
((byte) 0x80 << 0)));
222
sp += 2;
223
} else if ((b1 >> 4) == -2) {
224
// 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx
225
int srcRemaining = sl - sp;
226
if (srcRemaining < 3 || dp >= dl) {
227
if (srcRemaining > 1 && isMalformed3_2(b1, sa[sp + 1]))
228
return malformedForLength(src, sp, dst, dp, 1);
229
return xflow(src, sp, sl, dst, dp, 3);
230
}
231
int b2 = sa[sp + 1];
232
int b3 = sa[sp + 2];
233
if (isMalformed3(b1, b2, b3))
234
return malformed(src, sp, dst, dp, 3);
235
da[dp++] = (char)
236
((b1 << 12) ^
237
(b2 << 6) ^
238
(b3 ^
239
(((byte) 0xE0 << 12) ^
240
((byte) 0x80 << 6) ^
241
((byte) 0x80 << 0))));
242
sp += 3;
243
} else {
244
return malformed(src, sp, dst, dp, 1);
245
}
246
}
247
return xflow(src, sp, sl, dst, dp, 0);
248
}
249
250
private CoderResult decodeBufferLoop(ByteBuffer src,
251
CharBuffer dst)
252
{
253
int mark = src.position();
254
int limit = src.limit();
255
while (mark < limit) {
256
int b1 = src.get();
257
if (b1 >= 0) {
258
// 1 byte, 7 bits: 0xxxxxxx
259
if (dst.remaining() < 1)
260
return xflow(src, mark, 1); // overflow
261
dst.put((char) b1);
262
mark++;
263
} else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) {
264
// 2 bytes, 11 bits: 110xxxxx 10xxxxxx
265
if (limit - mark < 2|| dst.remaining() < 1)
266
return xflow(src, mark, 2);
267
int b2 = src.get();
268
if (isNotContinuation(b2))
269
return malformedForLength(src, mark, 1);
270
dst.put((char) (((b1 << 6) ^ b2)
271
^
272
(((byte) 0xC0 << 6) ^
273
((byte) 0x80 << 0))));
274
mark += 2;
275
} else if ((b1 >> 4) == -2) {
276
// 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx
277
int srcRemaining = limit - mark;
278
if (srcRemaining < 3 || dst.remaining() < 1) {
279
if (srcRemaining > 1 && isMalformed3_2(b1, src.get()))
280
return malformedForLength(src, mark, 1);
281
return xflow(src, mark, 3);
282
}
283
int b2 = src.get();
284
int b3 = src.get();
285
if (isMalformed3(b1, b2, b3))
286
return malformed(src, mark, 3);
287
dst.put((char)
288
((b1 << 12) ^
289
(b2 << 6) ^
290
(b3 ^
291
(((byte) 0xE0 << 12) ^
292
((byte) 0x80 << 6) ^
293
((byte) 0x80 << 0)))));
294
mark += 3;
295
} else {
296
return malformed(src, mark, 1);
297
}
298
}
299
return xflow(src, mark, 0);
300
}
301
302
protected CoderResult decodeLoop(ByteBuffer src,
303
CharBuffer dst)
304
{
305
if (src.hasArray() && dst.hasArray())
306
return decodeArrayLoop(src, dst);
307
else
308
return decodeBufferLoop(src, dst);
309
}
310
311
private static ByteBuffer getByteBuffer(ByteBuffer bb, byte[] ba, int sp)
312
{
313
if (bb == null)
314
bb = ByteBuffer.wrap(ba);
315
bb.position(sp);
316
return bb;
317
}
318
319
// returns -1 if there is/are malformed byte(s) and the
320
// "action" for malformed input is not REPLACE.
321
public int decode(byte[] sa, int sp, int len, char[] da) {
322
final int sl = sp + len;
323
int dp = 0;
324
int dlASCII = Math.min(len, da.length);
325
ByteBuffer bb = null; // only necessary if malformed
326
327
// ASCII only optimized loop
328
while (dp < dlASCII && sa[sp] >= 0)
329
da[dp++] = (char) sa[sp++];
330
331
while (sp < sl) {
332
int b1 = sa[sp++];
333
if (b1 >= 0) {
334
// 1 byte, 7 bits: 0xxxxxxx
335
da[dp++] = (char) b1;
336
} else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) {
337
// 2 bytes, 11 bits: 110xxxxx 10xxxxxx
338
if (sp < sl) {
339
int b2 = sa[sp++];
340
if (isNotContinuation(b2)) {
341
if (malformedInputAction() != CodingErrorAction.REPLACE)
342
return -1;
343
da[dp++] = replacement().charAt(0);
344
sp--; // malformedN(bb, 2) always returns 1
345
} else {
346
da[dp++] = (char) (((b1 << 6) ^ b2)^
347
(((byte) 0xC0 << 6) ^
348
((byte) 0x80 << 0)));
349
}
350
continue;
351
}
352
if (malformedInputAction() != CodingErrorAction.REPLACE)
353
return -1;
354
da[dp++] = replacement().charAt(0);
355
return dp;
356
} else if ((b1 >> 4) == -2) {
357
// 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx
358
if (sp + 1 < sl) {
359
int b2 = sa[sp++];
360
int b3 = sa[sp++];
361
if (isMalformed3(b1, b2, b3)) {
362
if (malformedInputAction() != CodingErrorAction.REPLACE)
363
return -1;
364
da[dp++] = replacement().charAt(0);
365
sp -=3;
366
bb = getByteBuffer(bb, sa, sp);
367
sp += malformedN(bb, 3).length();
368
} else {
369
da[dp++] = (char)((b1 << 12) ^
370
(b2 << 6) ^
371
(b3 ^
372
(((byte) 0xE0 << 12) ^
373
((byte) 0x80 << 6) ^
374
((byte) 0x80 << 0))));
375
}
376
continue;
377
}
378
if (malformedInputAction() != CodingErrorAction.REPLACE)
379
return -1;
380
if (sp < sl && isMalformed3_2(b1, sa[sp])) {
381
da[dp++] = replacement().charAt(0);
382
continue;
383
384
}
385
da[dp++] = replacement().charAt(0);
386
return dp;
387
} else {
388
if (malformedInputAction() != CodingErrorAction.REPLACE)
389
return -1;
390
da[dp++] = replacement().charAt(0);
391
}
392
}
393
return dp;
394
}
395
}
396
397
private static class Encoder extends CharsetEncoder
398
implements ArrayEncoder {
399
400
/**
 * Creates an encoder for the given charset.
 *
 * <p>The float arguments of the {@code CharsetEncoder} constructor
 * (average and maximum bytes produced per input char, per the
 * {@code CharsetEncoder} API) are {@code 1.1f} and {@code 3.0f}.
 *
 * @param cs the charset this encoder belongs to
 */
private Encoder(Charset cs) {
    super(cs, 1.1f, 3.0f);
}
403
404
public boolean canEncode(char c) {
405
return !Character.isSurrogate(c);
406
}
407
408
public boolean isLegalReplacement(byte[] repl) {
409
return ((repl.length == 1 && repl[0] >= 0) ||
410
super.isLegalReplacement(repl));
411
}
412
413
private static CoderResult overflow(CharBuffer src, int sp,
414
ByteBuffer dst, int dp) {
415
updatePositions(src, sp, dst, dp);
416
return CoderResult.OVERFLOW;
417
}
418
419
private static CoderResult overflow(CharBuffer src, int mark) {
420
src.position(mark);
421
return CoderResult.OVERFLOW;
422
}
423
424
private static void to3Bytes(byte[] da, int dp, char c) {
425
da[dp] = (byte)(0xe0 | ((c >> 12)));
426
da[dp + 1] = (byte)(0x80 | ((c >> 6) & 0x3f));
427
da[dp + 2] = (byte)(0x80 | (c & 0x3f));
428
}
429
430
private static void to3Bytes(ByteBuffer dst, char c) {
431
dst.put((byte)(0xe0 | ((c >> 12))));
432
dst.put((byte)(0x80 | ((c >> 6) & 0x3f)));
433
dst.put((byte)(0x80 | (c & 0x3f)));
434
}
435
436
private Surrogate.Parser sgp;
437
private char[] c2;
438
private CoderResult encodeArrayLoop(CharBuffer src,
439
ByteBuffer dst)
440
{
441
char[] sa = src.array();
442
int sp = src.arrayOffset() + src.position();
443
int sl = src.arrayOffset() + src.limit();
444
445
byte[] da = dst.array();
446
int dp = dst.arrayOffset() + dst.position();
447
int dl = dst.arrayOffset() + dst.limit();
448
int dlASCII = dp + Math.min(sl - sp, dl - dp);
449
450
// ASCII only loop
451
while (dp < dlASCII && sa[sp] < '\u0080')
452
da[dp++] = (byte) sa[sp++];
453
while (sp < sl) {
454
char c = sa[sp];
455
if (c < 0x80) {
456
// Have at most seven bits
457
if (dp >= dl)
458
return overflow(src, sp, dst, dp);
459
da[dp++] = (byte)c;
460
} else if (c < 0x800) {
461
// 2 bytes, 11 bits
462
if (dl - dp < 2)
463
return overflow(src, sp, dst, dp);
464
da[dp++] = (byte)(0xc0 | (c >> 6));
465
da[dp++] = (byte)(0x80 | (c & 0x3f));
466
} else if (Character.isSurrogate(c)) {
467
// Have a surrogate pair
468
if (sgp == null)
469
sgp = new Surrogate.Parser();
470
int uc = sgp.parse(c, sa, sp, sl);
471
if (uc < 0) {
472
updatePositions(src, sp, dst, dp);
473
return sgp.error();
474
}
475
if (dl - dp < 6)
476
return overflow(src, sp, dst, dp);
477
to3Bytes(da, dp, Character.highSurrogate(uc));
478
dp += 3;
479
to3Bytes(da, dp, Character.lowSurrogate(uc));
480
dp += 3;
481
sp++; // 2 chars
482
} else {
483
// 3 bytes, 16 bits
484
if (dl - dp < 3)
485
return overflow(src, sp, dst, dp);
486
to3Bytes(da, dp, c);
487
dp += 3;
488
}
489
sp++;
490
}
491
updatePositions(src, sp, dst, dp);
492
return CoderResult.UNDERFLOW;
493
}
494
495
private CoderResult encodeBufferLoop(CharBuffer src,
496
ByteBuffer dst)
497
{
498
int mark = src.position();
499
while (src.hasRemaining()) {
500
char c = src.get();
501
if (c < 0x80) {
502
// Have at most seven bits
503
if (!dst.hasRemaining())
504
return overflow(src, mark);
505
dst.put((byte)c);
506
} else if (c < 0x800) {
507
// 2 bytes, 11 bits
508
if (dst.remaining() < 2)
509
return overflow(src, mark);
510
dst.put((byte)(0xc0 | (c >> 6)));
511
dst.put((byte)(0x80 | (c & 0x3f)));
512
} else if (Character.isSurrogate(c)) {
513
// Have a surrogate pair
514
if (sgp == null)
515
sgp = new Surrogate.Parser();
516
int uc = sgp.parse(c, src);
517
if (uc < 0) {
518
src.position(mark);
519
return sgp.error();
520
}
521
if (dst.remaining() < 6)
522
return overflow(src, mark);
523
to3Bytes(dst, Character.highSurrogate(uc));
524
to3Bytes(dst, Character.lowSurrogate(uc));
525
mark++; // 2 chars
526
} else {
527
// 3 bytes, 16 bits
528
if (dst.remaining() < 3)
529
return overflow(src, mark);
530
to3Bytes(dst, c);
531
}
532
mark++;
533
}
534
src.position(mark);
535
return CoderResult.UNDERFLOW;
536
}
537
538
protected final CoderResult encodeLoop(CharBuffer src,
539
ByteBuffer dst)
540
{
541
if (src.hasArray() && dst.hasArray())
542
return encodeArrayLoop(src, dst);
543
else
544
return encodeBufferLoop(src, dst);
545
}
546
547
// returns -1 if there is malformed char(s) and the
548
// "action" for malformed input is not REPLACE.
549
public int encode(char[] sa, int sp, int len, byte[] da) {
550
int sl = sp + len;
551
int dp = 0;
552
int dlASCII = dp + Math.min(len, da.length);
553
554
// ASCII only optimized loop
555
while (dp < dlASCII && sa[sp] < '\u0080')
556
da[dp++] = (byte) sa[sp++];
557
558
while (sp < sl) {
559
char c = sa[sp++];
560
if (c < 0x80) {
561
// Have at most seven bits
562
da[dp++] = (byte)c;
563
} else if (c < 0x800) {
564
// 2 bytes, 11 bits
565
da[dp++] = (byte)(0xc0 | (c >> 6));
566
da[dp++] = (byte)(0x80 | (c & 0x3f));
567
} else if (Character.isSurrogate(c)) {
568
if (sgp == null)
569
sgp = new Surrogate.Parser();
570
int uc = sgp.parse(c, sa, sp - 1, sl);
571
if (uc < 0) {
572
if (malformedInputAction() != CodingErrorAction.REPLACE)
573
return -1;
574
da[dp++] = replacement()[0];
575
} else {
576
to3Bytes(da, dp, Character.highSurrogate(uc));
577
dp += 3;
578
to3Bytes(da, dp, Character.lowSurrogate(uc));
579
dp += 3;
580
sp++; // 2 chars
581
}
582
} else {
583
// 3 bytes, 16 bits
584
to3Bytes(da, dp, c);
585
dp += 3;
586
}
587
}
588
return dp;
589
}
590
}
591
}
592
593