Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mobile
Path: blob/master/src/jdk.charsets/share/classes/sun/nio/cs/ext/JISAutoDetect.java
41161 views
1
/*
 * Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
25
26
package sun.nio.cs.ext;
27
28
import java.nio.ByteBuffer;
29
import java.nio.CharBuffer;
30
import java.nio.charset.Charset;
31
import java.nio.charset.CharsetDecoder;
32
import java.nio.charset.CharsetEncoder;
33
import java.nio.charset.CoderResult;
34
import java.nio.charset.CharacterCodingException;
35
import java.nio.charset.MalformedInputException;
36
import sun.nio.cs.DelegatableDecoder;
37
import sun.nio.cs.HistoricallyNamedCharset;
38
import java.security.AccessController;
39
import java.security.PrivilegedAction;
40
import sun.nio.cs.*;
41
import static java.lang.Character.UnicodeBlock;
42
43
44
public class JISAutoDetect
45
extends Charset
46
implements HistoricallyNamedCharset
47
{
48
49
private static final int EUCJP_MASK = 0x01;
50
private static final int SJIS2B_MASK = 0x02;
51
private static final int SJIS1B_MASK = 0x04;
52
private static final int EUCJP_KANA1_MASK = 0x08;
53
private static final int EUCJP_KANA2_MASK = 0x10;
54
55
public JISAutoDetect() {
56
super("x-JISAutoDetect", ExtendedCharsets.aliasesFor("x-JISAutoDetect"));
57
}
58
59
public boolean contains(Charset cs) {
60
return ((cs.name().equals("US-ASCII"))
61
|| (cs instanceof SJIS)
62
|| (cs instanceof EUC_JP)
63
|| (cs instanceof ISO2022_JP));
64
}
65
66
public boolean canEncode() {
67
return false;
68
}
69
70
public CharsetDecoder newDecoder() {
71
return new Decoder(this);
72
}
73
74
public String historicalName() {
75
return "JISAutoDetect";
76
}
77
78
public CharsetEncoder newEncoder() {
79
throw new UnsupportedOperationException();
80
}
81
82
// A heuristic algorithm for guessing if EUC-decoded text really
83
// might be Japanese text. Better heuristics are possible...
84
private static boolean looksLikeJapanese(CharBuffer cb) {
85
int hiragana = 0; // Fullwidth Hiragana
86
int katakana = 0; // Halfwidth Katakana
87
while (cb.hasRemaining()) {
88
char c = cb.get();
89
if (0x3040 <= c && c <= 0x309f && ++hiragana > 1) return true;
90
if (0xff65 <= c && c <= 0xff9f && ++katakana > 1) return true;
91
}
92
return false;
93
}
94
95
private static class Decoder extends CharsetDecoder {
96
@SuppressWarnings("removal")
97
private static final String osName = AccessController.doPrivileged(
98
(PrivilegedAction<String>) () -> System.getProperty("os.name"));
99
100
private static final String SJISName = getSJISName();
101
private static final String EUCJPName = "EUC_JP";
102
private DelegatableDecoder detectedDecoder = null;
103
104
public Decoder(Charset cs) {
105
super(cs, 0.5f, 1.0f);
106
}
107
108
private static boolean isPlainASCII(byte b) {
109
return b >= 0 && b != 0x1b;
110
}
111
112
private static void copyLeadingASCII(ByteBuffer src, CharBuffer dst) {
113
int start = src.position();
114
int limit = start + Math.min(src.remaining(), dst.remaining());
115
int p;
116
byte b;
117
for (p = start; p < limit && isPlainASCII(b = src.get(p)); p++)
118
dst.put((char)(b & 0xff));
119
src.position(p);
120
}
121
122
private CoderResult decodeLoop(DelegatableDecoder decoder,
123
ByteBuffer src, CharBuffer dst) {
124
((CharsetDecoder)decoder).reset();
125
detectedDecoder = decoder;
126
return detectedDecoder.decodeLoop(src, dst);
127
}
128
129
protected CoderResult decodeLoop(ByteBuffer src, CharBuffer dst) {
130
if (detectedDecoder == null) {
131
copyLeadingASCII(src, dst);
132
133
// All ASCII?
134
if (! src.hasRemaining())
135
return CoderResult.UNDERFLOW;
136
// Overflow only if there is still ascii but no out buffer.
137
if (!dst.hasRemaining() &&
138
isPlainASCII(src.get(src.position())))
139
return CoderResult.OVERFLOW;
140
141
// We need to perform double, not float, arithmetic; otherwise
142
// we lose low order bits when src is larger than 2**24.
143
int cbufsiz = (int)(src.limit() * (double)maxCharsPerByte());
144
CharBuffer sandbox = CharBuffer.allocate(cbufsiz);
145
146
// First try ISO-2022-JP, since there is no ambiguity
147
Charset cs2022 = Charset.forName("ISO-2022-JP");
148
DelegatableDecoder dd2022
149
= (DelegatableDecoder) cs2022.newDecoder();
150
ByteBuffer src2022 = src.asReadOnlyBuffer();
151
CoderResult res2022 = dd2022.decodeLoop(src2022, sandbox);
152
if (! res2022.isError())
153
return decodeLoop(dd2022, src, dst);
154
155
// We must choose between EUC and SJIS
156
Charset csEUCJ = Charset.forName(EUCJPName);
157
Charset csSJIS = Charset.forName(SJISName);
158
159
DelegatableDecoder ddEUCJ
160
= (DelegatableDecoder) csEUCJ.newDecoder();
161
DelegatableDecoder ddSJIS
162
= (DelegatableDecoder) csSJIS.newDecoder();
163
164
ByteBuffer srcEUCJ = src.asReadOnlyBuffer();
165
sandbox.clear();
166
CoderResult resEUCJ = ddEUCJ.decodeLoop(srcEUCJ, sandbox);
167
// If EUC decoding fails, must be SJIS
168
if (resEUCJ.isError())
169
return decodeLoop(ddSJIS, src, dst);
170
ByteBuffer srcSJIS = src.asReadOnlyBuffer();
171
CharBuffer sandboxSJIS = CharBuffer.allocate(cbufsiz);
172
CoderResult resSJIS = ddSJIS.decodeLoop(srcSJIS, sandboxSJIS);
173
// If SJIS decoding fails, must be EUC
174
if (resSJIS.isError())
175
return decodeLoop(ddEUCJ, src, dst);
176
177
// From here on, we have some ambiguity, and must guess.
178
179
// We prefer input that does not appear to end mid-character.
180
if (srcEUCJ.position() > srcSJIS.position())
181
return decodeLoop(ddEUCJ, src, dst);
182
183
if (srcEUCJ.position() < srcSJIS.position())
184
return decodeLoop(ddSJIS, src, dst);
185
186
// end-of-input is after the first byte of the first char?
187
if (src.position() == srcEUCJ.position())
188
return CoderResult.UNDERFLOW;
189
190
// Use heuristic knowledge of typical Japanese text
191
sandbox.flip();
192
return decodeLoop(looksLikeJapanese(sandbox) ? ddEUCJ : ddSJIS,
193
src, dst);
194
}
195
196
return detectedDecoder.decodeLoop(src, dst);
197
}
198
199
protected void implReset() {
200
detectedDecoder = null;
201
}
202
203
protected CoderResult implFlush(CharBuffer out) {
204
if (detectedDecoder != null)
205
return detectedDecoder.implFlush(out);
206
else
207
return super.implFlush(out);
208
}
209
210
public boolean isAutoDetecting() {
211
return true;
212
}
213
214
public boolean isCharsetDetected() {
215
return detectedDecoder != null;
216
}
217
218
public Charset detectedCharset() {
219
if (detectedDecoder == null)
220
throw new IllegalStateException("charset not yet detected");
221
return ((CharsetDecoder) detectedDecoder).charset();
222
}
223
224
225
/**
226
* Returned Shift_JIS Charset name is OS dependent
227
*/
228
private static String getSJISName() {
229
if (osName.startsWith("Windows"))
230
return("windows-31J");
231
else
232
return("Shift_JIS");
233
}
234
235
}
236
}
237
238