CoCalc -- Charset.java

GitHub Repository: PojavLauncherTeam/mobile
Path: blob/master/src/java.base/share/classes/java/nio/charset/Charset.java
⁴¹¹⁵⁹ views
1
/*
2
 * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved.
3
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4
 *
5
 * This code is free software; you can redistribute it and/or modify it
6
 * under the terms of the GNU General Public License version 2 only, as
7
 * published by the Free Software Foundation.  Oracle designates this
8
 * particular file as subject to the "Classpath" exception as provided
9
 * by Oracle in the LICENSE file that accompanied this code.
10
 *
11
 * This code is distributed in the hope that it will be useful, but WITHOUT
12
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14
 * version 2 for more details (a copy is included in the LICENSE file that
15
 * accompanied this code).
16
 *
17
 * You should have received a copy of the GNU General Public License version
18
 * 2 along with this work; if not, write to the Free Software Foundation,
19
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20
 *
21
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22
 * or visit www.oracle.com if you need additional information or have any
23
 * questions.
24
 */
25

26
package java.nio.charset;
27

28
import jdk.internal.misc.VM;
29
import sun.nio.cs.ThreadLocalCoders;
30
import sun.security.action.GetPropertyAction;
31

32
import java.nio.ByteBuffer;
33
import java.nio.CharBuffer;
34
import java.nio.charset.spi.CharsetProvider;
35
import java.security.AccessController;
36
import java.security.PrivilegedAction;
37
import java.util.Arrays;
38
import java.util.Collections;
39
import java.util.HashSet;
40
import java.util.Iterator;
41
import java.util.Locale;
42
import java.util.Map;
43
import java.util.NoSuchElementException;
44
import java.util.Objects;
45
import java.util.ServiceConfigurationError;
46
import java.util.ServiceLoader;
47
import java.util.Set;
48
import java.util.SortedMap;
49
import java.util.TreeMap;
50

51

52
/**
53
 * A named mapping between sequences of sixteen-bit Unicode <a
54
 * href="../../lang/Character.html#unicode">code units</a> and sequences of
55
 * bytes.  This class defines methods for creating decoders and encoders and
56
 * for retrieving the various names associated with a charset.  Instances of
57
 * this class are immutable.
58
 *
59
 * <p> This class also defines static methods for testing whether a particular
60
 * charset is supported, for locating charset instances by name, and for
61
 * constructing a map that contains every charset for which support is
62
 * available in the current Java virtual machine.  Support for new charsets can
63
 * be added via the service-provider interface defined in the {@link
64
 * java.nio.charset.spi.CharsetProvider} class.
65
 *
66
 * <p> All of the methods defined in this class are safe for use by multiple
67
 * concurrent threads.
68
 *
69
 *
70
 * <h2><a id="names">Charset names</a></h2>
71
 *
72
 * <p> Charsets are named by strings composed of the following characters:
73
 *
74
 * <ul>
75
 *
76
 *   <li> The uppercase letters {@code 'A'} through {@code 'Z'}
77
 *        (<code>'&#92;u0041'</code>&nbsp;through&nbsp;<code>'&#92;u005a'</code>),
78
 *
79
 *   <li> The lowercase letters {@code 'a'} through {@code 'z'}
80
 *        (<code>'&#92;u0061'</code>&nbsp;through&nbsp;<code>'&#92;u007a'</code>),
81
 *
82
 *   <li> The digits {@code '0'} through {@code '9'}
83
 *        (<code>'&#92;u0030'</code>&nbsp;through&nbsp;<code>'&#92;u0039'</code>),
84
 *
85
 *   <li> The dash character {@code '-'}
86
 *        (<code>'&#92;u002d'</code>,&nbsp;<small>HYPHEN-MINUS</small>),
87
 *
88
 *   <li> The plus character {@code '+'}
89
 *        (<code>'&#92;u002b'</code>,&nbsp;<small>PLUS SIGN</small>),
90
 *
91
 *   <li> The period character {@code '.'}
92
 *        (<code>'&#92;u002e'</code>,&nbsp;<small>FULL STOP</small>),
93
 *
94
 *   <li> The colon character {@code ':'}
95
 *        (<code>'&#92;u003a'</code>,&nbsp;<small>COLON</small>), and
96
 *
97
 *   <li> The underscore character {@code '_'}
98
 *        (<code>'&#92;u005f'</code>,&nbsp;<small>LOW&nbsp;LINE</small>).
99
 *
100
 * </ul>
101
 *
102
 * A charset name must begin with either a letter or a digit.  The empty string
103
 * is not a legal charset name.  Charset names are not case-sensitive; that is,
104
 * case is always ignored when comparing charset names.  Charset names
105
 * generally follow the conventions documented in <a
106
 * href="http://www.ietf.org/rfc/rfc2278.txt"><i>RFC&nbsp;2278:&nbsp;IANA Charset
107
 * Registration Procedures</i></a>.
108
 *
109
 * <p> Every charset has a <i>canonical name</i> and may also have one or more
110
 * <i>aliases</i>.  The canonical name is returned by the {@link #name() name} method
111
 * of this class.  Canonical names are, by convention, usually in upper case.
112
 * The aliases of a charset are returned by the {@link #aliases() aliases}
113
 * method.
114
 *
115
 * <p><a id="hn">Some charsets have an <i>historical name</i> that is defined for
116
 * compatibility with previous versions of the Java platform.</a>  A charset's
117
 * historical name is either its canonical name or one of its aliases.  The
118
 * historical name is returned by the {@code getEncoding()} methods of the
119
 * {@link java.io.InputStreamReader#getEncoding InputStreamReader} and {@link
120
 * java.io.OutputStreamWriter#getEncoding OutputStreamWriter} classes.
121
 *
122
 * <p><a id="iana"> </a>If a charset listed in the <a
123
 * href="http://www.iana.org/assignments/character-sets"><i>IANA Charset
124
 * Registry</i></a> is supported by an implementation of the Java platform then
125
 * its canonical name must be the name listed in the registry. Many charsets
126
 * are given more than one name in the registry, in which case the registry
127
 * identifies one of the names as <i>MIME-preferred</i>.  If a charset has more
128
 * than one registry name then its canonical name must be the MIME-preferred
129
 * name and the other names in the registry must be valid aliases.  If a
130
 * supported charset is not listed in the IANA registry then its canonical name
131
 * must begin with one of the strings {@code "X-"} or {@code "x-"}.
132
 *
133
 * <p> The IANA charset registry does change over time, and so the canonical
134
 * name and the aliases of a particular charset may also change over time.  To
135
 * ensure compatibility it is recommended that no alias ever be removed from a
136
 * charset, and that if the canonical name of a charset is changed then its
137
 * previous canonical name be made into an alias.
138
 *
139
 *
140
 * <h2><a id="standard">Standard charsets</a></h2>
141
 *
142
 *
143
 * <p> Every implementation of the Java platform is required to support the
144
 * following standard charsets.  Consult the release documentation for your
145
 * implementation to see if any other charsets are supported.  The behavior
146
 * of such optional charsets may differ between implementations.
147
 *
148
 * <blockquote><table class="striped" style="width:80%">
149
 * <caption style="display:none">Description of standard charsets</caption>
150
 * <thead>
151
 * <tr><th scope="col" style="text-align:left">Charset</th><th scope="col" style="text-align:left">Description</th></tr>
152
 * </thead>
153
 * <tbody>
154
 * <tr><th scope="row" style="vertical-align:top">{@code US-ASCII}</th>
155
 *     <td>Seven-bit ASCII, a.k.a. {@code ISO646-US},
156
 *         a.k.a. the Basic Latin block of the Unicode character set</td></tr>
157
 * <tr><th scope="row" style="vertical-align:top"><code>ISO-8859-1&nbsp;&nbsp;</code></th>
158
 *     <td>ISO Latin Alphabet No. 1, a.k.a. {@code ISO-LATIN-1}</td></tr>
159
 * <tr><th scope="row" style="vertical-align:top">{@code UTF-8}</th>
160
 *     <td>Eight-bit UCS Transformation Format</td></tr>
161
 * <tr><th scope="row" style="vertical-align:top">{@code UTF-16BE}</th>
162
 *     <td>Sixteen-bit UCS Transformation Format,
163
 *         big-endian byte&nbsp;order</td></tr>
164
 * <tr><th scope="row" style="vertical-align:top">{@code UTF-16LE}</th>
165
 *     <td>Sixteen-bit UCS Transformation Format,
166
 *         little-endian byte&nbsp;order</td></tr>
167
 * <tr><th scope="row" style="vertical-align:top">{@code UTF-16}</th>
168
 *     <td>Sixteen-bit UCS Transformation Format,
169
 *         byte&nbsp;order identified by an optional byte-order mark</td></tr>
170
 * </tbody>
171
 * </table></blockquote>
172
 *
173
 * <p> The {@code UTF-8} charset is specified by <a
174
 * href="http://www.ietf.org/rfc/rfc2279.txt"><i>RFC&nbsp;2279</i></a>; the
175
 * transformation format upon which it is based is specified in
176
 * Amendment&nbsp;2 of ISO&nbsp;10646-1 and is also described in the <a
177
 * href="http://www.unicode.org/standard/standard.html"><i>Unicode
178
 * Standard</i></a>.
179
 *
180
 * <p> The {@code UTF-16} charsets are specified by <a
181
 * href="http://www.ietf.org/rfc/rfc2781.txt"><i>RFC&nbsp;2781</i></a>; the
182
 * transformation formats upon which they are based are specified in
183
 * Amendment&nbsp;1 of ISO&nbsp;10646-1 and are also described in the <a
184
 * href="http://www.unicode.org/standard/standard.html"><i>Unicode
185
 * Standard</i></a>.
186
 *
187
 * <p> The {@code UTF-16} charsets use sixteen-bit quantities and are
188
 * therefore sensitive to byte order.  In these encodings the byte order of a
189
 * stream may be indicated by an initial <i>byte-order mark</i> represented by
190
 * the Unicode character <code>'&#92;uFEFF'</code>.  Byte-order marks are handled
191
 * as follows:
192
 *
193
 * <ul>
194
 *
195
 *   <li><p> When decoding, the {@code UTF-16BE} and {@code UTF-16LE}
196
 *   charsets interpret the initial byte-order marks as a <small>ZERO-WIDTH
197
 *   NON-BREAKING SPACE</small>; when encoding, they do not write
198
 *   byte-order marks. </p></li>
199
 *
200
 *   <li><p> When decoding, the {@code UTF-16} charset interprets the
201
 *   byte-order mark at the beginning of the input stream to indicate the
202
 *   byte-order of the stream but defaults to big-endian if there is no
203
 *   byte-order mark; when encoding, it uses big-endian byte order and writes
204
 *   a big-endian byte-order mark. </p></li>
205
 *
206
 * </ul>
207
 *
208
 * In any case, byte order marks occurring after the first element of an
209
 * input sequence are not omitted since the same code is used to represent
210
 * <small>ZERO-WIDTH NON-BREAKING SPACE</small>.
211
 *
212
 * <p> Every instance of the Java virtual machine has a default charset, which
213
 * may or may not be one of the standard charsets.  The default charset is
214
 * determined during virtual-machine startup and typically depends upon the
215
 * locale and charset being used by the underlying operating system. </p>
216
 *
217
 * <p> The {@link StandardCharsets} class defines constants for each of the
218
 * standard charsets.
219
 *
220
 * <h2>Terminology</h2>
221
 *
222
 * <p> The name of this class is taken from the terms used in
223
 * <a href="http://www.ietf.org/rfc/rfc2278.txt"><i>RFC&nbsp;2278</i></a>.
224
 * In that document a <i>charset</i> is defined as the combination of
225
 * one or more coded character sets and a character-encoding scheme.
226
 * (This definition is confusing; some other software systems define
227
 * <i>charset</i> as a synonym for <i>coded character set</i>.)
228
 *
229
 * <p> A <i>coded character set</i> is a mapping between a set of abstract
230
 * characters and a set of integers.  US-ASCII, ISO&nbsp;8859-1,
231
 * JIS&nbsp;X&nbsp;0201, and Unicode are examples of coded character sets.
232
 *
233
 * <p> Some standards have defined a <i>character set</i> to be simply a
234
 * set of abstract characters without an associated assigned numbering.
235
 * An alphabet is an example of such a character set.  However, the subtle
236
 * distinction between <i>character set</i> and <i>coded character set</i>
237
 * is rarely used in practice; the former has become a short form for the
238
 * latter, including in the Java API specification.
239
 *
240
 * <p> A <i>character-encoding scheme</i> is a mapping between one or more
241
 * coded character sets and a set of octet (eight-bit byte) sequences.
242
 * UTF-8, UTF-16, ISO&nbsp;2022, and EUC are examples of
243
 * character-encoding schemes.  Encoding schemes are often associated with
244
 * a particular coded character set; UTF-8, for example, is used only to
245
 * encode Unicode.  Some schemes, however, are associated with multiple
246
 * coded character sets; EUC, for example, can be used to encode
247
 * characters in a variety of Asian coded character sets.
248
 *
249
 * <p> When a coded character set is used exclusively with a single
250
 * character-encoding scheme then the corresponding charset is usually
251
 * named for the coded character set; otherwise a charset is usually named
252
 * for the encoding scheme and, possibly, the locale of the coded
253
 * character sets that it supports.  Hence {@code US-ASCII} is both the
254
 * name of a coded character set and of the charset that encodes it, while
255
 * {@code EUC-JP} is the name of the charset that encodes the
256
 * JIS&nbsp;X&nbsp;0201, JIS&nbsp;X&nbsp;0208, and JIS&nbsp;X&nbsp;0212
257
 * coded character sets for the Japanese language.
258
 *
259
 * <p> The native character encoding of the Java programming language is
260
 * UTF-16.  A charset in the Java platform therefore defines a mapping
261
 * between sequences of sixteen-bit UTF-16 code units (that is, sequences
262
 * of chars) and sequences of bytes. </p>
263
 *
264
 *
265
 * @author Mark Reinhold
266
 * @author JSR-51 Expert Group
267
 * @since 1.4
268
 *
269
 * @see CharsetDecoder
270
 * @see CharsetEncoder
271
 * @see java.nio.charset.spi.CharsetProvider
272
 * @see java.lang.Character
273
 */
274

275
public abstract class Charset
276
    implements Comparable<Charset>
277
{
278

279
    /* -- Static methods -- */
280

281
    /**
282
     * Checks that the given string is a legal charset name. </p>
283
     *
284
     * @param  s
285
     *         A purported charset name
286
     *
287
     * @throws  IllegalCharsetNameException
288
     *          If the given name is not a legal charset name
289
     */
290
    private static void checkName(String s) {
291
        int n = s.length();
292
        if (n == 0) {
293
            throw new IllegalCharsetNameException(s);
294
        }
295
        for (int i = 0; i < n; i++) {
296
            char c = s.charAt(i);
297
            if (c >= 'A' && c <= 'Z') continue;
298
            if (c >= 'a' && c <= 'z') continue;
299
            if (c >= '0' && c <= '9') continue;
300
            if (c == '-' && i != 0) continue;
301
            if (c == '+' && i != 0) continue;
302
            if (c == ':' && i != 0) continue;
303
            if (c == '_' && i != 0) continue;
304
            if (c == '.' && i != 0) continue;
305
            throw new IllegalCharsetNameException(s);
306
        }
307
    }
308

309
    /* The standard set of charsets */
310
    private static final CharsetProvider standardProvider
311
        = new sun.nio.cs.StandardCharsets();
312

313
    private static final String[] zeroAliases = new String[0];
314

315
    // Cache of the most-recently-returned charsets,
316
    // along with the names that were used to find them
317
    //
318
    private static volatile Object[] cache1; // "Level 1" cache
319
    private static volatile Object[] cache2; // "Level 2" cache
320

321
    private static void cache(String charsetName, Charset cs) {
322
        cache2 = cache1;
323
        cache1 = new Object[] { charsetName, cs };
324
    }
325

326
    // Creates an iterator that walks over the available providers, ignoring
327
    // those whose lookup or instantiation causes a security exception to be
328
    // thrown.  Should be invoked with full privileges.
329
    //
330
    private static Iterator<CharsetProvider> providers() {
331
        return new Iterator<>() {
332
                ClassLoader cl = ClassLoader.getSystemClassLoader();
333
                ServiceLoader<CharsetProvider> sl =
334
                    ServiceLoader.load(CharsetProvider.class, cl);
335
                Iterator<CharsetProvider> i = sl.iterator();
336
                CharsetProvider next = null;
337

338
                private boolean getNext() {
339
                    while (next == null) {
340
                        try {
341
                            if (!i.hasNext())
342
                                return false;
343
                            next = i.next();
344
                        } catch (ServiceConfigurationError sce) {
345
                            if (sce.getCause() instanceof SecurityException) {
346
                                // Ignore security exceptions
347
                                continue;
348
                            }
349
                            throw sce;
350
                        }
351
                    }
352
                    return true;
353
                }
354

355
                public boolean hasNext() {
356
                    return getNext();
357
                }
358

359
                public CharsetProvider next() {
360
                    if (!getNext())
361
                        throw new NoSuchElementException();
362
                    CharsetProvider n = next;
363
                    next = null;
364
                    return n;
365
                }
366

367
                public void remove() {
368
                    throw new UnsupportedOperationException();
369
                }
370

371
            };
372
    }
373

374
    // Thread-local gate to prevent recursive provider lookups
375
    private static ThreadLocal<ThreadLocal<?>> gate =
376
            new ThreadLocal<ThreadLocal<?>>();
377

378
    @SuppressWarnings("removal")
379
    private static Charset lookupViaProviders(final String charsetName) {
380

381
        // The runtime startup sequence looks up standard charsets as a
382
        // consequence of the VM's invocation of System.initializeSystemClass
383
        // in order to, e.g., set system properties and encode filenames.  At
384
        // that point the application class loader has not been initialized,
385
        // however, so we can't look for providers because doing so will cause
386
        // that loader to be prematurely initialized with incomplete
387
        // information.
388
        //
389
        if (!VM.isBooted())
390
            return null;
391

392
        if (gate.get() != null)
393
            // Avoid recursive provider lookups
394
            return null;
395
        try {
396
            gate.set(gate);
397

398
            return AccessController.doPrivileged(
399
                new PrivilegedAction<>() {
400
                    public Charset run() {
401
                        for (Iterator<CharsetProvider> i = providers();
402
                             i.hasNext();) {
403
                            CharsetProvider cp = i.next();
404
                            Charset cs = cp.charsetForName(charsetName);
405
                            if (cs != null)
406
                                return cs;
407
                        }
408
                        return null;
409
                    }
410
                });
411

412
        } finally {
413
            gate.set(null);
414
        }
415
    }
416

417
    /* The extended set of charsets */
418
    private static class ExtendedProviderHolder {
419
        static final CharsetProvider[] extendedProviders = extendedProviders();
420
        // returns ExtendedProvider, if installed
421
        @SuppressWarnings("removal")
422
        private static CharsetProvider[] extendedProviders() {
423
            return AccessController.doPrivileged(new PrivilegedAction<>() {
424
                    public CharsetProvider[] run() {
425
                        CharsetProvider[] cps = new CharsetProvider[1];
426
                        int n = 0;
427
                        ServiceLoader<CharsetProvider> sl =
428
                            ServiceLoader.loadInstalled(CharsetProvider.class);
429
                        for (CharsetProvider cp : sl) {
430
                            if (n + 1 > cps.length) {
431
                                cps = Arrays.copyOf(cps, cps.length << 1);
432
                            }
433
                            cps[n++] = cp;
434
                        }
435
                        return n == cps.length ? cps : Arrays.copyOf(cps, n);
436
                    }});
437
        }
438
    }
439

440
    private static Charset lookupExtendedCharset(String charsetName) {
441
        if (!VM.isBooted())  // see lookupViaProviders()
442
            return null;
443
        CharsetProvider[] ecps = ExtendedProviderHolder.extendedProviders;
444
        for (CharsetProvider cp : ecps) {
445
            Charset cs = cp.charsetForName(charsetName);
446
            if (cs != null)
447
                return cs;
448
        }
449
        return null;
450
    }
451

452
    private static Charset lookup(String charsetName) {
453
        if (charsetName == null)
454
            throw new IllegalArgumentException("Null charset name");
455
        Object[] a;
456
        if ((a = cache1) != null && charsetName.equals(a[0]))
457
            return (Charset)a[1];
458
        // We expect most programs to use one Charset repeatedly.
459
        // We convey a hint to this effect to the VM by putting the
460
        // level 1 cache miss code in a separate method.
461
        return lookup2(charsetName);
462
    }
463

464
    private static Charset lookup2(String charsetName) {
465
        Object[] a;
466
        if ((a = cache2) != null && charsetName.equals(a[0])) {
467
            cache2 = cache1;
468
            cache1 = a;
469
            return (Charset)a[1];
470
        }
471
        Charset cs;
472
        if ((cs = standardProvider.charsetForName(charsetName)) != null ||
473
            (cs = lookupExtendedCharset(charsetName))           != null ||
474
            (cs = lookupViaProviders(charsetName))              != null)
475
        {
476
            cache(charsetName, cs);
477
            return cs;
478
        }
479

480
        /* Only need to check the name if we didn't find a charset for it */
481
        checkName(charsetName);
482
        return null;
483
    }
484

485
    /**
486
     * Tells whether the named charset is supported.
487
     *
488
     * @param  charsetName
489
     *         The name of the requested charset; may be either
490
     *         a canonical name or an alias
491
     *
492
     * @return  {@code true} if, and only if, support for the named charset
493
     *          is available in the current Java virtual machine
494
     *
495
     * @throws IllegalCharsetNameException
496
     *         If the given charset name is illegal
497
     *
498
     * @throws  IllegalArgumentException
499
     *          If the given {@code charsetName} is null
500
     */
501
    public static boolean isSupported(String charsetName) {
502
        return (lookup(charsetName) != null);
503
    }
504

505
    /**
506
     * Returns a charset object for the named charset.
507
     *
508
     * @param  charsetName
509
     *         The name of the requested charset; may be either
510
     *         a canonical name or an alias
511
     *
512
     * @return  A charset object for the named charset
513
     *
514
     * @throws  IllegalCharsetNameException
515
     *          If the given charset name is illegal
516
     *
517
     * @throws  IllegalArgumentException
518
     *          If the given {@code charsetName} is null
519
     *
520
     * @throws  UnsupportedCharsetException
521
     *          If no support for the named charset is available
522
     *          in this instance of the Java virtual machine
523
     */
524
    public static Charset forName(String charsetName) {
525
        Charset cs = lookup(charsetName);
526
        if (cs != null)
527
            return cs;
528
        throw new UnsupportedCharsetException(charsetName);
529
    }
530

531
    // Fold charsets from the given iterator into the given map, ignoring
532
    // charsets whose names already have entries in the map.
533
    //
534
    private static void put(Iterator<Charset> i, Map<String,Charset> m) {
535
        while (i.hasNext()) {
536
            Charset cs = i.next();
537
            if (!m.containsKey(cs.name()))
538
                m.put(cs.name(), cs);
539
        }
540
    }
541

542
    /**
543
     * Constructs a sorted map from canonical charset names to charset objects.
544
     *
545
     * <p> The map returned by this method will have one entry for each charset
546
     * for which support is available in the current Java virtual machine.  If
547
     * two or more supported charsets have the same canonical name then the
548
     * resulting map will contain just one of them; which one it will contain
549
     * is not specified. </p>
550
     *
551
     * <p> The invocation of this method, and the subsequent use of the
552
     * resulting map, may cause time-consuming disk or network I/O operations
553
     * to occur.  This method is provided for applications that need to
554
     * enumerate all of the available charsets, for example to allow user
555
     * charset selection.  This method is not used by the {@link #forName
556
     * forName} method, which instead employs an efficient incremental lookup
557
     * algorithm.
558
     *
559
     * <p> This method may return different results at different times if new
560
     * charset providers are dynamically made available to the current Java
561
     * virtual machine.  In the absence of such changes, the charsets returned
562
     * by this method are exactly those that can be retrieved via the {@link
563
     * #forName forName} method.  </p>
564
     *
565
     * @return An immutable, case-insensitive map from canonical charset names
566
     *         to charset objects
567
     */
568
    @SuppressWarnings("removal")
569
    public static SortedMap<String,Charset> availableCharsets() {
570
        return AccessController.doPrivileged(
571
            new PrivilegedAction<>() {
572
                public SortedMap<String,Charset> run() {
573
                    TreeMap<String,Charset> m =
574
                        new TreeMap<>(
575
                            String.CASE_INSENSITIVE_ORDER);
576
                    put(standardProvider.charsets(), m);
577
                    CharsetProvider[] ecps = ExtendedProviderHolder.extendedProviders;
578
                    for (CharsetProvider ecp :ecps) {
579
                        put(ecp.charsets(), m);
580
                    }
581
                    for (Iterator<CharsetProvider> i = providers(); i.hasNext();) {
582
                        CharsetProvider cp = i.next();
583
                        put(cp.charsets(), m);
584
                    }
585
                    return Collections.unmodifiableSortedMap(m);
586
                }
587
            });
588
    }
589

590
    private static volatile Charset defaultCharset;
591

592
    /**
593
     * Returns the default charset of this Java virtual machine.
594
     *
595
     * <p> The default charset is determined during virtual-machine startup and
596
     * typically depends upon the locale and charset of the underlying
597
     * operating system.
598
     *
599
     * @return  A charset object for the default charset
600
     *
601
     * @since 1.5
602
     */
603
    public static Charset defaultCharset() {
604
        if (defaultCharset == null) {
605
            synchronized (Charset.class) {
606
                String csn = GetPropertyAction
607
                        .privilegedGetProperty("file.encoding");
608
                Charset cs = lookup(csn);
609
                if (cs != null)
610
                    defaultCharset = cs;
611
                else
612
                    defaultCharset = sun.nio.cs.UTF_8.INSTANCE;
613
            }
614
        }
615
        return defaultCharset;
616
    }
617

618

619
    /* -- Instance fields and methods -- */

    // Canonical name, exactly as passed to the constructor
    private final String name;          // tickles a bug in oldjavac
    // Alias array, never null (empty when the charset has no aliases)
    private final String[] aliases;     // tickles a bug in oldjavac
    // Unmodifiable set view of the aliases; built on first call to aliases()
    private Set<String> aliasSet = null;

    /**
     * Initializes a new charset with the given canonical name and alias
     * set.
     *
     * <p> The given alias array is stored as passed, without being copied.
     *
     * @param  canonicalName
     *         The canonical name of this charset
     *
     * @param  aliases
     *         An array of this charset's aliases, or null if it has no aliases
     *
     * @throws IllegalCharsetNameException
     *         If the canonical name or any of the aliases are illegal
     */
    protected Charset(String canonicalName, String[] aliases) {
        String[] as = Objects.requireNonNullElse(aliases, zeroAliases);

        // Skip checks for the standard, built-in Charsets we always load
        // during initialization.  The comparison is deliberately by
        // reference, not by equals(): only a caller that passes one of these
        // very string constants bypasses validation.
        final boolean builtIn = canonicalName == "ISO-8859-1"
                || canonicalName == "US-ASCII"
                || canonicalName == "UTF-8";
        if (!builtIn) {
            checkName(canonicalName);
            for (String alias : as) {
                checkName(alias);
            }
        }
        this.name = canonicalName;
        this.aliases = as;
    }
654

655
    /**
656
     * Returns this charset's canonical name.
657
     *
658
     * @return  The canonical name of this charset
659
     */
660
    public final String name() {
661
        return name;
662
    }
663

664
    /**
665
     * Returns a set containing this charset's aliases.
666
     *
667
     * @return  An immutable set of this charset's aliases
668
     */
669
    public final Set<String> aliases() {
670
        if (aliasSet != null)
671
            return aliasSet;
672
        int n = aliases.length;
673
        HashSet<String> hs = new HashSet<>(n);
674
        for (int i = 0; i < n; i++)
675
            hs.add(aliases[i]);
676
        aliasSet = Collections.unmodifiableSet(hs);
677
        return aliasSet;
678
    }
679

680
    /**
681
     * Returns this charset's human-readable name for the default locale.
682
     *
683
     * <p> The default implementation of this method simply returns this
684
     * charset's canonical name.  Concrete subclasses of this class may
685
     * override this method in order to provide a localized display name. </p>
686
     *
687
     * @return  The display name of this charset in the default locale
688
     */
689
    public String displayName() {
690
        return name;
691
    }
692

693
    /**
694
     * Tells whether or not this charset is registered in the <a
695
     * href="http://www.iana.org/assignments/character-sets">IANA Charset
696
     * Registry</a>.
697
     *
698
     * @return  {@code true} if, and only if, this charset is known by its
699
     *          implementor to be registered with the IANA
700
     */
701
    public final boolean isRegistered() {
702
        return !name.startsWith("X-") && !name.startsWith("x-");
703
    }
704

705
    /**
706
     * Returns this charset's human-readable name for the given locale.
707
     *
708
     * <p> The default implementation of this method simply returns this
709
     * charset's canonical name.  Concrete subclasses of this class may
710
     * override this method in order to provide a localized display name. </p>
711
     *
712
     * @param  locale
713
     *         The locale for which the display name is to be retrieved
714
     *
715
     * @return  The display name of this charset in the given locale
716
     */
717
    public String displayName(Locale locale) {
718
        return name;
719
    }
720

721
    /**
722
     * Tells whether or not this charset contains the given charset.
723
     *
724
     * <p> A charset <i>C</i> is said to <i>contain</i> a charset <i>D</i> if,
725
     * and only if, every character representable in <i>D</i> is also
726
     * representable in <i>C</i>.  If this relationship holds then it is
727
     * guaranteed that every string that can be encoded in <i>D</i> can also be
728
     * encoded in <i>C</i> without performing any replacements.
729
     *
730
     * <p> That <i>C</i> contains <i>D</i> does not imply that each character
731
     * representable in <i>C</i> by a particular byte sequence is represented
732
     * in <i>D</i> by the same byte sequence, although sometimes this is the
733
     * case.
734
     *
735
     * <p> Every charset contains itself.
736
     *
737
     * <p> This method computes an approximation of the containment relation:
738
     * If it returns {@code true} then the given charset is known to be
739
     * contained by this charset; if it returns {@code false}, however, then
740
     * it is not necessarily the case that the given charset is not contained
741
     * in this charset.
742
     *
743
     * @param   cs
744
     *          The given charset
745
     *
746
     * @return  {@code true} if the given charset is contained in this charset
747
     */
748
    public abstract boolean contains(Charset cs);
749

750
    /**
751
     * Constructs a new decoder for this charset.
752
     *
753
     * @return  A new decoder for this charset
754
     */
755
    public abstract CharsetDecoder newDecoder();
756

757
    /**
758
     * Constructs a new encoder for this charset.
759
     *
760
     * @return  A new encoder for this charset
761
     *
762
     * @throws  UnsupportedOperationException
763
     *          If this charset does not support encoding
764
     */
765
    public abstract CharsetEncoder newEncoder();
766

767
    /**
768
     * Tells whether or not this charset supports encoding.
769
     *
770
     * <p> Nearly all charsets support encoding.  The primary exceptions are
771
     * special-purpose <i>auto-detect</i> charsets whose decoders can determine
772
     * which of several possible encoding schemes is in use by examining the
773
     * input byte sequence.  Such charsets do not support encoding because
774
     * there is no way to determine which encoding should be used on output.
775
     * Implementations of such charsets should override this method to return
776
     * {@code false}. </p>
777
     *
778
     * @return  {@code true} if, and only if, this charset supports encoding
779
     */
780
    public boolean canEncode() {
781
        return true;
782
    }
783

784
    /**
785
     * Convenience method that decodes bytes in this charset into Unicode
786
     * characters.
787
     *
788
     * <p> An invocation of this method upon a charset {@code cs} returns the
789
     * same result as the expression
790
     *
791
     * <pre>
792
     *     cs.newDecoder()
793
     *       .onMalformedInput(CodingErrorAction.REPLACE)
794
     *       .onUnmappableCharacter(CodingErrorAction.REPLACE)
795
     *       .decode(bb); </pre>
796
     *
797
     * except that it is potentially more efficient because it can cache
798
     * decoders between successive invocations.
799
     *
800
     * <p> This method always replaces malformed-input and unmappable-character
801
     * sequences with this charset's default replacement byte array.  In order
802
     * to detect such sequences, use the {@link
803
     * CharsetDecoder#decode(java.nio.ByteBuffer)} method directly.  </p>
804
     *
805
     * @param  bb  The byte buffer to be decoded
806
     *
807
     * @return  A char buffer containing the decoded characters
808
     */
809
    public final CharBuffer decode(ByteBuffer bb) {
810
        try {
811
            return ThreadLocalCoders.decoderFor(this)
812
                .onMalformedInput(CodingErrorAction.REPLACE)
813
                .onUnmappableCharacter(CodingErrorAction.REPLACE)
814
                .decode(bb);
815
        } catch (CharacterCodingException x) {
816
            throw new Error(x);         // Can't happen
817
        }
818
    }
819

820
    /**
821
     * Convenience method that encodes Unicode characters into bytes in this
822
     * charset.
823
     *
824
     * <p> An invocation of this method upon a charset {@code cs} returns the
825
     * same result as the expression
826
     *
827
     * <pre>
828
     *     cs.newEncoder()
829
     *       .onMalformedInput(CodingErrorAction.REPLACE)
830
     *       .onUnmappableCharacter(CodingErrorAction.REPLACE)
831
     *       .encode(bb); </pre>
832
     *
833
     * except that it is potentially more efficient because it can cache
834
     * encoders between successive invocations.
835
     *
836
     * <p> This method always replaces malformed-input and unmappable-character
837
     * sequences with this charset's default replacement string.  In order to
838
     * detect such sequences, use the {@link
839
     * CharsetEncoder#encode(java.nio.CharBuffer)} method directly.  </p>
840
     *
841
     * @param  cb  The char buffer to be encoded
842
     *
843
     * @return  A byte buffer containing the encoded characters
844
     */
845
    public final ByteBuffer encode(CharBuffer cb) {
846
        try {
847
            return ThreadLocalCoders.encoderFor(this)
848
                .onMalformedInput(CodingErrorAction.REPLACE)
849
                .onUnmappableCharacter(CodingErrorAction.REPLACE)
850
                .encode(cb);
851
        } catch (CharacterCodingException x) {
852
            throw new Error(x);         // Can't happen
853
        }
854
    }
855

856
    /**
857
     * Convenience method that encodes a string into bytes in this charset.
858
     *
859
     * <p> An invocation of this method upon a charset {@code cs} returns the
860
     * same result as the expression
861
     *
862
     * <pre>
863
     *     cs.encode(CharBuffer.wrap(s)); </pre>
864
     *
865
     * @param  str  The string to be encoded
866
     *
867
     * @return  A byte buffer containing the encoded characters
868
     */
869
    public final ByteBuffer encode(String str) {
870
        return encode(CharBuffer.wrap(str));
871
    }
872

873
    /**
874
     * Compares this charset to another.
875
     *
876
     * <p> Charsets are ordered by their canonical names, without regard to
877
     * case. </p>
878
     *
879
     * @param  that
880
     *         The charset to which this charset is to be compared
881
     *
882
     * @return A negative integer, zero, or a positive integer as this charset
883
     *         is less than, equal to, or greater than the specified charset
884
     */
885
    public final int compareTo(Charset that) {
886
        return (name().compareToIgnoreCase(that.name()));
887
    }
888

889
    /**
890
     * Computes a hashcode for this charset.
891
     *
892
     * @return  An integer hashcode
893
     */
894
    public final int hashCode() {
895
        return name().hashCode();
896
    }
897

898
    /**
899
     * Tells whether or not this object is equal to another.
900
     *
901
     * <p> Two charsets are equal if, and only if, they have the same canonical
902
     * names.  A charset is never equal to any other type of object.  </p>
903
     *
904
     * @return  {@code true} if, and only if, this charset is equal to the
905
     *          given object
906
     */
907
    public final boolean equals(Object ob) {
908
        if (!(ob instanceof Charset))
909
            return false;
910
        if (this == ob)
911
            return true;
912
        return name.equals(((Charset)ob).name());
913
    }
914

915
    /**
916
     * Returns a string describing this charset.
917
     *
918
     * @return  A string describing this charset
919
     */
920
    public final String toString() {
921
        return name();
922
    }
923

924
}
925

926
Product

Resources

Company