// Path: blob/master/src/java.base/share/classes/sun/nio/cs/Surrogate.java
// 41159 views
/*
 * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package sun.nio.cs;

import java.nio.CharBuffer;
import java.nio.charset.CoderResult;
import java.nio.charset.MalformedInputException;
import java.nio.charset.UnmappableCharacterException;

/**
 * Utility class for dealing with surrogates.
 *
 * @author Mark Reinhold
 * @author Martin Buchholz
 * @author Ulf Zibis
 */
public class Surrogate {

    private Surrogate() { }

    // TODO: Deprecate/remove the following redundant definitions
    public static final char MIN_HIGH = Character.MIN_HIGH_SURROGATE;
    public static final char MAX_HIGH = Character.MAX_HIGH_SURROGATE;
    public static final char MIN_LOW  = Character.MIN_LOW_SURROGATE;
    public static final char MAX_LOW  = Character.MAX_LOW_SURROGATE;
    public static final char MIN      = Character.MIN_SURROGATE;
    public static final char MAX      = Character.MAX_SURROGATE;
    public static final int  UCS4_MIN = Character.MIN_SUPPLEMENTARY_CODE_POINT;
    public static final int  UCS4_MAX = Character.MAX_CODE_POINT;

    /**
     * Tells whether or not the given value is in the high surrogate range.
     * Use of {@link Character#isHighSurrogate} is generally preferred.
     *
     * @param  c  The value to test
     * @return {@code true} if {@code MIN_HIGH <= c <= MAX_HIGH}
     */
    public static boolean isHigh(int c) {
        return (MIN_HIGH <= c) && (c <= MAX_HIGH);
    }

    /**
     * Tells whether or not the given value is in the low surrogate range.
     * Use of {@link Character#isLowSurrogate} is generally preferred.
     *
     * @param  c  The value to test
     * @return {@code true} if {@code MIN_LOW <= c <= MAX_LOW}
     */
    public static boolean isLow(int c) {
        return (MIN_LOW <= c) && (c <= MAX_LOW);
    }

    /**
     * Tells whether or not the given value is in the surrogate range.
     * Use of {@link Character#isSurrogate} is generally preferred.
     *
     * @param  c  The value to test
     * @return {@code true} if {@code MIN <= c <= MAX}
     */
    public static boolean is(int c) {
        return (MIN <= c) && (c <= MAX);
    }

    /**
     * Tells whether or not the given UCS-4 character must be represented as a
     * surrogate pair in UTF-16.
     * Use of {@link Character#isSupplementaryCodePoint} is generally preferred.
     *
     * @param  uc  The UCS-4 character
     * @return the result of {@link Character#isSupplementaryCodePoint}
     */
    public static boolean neededFor(int uc) {
        return Character.isSupplementaryCodePoint(uc);
    }

    /**
     * Returns the high UTF-16 surrogate for the given supplementary UCS-4 character.
     * Use of {@link Character#highSurrogate} is generally preferred.
     *
     * @param  uc  The UCS-4 character; asserted to be a supplementary
     *             code point
     * @return the result of {@link Character#highSurrogate}
     */
    public static char high(int uc) {
        assert Character.isSupplementaryCodePoint(uc);
        return Character.highSurrogate(uc);
    }

    /**
     * Returns the low UTF-16 surrogate for the given supplementary UCS-4 character.
     * Use of {@link Character#lowSurrogate} is generally preferred.
     *
     * @param  uc  The UCS-4 character; asserted to be a supplementary
     *             code point
     * @return the result of {@link Character#lowSurrogate}
     */
    public static char low(int uc) {
        assert Character.isSupplementaryCodePoint(uc);
        return Character.lowSurrogate(uc);
    }

    /**
     * Converts the given surrogate pair into a 32-bit UCS-4 character.
     * Use of {@link Character#toCodePoint} is generally preferred.
     *
     * @param  c  The first character; asserted to be a high surrogate
     * @param  d  The second character; asserted to be a low surrogate
     * @return the result of {@link Character#toCodePoint}
     */
    public static int toUCS4(char c, char d) {
        assert Character.isHighSurrogate(c) && Character.isLowSurrogate(d);
        return Character.toCodePoint(c, d);
    }

    /**
     * Surrogate parsing support. Charset implementations may use instances of
     * this class to handle the details of parsing UTF-16 surrogate pairs.
     *
     * <p> A new instance starts with {@code error} set to
     * {@link CoderResult#UNDERFLOW}; a successful parse sets it to
     * {@code null}. The accessors {@link #character()}, {@link #isPair()},
     * {@link #increment()} and {@link #unmappableResult()} assert that the
     * previous parse succeeded, and {@link #error()} asserts that it did not.
     */
    public static class Parser {

        public Parser() { }

        private int character;          // UCS-4
        private CoderResult error = CoderResult.UNDERFLOW;
        private boolean isPair;

        /**
         * Returns the UCS-4 character previously parsed.
         */
        public int character() {
            assert (error == null);
            return character;
        }

        /**
         * Tells whether or not the previously-parsed UCS-4 character was
         * originally represented by a surrogate pair.
         */
        public boolean isPair() {
            assert (error == null);
            return isPair;
        }

        /**
         * Returns the number of UTF-16 characters consumed by the previous
         * parse.
         */
        public int increment() {
            assert (error == null);
            return isPair ? 2 : 1;
        }

        /**
         * If the previous parse operation detected an error, return the object
         * describing that error.
         */
        public CoderResult error() {
            assert (error != null);
            return error;
        }

        /**
         * Returns an unmappable-input result object, with the appropriate
         * input length, for the previously-parsed character.
         */
        public CoderResult unmappableResult() {
            assert (error == null);
            return CoderResult.unmappableForLength(isPair ? 2 : 1);
        }

        /**
         * Parses a UCS-4 character from the given source buffer, handling
         * surrogates.
         *
         * <p> When {@code c} is a high surrogate and the buffer has a
         * remaining character, that character is read with a relative
         * {@code get()}, so it is consumed from the buffer even if it turns
         * out not to be a low surrogate.
         *
         * @param  c    The first character
         * @param  in   The source buffer, from which one more character
         *              will be consumed if c is a high surrogate
         *
         * @return  Either a parsed UCS-4 character, in which case the isPair()
         *          and increment() methods will return meaningful values, or
         *          -1, in which case error() will return a descriptive result
         *          object
         */
        public int parse(char c, CharBuffer in) {
            if (Character.isHighSurrogate(c)) {
                if (!in.hasRemaining()) {
                    // The second half of the pair has not arrived yet.
                    error = CoderResult.UNDERFLOW;
                    return -1;
                }
                char d = in.get();
                if (Character.isLowSurrogate(d)) {
                    character = Character.toCodePoint(c, d);
                    isPair = true;
                    error = null;
                    return character;
                }
                // High surrogate not followed by a low surrogate.
                error = CoderResult.malformedForLength(1);
                return -1;
            }
            if (Character.isLowSurrogate(c)) {
                // Low surrogate with no preceding high surrogate.
                error = CoderResult.malformedForLength(1);
                return -1;
            }
            character = c;
            isPair = false;
            error = null;
            return character;
        }

        /**
         * Parses a UCS-4 character from the given source buffer, handling
         * surrogates.
         *
         * @param  c    The first character; asserted to equal {@code ia[ip]}
         * @param  ia   The input array, from which one more character
         *              will be consumed if c is a high surrogate
         * @param  ip   The input index
         * @param  il   The input limit
         *
         * @return  Either a parsed UCS-4 character, in which case the isPair()
         *          and increment() methods will return meaningful values, or
         *          -1, in which case error() will return a descriptive result
         *          object
         */
        public int parse(char c, char[] ia, int ip, int il) {
            assert (ia[ip] == c);
            if (Character.isHighSurrogate(c)) {
                if (il - ip < 2) {
                    // Fewer than two characters available before the limit.
                    error = CoderResult.UNDERFLOW;
                    return -1;
                }
                char d = ia[ip + 1];
                if (Character.isLowSurrogate(d)) {
                    character = Character.toCodePoint(c, d);
                    isPair = true;
                    error = null;
                    return character;
                }
                // High surrogate not followed by a low surrogate.
                error = CoderResult.malformedForLength(1);
                return -1;
            }
            if (Character.isLowSurrogate(c)) {
                // Low surrogate with no preceding high surrogate.
                error = CoderResult.malformedForLength(1);
                return -1;
            }
            character = c;
            isPair = false;
            error = null;
            return character;
        }

    }

    /**
     * Surrogate generation support. Charset implementations may use instances
     * of this class to handle the details of generating UTF-16 surrogate
     * pairs.
     *
     * <p> A new instance starts with {@code error} set to
     * {@link CoderResult#OVERFLOW}; a successful generation sets it to
     * {@code null}.
     */
    public static class Generator {

        public Generator() { }

        private CoderResult error = CoderResult.OVERFLOW;

        /**
         * If the previous generation operation detected an error, return the
         * object describing that error.
         */
        public CoderResult error() {
            assert error != null;
            return error;
        }

        /**
         * Generates one or two UTF-16 characters to represent the given UCS-4
         * character.
         *
         * <p> On failure the recorded error is malformed-input of length
         * {@code len} if {@code uc} is a BMP value in the surrogate range,
         * unmappable of length {@code len} if {@code uc} is neither a BMP nor
         * a valid code point, and {@link CoderResult#OVERFLOW} if
         * {@code dst} lacks room for the required characters.
         *
         * @param  uc   The UCS-4 character
         * @param  len  The number of input bytes from which the UCS-4 value
         *              was constructed (used when creating result objects)
         * @param  dst  The destination buffer, to which one or two UTF-16
         *              characters will be written
         *
         * @return  Either a positive count of the number of UTF-16 characters
         *          written to the destination buffer, or -1, in which case
         *          error() will return a descriptive result object
         */
        public int generate(int uc, int len, CharBuffer dst) {
            if (Character.isBmpCodePoint(uc)) {
                char c = (char) uc;
                if (Character.isSurrogate(c)) {
                    error = CoderResult.malformedForLength(len);
                    return -1;
                }
                if (dst.remaining() < 1) {
                    error = CoderResult.OVERFLOW;
                    return -1;
                }
                dst.put(c);
                error = null;
                return 1;
            } else if (Character.isValidCodePoint(uc)) {
                // Supplementary code point: needs room for both surrogates.
                if (dst.remaining() < 2) {
                    error = CoderResult.OVERFLOW;
                    return -1;
                }
                dst.put(Character.highSurrogate(uc));
                dst.put(Character.lowSurrogate(uc));
                error = null;
                return 2;
            } else {
                error = CoderResult.unmappableForLength(len);
                return -1;
            }
        }

        /**
         * Generates one or two UTF-16 characters to represent the given UCS-4
         * character.
         *
         * <p> On failure the recorded error is malformed-input of length
         * {@code len} if {@code uc} is a BMP value in the surrogate range,
         * unmappable of length {@code len} if {@code uc} is neither a BMP nor
         * a valid code point, and {@link CoderResult#OVERFLOW} if
         * {@code dl - dp} is smaller than the number of characters required.
         *
         * @param  uc   The UCS-4 character
         * @param  len  The number of input bytes from which the UCS-4 value
         *              was constructed (used when creating result objects)
         * @param  da   The destination array, to which one or two UTF-16
         *              characters will be written
         * @param  dp   The destination position
         * @param  dl   The destination limit
         *
         * @return  Either a positive count of the number of UTF-16 characters
         *          written to the destination buffer, or -1, in which case
         *          error() will return a descriptive result object
         */
        public int generate(int uc, int len, char[] da, int dp, int dl) {
            if (Character.isBmpCodePoint(uc)) {
                char c = (char) uc;
                if (Character.isSurrogate(c)) {
                    error = CoderResult.malformedForLength(len);
                    return -1;
                }
                if (dl - dp < 1) {
                    error = CoderResult.OVERFLOW;
                    return -1;
                }
                da[dp] = c;
                error = null;
                return 1;
            } else if (Character.isValidCodePoint(uc)) {
                // Supplementary code point: needs room for both surrogates.
                if (dl - dp < 2) {
                    error = CoderResult.OVERFLOW;
                    return -1;
                }
                da[dp] = Character.highSurrogate(uc);
                da[dp + 1] = Character.lowSurrogate(uc);
                error = null;
                return 2;
            } else {
                error = CoderResult.unmappableForLength(len);
                return -1;
            }
        }
    }

}