Path: blob/master/test/langtools/tools/javac/4846262/Native2Ascii.java
41149 views
/*
 * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FilterReader;
import java.io.FilterWriter;
import java.io.IOException;
import java.io.Reader;
import java.io.Writer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.nio.file.Files;
import java.nio.file.Path;
import static java.nio.charset.StandardCharsets.*;

/**
 * Simple utility to convert a file between a native encoding and ASCII,
 * using Unicode escape notation (a backslash, the letter 'u' and four
 * hexadecimal digits) for every character outside the ASCII range.
 */
public class Native2Ascii {
    /** Charset used for the "native" side of both conversions. */
    final Charset cs;
    /** Encoder of {@link #cs}; decides whether an escape may be decoded. */
    final CharsetEncoder encoder;

    /**
     * Creates a converter for the given native charset.
     *
     * @param cs charset of the native-encoded files
     */
    public Native2Ascii(Charset cs) {
        this.cs = cs;
        this.encoder = cs.newEncoder();
    }

    /**
     * ASCII to Native conversion.
     *
     * Reads {@code infile} as US-ASCII, decodes Unicode escapes that the
     * native charset can represent, and writes the result to {@code outfile}
     * in the native charset. Every line is terminated with the platform line
     * separator.
     *
     * @param infile  ASCII file to read
     * @param outfile native-encoded file to write
     * @throws IOException if reading or writing fails
     */
    public void asciiToNative(Path infile, Path outfile) throws IOException {
        try (BufferedReader in = Files.newBufferedReader(infile, US_ASCII);
             BufferedReader reader = new BufferedReader(new A2NFilter(in));
             BufferedWriter writer = Files.newBufferedWriter(outfile, cs)) {
            String line;
            while ((line = reader.readLine()) != null) {
                writer.write(line);
                writer.newLine();
            }
        }
    }

    /**
     * Native to ASCII conversion.
     *
     * Reads {@code infile} in the native charset and writes it to
     * {@code outfile} as US-ASCII, replacing every character above U+007F
     * with its Unicode escape. Every line is terminated with the platform
     * line separator.
     *
     * @param infile  native-encoded file to read
     * @param outfile ASCII file to write
     * @throws IOException if reading or writing fails
     */
    public void nativeToAscii(Path infile, Path outfile) throws IOException {
        try (BufferedReader reader = Files.newBufferedReader(infile, cs);
             BufferedWriter out = Files.newBufferedWriter(outfile, US_ASCII);
             BufferedWriter writer = new BufferedWriter(new N2AFilter(out))) {
            String line;
            while ((line = reader.readLine()) != null) {
                writer.write(line);
                writer.newLine();
            }
        }
    }

    /**
     * Writer filter that replaces every character above U+007F with its
     * four-digit, lower-case Unicode escape. (A copy of native2ascii
     * N2AFilter.)
     */
    class N2AFilter extends FilterWriter {
        public N2AFilter(Writer out) {
            super(out);
        }

        /**
         * Writes a single character, escaping it if necessary.
         *
         * @param b character to write
         * @throws IOException if the underlying writer fails
         */
        public void write(char b) throws IOException {
            writeEscaped(b);
        }

        /**
         * Writes the low 16 bits of {@code c} as a character, escaping it if
         * necessary. Without this override FilterWriter would hand the
         * character to the underlying writer unescaped.
         */
        @Override
        public void write(int c) throws IOException {
            writeEscaped((char) c);
        }

        /**
         * Writes {@code len} characters of {@code buf} starting at
         * {@code off}, escaping each one if necessary.
         */
        @Override
        public void write(char[] buf, int off, int len) throws IOException {
            for (int i = off, end = off + len; i < end; i++) {
                writeEscaped(buf[i]);
            }
        }

        /**
         * Writes {@code len} characters of {@code str} starting at
         * {@code off}, escaping each one if necessary. Without this override
         * FilterWriter would hand the string to the underlying writer
         * unescaped.
         */
        @Override
        public void write(String str, int off, int len) throws IOException {
            for (int i = off, end = off + len; i < end; i++) {
                writeEscaped(str.charAt(i));
            }
        }

        /** Emits one character, as an escape when it is above U+007F. */
        private void writeEscaped(char c) throws IOException {
            if (c > 0x7f) {
                out.write('\\');
                out.write('u');
                String hex = Integer.toHexString(c);
                // left-pad with zeros to exactly four hex digits
                for (int width = hex.length(); width < 4; width++) {
                    out.write('0');
                }
                out.write(hex);
            } else {
                out.write(c);
            }
        }
    }

    /**
     * Reader filter that decodes Unicode escapes which the enclosing
     * converter's charset is able to encode; everything else is passed
     * through unchanged. (A copy of native2ascii A2NFilter.)
     */
    class A2NFilter extends FilterReader {
        /**
         * Smallest destination size the decoder can work with directly: up to
         * five carried-over characters plus at least one newly read one.
         */
        private static final int MIN_DIRECT_LEN = 6;

        // maintain a trailing buffer to hold any incomplete
        // unicode escaped sequences
        private char[] trailChars = null;

        // decoded characters waiting to be delivered; only used to serve
        // requests that are too small for the decoder to handle directly
        private final char[] staged = new char[64];
        private int stagedPos = 0;
        private int stagedLen = 0;

        public A2NFilter(Reader in) {
            super(in);
        }

        /**
         * Reads decoded characters into {@code buf[off .. off + len)}.
         *
         * @return number of characters stored (at least one when
         *         {@code len > 0}), or -1 at end of stream
         * @throws IndexOutOfBoundsException if {@code off}/{@code len} do not
         *         describe a range inside {@code buf}
         */
        @Override
        public int read(char[] buf, int off, int len) throws IOException {
            if (off < 0 || len < 0 || len > buf.length - off) {
                throw new IndexOutOfBoundsException(
                        "off=" + off + ", len=" + len + ", buf.length=" + buf.length);
            }
            if (len == 0) {
                return 0;
            }

            if (stagedPos == stagedLen) {
                if (len >= MIN_DIRECT_LEN) {
                    return decodeSome(buf, off, len);
                }
                // The request is too small to hold a complete escape
                // sequence, so decode into the staging buffer instead.
                int produced = decodeSome(staged, 0, staged.length);
                if (produced < 0) {
                    return -1;
                }
                stagedPos = 0;
                stagedLen = produced;
            }

            int count = Math.min(len, stagedLen - stagedPos);
            System.arraycopy(staged, stagedPos, buf, off, count);
            stagedPos += count;
            return count;
        }

        /** Reads one decoded character, or returns -1 at end of stream. */
        @Override
        public int read() throws IOException {
            char[] one = new char[1];
            return (read(one, 0, 1) == -1) ? -1 : one[0];
        }

        /**
         * Repeats {@link #decode} until it yields at least one character or
         * reports end of stream; a single pass yields nothing when all it
         * read was the start of a still incomplete escape.
         */
        private int decodeSome(char[] buf, int off, int len) throws IOException {
            int produced;
            do {
                produced = decode(buf, off, len);
            } while (produced == 0);
            return produced;
        }

        /**
         * Reads up to {@code len} input characters (including those carried
         * over from the previous call) and stores their decoded form at
         * {@code buf[off ..]}. Requires {@code len >= MIN_DIRECT_LEN}. Never
         * stores more characters than it consumed.
         *
         * @return number of characters stored (possibly 0), or -1 at end of
         *         stream with nothing carried over
         */
        private int decode(char[] buf, int off, int len) throws IOException {
            int numChars = 0; // how many characters have been read
            int retChars = 0; // how many characters we'll return

            char[] cBuf = new char[len];
            boolean eof = false;

            // copy trailing chars from previous invocation to input buffer
            if (trailChars != null) {
                System.arraycopy(trailChars, 0, cBuf, 0, trailChars.length);
                numChars = trailChars.length;
                trailChars = null;
            }

            int n = in.read(cBuf, numChars, len - numChars);
            if (n < 0) {
                eof = true;
                if (numChars == 0) {
                    return -1; // EOF
                }
            } else {
                numChars += n;
            }

            int i = 0;
            while (i < numChars) {
                char c = cBuf[i++];

                if (c != '\\' || (eof && numChars <= 5)) {
                    // Not a backslash, so copy and continue.
                    // Always pass non backslash chars straight thru
                    // for regular encoding. If backslash occurs in
                    // input stream at the final 5 chars then don't
                    // attempt to read-ahead and de-escape since these
                    // are literal occurrences of U+005C which need to
                    // be encoded verbatim in the target encoding.
                    buf[off + retChars++] = c;
                    continue;
                }

                int remaining = numChars - i;
                if (remaining < 5) {
                    // Might be the first character of a unicode escape, but
                    // we don't have enough characters to tell, so save the
                    // backslash and what follows it, and finish.
                    trailChars = new char[1 + remaining];
                    System.arraycopy(cBuf, i - 1, trailChars, 0, 1 + remaining);
                    break;
                }
                // At this point we have at least five characters remaining

                c = cBuf[i++];
                if (c != 'u') {
                    // Not a unicode escape, so copy and continue
                    buf[off + retChars++] = '\\';
                    buf[off + retChars++] = c;
                    continue;
                }

                // The next four characters are the hex part of a unicode escape
                int code = parseHex4(cBuf, i);
                if (code >= 0 && encoder.canEncode((char) code)) {
                    // We'll be able to convert this
                    buf[off + retChars++] = (char) code;
                    i += 4; // Align beyond the current uXXXX sequence
                } else {
                    // We won't, so just retain the original sequence; the
                    // four characters after the 'u' are copied by the
                    // following iterations.
                    buf[off + retChars++] = '\\';
                    buf[off + retChars++] = 'u';
                }
            }

            return retChars;
        }

        /**
         * Parses four ASCII hexadecimal digits starting at
         * {@code chars[start]}.
         *
         * Signs and non-ASCII digits are rejected; Integer.parseInt would
         * accept both.
         *
         * @return the parsed value (0 to 0xFFFF), or -1 if any of the four
         *         characters is not an ASCII hex digit
         */
        private int parseHex4(char[] chars, int start) {
            int value = 0;
            for (int k = 0; k < 4; k++) {
                char ch = chars[start + k];
                int digit = (ch <= 0x7f) ? Character.digit(ch, 16) : -1;
                if (digit < 0) {
                    return -1;
                }
                value = (value << 4) | digit;
            }
            return value;
        }
    }
}