下面列出了 java.nio.charset.Charset#newDecoder() 的实例代码;您可以点击链接到 GitHub 查看源代码,也可以在右侧发表评论。
/**
 * Creates a new decoder whose handling of malformed input and unmappable
 * characters is selected by {@code ignoreEncodingErrors}.
 *
 * @param encodingName name of the charset to look up; only consulted when
 *        this object's {@code charset} field is {@code null}
 * @param ignoreEncodingErrors {@code true} to replace bad input,
 *        {@code false} to report it
 * @return a freshly created, configured decoder
 */
public CharsetDecoder getDecoder(String encodingName, boolean ignoreEncodingErrors) {
    // A charset already held by this object wins over the supplied name.
    final Charset effective;
    if (this.charset != null) {
        effective = this.charset;
    } else {
        effective = Charset.forName(encodingName);
    }

    CharsetDecoder fresh = effective.newDecoder();

    // The same policy is applied to both kinds of decoding problem.
    final CodingErrorAction onError = ignoreEncodingErrors
            ? CodingErrorAction.REPLACE
            : CodingErrorAction.REPORT;
    fresh.onMalformedInput(onError);
    fresh.onUnmappableCharacter(onError);
    return fresh;
}
/**
 * Returns the decoder registered in {@code cache} for the given code set
 * name, creating one with {@link Charset#forName(String)} and storing it in
 * the cache when none is registered yet.
 * <p>
 * An {@link IllegalCharsetNameException} raised during the lookup is
 * converted through {@code wrapper.invalidBtcConverterName}. Note that
 * {@code Charset.forName} can also throw {@code UnsupportedCharsetException}
 * for a legal but unknown name; that exception is not handled here and
 * propagates to the caller unchanged.
 *
 * @param javaCodeSetName the Java charset name to look up
 * @return the cached or newly created decoder
 */
protected CharsetDecoder getConverter(String javaCodeSetName) {
    try {
        CharsetDecoder cached = cache.getByteToCharConverter(javaCodeSetName);
        if (cached != null) {
            return cached;
        }

        // Cache miss: build a decoder and remember it for later calls.
        CharsetDecoder created = Charset.forName(javaCodeSetName).newDecoder();
        cache.setConverter(javaCodeSetName, created);
        return created;
    } catch (IllegalCharsetNameException badName) {
        // This can only happen if one of our charset entries has
        // an illegal name.
        throw wrapper.invalidBtcConverterName( badName, javaCodeSetName ) ;
    }
}
/**
 * Strictly decodes a byte array into a string using the given
 * {@link Charset}.
 * <p>
 * Unlike {@link Charset#decode(ByteBuffer)} and
 * {@link String#String(byte[], Charset)}, which replace or ignore bad
 * input, this method uses a newly created decoder with its error actions
 * left untouched and turns any {@link CharacterCodingException} into an
 * {@link IonException}.
 *
 * @param bytes the data to decode.
 * @param charset the charset the data is expected to be encoded in.
 *
 * @return the decoded string, not null.
 *
 * @throws IonException if there's a {@link CharacterCodingException}.
 */
public static String decode(byte[] bytes, Charset charset)
{
    CharsetDecoder strictDecoder = charset.newDecoder();
    ByteBuffer input = ByteBuffer.wrap(bytes);
    try
    {
        return strictDecoder.decode(input).toString();
    }
    catch (CharacterCodingException e)
    {
        throw new IonException(
            "Input is not valid " + charset.displayName() + " data", e);
    }
}
/**
 * Returns the decoder registered in {@code cache} for the given code set
 * name, creating one with {@link Charset#forName(String)} and storing it in
 * the cache when none is registered yet.
 * <p>
 * An {@link IllegalCharsetNameException} raised during the lookup is
 * converted through {@code wrapper.invalidBtcConverterName}. Note that
 * {@code Charset.forName} can also throw {@code UnsupportedCharsetException}
 * for a legal but unknown name; that exception is not handled here and
 * propagates to the caller unchanged.
 *
 * @param javaCodeSetName the Java charset name to look up
 * @return the cached or newly created decoder
 */
protected CharsetDecoder getConverter(String javaCodeSetName) {
    try {
        CharsetDecoder cached = cache.getByteToCharConverter(javaCodeSetName);
        if (cached != null) {
            return cached;
        }

        // Cache miss: build a decoder and remember it for later calls.
        CharsetDecoder created = Charset.forName(javaCodeSetName).newDecoder();
        cache.setConverter(javaCodeSetName, created);
        return created;
    } catch (IllegalCharsetNameException badName) {
        // This can only happen if one of our charset entries has
        // an illegal name.
        throw wrapper.invalidBtcConverterName( badName, javaCodeSetName ) ;
    }
}
/**
 * Creates a new decoder whose handling of malformed input and unmappable
 * characters is selected by {@code ignoreEncodingErrors}.
 *
 * @param encodingName name of the charset to look up; only consulted when
 *        this object's {@code charset} field is {@code null}
 * @param ignoreEncodingErrors {@code true} to replace bad input,
 *        {@code false} to report it
 * @return a freshly created, configured decoder
 */
public CharsetDecoder getDecoder(String encodingName, boolean ignoreEncodingErrors) {
    // A charset already held by this object wins over the supplied name.
    final Charset effective;
    if (this.charset != null) {
        effective = this.charset;
    } else {
        effective = Charset.forName(encodingName);
    }

    CharsetDecoder fresh = effective.newDecoder();

    // The same policy is applied to both kinds of decoding problem.
    final CodingErrorAction onError = ignoreEncodingErrors
            ? CodingErrorAction.REPLACE
            : CodingErrorAction.REPORT;
    fresh.onMalformedInput(onError);
    fresh.onUnmappableCharacter(onError);
    return fresh;
}
/**
 * Decodes the remaining bytes of {@code buffer} as UTF-8 text.
 * <p>
 * Decoding works on a read-only view of the buffer, so the caller's
 * position and limit are left untouched. Any failure (including a
 * {@code null} buffer or malformed UTF-8) is reported by printing the stack
 * trace and returning the literal string {@code "error"}.
 *
 * @param buffer the bytes to decode
 * @return the decoded text, or {@code "error"} if anything went wrong
 */
public static String getString(ByteBuffer buffer) {
    try {
        CharsetDecoder utf8Decoder = Charset.forName("UTF-8").newDecoder();
        ByteBuffer view = buffer.asReadOnlyBuffer();
        return utf8Decoder.decode(view).toString();
    } catch (Exception ex) {
        ex.printStackTrace();
        return "error";
    }
}
/**
 * Wraps {@code out} and prepares the log consumer and the charset decoder
 * used by this stream.
 *
 * @param out the underlying output stream, handed to the superclass
 * @param fileName passed through to the {@code LogConsumer}
 * @param repeatLimit passed through to the {@code LogConsumer}
 * @param type selects the expected strings and whether log messages are
 *        skipped, via {@code ExpectedStrings}
 * @param charset charset from which this stream's decoder is created
 */
public SuspectGrepOutputStream(OutputStream out, String fileName, int repeatLimit, String type, Charset charset) {
    super(out);

    // Both settings are derived from the same "type" key.
    boolean skipLogMessages = ExpectedStrings.skipLogMsgs(type);
    List expectedStrings = ExpectedStrings.create(type);

    suspectGrepper = new LogConsumer(skipLogMessages, expectedStrings, fileName, repeatLimit);
    decoder = charset.newDecoder();
}
/**
 * Encodes {@code input} to bytes with the platform default charset, then
 * reads the first line back through a decoder for {@code charset}.
 * <p>
 * Only the malformed-input action of the decoder is changed; the
 * unmappable-character action keeps its default.
 *
 * @param input text to round-trip
 * @param charset charset used for decoding
 * @param codingErrorAction action applied to malformed input
 * @return the first decoded line, or {@code null} if there is no input
 * @throws IOException if reading or decoding fails
 */
static String decodeText(String input, Charset charset, CodingErrorAction codingErrorAction) throws IOException {
    CharsetDecoder configured = charset.newDecoder().onMalformedInput(codingErrorAction);

    ByteArrayInputStream rawBytes = new ByteArrayInputStream(input.getBytes());
    InputStreamReader decodingReader = new InputStreamReader(rawBytes, configured);
    BufferedReader lineReader = new BufferedReader(decodingReader);
    return lineReader.readLine();
}
/**
 * Decodes {@code input} once with the {@code x-JISAutoDetect} decoder and
 * once with a decoder for {@code expectedCharset}, then checks that both
 * produce the same text.
 *
 * @param expectedCharset name of the charset the input is really encoded in
 * @param input the encoded bytes (must fit in the 128-byte work buffer)
 * @throws Exception if a charset lookup or one of the checks fails
 */
static void test(String expectedCharset, byte[] input) throws Exception {
    CharsetDecoder autoDetect = Charset.forName("x-JISAutoDetect").newDecoder();
    CharsetDecoder reference = Charset.forName(expectedCharset).newDecoder();

    // Load the input and remember the start so it can be decoded twice.
    ByteBuffer bytes = ByteBuffer.allocate(128);
    bytes.put(input);
    bytes.flip();
    bytes.mark();

    // First pass: auto-detecting decoder.
    CharBuffer detectedChars = CharBuffer.allocate(128);
    CoderResult outcome = autoDetect.decode(bytes, detectedChars, true);
    checkCoderResult(outcome);
    detectedChars.flip();
    String actual = detectedChars.toString();

    // Second pass over the same bytes: the known-correct decoder.
    bytes.reset();
    CharBuffer expectedChars = CharBuffer.allocate(128);
    outcome = reference.decode(bytes, expectedChars, true);
    checkCoderResult(outcome);
    expectedChars.flip();
    String expected = expectedChars.toString();

    boolean same = actual.equals(expected);
    check(same, String.format("actual=%s expected=%s", actual, expected));
}
/**
 * Handles an incoming HTTP connection: reads the request headers, reads
 * exactly {@code Content-Length} bytes of body, decodes the body as UTF-8
 * and hands the trimmed text to {@code parseResponse}. Afterwards a
 * minimal HTTP 200 response is written and every thread waiting on this
 * object is notified.
 * <p>
 * Headers and body are read from one shared buffered byte stream. Reading
 * the headers through a {@code BufferedReader} and the body from the raw
 * stream (as an earlier version did) can lose body bytes, because the
 * reader may already have buffered them.
 * <p>
 * Both streams are closed before this method returns, whether it completes
 * normally or throws.
 *
 * @param client
 *            The client Socket.
 * @throws IOException
 *             If an I/O error occurred, the connection ended before the
 *             complete request was received, the Content-Length header
 *             is malformed, or the body is not valid UTF-8.
 */
private void handleConnection(final Socket client) throws IOException {
    PrintWriter out = new PrintWriter(client.getOutputStream(), true);
    InputStream is = new java.io.BufferedInputStream(client.getInputStream());
    try {
        // read content length (case-insensitive and locale-independent match)
        String prefix = "content-length: ";
        String inputLine = readHeaderLine(is);
        while (inputLine != null
                && !inputLine.regionMatches(true, 0, prefix, 0, prefix.length())) {
            // continue reading ...
            inputLine = readHeaderLine(is);
        }
        if (inputLine == null) {
            throw new IOException(
                    "Connection closed before a Content-Length header was received");
        }

        // parse content length
        int len;
        try {
            len = Integer.parseInt(inputLine.substring(prefix.length()).trim());
        } catch (NumberFormatException e) {
            throw new IOException("Malformed Content-Length header: " + inputLine, e);
        }
        if (len < 0) {
            throw new IOException("Negative Content-Length: " + len);
        }

        // skip the remaining headers up to and including the blank line
        do {
            inputLine = readHeaderLine(is);
            if (inputLine == null) {
                throw new IOException(
                        "Connection closed before the end of the request headers");
            }
        } while (!inputLine.isEmpty());

        // read the whole body; a single read() may return fewer bytes
        byte[] xml = new byte[len];
        int filled = 0;
        while (filled < len) {
            int count = is.read(xml, filled, len - filled);
            if (count < 0) {
                throw new IOException("Connection closed after " + filled
                        + " of " + len + " body bytes");
            }
            filled += count;
        }

        // decode and parse xml content (UTF-8 encoded!)
        ByteBuffer buf = ByteBuffer.wrap(xml);
        Charset charset = Charset.forName("UTF-8");
        CharsetDecoder decoder = charset.newDecoder();
        CharBuffer charBuffer = decoder.decode(buf);
        parseResponse(charBuffer.toString().trim());

        // write response
        out.write("HTTP/1.0 200 OK\n\n");
        out.flush();

        // notify everyone waiting on us
        synchronized (this) {
            this.notifyAll();
        }
    } finally {
        out.close();
        is.close();
    }
}

/**
 * Reads one header line from {@code in}, byte by byte, up to the next line
 * feed. Each byte is widened to one char; a single trailing carriage
 * return is dropped.
 *
 * @param in the stream to read from
 * @return the line without its terminator, or {@code null} if the stream
 *         was already at end of input
 * @throws IOException if reading fails
 */
private String readHeaderLine(InputStream in) throws IOException {
    int b = in.read();
    if (b == -1) {
        return null;
    }
    StringBuilder line = new StringBuilder();
    while (b != -1 && b != '\n') {
        line.append((char) b);
        b = in.read();
    }
    int last = line.length() - 1;
    if (last >= 0 && line.charAt(last) == '\r') {
        line.setLength(last);
    }
    return line.toString();
}
/**
 * Decodes {@code len} bytes of {@code ba}, starting at index {@code off},
 * into chars using a new decoder obtained from {@code cs}.
 * <p>
 * Malformed input and unmappable characters are replaced, not reported:
 * both error actions are set to {@link CodingErrorAction#REPLACE}.
 *
 * @param cs the charset to decode with
 * @param ba the array holding the encoded bytes
 * @param off index of the first byte to decode
 * @param len number of bytes to decode
 * @return the freshly allocated output array when {@code len == 0};
 *         otherwise whatever {@code safeTrim} returns for the decoded chars
 */
static char[] decode(Charset cs, byte[] ba, int off, int len) {
// (1) We never cache the "external" cs; the only benefit of creating
// an additional StringDe/Encoder object to wrap it is to share the
// de/encode() method. These SD/E objects are short-lived, so the young-gen
// gc should be able to take care of them well. But the best approach
// is still not to generate them if not really necessary.
// (2) The defensive copy of the input byte/char[] has a big performance
// impact, as well as the outgoing result byte/char[]. Need to do the
// optimization check of (sm==null && classLoader0==null) for both.
// (3) getClass().getClassLoader0() is expensive.
// (4) There might be a timing gap in isTrusted setting. getClassLoader0()
// is only checked (and then isTrusted gets set) when (SM==null). It is
// possible that the SM==null for now but then SM is NOT null later
// when safeTrim() is invoked...the "safe" way to do it is to redundantly
// check (... && (isTrusted || SM == null || getClassLoader0())) in trim
// but it then can be argued that the SM is null when the operation
// is started...
// NOTE(review): item (4) says the class loader is checked when SM==null,
// but the code below performs the check when a SecurityManager IS
// installed (getSecurityManager() != null) - confirm which is intended.
CharsetDecoder cd = cs.newDecoder();
// Output capacity derived from the input length and the decoder's
// maxCharsPerByte(); presumably an upper bound - see scale().
int en = scale(len, cd.maxCharsPerByte());
char[] ca = new char[en];
if (len == 0)
return ca;
boolean isTrusted = false;
if (System.getSecurityManager() != null) {
// The charset counts as trusted when its class has a null class loader.
// For an untrusted charset the input range is copied first, so the
// decoder only ever sees the private copy (now starting at index 0).
if (!(isTrusted = (cs.getClass().getClassLoader0() == null))) {
ba = Arrays.copyOfRange(ba, off, off + len);
off = 0;
}
}
cd.onMalformedInput(CodingErrorAction.REPLACE)
.onUnmappableCharacter(CodingErrorAction.REPLACE)
.reset();
if (cd instanceof ArrayDecoder) {
// Decoders implementing ArrayDecoder work directly on the arrays.
int clen = ((ArrayDecoder)cd).decode(ba, off, len, ca);
return safeTrim(ca, clen, cs, isTrusted);
} else {
// Generic path: decode through buffers wrapped around the same arrays.
ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
CharBuffer cb = CharBuffer.wrap(ca);
try {
// Anything other than underflow from decode or flush is turned into
// an exception by throwException().
CoderResult cr = cd.decode(bb, cb, true);
if (!cr.isUnderflow())
cr.throwException();
cr = cd.flush(cb);
if (!cr.isUnderflow())
cr.throwException();
} catch (CharacterCodingException x) {
// Substitution is always enabled,
// so this shouldn't happen
throw new Error(x);
}
// cb.position() is the number of chars actually written into ca.
return safeTrim(ca, cb.position(), cs, isTrusted);
}
}
/**
 * Decodes {@code src} into {@code dst}, first working out which Japanese
 * encoding the input uses if that has not been settled yet.
 * <p>
 * While {@code detectedDecoder} is {@code null}, leading ASCII is copied
 * straight through and the remaining input is trial-decoded (on read-only
 * views of {@code src}, into scratch buffers) as ISO-2022-JP, then as the
 * charset named by {@code EUCJPName}, then as the one named by
 * {@code SJISName}. The real decoding is delegated to the
 * {@code decodeLoop(Charset, ByteBuffer, CharBuffer)} overload with the
 * chosen charset. Once {@code detectedDecoder} is set, calls go directly
 * to it.
 *
 * @param src the bytes to decode
 * @param dst receives the decoded chars
 * @return the result of the delegated decode, or UNDERFLOW / OVERFLOW when
 *         no decision can be made yet
 */
protected CoderResult decodeLoop(ByteBuffer src, CharBuffer dst) {
    // An encoding has already been settled on: just delegate.
    if (detectedDecoder != null) {
        return detectedDecoder.decodeLoop(src, dst);
    }

    copyLeadingASCII(src, dst);

    // All ASCII?
    if (!src.hasRemaining()) {
        return CoderResult.UNDERFLOW;
    }
    if (!dst.hasRemaining()) {
        return CoderResult.OVERFLOW;
    }

    // We need to perform double, not float, arithmetic; otherwise
    // we lose low order bits when src is larger than 2**24.
    final int scratchCapacity = (int) (src.limit() * (double) maxCharsPerByte());
    CharBuffer scratch = CharBuffer.allocate(scratchCapacity);

    // First try ISO-2022-JP, since there is no ambiguity
    Charset iso2022 = Charset.forName("ISO-2022-JP");
    DelegatableDecoder iso2022Probe = (DelegatableDecoder) iso2022.newDecoder();
    ByteBuffer iso2022View = src.asReadOnlyBuffer();
    CoderResult iso2022Outcome = iso2022Probe.decodeLoop(iso2022View, scratch);
    if (!iso2022Outcome.isError()) {
        return decodeLoop(iso2022, src, dst);
    }

    // We must choose between EUC and SJIS
    Charset eucJp = Charset.forName(EUCJPName);
    Charset shiftJis = Charset.forName(SJISName);

    DelegatableDecoder eucProbe = (DelegatableDecoder) eucJp.newDecoder();
    ByteBuffer eucView = src.asReadOnlyBuffer();
    scratch.clear();
    CoderResult eucOutcome = eucProbe.decodeLoop(eucView, scratch);
    // If EUC decoding fails, must be SJIS
    if (eucOutcome.isError()) {
        return decodeLoop(shiftJis, src, dst);
    }

    DelegatableDecoder sjisProbe = (DelegatableDecoder) shiftJis.newDecoder();
    ByteBuffer sjisView = src.asReadOnlyBuffer();
    CharBuffer sjisScratch = CharBuffer.allocate(scratchCapacity);
    CoderResult sjisOutcome = sjisProbe.decodeLoop(sjisView, sjisScratch);
    // If SJIS decoding fails, must be EUC
    if (sjisOutcome.isError()) {
        return decodeLoop(eucJp, src, dst);
    }

    // From here on, we have some ambiguity, and must guess.
    // We prefer input that does not appear to end mid-character,
    // i.e. the trial decode that consumed more bytes.
    final int eucConsumedTo = eucView.position();
    final int sjisConsumedTo = sjisView.position();
    if (eucConsumedTo > sjisConsumedTo) {
        return decodeLoop(eucJp, src, dst);
    }
    if (eucConsumedTo < sjisConsumedTo) {
        return decodeLoop(shiftJis, src, dst);
    }

    // end-of-input is after the first byte of the first char?
    if (src.position() == eucConsumedTo) {
        return CoderResult.UNDERFLOW;
    }

    // Use heuristic knowledge of typical Japanese text
    // (scratch still holds the EUC trial output).
    scratch.flip();
    Charset guess = looksLikeJapanese(scratch) ? eucJp : shiftJis;
    return decodeLoop(guess, src, dst);
}
/**
 * Decodes {@code len} bytes of {@code ba}, starting at index {@code off},
 * into chars using a new decoder obtained from {@code cs}.
 * <p>
 * Malformed input and unmappable characters are replaced, not reported:
 * both error actions are set to {@link CodingErrorAction#REPLACE}.
 *
 * @param cs the charset to decode with
 * @param ba the array holding the encoded bytes
 * @param off index of the first byte to decode
 * @param len number of bytes to decode
 * @return the freshly allocated output array when {@code len == 0};
 *         otherwise whatever {@code safeTrim} returns for the decoded chars
 */
static char[] decode(Charset cs, byte[] ba, int off, int len) {
// (1) We never cache the "external" cs; the only benefit of creating
// an additional StringDe/Encoder object to wrap it is to share the
// de/encode() method. These SD/E objects are short-lived, so the young-gen
// gc should be able to take care of them well. But the best approach
// is still not to generate them if not really necessary.
// (2) The defensive copy of the input byte/char[] has a big performance
// impact, as well as the outgoing result byte/char[]. Need to do the
// optimization check of (sm==null && classLoader0==null) for both.
// (3) getClass().getClassLoader0() is expensive.
// (4) There might be a timing gap in isTrusted setting. getClassLoader0()
// is only checked (and then isTrusted gets set) when (SM==null). It is
// possible that the SM==null for now but then SM is NOT null later
// when safeTrim() is invoked...the "safe" way to do it is to redundantly
// check (... && (isTrusted || SM == null || getClassLoader0())) in trim
// but it then can be argued that the SM is null when the operation
// is started...
// NOTE(review): item (4) says the class loader is checked when SM==null,
// but the code below performs the check when a SecurityManager IS
// installed (getSecurityManager() != null) - confirm which is intended.
CharsetDecoder cd = cs.newDecoder();
// Output capacity derived from the input length and the decoder's
// maxCharsPerByte(); presumably an upper bound - see scale().
int en = scale(len, cd.maxCharsPerByte());
char[] ca = new char[en];
if (len == 0)
return ca;
boolean isTrusted = false;
if (System.getSecurityManager() != null) {
// The charset counts as trusted when its class has a null class loader.
// For an untrusted charset the input range is copied first, so the
// decoder only ever sees the private copy (now starting at index 0).
if (!(isTrusted = (cs.getClass().getClassLoader0() == null))) {
ba = Arrays.copyOfRange(ba, off, off + len);
off = 0;
}
}
cd.onMalformedInput(CodingErrorAction.REPLACE)
.onUnmappableCharacter(CodingErrorAction.REPLACE)
.reset();
if (cd instanceof ArrayDecoder) {
// Decoders implementing ArrayDecoder work directly on the arrays.
int clen = ((ArrayDecoder)cd).decode(ba, off, len, ca);
return safeTrim(ca, clen, cs, isTrusted);
} else {
// Generic path: decode through buffers wrapped around the same arrays.
ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
CharBuffer cb = CharBuffer.wrap(ca);
try {
// Anything other than underflow from decode or flush is turned into
// an exception by throwException().
CoderResult cr = cd.decode(bb, cb, true);
if (!cr.isUnderflow())
cr.throwException();
cr = cd.flush(cb);
if (!cr.isUnderflow())
cr.throwException();
} catch (CharacterCodingException x) {
// Substitution is always enabled,
// so this shouldn't happen
throw new Error(x);
}
// cb.position() is the number of chars actually written into ca.
return safeTrim(ca, cb.position(), cs, isTrusted);
}
}
/**
 * Reads bytes via {@code read()} up to (and consuming) the next
 * {@code delimiter} byte or end of input, and returns them decoded with
 * {@code charset}. Malformed input and unmappable characters are replaced
 * rather than reported.
 * <p>
 * Bytes are collected in {@code barray}; whenever it fills up, its content
 * is decoded into {@code carray} and any undecoded tail is moved to the
 * front of {@code barray} so that a character split across the boundary
 * is completed by the following bytes.
 * NOTE(review): this relies on {@code bbuff} wrapping {@code barray} and
 * {@code cbuff} wrapping {@code carray} - confirm at the field
 * declarations.
 *
 * @param charset the charset used to decode the line
 * @param delimiter the byte that terminates the line (compared unsigned)
 * @return the decoded line without the delimiter, or {@code null} if end
 *         of input was reached before any byte was read
 * @throws IOException if reading fails, or if the decoder reports an error
 *         at any stage (intermediate decode, final decode or flush)
 */
public String readLine(Charset charset, byte delimiter) throws IOException {
    CharsetDecoder decoder = charset.newDecoder();
    decoder.onMalformedInput(CodingErrorAction.REPLACE);
    decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
    // read() yields 0..255, so compare against the unsigned delimiter value
    int delim = delimiter & 0xff;
    int rc;
    int offset = 0;
    StringBuilder sb = null;
    CoderResult res;
    while ((rc = read()) != -1) {
        if (rc == delim) {
            break;
        }
        barray[offset++] = (byte) rc;
        if (barray.length == offset) {
            // Byte buffer is full: decode what we have, more input follows.
            bbuff.position(0);
            bbuff.limit(barray.length);
            cbuff.position(0);
            cbuff.limit(carray.length);
            res = decoder.decode(bbuff, cbuff, false);
            if (res.isError()) {
                throw new IOException("Decoding error: " + res.toString());
            }
            // Keep the bytes the decoder did not consume (e.g. the start of
            // a multi-byte character) at the front for the next round.
            offset = bbuff.remaining();
            if (offset > 0) {
                System.arraycopy(barray, bbuff.position(), barray, 0, offset);
            }
            if (sb == null) {
                sb = new StringBuilder(cbuff.position());
            }
            sb.append(carray, 0, cbuff.position());
        }
    }
    if (sb == null) {
        if (rc == -1 && offset == 0) {
            // We are at EOF with nothing read
            return null;
        }
        sb = new StringBuilder();
    }
    // Decode the remaining bytes, telling the decoder this is the end.
    bbuff.position(0);
    bbuff.limit(offset);
    cbuff.position(0);
    cbuff.limit(carray.length);
    res = decoder.decode(bbuff, cbuff, true);
    if (res.isError()) {
        // Report the failure the same way as the in-loop decode does,
        // instead of printing to stdout and returning a damaged line.
        throw new IOException("Decoding error: " + res.toString());
    }
    sb.append(carray, 0, cbuff.position());
    // Let the decoder emit anything it is still holding back.
    cbuff.position(0);
    res = decoder.flush(cbuff);
    if (res.isError()) {
        throw new IOException("Decoding error: " + res.toString());
    }
    sb.append(carray, 0, cbuff.position());
    return sb.toString();
}
/**
 * Creates a progressive decoder that uses a new {@code CharsetDecoder}
 * obtained from the given charset. The decoder's error actions are left at
 * whatever {@code Charset.newDecoder()} provides.
 *
 * @param charset expected charset of the data
 */
public ProgressiveStringDecoder(Charset charset) {
mDecoder = charset.newDecoder();
}
/**
 * Decodes {@code src} into {@code dst}, first working out which Japanese
 * encoding the input uses if that has not been settled yet.
 * <p>
 * While {@code detectedDecoder} is {@code null}, leading ASCII is copied
 * straight through and the remaining input is trial-decoded (on read-only
 * views of {@code src}, into scratch buffers) as ISO-2022-JP, then as the
 * charset named by {@code EUCJPName}, then as the one named by
 * {@code SJISName}. The real decoding is delegated to the
 * {@code decodeLoop(Charset, ByteBuffer, CharBuffer)} overload with the
 * chosen charset. Once {@code detectedDecoder} is set, calls go directly
 * to it.
 *
 * @param src the bytes to decode
 * @param dst receives the decoded chars
 * @return the result of the delegated decode, or UNDERFLOW / OVERFLOW when
 *         no decision can be made yet
 */
protected CoderResult decodeLoop(ByteBuffer src, CharBuffer dst) {
    // An encoding has already been settled on: just delegate.
    if (detectedDecoder != null) {
        return detectedDecoder.decodeLoop(src, dst);
    }

    copyLeadingASCII(src, dst);

    // All ASCII?
    if (!src.hasRemaining()) {
        return CoderResult.UNDERFLOW;
    }
    if (!dst.hasRemaining()) {
        return CoderResult.OVERFLOW;
    }

    // We need to perform double, not float, arithmetic; otherwise
    // we lose low order bits when src is larger than 2**24.
    final int scratchCapacity = (int) (src.limit() * (double) maxCharsPerByte());
    CharBuffer scratch = CharBuffer.allocate(scratchCapacity);

    // First try ISO-2022-JP, since there is no ambiguity
    Charset iso2022 = Charset.forName("ISO-2022-JP");
    DelegatableDecoder iso2022Probe = (DelegatableDecoder) iso2022.newDecoder();
    ByteBuffer iso2022View = src.asReadOnlyBuffer();
    CoderResult iso2022Outcome = iso2022Probe.decodeLoop(iso2022View, scratch);
    if (!iso2022Outcome.isError()) {
        return decodeLoop(iso2022, src, dst);
    }

    // We must choose between EUC and SJIS
    Charset eucJp = Charset.forName(EUCJPName);
    Charset shiftJis = Charset.forName(SJISName);

    DelegatableDecoder eucProbe = (DelegatableDecoder) eucJp.newDecoder();
    ByteBuffer eucView = src.asReadOnlyBuffer();
    scratch.clear();
    CoderResult eucOutcome = eucProbe.decodeLoop(eucView, scratch);
    // If EUC decoding fails, must be SJIS
    if (eucOutcome.isError()) {
        return decodeLoop(shiftJis, src, dst);
    }

    DelegatableDecoder sjisProbe = (DelegatableDecoder) shiftJis.newDecoder();
    ByteBuffer sjisView = src.asReadOnlyBuffer();
    CharBuffer sjisScratch = CharBuffer.allocate(scratchCapacity);
    CoderResult sjisOutcome = sjisProbe.decodeLoop(sjisView, sjisScratch);
    // If SJIS decoding fails, must be EUC
    if (sjisOutcome.isError()) {
        return decodeLoop(eucJp, src, dst);
    }

    // From here on, we have some ambiguity, and must guess.
    // We prefer input that does not appear to end mid-character,
    // i.e. the trial decode that consumed more bytes.
    final int eucConsumedTo = eucView.position();
    final int sjisConsumedTo = sjisView.position();
    if (eucConsumedTo > sjisConsumedTo) {
        return decodeLoop(eucJp, src, dst);
    }
    if (eucConsumedTo < sjisConsumedTo) {
        return decodeLoop(shiftJis, src, dst);
    }

    // end-of-input is after the first byte of the first char?
    if (src.position() == eucConsumedTo) {
        return CoderResult.UNDERFLOW;
    }

    // Use heuristic knowledge of typical Japanese text
    // (scratch still holds the EUC trial output).
    scratch.flip();
    Charset guess = looksLikeJapanese(scratch) ? eucJp : shiftJis;
    return decodeLoop(guess, src, dst);
}
/**
 * Creates a validator whose decoder reports, rather than replaces or
 * ignores, both malformed input and unmappable characters.
 *
 * @param charset the charset the validated data must conform to
 */
CharsetValidator(Charset charset) {
    decoder = charset.newDecoder()
            .onMalformedInput(CodingErrorAction.REPORT)
            .onUnmappableCharacter(CodingErrorAction.REPORT);
}
/**
 * Decodes {@code src} into {@code dst}, first working out which Japanese
 * encoding the input uses if that has not been settled yet.
 * <p>
 * While {@code detectedDecoder} is {@code null}, leading ASCII is copied
 * straight through and the remaining input is trial-decoded (on read-only
 * views of {@code src}, into scratch buffers) as ISO-2022-JP, then as the
 * charset named by {@code EUCJPName}, then as the one named by
 * {@code SJISName}. The real decoding is delegated to the
 * {@code decodeLoop(Charset, ByteBuffer, CharBuffer)} overload with the
 * chosen charset. Once {@code detectedDecoder} is set, calls go directly
 * to it.
 *
 * @param src the bytes to decode
 * @param dst receives the decoded chars
 * @return the result of the delegated decode, or UNDERFLOW / OVERFLOW when
 *         no decision can be made yet
 */
protected CoderResult decodeLoop(ByteBuffer src, CharBuffer dst) {
    // An encoding has already been settled on: just delegate.
    if (detectedDecoder != null) {
        return detectedDecoder.decodeLoop(src, dst);
    }

    copyLeadingASCII(src, dst);

    // All ASCII?
    if (!src.hasRemaining()) {
        return CoderResult.UNDERFLOW;
    }
    if (!dst.hasRemaining()) {
        return CoderResult.OVERFLOW;
    }

    // We need to perform double, not float, arithmetic; otherwise
    // we lose low order bits when src is larger than 2**24.
    final int scratchCapacity = (int) (src.limit() * (double) maxCharsPerByte());
    CharBuffer scratch = CharBuffer.allocate(scratchCapacity);

    // First try ISO-2022-JP, since there is no ambiguity
    Charset iso2022 = Charset.forName("ISO-2022-JP");
    DelegatableDecoder iso2022Probe = (DelegatableDecoder) iso2022.newDecoder();
    ByteBuffer iso2022View = src.asReadOnlyBuffer();
    CoderResult iso2022Outcome = iso2022Probe.decodeLoop(iso2022View, scratch);
    if (!iso2022Outcome.isError()) {
        return decodeLoop(iso2022, src, dst);
    }

    // We must choose between EUC and SJIS
    Charset eucJp = Charset.forName(EUCJPName);
    Charset shiftJis = Charset.forName(SJISName);

    DelegatableDecoder eucProbe = (DelegatableDecoder) eucJp.newDecoder();
    ByteBuffer eucView = src.asReadOnlyBuffer();
    scratch.clear();
    CoderResult eucOutcome = eucProbe.decodeLoop(eucView, scratch);
    // If EUC decoding fails, must be SJIS
    if (eucOutcome.isError()) {
        return decodeLoop(shiftJis, src, dst);
    }

    DelegatableDecoder sjisProbe = (DelegatableDecoder) shiftJis.newDecoder();
    ByteBuffer sjisView = src.asReadOnlyBuffer();
    CharBuffer sjisScratch = CharBuffer.allocate(scratchCapacity);
    CoderResult sjisOutcome = sjisProbe.decodeLoop(sjisView, sjisScratch);
    // If SJIS decoding fails, must be EUC
    if (sjisOutcome.isError()) {
        return decodeLoop(eucJp, src, dst);
    }

    // From here on, we have some ambiguity, and must guess.
    // We prefer input that does not appear to end mid-character,
    // i.e. the trial decode that consumed more bytes.
    final int eucConsumedTo = eucView.position();
    final int sjisConsumedTo = sjisView.position();
    if (eucConsumedTo > sjisConsumedTo) {
        return decodeLoop(eucJp, src, dst);
    }
    if (eucConsumedTo < sjisConsumedTo) {
        return decodeLoop(shiftJis, src, dst);
    }

    // end-of-input is after the first byte of the first char?
    if (src.position() == eucConsumedTo) {
        return CoderResult.UNDERFLOW;
    }

    // Use heuristic knowledge of typical Japanese text
    // (scratch still holds the EUC trial output).
    scratch.flip();
    Charset guess = looksLikeJapanese(scratch) ? eucJp : shiftJis;
    return decodeLoop(guess, src, dst);
}
/**
 * Opens a file for reading text and returns a {@code BufferedReader} over
 * it. The file's bytes are decoded into characters with a new decoder
 * obtained from the given charset, and reading starts at the beginning of
 * the file.
 *
 * <p> Because the decoder is used exactly as {@code Charset.newDecoder()}
 * returns it, the {@code Reader} methods that read from the file throw
 * {@code IOException} if a malformed or unmappable byte sequence is read.
 *
 * @param   path
 *          the path to the file
 * @param   cs
 *          the charset to use for decoding
 *
 * @return  a new buffered reader, with default buffer size, to read text
 *          from the file
 *
 * @throws  IOException
 *          if an I/O error occurs opening the file
 * @throws  SecurityException
 *          In the case of the default provider, and a security manager is
 *          installed, the {@link SecurityManager#checkRead(String) checkRead}
 *          method is invoked to check read access to the file.
 *
 * @see #readAllLines
 */
public static BufferedReader newBufferedReader(Path path, Charset cs)
    throws IOException
{
    // Create the decoder before the file is opened, so that a failure here
    // (e.g. a null charset) cannot leave an open stream behind.
    final CharsetDecoder reportingDecoder = cs.newDecoder();
    return new BufferedReader(
        new InputStreamReader(newInputStream(path), reportingDecoder));
}
/**
 * Opens a file for reading text and returns a {@code BufferedReader} over
 * it. The file's bytes are decoded into characters with a new decoder
 * obtained from the given charset, and reading starts at the beginning of
 * the file.
 *
 * <p> Because the decoder is used exactly as {@code Charset.newDecoder()}
 * returns it, the {@code Reader} methods that read from the file throw
 * {@code IOException} if a malformed or unmappable byte sequence is read.
 *
 * @param   path
 *          the path to the file
 * @param   cs
 *          the charset to use for decoding
 *
 * @return  a new buffered reader, with default buffer size, to read text
 *          from the file
 *
 * @throws  IOException
 *          if an I/O error occurs opening the file
 * @throws  SecurityException
 *          In the case of the default provider, and a security manager is
 *          installed, the {@link SecurityManager#checkRead(String) checkRead}
 *          method is invoked to check read access to the file.
 *
 * @see #readAllLines
 */
public static BufferedReader newBufferedReader(Path path, Charset cs)
    throws IOException
{
    // Create the decoder before the file is opened, so that a failure here
    // (e.g. a null charset) cannot leave an open stream behind.
    final CharsetDecoder reportingDecoder = cs.newDecoder();
    return new BufferedReader(
        new InputStreamReader(newInputStream(path), reportingDecoder));
}