下面列出了 java.nio.charset.CoderResult#isMalformed() 的实例代码，或者点击链接到 GitHub 查看源代码，也可以在右侧发表评论。
/**
 * Classifies the bytes remaining in {@code buffer} by running them through
 * {@code decoder}; the decoded output is discarded into {@code dummyBuffer}.
 * The buffer's position is put back to where it started before returning.
 *
 * @param buffer bytes to examine, starting at its current position
 * @return {@code VALID} if decoding ends in underflow, {@code MALFORMED} if it
 *         stops on malformed input, {@code UNMAPPABLE} otherwise
 */
Validity checkValidity(ByteBuffer buffer) {
    final int savedPosition = buffer.position();
    CoderResult outcome;
    do {
        // Only the result kind matters, so the scratch output is recycled on
        // every pass instead of being grown.
        dummyBuffer.clear();
        outcome = decoder.decode(buffer, dummyBuffer, true);
    } while (outcome.isOverflow());
    buffer.position(savedPosition);

    if (outcome.isUnderflow()) {
        return Validity.VALID;
    }
    return outcome.isMalformed() ? Validity.MALFORMED : Validity.UNMAPPABLE;
}
/**
 * Appends the UTF-8 decoding of the bytes gathered in {@code bbuf} to
 * {@code sb}. Whenever a run of bytes is malformed, the matching
 * {@code %xx} text is copied verbatim from {@code inputStr} instead, and
 * decoding resumes after it.
 *
 * @param sb
 *            StringBuilder to append to
 * @param bbuf
 *            raw bytes decoded from %-encoded input; its position marks the
 *            number of bytes collected (the buffer is flipped here)
 * @param inputStr
 *            full input string
 * @param seqStart
 *            start index inclusive within inputStr of %-encoded sequence
 * @param seqEnd
 *            end index exclusive within inputStr of %-encoded sequence
 *            (not read by this method)
 * @param utf8decoder
 *            decoder to use; it is reset before decoding starts
 */
private void appendDecodedPctUtf8(StringBuilder sb, ByteBuffer bbuf,
        String inputStr, int seqStart, int seqEnd,
        CharsetDecoder utf8decoder) {
    // assert bbuf.position() * 3 == seqEnd - seqStart;
    utf8decoder.reset();
    CharBuffer decoded = CharBuffer.allocate(bbuf.position());
    bbuf.flip();
    while (bbuf.hasRemaining()) {
        CoderResult outcome = utf8decoder.decode(bbuf, decoded, true);
        sb.append(decoded.flip());
        if (outcome.isMalformed()) {
            // Every byte came from one 3-char "%xx" token, so byte index i
            // maps to offset seqStart + 3 * i in the input string.
            int badIndex = bbuf.position();
            int badCount = outcome.length();
            int from = seqStart + 3 * badIndex;
            sb.append(inputStr.subSequence(from, from + 3 * badCount));
            // Skip the bad bytes; later bytes may still decode fine.
            bbuf.position(badIndex + badCount);
        }
        decoded.clear();
    }
}
/**
 * Decodes the next portion of {@code byteBuffer} into {@code mboxCharBuffer},
 * refreshes the end-of-input flag and flips the char buffer for reading.
 *
 * @throws CharConversionException if the decoder reports malformed input or
 *         an unmappable character
 */
private void decodeNextCharBuffer() throws CharConversionException {
    final CoderResult outcome = DECODER.decode(byteBuffer, mboxCharBuffer, endOfInputFlag);
    updateEndOfInputFlag();
    mboxCharBuffer.flip();
    // Malformed and unmappable are the only two error kinds a CoderResult has.
    if (outcome.isMalformed()) {
        throw new CharConversionException("Malformed input!");
    }
    if (outcome.isUnmappable()) {
        throw new CharConversionException("Unmappable character!");
    }
}
/**
 * Decodes the remaining bytes of {@code bb} as UTF-8 and appends the result to
 * {@code out}. Bytes that do not form valid UTF-8 are appended as lower-case
 * {@code %xx} escapes at the place where they occurred in the input.
 *
 * @param bb  bytes to decode; fully consumed on return
 * @param out receives decoded characters and escapes, in input order
 */
private static void tryDecodeUtf8(ByteBuffer bb, StringBuilder out) {
    CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder();
    // UTF-8 never yields more chars than input bytes, so this cannot overflow.
    CharBuffer cb = CharBuffer.allocate(bb.remaining());
    while (bb.hasRemaining()) {
        CoderResult result = decoder.decode(bb, cb, true);
        // Flush what was decoded BEFORE the bad bytes first; appending the
        // escapes ahead of it would reorder the output.
        out.append(cb.flip());
        cb.clear();
        if (result.isMalformed()) {
            int badLength = result.length();
            for (int i = 0; i < badLength; i++) {
                out.append('%').append(String.format("%02x", bb.get()));
            }
        }
    }
}
/**
 * Decodes and returns the next char from {@code buf}, refilling the buffer
 * first when it is empty.
 *
 * @return the decoded char, or -1 when the decoder produced no char
 * @throws IOException if the decoder reports malformed or unmappable input
 *         (refilling the buffer may presumably throw as well)
 */
@Override
public synchronized int readChar() throws IOException {
    if (!buf.hasRemaining()) {
        refillBuf();
    }

    final int startPos = buf.position();
    charBuf.clear();
    final boolean atEndOfInput = position >= fileSize;

    final CoderResult outcome = decoder.decode(buf, charBuf, atEndOfInput);
    if (outcome.isError()) {
        outcome.throwException();
    }

    final int consumed = buf.position() - startPos;
    charBuf.flip();
    if (!charBuf.hasRemaining()) {
        // there may be a partial character in the decoder buffer
        incrPosition(consumed, false);
        return -1;
    }

    final char c = charBuf.get();
    // don't increment the persisted location if we are in between a
    // surrogate pair, otherwise we may never recover if we seek() to this
    // location!
    incrPosition(consumed, !Character.isHighSurrogate(c));
    return c;
}
/**
 * Convert the given bytes to characters.
 * <p>
 * Bytes that remain undecoded after an underflow are stashed in
 * {@code leftovers} and are completed one byte at a time on the next call.
 *
 * @param bc byte input
 * @param cc char output
 * @param endOfInput Is this all of the available data
 *
 * @throws IOException If the conversion can not be completed
 */
public void convert(ByteChunk bc, CharChunk cc, boolean endOfInput)
        throws IOException {
    // Reuse the cached wrappers while the backing arrays are unchanged;
    // otherwise wrap the chunk's current array.
    if ((bb != null) && (bb.array() == bc.getBuffer())) {
        bb.limit(bc.getEnd());
        bb.position(bc.getStart());
    } else {
        bb = ByteBuffer.wrap(bc.getBuffer(), bc.getStart(), bc.getLength());
    }
    if ((cb != null) && (cb.array() == cc.getBuffer())) {
        cb.limit(cc.getBuffer().length);
        cb.position(cc.getEnd());
    } else {
        cb = CharBuffer.wrap(cc.getBuffer(), cc.getEnd(),
                cc.getBuffer().length - cc.getEnd());
    }

    // Finish a sequence left incomplete by the previous call
    if (leftovers.position() > 0) {
        final int charsBefore = cb.position();
        CoderResult partial;
        // Feed one more input byte per pass until a char comes out or the
        // decoder stops for another reason
        do {
            leftovers.put(bc.substractB());
            leftovers.flip();
            partial = decoder.decode(leftovers, cb, endOfInput);
            leftovers.position(leftovers.limit());
            leftovers.limit(leftovers.array().length);
        } while (partial.isUnderflow() && (cb.position() == charsBefore));
        if (partial.isError()) {
            partial.throwException();
        }
        bb.position(bc.getStart());
        leftovers.position(0);
    }

    // Main decoding pass
    final CoderResult result = decoder.decode(bb, cb, endOfInput);
    if (result.isError()) {
        result.throwException();
    }
    // Overflow or underflow: publish how far each buffer advanced
    bc.setOffset(bb.position());
    cc.setEnd(cb.position());
    if (result.isUnderflow() && (bc.getLength() > 0)) {
        // Keep the undecoded tail for the next call
        leftovers.limit(leftovers.array().length);
        leftovers.position(bc.getLength());
        bc.substract(leftovers.array(), 0, bc.getLength());
    }
}
/**
 * Recursively walks the arcs leaving {@code node} and adds to
 * {@code candidates} every candidate whose computed distance does not exceed
 * {@code effectEditDistance}.
 * <p>
 * For each arc, the arc label is appended to {@code prevBytes} and the bytes
 * are decoded. If the decoder reports malformed input, the accumulated bytes
 * are passed down to the arc's end node without advancing {@code depth}.
 * Otherwise the first decoded char is stored at {@code candidate[candIndex]}
 * and three continuations are tried: "any to two" replacement, "any to one"
 * replacement, and the general edit-distance step.
 *
 * @param candidates output list that accepted candidates are added to
 * @param depth current row/column used with {@code hMatrix}
 * @param node automaton node whose outgoing arcs are visited
 * @param prevBytes bytes accumulated so far that did not yet decode to a char
 * @param wordIndex index into the input word (presumably the misspelled word; verify against caller)
 * @param candIndex index in {@code candidate} where the next decoded char is stored
 */
private void findRepl(List<CandidateData> candidates, final int depth, final int node, final byte[] prevBytes, final int wordIndex, final int candIndex) {
int dist = 0;
for (int arc = fsa.getFirstArc(node); arc != 0; arc = fsa.getNextArc(arc)) {
// Rebuild the byte sequence for this arc: previously pending bytes plus this arc's label.
byteBuffer = BufferUtils.clearAndEnsureCapacity(byteBuffer, prevBytes.length + 1);
byteBuffer.put(prevBytes);
byteBuffer.put(fsa.getArcLabel(arc));
// Remember how many bytes were written before flipping for decoding.
final int bufPos = byteBuffer.position();
byteBuffer.flip();
decoder.reset();
// FIXME: this isn't correct -- no checks for overflows, no decoder flush. I don't think this should be in here
// either; the decoder should run once on the accumulated temporary byte buffer (current path) only when there's
// a potential that this buffer can become a replacement candidate (isEndOfCandidate). Because we assume candidates
// are valid input strings (this is verified when building the dictionary), it would save a lot of conversions.
final CoderResult c = decoder.decode(byteBuffer, charBuffer, true);
if (c.isMalformed()) { // assume that only valid
// encodings are there
// The bytes so far do not decode; carry them along and try the next arcs.
final byte[] prev = new byte[bufPos];
byteBuffer.position(0);
byteBuffer.get(prev);
if (!fsa.isArcTerminal(arc)) {
findRepl(candidates, depth, fsa.getEndNode(arc), prev, wordIndex, candIndex); // note: depth is not incremented
}
byteBuffer.clear();
} else if (!c.isError()) { // unmappable characters are silently discarded
// Take the first decoded char as the candidate char at this position.
charBuffer.flip();
candidate[candIndex] = charBuffer.get();
charBuffer.clear();
byteBuffer.clear();
int lengthReplacement;
// replacement "any to two"
if ((lengthReplacement = matchAnyToTwo(wordIndex, candIndex)) > 0) {
// the replacement takes place at the end of the candidate
if (isEndOfCandidate(arc, wordIndex) && (dist = hMatrix.get(depth - 1, depth - 1)) <= effectEditDistance) {
if (Math.abs(wordLen - 1 - (wordIndex + lengthReplacement - 2)) > 0) {
// there are extra letters in the word after the replacement
dist = dist + Math.abs(wordLen - 1 - (wordIndex + lengthReplacement - 2));
}
if (dist <= effectEditDistance) {
candidates.add(new CandidateData(String.valueOf(candidate, 0, candIndex + 1), dist));
}
}
if (isArcNotTerminal(arc, candIndex)) {
// Temporarily overwrite the diagonal cell for the recursive call, then restore it.
int x = hMatrix.get(depth, depth);
hMatrix.set(depth, depth, hMatrix.get(depth - 1, depth - 1));
findRepl(candidates, Math.max(0, depth), fsa.getEndNode(arc), new byte[0], wordIndex + lengthReplacement - 1,
candIndex + 1);
hMatrix.set(depth, depth, x);
}
}
//replacement "any to one"
if ((lengthReplacement = matchAnyToOne(wordIndex, candIndex)) > 0) {
// the replacement takes place at the end of the candidate
if (isEndOfCandidate(arc, wordIndex) && (dist = hMatrix.get(depth, depth)) <= effectEditDistance) {
if (Math.abs(wordLen - 1 - (wordIndex + lengthReplacement - 1)) > 0) {
// there are extra letters in the word after the replacement
dist = dist + Math.abs(wordLen - 1 - (wordIndex + lengthReplacement - 1));
}
if (dist <= effectEditDistance) {
candidates.add(new CandidateData(String.valueOf(candidate, 0, candIndex + 1), dist));
}
}
if (isArcNotTerminal(arc, candIndex)) {
findRepl(candidates, depth, fsa.getEndNode(arc), new byte[0], wordIndex + lengthReplacement, candIndex + 1);
}
}
//general
// Continue only while the value returned by cuted() stays within the allowed distance.
if (cuted(depth, wordIndex, candIndex) <= effectEditDistance) {
if ((isEndOfCandidate(arc, wordIndex))
&& (dist = ed(wordLen - 1 - (wordIndex - depth), depth, wordLen - 1, candIndex)) <= effectEditDistance) {
candidates.add(new CandidateData(String.valueOf(candidate, 0, candIndex + 1), dist));
}
if (isArcNotTerminal(arc, candIndex)) {
findRepl(candidates, depth + 1, fsa.getEndNode(arc), new byte[0], wordIndex + 1, candIndex + 1);
}
}
}
}
}
/**
 * Convert the given characters to bytes.
 * <p>
 * Chars that remain unencoded after an underflow are stashed in
 * {@code leftovers} and are completed one char at a time on the next call.
 *
 * @param cc char input
 * @param bc byte output
 * @throws IOException An encoding error occurred
 */
public void convert(CharChunk cc, ByteChunk bc) throws IOException {
    // Reuse the cached wrappers while the backing arrays are unchanged;
    // otherwise wrap the chunk's current array.
    if ((bb != null) && (bb.array() == bc.getBuffer())) {
        bb.limit(bc.getBuffer().length);
        bb.position(bc.getEnd());
    } else {
        bb = ByteBuffer.wrap(bc.getBuffer(), bc.getEnd(), bc.getBuffer().length - bc.getEnd());
    }
    if ((cb != null) && (cb.array() == cc.getBuffer())) {
        cb.limit(cc.getEnd());
        cb.position(cc.getStart());
    } else {
        cb = CharBuffer.wrap(cc.getBuffer(), cc.getStart(), cc.getLength());
    }

    // Finish a sequence left incomplete by the previous call
    if (leftovers.position() > 0) {
        final int bytesBefore = bb.position();
        CoderResult partial;
        // Feed one more input char per pass until bytes come out or the
        // encoder stops for another reason
        do {
            leftovers.put((char) cc.substract());
            leftovers.flip();
            partial = encoder.encode(leftovers, bb, false);
            leftovers.position(leftovers.limit());
            leftovers.limit(leftovers.array().length);
        } while (partial.isUnderflow() && (bb.position() == bytesBefore));
        if (partial.isError()) {
            partial.throwException();
        }
        cb.position(cc.getStart());
        leftovers.position(0);
    }

    // Main encoding pass
    final CoderResult result = encoder.encode(cb, bb, false);
    if (result.isError()) {
        result.throwException();
    }
    // Overflow or underflow: publish how far each buffer advanced
    bc.setEnd(bb.position());
    cc.setOffset(cb.position());
    if (result.isUnderflow() && (cc.getLength() > 0)) {
        // Keep the unencoded tail for the next call
        leftovers.limit(leftovers.array().length);
        leftovers.position(cc.getLength());
        cc.substract(leftovers.array(), 0, cc.getLength());
    }
}
/**
 * Convert the given characters to bytes.
 * <p>
 * Encoded bytes are written after {@code bc}'s limit, and the limit is moved
 * to the end of the written data. Chars that remain unencoded after an
 * underflow are stashed in {@code leftovers} for the next call.
 *
 * @param cc char input
 * @param bc byte output
 * @throws IOException An encoding error occurred
 */
public void convert(CharBuffer cc, ByteBuffer bc) throws IOException {
    // Reuse the cached wrappers while the backing arrays are unchanged;
    // otherwise wrap the caller's current array.
    if ((bb != null) && (bb.array() == bc.array())) {
        bb.limit(bc.capacity());
        bb.position(bc.limit());
    } else {
        bb = ByteBuffer.wrap(bc.array(), bc.limit(), bc.capacity() - bc.limit());
    }
    if ((cb != null) && (cb.array() == cc.array())) {
        cb.limit(cc.limit());
        cb.position(cc.position());
    } else {
        cb = CharBuffer.wrap(cc.array(), cc.arrayOffset() + cc.position(), cc.remaining());
    }

    // Finish a sequence left incomplete by the previous call
    if (leftovers.position() > 0) {
        final int bytesBefore = bb.position();
        CoderResult partial;
        // Feed one more input char per pass until bytes come out or the
        // encoder stops for another reason
        do {
            leftovers.put(cc.get());
            leftovers.flip();
            partial = encoder.encode(leftovers, bb, false);
            leftovers.position(leftovers.limit());
            leftovers.limit(leftovers.array().length);
        } while (partial.isUnderflow() && (bb.position() == bytesBefore));
        if (partial.isError()) {
            partial.throwException();
        }
        cb.position(cc.position());
        leftovers.position(0);
    }

    // Main encoding pass
    final CoderResult result = encoder.encode(cb, bb, false);
    if (result.isError()) {
        result.throwException();
    }
    // Overflow or underflow: publish how far each buffer advanced
    bc.limit(bb.position());
    cc.position(cb.position());
    if (result.isUnderflow() && (cc.remaining() > 0)) {
        // Keep the unencoded tail for the next call
        leftovers.limit(leftovers.array().length);
        leftovers.position(cc.remaining());
        cc.get(leftovers.array(), 0, cc.remaining());
    }
}
/**
 * Decodes the remaining bytes of {@code inbuf} with the decoder for the
 * configured encoding and returns the characters in a flipped buffer that
 * has at least one unused char of capacity after its limit.
 * <p>
 * Malformed or unmappable input is reported through {@code log}, the bad
 * bytes are skipped, and U+FFFD is written in their place. If the encoding
 * name is illegal or unsupported, an error is logged and an empty buffer of
 * capacity 1 is returned.
 *
 * @param inbuf bytes to decode, starting at its current position
 * @param ignoreEncodingErrors passed through to {@code getDecoder}
 * @return the decoded characters, ready for reading
 */
public CharBuffer decode(ByteBuffer inbuf, boolean ignoreEncodingErrors) {
    String encName = getEncodingName();
    CharsetDecoder decoder;
    try {
        decoder = getDecoder(encName, ignoreEncodingErrors);
    } catch (IllegalCharsetNameException | UnsupportedCharsetException e) {
        log.error(Errors.UnsupportedEncoding(encName));
        return (CharBuffer) CharBuffer.allocate(1).flip();
    }

    // slightly overestimate the buffer size to avoid reallocation.
    float factor =
        decoder.averageCharsPerByte() * 0.8f +
        decoder.maxCharsPerByte() * 0.2f;
    CharBuffer dest = CharBuffer.
        allocate(10 + (int)(inbuf.remaining()*factor));

    while (true) {
        CoderResult result = decoder.decode(inbuf, dest, true);
        dest.flip();

        if (result.isUnderflow()) { // done reading
            // make sure there is at least one extra character
            if (dest.limit() == dest.capacity()) {
                dest = CharBuffer.allocate(dest.capacity()+1).put(dest);
                dest.flip();
            }
            return dest;
        } else if (result.isOverflow()) { // buffer too small; expand
            int newCapacity =
                10 + dest.capacity() +
                (int)(inbuf.remaining()*decoder.maxCharsPerByte());
            dest = CharBuffer.allocate(newCapacity).put(dest);
        } else if (result.isMalformed() || result.isUnmappable()) {
            // bad character in input
            StringBuilder unmappable = new StringBuilder();
            int len = result.length();

            for (int i = 0; i < len; i++) {
                unmappable.append(String.format("%02X", inbuf.get()));
            }

            String charsetName = charset == null ? encName : charset.name();

            log.error(dest.limit(),
                      Errors.IllegalCharForEncoding(unmappable.toString(), charsetName));

            if (dest.limit() == dest.capacity()) {
                // The decoder can stop on bad input with the output exactly
                // full; without growing here the put() below would throw
                // BufferOverflowException. Copying the flipped contents
                // leaves the new buffer positioned for further writes.
                int newCapacity =
                    10 + dest.capacity() +
                    (int)(inbuf.remaining()*decoder.maxCharsPerByte());
                dest = CharBuffer.allocate(newCapacity).put(dest);
            } else {
                // undo the flip() to prepare the output buffer
                // for more translation
                dest.position(dest.limit());
                dest.limit(dest.capacity());
            }
            dest.put((char)0xfffd); // backward compatible
        } else {
            throw new AssertionError(result);
        }
    }
    // unreached
}
/**
 * Returns the number of chars in this sequence. The first call decodes the
 * whole file chunk by chunk just to count chars, which is expensive; the
 * result is cached in {@code length} and returned directly afterwards.
 * <p>
 * An {@link IOException} during decoding is reported via
 * {@code Exceptions.printStackTrace} and the chars counted so far are used.
 */
@Override
public int length() {
    if (length != -1) {
        return length;
    }
    final long startedAt = System.currentTimeMillis();
    int charCount = 0;
    long byteOffset = 0;
    MappedByteBuffer chunk = null;
    final CharBuffer scratch = CharBuffer.allocate(SIZE_LIMIT);
    final CharsetDecoder dec = prepareDecoder(charset);
    dec.onUnmappableCharacter(CodingErrorAction.IGNORE);
    try {
        while (byteOffset < fileSize) {
            chunk = fileChannel.map(
                    FileChannel.MapMode.READ_ONLY, byteOffset,
                    Math.min(SIZE_LIMIT, fileSize - byteOffset));
            // byteOffset does not change while this chunk is drained, so the
            // end-of-input test is the same for every pass of the inner loop.
            final boolean lastChunk = byteOffset + SIZE_LIMIT >= fileSize;
            CoderResult outcome;
            do {
                scratch.clear();
                outcome = dec.decode(chunk, scratch, lastChunk);
                if (outcome.isError()) {
                    throw new IOException("Error decoding file: "
                            + outcome + " ");
                }
                if (lastChunk) {
                    LOG.info("Coding end");
                }
                charCount += scratch.position();
            } while (outcome.isOverflow());
            // Advance by what the decoder consumed; bytes it left unread are
            // mapped again at the start of the next chunk.
            byteOffset += chunk.position();
            unmap(chunk);
        }
        // Drain anything the decoder still holds internally.
        scratch.clear();
        boolean more;
        do {
            more = dec.flush(scratch).isOverflow();
            charCount += scratch.position();
            scratch.clear();
        } while (more);
    } catch (IOException ex) {
        if (chunk != null) {
            unmap(chunk);
        }
        Exceptions.printStackTrace(ex);
    }
    length = charCount;
    LOG.log(Level.INFO, "Length computed in {0} ms.", //NOI18N
            System.currentTimeMillis() - startedAt);
    return length;
}
/**
 * Returns the char at {@code index}. If the index is not covered by the
 * current decoded buffer, the file is decoded chunk by chunk (restarting
 * from the beginning via {@code reset()} when the index lies before the
 * current window) until the buffer that contains the index is reached.
 *
 * @param index position of the requested char
 * @return the char at {@code index}
 * @throws IndexOutOfBoundsException if {@code index} is greater than {@code length()}
 * @throws IllegalStateException if the char could not be obtained
 */
@Override
public char charAt(int index) {
// Statistics: count requests that go backwards relative to the previous one.
if (index < lastIndex) {
returns++;
}
lastIndex = index;
// NOTE(review): index == length() (and a negative index) pass this check;
// CharSequence.charAt specifies IndexOutOfBoundsException for both.
// Confirm whether ">=" plus a negative check was intended.
if (index > length()) {
throw new IndexOutOfBoundsException();
}
if (isInBuffer(index)) {
return getFromBuffer(index);
} else {
// Start over when the window is past the index or nothing was decoded yet.
if (index < currentStart || currentStart == -1) {
reset();
}
retrieves++;
MappedByteBuffer mappedByteBuffer = null;
try {
while (readBytes < fileSize) {
try {
mappedByteBuffer = fileChannel.map(
FileChannel.MapMode.READ_ONLY,
readBytes,
Math.min(SIZE_LIMIT, fileSize - readBytes));
maps++;
CoderResult result;
do {
// Slide the window start forward by the size of the previously decoded buffer.
currentStart = currentStart == -1 ? 0
: currentStart + currentBuffer.limit();
currentBuffer.clear();
result = currentDecoder.decode(mappedByteBuffer,
currentBuffer,
readBytes + SIZE_LIMIT >= fileSize);
currentBuffer.flip();
int readChars = currentBuffer.limit();
// The window now covers the index: return from inside the loop
// (the finally block below still records progress and unmaps).
if (currentStart + readChars > index) {
return getFromBuffer(index);
}
if (result.isUnmappable() || result.isMalformed()
|| result.isError()) {
throw new IOException("Error decoding file: "
+ result.toString() + " ");
}
} while (result.isOverflow());
} finally {
// Runs on normal completion, early return and exception alike:
// account for the bytes consumed from this mapping, then release it.
if (mappedByteBuffer != null) {
int readNow = mappedByteBuffer.position();
readBytes += readNow;
unmap(mappedByteBuffer);
}
}
}
// All bytes were fed to the decoder; flush whatever it still holds.
boolean repeat;
do {
repeat = currentDecoder.flush(currentBuffer).isOverflow();
int size = currentBuffer.position();
if (size + currentStart > index) {
currentBuffer.flip();
return currentBuffer.get(index - currentStart);
}
currentBuffer.clear();
currentStart += size;
} while (repeat);
} catch (IOException ex) {
if (mappedByteBuffer != null) {
unmap(mappedByteBuffer);
}
Exceptions.printStackTrace(ex);
}
}
// Reached when the index was not found or decoding failed with an IOException.
throw new IllegalStateException(
"Cannot get character."); //NOI18N
}
/**
 * Convert the given bytes to characters.
 * <p>
 * Bytes that remain undecoded after an underflow are stashed in
 * {@code leftovers} and are completed one byte at a time on the next call.
 *
 * @param bc byte input
 * @param cc char output
 * @param endOfInput Is this all of the available data
 * @throws IOException if the decoder reports malformed or unmappable input
 */
public void convert(ByteChunk bc, CharChunk cc, boolean endOfInput)
        throws IOException {
    // Reuse the cached wrappers while the backing arrays are unchanged;
    // otherwise wrap the chunk's current array.
    if ((bb != null) && (bb.array() == bc.getBuffer())) {
        bb.limit(bc.getEnd());
        bb.position(bc.getStart());
    } else {
        bb = ByteBuffer.wrap(bc.getBuffer(), bc.getStart(), bc.getLength());
    }
    if ((cb != null) && (cb.array() == cc.getBuffer())) {
        cb.limit(cc.getBuffer().length);
        cb.position(cc.getEnd());
    } else {
        cb = CharBuffer.wrap(cc.getBuffer(), cc.getEnd(),
                cc.getBuffer().length - cc.getEnd());
    }

    // Finish a sequence left incomplete by the previous call
    if (leftovers.position() > 0) {
        final int charsBefore = cb.position();
        CoderResult partial;
        // Feed one more input byte per pass until a char comes out or the
        // decoder stops for another reason
        do {
            leftovers.put(bc.substractB());
            leftovers.flip();
            partial = decoder.decode(leftovers, cb, endOfInput);
            leftovers.position(leftovers.limit());
            leftovers.limit(leftovers.array().length);
        } while (partial.isUnderflow() && (cb.position() == charsBefore));
        if (partial.isError()) {
            partial.throwException();
        }
        bb.position(bc.getStart());
        leftovers.position(0);
    }

    // Main decoding pass
    final CoderResult result = decoder.decode(bb, cb, endOfInput);
    if (result.isError()) {
        result.throwException();
    }
    // Overflow or underflow: publish how far each buffer advanced
    bc.setOffset(bb.position());
    cc.setEnd(cb.position());
    if (result.isUnderflow() && (bc.getLength() > 0)) {
        // Keep the undecoded tail for the next call
        leftovers.limit(leftovers.array().length);
        leftovers.position(bc.getLength());
        bc.substract(leftovers.array(), 0, bc.getLength());
    }
}
/**
 * Convert the given characters to bytes.
 * <p>
 * Chars that remain unencoded after an underflow are stashed in
 * {@code leftovers} and are completed one char at a time on the next call.
 *
 * @param cc char input
 * @param bc byte output
 * @throws IOException if the encoder reports malformed or unmappable input
 */
public void convert(CharChunk cc, ByteChunk bc)
        throws IOException {
    // Reuse the cached wrappers while the backing arrays are unchanged;
    // otherwise wrap the chunk's current array.
    if ((bb != null) && (bb.array() == bc.getBuffer())) {
        bb.limit(bc.getBuffer().length);
        bb.position(bc.getEnd());
    } else {
        bb = ByteBuffer.wrap(bc.getBuffer(), bc.getEnd(),
                bc.getBuffer().length - bc.getEnd());
    }
    if ((cb != null) && (cb.array() == cc.getBuffer())) {
        cb.limit(cc.getEnd());
        cb.position(cc.getStart());
    } else {
        cb = CharBuffer.wrap(cc.getBuffer(), cc.getStart(),
                cc.getLength());
    }

    // Finish a sequence left incomplete by the previous call
    if (leftovers.position() > 0) {
        final int bytesBefore = bb.position();
        CoderResult partial;
        // Feed one more input char per pass until bytes come out or the
        // encoder stops for another reason
        do {
            leftovers.put((char) cc.substract());
            leftovers.flip();
            partial = encoder.encode(leftovers, bb, false);
            leftovers.position(leftovers.limit());
            leftovers.limit(leftovers.array().length);
        } while (partial.isUnderflow() && (bb.position() == bytesBefore));
        if (partial.isError()) {
            partial.throwException();
        }
        cb.position(cc.getStart());
        leftovers.position(0);
    }

    // Main encoding pass
    final CoderResult result = encoder.encode(cb, bb, false);
    if (result.isError()) {
        result.throwException();
    }
    // Overflow or underflow: publish how far each buffer advanced
    bc.setEnd(bb.position());
    cc.setOffset(cb.position());
    if (result.isUnderflow() && (cc.getLength() > 0)) {
        // Keep the unencoded tail for the next call
        leftovers.limit(leftovers.array().length);
        leftovers.position(cc.getLength());
        cc.substract(leftovers.array(), 0, cc.getLength());
    }
}
/**
 * Decodes the remaining bytes of {@code inbuf} with the decoder for the
 * configured encoding and returns the characters in a flipped buffer that
 * has at least one unused char of capacity after its limit.
 * <p>
 * Malformed or unmappable input is reported through {@code log}, the bad
 * bytes are skipped, and U+FFFD is written in their place. If the encoding
 * name is illegal or unsupported, an error is logged and an empty buffer of
 * capacity 1 is returned.
 *
 * @param inbuf bytes to decode, starting at its current position
 * @param ignoreEncodingErrors passed through to {@code getDecoder}
 * @return the decoded characters, ready for reading
 */
@SuppressWarnings("cast")
public CharBuffer decode(ByteBuffer inbuf, boolean ignoreEncodingErrors) {
    String encName = getEncodingName();
    CharsetDecoder decoder;
    try {
        decoder = getDecoder(encName, ignoreEncodingErrors);
    } catch (IllegalCharsetNameException | UnsupportedCharsetException e) {
        log.error("unsupported.encoding", encName);
        return (CharBuffer)CharBuffer.allocate(1).flip();
    }

    // slightly overestimate the buffer size to avoid reallocation.
    float factor =
        decoder.averageCharsPerByte() * 0.8f +
        decoder.maxCharsPerByte() * 0.2f;
    CharBuffer dest = CharBuffer.
        allocate(10 + (int)(inbuf.remaining()*factor));

    while (true) {
        CoderResult result = decoder.decode(inbuf, dest, true);
        dest.flip();

        if (result.isUnderflow()) { // done reading
            // make sure there is at least one extra character
            if (dest.limit() == dest.capacity()) {
                dest = CharBuffer.allocate(dest.capacity()+1).put(dest);
                dest.flip();
            }
            return dest;
        } else if (result.isOverflow()) { // buffer too small; expand
            int newCapacity =
                10 + dest.capacity() +
                (int)(inbuf.remaining()*decoder.maxCharsPerByte());
            dest = CharBuffer.allocate(newCapacity).put(dest);
        } else if (result.isMalformed() || result.isUnmappable()) {
            // bad character in input
            StringBuilder unmappable = new StringBuilder();
            int len = result.length();

            for (int i = 0; i < len; i++) {
                unmappable.append(String.format("%02X", inbuf.get()));
            }

            String charsetName = charset == null ? encName : charset.name();

            log.error(dest.limit(),
                      Errors.IllegalCharForEncoding(unmappable.toString(), charsetName));

            if (dest.limit() == dest.capacity()) {
                // The decoder can stop on bad input with the output exactly
                // full; without growing here the put() below would throw
                // BufferOverflowException. Copying the flipped contents
                // leaves the new buffer positioned for further writes.
                int newCapacity =
                    10 + dest.capacity() +
                    (int)(inbuf.remaining()*decoder.maxCharsPerByte());
                dest = CharBuffer.allocate(newCapacity).put(dest);
            } else {
                // undo the flip() to prepare the output buffer
                // for more translation
                dest.position(dest.limit());
                dest.limit(dest.capacity());
            }
            dest.put((char)0xfffd); // backward compatible
        } else {
            throw new AssertionError(result);
        }
    }
    // unreached
}
/**
 * Reads up to {@code length} characters into {@code buf}, starting at index
 * {@code offset}. Characters are decoded from this reader's byte buffer,
 * which is topped up from the underlying stream whenever the decoder runs
 * out of input.
 *
 * @param buf
 *            the array to store the characters read.
 * @param offset
 *            the initial position in {@code buf} to store the characters
 *            read from this reader.
 * @param length
 *            the maximum number of characters to read.
 * @return the number of characters stored, 0 if {@code length} is 0, or -1
 *         if no characters were stored.
 * @throws IndexOutOfBoundsException
 *             if {@code offset < 0} or {@code length < 0}, or if
 *             {@code offset + length} is greater than the length of
 *             {@code buf}.
 * @throws IOException
 *             if this reader is closed, the input is malformed or
 *             unmappable, or some other I/O error occurs.
 */
@Override
public int read(char[] buf, int offset, int length) throws IOException {
    synchronized (lock) {
        if (!isOpen()) {
            throw new IOException("InputStreamReader is closed.");
        }
        if (offset < 0 || offset > buf.length - length || length < 0) {
            throw new IndexOutOfBoundsException();
        }
        if (length == 0) {
            return 0;
        }

        final CharBuffer target = CharBuffer.wrap(buf, offset, length);
        CoderResult outcome = CoderResult.UNDERFLOW;

        // Nothing buffered (always the case on the very first call) means
        // the stream has to be read before decoding.
        boolean refill = !bytes.hasRemaining();

        while (target.hasRemaining()) {
            if (refill) {
                try {
                    if ((in.available() == 0)
                            && (target.position() > offset)) {
                        // something was already decoded; hand it back
                        // rather than block on the stream
                        break;
                    }
                } catch (IOException e) {
                    // available didn't work so just try the read
                }

                // Append after the bytes that are already buffered
                final int free = bytes.capacity() - bytes.limit();
                final int writeIndex = bytes.arrayOffset() + bytes.limit();
                final int count = in.read(bytes.array(), writeIndex, free);

                if (count == -1) {
                    endOfInput = true;
                    break;
                }
                if (count == 0) {
                    break;
                }
                bytes.limit(bytes.limit() + count);
                refill = false;
            }

            outcome = decoder.decode(bytes, target, false);
            if (!outcome.isUnderflow()) {
                break;
            }

            // Decoder wants more input: when the buffer is used up to its
            // capacity, move the unread tail to the front to make room.
            if (bytes.limit() == bytes.capacity()) {
                bytes.compact();
                bytes.limit(bytes.position());
                bytes.position(0);
            }
            refill = true;
        }

        if (outcome == CoderResult.UNDERFLOW && endOfInput) {
            // Final pass so the decoder can finish and be reused
            outcome = decoder.decode(bytes, target, true);
            decoder.flush(target);
            decoder.reset();
        }
        if (outcome.isMalformed()) {
            throw new MalformedInputException(outcome.length());
        } else if (outcome.isUnmappable()) {
            throw new UnmappableCharacterException(outcome.length());
        }

        final int produced = target.position() - offset;
        return produced == 0 ? -1 : produced;
    }
}
/**
 * Reads up to {@code length} characters into {@code buf}, starting at index
 * {@code offset}. Characters are decoded from this reader's byte buffer,
 * which is topped up from the underlying stream whenever the decoder runs
 * out of input.
 *
 * @param buf
 *            the array to store the characters read.
 * @param offset
 *            the initial position in {@code buf} to store the characters
 *            read from this reader.
 * @param length
 *            the maximum number of characters to read.
 * @return the number of characters stored, 0 if {@code length} is 0, or -1
 *         if no characters were stored.
 * @throws IndexOutOfBoundsException
 *             if {@code offset < 0} or {@code length < 0}, or if
 *             {@code offset + length} is greater than the length of
 *             {@code buf}.
 * @throws IOException
 *             if this reader is closed, the input is malformed or
 *             unmappable, or some other I/O error occurs.
 */
@Override
public int read(char[] buf, int offset, int length) throws IOException {
    synchronized (lock) {
        if (!isOpen()) {
            throw new IOException("InputStreamReader is closed.");
        }
        if (offset < 0 || offset > buf.length - length || length < 0) {
            throw new IndexOutOfBoundsException();
        }
        if (length == 0) {
            return 0;
        }

        final CharBuffer target = CharBuffer.wrap(buf, offset, length);
        CoderResult outcome = CoderResult.UNDERFLOW;

        // Nothing buffered (always the case on the very first call) means
        // the stream has to be read before decoding.
        boolean refill = !bytes.hasRemaining();

        while (target.hasRemaining()) {
            if (refill) {
                try {
                    if ((in.available() == 0)
                            && (target.position() > offset)) {
                        // something was already decoded; hand it back
                        // rather than block on the stream
                        break;
                    }
                } catch (IOException e) {
                    // available didn't work so just try the read
                }

                // Append after the bytes that are already buffered
                final int free = bytes.capacity() - bytes.limit();
                final int writeIndex = bytes.arrayOffset() + bytes.limit();
                final int count = in.read(bytes.array(), writeIndex, free);

                if (count == -1) {
                    endOfInput = true;
                    break;
                }
                if (count == 0) {
                    break;
                }
                bytes.limit(bytes.limit() + count);
                refill = false;
            }

            outcome = decoder.decode(bytes, target, false);
            if (!outcome.isUnderflow()) {
                break;
            }

            // Decoder wants more input: when the buffer is used up to its
            // capacity, move the unread tail to the front to make room.
            if (bytes.limit() == bytes.capacity()) {
                bytes.compact();
                bytes.limit(bytes.position());
                bytes.position(0);
            }
            refill = true;
        }

        if (outcome == CoderResult.UNDERFLOW && endOfInput) {
            // Final pass so the decoder can finish and be reused
            outcome = decoder.decode(bytes, target, true);
            decoder.flush(target);
            decoder.reset();
        }
        if (outcome.isMalformed()) {
            throw new MalformedInputException(outcome.length());
        } else if (outcome.isUnmappable()) {
            throw new UnmappableCharacterException(outcome.length());
        }

        final int produced = target.position() - offset;
        return produced == 0 ? -1 : produced;
    }
}
/**
 * Convert the given bytes to characters.
 * <p>
 * Bytes that remain undecoded after an underflow are stashed in
 * {@code leftovers} and are completed one byte at a time on the next call.
 *
 * @param bc byte input
 * @param cc char output
 * @param endOfInput Is this all of the available data
 * @throws IOException if the decoder reports malformed or unmappable input
 */
public void convert(ByteChunk bc, CharChunk cc, boolean endOfInput)
        throws IOException {
    // Reuse the cached wrappers while the backing arrays are unchanged;
    // otherwise wrap the chunk's current array.
    if ((bb != null) && (bb.array() == bc.getBuffer())) {
        bb.limit(bc.getEnd());
        bb.position(bc.getStart());
    } else {
        bb = ByteBuffer.wrap(bc.getBuffer(), bc.getStart(), bc.getLength());
    }
    if ((cb != null) && (cb.array() == cc.getBuffer())) {
        cb.limit(cc.getBuffer().length);
        cb.position(cc.getEnd());
    } else {
        cb = CharBuffer.wrap(cc.getBuffer(), cc.getEnd(),
                cc.getBuffer().length - cc.getEnd());
    }

    // Finish a sequence left incomplete by the previous call
    if (leftovers.position() > 0) {
        final int charsBefore = cb.position();
        CoderResult partial;
        // Feed one more input byte per pass until a char comes out or the
        // decoder stops for another reason
        do {
            leftovers.put(bc.substractB());
            leftovers.flip();
            partial = decoder.decode(leftovers, cb, endOfInput);
            leftovers.position(leftovers.limit());
            leftovers.limit(leftovers.array().length);
        } while (partial.isUnderflow() && (cb.position() == charsBefore));
        if (partial.isError()) {
            partial.throwException();
        }
        bb.position(bc.getStart());
        leftovers.position(0);
    }

    // Main decoding pass
    final CoderResult result = decoder.decode(bb, cb, endOfInput);
    if (result.isError()) {
        result.throwException();
    }
    // Overflow or underflow: publish how far each buffer advanced
    bc.setOffset(bb.position());
    cc.setEnd(cb.position());
    if (result.isUnderflow() && (bc.getLength() > 0)) {
        // Keep the undecoded tail for the next call
        leftovers.limit(leftovers.array().length);
        leftovers.position(bc.getLength());
        bc.substract(leftovers.array(), 0, bc.getLength());
    }
}
/**
 * Convert the given characters to bytes.
 * <p>
 * Chars that remain unencoded after an underflow are stashed in
 * {@code leftovers} and are completed one char at a time on the next call.
 *
 * @param cc char input
 * @param bc byte output
 * @throws IOException if the encoder reports malformed or unmappable input
 */
public void convert(CharChunk cc, ByteChunk bc)
        throws IOException {
    // Reuse the cached wrappers while the backing arrays are unchanged;
    // otherwise wrap the chunk's current array.
    if ((bb != null) && (bb.array() == bc.getBuffer())) {
        bb.limit(bc.getBuffer().length);
        bb.position(bc.getEnd());
    } else {
        bb = ByteBuffer.wrap(bc.getBuffer(), bc.getEnd(),
                bc.getBuffer().length - bc.getEnd());
    }
    if ((cb != null) && (cb.array() == cc.getBuffer())) {
        cb.limit(cc.getEnd());
        cb.position(cc.getStart());
    } else {
        cb = CharBuffer.wrap(cc.getBuffer(), cc.getStart(),
                cc.getLength());
    }

    // Finish a sequence left incomplete by the previous call
    if (leftovers.position() > 0) {
        final int bytesBefore = bb.position();
        CoderResult partial;
        // Feed one more input char per pass until bytes come out or the
        // encoder stops for another reason
        do {
            leftovers.put((char) cc.substract());
            leftovers.flip();
            partial = encoder.encode(leftovers, bb, false);
            leftovers.position(leftovers.limit());
            leftovers.limit(leftovers.array().length);
        } while (partial.isUnderflow() && (bb.position() == bytesBefore));
        if (partial.isError()) {
            partial.throwException();
        }
        cb.position(cc.getStart());
        leftovers.position(0);
    }

    // Main encoding pass
    final CoderResult result = encoder.encode(cb, bb, false);
    if (result.isError()) {
        result.throwException();
    }
    // Overflow or underflow: publish how far each buffer advanced
    bc.setEnd(bb.position());
    cc.setOffset(cb.position());
    if (result.isUnderflow() && (cc.getLength() > 0)) {
        // Keep the unencoded tail for the next call
        leftovers.limit(leftovers.array().length);
        leftovers.position(cc.getLength());
        cc.substract(leftovers.array(), 0, cc.getLength());
    }
}
/**
 * Reads up to {@code count} characters into {@code buffer}, starting at
 * index {@code offset}, and returns how many characters were stored. The
 * characters are produced by decoding the bytes currently held in this
 * reader's byte buffer, refilling that buffer from the underlying
 * InputStream whenever the decoder needs more input.
 *
 * @return the number of characters stored, {@code 0} if {@code count} is
 *         zero, or {@code -1} if no character was produced
 * @throws IndexOutOfBoundsException
 *             if {@code offset < 0 || count < 0 || offset + count > buffer.length}.
 * @throws IOException
 *             if this reader is closed or some other I/O error occurs.
 */
@Override
public int read(char[] buffer, int offset, int count) throws IOException {
    synchronized (lock) {
        if (!isOpen()) {
            throw new IOException("InputStreamReader is closed");
        }
        JTranscArrays.checkOffsetAndCount(buffer.length, offset, count);
        if (count == 0) {
            return 0;
        }

        final CharBuffer out = CharBuffer.wrap(buffer, offset, count);
        CoderResult status = CoderResult.UNDERFLOW;
        // Start by refilling only when the byte buffer holds nothing to decode.
        boolean refill = !bytes.hasRemaining();

        while (out.hasRemaining()) {
            if (refill) {
                // If something has already been decoded and the stream
                // reports no available bytes, return rather than block.
                boolean wouldBlock = false;
                try {
                    wouldBlock = (in.available() == 0) && (out.position() > offset);
                } catch (IOException ignored) {
                    // available() failed; fall through and attempt the read.
                }
                if (wouldBlock) {
                    break;
                }

                // Read into the free space after the buffer's limit.
                final int room = bytes.capacity() - bytes.limit();
                final int writeAt = bytes.arrayOffset() + bytes.limit();
                final int got = in.read(bytes.array(), writeAt, room);
                if (got == -1) {
                    endOfInput = true;
                    break;
                }
                if (got == 0) {
                    break;
                }
                bytes.limit(bytes.limit() + got);
                refill = false;
            }

            status = decoder.decode(bytes, out, false);
            if (!status.isUnderflow()) {
                // Overflow or a decoding error: stop and handle it below.
                break;
            }
            // The decoder wants more input. If the buffer is filled to
            // capacity, slide the unread bytes to the front to make room.
            if (bytes.limit() == bytes.capacity()) {
                bytes.compact();
                bytes.limit(bytes.position());
                bytes.position(0);
            }
            refill = true;
        }

        // At end of stream, run the final decode/flush and reset the decoder.
        if (status == CoderResult.UNDERFLOW && endOfInput) {
            status = decoder.decode(bytes, out, true);
            decoder.flush(out);
            decoder.reset();
        }

        if (status.isError()) {
            status.throwException();
        }

        final int produced = out.position() - offset;
        return (produced == 0) ? -1 : produced;
    }
}