下面列出了 java.nio.charset.CharsetDecoder#onMalformedInput() 的实例代码，或者点击链接到 GitHub 查看源代码，也可以在右侧发表评论。
/**
 * Checks whether the first {@code bytesRead} bytes of {@code bytes} can be
 * decoded with {@code clientCharset} without a malformed-input or
 * unmappable-character error.
 *
 * <p>This is a heuristic: a clean decode only shows that the bytes are valid
 * in that charset, not that they were written in it. Decoding is done without
 * signalling end of input, so an incomplete trailing sequence is not reported
 * as an error.
 *
 * <p>Note this has been heavily changed since inception and will almost
 * certainly disappear in the 10.x timeframe -- HR.
 *
 * @param bytes buffer holding the data to inspect
 * @param bytesRead number of bytes at the start of {@code bytes} to inspect
 * @param clientCharset charset to test against; may be {@code null}
 * @return {@code false} if decoding reports an error; {@code true} otherwise,
 *         including when {@code clientCharset} is {@code null}
 */
public static boolean inferCharset(byte[] bytes, int bytesRead, Charset clientCharset) {
    ByteBuffer input = ByteBuffer.wrap(bytes, 0, bytesRead);
    // Scratch output, sized at twice the capacity of the wrapped array.
    CharBuffer scratch = CharBuffer.allocate(input.capacity() * 2);
    if (clientCharset == null) {
        // Nothing to test against.
        return true;
    }
    CoderResult outcome = clientCharset.newDecoder()
            .onMalformedInput(CodingErrorAction.REPORT)
            .onUnmappableCharacter(CodingErrorAction.REPORT)
            .decode(input, scratch, false);
    // A clean result is still only *probably* the right charset.
    return outcome == null || !outcome.isError();
}
/**
 * Decodes the remaining bytes of {@code utf8} into a string using the decoder
 * supplied by {@code DECODER_FACTORY}.
 *
 * @param utf8 buffer holding the encoded bytes (presumably UTF-8, going by the
 *        parameter name; the charset is fixed by {@code DECODER_FACTORY})
 * @param replace if {@code true}, malformed and unmappable input is replaced
 *        for the duration of this call; if {@code false}, the decoder's
 *        current error actions are used as-is
 * @return the decoded string
 * @throws CharacterCodingException if the decoder reports a coding error
 */
private static String decode(ByteBuffer utf8, boolean replace)
        throws CharacterCodingException {
    CharsetDecoder decoder = DECODER_FACTORY.get();
    if (replace) {
        decoder.onMalformedInput(
                java.nio.charset.CodingErrorAction.REPLACE);
        decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
    }
    try {
        return decoder.decode(utf8).toString();
    } finally {
        // The decoder is handed out by DECODER_FACTORY and looks to be reused
        // across calls, so put it back to REPORT even when decode() throws;
        // otherwise a later strict call would silently replace bad input.
        if (replace) {
            decoder.onMalformedInput(CodingErrorAction.REPORT);
            decoder.onUnmappableCharacter(CodingErrorAction.REPORT);
        }
    }
}
/**
 * Decodes {@code b} into a string using the charset named by
 * {@code baseCharset}, or ISO-8859-1 when no name is given.
 *
 * <p>If that attempt throws, a second attempt is made with a decoder for the
 * same charset that ignores malformed and unmappable input. If the second
 * attempt throws as well, the failure is logged and a fixed error message is
 * returned instead of decoded text.
 *
 * @param b the bytes to decode
 * @param baseCharset name of the charset to decode with; {@code null} selects
 *        ISO-8859-1
 * @return the decoded text, or a fixed error message if both attempts fail
 */
public static String safeDecodeByteArray(byte[] b, String baseCharset) {
    try {
        return baseCharset == null
                ? new String(b, StandardCharsets.ISO_8859_1)
                : new String(b, baseCharset);
    } catch (Exception primaryFailure) {
        try {
            //If it fails, go for something which shouldn't fail!
            // NOTE(review): this reuses the same charset name, so an unknown
            // name presumably fails here too -- confirm the fallback can succeed.
            return Charset.forName(baseCharset)
                    .newDecoder()
                    .onMalformedInput(CodingErrorAction.IGNORE)
                    .onUnmappableCharacter(CodingErrorAction.IGNORE)
                    .decode(ByteBuffer.wrap(b, 0, b.length))
                    .toString();
        } catch (Exception fallbackFailure) {
            Log.log(fallbackFailure);
            //Shouldn't ever happen!
            return new String("Unable to decode bytearray from Python.");
        }
    }
}
/**
 * Decodes the remaining bytes of {@code utf8} into a string using the decoder
 * supplied by {@code DECODER_FACTORY}.
 *
 * @param utf8 buffer holding the encoded bytes (presumably UTF-8, going by the
 *        parameter name; the charset is fixed by {@code DECODER_FACTORY})
 * @param replace if {@code true}, malformed and unmappable input is replaced
 *        for the duration of this call; if {@code false}, the decoder's
 *        current error actions are used as-is
 * @return the decoded string
 * @throws CharacterCodingException if the decoder reports a coding error
 */
private static String decode(ByteBuffer utf8, boolean replace)
        throws CharacterCodingException {
    CharsetDecoder decoder = DECODER_FACTORY.get();
    if (replace) {
        decoder.onMalformedInput(
                java.nio.charset.CodingErrorAction.REPLACE);
        decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
    }
    try {
        return decoder.decode(utf8).toString();
    } finally {
        // The decoder is handed out by DECODER_FACTORY and looks to be reused
        // across calls, so put it back to REPORT even when decode() throws;
        // otherwise a later strict call would silently replace bad input.
        if (replace) {
            decoder.onMalformedInput(CodingErrorAction.REPORT);
            decoder.onUnmappableCharacter(CodingErrorAction.REPORT);
        }
    }
}
/**
 * Decodes the remaining bytes of {@code utf8} into a string using the decoder
 * supplied by {@code DECODER_FACTORY}.
 *
 * @param utf8 buffer holding the encoded bytes (presumably UTF-8, going by the
 *        parameter name; the charset is fixed by {@code DECODER_FACTORY})
 * @param replace if {@code true}, malformed and unmappable input is replaced
 *        for the duration of this call; if {@code false}, the decoder's
 *        current error actions are used as-is
 * @return the decoded string
 * @throws CharacterCodingException if the decoder reports a coding error
 */
private static String decode(ByteBuffer utf8, boolean replace)
        throws CharacterCodingException {
    CharsetDecoder decoder = DECODER_FACTORY.get();
    if (replace) {
        decoder.onMalformedInput(
                java.nio.charset.CodingErrorAction.REPLACE);
        decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
    }
    try {
        return decoder.decode(utf8).toString();
    } finally {
        // The decoder is handed out by DECODER_FACTORY and looks to be reused
        // across calls, so put it back to REPORT even when decode() throws;
        // otherwise a later strict call would silently replace bad input.
        if (replace) {
            decoder.onMalformedInput(CodingErrorAction.REPORT);
            decoder.onUnmappableCharacter(CodingErrorAction.REPORT);
        }
    }
}
/**
 * Decodes the remaining bytes of {@code utf8} into a string using the decoder
 * supplied by {@code DECODER_FACTORY}.
 *
 * @param utf8 buffer holding the encoded bytes (presumably UTF-8, going by the
 *        parameter name; the charset is fixed by {@code DECODER_FACTORY})
 * @param replace if {@code true}, malformed and unmappable input is replaced
 *        for the duration of this call; if {@code false}, the decoder's
 *        current error actions are used as-is
 * @return the decoded string
 * @throws CharacterCodingException if the decoder reports a coding error
 */
private static String decode(ByteBuffer utf8, boolean replace)
        throws CharacterCodingException {
    CharsetDecoder decoder = DECODER_FACTORY.get();
    if (replace) {
        decoder.onMalformedInput(
                java.nio.charset.CodingErrorAction.REPLACE);
        decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
    }
    try {
        return decoder.decode(utf8).toString();
    } finally {
        // The decoder is handed out by DECODER_FACTORY and looks to be reused
        // across calls, so put it back to REPORT even when decode() throws;
        // otherwise a later strict call would silently replace bad input.
        if (replace) {
            decoder.onMalformedInput(CodingErrorAction.REPORT);
            decoder.onUnmappableCharacter(CodingErrorAction.REPORT);
        }
    }
}
/**
 * Decodes text in the named character set into a Unicode string,
 * substituting U+FFFD for every malformed or unmappable sequence.
 *
 * @param text ByteBuffer containing the encoded bytes to convert.
 * @param charsetName Name of the character set the bytes are encoded in.
 * @return Unicode string on success; null if the character set is not
 *         recognized or any other exception is raised while decoding.
 */
@CalledByNative
private static String convertToUnicodeWithSubstitutions(
        ByteBuffer text,
        String charsetName) {
    try {
        // TODO(mmenke): Investigate if Charset.decode() can be used
        // instead. The question is whether it uses the proper replace
        // character. JDK CharsetDecoder docs say U+FFFD is the default,
        // but Charset.decode() docs say it uses the "charset's default
        // replacement byte array".
        CharsetDecoder substitutingDecoder = Charset.forName(charsetName)
                .newDecoder()
                .onMalformedInput(CodingErrorAction.REPLACE)
                .onUnmappableCharacter(CodingErrorAction.REPLACE)
                .replaceWith("\uFFFD");
        return substitutingDecoder.decode(text).toString();
    } catch (Exception e) {
        return null;
    }
}
/**
 * Builds a {@code TracingManagedHttpClientConnection} configured from
 * {@code config}, falling back to {@code ConnectionConfig.DEFAULT} when
 * {@code config} is {@code null}.
 *
 * <p>A decoder/encoder pair is created only when the configuration names a
 * charset; both use the configured error actions, with {@code REPORT} used
 * for any action the configuration leaves unset. The {@code route} argument
 * is not consulted.
 *
 * @param route the route the connection is for (unused here)
 * @param config connection settings, or {@code null} for the defaults
 * @return a new connection with an id of the form {@code http-outgoing-N}
 */
@Override
public ManagedHttpClientConnection create(final HttpRoute route, final ConnectionConfig config) {
    final ConnectionConfig cconfig = (config == null) ? ConnectionConfig.DEFAULT : config;
    final Charset charset = cconfig.getCharset();

    CodingErrorAction malformedInputAction = cconfig.getMalformedInputAction();
    if (malformedInputAction == null) {
        malformedInputAction = CodingErrorAction.REPORT;
    }
    CodingErrorAction unmappableInputAction = cconfig.getUnmappableInputAction();
    if (unmappableInputAction == null) {
        unmappableInputAction = CodingErrorAction.REPORT;
    }

    final CharsetDecoder chardecoder;
    final CharsetEncoder charencoder;
    if (charset == null) {
        // No charset configured: the connection is created without coders.
        chardecoder = null;
        charencoder = null;
    } else {
        chardecoder = charset.newDecoder()
                .onMalformedInput(malformedInputAction)
                .onUnmappableCharacter(unmappableInputAction);
        charencoder = charset.newEncoder()
                .onMalformedInput(malformedInputAction)
                .onUnmappableCharacter(unmappableInputAction);
    }

    final String id = "http-outgoing-" + COUNTER.getAndIncrement();
    return new TracingManagedHttpClientConnection(
            id,
            cconfig.getBufferSize(),
            cconfig.getFragmentSizeHint(),
            chardecoder,
            charencoder,
            cconfig.getMessageConstraints(),
            incomingContentStrategy,
            outgoingContentStrategy,
            requestWriterFactory,
            responseParserFactory,
            logFunc);
}
/**
 * Decodes the remaining bytes of {@code utf8} into a string using the decoder
 * supplied by {@code DECODER_FACTORY}.
 *
 * @param utf8 buffer holding the encoded bytes (presumably UTF-8, going by the
 *        parameter name; the charset is fixed by {@code DECODER_FACTORY})
 * @param replace if {@code true}, malformed and unmappable input is replaced
 *        for the duration of this call; if {@code false}, the decoder's
 *        current error actions are used as-is
 * @return the decoded string
 * @throws CharacterCodingException if the decoder reports a coding error
 */
private static String decode(ByteBuffer utf8, boolean replace)
        throws CharacterCodingException {
    CharsetDecoder decoder = DECODER_FACTORY.get();
    if (replace) {
        decoder.onMalformedInput(
                java.nio.charset.CodingErrorAction.REPLACE);
        decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
    }
    try {
        return decoder.decode(utf8).toString();
    } finally {
        // The decoder is handed out by DECODER_FACTORY and looks to be reused
        // across calls, so put it back to REPORT even when decode() throws;
        // otherwise a later strict call would silently replace bad input.
        if (replace) {
            decoder.onMalformedInput(CodingErrorAction.REPORT);
            decoder.onUnmappableCharacter(CodingErrorAction.REPORT);
        }
    }
}
/**
 * Returns a cached thread-local {@link CharsetDecoder} for the specified
 * {@code charset}, creating and caching one on first use.
 *
 * <p>The returned decoder is always set to replace malformed input and
 * unmappable characters; a decoder taken from the cache is reset first.
 *
 * @param charset the charset to decode; must not be {@code null}
 * @return a decoder for {@code charset}, ready for a new decoding operation
 * @throws NullPointerException if {@code charset} is {@code null}
 */
public static CharsetDecoder getDecoder(Charset charset) {
    if (charset == null) {
        throw new NullPointerException("charset");
    }
    Map<Charset, CharsetDecoder> cache = decoders.get();
    CharsetDecoder decoder = cache.get(charset);
    if (decoder == null) {
        decoder = charset.newDecoder();
        cache.put(charset, decoder);
    } else {
        // Discard any state left over from the previous user.
        decoder.reset();
    }
    return decoder
            .onMalformedInput(CodingErrorAction.REPLACE)
            .onUnmappableCharacter(CodingErrorAction.REPLACE);
}
/**
 * Tests whether the leading {@code bytesRead} bytes of {@code bytes} decode
 * cleanly under {@code clientCharset}, i.e. without the decoder reporting
 * malformed input or an unmappable character.
 *
 * <p>The answer is a heuristic only: bytes that decode cleanly are merely
 * consistent with the charset. The decoder is not told that the input has
 * ended, so a sequence cut off at the end of the data does not count as an
 * error.
 *
 * <p>Note this has been heavily changed since inception and will almost
 * certainly disappear in the 10.x timeframe -- HR.
 *
 * @param bytes buffer holding the data to inspect
 * @param bytesRead number of bytes at the start of {@code bytes} to inspect
 * @param clientCharset charset to test against; may be {@code null}
 * @return {@code false} if decoding reports an error; {@code true} otherwise,
 *         including when {@code clientCharset} is {@code null}
 */
public static boolean inferCharset(byte[] bytes, int bytesRead, Charset clientCharset) {
    ByteBuffer source = ByteBuffer.wrap(bytes, 0, bytesRead);
    CharBuffer sink = CharBuffer.allocate(source.capacity() * 2);

    boolean plausible = true;
    if (clientCharset != null) {
        CharsetDecoder strictDecoder = clientCharset.newDecoder();
        strictDecoder.onMalformedInput(CodingErrorAction.REPORT);
        strictDecoder.onUnmappableCharacter(CodingErrorAction.REPORT);
        CoderResult verdict = strictDecoder.decode(source, sink, false);
        // No error still only means *probably* this charset.
        plausible = (verdict == null) || !verdict.isError();
    }
    return plausible;
}
/**
 * Opens a buffered reader over {@code collectionFile}, guarded by an exclusive
 * lock on a companion lock file.
 *
 * <p>The lock file lives in a {@code lock} directory next to the collection
 * file and is named after it with a {@code .lock} suffix; both are created if
 * missing. The collection file is decoded with the charset from
 * {@code dbConfig}, and malformed or unmappable input is reported rather than
 * replaced.
 *
 * <p>If anything fails after the lock file has been opened, the channel and
 * the underlying {@code RandomAccessFile} are closed before the exception
 * propagates, so a failed construction does not keep the lock.
 *
 * @param dbConfig supplies the charset used to decode the collection file
 * @param collectionFile the collection file to read
 * @throws JsonFileLockException if the lock on the lock file cannot be obtained
 * @throws IOException if the lock file cannot be created or opened, or the
 *         collection file cannot be opened
 */
public JsonReader(JsonDBConfig dbConfig, File collectionFile) throws IOException {
    this.collectionFile = collectionFile;
    this.lockFilesLocation = new File(collectionFile.getParentFile(), "lock");
    this.fileLockLocation = new File(lockFilesLocation, collectionFile.getName() + ".lock");
    if(!lockFilesLocation.exists()) {
        lockFilesLocation.mkdirs();
    }
    if(!fileLockLocation.exists()) {
        fileLockLocation.createNewFile();
    }
    CharsetDecoder decoder = dbConfig.getCharset().newDecoder();
    decoder.onMalformedInput(CodingErrorAction.REPORT);
    decoder.onUnmappableCharacter(CodingErrorAction.REPORT);
    raf = new RandomAccessFile(fileLockLocation, "rw");
    channel = raf.getChannel();
    try {
        lock = channel.lock();
    } catch (IOException | OverlappingFileLockException e) {
        try {
            channel.close();
            raf.close();
        } catch (IOException e1) {
            logger.error("Failed while closing RandomAccessFile for collection file {}", collectionFile.getName(), e1);
        }
        throw new JsonFileLockException("JsonReader failed to obtain a file lock for file " + fileLockLocation, e);
    }
    try {
        fis = new FileInputStream(collectionFile);
        isr = new InputStreamReader(fis, decoder);
        reader = new BufferedReader(isr);
    } catch (IOException | RuntimeException e) {
        // The lock is already held here. Closing the channel releases it, so
        // a missing or unreadable collection file does not leave the lock
        // file locked and the file handle open.
        try {
            channel.close();
            raf.close();
        } catch (IOException e1) {
            logger.error("Failed while closing RandomAccessFile for collection file {}", collectionFile.getName(), e1);
        }
        throw e;
    }
}
/**
 * Renders the payload as a short string for tracing.
 *
 * <p>An absent or empty payload yields {@code "no payload"}. A payload whose
 * bytes are all at or above the space character, or are tab, line feed or
 * carriage return, is decoded with {@code CoAP.UTF8_CHARSET} into a buffer of
 * 24 characters and returned in double quotes; if it does not fit, the decoded
 * part is followed by the total byte count. Anything else, including a payload
 * that fails to decode, is returned as hex text from {@code Utils.toHexText}.
 *
 * @return a printable description of the payload
 */
public String getPayloadTracingString() {
    if (null == payload || 0 == payload.length) {
        return "no payload";
    }

    // Bytes are signed, so values of 0x80 and above also compare below ' '
    // and mark the payload as non-text.
    boolean text = true;
    for (int i = 0; text && i < payload.length; i++) {
        final byte b = payload[i];
        text = b >= ' ' || b == '\t' || b == '\n' || b == '\r';
    }

    if (text) {
        CharsetDecoder strictDecoder = CoAP.UTF8_CHARSET.newDecoder();
        strictDecoder.onMalformedInput(CodingErrorAction.REPORT);
        strictDecoder.onUnmappableCharacter(CodingErrorAction.REPORT);
        ByteBuffer encoded = ByteBuffer.wrap(payload);
        CharBuffer preview = CharBuffer.allocate(24);
        CoderResult outcome = strictDecoder.decode(encoded, preview, true);
        strictDecoder.flush(preview);
        preview.flip();
        if (CoderResult.OVERFLOW == outcome) {
            // More text than the preview holds: show the start plus the size.
            return "\"" + preview + "\".. " + payload.length + " bytes";
        }
        if (!outcome.isError()) {
            return "\"" + preview + "\"" ;
        }
    }
    return Utils.toHexText(payload, 256);
}
/**
 * Truncate a single field so that it does not exceed Lucene's byte size limit on indexed terms.
 *
 * <p>The length is measured in UTF-8 bytes, the encoding in which Lucene
 * counts term length, independent of the platform default charset. When the
 * cut falls inside a multi-byte character, the incomplete trailing bytes are
 * dropped so the result contains only whole characters.
 *
 * @param field the string to be indexed; may be null
 * @return the field itself if it is null or already within the limit, a
 *         truncated copy if it is too long, or null if decoding the truncated
 *         bytes fails
 */
public static String truncateIndexField(String field) {
    if (field == null) {
        return field;
    }
    // Measure in UTF-8: with a single-byte default charset, a field could pass
    // this check and still exceed the limit once Lucene encodes it.
    Charset charset = java.nio.charset.StandardCharsets.UTF_8;
    byte[] bytes = field.getBytes(charset);
    if (bytes.length <= IndexWriter.MAX_TERM_LENGTH) {
        return field;
    }
    // chop the field to maximum allowed byte length
    ByteBuffer bbuf = ByteBuffer.wrap(bytes, 0, IndexWriter.MAX_TERM_LENGTH);
    try {
        // decode the chopped bytes; IGNORE drops a character split by the cut
        CharsetDecoder decoder = charset.newDecoder();
        decoder.onMalformedInput(CodingErrorAction.IGNORE);
        return decoder.decode(bbuf).toString();
    } catch (CharacterCodingException shouldNotHappen) {
        // Not expected for bytes this method encoded itself; keep the
        // documented "null on failure" contract rather than propagating.
        return null;
    }
}
/**
 * Creates a decoder for {@code charset} whose error handling follows the
 * {@code strict} setting of this object.
 *
 * <p>In strict mode both malformed input and unmappable characters are
 * reported. Otherwise malformed input is ignored and unmappable characters
 * are replaced.
 *
 * @param charset the charset to create a decoder for
 * @return a newly created, configured decoder
 */
public CharsetDecoder prepareDecoder(Charset charset) {
    final CharsetDecoder decoder = charset.newDecoder();
    final boolean reportErrors = strict;
    final CodingErrorAction malformedAction =
            reportErrors ? CodingErrorAction.REPORT : CodingErrorAction.IGNORE;
    final CodingErrorAction unmappableAction =
            reportErrors ? CodingErrorAction.REPORT : CodingErrorAction.REPLACE;
    return decoder
            .onMalformedInput(malformedAction)
            .onUnmappableCharacter(unmappableAction);
}
/**
 * Verifies how {@code InputStreamReader.read()} treats an incomplete UTF-8
 * sequence depending on where its decoder comes from: a reader that builds
 * its own decoder replaces the bad input, while a decoder passed in by the
 * caller keeps whatever malformed-input action it was given.
 */
public void test_read_1() throws IOException {
    // 0xE0 0xA0: the first two bytes of a three-byte UTF-8 sequence.
    final byte[] truncatedSequence = { -32, -96 };
    final Charset utf8 = Charset.forName("UTF-8");

    // if the decoder is constructed by InputStreamReader itself, the
    // decoder's default error action is REPLACE
    InputStreamReader byCharsetName = new InputStreamReader(
            new ByteArrayInputStream(truncatedSequence), "UTF-8");
    assertEquals("read() return incorrect value", 65533, byCharsetName.read());

    InputStreamReader byCharset = new InputStreamReader(
            new ByteArrayInputStream(truncatedSequence), utf8);
    assertEquals("read() return incorrect value", 65533, byCharset.read());

    // if the decoder is passed in, keep its status intact
    CharsetDecoder reporting = utf8.newDecoder();
    reporting.onMalformedInput(CodingErrorAction.REPORT);
    InputStreamReader withReporting = new InputStreamReader(
            new ByteArrayInputStream(truncatedSequence), reporting);
    try {
        withReporting.read();
        fail("Should throw MalformedInputException");
    } catch (MalformedInputException e) {
        // expected
    }

    CharsetDecoder ignoring = utf8.newDecoder();
    ignoring.onMalformedInput(CodingErrorAction.IGNORE);
    InputStreamReader withIgnoring = new InputStreamReader(
            new ByteArrayInputStream(truncatedSequence), ignoring);
    assertEquals("read() return incorrect value", -1, withIgnoring.read());

    CharsetDecoder replacing = utf8.newDecoder();
    replacing.onMalformedInput(CodingErrorAction.REPLACE);
    InputStreamReader withReplacing = new InputStreamReader(
            new ByteArrayInputStream(truncatedSequence), replacing);
    assertEquals("read() return incorrect value", 65533, withReplacing.read());
}
/**
 * Encodes {@code input} to bytes with the platform default charset, then
 * decodes those bytes with {@code charset} and returns the first line.
 *
 * <p>Only the malformed-input action is set on the decoder; the
 * unmappable-character action keeps the decoder's default.
 *
 * @param input the text to round-trip
 * @param charset the charset used to decode the bytes
 * @param codingErrorAction how the decoder reacts to malformed input
 * @return the first decoded line without its line terminator, or {@code null}
 *         if there is nothing to read
 * @throws IOException if reading fails, including a coding error reported by
 *         the decoder
 */
static String decodeText(String input, Charset charset, CodingErrorAction codingErrorAction) throws IOException {
    CharsetDecoder charsetDecoder = charset.newDecoder();
    charsetDecoder.onMalformedInput(codingErrorAction);
    // try-with-resources closes the whole reader chain on every exit path.
    try (BufferedReader reader = new BufferedReader(
            new InputStreamReader(
                    new ByteArrayInputStream(input.getBytes()), charsetDecoder))) {
        return reader.readLine();
    }
}
/**
 * Re-encodes the content of the next flow file from the configured input
 * charset to the configured output charset.
 *
 * <p>Both charset names are read from the processor properties, evaluated
 * against the flow file's attributes. Input that is malformed or unmappable
 * is replaced with {@code ?} on both the decoding and the encoding side. On
 * success the flow file is routed to {@code REL_SUCCESS}; any exception is
 * rethrown wrapped in a {@code ProcessException}.
 *
 * @param context supplies the charset properties
 * @param session supplies the flow file and receives the rewritten content
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final ComponentLog logger = getLogger();
    final Charset inputCharset = Charset.forName(context.getProperty(INPUT_CHARSET).evaluateAttributeExpressions(flowFile).getValue());
    final Charset outputCharset = Charset.forName(context.getProperty(OUTPUT_CHARSET).evaluateAttributeExpressions(flowFile).getValue());
    final CharBuffer charBuffer = CharBuffer.allocate(MAX_BUFFER_SIZE);
    final CharsetDecoder decoder = inputCharset.newDecoder();
    decoder.onMalformedInput(CodingErrorAction.REPLACE);
    decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
    decoder.replaceWith("?");
    final CharsetEncoder encoder = outputCharset.newEncoder();
    encoder.onMalformedInput(CodingErrorAction.REPLACE);
    encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
    encoder.replaceWith("?".getBytes(outputCharset));
    try {
        final StopWatch stopWatch = new StopWatch(true);
        flowFile = session.write(flowFile, new StreamCallback() {
            @Override
            public void process(final InputStream rawIn, final OutputStream rawOut) throws IOException {
                try (final BufferedReader reader = new BufferedReader(new InputStreamReader(rawIn, decoder), MAX_BUFFER_SIZE);
                        final BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(rawOut, encoder), MAX_BUFFER_SIZE)) {
                    int charsRead;
                    while ((charsRead = reader.read(charBuffer)) != -1) {
                        charBuffer.flip();
                        writer.write(charBuffer.array(), 0, charsRead);
                        // flip() leaves the limit at charsRead. Without clear()
                        // the next read() could fill at most that many chars,
                        // so the usable buffer would shrink after any short
                        // read and never grow back.
                        charBuffer.clear();
                    }
                    writer.flush();
                }
            }
        });
        session.getProvenanceReporter().modifyContent(flowFile, stopWatch.getElapsed(TimeUnit.MILLISECONDS));
        logger.info("successfully converted characters from {} to {} for {}",
                new Object[]{inputCharset, outputCharset, flowFile});
        session.transfer(flowFile, REL_SUCCESS);
    } catch (final Exception e) {
        throw new ProcessException(e);
    }
}
/**
 * Decodes every fetched buffer in {@code works} to text and stores the result,
 * with its HTTP header removed, at the same index in {@code documents}.
 *
 * <p>Each buffer is first decoded with the default {@code charset}. If the
 * decoded text matches {@code CHARSET_PAT} and names an encoding other than
 * {@code DEFAULT_ENCODING}, the buffer is decoded a second time with that
 * encoding. Malformed and unmappable input is ignored in both passes. Entries
 * whose work item is {@code null}, or whose decoding throws, keep a
 * {@code null} slot in {@code documents}.
 */
private static void processDocuments() {
    CharsetDecoder decoder = charset.newDecoder();
    decoder.onMalformedInput(CodingErrorAction.IGNORE);
    decoder.onUnmappableCharacter(CodingErrorAction.IGNORE);
    // perform 1 to 2-pass decoding on every document
    for (int i = 0; i < works.length; i++) {
        documents.add(null);
        if (works[i] == null)
            continue;
        URL url = works[i].url;
        log.debug("[" + (i + 1) + "/" + urls.length + "] Processing: " + url);
        String encoding = DEFAULT_ENCODING;
        String doc = null;
        ByteBuffer buffer = works[i].buffer;
        // switch the buffer from being filled to being read
        buffer.flip();
        try {
            // try to use default encoding to decode the document
            doc = decoder.decode(buffer).toString();
            // identify encoding by looking into the <meta> tag
            Matcher m = CHARSET_PAT.matcher(doc);
            if (m.find()) {
                // Locale.ROOT keeps the name stable: a locale-sensitive
                // upper-casing (e.g. Turkish dotted I) would produce a name
                // Charset.forName() cannot resolve.
                encoding = m.group(1).toUpperCase(java.util.Locale.ROOT);
                log.debug("Encoding identified as: " + encoding);
            } else {
                log.debug("Encoding could not be identified! Using the default: " + DEFAULT_ENCODING);
            }
            // if the identified encoding is different from the default encoding
            if (!encoding.equals(DEFAULT_ENCODING)) {
                // decode again using the identified encoding
                CharsetDecoder d = Charset.forName(encoding).newDecoder();
                d.onUnmappableCharacter(CodingErrorAction.IGNORE);
                d.onMalformedInput(CodingErrorAction.IGNORE);
                // the first pass consumed the buffer; go back to the start
                buffer.rewind();
                doc = d.decode(buffer).toString();
            }
        } catch (Exception e) {
            log.error("Character coding error: " + e);
            continue;
        }
        documents.set(i, removeHTTPHeader(doc));
        // doc = removeHTTPHeader(doc);
        // doc = CacheRecoverer.recover(url, doc);
        // documents.set(i, doc);
    }
}
/**
 * Configures the given decoder so that both malformed input and unmappable
 * characters are replaced instead of being reported or ignored.
 *
 * @param decoder the decoder to reconfigure in place
 */
protected static void setLenientDecoder(CharsetDecoder decoder) {
    decoder
            .onMalformedInput(CodingErrorAction.REPLACE)
            .onUnmappableCharacter(CodingErrorAction.REPLACE);
}