下面列出了 java.nio.charset.CharsetDecoder#onUnmappableCharacter() 的实例代码，您可以点击链接到 GitHub 查看源代码，也可以在右侧发表评论。
/**
 * Decodes {@code utf8} into a string using the decoder obtained from
 * {@code DECODER_FACTORY}.
 *
 * <p>NOTE(review): the decoder is presumably shared between calls (the original
 * comment calls REPORT "its default value") - confirm against the factory.
 *
 * @param utf8 the bytes to decode
 * @param replace if {@code true}, malformed and unmappable input is replaced
 *        instead of reported for the duration of this call
 * @return the decoded string
 * @throws CharacterCodingException if the decoder reports a coding error
 */
private static String decode(ByteBuffer utf8, boolean replace)
        throws CharacterCodingException {
    CharsetDecoder decoder = DECODER_FACTORY.get();
    if (!replace) {
        return decoder.decode(utf8).toString();
    }
    decoder.onMalformedInput(CodingErrorAction.REPLACE);
    decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
    try {
        return decoder.decode(utf8).toString();
    } finally {
        // Set the decoder back to REPORT even when decoding throws, so a
        // reused decoder is never left in REPLACE mode.
        decoder.onMalformedInput(CodingErrorAction.REPORT);
        decoder.onUnmappableCharacter(CodingErrorAction.REPORT);
    }
}
/**
 * Returns a cached thread-local {@link CharsetDecoder} for the specified
 * {@code charset}, creating and caching one on first use.
 *
 * <p>The returned decoder is always configured to replace malformed and
 * unmappable input; a previously cached decoder is reset before being returned.
 *
 * @throws NullPointerException if {@code charset} is {@code null}
 */
private static CharsetDecoder getDecoder(Charset charset) {
    if (charset == null) {
        throw new NullPointerException("charset");
    }
    Map<Charset, CharsetDecoder> cache = decoders.get();
    CharsetDecoder decoder = cache.get(charset);
    final boolean alreadyCached = decoder != null;
    if (alreadyCached) {
        // Discard any state left over from the previous use.
        decoder.reset();
    } else {
        decoder = charset.newDecoder();
    }
    decoder.onMalformedInput(CodingErrorAction.REPLACE);
    decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
    if (!alreadyCached) {
        cache.put(charset, decoder);
    }
    return decoder;
}
/**
 * Decodes bytes in a named character set into a Unicode string, substituting
 * U+FFFD for any malformed or unmappable input.
 *
 * @param text ByteBuffer containing the encoded bytes to convert.
 * @param charsetName Name of the character set the bytes are encoded in.
 * @return The decoded Unicode string, or null if the character set is not
 *         recognized or any other exception occurs while decoding.
 */
@CalledByNative
private static String convertToUnicodeWithSubstitutions(
        ByteBuffer text,
        String charsetName) {
    try {
        // TODO(mmenke): Investigate if Charset.decode() can be used
        // instead. The question is whether it uses the proper replace
        // character. JDK CharsetDecoder docs say U+FFFD is the default,
        // but Charset.decode() docs say it uses the "charset's default
        // replacement byte array".
        CharsetDecoder substitutingDecoder = Charset.forName(charsetName)
                .newDecoder()
                .onMalformedInput(CodingErrorAction.REPLACE)
                .onUnmappableCharacter(CodingErrorAction.REPLACE)
                .replaceWith("\uFFFD");
        return substitutingDecoder.decode(text).toString();
    } catch (Exception e) {
        // Every failure is reported to the caller as null.
        return null;
    }
}
/**
 * Converts the bytes in {@code utf8} to a string with the decoder supplied by
 * {@code DECODER_FACTORY}.
 *
 * <p>NOTE(review): the decoder looks like a reused instance whose normal error
 * action is REPORT - confirm against the factory definition.
 *
 * @param utf8 the bytes to decode
 * @param replace {@code true} to substitute malformed/unmappable input rather
 *        than fail on it
 * @return the decoded string
 * @throws CharacterCodingException if the decoder reports a coding error
 */
private static String decode(ByteBuffer utf8, boolean replace)
        throws CharacterCodingException {
    CharsetDecoder decoder = DECODER_FACTORY.get();
    if (replace) {
        decoder.onMalformedInput(CodingErrorAction.REPLACE);
        decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
    }
    try {
        return decoder.decode(utf8).toString();
    } finally {
        // Restore REPORT on every exit path, including exceptions, so the
        // temporary REPLACE setting cannot leak into later calls.
        if (replace) {
            decoder.onMalformedInput(CodingErrorAction.REPORT);
            decoder.onUnmappableCharacter(CodingErrorAction.REPORT);
        }
    }
}
/**
 * Returns a cached thread-local {@link CharsetDecoder} for the specified
 * {@code charset}.
 *
 * <p>A cache hit is reset before it is handed out; a miss creates a new decoder
 * and stores it. Either way the decoder replaces malformed and unmappable input.
 *
 * @throws NullPointerException if {@code charset} is {@code null}
 */
public static CharsetDecoder getDecoder(Charset charset) {
    if (charset == null) {
        throw new NullPointerException("charset");
    }

    Map<Charset, CharsetDecoder> cache = InternalThreadLocalMap.get().charsetDecoderCache();
    CharsetDecoder hit = cache.get(charset);
    if (hit != null) {
        return hit.reset()
                .onMalformedInput(CodingErrorAction.REPLACE)
                .onUnmappableCharacter(CodingErrorAction.REPLACE);
    }

    CharsetDecoder fresh = charset.newDecoder()
            .onMalformedInput(CodingErrorAction.REPLACE)
            .onUnmappableCharacter(CodingErrorAction.REPLACE);
    cache.put(charset, fresh);
    return fresh;
}
/**
 * Decodes {@code utf8} with the decoder returned by {@code DECODER_FACTORY}.
 *
 * <p>NOTE(review): REPORT is treated as the decoder's resting configuration,
 * as the original comment states; the decoder is presumably reused - confirm.
 *
 * @param utf8 the bytes to decode
 * @param replace whether malformed and unmappable input should be replaced
 *        instead of causing a failure
 * @return the decoded string
 * @throws CharacterCodingException if the decoder reports a coding error
 */
private static String decode(ByteBuffer utf8, boolean replace)
        throws CharacterCodingException {
    final CharsetDecoder decoder = DECODER_FACTORY.get();
    if (!replace) {
        return decoder.decode(utf8).toString();
    }
    decoder.onMalformedInput(CodingErrorAction.REPLACE);
    decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
    try {
        return decoder.decode(utf8).toString();
    } finally {
        // Always put the decoder back to REPORT, even if decode() failed;
        // otherwise a later strict call would silently replace bad input.
        decoder.onMalformedInput(CodingErrorAction.REPORT);
        decoder.onUnmappableCharacter(CodingErrorAction.REPORT);
    }
}
/**
 * Opens {@code collectionFile} for reading after acquiring a file lock on a
 * companion {@code <name>.lock} file inside a sibling {@code lock} directory.
 *
 * <p>The collection is read through a decoder for {@code dbConfig.getCharset()}
 * that reports (rather than replaces) malformed or unmappable input.
 *
 * @param dbConfig configuration supplying the charset of the collection file
 * @param collectionFile the collection file to read
 * @throws IOException if the lock file cannot be created or opened, or the
 *         collection file cannot be opened
 * @throws JsonFileLockException if the file lock cannot be obtained
 */
public JsonReader(JsonDBConfig dbConfig, File collectionFile) throws IOException {
    this.collectionFile = collectionFile;
    this.lockFilesLocation = new File(collectionFile.getParentFile(), "lock");
    this.fileLockLocation = new File(lockFilesLocation, collectionFile.getName() + ".lock");

    if (!lockFilesLocation.exists()) {
        lockFilesLocation.mkdirs();
    }
    if (!fileLockLocation.exists()) {
        fileLockLocation.createNewFile();
    }

    CharsetDecoder decoder = dbConfig.getCharset().newDecoder();
    decoder.onMalformedInput(CodingErrorAction.REPORT);
    decoder.onUnmappableCharacter(CodingErrorAction.REPORT);

    raf = new RandomAccessFile(fileLockLocation, "rw");
    channel = raf.getChannel();
    try {
        lock = channel.lock();
    } catch (IOException | OverlappingFileLockException e) {
        try {
            channel.close();
            raf.close();
        } catch (IOException e1) {
            logger.error("Failed while closing RandomAccessFile for collection file {}", collectionFile.getName());
        }
        throw new JsonFileLockException("JsonReader failed to obtain a file lock for file " + fileLockLocation, e);
    }

    try {
        fis = new FileInputStream(collectionFile);
    } catch (IOException e) {
        // The collection file could not be opened: do not leave the lock file
        // locked and open. Closing the channel also releases the lock held on it.
        try {
            channel.close();
            raf.close();
        } catch (IOException e1) {
            logger.error("Failed while closing RandomAccessFile for collection file {}", collectionFile.getName());
        }
        throw e;
    }
    isr = new InputStreamReader(fis, decoder);
    reader = new BufferedReader(isr);
}
/**
 * Logs {@code bytes} as text by strictly decoding them as UTF-8 and passing
 * the result to {@code log(String)}.
 *
 * <p>Input that is not valid UTF-8 is dropped without logging anything.
 *
 * @param bytes the raw bytes to decode and log
 */
@Override
public void log(byte[] bytes) {
    try {
        final CharsetDecoder strictUtf8 = StandardCharsets.UTF_8.newDecoder()
                .onMalformedInput(CodingErrorAction.REPORT)
                .onUnmappableCharacter(CodingErrorAction.REPORT);
        final String text = strictUtf8.decode(ByteBuffer.wrap(bytes)).toString();
        log(text);
    } catch (CharacterCodingException ignored) {
        // Deliberately empty: undecodable input is skipped.
    }
}
/**
 * Creates a new UTF-8 decoder that reports, rather than replaces or ignores,
 * malformed input and unmappable characters.
 *
 * @return the newly configured decoder
 */
@Override
protected CharsetDecoder initialValue() {
    return Charset.forName("UTF8")
            .newDecoder()
            .onMalformedInput(CodingErrorAction.REPORT)
            .onUnmappableCharacter(CodingErrorAction.REPORT);
}
/**
 * Turns the bytes of {@code utf8} into a string using the decoder from
 * {@code DECODER_FACTORY}.
 *
 * <p>NOTE(review): assumes the factory hands out a reused decoder whose
 * default error action is REPORT, per the original comment - confirm.
 *
 * @param utf8 the bytes to decode
 * @param replace {@code true} to replace malformed and unmappable input for
 *        this call only
 * @return the decoded string
 * @throws CharacterCodingException if the decoder reports a coding error
 */
private static String decode(ByteBuffer utf8, boolean replace)
        throws CharacterCodingException {
    CharsetDecoder decoder = DECODER_FACTORY.get();
    if (replace) {
        decoder.onMalformedInput(CodingErrorAction.REPLACE);
        decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
    }
    String str;
    try {
        str = decoder.decode(utf8).toString();
    } finally {
        // Set the decoder back to its default value (REPORT) whether or not
        // decoding succeeded, so the REPLACE override never outlives this call.
        if (replace) {
            decoder.onMalformedInput(CodingErrorAction.REPORT);
            decoder.onUnmappableCharacter(CodingErrorAction.REPORT);
        }
    }
    return str;
}
/**
 * Checks that a custom replacement string set through {@code replaceWith} is
 * substituted for malformed input: three bytes decoded as UTF-16 give one
 * character from the first two bytes followed by the replacement "x".
 */
public void test_replaceWith() throws Exception {
    CharsetDecoder replacingDecoder = Charset.forName("UTF-16").newDecoder();
    replacingDecoder.replaceWith("x")
            .onMalformedInput(CodingErrorAction.REPLACE)
            .onUnmappableCharacter(CodingErrorAction.REPLACE);
    // Bytes 109, 97 decode to U+6D61; the single trailing byte is malformed.
    ByteBuffer oddLengthInput = ByteBuffer.wrap(new byte[] { 109, 97, 109 });
    String decoded = replacingDecoder.decode(oddLengthInput).toString();
    assertEquals("\u6d61x", decoded);
}
/**
 * Decodes {@code bytes} using the named charset without consuming the buffer.
 *
 * <p>The buffer's position is marked before decoding and reset afterwards, so
 * on success the caller sees the buffer at its original position.
 *
 * @param bytes the buffer to decode
 * @param charset name of the charset to decode with
 * @return the decoded string
 * @throws InvalidDataException if the decoder reports a coding error
 */
public static String stringCharset(ByteBuffer bytes, String charset) throws InvalidDataException {
    CharsetDecoder decoder = Charset.forName(charset)
            .newDecoder()
            .onMalformedInput(codingErrorAction)
            .onUnmappableCharacter(codingErrorAction);
    try {
        bytes.mark();
        String decoded = decoder.decode(bytes).toString();
        bytes.reset();
        return decoded;
    } catch (CharacterCodingException e) {
        throw new InvalidDataException(CloseFrame.NO_UTF8, e);
    }
}
/**
 * Decodes {@code bytes} as UTF-8 and leaves the buffer position where it was.
 *
 * @param bytes the buffer to decode; it is marked before and reset after a
 *        successful decode
 * @return the decoded string
 * @throws InvalidDataException if the decoder reports a coding error
 */
public static String stringUtf8( ByteBuffer bytes ) throws InvalidDataException {
    final CharsetDecoder utf8Decoder = Charset.forName( "UTF8" ).newDecoder();
    utf8Decoder.onMalformedInput( codingErrorAction );
    utf8Decoder.onUnmappableCharacter( codingErrorAction );
    try {
        bytes.mark();
        final String text = utf8Decoder.decode( bytes ).toString();
        bytes.reset();
        return text;
    } catch ( CharacterCodingException e ) {
        throw new InvalidDataException( CloseFrame.NO_UTF8, e );
    }
}
/**
 * Returns the UTF-8 text held in {@code bytes}.
 *
 * <p>Decoding uses the error action stored in {@code codingErrorAction}. The
 * buffer is marked first and reset after a successful decode.
 *
 * @param bytes the buffer to decode
 * @return the decoded string
 * @throws InvalidDataException if the decoder reports a coding error
 */
public static String stringUtf8( ByteBuffer bytes ) throws InvalidDataException {
    CharsetDecoder decoder = Charset.forName( "UTF8" )
            .newDecoder()
            .onMalformedInput( codingErrorAction )
            .onUnmappableCharacter( codingErrorAction );
    String result;
    try {
        bytes.mark();
        result = decoder.decode( bytes ).toString();
        bytes.reset();
    } catch ( CharacterCodingException cause ) {
        throw new InvalidDataException( CloseFrame.NO_UTF8, cause );
    }
    return result;
}
/**
 * Authenticates using the URL-encoded parameters on the first line of the
 * request body.
 *
 * <p>The line is round-tripped through UTF-8 with a decoder that ignores
 * malformed and unmappable input, then parsed as URL-encoded parameters and
 * handed to a "Wave" {@link LoginContext}.
 *
 * @param body reader positioned at the start of the request body
 * @return the login context after a successful {@code login()}
 * @throws IOException if reading the body fails
 * @throws LoginException if the body is empty, the parameters cannot be
 *         decoded, or authentication fails
 */
@SuppressWarnings("unchecked")
private LoginContext login(BufferedReader body) throws IOException, LoginException {
    try {
        Subject subject = new Subject();
        String parametersLine = body.readLine();
        if (parametersLine == null) {
            // readLine() returns null at end of stream; fail as a login error
            // rather than with a NullPointerException below.
            throw new LoginException("Empty request body: no login parameters supplied");
        }
        // Throws UnsupportedEncodingException.
        byte[] utf8Bytes = parametersLine.getBytes("UTF-8");
        CharsetDecoder utf8Decoder = Charset.forName("UTF-8").newDecoder();
        utf8Decoder.onMalformedInput(CodingErrorAction.IGNORE);
        utf8Decoder.onUnmappableCharacter(CodingErrorAction.IGNORE);
        // Throws CharacterCodingException.
        CharBuffer parsed = utf8Decoder.decode(ByteBuffer.wrap(utf8Bytes));
        parametersLine = parsed.toString();
        MultiMap<String> parameters = new UrlEncoded(parametersLine);
        CallbackHandler callbackHandler = new HttpRequestBasedCallbackHandler(parameters);
        LoginContext context = new LoginContext("Wave", subject, callbackHandler, configuration);
        // If authentication fails, login() will throw a LoginException.
        context.login();
        return context;
    } catch (CharacterCodingException cce) {
        throw new LoginException("Character coding exception (not utf-8): "
                + cce.getLocalizedMessage());
    } catch (UnsupportedEncodingException uee) {
        throw new LoginException("Bad character encoding specification: " + uee.getLocalizedMessage());
    }
}
/**
 * Decodes the content of {@code bytes} as UTF-8 text.
 *
 * <p>The buffer is marked before decoding and reset once decoding succeeds,
 * so a successful call does not move the buffer's position.
 *
 * @param bytes the buffer to decode
 * @return the decoded string
 * @throws InvalidDataException if the decoder reports a coding error
 */
public static String stringUtf8( ByteBuffer bytes ) throws InvalidDataException {
    CharsetDecoder utf8 = Charset.forName( "UTF8" ).newDecoder();
    utf8.onMalformedInput( codingErrorAction ).onUnmappableCharacter( codingErrorAction );
    try {
        bytes.mark();
        String decodedText = utf8.decode( bytes ).toString();
        bytes.reset();
        return decodedText;
    } catch ( CharacterCodingException codingFailure ) {
        throw new InvalidDataException( CloseFrame.NO_UTF8, codingFailure );
    }
}
/**
 * Verifies that a UTF-16 decoder configured to REPORT errors throws
 * {@link MalformedInputException} for an odd-length (three byte) input.
 *
 * @tests java.nio.charset.CharsetDecoder#decode(java.nio.ByteBuffer)
 */
public void test_decode() throws CharacterCodingException {
    // Disabled - Regression for HARMONY-33
    // ByteBuffer bb = ByteBuffer.allocate(1);
    // bb.put(0, (byte) 77);
    // CharsetDecoder decoder = Charset.forName("UTF-16").newDecoder();
    // decoder.onMalformedInput(CodingErrorAction.REPLACE);
    // decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
    // decoder.decode(bb);

    // Disabled - Regression for HARMONY-67
    // byte[] b = new byte[] { (byte) 1 };
    // ByteBuffer buf = ByteBuffer.wrap(b);
    // CharBuffer charbuf = Charset.forName("UTF-16").decode(buf);
    // assertEquals("Assert 0: charset UTF-16", 1, charbuf.length());
    //
    // charbuf = Charset.forName("UTF-16BE").decode(buf);
    // assertEquals("Assert 1: charset UTF-16BE", 0, charbuf.length());
    //
    // charbuf = Charset.forName("UTF-16LE").decode(buf);
    // assertEquals("Assert 2: charset UTF16LE", 0, charbuf.length());

    // Regression for HARMONY-99
    CharsetDecoder strictUtf16 = Charset.forName("UTF-16")
            .newDecoder()
            .onMalformedInput(CodingErrorAction.REPORT)
            .onUnmappableCharacter(CodingErrorAction.REPORT);
    ByteBuffer oddLengthInput = ByteBuffer.wrap(new byte[] { 109, 97, 109 });
    try {
        strictUtf16.decode(oddLengthInput);
        fail("Assert 3: MalformedInputException should have thrown");
    } catch (MalformedInputException expected) {
        // expected
    }
}
/**
 * Memory-maps the file behind {@code fo}, decodes it with the file's encoding
 * and matches the decoded text as a whole.
 *
 * <p>Any exception is reported through {@code listener.generalError} and
 * results in {@code null}.
 *
 * @param fo the file object to check
 * @param listener receives the "matching started" notification and any errors
 * @return a {@code Def} describing the matches, or {@code null} if
 *         {@code matchWholeFile} returned {@code null} or an error occurred
 */
@Override
protected Def checkMeasuredInternal(FileObject fo,
        SearchListener listener) {
    MappedByteBuffer bb = null;
    FileChannel fc = null;
    try {
        listener.fileContentMatchingStarted(fo.getPath());
        File file = FileUtil.toFile(fo);
        // Open the file and then get a channel from the stream
        FileInputStream fis = new FileInputStream(file);
        fc = fis.getChannel();
        // Map the whole file into memory. The size is passed as a long so that
        // a file larger than Integer.MAX_VALUE bytes makes map() throw (and is
        // reported below) instead of being silently truncated by an int cast.
        long size = fc.size();
        bb = fc.map(FileChannel.MapMode.READ_ONLY, 0, size);
        // Decode the file into a char buffer
        Charset charset = FileEncodingQuery.getEncoding(fo);
        CharsetDecoder decoder = prepareDecoder(charset);
        decoder.onUnmappableCharacter(CodingErrorAction.IGNORE);
        CharBuffer cb = decoder.decode(bb);
        List<TextDetail> textDetails = matchWholeFile(cb, fo);
        if (textDetails == null) {
            return null;
        }
        return new Def(fo, decoder.charset(), textDetails);
    } catch (Exception e) {
        listener.generalError(e);
        return null;
    } finally {
        if (fc != null) {
            try {
                fc.close();
            } catch (IOException ex) {
                listener.generalError(ex);
            }
        }
        // bb is still null here if the failure happened before mapping.
        MatcherUtils.unmap(bb);
    }
}
/**
 * Re-encodes the content of the next flow file from the input charset to the
 * output charset, both taken from the processor properties.
 *
 * <p>Malformed or unmappable input is replaced with {@code "?"} on both the
 * decoding and the encoding side. On success the flow file is transferred to
 * {@code REL_SUCCESS}; any exception raised during the conversion is wrapped
 * in a {@link ProcessException}.
 *
 * @param context supplies the charset properties
 * @param session supplies and receives the flow file
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final ComponentLog logger = getLogger();
    final Charset inputCharset = Charset.forName(context.getProperty(INPUT_CHARSET).evaluateAttributeExpressions(flowFile).getValue());
    final Charset outputCharset = Charset.forName(context.getProperty(OUTPUT_CHARSET).evaluateAttributeExpressions(flowFile).getValue());
    final CharBuffer charBuffer = CharBuffer.allocate(MAX_BUFFER_SIZE);

    final CharsetDecoder decoder = inputCharset.newDecoder();
    decoder.onMalformedInput(CodingErrorAction.REPLACE);
    decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
    decoder.replaceWith("?");

    final CharsetEncoder encoder = outputCharset.newEncoder();
    encoder.onMalformedInput(CodingErrorAction.REPLACE);
    encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
    encoder.replaceWith("?".getBytes(outputCharset));

    try {
        final StopWatch stopWatch = new StopWatch(true);
        flowFile = session.write(flowFile, new StreamCallback() {
            @Override
            public void process(final InputStream rawIn, final OutputStream rawOut) throws IOException {
                try (final BufferedReader reader = new BufferedReader(new InputStreamReader(rawIn, decoder), MAX_BUFFER_SIZE);
                        final BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(rawOut, encoder), MAX_BUFFER_SIZE)) {
                    int charsRead;
                    while ((charsRead = reader.read(charBuffer)) != -1) {
                        // The buffer is empty before every read, so the chars
                        // just read occupy array[0, charsRead).
                        writer.write(charBuffer.array(), 0, charsRead);
                        // Restore full capacity for the next read. Using flip()
                        // without clear() capped each read at the size of the
                        // previous one, so the chunk size could only shrink.
                        charBuffer.clear();
                    }
                    writer.flush();
                }
            }
        });
        session.getProvenanceReporter().modifyContent(flowFile, stopWatch.getElapsed(TimeUnit.MILLISECONDS));
        logger.info("successfully converted characters from {} to {} for {}",
                new Object[]{inputCharset, outputCharset, flowFile});
        session.transfer(flowFile, REL_SUCCESS);
    } catch (final Exception e) {
        throw new ProcessException(e);
    }
}
/**
 * Reads bytes via {@code read()} up to (and consuming) the next delimiter byte
 * or end of input, and returns them decoded with {@code charset}.
 *
 * <p>Malformed and unmappable input is replaced rather than reported. Bytes are
 * staged in {@code barray} and decoded into {@code carray} each time
 * {@code barray} fills up; undecoded trailing bytes are carried over to the
 * next round.
 *
 * <p>NOTE(review): assumes {@code bbuff} wraps {@code barray} and {@code cbuff}
 * wraps {@code carray} (the code reads results straight from the arrays) -
 * confirm against the field declarations.
 *
 * @param charset charset used to decode the line
 * @param delimiter byte that terminates the line; it is not part of the result
 * @return the decoded line (possibly empty), or {@code null} if end of input
 *         was reached before any byte was read
 * @throws IOException if a mid-line decode step reports an error; presumably
 *         also if {@code read()} fails - confirm
 */
public String readLine(Charset charset, byte delimiter) throws IOException {
CharsetDecoder decoder = charset.newDecoder();
decoder.onMalformedInput(CodingErrorAction.REPLACE);
decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
// Widen the delimiter as an unsigned value so it can be compared with the int
// returned by read() (presumably 0-255, or -1 at end of input - confirm).
int delim = delimiter&0xff;
int rc;
// Number of pending (not yet decoded) bytes at the start of barray.
int offset = 0;
// Created lazily: stays null until the first full-buffer decode happens.
StringBuilder sb = null;
CoderResult res;
while ((rc = read())!=-1) {
if (rc == delim) {
break;
}
barray[offset++] = (byte)rc;
// Staging array is full: decode what we have before reading more.
if (barray.length == offset) {
bbuff.position(0);
bbuff.limit(barray.length);
cbuff.position(0);
cbuff.limit(carray.length);
// endOfInput=false: an incomplete trailing sequence is left in bbuff.
res = decoder.decode(bbuff, cbuff, false);
if (res.isError()) {
throw new IOException("Decoding error: " + res.toString());
}
// Move the bytes the decoder did not consume to the front of barray; they
// sit at the end of the array because the limit was barray.length.
offset = bbuff.remaining();
switch (offset) {
default:
System.arraycopy(barray, bbuff.position(), barray, 0, bbuff
.remaining());
break;
case 2:
barray[1] = barray[barray.length - 1];
barray[0] = barray[barray.length - 2];
break;
case 1:
barray[0] = barray[barray.length - 1];
break;
case 0:
// Everything was consumed; nothing to carry over.
}
if (sb == null) {
sb = new StringBuilder(cbuff.position());
}
sb.append(carray, 0, cbuff.position());
}
}
if (sb == null) {
if (rc == -1 && offset == 0) {
// We are at EOF with nothing read
return null;
}
// A delimiter was hit or some bytes are pending: the result is a (possibly
// empty) string rather than null.
sb = new StringBuilder();
}
// Decode the remaining 'offset' bytes as the final chunk of the line.
bbuff.position(0);
bbuff.limit(offset);
cbuff.position(0);
cbuff.limit(carray.length);
res = decoder.decode(bbuff, cbuff, true);
// NOTE(review): unlike the in-loop decode, an error here is only printed to
// stdout and the method carries on; overflow of cbuff is not checked either.
if (res.isError()) {
System.out.println("Error");
}
sb.append(carray, 0, cbuff.position());
// Flush any output the decoder may still be holding internally.
cbuff.position(0);
res = decoder.flush(cbuff);
if (res.isError()) {
System.out.println("Error");
}
sb.append(carray, 0, cbuff.position());
return sb.toString();
}