下面列出了java.io.FilterReader#java.nio.charset.CodingErrorAction 实例代码,或者点击链接到github查看源代码,也可以在右侧发表评论。
/**
 * Encodes a character array into bytes using the given charset.
 *
 * <p>Malformed input and unmappable characters are replaced rather than reported. The
 * working array is sized from the encoder's {@code maxBytesPerChar()} and trimmed to the
 * number of bytes actually written once encoding and flushing both finish with an
 * underflow result.
 *
 * @param src the characters to encode
 * @param charset the charset that supplies the encoder
 * @return the encoded bytes; an empty array when {@code src} is empty
 */
public static byte[] convertCharsToBytes(char[] src, Charset charset) {
    final CharsetEncoder enc = charset.newEncoder();
    final byte[] scratch = new byte[(int) (src.length * enc.maxBytesPerChar())];
    if (src.length == 0) {
        return scratch;
    }

    final CharBuffer source = CharBuffer.wrap(src);
    final ByteBuffer sink = ByteBuffer.wrap(scratch);
    enc.onMalformedInput(CodingErrorAction.REPLACE);
    enc.onUnmappableCharacter(CodingErrorAction.REPLACE);
    enc.reset();

    // Any result other than underflow leaves the untrimmed working array as the result.
    if (!enc.encode(source, sink, true).isUnderflow()) {
        return scratch;
    }
    if (!enc.flush(sink).isUnderflow()) {
        return scratch;
    }

    final int written = sink.position();
    return written == scratch.length ? scratch : Arrays.copyOf(scratch, written);
}
/**
 * Decodes {@code input} with the shared {@code decoder}, configured to replace malformed
 * and unmappable sequences, and checks the result against {@code expectedOutput}.
 *
 * @param input the bytes to decode
 * @param expectedOutput the characters the decoded output is compared with
 * @throws CharacterCodingException if the decoder reports a coding error
 */
public static void decodeReplace (byte[] input, char[] expectedOutput) throws CharacterCodingException {
    final ByteBuffer source = ByteBuffer.wrap(input);

    decoder.onMalformedInput(CodingErrorAction.REPLACE);
    decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);

    final CharBuffer decoded = decoder.decode(source);
    decoded.rewind();

    // The comparison is done on the buffer's backing array, as handed to the helper.
    assertEqualChars2("Decoded charactes must match!", expectedOutput, decoded.array(), input);
}
/**
 * Returns the {@link CharsetEncoder} for {@code charset} held in the map obtained from
 * {@code encoders}, creating and storing one on first use.
 *
 * <p>The returned encoder always replaces malformed input and unmappable characters; an
 * encoder that was already in the map is reset before it is handed out.
 *
 * @param charset the charset to get an encoder for
 * @return the encoder for {@code charset}
 * @throws NullPointerException if {@code charset} is {@code null}
 */
public static CharsetEncoder getEncoder(Charset charset) {
    if (charset == null) {
        throw new NullPointerException("charset");
    }

    final Map<Charset, CharsetEncoder> cache = encoders.get();
    final CharsetEncoder cached = cache.get(charset);
    if (cached == null) {
        final CharsetEncoder created = charset.newEncoder();
        created.onMalformedInput(CodingErrorAction.REPLACE);
        created.onUnmappableCharacter(CodingErrorAction.REPLACE);
        cache.put(charset, created);
        return created;
    }

    cached.reset();
    cached.onMalformedInput(CodingErrorAction.REPLACE);
    cached.onUnmappableCharacter(CodingErrorAction.REPLACE);
    return cached;
}
/**
 * Truncates a string so that its UTF-8 encoding fits in {@code maxBytes} bytes, without
 * cutting a multi-byte character in half at the cut-off point. Surrogate pairs, where two
 * {@code char}s form one literal character, are handled as well.
 *
 * <p>A string that already fits is returned unchanged. Otherwise the string is cut so that
 * the truncated text plus {@code ELLIPSIS} fits in {@code maxBytes}. When {@code maxBytes}
 * is too small to hold even the ellipsis, the ellipsis is omitted and the text alone is cut
 * to {@code maxBytes} (a negative budget is treated as zero), instead of failing on a
 * negative buffer length.
 *
 * <p>Based on:
 * https://stackoverflow.com/a/35148974/1260237
 * http://www.jroller.com/holy/entry/truncating_utf_string_to_the
 *
 * @param s the string to truncate; may be {@code null}
 * @param maxBytes the maximum number of UTF-8 bytes allowed for the result
 * @return {@code null} if {@code s} is {@code null}, {@code s} itself if it fits, otherwise
 *     the truncated string
 */
private static String truncateToFitUtf8ByteLength(final String s, final int maxBytes) {
    if (s == null) {
        return null;
    }
    final Charset charset = StandardCharsets.UTF_8;
    final byte[] sba = s.getBytes(charset);
    if (sba.length <= maxBytes) {
        return s;
    }

    final int ellipsisBytes = ELLIPSIS.getBytes(charset).length;
    final boolean ellipsisFits = maxBytes >= ellipsisBytes;
    // Never let the budget go negative: wrap()/allocate() reject negative lengths.
    final int maxTruncatedBytes = Math.max(0, ellipsisFits ? maxBytes - ellipsisBytes : maxBytes);

    // Ensure truncation by limiting the byte buffer to maxTruncatedBytes
    final ByteBuffer bb = ByteBuffer.wrap(sba, 0, maxTruncatedBytes);
    final CharBuffer cb = CharBuffer.allocate(maxTruncatedBytes);

    final CharsetDecoder decoder = charset.newDecoder();
    // Ignore an incomplete character at the cut-off point
    decoder.onMalformedInput(CodingErrorAction.IGNORE);
    decoder.decode(bb, cb, true);
    decoder.flush(cb);

    final String truncated = new String(cb.array(), 0, cb.position());
    return ellipsisFits ? truncated + ELLIPSIS : truncated;
}
/**
 * Creates a stream that encodes the given character sequence with the given charset.
 *
 * <p>The encoder replaces malformed input and unmappable characters. The internal byte
 * buffer starts out with nothing to read.
 *
 * @param cs the input character sequence
 * @param charset the charset used to encode the characters
 * @param bufferSize the size of the internal byte buffer
 * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character
 */
public CharSequenceInputStream(final CharSequence cs, final Charset charset, final int bufferSize) {
    super();
    this.encoder = charset.newEncoder()
            .onMalformedInput(CodingErrorAction.REPLACE)
            .onUnmappableCharacter(CodingErrorAction.REPLACE);

    // The byte buffer must have room for at least one fully encoded character.
    final float widestChar = this.encoder.maxBytesPerChar();
    if (widestChar > bufferSize) {
        throw new IllegalArgumentException(
                "Buffer size " + bufferSize + " is less than maxBytesPerChar " + widestChar);
    }

    final ByteBuffer bytes = ByteBuffer.allocate(bufferSize);
    bytes.flip(); // limit = 0, so there are no bytes to read yet
    this.bbuf = bytes;
    this.cbuf = CharBuffer.wrap(cs);

    this.mark_cbuf = NO_MARK;
    this.mark_bbuf = NO_MARK;
}
/**
* Creates a new charset converted that decodes/encodes bytes in the
* specified non-null from/to charset objects specified.
*
* @param fromCharset
* @param toCharset
* @param ignoreBOM - true to ignore any byte order marks written by the UTF-16
* charset and omit them from all return byte buffers
*/
public CharsetConverter(Charset fromCharset, Charset toCharset,
boolean ignoreBOM) {
// Create decoder that reports malformed/unmappable values
this.decoder = fromCharset.newDecoder();
this.decoder.onMalformedInput(CodingErrorAction.REPORT);
this.decoder.onUnmappableCharacter(CodingErrorAction.REPORT);
// Create encoder that reports malformed/unmappable values
this.encoder = toCharset.newEncoder();
this.encoder.onMalformedInput(CodingErrorAction.REPORT);
this.encoder.onUnmappableCharacter(CodingErrorAction.REPORT);
// Check bom on UTF-16 since Java writes a BOM on each call to encode
if ("UTF-16".equals(toCharset.name())) {
checkBOM = true;
}
this.ignoreBOM = ignoreBOM;
}
/**
 * Creates a decoder for this object's {@code charset} field, or for the charset named
 * {@code encodingName} when that field is {@code null}.
 *
 * @param encodingName the charset name used when no charset is set on this object
 * @param ignoreEncodingErrors when true, malformed and unmappable input is replaced;
 *        otherwise it is reported
 * @return a new decoder configured with the chosen error action
 */
public CharsetDecoder getDecoder(String encodingName, boolean ignoreEncodingErrors) {
    final Charset effective;
    if (this.charset != null) {
        effective = this.charset;
    } else {
        effective = Charset.forName(encodingName);
    }

    final CharsetDecoder created = effective.newDecoder();
    final CodingErrorAction onError = ignoreEncodingErrors
            ? CodingErrorAction.REPLACE
            : CodingErrorAction.REPORT;

    created.onMalformedInput(onError);
    return created.onUnmappableCharacter(onError);
}
/**
 * Asserts that encoding {@code input} with the named charset yields {@code expected}
 * through four routes: an explicit replacing encoder, {@code Charset.encode}, and both
 * {@code String.getBytes} overloads.
 *
 * @param expected the expected encoded bytes
 * @param input the text to encode
 * @param charsetName the name of the charset under test
 * @throws IOException if an encoding route fails with an I/O-level error
 */
private void assertCorrectEncoding(byte[] expected, String input, String charsetName)
        throws IOException {
    final Charset charset = Charset.forName(charsetName);

    // Route 1: explicit encoder that replaces bad input
    final CharsetEncoder replacing = charset.newEncoder();
    replacing.onMalformedInput(CodingErrorAction.REPLACE);
    replacing.onUnmappableCharacter(CodingErrorAction.REPLACE);
    final ByteBuffer viaEncoder = replacing.encode(CharBuffer.wrap(input.toCharArray()));
    final byte[] encoderBytes = new byte[viaEncoder.remaining()];
    viaEncoder.get(encoderBytes);
    Assert.assertArrayEquals(expected, encoderBytes);

    // Route 2: the charset's convenience encode method
    final ByteBuffer viaCharset = charset.encode(CharBuffer.wrap(input.toCharArray()));
    final byte[] charsetBytes = new byte[viaCharset.remaining()];
    viaCharset.get(charsetBytes);
    Assert.assertArrayEquals(expected, charsetBytes);

    // Routes 3 and 4: String.getBytes by name and by Charset
    Assert.assertArrayEquals(expected, input.getBytes(charsetName));
    Assert.assertArrayEquals(expected, input.getBytes(charset));
}
/**
 * Converts a filename from Java's native UTF-16 to the platform default character encoding.
 *
 * <p>Illegal characters are removed first via {@code removeIllegalCharacters}. The name is
 * then encoded with the default charset, substituting {@code _} for anything that cannot be
 * encoded, and decoded back to a string with that same charset.
 *
 * @param fileName the UTF-16 filename string
 * @param isPath passed through to {@code removeIllegalCharacters}
 * @return the filename restricted to what the default charset can represent
 */
private static String convertToNativeEncoding(String fileName, boolean isPath) {
    String ret = removeIllegalCharacters(fileName, isPath);
    // convert our filename to OS encoding...
    try {
        final Charset nativeCharset = Charset.defaultCharset();
        final CharsetEncoder charsetEncoder = nativeCharset.newEncoder();
        charsetEncoder.onMalformedInput(CodingErrorAction.REPLACE); // otherwise breaks on first unconvertable char
        charsetEncoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
        // Encode the replacement with the target charset so it is a legal byte sequence
        // there, rather than assuming an ASCII-compatible encoding.
        charsetEncoder.replaceWith("_".getBytes(nativeCharset));

        final ByteBuffer buf = charsetEncoder.encode(CharBuffer.wrap(ret));
        // Copy only the encoded bytes (position..limit); the backing array may be larger
        // than the content, and the buffer is not guaranteed to expose an array at all.
        final byte[] encoded = new byte[buf.remaining()];
        buf.get(encoded);
        ret = new String(encoded, nativeCharset);

        // remove NUL character from conversion...
        ret = ret.replaceAll("\\u0000", "");
    } catch (CharacterCodingException e) {
        // Best effort: fall back to the sanitized but unconverted name.
        e.printStackTrace();
    }
    return ret;
}
/**
 * Builds a decoder for the charset held in this object's {@code charset} field, falling
 * back to the charset named {@code encodingName} when the field is {@code null}.
 *
 * @param encodingName the charset name to look up when no charset is set
 * @param ignoreEncodingErrors true to replace malformed and unmappable input, false to
 *        report it
 * @return a new decoder using the selected error action for both error kinds
 */
public CharsetDecoder getDecoder(String encodingName, boolean ignoreEncodingErrors) {
    final Charset chosen = (this.charset != null)
            ? this.charset
            : Charset.forName(encodingName);
    final CharsetDecoder result = chosen.newDecoder();

    if (ignoreEncodingErrors) {
        return result
                .onMalformedInput(CodingErrorAction.REPLACE)
                .onUnmappableCharacter(CodingErrorAction.REPLACE);
    }
    return result
            .onMalformedInput(CodingErrorAction.REPORT)
            .onUnmappableCharacter(CodingErrorAction.REPORT);
}
/**
 * Constructs a terminal emulator that renders into the supplied screen.
 *
 * <p>The supplied screen becomes both the main buffer and the active screen; a separate
 * alternate buffer is created with the same dimensions and color scheme. A UTF-8 decoder
 * that replaces malformed and unmappable input is set up before the emulator is reset.
 *
 * @param session the terminal session the emulator is attached to
 * @param screen the screen to render characters into
 * @param columns the number of columns to emulate
 * @param rows the number of rows to emulate
 * @param scheme the default color scheme of this emulator
 */
public TerminalEmulator(TermSession session, TranscriptScreen screen, int columns, int rows, ColorScheme scheme) {
    mSession = session;

    // Screen buffers: the caller's screen is the main one and starts out active.
    mMainBuffer = screen;
    mScreen = mMainBuffer;
    mAltBuffer = new TranscriptScreen(columns, rows, rows, scheme);

    // Geometry
    mRows = rows;
    mColumns = columns;
    mTabStop = new boolean[mColumns];

    setColorScheme(scheme);

    // UTF-8 input decoding state
    mUTF8ByteBuffer = ByteBuffer.allocate(4);
    mInputCharBuffer = CharBuffer.allocate(2);
    mUTF8Decoder = Charset.forName("UTF-8").newDecoder()
            .onMalformedInput(CodingErrorAction.REPLACE)
            .onUnmappableCharacter(CodingErrorAction.REPLACE);

    reset();
}
/**
* Creates a new charset converted that decodes/encodes bytes in the
* specified non-null from/to charset objects specified.
*
* @param fromCharset
* @param toCharset
* @param ignoreBOM
* - true to ignore any byte order marks written by the UTF-16
* charset and omit them from all return byte buffers
*/
public CharsetConverter(Charset fromCharset, Charset toCharset,
boolean ignoreBOM) {
// Create decoder that reports malformed/unmappable values
this.decoder = fromCharset.newDecoder();
this.decoder.onMalformedInput(CodingErrorAction.REPORT);
this.decoder.onUnmappableCharacter(CodingErrorAction.REPORT);
// Create encoder that reports malformed/unmappable values
this.encoder = toCharset.newEncoder();
this.encoder.onMalformedInput(CodingErrorAction.REPORT);
this.encoder.onUnmappableCharacter(CodingErrorAction.REPORT);
// Check bom on UTF-16 since Java writes a BOM on each call to encode
if ("UTF-16".equals(toCharset.name())) {
checkBOM = true;
}
this.ignoreBOM = ignoreBOM;
}
/**
 * Encodes a string containing non-ASCII characters with a US-ASCII encoder that replaces
 * each unmappable character with a zero byte, and checks the resulting byte array.
 */
@Test
public void whenUsingCharsetEncoder_thenOK()
        throws CharacterCodingException {
    final String inputString = "Hello ਸੰਸਾਰ!";
    final byte[] expected = { 72, 101, 108, 108, 111, 32, 0, 0, 0, 0, 0, 33 };

    final CharsetEncoder encoder = StandardCharsets.US_ASCII.newEncoder();
    encoder.onMalformedInput(CodingErrorAction.IGNORE);
    encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
    encoder.replaceWith(new byte[] { 0 });

    final byte[] encoded = encoder.encode(CharBuffer.wrap(inputString)).array();

    System.out.printf(
            "Using encode with CharsetEncoder:%s, Input String:%s, Output byte array:%s\n",
            encoder, inputString, Arrays.toString(encoded));

    assertArrayEquals(expected, encoded);
}
/**
 * Constructs a new encoder instance that iterates over {@code string}, converting
 * it to bytes using the charset {@code charset}.
 *
 * <p>The encoder reads up to {@code stepSize} characters at a time, buffering the results
 * internally. {@code stepSize} must be at least 2, so that a surrogate pair always fits
 * in one step. The effective step is capped at the string length but never drops below 2.
 *
 * @param string the string to iterate over, must not be {@code null}
 * @param charset the charset to use for encoding characters to bytes, must not be {@code null}
 * @param stepSize the number of characters to try encoding in each encoding step,
 *        must be at least 2
 *
 * @throws NullPointerException if {@code string} or {@code charset} is {@code null}
 * @throws IllegalArgumentException if {@code stepSize} is less than 2
 */
public CharsetEncoderByteIterator(String string, Charset charset, int stepSize) {
    Objects.requireNonNull(string);
    Check.gt(stepSize, 1);

    // use the same settings as String.getBytes(Charset)
    this.encoder = charset.newEncoder()
            .onMalformedInput(CodingErrorAction.REPLACE)
            .onUnmappableCharacter(CodingErrorAction.REPLACE)
            .reset();

    this.string = string;
    this.idx = 0;
    this.byteIdx = 0;
    this.flushed = false;

    // No need for more chars than the string can supply, but always leave room for a
    // surrogate pair.
    final int chunkChars = Math.max(2, Math.min(stepSize, string.length()));
    this.in = CharBuffer.allocate(chunkChars);

    final int outBufferSize = (int) ((chunkChars + 1) * encoder.maxBytesPerChar());
    this.out = ByteBuffer.allocate(outBufferSize);
    this.out.flip();
}
/**
 * Decodes the given buffer into a string with the decoder obtained from
 * {@code DECODER_FACTORY}.
 *
 * <p>When {@code replace} is true the decoder is temporarily switched to replace malformed
 * and unmappable input. It is switched back to REPORT in a {@code finally} block, so a
 * failing decode (for example a {@code null} buffer) cannot leave the shared decoder in
 * REPLACE mode for later callers.
 *
 * @param utf8 the bytes to decode
 * @param replace true to replace bad input, false to leave the decoder's current error
 *        actions untouched
 * @return the decoded string
 * @throws CharacterCodingException if the decoder reports a coding error
 */
private static String decode(ByteBuffer utf8, boolean replace)
        throws CharacterCodingException {
    CharsetDecoder decoder = DECODER_FACTORY.get();
    if (!replace) {
        return decoder.decode(utf8).toString();
    }

    decoder.onMalformedInput(CodingErrorAction.REPLACE);
    decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
    try {
        return decoder.decode(utf8).toString();
    } finally {
        // set decoder back to its default value: REPORT
        decoder.onMalformedInput(CodingErrorAction.REPORT);
        decoder.onUnmappableCharacter(CodingErrorAction.REPORT);
    }
}
/**
 * Decodes the given buffer into a string with the decoder obtained from
 * {@code DECODER_FACTORY}.
 *
 * <p>With {@code replace} set, malformed and unmappable input is replaced for this call
 * only. The REPORT actions are restored in a {@code finally} block so that the shared
 * decoder is reset even when decoding throws (for example on a {@code null} buffer).
 *
 * @param utf8 the bytes to decode
 * @param replace true to replace bad input, false to leave the decoder's current error
 *        actions untouched
 * @return the decoded string
 * @throws CharacterCodingException if the decoder reports a coding error
 */
private static String decode(ByteBuffer utf8, boolean replace)
        throws CharacterCodingException {
    CharsetDecoder decoder = DECODER_FACTORY.get();
    if (replace) {
        decoder.onMalformedInput(CodingErrorAction.REPLACE);
        decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
    }
    try {
        return decoder.decode(utf8).toString();
    } finally {
        // set decoder back to its default value: REPORT
        if (replace) {
            decoder.onMalformedInput(CodingErrorAction.REPORT);
            decoder.onUnmappableCharacter(CodingErrorAction.REPORT);
        }
    }
}
/**
 * Creates the initial decoder: one for the charset registered under the name
 * {@code "UTF8"} that reports malformed and unmappable input.
 *
 * @return the newly configured decoder
 */
@Override
protected CharsetDecoder initialValue() {
    return Charset.forName("UTF8")
            .newDecoder()
            .onMalformedInput(CodingErrorAction.REPORT)
            .onUnmappableCharacter(CodingErrorAction.REPORT);
}
/**
 * Constructs a new {@link ReaderInputStream} that encodes characters with the given charset.
 *
 * <p>An encoder is obtained from {@code charset} and configured to replace malformed input
 * and unmappable characters; construction is then delegated to another constructor of this
 * class, which receives the reader, that encoder and the buffer size.
 *
 * @param reader the target {@link Reader}
 * @param charset the charset encoding
 * @param bufferSize the size of the input buffer in number of characters
 */
public ReaderInputStream(final Reader reader, final Charset charset, final int bufferSize) {
this(reader,
charset.newEncoder()
.onMalformedInput(CodingErrorAction.REPLACE)
.onUnmappableCharacter(CodingErrorAction.REPLACE),
bufferSize);
}
/**
 * Creates a decoder for the given charset that replaces unmappable and malformed input.
 *
 * @param initialSize the initial capacity of the character buffer, at least 2
 * @param charset the charset used to create the decoder
 * @param onChar the consumer stored for later use by this decoder
 * @throws IllegalArgumentException if {@code initialSize} is less than 2
 */
public BinaryDecoder(int initialSize, Charset charset, Consumer<int[]> onChar) {
    if (2 > initialSize) {
        throw new IllegalArgumentException("Initial size must be at least 2");
    }

    decoder = charset.newDecoder();
    decoder.onUnmappableCharacter(CodingErrorAction.REPLACE)
            .onMalformedInput(CodingErrorAction.REPLACE);

    // Start with the shared empty byte buffer; the char buffer needs at least 2 slots.
    bBuf = EMPTY;
    cBuf = CharBuffer.allocate(initialSize);

    this.onChar = onChar;
}
private StringEncoder(Charset cs, String rcn) {
this.requestedCharsetName = rcn;
this.cs = cs;
this.ce = cs.newEncoder()
.onMalformedInput(CodingErrorAction.REPLACE)
.onUnmappableCharacter(CodingErrorAction.REPLACE);
this.isTrusted = (cs.getClass().getClassLoader() == null);
}
/**
 * Encodes {@code len} characters of {@code ca}, starting at {@code off}, with a fresh
 * encoder from {@code cs} that replaces malformed and unmappable input.
 *
 * <p>When a security manager is installed and the charset class was loaded by a class
 * loader, the encoder works on a private copy of the requested range rather than on the
 * caller's array.
 *
 * @param cs the charset to encode with
 * @param ca the source characters
 * @param off the index of the first character to encode
 * @param len the number of characters to encode
 * @return the encoded bytes, as produced by {@code safeTrim}
 */
static byte[] encode(Charset cs, char[] ca, int off, int len) {
    final CharsetEncoder ce = cs.newEncoder();
    final byte[] ba = new byte[scale(len, ce.maxBytesPerChar())];
    if (len == 0) {
        return ba;
    }

    boolean isTrusted = false;
    if (System.getSecurityManager() != null) {
        isTrusted = (cs.getClass().getClassLoader0() == null);
        if (!isTrusted) {
            // Hand the encoder a private copy of just the requested range.
            ca = Arrays.copyOfRange(ca, off, off + len);
            off = 0;
        }
    }

    ce.onMalformedInput(CodingErrorAction.REPLACE)
      .onUnmappableCharacter(CodingErrorAction.REPLACE)
      .reset();

    if (ce instanceof ArrayEncoder) {
        final int blen = ((ArrayEncoder) ce).encode(ca, off, len, ba);
        return safeTrim(ba, blen, cs, isTrusted);
    }

    final ByteBuffer bb = ByteBuffer.wrap(ba);
    final CharBuffer cb = CharBuffer.wrap(ca, off, len);
    try {
        CoderResult cr = ce.encode(cb, bb, true);
        if (!cr.isUnderflow()) {
            cr.throwException();
        }
        cr = ce.flush(bb);
        if (!cr.isUnderflow()) {
            cr.throwException();
        }
    } catch (CharacterCodingException x) {
        throw new Error(x);
    }
    return safeTrim(ba, bb.position(), cs, isTrusted);
}
/**
 * Creates a decoder wrapper for the given charset. The decoder replaces malformed input
 * and unmappable characters.
 *
 * @param cs the charset to decode with
 * @param rcn the charset name as originally requested
 */
private StringDecoder(Charset cs, String rcn) {
    this.requestedCharsetName = rcn;
    this.cs = cs;

    this.cd = cs.newDecoder();
    this.cd.onMalformedInput(CodingErrorAction.REPLACE);
    this.cd.onUnmappableCharacter(CodingErrorAction.REPLACE);

    // Trusted when getClassLoader0() yields null for the charset class.
    this.isTrusted = (null == cs.getClass().getClassLoader0());
}
/**
 * Loads a file through a Cp1252 decoder that reports unmappable characters and expects
 * the load to fail with a message ending in {@code "Exception: Input length = 1"}.
 *
 * <p>The stream is opened in a try-with-resources statement: the load is expected to
 * throw, so an explicit {@code close()} after it would never be reached.
 */
public void testLoadFromFileWithWrongEncoding() {
    try (InputStream input = new FileInputStream("src/test/resources/issues/issue68.txt")) {
        Yaml yaml = new Yaml();
        CharsetDecoder decoder = Charset.forName("Cp1252").newDecoder();
        decoder.onUnmappableCharacter(CodingErrorAction.REPORT);
        Object text = yaml.load(new InputStreamReader(input, decoder));
        fail("Invalid UTF-8 must not be accepted: " + text.toString());
    } catch (Exception e) {
        assertTrue(e.getMessage().endsWith("Exception: Input length = 1"));
    }
}
/**
 * Creates a decoder from the charset and error actions of a connection configuration.
 *
 * <p>An error action that is not set in the configuration defaults to REPORT.
 *
 * @param cconfig the connection configuration; may be {@code null}
 * @return a configured decoder, or {@code null} when {@code cconfig} is {@code null} or
 *         has no charset
 */
public static CharsetDecoder createDecoder(final ConnectionConfig cconfig) {
    if (cconfig == null) {
        return null;
    }

    final Charset charset = cconfig.getCharset();
    final CodingErrorAction malformed = cconfig.getMalformedInputAction();
    final CodingErrorAction unmappable = cconfig.getUnmappableInputAction();
    if (charset == null) {
        return null;
    }

    final CodingErrorAction onMalformed = (malformed == null) ? CodingErrorAction.REPORT : malformed;
    final CodingErrorAction onUnmappable = (unmappable == null) ? CodingErrorAction.REPORT : unmappable;
    return charset.newDecoder()
            .onMalformedInput(onMalformed)
            .onUnmappableCharacter(onUnmappable);
}
/**
 * Builds the HTTP client: creates a UTF-8 connection configuration that ignores malformed
 * and unmappable input, runs the {@code coverCA()} and {@code httpClientContext()} setup
 * steps, assembles the client builder and applies the user agent when one is set.
 */
private void initialize() {
    // Connection configuration
    ConnectionConfig connectionConfig = ConnectionConfig.custom()
            .setMalformedInputAction(CodingErrorAction.IGNORE)
            .setUnmappableInputAction(CodingErrorAction.IGNORE)
            .setCharset(Consts.UTF_8)
            .build();

    coverCA();
    httpClientContext();

    HttpClientBuilder builder = initHttpClient(
            createHttpClientConnPool(connectionConfig),
            requestConfig(),
            redirectStrategy(),
            retryPolicy());
    if (HttpUtils.userAgent != null) {
        builder.setUserAgent(userAgent);
    }

    httpClient = builder.build();
}
/**
 * Checks that a US-ASCII encoder configured with a custom replacement writes that
 * replacement for a character it cannot map.
 */
public void test_replaceWith() throws Exception {
    final Charset ascii = Charset.forName("US-ASCII");

    final CharsetEncoder replacing = ascii.newEncoder();
    replacing.onMalformedInput(CodingErrorAction.REPLACE);
    replacing.onUnmappableCharacter(CodingErrorAction.REPLACE);
    replacing.replaceWith("=".getBytes("US-ASCII"));

    final String input = "hello\u0666world";
    final ByteBuffer encoded = replacing.encode(CharBuffer.wrap(input));
    final String output = ascii.decode(encoded).toString();

    assertEquals("hello=world", output);
}
/**
 * Returns the decoder held in {@code dec}, creating it from {@code cs} on first use. The
 * decoder reports malformed input and unmappable characters.
 *
 * @return the lazily created decoder
 */
private CharsetDecoder decoder() {
    if (dec != null) {
        return dec;
    }
    dec = cs.newDecoder()
            .onMalformedInput(CodingErrorAction.REPORT)
            .onUnmappableCharacter(CodingErrorAction.REPORT);
    return dec;
}
/**
 * Returns the {@link CharsetEncoder} for the specified {@link Charset} from the encoder
 * cache exposed by {@code InternalThreadLocalMap}, creating and caching one on first use.
 *
 * <p>A cached encoder is reset and reconfigured to replace malformed input and unmappable
 * characters before it is returned.
 *
 * @param charset The specified charset
 * @return The encoder for the specified {@code charset}
 */
public static CharsetEncoder encoder(Charset charset) {
    checkNotNull(charset, "charset");

    final Map<Charset, CharsetEncoder> cache = InternalThreadLocalMap.get().charsetEncoderCache();
    final CharsetEncoder cached = cache.get(charset);
    if (cached == null) {
        final CharsetEncoder created =
                encoder(charset, CodingErrorAction.REPLACE, CodingErrorAction.REPLACE);
        cache.put(charset, created);
        return created;
    }

    cached.reset();
    cached.onMalformedInput(CodingErrorAction.REPLACE);
    cached.onUnmappableCharacter(CodingErrorAction.REPLACE);
    return cached;
}
/**
 * Creates a decoder for the given charset that replaces both malformed input and
 * unmappable characters.
 *
 * @param aCharset
 *        The charset to decode with. May not be <code>null</code>.
 */
public StringDecoder (@Nonnull final Charset aCharset)
{
  ValueEnforcer.notNull (aCharset, "Charset");
  m_aDecoder = aCharset.newDecoder ();
  // This matches the default behaviour for String(byte[], "UTF-8"), which
  // replaces malformed input AND unmappable characters. Both actions must be
  // set, otherwise unmappable sequences still use the decoder default (REPORT).
  // TODO: Support throwing exceptions on invalid input?
  m_aDecoder.onMalformedInput (CodingErrorAction.REPLACE);
  m_aDecoder.onUnmappableCharacter (CodingErrorAction.REPLACE);
}
/**
 * Converts the provided String to bytes using the encoder obtained from
 * {@code ENCODER_FACTORY}. If <code>replace</code> is true, malformed input and unmappable
 * characters are replaced with the encoder's replacement bytes instead of being reported.
 * Otherwise the encoder's current error actions are left untouched and a coding error
 * surfaces as a {@link CharacterCodingException}.
 *
 * <p>The REPORT actions are restored in a <code>finally</code> block, so the shared
 * encoder is not left in REPLACE mode when encoding throws (for example on a
 * <code>null</code> string).
 *
 * @param string the text to encode
 * @param replace true to replace bad input rather than report it
 * @return ByteBuffer: bytes stored at ByteBuffer.array() and length is
 *         ByteBuffer.limit()
 * @throws CharacterCodingException if the encoder reports a coding error
 */
public static ByteBuffer encode(final String string, final boolean replace) throws CharacterCodingException {
    final CharsetEncoder encoder = ENCODER_FACTORY.get();
    if (replace) {
        encoder.onMalformedInput(CodingErrorAction.REPLACE);
        encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
    }
    try {
        return encoder.encode(CharBuffer.wrap(string.toCharArray()));
    } finally {
        // Put the shared encoder back to REPORT, even on failure.
        if (replace) {
            encoder.onMalformedInput(CodingErrorAction.REPORT);
            encoder.onUnmappableCharacter(CodingErrorAction.REPORT);
        }
    }
}