下面列出了 java.nio.charset.StandardCharsets#UTF_16BE 的实例代码。您可以点击链接到 GitHub 查看源代码，也可以在右侧发表评论。
/**
 * Decodes the raw {@code bytes} of this object into a Java string.
 *
 * <p>A leading two-byte byte order mark selects UTF-16 (big- or little-endian) and is
 * stripped from the result; everything else is handed to {@code PDFDocEncoding}.
 *
 * @return the content PDF text string as defined in Chap 7.9 of PDF 32000-1:2008.
 */
public String getString()
{
    // Fewer than two bytes cannot hold a BOM, so the data cannot be marked as Unicode.
    if (bytes.length < 2)
    {
        return PDFDocEncoding.toString(bytes);
    }

    // Fold the first two bytes into one unsigned 16-bit marker for a single comparison.
    final int marker = ((bytes[0] & 0xff) << 8) | (bytes[1] & 0xff);
    final int payloadLength = bytes.length - 2;
    switch (marker)
    {
        case 0xFEFF:
            // UTF-16BE
            return new String(bytes, 2, payloadLength, StandardCharsets.UTF_16BE);
        case 0xFFFE:
            // UTF-16LE - not in the PDF spec!
            return new String(bytes, 2, payloadLength, StandardCharsets.UTF_16LE);
        default:
            // no BOM: use PDFDocEncoding
            return PDFDocEncoding.toString(bytes);
    }
}
/**
 * Decodes {@code bytes} into characters, honouring a leading byte order mark (BOM).
 *
 * <p>Recognised BOMs are UTF-32LE, UTF-32BE, UTF-8, UTF-16BE and UTF-16LE. The BOM itself
 * is not part of the result. Input without a recognised BOM is decoded as UTF-8.
 *
 * @param bytes the encoded text, optionally prefixed with a BOM; must not be {@code null}
 * @return the decoded characters
 */
private static char[] byteToCharArray(final byte[] bytes) {
    Charset cs = StandardCharsets.UTF_8;
    int start = 0;
    // BOM detection. The UTF-32LE BOM (FF FE 00 00) starts with the UTF-16LE BOM (FF FE),
    // so the four-byte signatures have to be tested before the two-byte ones; otherwise
    // the UTF-32LE branch can never be reached.
    if (bytes.length > 3 && bytes[0] == (byte) 0xFF && bytes[1] == (byte) 0xFE && bytes[2] == 0 && bytes[3] == 0) {
        start = 4;
        cs = Charset.forName("UTF-32LE");
    } else if (bytes.length > 3 && bytes[0] == 0 && bytes[1] == 0 && bytes[2] == (byte) 0xFE && bytes[3] == (byte) 0xFF) {
        start = 4;
        cs = Charset.forName("UTF-32BE");
    } else if (bytes.length > 2 && bytes[0] == (byte) 0xEF && bytes[1] == (byte) 0xBB && bytes[2] == (byte) 0xBF) {
        start = 3;
        cs = StandardCharsets.UTF_8;
    } else if (bytes.length > 1 && bytes[0] == (byte) 0xFE && bytes[1] == (byte) 0xFF) {
        start = 2;
        cs = StandardCharsets.UTF_16BE;
    } else if (bytes.length > 1 && bytes[0] == (byte) 0xFF && bytes[1] == (byte) 0xFE) {
        start = 2;
        cs = StandardCharsets.UTF_16LE;
    }
    return new String(bytes, start, bytes.length - start, cs).toCharArray();
}
/**
 * Decodes {@code bytes} into characters, honouring a leading byte order mark (BOM).
 *
 * <p>Recognised BOMs are UTF-32LE, UTF-32BE, UTF-8, UTF-16BE and UTF-16LE. The BOM itself
 * is not part of the result. Input without a recognised BOM is decoded as UTF-8.
 *
 * @param bytes the encoded text, optionally prefixed with a BOM; must not be {@code null}
 * @return the decoded characters
 */
private static char[] byteToCharArray(final byte[] bytes) {
    Charset cs = StandardCharsets.UTF_8;
    int start = 0;
    // BOM detection. The UTF-32LE BOM (FF FE 00 00) starts with the UTF-16LE BOM (FF FE),
    // so the four-byte signatures have to be tested before the two-byte ones; otherwise
    // the UTF-32LE branch can never be reached.
    if (bytes.length > 3 && bytes[0] == (byte) 0xFF && bytes[1] == (byte) 0xFE && bytes[2] == 0 && bytes[3] == 0) {
        start = 4;
        cs = Charset.forName("UTF-32LE");
    } else if (bytes.length > 3 && bytes[0] == 0 && bytes[1] == 0 && bytes[2] == (byte) 0xFE && bytes[3] == (byte) 0xFF) {
        start = 4;
        cs = Charset.forName("UTF-32BE");
    } else if (bytes.length > 2 && bytes[0] == (byte) 0xEF && bytes[1] == (byte) 0xBB && bytes[2] == (byte) 0xBF) {
        start = 3;
        cs = StandardCharsets.UTF_8;
    } else if (bytes.length > 1 && bytes[0] == (byte) 0xFE && bytes[1] == (byte) 0xFF) {
        start = 2;
        cs = StandardCharsets.UTF_16BE;
    } else if (bytes.length > 1 && bytes[0] == (byte) 0xFF && bytes[1] == (byte) 0xFE) {
        start = 2;
        cs = StandardCharsets.UTF_16LE;
    }
    return new String(bytes, start, bytes.length - start, cs).toCharArray();
}
/**
 * Converts BOM-prefixed (or plain UTF-8) bytes into a character array.
 *
 * <p>The leading bytes are inspected for a UTF-16BE, UTF-16LE, UTF-32LE, UTF-8 or UTF-32BE
 * byte order mark. A recognised mark picks the charset and is skipped; without one the
 * whole array is decoded as UTF-8.
 *
 * @param bytes the encoded text; must not be {@code null}
 * @return the decoded characters, without the byte order mark
 */
private static char[] byteToCharArray(final byte[] bytes) {
    final int length = bytes.length;
    // Unsigned view of the first four bytes; -1 stands for "past the end" and never
    // matches a signature byte, which makes separate length checks unnecessary.
    final int b0 = length > 0 ? bytes[0] & 0xFF : -1;
    final int b1 = length > 1 ? bytes[1] & 0xFF : -1;
    final int b2 = length > 2 ? bytes[2] & 0xFF : -1;
    final int b3 = length > 3 ? bytes[3] & 0xFF : -1;

    final Charset charset;
    final int skip;
    if (b0 == 0xFE && b1 == 0xFF) {
        charset = StandardCharsets.UTF_16BE;
        skip = 2;
    } else if (b0 == 0xFF && b1 == 0xFE) {
        // FF FE is shared by UTF-16LE and UTF-32LE; two following zero bytes mean UTF-32LE.
        final boolean utf32 = b2 == 0 && b3 == 0;
        charset = utf32 ? Charset.forName("UTF-32LE") : StandardCharsets.UTF_16LE;
        skip = utf32 ? 4 : 2;
    } else if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
        charset = StandardCharsets.UTF_8;
        skip = 3;
    } else if (b0 == 0x00 && b1 == 0x00 && b2 == 0xFE && b3 == 0xFF) {
        charset = Charset.forName("UTF-32BE");
        skip = 4;
    } else {
        // No recognised BOM: decode everything as UTF-8.
        charset = StandardCharsets.UTF_8;
        skip = 0;
    }
    return new String(bytes, skip, length - skip, charset).toCharArray();
}
/**
 * Converts BOM-prefixed (or plain UTF-8) bytes into a character array.
 *
 * <p>The leading bytes are inspected for a UTF-16BE, UTF-16LE, UTF-32LE, UTF-8 or UTF-32BE
 * byte order mark. A recognised mark picks the charset and is skipped; without one the
 * whole array is decoded as UTF-8.
 *
 * @param bytes the encoded text; must not be {@code null}
 * @return the decoded characters, without the byte order mark
 */
private static char[] byteToCharArray(final byte[] bytes) {
    final int length = bytes.length;
    // Unsigned view of the first four bytes; -1 stands for "past the end" and never
    // matches a signature byte, which makes separate length checks unnecessary.
    final int b0 = length > 0 ? bytes[0] & 0xFF : -1;
    final int b1 = length > 1 ? bytes[1] & 0xFF : -1;
    final int b2 = length > 2 ? bytes[2] & 0xFF : -1;
    final int b3 = length > 3 ? bytes[3] & 0xFF : -1;

    final Charset charset;
    final int skip;
    if (b0 == 0xFE && b1 == 0xFF) {
        charset = StandardCharsets.UTF_16BE;
        skip = 2;
    } else if (b0 == 0xFF && b1 == 0xFE) {
        // FF FE is shared by UTF-16LE and UTF-32LE; two following zero bytes mean UTF-32LE.
        final boolean utf32 = b2 == 0 && b3 == 0;
        charset = utf32 ? Charset.forName("UTF-32LE") : StandardCharsets.UTF_16LE;
        skip = utf32 ? 4 : 2;
    } else if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
        charset = StandardCharsets.UTF_8;
        skip = 3;
    } else if (b0 == 0x00 && b1 == 0x00 && b2 == 0xFE && b3 == 0xFF) {
        charset = Charset.forName("UTF-32BE");
        skip = 4;
    } else {
        // No recognised BOM: decode everything as UTF-8.
        charset = StandardCharsets.UTF_8;
        skip = 0;
    }
    return new String(bytes, skip, length - skip, charset).toCharArray();
}
/**
 * Encodes {@code string} with {@code encoding}, stores the bytes at {@code address} via
 * {@code setBytes} and, for some encodings, applies a string data type there.
 *
 * <ul>
 * <li>US-ASCII / UTF-8: optionally padded with one zero byte, then a
 * {@code StringDataType} is applied.</li>
 * <li>UTF-16BE / UTF-16LE with {@code nullTerminate}: padded with two zero bytes, then a
 * {@code TerminatedUnicodeDataType} is applied.</li>
 * <li>Anything else (including unterminated UTF-16): only the bytes are written.</li>
 * </ul>
 *
 * @param address where the encoded bytes are written
 * @param string the text to encode
 * @param encoding the charset used to encode {@code string}
 * @param nullTerminate whether to append a zero terminator sized for the encoding
 * @throws Exception declared by the method; the code shown does not indicate which call raises it
 */
public void createEncodedString(String address, String string, Charset encoding,
        boolean nullTerminate) throws Exception {
    final byte[] encoded = string.getBytes(encoding);

    final boolean singleByteUnits =
        encoding == StandardCharsets.US_ASCII || encoding == StandardCharsets.UTF_8;
    if (singleByteUnits) {
        // Arrays.copyOf pads with zeros, which yields the one-byte terminator.
        final byte[] data = nullTerminate ? Arrays.copyOf(encoded, encoded.length + 1) : encoded;
        setBytes(address, data);
        applyDataType(address, new StringDataType(), 1);
        return;
    }

    final boolean utf16 =
        encoding == StandardCharsets.UTF_16BE || encoding == StandardCharsets.UTF_16LE;
    if (utf16 && nullTerminate) {
        // A UTF-16 terminator is one full code unit, i.e. two zero bytes.
        setBytes(address, Arrays.copyOf(encoded, encoded.length + 2));
        applyDataType(address, new TerminatedUnicodeDataType(), 1);
        return;
    }

    // Unterminated UTF-16 and every other charset: raw bytes only, no data type.
    setBytes(address, encoded);
}
/**
 * Selects {@code encoding} in the combo box named "Encoding Options".
 *
 * <p>UTF_16BE and UTF_16LE are mapped to UTF_16 first, because the combo box does not
 * offer the byte-order-specific variants. If no item matches, the selection is left alone.
 *
 * @param encoding the charset to select
 * @throws Exception declared by the method; the code shown does not indicate which call raises it
 */
@SuppressWarnings("unchecked")
private void setEncoding(Charset encoding) throws Exception {
    final JComboBox<Charset> encodingOptions =
        (JComboBox<Charset>) findComponentByName(pane, "Encoding Options", false);

    // BE and LE are not choices in the combo box, so both collapse to plain UTF_16.
    final boolean byteOrderSpecific =
        encoding == StandardCharsets.UTF_16BE || encoding == StandardCharsets.UTF_16LE;
    final Charset wanted = byteOrderSpecific ? StandardCharsets.UTF_16 : encoding;

    int position = 0;
    while (position < encodingOptions.getItemCount()) {
        if (encodingOptions.getItemAt(position) == wanted) {
            // The lambda needs an effectively final copy of the index.
            final int match = position;
            runSwing(() -> encodingOptions.setSelectedIndex(match));
            return;
        }
        position++;
    }
}
/**
 * Decodes {@code bytes} into characters, honouring a leading byte order mark (BOM).
 *
 * <p>Recognised BOMs are UTF-32LE, UTF-32BE, UTF-8, UTF-16BE and UTF-16LE. The BOM itself
 * is not part of the result. Input without a recognised BOM is decoded as UTF-8.
 *
 * @param bytes the encoded text, optionally prefixed with a BOM; must not be {@code null}
 * @return the decoded characters
 */
private static char[] byteToCharArray(final byte[] bytes) {
    Charset cs = StandardCharsets.UTF_8;
    int start = 0;
    // BOM detection. The UTF-32LE BOM (FF FE 00 00) starts with the UTF-16LE BOM (FF FE),
    // so the four-byte signatures have to be tested before the two-byte ones; otherwise
    // the UTF-32LE branch can never be reached.
    if (bytes.length > 3 && bytes[0] == (byte) 0xFF && bytes[1] == (byte) 0xFE && bytes[2] == 0 && bytes[3] == 0) {
        start = 4;
        cs = Charset.forName("UTF-32LE");
    } else if (bytes.length > 3 && bytes[0] == 0 && bytes[1] == 0 && bytes[2] == (byte) 0xFE && bytes[3] == (byte) 0xFF) {
        start = 4;
        cs = Charset.forName("UTF-32BE");
    } else if (bytes.length > 2 && bytes[0] == (byte) 0xEF && bytes[1] == (byte) 0xBB && bytes[2] == (byte) 0xBF) {
        start = 3;
        cs = StandardCharsets.UTF_8;
    } else if (bytes.length > 1 && bytes[0] == (byte) 0xFE && bytes[1] == (byte) 0xFF) {
        start = 2;
        cs = StandardCharsets.UTF_16BE;
    } else if (bytes.length > 1 && bytes[0] == (byte) 0xFF && bytes[1] == (byte) 0xFE) {
        start = 2;
        cs = StandardCharsets.UTF_16LE;
    }
    return new String(bytes, start, bytes.length - start, cs).toCharArray();
}
/**
 * Decodes an ID3v2 text payload whose first byte is the text-encoding marker.
 *
 * <p>Supported markers: {@code 0x00} ISO-8859-1, {@code 0x01} UTF-16 with byte order mark,
 * {@code 0x02} UTF-16BE, {@code 0x03} UTF-8. For the two UTF-16 markers a leading BOM
 * (FE FF or FF FE) decides the byte order and is stripped. Without a BOM, marker
 * {@code 0x01} is read as little-endian and marker {@code 0x02} as big-endian, and no
 * payload bytes are skipped.
 *
 * @param raw encoding marker followed by the encoded text; may be {@code null} or empty
 * @return the decoded text, or an empty string for {@code null}/empty input or an
 *         unknown encoding marker
 */
private String getDecodedString(byte[] raw) {
    // Guard explicitly instead of relying on a swallowed exception.
    if (raw == null || raw.length == 0) {
        return "";
    }

    final int ID3_ENC_LATIN = 0x00;
    final int ID3_ENC_UTF16 = 0x01;
    final int ID3_ENC_UTF16BE = 0x02;
    final int ID3_ENC_UTF8 = 0x03;

    final int encid = raw[0] & 0xFF;
    final int textLength = raw.length - 1;

    if (encid == ID3_ENC_LATIN) {
        return new String(raw, 1, textLength, StandardCharsets.ISO_8859_1);
    }
    if (encid == ID3_ENC_UTF8) {
        return new String(raw, 1, textLength, StandardCharsets.UTF_8);
    }
    if (encid == ID3_ENC_UTF16) {
        return decodeId3Utf16(raw, false);
    }
    if (encid == ID3_ENC_UTF16BE) {
        return decodeId3Utf16(raw, true);
    }
    return "";
}

/**
 * Decodes the UTF-16 text that follows the encoding marker in {@code raw}, using a BOM at
 * bytes 1-2 when present and {@code bigEndianByDefault} otherwise.
 */
private static String decodeId3Utf16(byte[] raw, boolean bigEndianByDefault) {
    boolean bigEndian = bigEndianByDefault;
    int offset = 1;
    if (raw.length >= 3) {
        final int first = raw[1] & 0xFF;
        final int second = raw[2] & 0xFF;
        if (first == 0xFE && second == 0xFF) {
            bigEndian = true;
            offset = 3;
        } else if (first == 0xFF && second == 0xFE) {
            bigEndian = false;
            offset = 3;
        }
    }
    return new String(raw, offset, raw.length - offset,
        bigEndian ? StandardCharsets.UTF_16BE : StandardCharsets.UTF_16LE);
}
/**
 * Maps the byte order mark reported by {@code bis} to a charset.
 *
 * @param bis the stream whose BOM is queried via {@code getBOM()}
 * @return UTF-16LE or UTF-16BE for the matching BOM; UTF-8 for a UTF-8 BOM and for
 *         every other result of {@code getBOM()}
 * @throws IOException if {@code bis.getBOM()} fails
 */
private static Charset charsetFor(BOMInputStream bis) throws IOException {
    final ByteOrderMark detected = bis.getBOM();
    final Charset result;
    if (ByteOrderMark.UTF_16LE.equals(detected)) {
        result = StandardCharsets.UTF_16LE;
    } else if (ByteOrderMark.UTF_16BE.equals(detected)) {
        result = StandardCharsets.UTF_16BE;
    } else {
        // Covers both an explicit UTF-8 BOM and the fallback for anything else.
        result = StandardCharsets.UTF_8;
    }
    return result;
}
/**
 * Writes a string body with a UTF-16BE JSON content type and checks that the output is the
 * body wrapped in double quotes, encoded as UTF-16BE, with the content type preserved.
 */
@Test
public void writeUTF16() throws IOException {
    // given
    String body = "H\u00e9llo W\u00f6rld";
    String expectedJson = "\"" + body + "\"";
    MediaType contentType = new MediaType("application", "json", StandardCharsets.UTF_16BE);
    MockHttpOutputMessage outputMessage = new MockHttpOutputMessage();

    // when
    this.converter.write(body, contentType, outputMessage);

    // then
    assertEquals("Invalid result", expectedJson,
        outputMessage.getBodyAsString(StandardCharsets.UTF_16BE));
    assertEquals("Invalid content-type", contentType,
        outputMessage.getHeaders().getContentType());
}
/**
 * Wraps {@code inputStream} in a reader whose charset is chosen from a leading byte order
 * mark: UTF-8 (EF BB BF), UTF-16BE (FE FF) or UTF-16LE (FF FE). The BOM is consumed; when
 * none is found the stream is read as UTF-8 from its first byte.
 *
 * @param inputStream the raw byte source
 * @throws IOException if reading or pushing back the leading bytes fails
 */
BomReader(InputStream inputStream) throws IOException {
    super(inputStream);

    final byte[] bom = new byte[MAX_BOM_SIZE];
    // read first 3 bytes such that they can be pushed back later
    final PushbackInputStream pushbackStream = new PushbackInputStream(inputStream, MAX_BOM_SIZE);
    final int bytesRead = ByteStreams.read(pushbackStream, bom, 0, 3);

    // look for BOM and adapt, defaulting to UTF-8
    final Charset encoding;
    final int bomLength;
    if (bytesRead >= 3 && bom[0] == X_EF && bom[1] == X_BB && bom[2] == X_BF) {
        encoding = StandardCharsets.UTF_8;
        bomLength = 3;
    } else if (bytesRead >= 2 && bom[0] == X_FE && bom[1] == X_FF) {
        encoding = StandardCharsets.UTF_16BE;
        bomLength = 2;
    } else if (bytesRead >= 2 && bom[0] == X_FF && bom[1] == X_FE) {
        encoding = StandardCharsets.UTF_16LE;
        bomLength = 2;
    } else {
        encoding = StandardCharsets.UTF_8;
        bomLength = 0;
    }

    // Return everything read beyond the BOM so the decoder sees it again.
    pushbackStream.unread(bom, bomLength, bytesRead - bomLength);

    // use Java standard code now we know the encoding
    this.underlying = new InputStreamReader(pushbackStream, encoding);
}
/**
 * Checks that a {@code SimpleStringSchema} built with UTF-16BE serializes to the same bytes
 * as {@code String.getBytes} and deserializes those bytes back to the original string.
 */
@Test
public void testSerializationWithAnotherCharset() {
    final Charset charset = StandardCharsets.UTF_16BE;
    final String original = "之掃描古籍版實乃姚鼐的";
    final byte[] expectedBytes = original.getBytes(charset);

    // Separate schema instances per direction, as in the original test.
    final byte[] serialized = new SimpleStringSchema(charset).serialize(original);
    assertArrayEquals(expectedBytes, serialized);

    assertEquals(original, new SimpleStringSchema(charset).deserialize(expectedBytes));
}
/**
 * Converts a string payload with a UTF-16BE JSON content type header and checks that the
 * resulting byte payload, decoded as UTF-16BE, is the quoted string and that the content
 * type header is kept.
 */
@Test
public void toMessageUtf16() {
    // given
    String payload = "H\u00e9llo W\u00f6rld";
    MimeType contentType = new MimeType("application", "json", StandardCharsets.UTF_16BE);
    Map<String, Object> headerValues = new HashMap<>();
    headerValues.put(MessageHeaders.CONTENT_TYPE, contentType);
    MessageHeaders headers = new MessageHeaders(headerValues);
    MappingJackson2MessageConverter converter = new MappingJackson2MessageConverter();

    // when
    Message<?> message = converter.toMessage(payload, headers);

    // then
    byte[] body = (byte[]) message.getPayload();
    String decoded = new String(body, StandardCharsets.UTF_16BE);
    assertEquals("\"" + payload + "\"", decoded);
    assertEquals(contentType, message.getHeaders().get(MessageHeaders.CONTENT_TYPE));
}
/**
 * Same scenario as the byte-payload UTF-16 test, but with the converter configured to
 * produce a {@code String} payload: the payload must equal the quoted string directly.
 */
@Test
public void toMessageUtf16String() {
    // given
    String payload = "H\u00e9llo W\u00f6rld";
    String expectedJson = "\"" + payload + "\"";
    MimeType contentType = new MimeType("application", "json", StandardCharsets.UTF_16BE);
    Map<String, Object> headerValues = new HashMap<>();
    headerValues.put(MessageHeaders.CONTENT_TYPE, contentType);
    MessageHeaders headers = new MessageHeaders(headerValues);

    MappingJackson2MessageConverter converter = new MappingJackson2MessageConverter();
    converter.setSerializedPayloadClass(String.class);

    // when
    Message<?> message = converter.toMessage(payload, headers);

    // then
    assertEquals(expectedJson, message.getPayload());
    assertEquals(contentType, message.getHeaders().get(MessageHeaders.CONTENT_TYPE));
}
/**
 * Writes a string body with a UTF-16BE JSON content type and checks that the output is the
 * body wrapped in double quotes, encoded as UTF-16BE, with the content type preserved.
 */
@Test
public void writeUTF16() throws IOException {
    // given
    String body = "H\u00e9llo W\u00f6rld";
    String expectedJson = "\"" + body + "\"";
    MediaType contentType = new MediaType("application", "json", StandardCharsets.UTF_16BE);
    MockHttpOutputMessage outputMessage = new MockHttpOutputMessage();

    // when
    converter.write(body, contentType, outputMessage);

    // then
    assertEquals("Invalid result", expectedJson,
        outputMessage.getBodyAsString(StandardCharsets.UTF_16BE));
    assertEquals("Invalid content-type", contentType,
        outputMessage.getHeaders().getContentType());
}
/**
 * Writes a string body with a UTF-16BE content type and checks that the output, decoded as
 * UTF-16BE, is the unmodified body and that the content type is preserved.
 */
@Test
public void writeUTF16() throws IOException {
    // given
    String body = "H\u00e9llo W\u00f6rld";
    MediaType contentType = new MediaType("application", "json", StandardCharsets.UTF_16BE);
    MockHttpOutputMessage outputMessage = new MockHttpOutputMessage();

    // when
    this.converter.write(body, contentType, outputMessage);

    // then: this converter is expected to emit the text as-is, without JSON quoting
    assertEquals("Invalid result", body,
        outputMessage.getBodyAsString(StandardCharsets.UTF_16BE));
    assertEquals("Invalid content-type", contentType,
        outputMessage.getHeaders().getContentType());
}
/**
 * Reads a length-prefixed UTF-16BE string from {@code buf}: an unsigned short giving the
 * number of 2-byte code units, followed by that many code units.
 *
 * @param buf the buffer to read from
 * @return the decoded string
 */
public static String readShortUTF16BEString(ByteBuf buf) {
    final int codeUnits = buf.readUnsignedShort();
    // Each UTF-16 code unit occupies two bytes.
    final byte[] encoded = Utils.readBytes(buf, codeUnits * 2);
    return new String(encoded, StandardCharsets.UTF_16BE);
}
/**
 * Reads a UTF-16BE string from {@code input}: an unsigned short is read first and doubled
 * to get a byte count, then a byte array is read and decoded as UTF-16BE.
 *
 * @param input the source to read from
 * @return the decoded string
 * @throws IOException if reading from {@code input} fails
 */
private static String readUTF16String(HInput input) throws IOException {
    final int codeUnits = input.readUnsignedShort();
    final int byteCount = codeUnits << 1;
    // NOTE(review): the byte count is passed negated, exactly as the original did.
    // Presumably a negative argument has a special meaning for HInput.readByteArray;
    // confirm against its contract.
    final byte[] encoded = input.readByteArray(-byteCount);
    return new String(encoded, StandardCharsets.UTF_16BE);
}
/**
 * Determine the XML charset.
 * <p>
 * The bytes are first checked for a BOM. If that does not identify a charset and at least
 * four bytes remain after the BOM, the bytes at that offset are matched against a series of
 * known byte patterns (UTF-32, UTF-16, UTF-8, EBCDIC, IBM290). The charset found this way,
 * or the fallback charset if none was found, is then passed to the XML encoding parser
 * together with the search offset.
 *
 * @param aBytes
 *        XML byte representation
 * @return <code>null</code> if no charset was found. In that case you might
 *         wanna try UTF-8 as the fallback.
 */
@Nullable
public static Charset determineXMLCharset (@Nonnull final byte [] aBytes)
{
  ValueEnforcer.notNull (aBytes, "Bytes");

  Charset aDetected = null;
  int nStartOfs = 0;

  if (aBytes.length > 0)
  {
    // Check if a BOM is present - read at maximum the longest possible BOM
    final int nProbeLen = Math.min (EUnicodeBOM.getMaximumByteCount (), aBytes.length);
    try (NonBlockingByteArrayInputStream aIS = new NonBlockingByteArrayInputStream (aBytes, 0, nProbeLen))
    {
      final InputStreamAndCharset aISC = CharsetHelper.getInputStreamAndCharsetFromBOM (aIS);
      if (aISC.hasBOM ())
      {
        // A BOM was found, but it does not necessarily identify a unique charset -
        // skip the BOM bytes and continue the determination from there
        nStartOfs = aISC.getBOM ().getByteCount ();
      }
      if (aISC.hasCharset ())
      {
        // A BOM was found, and that BOM also has a unique charset assigned
        aDetected = aISC.getCharset ();
      }
    }
  }

  // No charset found and enough bytes left? Then try the known byte patterns in order.
  final boolean bTryPatterns = aDetected == null && aBytes.length - nStartOfs >= 4;
  if (bTryPatterns)
  {
    if (_match (aBytes, nStartOfs, CS_UTF32_BE))
    {
      aDetected = CHARSET_UTF_32BE;
    }
    else if (_match (aBytes, nStartOfs, CS_UTF32_LE))
    {
      aDetected = CHARSET_UTF_32LE;
    }
    else if (_match (aBytes, nStartOfs, CS_UTF16_BE))
    {
      aDetected = StandardCharsets.UTF_16BE;
    }
    else if (_match (aBytes, nStartOfs, CS_UTF16_LE))
    {
      aDetected = StandardCharsets.UTF_16LE;
    }
    else if (_match (aBytes, nStartOfs, CS_UTF8))
    {
      aDetected = StandardCharsets.UTF_8;
    }
    else if (_match (aBytes, nStartOfs, CS_EBCDIC))
    {
      aDetected = CHARSET_EBCDIC;
    }
    else if (_match (aBytes, nStartOfs, CS_IBM290))
    {
      aDetected = CHARSET_IBM290;
    }
  }

  // Still nothing: use the fallback charset (UTF-8 according to the original author's note)
  final Charset aParseCharset = aDetected != null ? aDetected : FALLBACK_CHARSET;

  // Now read with a reader
  return _parseXMLEncoding (aBytes, nStartOfs, aParseCharset);
}