下面列出了 com.google.common.base.Utf8 类的 API 用法示例代码，也可以点击链接到 GitHub 查看源代码。
/**
 * Asserts that byte sequences containing the lead byte {@code 0xED} followed by a second byte in
 * {@code 0xA0..0xBF} are reported as not well formed, for every third byte in {@code 0x00..0xFE}.
 * <p>
 * Each sequence is checked twice: once through the ByteBuf-based {@code Utf8Utils} and once through
 * Guava's byte-array based {@code Utf8}.
 */
@Test
public void test_is_well_formed_utf_8() {
    for (int second = 0xA0; second <= 0xBF; second++) {
        for (int third = 0; third < 0xFF; third++) {
            final byte[] candidate = {'a', 'b', 'c', (byte) 0xED, (byte) second, (byte) third, 'd', 'e', 'f'};
            final int length = candidate.length;

            // The buffer carries a two-byte length prefix followed by the candidate bytes.
            final ByteBuf buffer = Unpooled.buffer();
            buffer.writeShort(length);
            buffer.writeBytes(candidate);

            // Both implementations must agree that the sequence is malformed.
            assertFalse(Utf8Utils.isWellFormed(buffer, length));
            assertFalse(Utf8.isWellFormed(candidate));

            buffer.release();
        }
    }
}
/**
 * Returns a version of {@code id} whose UTF-8 encoding does not exceed {@code MAX_TR_ID_SIZE} bytes.
 *
 * @param id the identifier to sanitize
 * @return {@code id} itself when it already fits; a truncated copy ending in {@code "..."} when it is too
 *         long but purely ASCII; {@code null} when it is too long and contains non-ASCII characters, or when
 *         an {@link IllegalArgumentException} is raised while measuring it
 */
@Nullable
private static String getSanitizedId(@Nonnull String id) {
    try {
        if (Utf8.encodedLength(id) <= MAX_TR_ID_SIZE) {
            return id;
        }
        if (!CharMatcher.ascii().matchesAllOf(id)) {
            // Cutting a non-ASCII UTF-16 string at an arbitrary index is risky (a surrogate pair could be
            // split in half), so no truncation is attempted for such IDs.
            return null;
        }
        // Pure ASCII: one char is one byte, so truncating by char count is safe. Three chars are reserved
        // for the ellipsis.
        return id.substring(0, MAX_TR_ID_SIZE - 3) + "...";
    } catch (IllegalArgumentException e) {
        // Utf8.encodedLength rejects ill-formed UTF-16 input; such an ID cannot be sanitized.
        return null;
    }
}
/**
 * Validates that a value can be decoded with the given charset.
 *
 * <p>Only {@link StandardCharsets#UTF_8} is actually verified; for any other
 * charset this method returns without inspecting the value.
 *
 * @param value nls string in byte array
 * @param charset charset
 * @throws RuntimeException If the given value cannot be represented in the
 *     given charset
 */
public static void validateCharset(ByteString value, Charset charset) {
    if (charset != StandardCharsets.UTF_8) {
        return;
    }
    final byte[] raw = value.getBytes();
    if (Utf8.isWellFormed(raw)) {
        return;
    }
    // Decode the bytes anyway so that the error can show the offending text.
    //CHECKSTYLE: IGNORE 1
    final String decoded = new String(raw, charset);
    throw RESOURCE.charsetEncoding(decoded, charset.name()).ex();
}
/**
 * Validates that a value can be decoded with the given charset.
 *
 * <p>Only {@link StandardCharsets#UTF_8} is actually verified; for any other
 * charset this method returns without inspecting the value.
 *
 * @param value nls string in byte array
 * @param charset charset
 * @throws RuntimeException If the given value cannot be represented in the
 *     given charset
 */
public static void validateCharset(ByteString value, Charset charset) {
    if (charset != StandardCharsets.UTF_8) {
        return;
    }
    final byte[] raw = value.getBytes();
    if (Utf8.isWellFormed(raw)) {
        return;
    }
    // Decode the bytes anyway so that the error can show the offending text.
    //CHECKSTYLE: IGNORE 1
    final String decoded = new String(raw, charset);
    throw RESOURCE.charsetEncoding(decoded, charset.name()).ex();
}
/**
 * Reads the payload of a PUBLISH from the buffer and, if requested, checks that it is well-formed UTF-8.
 * <p>
 * The client is disconnected with {@link Mqtt5DisconnectReasonCode#PAYLOAD_FORMAT_INVALID} when all of the
 * following hold:
 * <p>
 * - the payload format indicator is UTF-8, AND
 * - {@code validatePayloadFormat} is {@code true}, AND
 * - the payload bytes are not well-formed UTF-8.
 *
 * @param channel                the channel of the mqtt client
 * @param buf                    the encoded ByteBuf of the message
 * @param payloadLength          the length of the payload
 * @param payloadFormatIndicator the nullable {@link Mqtt5PayloadFormatIndicator}
 * @param validatePayloadFormat  the configured boolean for payload validation
 * @return the payload as a byte[] (the shared empty payload when {@code payloadLength} is not positive),
 *         or {@code null} if this method disconnected the client.
 */
protected @Nullable byte[] decodePayload(final @NotNull Channel channel,
        final @NotNull ByteBuf buf,
        final int payloadLength,
        final @Nullable Mqtt5PayloadFormatIndicator payloadFormatIndicator,
        final boolean validatePayloadFormat) {
    // Nothing to read: hand out the shared empty payload.
    if (payloadLength <= 0) {
        return emptyPayload;
    }

    final byte[] payload = new byte[payloadLength];
    buf.readBytes(payload);

    final boolean declaredUtf8 = Mqtt5PayloadFormatIndicator.UTF_8 == payloadFormatIndicator;
    if (declaredUtf8 && validatePayloadFormat && !Utf8.isWellFormed(payload)) {
        disconnector.disconnect(channel,
                "A client (IP: {}) sent a PUBLISH with an invalid UTF-8 payload. This is not allowed. Disconnecting client.",
                "Sent a PUBLISH with an invalid UTF-8 payload",
                Mqtt5DisconnectReasonCode.PAYLOAD_FORMAT_INVALID,
                ReasonStrings.DISCONNECT_PAYLOAD_FORMAT_INVALID_PUBLISH);
        return null;
    }
    return payload;
}
/**
 * Tells whether a UTF-8 encoded byte array contains characters that a UTF-8 encoded String must not contain
 * according to the MQTT 5 specification, namely the null character U+0000 and UTF-16 surrogates.
 * <p>
 * Input that is not well-formed UTF-8 is reported as containing such characters. Only the zero byte is
 * scanned for explicitly; surrogates are presumably rejected by the well-formedness check.
 *
 * @param binary the UTF-8 encoded byte array.
 * @return {@code true} if the data is not well-formed UTF-8 or contains a zero byte, {@code false} otherwise.
 */
public static boolean containsMustNotCharacters(@NotNull final byte[] binary) {
    if (Utf8.isWellFormed(binary)) {
        for (int i = 0; i < binary.length; i++) {
            if (binary[i] == 0) {
                return true;
            }
        }
        return false;
    }
    return true;
}
/**
 * Computes the number of bytes a character sequence occupies once encoded as UTF-8, via Guava.
 *
 * @param sequence the text to measure, may be {@code null}
 * @return {@code 0} when {@code StringUtils.isEmpty} reports the sequence as empty, otherwise its UTF-8
 *         encoded length in bytes
 * @see Utf8#encodedLength(CharSequence)
 */
public static int utf8EncodedLength(@Nullable CharSequence sequence) {
    return StringUtils.isEmpty(sequence) ? 0 : Utf8.encodedLength(sequence);
}
/**
 * Computes the number of bytes a character sequence occupies once encoded as UTF-8, via Guava.
 *
 * @param sequence the text to measure, may be {@code null}
 * @return {@code 0} when {@code StringUtils.isEmpty} reports the sequence as empty, otherwise its UTF-8
 *         encoded length in bytes
 * @see Utf8#encodedLength(CharSequence)
 */
public static int utf8EncodedLength(@Nullable CharSequence sequence) {
    return StringUtils.isEmpty(sequence) ? 0 : Utf8.encodedLength(sequence);
}
/**
 * Computes the number of bytes a character sequence occupies once encoded as UTF-8, via Guava.
 *
 * @param sequence the text to measure, may be {@code null}
 * @return {@code 0} when {@code StringUtils.isEmpty} reports the sequence as empty, otherwise its UTF-8
 *         encoded length in bytes
 * @see Utf8#encodedLength(CharSequence)
 */
public static int utf8EncodedLength(@Nullable CharSequence sequence) {
    return StringUtils.isEmpty(sequence) ? 0 : Utf8.encodedLength(sequence);
}
/**
 * Sends a text to the player of the given context, splitting it into lines first when its JSON form is
 * larger than 32767 UTF-8 bytes.
 *
 * @param ctx  the context providing the player, the split utilities and the line width
 * @param text the text to send
 */
public static void sendMessageSplitLarge(PlayerContext ctx, Text text) {
    final int encodedSize = Utf8.encodedLength(TextSerializers.JSON.serialize(text));

    // Common case: the serialized text is small enough to be sent as a single message.
    if (encodedSize <= 32767) {
        ctx.getPlayer().sendMessage(text);
        return;
    }

    final List<Text> pieces = ctx.utils().splitLines(text, ctx.width);
    ctx.getPlayer().sendMessages(pieces);
}
/**
 * Item to show on chat message under this text.
 * <p>
 * If the JSON describing the item is larger than 32767 UTF-8 bytes, a plain item of the same type is
 * used for the hover event instead. Exactly one {@code hoverEvent} element is queued either way.
 *
 * @param item {@link ItemStack}
 * @return instance of same {@link UltimateFancy}.
 */
public UltimateFancy hoverShowItem(ItemStack item) {
    JSONObject hover = parseHoverItem(item);
    if (Utf8.encodedLength(hover.toJSONString()) > 32767) {
        // Too large: replace it with a bare stack of the same type, rather than queueing both.
        hover = parseHoverItem(new ItemStack(item.getType()));
    }
    pendentElements.add(new ExtraElement("hoverEvent", hover));
    return this;
}
/**
 * Builds the {@code show_item} hover event object for an item.
 * <p>
 * If the item's JSON is larger than 32767 UTF-8 bytes, the JSON of a plain item of the same type is used
 * as the value instead.
 *
 * @param item the item to describe
 * @return a JSON object with the keys {@code action} and {@code value}
 */
private JSONObject parseHoverItem(ItemStack item) {
    JSONObject obj = new JSONObject();
    obj.put("action", "show_item");
    String jItem = convertItemStackToJson(item);
    if (Utf8.encodedLength(jItem) > 32767) {
        // Too large: fall back to a bare stack of the same type. Previously the oversized JSON was
        // written right after the fallback and overwrote it.
        jItem = convertItemStackToJson(new ItemStack(item.getType()));
    }
    obj.put("value", jItem);
    return obj;
}
/**
 * Processes a document with a single ASCII field and checks that the {@code docSize} field added by the
 * processor equals the UTF-8 encoded length of the JSON form of the original source.
 */
@Test
public void sanity(){
    Doc doc = createDoc("testField", "Hello, World!");
    // Snapshot of the source taken before the processor adds its own field.
    Map<String, Object> sourceBefore = new HashMap<>(doc.getSource());

    DocSizeProcessor processor = createProcessor(DocSizeProcessor.class);
    ProcessResult outcome = processor.process(doc);

    assertThat(outcome.isSucceeded()).isTrue();
    assertThat(doc.hasField("docSize")).isTrue();

    int actualSize = (int) doc.getField("docSize");
    int expectedSize = Utf8.encodedLength(JsonUtils.toJsonString(sourceBefore));
    assertThat(actualSize).isEqualTo(expectedSize);
}
/**
 * Processes a document with a single non-ASCII field and checks that the {@code docSize} field added by
 * the processor equals the UTF-8 encoded length of the JSON form of the original source.
 */
@Test
public void differentLangTest(){
    Doc doc = createDoc("testField", "こんにちは世界!");
    // Snapshot of the source taken before the processor adds its own field.
    Map<String, Object> sourceBefore = new HashMap<>(doc.getSource());

    DocSizeProcessor processor = createProcessor(DocSizeProcessor.class);
    ProcessResult outcome = processor.process(doc);

    assertThat(outcome.isSucceeded()).isTrue();
    assertThat(doc.hasField("docSize")).isTrue();

    int actualSize = (int) doc.getField("docSize");
    int expectedSize = Utf8.encodedLength(JsonUtils.toJsonString(sourceBefore));
    assertThat(actualSize).isEqualTo(expectedSize);
}
/**
 * Item to show on chat message under this text.
 * <p>
 * If the JSON describing the item is larger than 32767 UTF-8 bytes, a plain item of the same type is
 * used for the hover event instead. Exactly one {@code hoverEvent} element is queued either way.
 *
 * @param item {@link ItemStack}
 * @return instance of same {@link UltimateFancy}.
 */
public UltimateFancy hoverShowItem(ItemStack item) {
    JSONObject hover = parseHoverItem(item);
    if (Utf8.encodedLength(hover.toJSONString()) > 32767) {
        // Too large: replace it with a bare stack of the same type, rather than queueing both.
        hover = parseHoverItem(new ItemStack(item.getType()));
    }
    pendentElements.add(new ExtraElement("hoverEvent", hover));
    return this;
}
/**
 * Builds the {@code show_item} hover event object for an item.
 * <p>
 * If the item's JSON is larger than 32767 UTF-8 bytes, the JSON of a plain item of the same type is used
 * as the value instead.
 *
 * @param item the item to describe
 * @return a JSON object with the keys {@code action} and {@code value}
 */
private JSONObject parseHoverItem(ItemStack item) {
    JSONObject obj = new JSONObject();
    obj.put("action", "show_item");
    String jItem = convertItemStackToJson(item);
    if (Utf8.encodedLength(jItem) > 32767) {
        // Too large: fall back to a bare stack of the same type. Previously the oversized JSON was
        // written right after the fallback and overwrote it.
        jItem = convertItemStackToJson(new ItemStack(item.getType()));
    }
    obj.put("value", jItem);
    return obj;
}
/**
 * Validates that a value can be decoded with the given charset.
 *
 * <p>Only {@link StandardCharsets#UTF_8} is actually verified; for any other
 * charset this method returns without inspecting the value.
 *
 * @param value nls string in byte array
 * @param charset charset
 * @throws RuntimeException If the given value cannot be represented in the
 *     given charset
 */
public static void validateCharset(ByteString value, Charset charset) {
    if (charset != StandardCharsets.UTF_8) {
        return;
    }
    final byte[] raw = value.getBytes();
    if (Utf8.isWellFormed(raw)) {
        return;
    }
    // Decode the bytes anyway so that the error can show the offending text.
    //CHECKSTYLE: IGNORE 1
    final String decoded = new String(raw, charset);
    throw RESOURCE.charsetEncoding(decoded, charset.name()).ex();
}
/**
 * Computes the size in bytes of a string written with a {@code short} length prefix: {@link Short#BYTES}
 * for the prefix plus the UTF-8 encoded length of the string.
 *
 * @param string the string to measure, may be {@code null}
 * @return the prefix size alone when {@code string} is {@code null}, otherwise prefix size plus the
 *         string's UTF-8 encoded length
 */
public static int shortLengthStringSize(@Nullable final String string) {
    if (string == null) {
        return Short.BYTES;
    }
    return Short.BYTES + Utf8.encodedLength(string);
}
/**
 * Reads a character string from the stream and stores it in the given codec instance.
 * <p>
 * The number of bytes to read is the codec's size constraint maximum when that constraint is fixed-size;
 * otherwise it is read from the stream as a length prefix. The bytes are decoded with the codec's
 * character set, and the UTF-8 encoded length of the decoded string must equal the expected byte count.
 *
 * @param context     the serialization context, must not be {@code null}
 * @param instance    the codec that supplies the size constraint and character set and receives the result
 * @param inputStream the stream to read from, must not be {@code null}; it is not closed by this method
 * @throws IOException if reading fails or the decoded string does not account for the expected byte count
 */
@Override
public void read(
    final AsnObjectSerializationContext context,
    final AsnCharStringBasedObjectCodec instance,
    final InputStream inputStream
) throws IOException {
    Objects.requireNonNull(context);
    Objects.requireNonNull(instance);
    Objects.requireNonNull(inputStream);
    // WARNING: This length can be maliciously specified by the packet creator, so be careful not to use it for unsafe
    // operations, such as creating a new array of initial size `length`. This usage is safe because it merely caps the
    // InputStream to the specified packet-length, whereas the InputStream is authoritative for when it actually ends,
    // and this limit may be well smaller than `length`.
    final int lengthToRead;
    final AsnSizeConstraint sizeConstraint = instance.getSizeConstraint();
    if (sizeConstraint.isFixedSize()) {
        lengthToRead = sizeConstraint.getMax();
    } else {
        // Read the lengthToRead of the encoded OctetString...
        lengthToRead = OerLengthSerializer.readLength(inputStream);
    }
    final String result;
    /* beware the 0-lengthToRead string */
    if (lengthToRead == 0) {
        result = "";
    } else {
        // Use a limited input stream so we don't read too many bytes.
        final InputStream limitedInputStream = ByteStreams.limit(inputStream, lengthToRead);
        // WARNING: Don't close the InputStreamReader so that the underlying inputStream is not closed.
        result = CharStreams.toString(new InputStreamReader(limitedInputStream, instance.getCharacterSet().name()));
        // result.length() is the number of UTF-16 chars, not the number of encoded bytes (e.g., the String 元元元 has
        // 3 characters but 9 encoded UTF-8 bytes). The length-prefix holds the byte count (9 in this example), so
        // the validation must compare against the encoded byte length, not the character count.
        // NOTE(review): this comparison measures the UTF-8 length regardless of the codec's character set -- confirm
        // that all supported character sets encode to the same byte count as UTF-8.
        final int encodedLength = Utf8.encodedLength(result);
        if (encodedLength != lengthToRead) {
            // Report the byte count that was actually compared; the character count would be misleading here.
            throw new IOException(
                format("Unable to properly decode %s bytes (could only read %s bytes)", lengthToRead, encodedLength)
            );
        }
    }
    instance.setCharString(result);
}
/**
 * Stores the UTF-8 encoded length of the document source's JSON form in the target field.
 *
 * @param doc the document to measure and to add the field to
 * @return a successful {@link ProcessResult}
 */
@Override
public ProcessResult process(Doc doc) {
    final int sizeInBytes = Utf8.encodedLength(JsonUtils.toJsonString(doc.getSource()));
    doc.addField(targetField, sizeInBytes);
    return ProcessResult.success();
}