The following lists example code showing how to use the org.apache.commons.io.ByteOrderMark API, including instantiation and typical usage patterns; click a link to view the full source on GitHub.
/**
 * Attempts to sniff an encoding from a <a href="http://en.wikipedia.org/wiki/Byte_Order_Mark">Byte Order Mark</a>
 * in the specified byte array.
 *
 * <p>Only the UTF-8, UTF-16BE and UTF-16LE marks are recognized; anything
 * else yields {@code null}.</p>
 *
 * @param bytes the bytes to check for a Byte Order Mark
 * @return the encoding sniffed from the specified bytes, or {@code null} if the encoding
 * could not be determined
 */
static Charset sniffEncodingFromUnicodeBom(final byte[] bytes) {
    if (bytes == null) {
        return null;
    }
    // Single-assignment form: decide the charset once, then log and return.
    final Charset detected;
    if (startsWith(bytes, ByteOrderMark.UTF_8)) {
        detected = UTF_8;
    }
    else if (startsWith(bytes, ByteOrderMark.UTF_16BE)) {
        detected = UTF_16BE;
    }
    else if (startsWith(bytes, ByteOrderMark.UTF_16LE)) {
        detected = UTF_16LE;
    }
    else {
        detected = null;
    }
    if (detected != null && LOG.isDebugEnabled()) {
        LOG.debug("Encoding found in Unicode Byte Order Mark: '" + detected + "'.");
    }
    return detected;
}
/**
 * Verifies that a UTF-8 BOM prefixed to the script body wins over the wrong
 * ISO-8859-1 charset declared on the script tag: the Arabic alert text must
 * survive the round trip intact (the expected alert equals the source text).
 *
 * @throws Exception if the test fails
 */
@Test
@Alerts("\u0623\u0647\u0644\u0627\u064b\u0623\u0647\u0644\u0627"
        + "\u064b\u0623\u0647\u0644\u0627\u064b\u0623\u0647\u0644\u0627\u064b")
public void incorrectCharset() throws Exception {
    // Page declares ISO-8859-1 for the external script — deliberately wrong.
    final String html
        = "<html><head>\n"
        + "  <script src='" + URL_SECOND + "' charset='" + ISO_8859_1 + "'></script>\n"
        + "</head>\n"
        + "<body></body>\n"
        + "</html>";
    // Prefix the script with the raw UTF-8 BOM bytes so the browser engine
    // can detect the real encoding despite the declared charset.
    final String script = new String(ByteOrderMark.UTF_8.getBytes())
        + "alert('" + "\u0623\u0647\u0644\u0627\u064b\u0623\u0647\u0644\u0627"
        + "\u064b\u0623\u0647\u0644\u0627\u064b\u0623\u0647\u0644\u0627\u064b" + "');";
    getMockWebConnection().setResponse(URL_SECOND, script, MimeType.APPLICATION_JAVASCRIPT, UTF_8);
    loadPageWithAlerts2(html);
}
/**
 * Attempts to sniff an encoding from a <a href="http://en.wikipedia.org/wiki/Byte_Order_Mark">Byte Order Mark</a>
 * in the specified byte array.
 *
 * @param bytes the bytes to check for a Byte Order Mark
 * @return the encoding sniffed from the specified bytes, or {@code null} if the encoding
 * could not be determined
 */
static Charset sniffEncodingFromUnicodeBom(final byte[] bytes) {
    if (bytes == null) {
        return null;
    }
    Charset encoding = null;
    // Only UTF-8 and UTF-16 (both endiannesses) are recognized here;
    // any other prefix leaves encoding null.
    if (startsWith(bytes, ByteOrderMark.UTF_8)) {
        encoding = UTF_8;
    }
    else if (startsWith(bytes, ByteOrderMark.UTF_16BE)) {
        encoding = UTF_16BE;
    }
    else if (startsWith(bytes, ByteOrderMark.UTF_16LE)) {
        encoding = UTF_16LE;
    }
    // Guard with isDebugEnabled() to skip the string concatenation when
    // debug logging is off.
    if (encoding != null && LOG.isDebugEnabled()) {
        LOG.debug("Encoding found in Unicode Byte Order Mark: '" + encoding + "'.");
    }
    return encoding;
}
/**
 * Checks whether the bytes at the current buffer position match the given
 * Byte Order Mark, without advancing the position.
 *
 * @param bom the BOM to test for
 * @return true if the BOM's bytes are present at the current position,
 *         false otherwise (including when too few bytes remain)
 */
private final boolean checkBom(ByteOrderMark bom) {
    int bomLength = bom.length();
    if (bufferPtr + bomLength >= length) {
        // Not enough bytes from the current position to the end of the buffer
        // NOTE(review): '>=' also rejects a BOM that would end exactly at
        // 'length' — confirm whether that boundary is intentional.
        return false;
    }
    if (BoundsChecking.BOUNDS_CHECKING_ENABLED) {
        // Optional bounds check over the memory range read below.
        buffer.checkBytes(bufferPtr - 1, bufferPtr + bomLength);
    }
    byte[] bomBytes = bom.getBytes();
    for (int i = 0; i < bomLength; i++) {
        // Direct memory read; bStartMinus1 + bufferPtr appears to address the
        // current byte (bufferPtr looks 1-based) — TODO confirm against callers.
        byte nextChar = PlatformDependent.getByte(bStartMinus1 + bufferPtr + i);
        if (nextChar != bomBytes[i]) {
            // No BOM. Position is unchanged
            return false;
        }
    }
    return true;
}
/**
 * Writes a simple .csv file prefixed with a UTF-8 BOM and verifies it is
 * read successfully, with the BOM not corrupting the extracted header.
 *
 * @throws Exception if the test fails
 */
@Test
public void testBomUtf8() throws Exception {
    // Simple .csv file with a UTF-8 BOM. Should read successfully
    File testFolder = tempDir.newFolder("testUtf8Folder");
    File testFile = new File(testFolder, "utf8.csv");
    // try-with-resources guarantees the stream is closed (and data flushed)
    // even if a write throws, so the query below always sees a complete file.
    try (PrintStream p = new PrintStream(testFile)) {
        // Raw BOM bytes first, then the header and one data row.
        p.write(ByteOrderMark.UTF_8.getBytes(), 0, ByteOrderMark.UTF_8.length());
        p.print("A,B\n");
        p.print("5,7\n");
    }
    testBuilder()
        .sqlQuery(String.format("select * from table(dfs.\"%s\" (type => 'text', " +
                "fieldDelimiter => ',', lineDelimiter => '\n', extractHeader => true))",
            testFile.getAbsolutePath()))
        .unOrdered()
        .baselineColumns("A","B")
        .baselineValues("5", "7")
        .go();
}
/**
 * Writes a .csv file prefixed with a UTF-16LE BOM and verifies the reader
 * rejects it with a DATA_READ user exception (UTF-16 is unsupported).
 *
 * @throws Exception if the test fails
 */
@Test
public void testErrorBomUtf16() throws Exception {
    // UTF-16 BOM should cause a dataReadError user exception
    File testFolder = tempDir.newFolder("testUtf16Folder");
    File testFile = new File(testFolder, "utf16.csv");
    // try-with-resources guarantees the stream is closed (and data flushed)
    // even if a write throws, so the query below always sees a complete file.
    try (PrintStream p = new PrintStream(testFile)) {
        p.write(ByteOrderMark.UTF_16LE.getBytes(), 0, ByteOrderMark.UTF_16LE.length());
        p.print("A,B\n");
        p.print("5,7\n");
    }
    thrownException.expect(new UserExceptionMatcher(UserBitShared.DremioPBError.ErrorType.DATA_READ,
        "DATA_READ ERROR: UTF-16 files not supported"));
    // NB: using test() instead of testBuilder() because it unwraps the thrown RpcException and re-throws the
    // underlying UserException (which is then matched with the UserExceptionMatcher)
    test(String.format("select * from table(dfs.\"%s\" (type => 'text', " +
            "fieldDelimiter => ',', lineDelimiter => '\n', extractHeader => true))",
        testFile.getAbsolutePath()));
}
/**
 * Reads the first line of the given CSV file and guesses the field names
 * from it, honouring the meta's delimiter, enclosure and encoding (all with
 * environment variables substituted) and transparently skipping any
 * UTF-8/UTF-16 BOM.
 *
 * @param fileName the (VFS) name of the CSV file to read
 * @param csvInputMeta supplies delimiter, enclosure, encoding and escape char
 * @return the trimmed field names found on the first line
 * @throws HopException if the file cannot be read or parsed
 */
String[] readFieldNamesFromFile( String fileName, CsvInputMeta csvInputMeta ) throws HopException {
    String delimiter = environmentSubstitute( csvInputMeta.getDelimiter() );
    String enclosure = environmentSubstitute( csvInputMeta.getEnclosure() );
    String realEncoding = environmentSubstitute( csvInputMeta.getEncoding() );
    // BOMInputStream strips a leading UTF-8/UTF-16 BOM so it does not end up
    // glued onto the first field name.
    try ( FileObject fileObject = HopVfs.getFileObject( fileName );
          BOMInputStream inputStream =
              new BOMInputStream( HopVfs.getInputStream( fileObject ), ByteOrderMark.UTF_8, ByteOrderMark.UTF_16LE,
                  ByteOrderMark.UTF_16BE ) ) {
        InputStreamReader reader = null;
        if ( Utils.isEmpty( realEncoding ) ) {
            // No explicit encoding configured: fall back to the platform default.
            reader = new InputStreamReader( inputStream );
        } else {
            reader = new InputStreamReader( inputStream, realEncoding );
        }
        EncodingType encodingType = EncodingType.guessEncodingType( reader.getEncoding() );
        // Read only the first line; 1000 is the initial line-buffer capacity.
        String line =
            TextFileInput.getLine( log, reader, encodingType, TextFileInputMeta.FILE_FORMAT_UNIX, new StringBuilder(
                1000 ) );
        String[] fieldNames = TextFileLineUtil.guessStringsFromLine( log, line, delimiter, enclosure, csvInputMeta.getEscapeCharacter() );
        if ( !Utils.isEmpty( csvInputMeta.getEnclosure() ) ) {
            // Strip enclosure characters (e.g. quotes) from the raw names.
            removeEnclosure( fieldNames, csvInputMeta.getEnclosure() );
        }
        trimFieldNames( fieldNames );
        return fieldNames;
    } catch ( IOException e ) {
        throw new HopFileException( BaseMessages.getString( PKG, "CsvInput.Exception.CreateFieldMappingError" ), e );
    }
}
/**
 * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
 *
 * Returns the response content as a string, using the specified charset,
 * rather than the charset/encoding specified in the server response.
 * If there is a bom header the charset parameter will be overwritten by the bom.
 * @param encoding the charset/encoding to use to convert the response content into a string
 * @param ignoreUtf8Bom if true utf8 bom header will be ignored
 * @return the response content as a string or null if the content retrieval was failing
 */
public String getContentAsString(final Charset encoding, final boolean ignoreUtf8Bom) {
    if (responseData_ != null) {
        try (InputStream in = responseData_.getInputStreamWithBomIfApplicable(BOM_HEADERS)) {
            // Only a BOMInputStream can report a BOM; any other stream type is
            // decoded with the caller-supplied charset below.
            if (in instanceof BOMInputStream) {
                try (BOMInputStream bomIn = (BOMInputStream) in) {
                    // there seems to be a bug in BOMInputStream
                    // we have to call this before hasBOM(ByteOrderMark)
                    if (bomIn.hasBOM()) {
                        // A detected BOM overrides 'encoding', except that a
                        // UTF-8 BOM may be explicitly ignored by the caller.
                        if (!ignoreUtf8Bom && bomIn.hasBOM(ByteOrderMark.UTF_8)) {
                            return IOUtils.toString(bomIn, UTF_8);
                        }
                        if (bomIn.hasBOM(ByteOrderMark.UTF_16BE)) {
                            return IOUtils.toString(bomIn, UTF_16BE);
                        }
                        if (bomIn.hasBOM(ByteOrderMark.UTF_16LE)) {
                            return IOUtils.toString(bomIn, UTF_16LE);
                        }
                    }
                    return IOUtils.toString(bomIn, encoding);
                }
            }
            return IOUtils.toString(in, encoding);
        }
        catch (final IOException e) {
            // Content retrieval failed: log and fall through to return null.
            LOG.warn(e.getMessage(), e);
        }
    }
    return null;
}
/**
 * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
 *
 * Returns the response content as a string, using the specified charset,
 * rather than the charset/encoding specified in the server response.
 * If there is a bom header the charset parameter will be overwritten by the bom.
 * @param encoding the charset/encoding to use to convert the response content into a string
 * @param ignoreUtf8Bom if true utf8 bom header will be ignored
 * @return the response content as a string or null if the content retrieval was failing
 */
public String getContentAsString(final Charset encoding, final boolean ignoreUtf8Bom) {
    if (responseData_ != null) {
        try (InputStream in = responseData_.getInputStream()) {
            if (in != null) {
                try (BOMInputStream bomIn = new BOMInputStream(in, BOM_HEADERS)) {
                    // there seems to be a bug in BOMInputStream
                    // we have to call this before hasBOM(ByteOrderMark)
                    if (bomIn.hasBOM()) {
                        // A detected BOM overrides 'encoding', except that a
                        // UTF-8 BOM may be explicitly ignored by the caller.
                        if (!ignoreUtf8Bom && bomIn.hasBOM(ByteOrderMark.UTF_8)) {
                            return IOUtils.toString(bomIn, UTF_8);
                        }
                        if (bomIn.hasBOM(ByteOrderMark.UTF_16BE)) {
                            return IOUtils.toString(bomIn, UTF_16BE);
                        }
                        if (bomIn.hasBOM(ByteOrderMark.UTF_16LE)) {
                            return IOUtils.toString(bomIn, UTF_16LE);
                        }
                    }
                    return IOUtils.toString(bomIn, encoding);
                }
            }
        }
        catch (final IOException e) {
            // Pass message and throwable separately so the stack trace is
            // always recorded (plain warn(e) may log only the exception's
            // toString); also keeps this consistent with the sibling variant.
            LOG.warn(e.getMessage(), e);
        }
    }
    return null;
}
/**
 * Orders BOMs so that longer marks come before shorter ones; marks of equal
 * length compare as equal.
 *
 * @param bom1 the first BOM
 * @param bom2 the second BOM
 * @return -1 if bom1 is longer, 1 if bom2 is longer, 0 if equal length
 */
public int compare(ByteOrderMark bom1, ByteOrderMark bom2) {
    int len1 = bom1.length();
    int len2 = bom2.length();
    if (len1 == len2) {
        return 0;
    }
    // Longer BOM sorts first (descending by length).
    return len1 > len2 ? -1 : 1;
}
/**
* Constructs a new BOM InputStream that detects the specified BOMs and optionally includes them.
*
* @param delegate
* the InputStream to delegate to
* @param include
* true to include the specified BOMs or false to exclude them
* @param boms
* The BOMs to detect and optionally exclude
*/
public BOMInputStream(InputStream delegate, boolean include, ByteOrderMark... boms) {
super(delegate);
if (boms == null || boms.length == 0) {
throw new IllegalArgumentException("No BOMs specified");
}
this.include = include;
// Sort the BOMs to match the longest BOM first because some BOMs have the same starting two bytes.
Arrays.sort(boms, ByteOrderMarkLengthComparator);
this.boms = Arrays.asList(boms);
}
/**
 * Return the BOM (Byte Order Mark).
 *
 * <p>On first call this reads up to the longest candidate BOM's length from
 * the underlying stream into {@code firstBytes}, matches it against the
 * candidates, and — when excluding BOMs — positions the read cursor just
 * past the matched mark. Subsequent calls return the cached result.</p>
 *
 * @return The BOM or null if none
 * @throws IOException
 *             if an error reading the first bytes of the stream occurs
 */
public ByteOrderMark getBOM() throws IOException {
    if (firstBytes == null) {
        fbLength = 0;
        // BOMs are sorted from longest to shortest
        final int maxBomSize = boms.get(0).length();
        firstBytes = new int[maxBomSize];
        // Read first maxBomSize bytes
        for (int i = 0; i < firstBytes.length; i++) {
            firstBytes[i] = in.read();
            fbLength++;
            // Stop on EOF; fbLength still counts the -1 slot just read.
            if (firstBytes[i] < 0) {
                break;
            }
        }
        // match BOM in firstBytes
        byteOrderMark = find();
        if (byteOrderMark != null) {
            if (!include) {
                if (byteOrderMark.length() < firstBytes.length) {
                    // Skip only the BOM; the extra buffered bytes will be
                    // replayed starting at fbIndex.
                    fbIndex = byteOrderMark.length();
                } else {
                    // The BOM consumed the whole buffer: nothing to replay.
                    fbLength = 0;
                }
            }
        }
    }
    return byteOrderMark;
}
/**
 * Find a BOM with the specified bytes.
 *
 * @return The matched BOM or null if none matched
 */
private ByteOrderMark find() {
    // Candidates are checked in list order; the first match wins.
    return boms.stream()
            .filter(this::matches)
            .findFirst()
            .orElse(null);
}
/**
 * Check if the bytes match a BOM.
 *
 * <p>Only the first {@code bom.length()} buffered bytes are compared;
 * {@code firstBytes} may hold more bytes than the BOM.</p>
 *
 * @param bom
 *            The BOM
 * @return true if the bytes match the bom, otherwise false
 */
private boolean matches(ByteOrderMark bom) {
    int i = bom.length();
    while (--i >= 0) {
        if (firstBytes[i] != bom.get(i)) {
            return false;
        }
    }
    return true;
}
/**
 * Orders BOMs so that longer marks come before shorter ones; marks of equal
 * length compare as equal.
 *
 * @param bom1 the first BOM
 * @param bom2 the second BOM
 * @return EOF (-1) if bom1 is longer, 1 if bom2 is longer, 0 if equal length
 */
public int compare(final ByteOrderMark bom1, final ByteOrderMark bom2) {
    final int len1 = bom1.length();
    final int len2 = bom2.length();
    if (len1 == len2) {
        return 0;
    }
    // Longer BOM sorts first (descending by length).
    return len1 > len2 ? EOF : 1;
}
/**
 * Constructs a new BOM InputStream that detects the specified BOMs and optionally includes them.
 *
 * @param delegate
 *            the InputStream to delegate to
 * @param include
 *            true to include the specified BOMs or false to exclude them
 * @param boms
 *            The BOMs to detect and optionally exclude
 * @throws IllegalArgumentException if no BOMs are specified
 */
public BOMInputStream(final InputStream delegate, final boolean include, final ByteOrderMark... boms) {
    super(delegate);
    if (boms == null || boms.length == 0) {
        throw new IllegalArgumentException("No BOMs specified");
    }
    this.include = include;
    // Sort the BOMs to match the longest BOM first because some BOMs have the same starting two bytes.
    // NOTE(review): this sorts the caller's varargs array in place — consider
    // cloning before sorting to avoid the side effect.
    Arrays.sort(boms, ByteOrderMarkLengthComparator);
    this.boms = Arrays.asList(boms);
}
/**
 * Return the BOM (Byte Order Mark).
 *
 * <p>On first call this reads up to the longest candidate BOM's length from
 * the underlying stream into {@code firstBytes}, matches it against the
 * candidates, and — when excluding BOMs — positions the read cursor just
 * past the matched mark. Subsequent calls return the cached result.</p>
 *
 * @return The BOM or null if none
 * @throws IOException
 *             if an error reading the first bytes of the stream occurs
 */
public ByteOrderMark getBOM() throws IOException {
    if (firstBytes == null) {
        fbLength = 0;
        // BOMs are sorted from longest to shortest
        final int maxBomSize = boms.get(0).length();
        firstBytes = new int[maxBomSize];
        // Read first maxBomSize bytes
        for (int i = 0; i < firstBytes.length; i++) {
            firstBytes[i] = in.read();
            fbLength++;
            // Stop on EOF; fbLength still counts the -1 slot just read.
            if (firstBytes[i] < 0) {
                break;
            }
        }
        // match BOM in firstBytes
        byteOrderMark = find();
        if (byteOrderMark != null) {
            if (!include) {
                if (byteOrderMark.length() < firstBytes.length) {
                    // Skip only the BOM; the extra buffered bytes will be
                    // replayed starting at fbIndex.
                    fbIndex = byteOrderMark.length();
                } else {
                    // The BOM consumed the whole buffer: nothing to replay.
                    fbLength = 0;
                }
            }
        }
    }
    return byteOrderMark;
}
/**
 * Find a BOM with the specified bytes.
 *
 * @return The matched BOM or null if none matched
 */
private ByteOrderMark find() {
    // Candidates are checked in list order; the first match wins.
    for (int i = 0; i < boms.size(); i++) {
        final ByteOrderMark candidate = boms.get(i);
        if (matches(candidate)) {
            return candidate;
        }
    }
    return null;
}
/**
 * Check if the bytes match a BOM.
 *
 * <p>Only the first {@code bom.length()} buffered bytes are compared;
 * {@code firstBytes} may hold more bytes than the BOM.</p>
 *
 * @param bom
 *            The BOM
 * @return true if the bytes match the bom, otherwise false
 */
private boolean matches(final ByteOrderMark bom) {
    final int bomLength = bom.length();
    for (int index = 0; index < bomLength; index++) {
        if (firstBytes[index] != bom.get(index)) {
            return false;
        }
    }
    return true;
}
/**
 * Starts writing a new output partition: closes the previous partition's
 * file (if any), opens a fresh file whose name is derived from the prefix,
 * a running index and the extension, writes a UTF-8 BOM, and then writes
 * the delimited header row of column names.
 *
 * @param partition the partition to start writing
 * @throws Exception if closing the previous file fails
 */
@Override
public void startPartition(WritePartition partition) throws Exception {
    // Finish the previous partition's file before opening a new one.
    if(this.partition != null){
        close();
    }
    this.partition = partition;
    // open a new file for writing data with new schema
    try {
        this.path = fs.canonicalizePath(partition.qualified(location, prefix + "_" + index + "." + extension));
        dos = new DataOutputStream(fs.create(path));
        stream = new PrintStream(dos);
        // Emit the UTF-8 BOM bytes first so consumers can detect the encoding.
        stream.write(ByteOrderMark.UTF_8.getBytes(), 0, ByteOrderMark.UTF_8.length());
        logger.debug("Created file: {}", path);
    } catch (IOException e) {
        throw UserException.dataWriteError(e)
            .message("Failure while attempting to write file %s.", path)
            .build(logger);
    }
    // Bump the index so the next partition gets a distinct file name.
    index++;
    String columns = Joiner.on(fieldDelimiter).join(columnNames);
    stream.print(columns);
    stream.print(lineDelimiter);
}
/**
 * Skips a leading UTF-8 BOM if present, advancing the buffer position past
 * it. A UTF-16 BOM (either endianness) is rejected because UTF-16 input is
 * not supported by this reader.
 *
 * @throws IOException declared by the signature; UTF-16 input is reported
 *         via an unchecked UserException instead
 */
private final void skipOptionalBOM() throws IOException {
    if (checkBom(ByteOrderMark.UTF_8)) {
        bufferPtr += ByteOrderMark.UTF_8.length();
    } else if (checkBom(ByteOrderMark.UTF_16LE) || checkBom(ByteOrderMark.UTF_16BE)) {
        throw UserException.dataReadError()
            .message("UTF-16 files not supported")
            .build(logger);
    }
}
/**
 * Creates an XML event reader over the given stream, using a Unicode BOM
 * (UTF-8/UTF-16) to select the charset when present and defaulting to UTF-8
 * otherwise. The BOM itself is stripped from the stream.
 *
 * @param stream the XML input stream
 * @return an XMLEventReader decoding the stream with the detected charset
 * @throws XMLStreamException if the BOM cannot be read or the reader cannot
 *         be created
 */
public static XMLEventReader createEventReader(InputStream stream) throws XMLStreamException {
    Charset charset = UTF8Charset.get();
    BOMInputStream bomStream = new BOMInputStream(stream, false,
        ByteOrderMark.UTF_8, ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_16LE);
    try {
        if (bomStream.hasBOM()) {
            charset = Charset.forName(bomStream.getBOMCharsetName());
        }
    } catch (IOException e) {
        throw new XMLStreamException(e);
    }
    XMLInputFactory factory = XMLInputFactory.newInstance();
    // Harden against XXE: refuse DTDs and external entities, since the
    // stream may come from an untrusted source. Documents that legitimately
    // require DTD processing would need a separately configured factory.
    factory.setProperty(XMLInputFactory.SUPPORT_DTD, Boolean.FALSE);
    factory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.FALSE);
    return factory.createXMLEventReader(new XMLFixInputStreamReader(bomStream, charset));
}
/**
 * Reads the given stream with {@code charset}, transcodes it to UTF-8 and
 * feeds the UTF-8 bytes into this digest via {@code update(...)} (the server
 * digest is UTF-8), optionally converting client line endings on the way.
 * Any leading Unicode BOM (UTF-8/16/32, either endianness) is stripped so it
 * does not contaminate the digest.
 *
 * @param inStream the encoded input stream to digest
 * @param charset the charset the stream is encoded with
 * @param isRequireLineEndingConvert whether client line endings must be rewritten
 * @param clientLineEnding the client line ending to convert, may be null
 * @throws IOException if reading or strict UTF-8 encoding fails
 */
private void digestEncodedStreamToUtf8(@Nonnull InputStream inStream, @Nonnull Charset charset,
        boolean isRequireLineEndingConvert, @Nullable ClientLineEnding clientLineEnding)
        throws IOException {
    // 'false' = exclude (strip) any detected BOM from the stream.
    try (BOMInputStream unicodeInputStream = new BOMInputStream(inStream, false,
            ByteOrderMark.UTF_8, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE,
            ByteOrderMark.UTF_32LE, ByteOrderMark.UTF_32BE);
        InputStreamReader encodedStreamReader = new InputStreamReader(unicodeInputStream,
            charset)) {
        // Strict encoder: malformed or unmappable characters raise instead of
        // being silently replaced, so a bad digest input fails loudly.
        CharsetEncoder utf8CharsetEncoder = CharsetDefs.UTF8.newEncoder()
            .onMalformedInput(CodingErrorAction.REPORT)
            .onUnmappableCharacter(CodingErrorAction.REPORT);
        char[] buffer = new char[bufferSize];
        int read;
        while ((read = encodedStreamReader.read(buffer)) > 0) {
            // Convert encoded stream to UTF8 since server digest is UTF8
            ByteBuffer utf8ByteBuffer = utf8CharsetEncoder
                .encode(CharBuffer.wrap(buffer, 0, read));
            if (isRequireLineEndingConvert) {
                ByteBuffer convert = findAndReplaceEncodedClientLineEndingIfRequireLineEndingCovert(
                    encodedStreamReader, utf8CharsetEncoder, utf8ByteBuffer,
                    clientLineEnding);
                update(convert.array(), convert.arrayOffset(), convert.limit());
            } else {
                update(utf8ByteBuffer.array(), utf8ByteBuffer.arrayOffset(),
                    utf8ByteBuffer.limit());
            }
        }
    }
}
/**
 * Returns the size (buffer limit) of the file's content after stripping any
 * UTF-16 BOM and converting the remaining bytes from {@code utf16} to UTF-8.
 *
 * @param testResourceFile the UTF-16 encoded file to measure
 * @param utf16 the concrete UTF-16 charset (LE or BE) of the file
 * @return the limit of the UTF-8 converted buffer
 * @throws Exception if reading or conversion fails
 */
private long getUtf16FileSizeAfterRemoveBomAndEncodedByUtf8(File testResourceFile, Charset utf16) throws Exception {
    // 'false' = strip the BOM rather than passing it through.
    try (BOMInputStream bomFreeStream = new BOMInputStream(
            new FileInputStream(testResourceFile),
            false,
            ByteOrderMark.UTF_16LE,
            ByteOrderMark.UTF_16BE)) {
        final ByteBuffer payload = ByteBuffer.wrap(IOUtils.toByteArray(bomFreeStream));
        final CharsetConverter toUtf8 = new CharsetConverter(utf16, CharsetDefs.UTF8);
        return toUtf8.convert(payload).limit();
    }
}
/**
 * Reads the given stream with {@code charset}, transcodes it to UTF-8 and
 * feeds the UTF-8 bytes into this digest via {@code update(...)} (the server
 * digest is UTF-8), optionally converting client line endings on the way.
 * Any leading Unicode BOM (UTF-8/16/32, either endianness) is stripped so it
 * does not contaminate the digest.
 *
 * @param inStream the encoded input stream to digest
 * @param charset the charset the stream is encoded with
 * @param isRequireLineEndingConvert whether client line endings must be rewritten
 * @param clientLineEnding the client line ending to convert, may be null
 * @throws IOException if reading or strict UTF-8 encoding fails
 */
private void digestEncodedStreamToUtf8(@Nonnull InputStream inStream, @Nonnull Charset charset,
        boolean isRequireLineEndingConvert, @Nullable ClientLineEnding clientLineEnding)
        throws IOException {
    // 'false' = exclude (strip) any detected BOM from the stream.
    try (BOMInputStream unicodeInputStream = new BOMInputStream(inStream, false,
            ByteOrderMark.UTF_8, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE,
            ByteOrderMark.UTF_32LE, ByteOrderMark.UTF_32BE);
        InputStreamReader encodedStreamReader = new InputStreamReader(unicodeInputStream,
            charset)) {
        // Strict encoder: malformed or unmappable characters raise instead of
        // being silently replaced, so a bad digest input fails loudly.
        CharsetEncoder utf8CharsetEncoder = CharsetDefs.UTF8.newEncoder()
            .onMalformedInput(CodingErrorAction.REPORT)
            .onUnmappableCharacter(CodingErrorAction.REPORT);
        char[] buffer = new char[bufferSize];
        int read;
        while ((read = encodedStreamReader.read(buffer)) > 0) {
            // Convert encoded stream to UTF8 since server digest is UTF8
            ByteBuffer utf8ByteBuffer = utf8CharsetEncoder
                .encode(CharBuffer.wrap(buffer, 0, read));
            if (isRequireLineEndingConvert) {
                ByteBuffer convert = findAndReplaceEncodedClientLineEndingIfRequireLineEndingCovert(
                    encodedStreamReader, utf8CharsetEncoder, utf8ByteBuffer,
                    clientLineEnding);
                update(convert.array(), convert.arrayOffset(), convert.limit());
            } else {
                update(utf8ByteBuffer.array(), utf8ByteBuffer.arrayOffset(),
                    utf8ByteBuffer.limit());
            }
        }
    }
}
/**
 * Returns the size (buffer limit) of the file's content after stripping any
 * UTF-16 BOM and converting the remaining bytes from {@code utf16} to UTF-8.
 *
 * @param testResourceFile the UTF-16 encoded file to measure
 * @param utf16 the concrete UTF-16 charset (LE or BE) of the file
 * @return the limit of the UTF-8 converted buffer — presumably the encoded
 *         byte count; confirm against CharsetConverter's contract
 * @throws Exception if reading or conversion fails
 */
private long getUtf16FileSizeAfterRemoveBomAndEncodedByUtf8(File testResourceFile, Charset utf16) throws Exception {
    // 'false' = strip the BOM rather than passing it through.
    try (BOMInputStream bomSkipedInputStream = new BOMInputStream(
            new FileInputStream(testResourceFile),
            false,
            ByteOrderMark.UTF_16LE,
            ByteOrderMark.UTF_16BE)) {
        byte[] bomSkippedBytes = IOUtils.toByteArray(bomSkipedInputStream);
        ByteBuffer buf = ByteBuffer.wrap(bomSkippedBytes);
        CharsetConverter convert = new CharsetConverter(utf16, CharsetDefs.UTF8);
        return convert.convert(buf).limit();
    }
}
/**
 * Reads the given stream with {@code charset}, transcodes it to UTF-8 and
 * feeds the UTF-8 bytes into this digest via {@code update(...)} (the server
 * digest is UTF-8), optionally converting client line endings on the way.
 * Any leading Unicode BOM (UTF-8/16/32, either endianness) is stripped so it
 * does not contaminate the digest.
 *
 * @param inStream the encoded input stream to digest
 * @param charset the charset the stream is encoded with
 * @param isRequireLineEndingConvert whether client line endings must be rewritten
 * @param clientLineEnding the client line ending to convert, may be null
 * @throws IOException if reading or strict UTF-8 encoding fails
 */
private void digestEncodedStreamToUtf8(@Nonnull InputStream inStream, @Nonnull Charset charset,
        boolean isRequireLineEndingConvert, @Nullable ClientLineEnding clientLineEnding)
        throws IOException {
    // 'false' = exclude (strip) any detected BOM from the stream.
    try (BOMInputStream unicodeInputStream = new BOMInputStream(inStream, false,
            ByteOrderMark.UTF_8, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE,
            ByteOrderMark.UTF_32LE, ByteOrderMark.UTF_32BE);
        InputStreamReader encodedStreamReader = new InputStreamReader(unicodeInputStream,
            charset)) {
        // Strict encoder: malformed or unmappable characters raise instead of
        // being silently replaced, so a bad digest input fails loudly.
        CharsetEncoder utf8CharsetEncoder = CharsetDefs.UTF8.newEncoder()
            .onMalformedInput(CodingErrorAction.REPORT)
            .onUnmappableCharacter(CodingErrorAction.REPORT);
        char[] buffer = new char[bufferSize];
        int read;
        while ((read = encodedStreamReader.read(buffer)) > 0) {
            // Convert encoded stream to UTF8 since server digest is UTF8
            ByteBuffer utf8ByteBuffer = utf8CharsetEncoder
                .encode(CharBuffer.wrap(buffer, 0, read));
            if (isRequireLineEndingConvert) {
                ByteBuffer convert = findAndReplaceEncodedClientLineEndingIfRequireLineEndingCovert(
                    encodedStreamReader, utf8CharsetEncoder, utf8ByteBuffer,
                    clientLineEnding);
                update(convert.array(), convert.arrayOffset(), convert.limit());
            } else {
                update(utf8ByteBuffer.array(), utf8ByteBuffer.arrayOffset(),
                    utf8ByteBuffer.limit());
            }
        }
    }
}
/**
 * Returns the size (buffer limit) of the file's content after stripping any
 * UTF-16 BOM and converting the remaining bytes from {@code utf16} to UTF-8.
 *
 * @param testResourceFile the UTF-16 encoded file to measure
 * @param utf16 the concrete UTF-16 charset (LE or BE) of the file
 * @return the limit of the UTF-8 converted buffer — presumably the encoded
 *         byte count; confirm against CharsetConverter's contract
 * @throws Exception if reading or conversion fails
 */
private long getUtf16FileSizeAfterRemoveBomAndEncodedByUtf8(File testResourceFile, Charset utf16) throws Exception {
    // 'false' = strip the BOM rather than passing it through.
    try (BOMInputStream bomSkipedInputStream = new BOMInputStream(
            new FileInputStream(testResourceFile),
            false,
            ByteOrderMark.UTF_16LE,
            ByteOrderMark.UTF_16BE)) {
        byte[] bomSkippedBytes = IOUtils.toByteArray(bomSkipedInputStream);
        ByteBuffer buf = ByteBuffer.wrap(bomSkippedBytes);
        CharsetConverter convert = new CharsetConverter(utf16, CharsetDefs.UTF8);
        return convert.convert(buf).limit();
    }
}
/**
 * Reads the given stream with {@code charset}, transcodes it to UTF-8 and
 * feeds the UTF-8 bytes into this digest via {@code update(...)} (the server
 * digest is UTF-8), optionally converting client line endings on the way.
 * Any leading Unicode BOM (UTF-8/16/32, either endianness) is stripped so it
 * does not contaminate the digest.
 *
 * @param inStream the encoded input stream to digest
 * @param charset the charset the stream is encoded with
 * @param isRequireLineEndingConvert whether client line endings must be rewritten
 * @param clientLineEnding the client line ending to convert, may be null
 * @throws IOException if reading or strict UTF-8 encoding fails
 */
private void digestEncodedStreamToUtf8(@Nonnull InputStream inStream, @Nonnull Charset charset,
        boolean isRequireLineEndingConvert, @Nullable ClientLineEnding clientLineEnding)
        throws IOException {
    // 'false' = exclude (strip) any detected BOM from the stream.
    try (BOMInputStream unicodeInputStream = new BOMInputStream(inStream, false,
            ByteOrderMark.UTF_8, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE,
            ByteOrderMark.UTF_32LE, ByteOrderMark.UTF_32BE);
        InputStreamReader encodedStreamReader = new InputStreamReader(unicodeInputStream,
            charset)) {
        // Strict encoder: malformed or unmappable characters raise instead of
        // being silently replaced, so a bad digest input fails loudly.
        CharsetEncoder utf8CharsetEncoder = CharsetDefs.UTF8.newEncoder()
            .onMalformedInput(CodingErrorAction.REPORT)
            .onUnmappableCharacter(CodingErrorAction.REPORT);
        char[] buffer = new char[bufferSize];
        int read;
        while ((read = encodedStreamReader.read(buffer)) > 0) {
            // Convert encoded stream to UTF8 since server digest is UTF8
            ByteBuffer utf8ByteBuffer = utf8CharsetEncoder
                .encode(CharBuffer.wrap(buffer, 0, read));
            if (isRequireLineEndingConvert) {
                ByteBuffer convert = findAndReplaceEncodedClientLineEndingIfRequireLineEndingCovert(
                    encodedStreamReader, utf8CharsetEncoder, utf8ByteBuffer,
                    clientLineEnding);
                update(convert.array(), convert.arrayOffset(), convert.limit());
            } else {
                update(utf8ByteBuffer.array(), utf8ByteBuffer.arrayOffset(),
                    utf8ByteBuffer.limit());
            }
        }
    }
}
/**
 * Returns the size (buffer limit) of the file's content after stripping any
 * UTF-16 BOM and converting the remaining bytes from {@code utf16} to UTF-8.
 *
 * @param testResourceFile the UTF-16 encoded file to measure
 * @param utf16 the concrete UTF-16 charset (LE or BE) of the file
 * @return the limit of the UTF-8 converted buffer — presumably the encoded
 *         byte count; confirm against CharsetConverter's contract
 * @throws Exception if reading or conversion fails
 */
private long getUtf16FileSizeAfterRemoveBomAndEncodedByUtf8(File testResourceFile, Charset utf16) throws Exception {
    // 'false' = strip the BOM rather than passing it through.
    try (BOMInputStream bomSkipedInputStream = new BOMInputStream(
            new FileInputStream(testResourceFile),
            false,
            ByteOrderMark.UTF_16LE,
            ByteOrderMark.UTF_16BE)) {
        byte[] bomSkippedBytes = IOUtils.toByteArray(bomSkipedInputStream);
        ByteBuffer buf = ByteBuffer.wrap(bomSkippedBytes);
        CharsetConverter convert = new CharsetConverter(utf16, CharsetDefs.UTF8);
        return convert.convert(buf).limit();
    }
}