下面列出了 org.apache.commons.io.ByteOrderMark#UTF_16LE 的实例代码，您可以点击链接到 GitHub 查看源代码，也可以在右侧发表评论。
/**
 * Reads the first line of the given file and splits it into field names.
 *
 * <p>The delimiter, enclosure and encoding configured on {@code csvInputMeta} are passed through
 * {@code environmentSubstitute} before use, and the resolved enclosure is used both for splitting
 * the line and for stripping the enclosure from the resulting names. The file is opened through a
 * {@code BOMInputStream} configured for UTF-8, UTF-16LE and UTF-16BE byte order marks. When no
 * encoding is configured, the reader falls back to the platform default character set.
 *
 * @param fileName name of the file whose first line holds the field names
 * @param csvInputMeta metadata supplying delimiter, enclosure, encoding and escape character
 * @return the field names after {@code removeEnclosure} and {@code trimFieldNames} were applied
 * @throws HopException if reading fails; an {@link IOException} is rethrown as a
 *         {@code HopFileException}
 */
String[] readFieldNamesFromFile( String fileName, CsvInputMeta csvInputMeta ) throws HopException {
  String delimiter = environmentSubstitute( csvInputMeta.getDelimiter() );
  String enclosure = environmentSubstitute( csvInputMeta.getEnclosure() );
  String realEncoding = environmentSubstitute( csvInputMeta.getEncoding() );

  try ( FileObject fileObject = HopVfs.getFileObject( fileName );
        BOMInputStream inputStream =
          new BOMInputStream( HopVfs.getInputStream( fileObject ), ByteOrderMark.UTF_8, ByteOrderMark.UTF_16LE,
            ByteOrderMark.UTF_16BE );
        // The reader is a resource too, so it is released together with the streams it wraps.
        InputStreamReader reader = Utils.isEmpty( realEncoding )
          ? new InputStreamReader( inputStream )
          : new InputStreamReader( inputStream, realEncoding ) ) {

    EncodingType encodingType = EncodingType.guessEncodingType( reader.getEncoding() );
    String line =
      TextFileInput.getLine( log, reader, encodingType, TextFileInputMeta.FILE_FORMAT_UNIX, new StringBuilder(
        1000 ) );
    // NOTE(review): line may be null for an empty file, depending on TextFileInput.getLine - confirm
    // how guessStringsFromLine and the helpers below behave in that case.
    String[] fieldNames =
      TextFileLineUtil.guessStringsFromLine( log, line, delimiter, enclosure, csvInputMeta.getEscapeCharacter() );

    // Use the resolved enclosure here as well: the raw metadata value may still be a variable
    // expression such as ${ENCLOSURE}, which would never match the characters found in the file.
    if ( !Utils.isEmpty( enclosure ) ) {
      removeEnclosure( fieldNames, enclosure );
    }
    trimFieldNames( fieldNames );
    return fieldNames;
  } catch ( IOException e ) {
    throw new HopFileException( BaseMessages.getString( PKG, "CsvInput.Exception.CreateFieldMappingError" ), e );
  }
}
/**
 * Creates a StAX event reader over {@code stream}, picking the character set from a leading byte
 * order mark when one is present.
 *
 * <p>UTF-8, UTF-16BE and UTF-16LE byte order marks are recognised. Without a byte order mark the
 * character set returned by {@code UTF8Charset.get()} is used. Resolution of external entities is
 * switched off on the factory so that a document cannot make the parser fetch external resources
 * (XXE).
 *
 * @param stream the XML input; it is wrapped, not closed, by this method
 * @return an event reader over the decoded stream
 * @throws XMLStreamException if the byte order mark cannot be read or the reader cannot be created
 */
public static XMLEventReader createEventReader(InputStream stream) throws XMLStreamException {
    Charset charset = UTF8Charset.get();
    BOMInputStream bomStream = new BOMInputStream(stream, false,
            ByteOrderMark.UTF_8, ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_16LE);
    try {
        if (bomStream.hasBOM()) {
            charset = Charset.forName(bomStream.getBOMCharsetName());
        }
    } catch (IOException e) {
        throw new XMLStreamException(e);
    }
    XMLInputFactory factory = XMLInputFactory.newInstance();
    // Harden against XXE: never resolve external parsed entities.
    // NOTE(review): DTD support is left enabled so documents declaring a DOCTYPE keep parsing;
    // consider also disabling XMLInputFactory.SUPPORT_DTD if inputs are untrusted.
    factory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.FALSE);
    return factory.createXMLEventReader(new XMLFixInputStreamReader(bomStream, charset));
}
/**
 * Decodes {@code inStream} with {@code charset}, re-encodes the characters as UTF-8 and feeds the
 * resulting bytes to {@code update}, one buffer-sized chunk at a time.
 *
 * <p>The stream is read through a {@code BOMInputStream} configured for UTF-8, UTF-16 and UTF-32
 * byte order marks. Malformed or unmappable characters make the encoder report an error instead
 * of substituting a replacement.
 *
 * @param inStream the encoded input; closed when this method returns
 * @param charset the character set used to decode {@code inStream}
 * @param isRequireLineEndingConvert whether each chunk is passed through
 *        {@code findAndReplaceEncodedClientLineEndingIfRequireLineEndingCovert} before digesting
 * @param clientLineEnding the client line ending handed to that helper
 * @throws IOException if reading or encoding fails
 */
private void digestEncodedStreamToUtf8(@Nonnull InputStream inStream, @Nonnull Charset charset,
        boolean isRequireLineEndingConvert, @Nullable ClientLineEnding clientLineEnding)
        throws IOException {
    try (BOMInputStream bomAwareStream = new BOMInputStream(inStream, false,
            ByteOrderMark.UTF_8, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE,
            ByteOrderMark.UTF_32LE, ByteOrderMark.UTF_32BE);
         InputStreamReader decoder = new InputStreamReader(bomAwareStream, charset)) {

        CharsetEncoder toUtf8 = CharsetDefs.UTF8.newEncoder()
                .onMalformedInput(CodingErrorAction.REPORT)
                .onUnmappableCharacter(CodingErrorAction.REPORT);
        char[] chunk = new char[bufferSize];

        for (int count = decoder.read(chunk); count > 0; count = decoder.read(chunk)) {
            // The server digest is computed over UTF-8, so every chunk is re-encoded first.
            ByteBuffer encoded = toUtf8.encode(CharBuffer.wrap(chunk, 0, count));
            ByteBuffer digestInput = isRequireLineEndingConvert
                    ? findAndReplaceEncodedClientLineEndingIfRequireLineEndingCovert(
                            decoder, toUtf8, encoded, clientLineEnding)
                    : encoded;
            update(digestInput.array(), digestInput.arrayOffset(), digestInput.limit());
        }
    }
}
/**
 * Computes the size a UTF-16 file has once it is read through a {@code BOMInputStream} and
 * converted to UTF-8.
 *
 * <p>The file is read through a {@code BOMInputStream} set up for UTF-16LE and UTF-16BE byte
 * order marks with the include flag {@code false}; the bytes it yields are converted with a
 * {@code CharsetConverter} from {@code utf16} to {@code CharsetDefs.UTF8}.
 *
 * @param testResourceFile the UTF-16 encoded file to measure
 * @param utf16 the UTF-16 character set the file content is encoded in
 * @return the limit of the converted buffer
 * @throws Exception if the file cannot be read or converted
 */
private long getUtf16FileSizeAfterRemoveBomAndEncodedByUtf8(File testResourceFile, Charset utf16) throws Exception {
    try (BOMInputStream bomStripped = new BOMInputStream(
            new FileInputStream(testResourceFile), false,
            ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE)) {
        ByteBuffer utf16Bytes = ByteBuffer.wrap(IOUtils.toByteArray(bomStripped));
        return new CharsetConverter(utf16, CharsetDefs.UTF8).convert(utf16Bytes).limit();
    }
}
/**
 * Decodes {@code inStream} with {@code charset}, re-encodes the characters as UTF-8 and feeds the
 * resulting bytes to {@code update}, one buffer-sized chunk at a time.
 *
 * <p>The stream is read through a {@code BOMInputStream} configured for UTF-8, UTF-16 and UTF-32
 * byte order marks. Malformed or unmappable characters make the encoder report an error instead
 * of substituting a replacement.
 *
 * @param inStream the encoded input; closed when this method returns
 * @param charset the character set used to decode {@code inStream}
 * @param isRequireLineEndingConvert whether each chunk is passed through
 *        {@code findAndReplaceEncodedClientLineEndingIfRequireLineEndingCovert} before digesting
 * @param clientLineEnding the client line ending handed to that helper
 * @throws IOException if reading or encoding fails
 */
private void digestEncodedStreamToUtf8(@Nonnull InputStream inStream, @Nonnull Charset charset,
        boolean isRequireLineEndingConvert, @Nullable ClientLineEnding clientLineEnding)
        throws IOException {
    try (BOMInputStream bomAwareStream = new BOMInputStream(inStream, false,
            ByteOrderMark.UTF_8, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE,
            ByteOrderMark.UTF_32LE, ByteOrderMark.UTF_32BE);
         InputStreamReader decoder = new InputStreamReader(bomAwareStream, charset)) {

        CharsetEncoder toUtf8 = CharsetDefs.UTF8.newEncoder()
                .onMalformedInput(CodingErrorAction.REPORT)
                .onUnmappableCharacter(CodingErrorAction.REPORT);
        char[] chunk = new char[bufferSize];

        for (int count = decoder.read(chunk); count > 0; count = decoder.read(chunk)) {
            // The server digest is computed over UTF-8, so every chunk is re-encoded first.
            ByteBuffer encoded = toUtf8.encode(CharBuffer.wrap(chunk, 0, count));
            ByteBuffer digestInput = isRequireLineEndingConvert
                    ? findAndReplaceEncodedClientLineEndingIfRequireLineEndingCovert(
                            decoder, toUtf8, encoded, clientLineEnding)
                    : encoded;
            update(digestInput.array(), digestInput.arrayOffset(), digestInput.limit());
        }
    }
}
/**
 * Computes the size a UTF-16 file has once it is read through a {@code BOMInputStream} and
 * converted to UTF-8.
 *
 * <p>The file is read through a {@code BOMInputStream} set up for UTF-16LE and UTF-16BE byte
 * order marks with the include flag {@code false}; the bytes it yields are converted with a
 * {@code CharsetConverter} from {@code utf16} to {@code CharsetDefs.UTF8}.
 *
 * @param testResourceFile the UTF-16 encoded file to measure
 * @param utf16 the UTF-16 character set the file content is encoded in
 * @return the limit of the converted buffer
 * @throws Exception if the file cannot be read or converted
 */
private long getUtf16FileSizeAfterRemoveBomAndEncodedByUtf8(File testResourceFile, Charset utf16) throws Exception {
    try (BOMInputStream bomStripped = new BOMInputStream(
            new FileInputStream(testResourceFile), false,
            ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE)) {
        ByteBuffer utf16Bytes = ByteBuffer.wrap(IOUtils.toByteArray(bomStripped));
        return new CharsetConverter(utf16, CharsetDefs.UTF8).convert(utf16Bytes).limit();
    }
}
/**
 * Decodes {@code inStream} with {@code charset}, re-encodes the characters as UTF-8 and feeds the
 * resulting bytes to {@code update}, one buffer-sized chunk at a time.
 *
 * <p>The stream is read through a {@code BOMInputStream} configured for UTF-8, UTF-16 and UTF-32
 * byte order marks. Malformed or unmappable characters make the encoder report an error instead
 * of substituting a replacement.
 *
 * @param inStream the encoded input; closed when this method returns
 * @param charset the character set used to decode {@code inStream}
 * @param isRequireLineEndingConvert whether each chunk is passed through
 *        {@code findAndReplaceEncodedClientLineEndingIfRequireLineEndingCovert} before digesting
 * @param clientLineEnding the client line ending handed to that helper
 * @throws IOException if reading or encoding fails
 */
private void digestEncodedStreamToUtf8(@Nonnull InputStream inStream, @Nonnull Charset charset,
        boolean isRequireLineEndingConvert, @Nullable ClientLineEnding clientLineEnding)
        throws IOException {
    try (BOMInputStream bomAwareStream = new BOMInputStream(inStream, false,
            ByteOrderMark.UTF_8, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE,
            ByteOrderMark.UTF_32LE, ByteOrderMark.UTF_32BE);
         InputStreamReader decoder = new InputStreamReader(bomAwareStream, charset)) {

        CharsetEncoder toUtf8 = CharsetDefs.UTF8.newEncoder()
                .onMalformedInput(CodingErrorAction.REPORT)
                .onUnmappableCharacter(CodingErrorAction.REPORT);
        char[] chunk = new char[bufferSize];

        for (int count = decoder.read(chunk); count > 0; count = decoder.read(chunk)) {
            // The server digest is computed over UTF-8, so every chunk is re-encoded first.
            ByteBuffer encoded = toUtf8.encode(CharBuffer.wrap(chunk, 0, count));
            ByteBuffer digestInput = isRequireLineEndingConvert
                    ? findAndReplaceEncodedClientLineEndingIfRequireLineEndingCovert(
                            decoder, toUtf8, encoded, clientLineEnding)
                    : encoded;
            update(digestInput.array(), digestInput.arrayOffset(), digestInput.limit());
        }
    }
}
/**
 * Computes the size a UTF-16 file has once it is read through a {@code BOMInputStream} and
 * converted to UTF-8.
 *
 * <p>The file is read through a {@code BOMInputStream} set up for UTF-16LE and UTF-16BE byte
 * order marks with the include flag {@code false}; the bytes it yields are converted with a
 * {@code CharsetConverter} from {@code utf16} to {@code CharsetDefs.UTF8}.
 *
 * @param testResourceFile the UTF-16 encoded file to measure
 * @param utf16 the UTF-16 character set the file content is encoded in
 * @return the limit of the converted buffer
 * @throws Exception if the file cannot be read or converted
 */
private long getUtf16FileSizeAfterRemoveBomAndEncodedByUtf8(File testResourceFile, Charset utf16) throws Exception {
    try (BOMInputStream bomStripped = new BOMInputStream(
            new FileInputStream(testResourceFile), false,
            ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE)) {
        ByteBuffer utf16Bytes = ByteBuffer.wrap(IOUtils.toByteArray(bomStripped));
        return new CharsetConverter(utf16, CharsetDefs.UTF8).convert(utf16Bytes).limit();
    }
}
/**
 * Decodes {@code inStream} with {@code charset}, re-encodes the characters as UTF-8 and feeds the
 * resulting bytes to {@code update}, one buffer-sized chunk at a time.
 *
 * <p>The stream is read through a {@code BOMInputStream} configured for UTF-8, UTF-16 and UTF-32
 * byte order marks. Malformed or unmappable characters make the encoder report an error instead
 * of substituting a replacement.
 *
 * @param inStream the encoded input; closed when this method returns
 * @param charset the character set used to decode {@code inStream}
 * @param isRequireLineEndingConvert whether each chunk is passed through
 *        {@code findAndReplaceEncodedClientLineEndingIfRequireLineEndingCovert} before digesting
 * @param clientLineEnding the client line ending handed to that helper
 * @throws IOException if reading or encoding fails
 */
private void digestEncodedStreamToUtf8(@Nonnull InputStream inStream, @Nonnull Charset charset,
        boolean isRequireLineEndingConvert, @Nullable ClientLineEnding clientLineEnding)
        throws IOException {
    try (BOMInputStream bomAwareStream = new BOMInputStream(inStream, false,
            ByteOrderMark.UTF_8, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE,
            ByteOrderMark.UTF_32LE, ByteOrderMark.UTF_32BE);
         InputStreamReader decoder = new InputStreamReader(bomAwareStream, charset)) {

        CharsetEncoder toUtf8 = CharsetDefs.UTF8.newEncoder()
                .onMalformedInput(CodingErrorAction.REPORT)
                .onUnmappableCharacter(CodingErrorAction.REPORT);
        char[] chunk = new char[bufferSize];

        for (int count = decoder.read(chunk); count > 0; count = decoder.read(chunk)) {
            // The server digest is computed over UTF-8, so every chunk is re-encoded first.
            ByteBuffer encoded = toUtf8.encode(CharBuffer.wrap(chunk, 0, count));
            ByteBuffer digestInput = isRequireLineEndingConvert
                    ? findAndReplaceEncodedClientLineEndingIfRequireLineEndingCovert(
                            decoder, toUtf8, encoded, clientLineEnding)
                    : encoded;
            update(digestInput.array(), digestInput.arrayOffset(), digestInput.limit());
        }
    }
}
/**
 * Computes the size a UTF-16 file has once it is read through a {@code BOMInputStream} and
 * converted to UTF-8.
 *
 * <p>The file is read through a {@code BOMInputStream} set up for UTF-16LE and UTF-16BE byte
 * order marks with the include flag {@code false}; the bytes it yields are converted with a
 * {@code CharsetConverter} from {@code utf16} to {@code CharsetDefs.UTF8}.
 *
 * @param testResourceFile the UTF-16 encoded file to measure
 * @param utf16 the UTF-16 character set the file content is encoded in
 * @return the limit of the converted buffer
 * @throws Exception if the file cannot be read or converted
 */
private long getUtf16FileSizeAfterRemoveBomAndEncodedByUtf8(File testResourceFile, Charset utf16) throws Exception {
    try (BOMInputStream bomStripped = new BOMInputStream(
            new FileInputStream(testResourceFile), false,
            ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE)) {
        ByteBuffer utf16Bytes = ByteBuffer.wrap(IOUtils.toByteArray(bomStripped));
        return new CharsetConverter(utf16, CharsetDefs.UTF8).convert(utf16Bytes).limit();
    }
}
/**
 * Wraps {@code inputStream} in a {@link BOMInputStream} set up with the UTF-8, UTF-16 (BE/LE)
 * and UTF-32 (BE/LE) byte order marks.
 *
 * @param inputStream the stream to wrap
 * @return a new {@code BOMInputStream} over {@code inputStream}
 */
private static @Nonnull BOMInputStream bomInputStream(@Nonnull InputStream inputStream) {
    ByteOrderMark[] detectableMarks = {
        ByteOrderMark.UTF_8,
        ByteOrderMark.UTF_16BE,
        ByteOrderMark.UTF_16LE,
        ByteOrderMark.UTF_32BE,
        ByteOrderMark.UTF_32LE
    };
    return new BOMInputStream(inputStream, detectableMarks);
}
/**
 * Returns a {@link Reader} over {@code inputStream} that decodes with the character set named by
 * the stream's byte order mark, falling back to {@code defaultCharset} when no mark is found.
 *
 * <p>Only UTF-8, UTF-16LE and UTF-16BE byte order marks are looked for. The wrapped stream is
 * buffered before it is handed to the reader.
 *
 * @param inputStream the stream to read from
 * @param defaultCharset name of the character set to use when no byte order mark is detected
 * @return a reader over the stream in the selected character set
 * @throws IOException if the byte order mark cannot be read or the character set name is not
 *         supported
 */
public static Reader getCharsetDetectingInputStreamReader(InputStream inputStream, String defaultCharset) throws IOException {
    BOMInputStream bomAwareStream = new BOMInputStream(inputStream,
            ByteOrderMark.UTF_8, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE);
    ByteOrderMark detectedMark = bomAwareStream.getBOM();

    String charsetName;
    if (detectedMark != null) {
        charsetName = detectedMark.getCharsetName();
    } else {
        charsetName = defaultCharset;
    }
    return new InputStreamReader(new BufferedInputStream(bomAwareStream), charsetName);
}