下面列出了 org.apache.commons.io.input.BOMInputStream#hasBOM() 的实例代码,您可以点击链接到 GitHub 查看源代码,也可以在右侧发表评论。
/**
 * Parses a <code>robots.txt</code> file contained in a {@link URIResponse} and
 * returns the corresponding filter as an array of sorted prefixes.
 *
 * <p>The HTTP status of the response decides what happens:
 * <ul>
 * <li>any status outside class 2xx is {@linkplain Logger#info(String) logged};
 * <li>statuses of class 4xx and 5xx yield the empty filter (everything is allowed);
 * <li>statuses of class 2xx and 3xx cause the tentative parsing of the response content;
 * <li>in the remaining cases {@code null} is returned (everything is considered forbidden).
 * </ul>
 *
 * <p>The content is decoded as UTF-8, as suggested by
 * https://developers.google.com/search/reference/robots_txt; a leading UTF-8 byte order
 * mark, if present, is dropped before parsing. The entity content stream is not closed
 * by this method.
 *
 * @param robotsResponse the response containing <code>robots.txt</code>.
 * @param userAgent the string representing the user agent of interest.
 * @return an array of character arrays, which are prefixes of the URLs not to follow, in sorted order,
 * or {@code null}
 * @throws IOException if the response content cannot be obtained or read.
 */
public static char[][] parseRobotsResponse(final URIResponse robotsResponse, final String userAgent) throws IOException {
    final int status = robotsResponse.response().getStatusLine().getStatusCode();
    final int statusClass = status / 100;
    if (statusClass != 2) {
        LOGGER.info("Got status {} while fetching robots: URL was {}", Integer.valueOf(status), robotsResponse.uri());
    }
    if (statusClass == 4 || statusClass == 5) {
        // For status 4xx and 5xx, we consider everything allowed.
        return EMPTY_ROBOTS_FILTER;
    }
    if (statusClass != 2 && statusClass != 3) {
        // For status 2xx and 3xx we parse the content. For the rest, we consider everything forbidden.
        return null;
    }
    // With include == false the stream itself swallows a leading UTF-8 BOM, so there is
    // no need to measure the BOM and skip it by hand (and no skip() result to check).
    final BOMInputStream bomInputStream = new BOMInputStream(robotsResponse.response().getEntity().getContent(), false);
    final char[][] result = parseRobotsReader(new InputStreamReader(bomInputStream, Charsets.UTF_8), userAgent);
    if (LOGGER.isDebugEnabled()) {
        LOGGER.debug("Robots for {} successfully got with status {}: {}", robotsResponse.uri(), Integer.valueOf(status), toString(result));
    }
    return result;
}
/**
 * Reads the whole content of the file at the given {@link java.nio.file.Path} into a string.
 *
 * <p>The file is opened through a {@link BOMInputStream} configured with the {@code boms}
 * field and with BOM inclusion disabled, so a detected byte order mark is not part of the
 * returned text. When a BOM is detected its charset is used for decoding; otherwise the
 * content is decoded as UTF-8. The stream is always closed before the method returns.
 *
 * @param path the file to read
 * @return the decoded content of the file
 * @throws UncheckedIOException if the file cannot be opened, read or closed
 */
public String getFileContent(Path path) {
    // try-with-resources: the original never closed the stream and leaked the file handle.
    try (BOMInputStream inputStream = new BOMInputStream(FileUtils.openInputStream(path.toFile()), false, boms)) {
        if (inputStream.hasBOM()) {
            return IOUtils.toString(inputStream, inputStream.getBOMCharsetName());
        }
        return IOUtils.toString(inputStream, StandardCharsets.UTF_8);
    } catch (IOException e) {
        throw new UncheckedIOException("Cannot get file content for path: " + path.toString(), e);
    }
}
/**
 * Writes the content into a file using the same format as the source file.
 *
 * <p>The source file is inspected for a byte order mark among those in the {@code boms}
 * field. If one is found, the output file receives that BOM followed by the content
 * encoded in the BOM's charset; otherwise the content is written as UTF-8 without a BOM.
 * The source file is closed before anything is written, so the source and the output
 * may be the same file.
 *
 * @param content content to be written
 * @param path path to the file to write
 * @param sourceFileMatch the match whose file determines the BOM and charset to use
 * @throws CommandException if the source file cannot be read or the output file cannot be written
 */
public void writeFileContent(String content, Path path, FileMatch sourceFileMatch) throws CommandException {
    try {
        File outputFile = path.toFile();

        // Capture the BOM information first and release the source file handle right away;
        // the original left this stream open for the lifetime of the process.
        byte[] bomBytes = null;
        String bomCharsetName = null;
        try (BOMInputStream inputStream = new BOMInputStream(FileUtils.openInputStream(sourceFileMatch.getPath().toFile()), false, boms)) {
            if (inputStream.hasBOM()) {
                bomBytes = inputStream.getBOM().getBytes();
                bomCharsetName = inputStream.getBOMCharsetName();
            }
        }

        if (bomBytes != null) {
            // First call truncates/creates the file with the BOM, second call appends the payload.
            FileUtils.writeByteArrayToFile(outputFile, bomBytes);
            FileUtils.writeByteArrayToFile(outputFile, content.getBytes(bomCharsetName), true);
        } else {
            FileUtils.writeStringToFile(outputFile, content, StandardCharsets.UTF_8);
        }
    } catch (IOException e) {
        throw new CommandException("Cannot write file content in path: " + path.toString(), e);
    }
}
/**
 * Creates an {@link XMLEventReader} over the given stream, picking the character set
 * from a leading byte order mark when there is one.
 *
 * <p>UTF-8, UTF-16BE and UTF-16LE byte order marks are recognised and excluded from the
 * data handed to the parser. Without a BOM the charset returned by
 * {@code UTF8Charset.get()} is used.
 *
 * <p>NOTE(review): the {@link XMLInputFactory} is used with its default settings; no
 * DTD / external-entity restrictions are configured here.
 *
 * @param stream the raw XML input
 * @return an event reader over the decoded stream
 * @throws XMLStreamException if BOM detection fails with an I/O error, or if the reader
 *         cannot be created
 */
public static XMLEventReader createEventReader(InputStream stream) throws XMLStreamException {
    final Charset fallbackCharset = UTF8Charset.get();
    final BOMInputStream bomAwareStream = new BOMInputStream(
            stream, false, ByteOrderMark.UTF_8, ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_16LE);

    final Charset effectiveCharset;
    try {
        effectiveCharset = bomAwareStream.hasBOM()
                ? Charset.forName(bomAwareStream.getBOMCharsetName())
                : fallbackCharset;
    } catch (IOException ioFailure) {
        throw new XMLStreamException(ioFailure);
    }

    return XMLInputFactory.newInstance()
            .createXMLEventReader(new XMLFixInputStreamReader(bomAwareStream, effectiveCharset));
}
/**
 * Runs the validation pipeline on an uploaded C-CDA file and collects all results.
 *
 * <p>The file is read through a {@link BOMInputStream} and decoded as UTF-8. The steps are:
 * <ol>
 * <li>MDHT validation always runs.</li>
 * <li>Vocabulary validation runs only when the MDHT results contain no schema error
 * (per {@code mdhtResultsHaveSchemaError}) and the objective allows it
 * (per {@code objectiveAllowsVocabularyValidation}). A {@code null} or empty
 * {@code vocabularyConfig} is replaced by {@code VocabularyConstants.Config.DEFAULT}.</li>
 * <li>Content validation runs only when vocabulary validation ran and
 * {@code objectiveAllowsContentValidation} accepts the objective.</li>
 * </ol>
 * Skipped steps are reported in the log. The file input stream is closed in all cases.
 *
 * @param validationObjective the objective forwarded to every validator and used to gate the steps
 * @param referenceFileName the reference file name forwarded to every validator
 * @param ccdaFile the uploaded C-CDA document
 * @param curesUpdate flag forwarded to content validation
 * @param vocabularyConfig vocabulary configuration name; may be {@code null} or empty
 * @param severityLevel the severity level forwarded to every validator
 * @return the accumulated results of all validators that ran; never {@code null}
 * @throws RuntimeException if the contents of {@code ccdaFile} cannot be read
 * @throws SAXException declared for the validators invoked here
 * @throws Exception declared for the validators invoked here
 */
private List<RefCCDAValidationResult> runValidators(String validationObjective, String referenceFileName,
        MultipartFile ccdaFile, boolean curesUpdate, String vocabularyConfig, SeverityLevel severityLevel)
        throws SAXException, Exception {
    List<RefCCDAValidationResult> validatorResults = new ArrayList<>();
    InputStream ccdaFileInputStream = null;
    try {
        ccdaFileInputStream = ccdaFile.getInputStream();
        BOMInputStream bomInputStream = new BOMInputStream(ccdaFileInputStream);
        if (bomInputStream.hasBOM()) {
            logger.warn(
                    "The C-CDA file has a BOM which is supposed to be removed by BOMInputStream - encoding w/o BOM: "
                            + bomInputStream.getBOMCharsetName());
        }
        String ccdaFileContents = IOUtils.toString(bomInputStream, "UTF-8");

        List<RefCCDAValidationResult> mdhtResults = doMDHTValidation(validationObjective, referenceFileName,
                ccdaFileContents, severityLevel);
        if (mdhtResults != null && !mdhtResults.isEmpty()) {
            logger.info("Adding MDHT results");
            validatorResults.addAll(mdhtResults);
        }

        boolean isSchemaErrorInMdhtResults = mdhtResultsHaveSchemaError(mdhtResults);
        boolean isObjectiveAllowingVocabularyValidation = objectiveAllowsVocabularyValidation(validationObjective);
        if (!isSchemaErrorInMdhtResults && isObjectiveAllowingVocabularyValidation) {
            if (vocabularyConfig == null || vocabularyConfig.isEmpty()) {
                // The conditional must be parenthesised: '+' binds tighter than '!=' and '?:',
                // so without the parentheses the whole concatenation became the (always true)
                // condition and only the null/empty vocabularyConfig itself was logged.
                logger.warn("Invalid vocabularyConfig of '"
                        + (vocabularyConfig != null ? vocabularyConfig : "null")
                        + "' " + "received. Assigned default config of '"
                        + VocabularyConstants.Config.DEFAULT + "'.");
                vocabularyConfig = VocabularyConstants.Config.DEFAULT;
            }
            List<RefCCDAValidationResult> vocabResults = doVocabularyValidation(validationObjective,
                    referenceFileName, ccdaFileContents, vocabularyConfig, severityLevel);
            if (vocabResults != null && !vocabResults.isEmpty()) {
                logger.info("Adding Vocabulary results");
                validatorResults.addAll(vocabResults);
            }
            if (objectiveAllowsContentValidation(validationObjective)) {
                List<RefCCDAValidationResult> contentResults = doContentValidation(validationObjective,
                        referenceFileName, ccdaFileContents, curesUpdate, severityLevel);
                if (contentResults != null && !contentResults.isEmpty()) {
                    logger.info("Adding Content results");
                    validatorResults.addAll(contentResults);
                }
            } else {
                logger.info("Skipping Content validation due to: " + "validationObjective ("
                        + (validationObjective != null ? validationObjective : "null objective")
                        + ") is not relevant or valid for Content validation");
            }
        } else {
            // " and " joins the two reasons only when both of them apply.
            String separator = !isObjectiveAllowingVocabularyValidation && isSchemaErrorInMdhtResults ? " and "
                    : "";
            logger.info("Skipping Vocabulary (and thus Content) validation due to: "
                    + (isObjectiveAllowingVocabularyValidation ? ""
                            : "validationObjective POSTed: "
                                    + (validationObjective != null ? validationObjective : "null objective")
                                    + separator)
                    + (isSchemaErrorInMdhtResults ? "C-CDA Schema error(s) found" : ""));
        }
    } catch (IOException e) {
        throw new RuntimeException("Error getting CCDA contents from provided file", e);
    } finally {
        closeFileInputStream(ccdaFileInputStream);
    }
    return validatorResults;
}
public RpcInputStream(RpcPerforceFile file, Charset fromCharset) throws IOException, FileEncoderException {
super(file);
if (file == null) {
throw new NullPointerError(
"Null RpcPerforceFile passed to RpcInputStream constructor");
}
this.file = file;
this.fileType = this.file.getFileType();
this.lineEnding = this.file.getLineEnding();
if (this.lineEnding == null) {
this.lineEnding = ClientLineEnding.FST_L_LOCAL;
}
if (this.fileType == null) {
this.fileType = RpcPerforceFileType.FST_TEXT;
}
if (isTextType(this.fileType)) {
if (this.fileType == RpcPerforceFileType.FST_TEXT || this.fileType == RpcPerforceFileType.FST_XTEXT) {
this.lineEndStream = new BufferedInputStream(new FileInputStream(file));
} else {
BOMInputStream bis = new BOMInputStream(new FileInputStream(file), UTF_8, UTF_16LE, UTF_16BE);
if (fromCharset == CharsetDefs.UTF16) {
fromCharset = bis.hasBOM() ? Charset.forName(bis.getBOMCharsetName())
: ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN) ?
Charset.forName("UTF-16BE") : Charset.forName("UTF-16LE");
}
this.lineEndStream = new BufferedInputStream(bis);
}
boolean doLineCvt = ClientLineEnding.needsLineEndFiltering(this.lineEnding);
if (fromCharset != null && fromCharset != CharsetDefs.UTF8) {
this.lineEndStream = new CharsetConverterStream(lineEndStream, fromCharset, doLineCvt);
}
if (doLineCvt) {
this.lineEndStream = new RpcLineEndFilterInputStream(
new BufferedInputStream(lineEndStream), this.lineEnding);
}
}
}
public RpcInputStream(RpcPerforceFile file, Charset fromCharset) throws IOException, FileEncoderException {
super(file);
if (file == null) {
throw new NullPointerError(
"Null RpcPerforceFile passed to RpcInputStream constructor");
}
this.file = file;
this.fileType = this.file.getFileType();
this.lineEnding = this.file.getLineEnding();
if (this.lineEnding == null) {
this.lineEnding = ClientLineEnding.FST_L_LOCAL;
}
if (this.fileType == null) {
this.fileType = RpcPerforceFileType.FST_TEXT;
}
if (isTextType(this.fileType)) {
if (this.fileType == RpcPerforceFileType.FST_TEXT || this.fileType == RpcPerforceFileType.FST_XTEXT) {
this.lineEndStream = new BufferedInputStream(new FileInputStream(file));
} else {
BOMInputStream bis = new BOMInputStream(new FileInputStream(file), UTF_8, UTF_16LE, UTF_16BE);
if (fromCharset == CharsetDefs.UTF16) {
fromCharset = bis.hasBOM() ? Charset.forName(bis.getBOMCharsetName())
: ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN) ?
Charset.forName("UTF-16BE") : Charset.forName("UTF-16LE");
}
this.lineEndStream = new BufferedInputStream(bis);
}
boolean doLineCvt = ClientLineEnding.needsLineEndFiltering(this.lineEnding);
if (fromCharset != null && fromCharset != CharsetDefs.UTF8) {
this.lineEndStream = new CharsetConverterStream(lineEndStream, fromCharset, doLineCvt);
}
if (doLineCvt) {
this.lineEndStream = new RpcLineEndFilterInputStream(
new BufferedInputStream(lineEndStream), this.lineEnding);
}
}
}
public RpcInputStream(RpcPerforceFile file, Charset fromCharset) throws IOException, FileEncoderException {
super(file);
if (file == null) {
throw new NullPointerError(
"Null RpcPerforceFile passed to RpcInputStream constructor");
}
this.file = file;
this.fileType = this.file.getFileType();
this.lineEnding = this.file.getLineEnding();
if (this.lineEnding == null) {
this.lineEnding = ClientLineEnding.FST_L_LOCAL;
}
if (this.fileType == null) {
this.fileType = RpcPerforceFileType.FST_TEXT;
}
if (isTextType(this.fileType)) {
if (this.fileType == RpcPerforceFileType.FST_TEXT || this.fileType == RpcPerforceFileType.FST_XTEXT) {
this.lineEndStream = new BufferedInputStream(new FileInputStream(file));
} else {
BOMInputStream bis = new BOMInputStream(new FileInputStream(file), UTF_8, UTF_16LE, UTF_16BE);
if (fromCharset == CharsetDefs.UTF16) {
fromCharset = bis.hasBOM() ? Charset.forName(bis.getBOMCharsetName())
: ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN) ?
Charset.forName("UTF-16BE") : Charset.forName("UTF-16LE");
}
this.lineEndStream = new BufferedInputStream(bis);
}
boolean doLineCvt = ClientLineEnding.needsLineEndFiltering(this.lineEnding);
if (fromCharset != null && fromCharset != CharsetDefs.UTF8) {
this.lineEndStream = new CharsetConverterStream(lineEndStream, fromCharset, doLineCvt);
}
if (doLineCvt) {
this.lineEndStream = new RpcLineEndFilterInputStream(
new BufferedInputStream(lineEndStream), this.lineEnding);
}
}
}
public RpcInputStream(RpcPerforceFile file, Charset fromCharset) throws IOException, FileEncoderException {
super(file);
if (file == null) {
throw new NullPointerError(
"Null RpcPerforceFile passed to RpcInputStream constructor");
}
this.file = file;
this.fileType = this.file.getFileType();
this.lineEnding = this.file.getLineEnding();
if (this.lineEnding == null) {
this.lineEnding = ClientLineEnding.FST_L_LOCAL;
}
if (this.fileType == null) {
this.fileType = RpcPerforceFileType.FST_TEXT;
}
if (isTextType(this.fileType)) {
if (this.fileType == RpcPerforceFileType.FST_TEXT || this.fileType == RpcPerforceFileType.FST_XTEXT) {
this.lineEndStream = new BufferedInputStream(new FileInputStream(file));
} else {
BOMInputStream bis = new BOMInputStream(new FileInputStream(file), UTF_8, UTF_16LE, UTF_16BE);
if (fromCharset == CharsetDefs.UTF16) {
fromCharset = bis.hasBOM() ? Charset.forName(bis.getBOMCharsetName())
: ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN) ?
Charset.forName("UTF-16BE") : Charset.forName("UTF-16LE");
}
this.lineEndStream = new BufferedInputStream(bis);
}
boolean doLineCvt = ClientLineEnding.needsLineEndFiltering(this.lineEnding);
if (fromCharset != null && fromCharset != CharsetDefs.UTF8) {
this.lineEndStream = new CharsetConverterStream(lineEndStream, fromCharset, doLineCvt);
}
if (doLineCvt) {
this.lineEndStream = new RpcLineEndFilterInputStream(
new BufferedInputStream(lineEndStream), this.lineEnding);
}
}
}