类org.apache.commons.io.input.BOMInputStream源码实例Demo

下面列出了 org.apache.commons.io.input.BOMInputStream 的 API 使用实例代码及写法，您也可以点击链接到 GitHub 查看源代码。

源代码1 项目: quaerite   文件: ElevateQueryComparer.java
/**
 * Loads a query set from a CSV file that has a header row.
 *
 * <p>The file is decoded as UTF-8 after being wrapped in a {@link BOMInputStream},
 * and parsed with {@code CSVFormat.EXCEL}. Every record must provide a
 * {@code query} column and an integer {@code count} column. A query that is
 * already present in the set is reported with a warning and is then still
 * handed to {@code querySet.set}.
 *
 * @param file path of the CSV file to read
 * @return the populated query set
 * @throws Exception if the file cannot be read or parsed
 */
private static QuerySet loadQueries(Path file) throws Exception {
    QuerySet querySet = new QuerySet();
    // Both resources are closed in reverse order: the reader first, then the raw stream.
    try (InputStream is = Files.newInputStream(file);
         Reader reader = new InputStreamReader(new BOMInputStream(is), "UTF-8")) {
        Iterable<CSVRecord> records = CSVFormat.EXCEL
                .withFirstRecordAsHeader().parse(reader);
        for (CSVRecord record : records) {
            String q = record.get("query");
            int c = Integer.parseInt(record.get("count"));
            if (querySet.queries.containsKey(q)) {
                LOG.warn("duplicate queries?! >" + q + "<");
            }

            querySet.set(q, c);
        }
    }
    LOG.info("loaded " + querySet.queries.size() + " queries");
    return querySet;
}
 
源代码2 项目: BUbiNG   文件: URLRespectsRobots.java
/** Parses a <code>robots.txt</code> file contained in a {@link URIResponse} and
 * returns the corresponding filter as an array of sorted prefixes. HTTP statuses
 * different from 2xx are logged at info level. HTTP statuses of class 4xx and 5xx
 * generate an empty filter (everything is considered allowed). HTTP statuses
 * 2xx/3xx cause the tentative parsing of the response content. In the remaining
 * cases we return {@code null} (everything is considered forbidden).
 *
 * <p>A leading UTF-8 byte order mark, if present, is dropped before parsing; the
 * content is always decoded as UTF-8.
 *
 * @param robotsResponse the response containing <code>robots.txt</code>.
 * @param userAgent the string representing the user agent of interest.
 * @return an array of character arrays, which are prefixes of the URLs not to follow, in sorted order,
 * or {@code null}
 * @throws IOException if the response content cannot be obtained or read.
 */
public static char[][] parseRobotsResponse(final URIResponse robotsResponse, final String userAgent) throws IOException {
	final int status = robotsResponse.response().getStatusLine().getStatusCode();
	final int statusClass = status / 100;
	if (statusClass != 2) LOGGER.info("Got status " + status + " while fetching robots: URL was " + robotsResponse.uri());
	if (statusClass == 4 || statusClass == 5) return EMPTY_ROBOTS_FILTER; // For status 4xx and 5xx, we consider everything allowed.
	if (statusClass != 2 && statusClass != 3) return null; // For status 2xx and 3xx we parse the content. For the rest, we consider everything forbidden.
	// With include == false the stream itself strips a leading UTF-8 BOM, so no manual
	// length computation or skip() call (whose return value would have to be checked) is needed.
	final BOMInputStream bomInputStream = new BOMInputStream(robotsResponse.response().getEntity().getContent(), false);
	// Parse robots (BOM is ignored, robots are UTF-8, as suggested by https://developers.google.com/search/reference/robots_txt
	final char[][] result = parseRobotsReader(new InputStreamReader(bomInputStream, Charsets.UTF_8), userAgent);
	if (LOGGER.isDebugEnabled()) LOGGER.debug("Robots for {} successfully got with status {}: {}", robotsResponse.uri(), Integer.valueOf(status), toString(result));
	return result;
}
 
源代码3 项目: rdf4j   文件: TurtleParser.java
/**
 * Byte-stream entry point: strips a leading byte order mark from the input, decodes it as UTF-8 and delegates to
 * {@code parse(Reader, String)}.
 *
 * @param in      The stream to read the data from; must not be {@code null}. Its content is decoded as UTF-8.
 * @param baseURI The URI associated with the data; it is not validated here but passed on to
 *                {@code parse(Reader, String)}.
 * @throws IOException              If an I/O error occurred while data was read from the InputStream.
 * @throws RDFParseException        If the parser has found an unrecoverable parse error.
 * @throws RDFHandlerException      If the configured statement handler encountered an unrecoverable error.
 * @throws IllegalArgumentException If the supplied input stream is {@code null}.
 */
@Override
public synchronized void parse(InputStream in, String baseURI)
		throws IOException, RDFParseException, RDFHandlerException {
	if (in == null) {
		throw new IllegalArgumentException("Input stream must not be 'null'");
	}

	try {
		// include == false: the BOM bytes are consumed by the wrapper and never reach the reader.
		BOMInputStream bomFreeStream = new BOMInputStream(in, false);
		InputStreamReader utf8Reader = new InputStreamReader(bomFreeStream, StandardCharsets.UTF_8);
		parse(utf8Reader, baseURI);
	} catch (UnsupportedEncodingException e) {
		// UTF-8 is expected to be available everywhere; surface this as an unchecked failure.
		throw new RuntimeException(e);
	}
}
 
源代码4 项目: rdf4j   文件: RDFXMLParser.java
/**
 * Parses RDF/XML from a byte stream. The stream is wrapped so that a leading byte order mark is dropped, turned into
 * a SAX {@code InputSource} whose system id is the base URI, and handed to {@code parse(InputSource)}.
 *
 * @param in      The stream to read the data from; must not be {@code null}.
 * @param baseURI The URI associated with the data, used as the system id; must not be {@code null}.
 * @throws IOException              If an I/O error occurred while data was read from the InputStream.
 * @throws RDFParseException        If the parser has found an unrecoverable parse error.
 * @throws RDFHandlerException      If the configured statement handler encountered an unrecoverable error.
 * @throws IllegalArgumentException If the supplied input stream or base URI is {@code null}.
 */
@Override
public synchronized void parse(InputStream in, String baseURI)
		throws IOException, RDFParseException, RDFHandlerException {
	if (in == null) {
		throw new IllegalArgumentException("Input stream cannot be 'null'");
	}
	if (baseURI == null) {
		throw new IllegalArgumentException("Base URI cannot be 'null'");
	}

	// include == false: the BOM bytes are consumed by the wrapper and never reach the XML parser.
	BOMInputStream bomFreeStream = new BOMInputStream(in, false);
	InputSource xmlSource = new InputSource(bomFreeStream);
	xmlSource.setSystemId(baseURI);
	parse(xmlSource);
}
 
源代码5 项目: rdf4j   文件: NTriplesParser.java
/**
 * Byte-stream entry point: strips a leading byte order mark from the input, decodes it as UTF-8 and delegates to
 * {@code parse(Reader, String)}.
 *
 * @param in      The stream to read the data from; must not be {@code null}. Its content is decoded as UTF-8.
 * @param baseURI The URI associated with the data; it is not validated here but passed on to
 *                {@code parse(Reader, String)}.
 * @throws IOException              If an I/O error occurred while data was read from the InputStream.
 * @throws RDFParseException        If the parser has found an unrecoverable parse error.
 * @throws RDFHandlerException      If the configured statement handler encountered an unrecoverable error.
 * @throws IllegalArgumentException If the supplied input stream is {@code null}.
 */
@Override
public synchronized void parse(InputStream in, String baseURI)
		throws IOException, RDFParseException, RDFHandlerException {
	if (in == null) {
		throw new IllegalArgumentException("Input stream can not be 'null'");
	}

	try {
		// The wrapper swallows the BOM (include == false) before the bytes are decoded.
		InputStreamReader decoded = new InputStreamReader(new BOMInputStream(in, false),
				StandardCharsets.UTF_8);
		parse(decoded, baseURI);
	} catch (UnsupportedEncodingException e) {
		// UTF-8 is expected to be available everywhere; surface this as an unchecked failure.
		throw new RuntimeException(e);
	}
}
 
源代码6 项目: mojito   文件: CommandHelper.java
/**
 * Reads the whole content of a file as a string.
 *
 * <p>If the file starts with one of the byte order marks configured in {@code boms},
 * the BOM is excluded from the content and its charset is used for decoding;
 * otherwise the file is decoded as UTF-8.
 *
 * @param path the file to read
 * @return the decoded file content
 * @throws UncheckedIOException if the file cannot be opened or read
 */
public String getFileContent(Path path) {
    File file = path.toFile();
    // try-with-resources guarantees the underlying file handle is released on every path.
    try (BOMInputStream inputStream = new BOMInputStream(FileUtils.openInputStream(file), false, boms)) {
        if (inputStream.hasBOM()) {
            return IOUtils.toString(inputStream, inputStream.getBOMCharsetName());
        }
        return IOUtils.toString(inputStream, StandardCharsets.UTF_8);
    } catch (IOException e) {
        throw new UncheckedIOException("Cannot get file content for path: " + path.toString(), e);
    }
}
 
源代码7 项目: zap-extensions   文件: BinList.java
/**
 * Builds the trie of BIN records from the CSV resource {@code BINLIST}.
 *
 * <p>The resource is read as UTF-8 (wrapped in a {@link BOMInputStream}) and parsed
 * with the first record as header; the columns {@code bin}, {@code brand},
 * {@code category} and {@code issuer} are read from each record, keyed by
 * {@code bin}. If the resource is missing or cannot be read, a warning is logged
 * and an empty trie is returned.
 *
 * @return the populated trie, or an empty one when loading failed
 */
private static Trie<String, BinRecord> createTrie() {
    Trie<String, BinRecord> trie = new PatriciaTrie<>();
    Iterable<CSVRecord> records;
    try (InputStream in = BinList.class.getResourceAsStream(BINLIST)) {
        // getResourceAsStream signals a missing resource with null; test for it
        // explicitly instead of relying on a NullPointerException further down.
        if (in == null) {
            LOGGER.warn("Resource not found while loading: " + BINLIST);
            return trie;
        }
        try (BOMInputStream bomStream = new BOMInputStream(in);
                InputStreamReader inStream =
                        new InputStreamReader(bomStream, StandardCharsets.UTF_8)) {
            records = CSVFormat.DEFAULT.withFirstRecordAsHeader().parse(inStream).getRecords();
        }
    } catch (IOException e) {
        LOGGER.warn("Exception while loading: " + BINLIST, e);
        return trie;
    }

    for (CSVRecord record : records) {
        String bin = record.get("bin");
        trie.put(
                bin,
                new BinRecord(
                        bin,
                        record.get("brand"),
                        record.get("category"),
                        record.get("issuer")));
    }
    return trie;
}
 
源代码8 项目: webanno   文件: ConstraintsServiceImpl.java
/**
 * Reads the stored rules file of a constraint set and returns its content as a string.
 * The file is located under the repository path, in the project's constraints folder,
 * and is named after the id of the set with a {@code .txt} suffix. It is decoded as
 * UTF-8 after being wrapped in a {@link BOMInputStream}. After reading, an info message
 * is logged with the project id placed in the MDC.
 *
 * @param aSet
 *            the constraint set whose rules are read.
 * @return the content of the rules file.
 * @throws IOException
 *             if the file cannot be opened or read.
 */
@Override
public String readConstrainSet(ConstraintSet aSet)
    throws IOException
{
    String projectDir = repositoryProperties.getPath().getAbsolutePath() + "/"
            + PROJECT_FOLDER + "/" + aSet.getProject().getId();
    String constraintsDir = projectDir + "/" + ConstraintsService.CONSTRAINTS + "/";
    File rulesFile = new File(constraintsDir, aSet.getId() + ".txt");

    String rules;
    try (BOMInputStream rulesStream = new BOMInputStream(new FileInputStream(rulesFile))) {
        rules = IOUtils.toString(rulesStream, "UTF-8");
    }

    // The MDC entry only needs to live for the duration of the log statement.
    String projectIdForLog = String.valueOf(aSet.getProject().getId());
    try (MDC.MDCCloseable mdcEntry = MDC.putCloseable(Logging.KEY_PROJECT_ID, projectIdForLog)) {
        log.info("Read constraints set [{}] in project [{}]({})",
                aSet.getName(), aSet.getProject().getName(), aSet.getProject().getId());
    }

    return rules;
}
 
源代码9 项目: crawler-commons   文件: SiteMapParser.java
/**
 * Gunzips the given bytes and processes the result as an XML sitemap.
 * The decompressed stream is wrapped in a {@link BOMInputStream} and a
 * {@code SkipLeadingWhiteSpaceInputStream} before being passed to
 * {@code processXml}; the system id of the XML source is the URL with a
 * trailing {@code .gz} removed.
 * 
 * @param url
 *            - URL of the gzipped content
 * @param response
 *            - Gzipped content
 * @return the site map
 * @throws UnknownFormatException
 *             if there is an error parsing the gzip
 * @throws IOException
 *             if there is an error reading in the gzip {@link java.net.URL}
 */
protected AbstractSiteMap processGzippedXML(URL url, byte[] response) throws IOException, UnknownFormatException {

    LOG.debug("Processing gzipped XML");

    final InputStream compressed = new ByteArrayInputStream(response);

    // Drop the .gz suffix so the system id names the XML document itself
    final String xmlUrl = url.toString().replaceFirst("\\.gz$", "");
    LOG.debug("XML url = {}", xmlUrl);

    final InputStream gunzipped = new GZIPInputStream(compressed);
    final InputStream withoutBom = new BOMInputStream(gunzipped);
    final InputSource xmlSource = new InputSource(new SkipLeadingWhiteSpaceInputStream(withoutBom));
    xmlSource.setSystemId(xmlUrl);
    return processXml(url, xmlSource);
}
 
源代码10 项目: nifi   文件: CSVRecordReader.java
/**
 * Creates a record reader over the given stream. The stream is wrapped in a {@link BOMInputStream},
 * decoded with {@code encoding} and parsed by a {@code CSVParser} whose header configuration depends
 * on the two header flags:
 * <ul>
 * <li>no header: the schema's field names are used as header;</li>
 * <li>header present and ignored: the header record is skipped and the schema's field names are used;</li>
 * <li>header present and honoured: the header record is skipped and the first record supplies the header.</li>
 * </ul>
 *
 * @throws IOException if the reader or the parser cannot be created
 */
public CSVRecordReader(final InputStream in, final ComponentLog logger, final RecordSchema schema, final CSVFormat csvFormat, final boolean hasHeader, final boolean ignoreHeader,
                       final String dateFormat, final String timeFormat, final String timestampFormat, final String encoding) throws IOException {
    super(logger, schema, hasHeader, ignoreHeader, dateFormat, timeFormat, timestampFormat);

    final Reader reader = new InputStreamReader(new BOMInputStream(in), encoding);

    final CSVFormat parserFormat;
    if (!hasHeader) {
        parserFormat = csvFormat.withHeader(schema.getFieldNames().toArray(new String[0]));
    } else if (ignoreHeader) {
        parserFormat = csvFormat.withSkipHeaderRecord()
                .withHeader(schema.getFieldNames().toArray(new String[0]));
    } else {
        parserFormat = csvFormat.withSkipHeaderRecord().withFirstRecordAsHeader();
    }

    csvParser = new CSVParser(reader, parserFormat);
}
 
源代码11 项目: nifi   文件: CSVHeaderSchemaStrategy.java
/**
 * Derives a schema from the header line of CSV content: every column name found in the header
 * becomes a field of type {@code STRING}, created with {@code true} as the last argument of
 * {@code RecordField}.
 *
 * @param variables     passed to {@code CSVUtils.createCSVFormat} together with the context
 * @param contentStream the CSV content; it is closed when this method returns
 * @param readSchema    not used by this implementation
 * @return a schema with one string field per header column
 * @throws SchemaNotFoundException if no context is available or the header cannot be read
 */
@Override
public RecordSchema getSchema(Map<String, String> variables, final InputStream contentStream, final RecordSchema readSchema) throws SchemaNotFoundException {
    if (this.context == null) {
        throw new SchemaNotFoundException("Schema Access Strategy intended only for validation purposes and cannot obtain schema");
    }

    try {
        final CSVFormat headerFormat = CSVUtils.createCSVFormat(context, variables).withFirstRecordAsHeader();
        // NOTE(review): no charset is given, so the reader decodes with the platform default - confirm this is intended.
        try (final Reader headerReader = new InputStreamReader(new BOMInputStream(contentStream));
            final CSVParser headerParser = new CSVParser(headerReader, headerFormat)) {

            final List<RecordField> stringFields = new ArrayList<>();
            for (final String header : headerParser.getHeaderMap().keySet()) {
                final RecordField field = new RecordField(header, RecordFieldType.STRING.getDataType(), true);
                stringFields.add(field);
            }

            return new SimpleRecordSchema(stringFields);
        }
    } catch (final Exception e) {
        throw new SchemaNotFoundException("Failed to read Header line from CSV", e);
    }
}
 
源代码12 项目: quaerite   文件: ElevateAnalysisEvaluator.java
/**
 * Reads queries and their counts from a CSV file with a header row.
 *
 * <p>The file is decoded as UTF-8 (wrapped in a {@link BOMInputStream}) and parsed with
 * {@code CSVFormat.EXCEL}. Queries are lower-cased with {@code Locale.US}; when the same
 * lower-cased query occurs more than once its counts are summed. If the file has no
 * {@code count} column, every occurrence counts as 1.
 *
 * @param queryFile the CSV file to read
 * @return a map from lower-cased query to its accumulated count
 * @throws IOException if the file cannot be read
 */
private Map<String, Integer> loadQueries(Path queryFile) throws IOException {
    Map<String, Integer> countsByQuery = new HashMap<>();
    try (Reader reader = new InputStreamReader(
            new BOMInputStream(Files.newInputStream(queryFile)), "UTF-8")) {
        CSVParser parser = CSVFormat.EXCEL
                .withFirstRecordAsHeader().parse(reader);
        final boolean hasCount = parser.getHeaderMap().containsKey("count");

        for (CSVRecord row : parser) {
            String query = row.get("query").toLowerCase(Locale.US);
            int cnt = hasCount ? Integer.parseInt(row.get("count")) : 1;

            // Accumulate onto any count already recorded for this query.
            Integer previous = countsByQuery.get(query);
            countsByQuery.put(query, previous == null ? cnt : cnt + previous);
        }
    }
    return countsByQuery;

}
 
源代码13 项目: quaerite   文件: QueryLoader.java
/**
 * Loads judgments from a CSV file with a header row and adds them to the experiment database.
 *
 * <p>The file is decoded as UTF-8 (wrapped in a {@link BOMInputStream}) and parsed with
 * {@code CSVFormat.EXCEL}. The header decides which optional columns are present and
 * whether the records are read by {@code loadJudgmentsWithId} or
 * {@code loadJudmentsWithoutId}.
 *
 * @param experimentDB the database that receives the judgments
 * @param file         the CSV file to read
 * @param freshStart   if {@code true}, existing judgments are cleared before loading
 * @throws IOException  if the file cannot be read
 * @throws SQLException declared for the database operations
 */
public static void loadJudgments(ExperimentDB experimentDB, Path file,
                                 boolean freshStart) throws IOException, SQLException {
    if (freshStart) {
        experimentDB.clearJudgments();
    }

    Map<String, Judgments> judgmentsMap = null;
    try (InputStream is = Files.newInputStream(file);
         Reader reader = new InputStreamReader(new BOMInputStream(is), "UTF-8")) {
        CSVParser parser = CSVFormat.EXCEL
                .withFirstRecordAsHeader().parse(reader);

        // Inspect the header once to learn which optional columns are available.
        boolean hasJudgments = parser.getHeaderMap().containsKey(DOCUMENT_ID);
        boolean hasQuerySet = parser.getHeaderMap().containsKey(QUERY_SET);
        boolean hasCount = parser.getHeaderMap().containsKey(COUNT);
        boolean hasQueryId = parser.getHeaderMap().containsKey(QUERY_ID);
        Set<String> queryStringNames = getQueryStringNames(parser.getHeaderMap().keySet());

        judgmentsMap = hasQueryId
                ? loadJudgmentsWithId(hasJudgments, hasQuerySet, hasCount,
                        queryStringNames, parser)
                : loadJudmentsWithoutId(hasJudgments, hasQuerySet,
                        hasCount, queryStringNames, parser);
    }

    for (Judgments loaded : judgmentsMap.values()) {
        experimentDB.addJudgment(loaded);
    }
}
 
源代码14 项目: hop   文件: CsvInput.java
/**
 * Reads the first line of a file and splits it into field names.
 *
 * <p>The file is opened through {@code HopVfs}; a leading UTF-8, UTF-16LE or UTF-16BE byte order
 * mark is detected by the {@link BOMInputStream}. The line is decoded with the configured encoding,
 * or with the platform default when none is configured, split with the configured delimiter,
 * enclosure and escape character, stripped of enclosures when one is configured, and trimmed.
 *
 * @param fileName     the file whose first line is read
 * @param csvInputMeta supplies delimiter, enclosure, encoding and escape character
 * @return the field names found on the first line
 * @throws HopException if the file cannot be read
 */
String[] readFieldNamesFromFile( String fileName, CsvInputMeta csvInputMeta ) throws HopException {
  final String delimiter = environmentSubstitute( csvInputMeta.getDelimiter() );
  final String enclosure = environmentSubstitute( csvInputMeta.getEnclosure() );
  final String realEncoding = environmentSubstitute( csvInputMeta.getEncoding() );

  try ( FileObject fileObject = HopVfs.getFileObject( fileName );
        BOMInputStream inputStream =
          new BOMInputStream( HopVfs.getInputStream( fileObject ), ByteOrderMark.UTF_8, ByteOrderMark.UTF_16LE,
            ByteOrderMark.UTF_16BE ) ) {
    // No configured encoding means: decode with the platform default.
    final InputStreamReader reader = Utils.isEmpty( realEncoding )
      ? new InputStreamReader( inputStream )
      : new InputStreamReader( inputStream, realEncoding );
    final EncodingType encodingType = EncodingType.guessEncodingType( reader.getEncoding() );

    final StringBuilder lineBuffer = new StringBuilder( 1000 );
    final String headerLine =
      TextFileInput.getLine( log, reader, encodingType, TextFileInputMeta.FILE_FORMAT_UNIX, lineBuffer );
    final String[] fieldNames =
      TextFileLineUtil.guessStringsFromLine( log, headerLine, delimiter, enclosure, csvInputMeta.getEscapeCharacter() );

    final boolean enclosureConfigured = !Utils.isEmpty( csvInputMeta.getEnclosure() );
    if ( enclosureConfigured ) {
      removeEnclosure( fieldNames, csvInputMeta.getEnclosure() );
    }
    trimFieldNames( fieldNames );
    return fieldNames;
  } catch ( IOException e ) {
    throw new HopFileException( BaseMessages.getString( PKG, "CsvInput.Exception.CreateFieldMappingError" ), e );
  }
}
 
源代码15 项目: htmlunit   文件: WebResponse.java
/**
 * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
 *
 * Returns the response content as a string. The content is decoded with the given charset
 * unless the stream carries a byte order mark: a UTF-16BE or UTF-16LE BOM always selects the
 * matching charset, a UTF-8 BOM selects UTF-8 unless {@code ignoreUtf8Bom} is set.
 * @param encoding the charset/encoding to use when no byte order mark decides otherwise
 * @param ignoreUtf8Bom if true a UTF-8 bom does not override {@code encoding}
 * @return the response content as a string, or null if there is no response data
 *         or reading it failed (the failure is logged as a warning)
 */
public String getContentAsString(final Charset encoding, final boolean ignoreUtf8Bom) {
    if (responseData_ == null) {
        return null;
    }

    try (InputStream in = responseData_.getInputStreamWithBomIfApplicable(BOM_HEADERS)) {
        if (!(in instanceof BOMInputStream)) {
            return IOUtils.toString(in, encoding);
        }

        try (BOMInputStream bomIn = (BOMInputStream) in) {
            // there seems to be a bug in BOMInputStream
            // we have to call this before hasBOM(ByteOrderMark)
            final boolean bomPresent = bomIn.hasBOM();

            if (bomPresent && !ignoreUtf8Bom && bomIn.hasBOM(ByteOrderMark.UTF_8)) {
                return IOUtils.toString(bomIn, UTF_8);
            }
            if (bomPresent && bomIn.hasBOM(ByteOrderMark.UTF_16BE)) {
                return IOUtils.toString(bomIn, UTF_16BE);
            }
            if (bomPresent && bomIn.hasBOM(ByteOrderMark.UTF_16LE)) {
                return IOUtils.toString(bomIn, UTF_16LE);
            }
            return IOUtils.toString(bomIn, encoding);
        }
    }
    catch (final IOException e) {
        LOG.warn(e.getMessage(), e);
        return null;
    }
}
 
源代码16 项目: htmlunit   文件: XmlUtils.java
/**
 * Parses the content of a web response into a DOM document.
 * An empty document is returned when the response is {@code null}, and also when
 * parsing fails on content that the blank-content tracker reports as blank.
 * Parser messages go to {@code DISCARD_MESSAGES_HANDLER} and every external
 * entity is resolved to an empty source.
 *
 * @param webResponse the response from the server
 * @throws IOException if the page could not be created
 * @return the parse result
 * @throws SAXException if the parsing fails on non-blank content
 * @throws ParserConfigurationException if a DocumentBuilder cannot be created
 */
public static Document buildDocument(final WebResponse webResponse)
    throws IOException, SAXException, ParserConfigurationException {

    final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();

    if (webResponse == null) {
        return factory.newDocumentBuilder().newDocument();
    }

    factory.setNamespaceAware(true);

    // decode the body with the response charset, after the BOM wrapper has seen the bytes
    final InputStreamReader contentReader = new InputStreamReader(
            new BOMInputStream(webResponse.getContentAsStream()),
            webResponse.getContentCharset());

    // we have to do the blank input check and the parsing in one step
    final TrackBlankContentReader blankTracker = new TrackBlankContentReader(contentReader);
    final InputSource xmlSource = new InputSource(blankTracker);

    // never fetch external entities; hand the parser an empty replacement instead
    final EntityResolver emptyResolver = new EntityResolver() {
        @Override
        public InputSource resolveEntity(final String publicId, final String systemId)
            throws SAXException, IOException {
            return new InputSource(new StringReader(""));
        }
    };

    final DocumentBuilder builder = factory.newDocumentBuilder();
    builder.setErrorHandler(DISCARD_MESSAGES_HANDLER);
    builder.setEntityResolver(emptyResolver);
    try {
        // this closes the input source/stream
        return builder.parse(xmlSource);
    }
    catch (final SAXException e) {
        if (!blankTracker.wasBlank()) {
            throw e;
        }
        return factory.newDocumentBuilder().newDocument();
    }
}
 
源代码17 项目: HtmlUnit-Android   文件: WebResponse.java
/**
 * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
 *
 * Returns the response content as a string. The content is decoded with the given charset
 * unless the stream carries a byte order mark: a UTF-16BE or UTF-16LE BOM always selects the
 * matching charset, a UTF-8 BOM selects UTF-8 unless {@code ignoreUtf8Bom} is set.
 * @param encoding the charset/encoding to use when no byte order mark decides otherwise
 * @param ignoreUtf8Bom if true a UTF-8 bom does not override {@code encoding}
 * @return the response content as a string, or null if there is no response data,
 *         no input stream, or reading failed (the failure is logged as a warning)
 */
public String getContentAsString(final Charset encoding, final boolean ignoreUtf8Bom) {
    if (responseData_ == null) {
        return null;
    }

    try (InputStream in = responseData_.getInputStream()) {
        if (in == null) {
            return null;
        }

        try (BOMInputStream bomIn = new BOMInputStream(in, BOM_HEADERS)) {
            // there seems to be a bug in BOMInputStream
            // we have to call this before hasBOM(ByteOrderMark)
            if (bomIn.hasBOM()) {
                if (!ignoreUtf8Bom && bomIn.hasBOM(ByteOrderMark.UTF_8)) {
                    return IOUtils.toString(bomIn, UTF_8);
                }
                else if (bomIn.hasBOM(ByteOrderMark.UTF_16BE)) {
                    return IOUtils.toString(bomIn, UTF_16BE);
                }
                else if (bomIn.hasBOM(ByteOrderMark.UTF_16LE)) {
                    return IOUtils.toString(bomIn, UTF_16LE);
                }
            }
            // no byte order mark decided the charset: use the caller's one
            return IOUtils.toString(bomIn, encoding);
        }
    }
    catch (final IOException e) {
        LOG.warn(e);
        return null;
    }
}
 
源代码18 项目: HtmlUnit-Android   文件: XmlUtil.java
/**
 * Turns the content of a web response into a DOM document.
 * A {@code null} response yields an empty document. If parsing fails and the
 * blank-content tracker reports that the content was blank, an empty document
 * is returned as well; otherwise the parse exception is rethrown.
 * Parser messages go to {@code DISCARD_MESSAGES_HANDLER} and every external
 * entity is resolved to an empty source.
 *
 * @param webResponse the response from the server
 * @throws IOException if the page could not be created
 * @return the parse result
 * @throws SAXException if the parsing fails on non-blank content
 * @throws ParserConfigurationException if a DocumentBuilder cannot be created
 */
public static Document buildDocument(final WebResponse webResponse)
    throws IOException, SAXException, ParserConfigurationException {

    final DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance();

    if (webResponse == null) {
        return docFactory.newDocumentBuilder().newDocument();
    }

    docFactory.setNamespaceAware(true);

    final BOMInputStream body = new BOMInputStream(webResponse.getContentAsStream());
    final InputStreamReader bodyReader = new InputStreamReader(body, webResponse.getContentCharset());

    // we have to do the blank input check and the parsing in one step
    final TrackBlankContentReader tracker = new TrackBlankContentReader(bodyReader);
    final InputSource source = new InputSource(tracker);

    final DocumentBuilder docBuilder = docFactory.newDocumentBuilder();
    docBuilder.setErrorHandler(DISCARD_MESSAGES_HANDLER);
    // external entities are replaced by empty content instead of being loaded
    docBuilder.setEntityResolver(new EntityResolver() {
        @Override
        public InputSource resolveEntity(final String publicId, final String systemId)
            throws SAXException, IOException {
            final StringReader nothing = new StringReader("");
            return new InputSource(nothing);
        }
    });

    try {
        // this closes the input source/stream
        return docBuilder.parse(source);
    }
    catch (final SAXException e) {
        final boolean blankContent = tracker.wasBlank();
        if (blankContent) {
            return docFactory.newDocumentBuilder().newDocument();
        }
        throw e;
    }
}
 
源代码19 项目: rdf4j   文件: TriXParser.java
/**
 * Parses TriX data from a byte stream. The stream is wrapped so that a leading byte order mark is dropped, turned
 * into a SAX {@code InputSource} whose system id is the base URI, and handed to {@code parse(InputSource)}.
 *
 * @param in      The stream to read the data from; must not be {@code null}.
 * @param baseURI The URI associated with the data, used as the system id; must not be {@code null}.
 * @throws IOException              If an I/O error occurred while data was read from the InputStream.
 * @throws RDFParseException        If the parser has found an unrecoverable parse error.
 * @throws RDFHandlerException      If the configured statement handler encountered an unrecoverable error.
 * @throws IllegalArgumentException If the supplied input stream or base URI is {@code null}.
 */
@Override
public void parse(InputStream in, String baseURI) throws IOException, RDFParseException, RDFHandlerException {
	if (in == null) {
		throw new IllegalArgumentException("Input stream cannot be 'null'");
	}
	if (baseURI == null) {
		throw new IllegalArgumentException("Base URI cannot be 'null'");
	}

	// include == false: the BOM bytes never reach the XML parser.
	InputSource source = new InputSource(new BOMInputStream(in, false));
	source.setSystemId(baseURI);
	parse(source);
}
 
源代码20 项目: rdf4j   文件: NQuadsParser.java
/**
 * Byte-stream entry point: strips a leading byte order mark from the input, decodes it as UTF-8 and delegates to
 * {@code parse(Reader, String)}.
 *
 * @param inputStream The stream to read the data from; must not be {@code null}.
 * @param baseURI     The URI associated with the data; it is not validated here but passed on to
 *                    {@code parse(Reader, String)}.
 * @throws IllegalArgumentException If the supplied input stream is {@code null}.
 */
@Override
public synchronized void parse(final InputStream inputStream, final String baseURI)
		throws IOException, RDFParseException, RDFHandlerException {
	if (inputStream == null) {
		throw new IllegalArgumentException("Input stream can not be 'null'");
	}

	try {
		// include == false: the BOM bytes are consumed by the wrapper and never reach the reader.
		final BOMInputStream bomFreeStream = new BOMInputStream(inputStream, false);
		final InputStreamReader utf8Reader = new InputStreamReader(bomFreeStream, StandardCharsets.UTF_8);
		parse(utf8Reader, baseURI);
	} catch (UnsupportedEncodingException e) {
		// UTF-8 is expected to be available everywhere; surface this as an unchecked failure.
		throw new RuntimeException(e);
	}
}
 
源代码21 项目: saml-client   文件: SamlClient.java
/**
 * Converts metadata supplied as a {@link java.io.Reader Reader} into a byte stream that
 * does not start with a BOM. The reader is read completely, re-encoded as UTF-8 and
 * wrapped in a {@link BOMInputStream} that excludes a leading byte order mark.
 * OpenSaml won't accept a metadata file if it starts with a BOM.
 * @param metadata The metadata with optional BOM
 * @return An {@link InputStream} over the UTF-8 encoded metadata, without a leading BOM
 * @throws SamlException if the metadata cannot be read
 */
private static InputStream skipBom(Reader metadata) throws SamlException {
  try {
    final String metadataText = IOUtils.toString(metadata);
    final InputStream utf8Bytes = IOUtils.toInputStream(metadataText, StandardCharsets.UTF_8);
    return new BOMInputStream(utf8Bytes, false);
  } catch (IOException e) {
    throw new SamlException("Couldn't read metadata", e);
  }
}
 
源代码22 项目: mojito   文件: CommandHelper.java
/**
 * Writes the content into a file using the same byte order mark and charset as the source file.
 *
 * <p>If the source file starts with one of the byte order marks configured in {@code boms},
 * that BOM is written first and the content is appended, encoded with the BOM's charset.
 * Otherwise the content is written as UTF-8 without a BOM.
 *
 * @param content         content to be written
 * @param path            path to the file
 * @param sourceFileMatch source file whose BOM, if any, decides the output encoding
 * @throws CommandException if the source file cannot be read or the output file cannot be written
 */
public void writeFileContent(String content, Path path, FileMatch sourceFileMatch) throws CommandException {
    File outputFile = path.toFile();
    File sourceFile = sourceFileMatch.getPath().toFile();
    // try-with-resources releases the handle on the source file on every path.
    try (BOMInputStream inputStream = new BOMInputStream(FileUtils.openInputStream(sourceFile), false, boms)) {
        if (inputStream.hasBOM()) {
            FileUtils.writeByteArrayToFile(outputFile, inputStream.getBOM().getBytes());
            // append == true: keep the BOM that was just written
            FileUtils.writeByteArrayToFile(outputFile, content.getBytes(inputStream.getBOMCharsetName()), true);
        } else {
            FileUtils.writeStringToFile(outputFile, content, StandardCharsets.UTF_8);
        }
    } catch (IOException e) {
        throw new CommandException("Cannot write file content in path: " + path.toString(), e);
    }
}
 
源代码23 项目: sonar-tsql-plugin   文件: SourceLinesProvider.java
/**
 * Splits the content of a stream into line descriptors.
 *
 * <p>The stream is wrapped in a {@link BOMInputStream} that excludes a leading byte order mark,
 * decoded with the given charset and read character by character. For every line feed, and
 * once more at the end of input, a {@code SourceLine} is created from the 1-based line number,
 * the number of characters read for that line (including the line feed), and the global
 * character offsets before and after the line.
 *
 * <p>Reading is best effort: an {@link IOException} or runtime failure is logged as a warning
 * and the lines collected so far are returned. JVM {@link Error}s are not swallowed.
 *
 * @param inputStream the content to split; may be {@code null}
 * @param charset     the charset used to decode the content
 * @return the line descriptors, or an empty array if {@code inputStream} is {@code null}
 */
public SourceLine[] getLines(final InputStream inputStream, final Charset charset) {
	if (inputStream == null) {
		return new SourceLine[0];
	}
	final List<SourceLine> sourceLines = new ArrayList<>();

	try (final BufferedReader bufferedReader = new BufferedReader(
			new InputStreamReader(new BOMInputStream(inputStream, false), charset))) {
		int totalLines = 1;
		int global = 0;
		int count = 0;

		int currentChar;
		while ((currentChar = bufferedReader.read()) != -1) {

			global++;
			count++;
			if (currentChar == '\n') {
				sourceLines.add(new SourceLine(totalLines, count, global - count, global));
				totalLines++;
				count = 0;
			}

		}
		// the last line has no terminating line feed
		sourceLines.add(new SourceLine(totalLines, count, global - count, global));
	} catch (final IOException | RuntimeException e) {
		LOGGER.warn("Error occured reading file", e);
	}

	return sourceLines.toArray(new SourceLine[0]);
}
 
源代码24 项目: modernmt   文件: XMLUtils.java
/**
 * Creates an XML event reader over a byte stream, choosing the charset from its byte order mark.
 *
 * <p>A UTF-8, UTF-16BE or UTF-16LE BOM is detected and excluded from the data; when one is
 * present its charset is used, otherwise the charset returned by {@code UTF8Charset.get()}.
 * The bytes are decoded through an {@code XMLFixInputStreamReader}.
 *
 * @param stream the XML content
 * @return an event reader over the decoded content
 * @throws XMLStreamException if the BOM cannot be read or the event reader cannot be created
 */
public static XMLEventReader createEventReader(InputStream stream) throws XMLStreamException {
    final Charset fallback = UTF8Charset.get();

    final BOMInputStream bomStream = new BOMInputStream(stream, false,
            ByteOrderMark.UTF_8, ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_16LE);

    final Charset charset;
    try {
        charset = bomStream.hasBOM() ? Charset.forName(bomStream.getBOMCharsetName()) : fallback;
    } catch (IOException e) {
        throw new XMLStreamException(e);
    }

    final XMLFixInputStreamReader decoded = new XMLFixInputStreamReader(bomStream, charset);
    return XMLInputFactory.newInstance().createXMLEventReader(decoded);
}
 
源代码25 项目: p4ic4idea   文件: MD5Digester.java
/**
 * Feeds the content of an encoded stream into the digest as UTF-8 bytes.
 *
 * <p>A leading UTF-8, UTF-16 or UTF-32 byte order mark is excluded from the data. The stream is
 * decoded with {@code charset} in chunks of {@code bufferSize} characters; each chunk is
 * re-encoded as UTF-8 (malformed or unmappable input is reported as an error) and, if requested,
 * passed through the client line-ending replacement before {@code update} is called.
 *
 * @param inStream                   the encoded content; closed when this method returns
 * @param charset                    the charset the content is encoded in
 * @param isRequireLineEndingConvert whether each chunk goes through line-ending replacement
 * @param clientLineEnding           passed to the line-ending replacement
 * @throws IOException if reading or re-encoding fails
 */
private void digestEncodedStreamToUtf8(@Nonnull InputStream inStream, @Nonnull Charset charset,
                                       boolean isRequireLineEndingConvert, @Nullable ClientLineEnding clientLineEnding)
		throws IOException {

	try (BOMInputStream bomFreeStream = new BOMInputStream(inStream, false,
			ByteOrderMark.UTF_8, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE,
			ByteOrderMark.UTF_32LE, ByteOrderMark.UTF_32BE);

	     InputStreamReader sourceReader = new InputStreamReader(bomFreeStream,
			     charset)) {
		final CharsetEncoder toUtf8 = CharsetDefs.UTF8.newEncoder()
				.onMalformedInput(CodingErrorAction.REPORT)
				.onUnmappableCharacter(CodingErrorAction.REPORT);

		final char[] chunk = new char[bufferSize];
		// NOTE(review): each chunk is encoded on its own; confirm how a surrogate pair split
		// across two chunks is meant to be handled.
		for (int charsRead = sourceReader.read(chunk); charsRead > 0;
				charsRead = sourceReader.read(chunk)) {
			// Convert encoded stream to UTF8 since server digest is UTF8
			final ByteBuffer utf8Bytes = toUtf8.encode(CharBuffer.wrap(chunk, 0, charsRead));

			final ByteBuffer digestInput = isRequireLineEndingConvert
					? findAndReplaceEncodedClientLineEndingIfRequireLineEndingCovert(
							sourceReader, toUtf8, utf8Bytes, clientLineEnding)
					: utf8Bytes;

			update(digestInput.array(), digestInput.arrayOffset(), digestInput.limit());
		}
	}
}
 
源代码26 项目: p4ic4idea   文件: SubmitAndSyncUtf16FileTypeTest.java
/**
 * Computes the size a UTF-16 test file has once its byte order mark is removed and the
 * remaining content is converted to UTF-8.
 *
 * @param testResourceFile file to read
 * @param utf16            source charset handed to the {@code CharsetConverter}
 * @return the {@code limit()} of the buffer produced by the conversion
 * @throws Exception if the file cannot be read or the conversion fails
 */
private long getUtf16FileSizeAfterRemoveBomAndEncodedByUtf8(File testResourceFile, Charset utf16) throws Exception {
    // include=false: a detected UTF-16LE/BE mark is excluded from the bytes read below.
    try (BOMInputStream bomFreeStream = new BOMInputStream(
            new FileInputStream(testResourceFile), false,
            ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE)) {
        ByteBuffer utf16Payload = ByteBuffer.wrap(IOUtils.toByteArray(bomFreeStream));
        CharsetConverter utf16ToUtf8 = new CharsetConverter(utf16, CharsetDefs.UTF8);
        ByteBuffer utf8Payload = utf16ToUtf8.convert(utf16Payload);
        return utf8Payload.limit();
    }
}
 
源代码27 项目: p4ic4idea   文件: MD5Digester.java
/**
 * Decodes {@code inStream} as text in {@code charset}, re-encodes it chunk by chunk as
 * UTF-8 and passes the resulting bytes to {@code update(...)}.
 * <p>
 * The stream is wrapped in a {@link BOMInputStream} built with {@code include = false}
 * for the UTF-8, UTF-16 and UTF-32 byte order marks, so a leading mark of one of those
 * kinds is not handed to the decoder. Both streams are closed when the method returns.
 *
 * @param inStream                   raw bytes of the content to digest
 * @param charset                    charset used to decode {@code inStream}
 * @param isRequireLineEndingConvert when {@code true}, each encoded chunk is first passed
 *                                   through the line-ending replacement helper
 * @param clientLineEnding           forwarded unchanged to the line-ending helper
 * @throws IOException if reading fails, or if the UTF-8 encoder reports malformed or
 *                     unmappable input
 */
private void digestEncodedStreamToUtf8(@Nonnull InputStream inStream, @Nonnull Charset charset,
                                       boolean isRequireLineEndingConvert, @Nullable ClientLineEnding clientLineEnding)
		throws IOException {

	try (BOMInputStream bomStrippedStream = new BOMInputStream(inStream, false,
			ByteOrderMark.UTF_8, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE,
			ByteOrderMark.UTF_32LE, ByteOrderMark.UTF_32BE);
	     InputStreamReader sourceReader = new InputStreamReader(bomStrippedStream, charset)) {

		// Strict encoder: bad input raises an exception instead of being silently replaced.
		CharsetEncoder toUtf8 = CharsetDefs.UTF8.newEncoder()
				.onMalformedInput(CodingErrorAction.REPORT)
				.onUnmappableCharacter(CodingErrorAction.REPORT);
		char[] chunk = new char[bufferSize];

		for (int count = sourceReader.read(chunk); count > 0; count = sourceReader.read(chunk)) {
			// The server-side digest is computed over UTF-8, so every chunk is re-encoded first.
			ByteBuffer encoded = toUtf8.encode(CharBuffer.wrap(chunk, 0, count));

			// NOTE(review): the helper also receives the reader and encoder, so it may
			// consume further characters from the stream -- confirm against its definition.
			ByteBuffer digestInput = isRequireLineEndingConvert
					? findAndReplaceEncodedClientLineEndingIfRequireLineEndingCovert(
							sourceReader, toUtf8, encoded, clientLineEnding)
					: encoded;

			update(digestInput.array(), digestInput.arrayOffset(), digestInput.limit());
		}
	}
}
 
源代码28 项目: p4ic4idea   文件: SubmitAndSyncUtf16FileTypeTest.java
/**
 * Computes the size a UTF-16 test file has once its byte order mark is removed and the
 * remaining content is converted to UTF-8.
 *
 * @param testResourceFile file to read
 * @param utf16            source charset handed to the {@code CharsetConverter}
 * @return the {@code limit()} of the buffer produced by the conversion
 * @throws Exception if the file cannot be read or the conversion fails
 */
private long getUtf16FileSizeAfterRemoveBomAndEncodedByUtf8(File testResourceFile, Charset utf16) throws Exception {
    // include=false: a detected UTF-16LE/BE mark is excluded from the bytes read below.
    try (BOMInputStream bomFreeStream = new BOMInputStream(
            new FileInputStream(testResourceFile), false,
            ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE)) {
        ByteBuffer utf16Payload = ByteBuffer.wrap(IOUtils.toByteArray(bomFreeStream));
        CharsetConverter utf16ToUtf8 = new CharsetConverter(utf16, CharsetDefs.UTF8);
        ByteBuffer utf8Payload = utf16ToUtf8.convert(utf16Payload);
        return utf8Payload.limit();
    }
}
 
源代码29 项目: p4ic4idea   文件: MD5Digester.java
/**
 * Decodes {@code inStream} as text in {@code charset}, re-encodes it chunk by chunk as
 * UTF-8 and passes the resulting bytes to {@code update(...)}.
 * <p>
 * The stream is wrapped in a {@link BOMInputStream} built with {@code include = false}
 * for the UTF-8, UTF-16 and UTF-32 byte order marks, so a leading mark of one of those
 * kinds is not handed to the decoder. Both streams are closed when the method returns.
 *
 * @param inStream                   raw bytes of the content to digest
 * @param charset                    charset used to decode {@code inStream}
 * @param isRequireLineEndingConvert when {@code true}, each encoded chunk is first passed
 *                                   through the line-ending replacement helper
 * @param clientLineEnding           forwarded unchanged to the line-ending helper
 * @throws IOException if reading fails, or if the UTF-8 encoder reports malformed or
 *                     unmappable input
 */
private void digestEncodedStreamToUtf8(@Nonnull InputStream inStream, @Nonnull Charset charset,
                                       boolean isRequireLineEndingConvert, @Nullable ClientLineEnding clientLineEnding)
		throws IOException {

	try (BOMInputStream bomStrippedStream = new BOMInputStream(inStream, false,
			ByteOrderMark.UTF_8, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE,
			ByteOrderMark.UTF_32LE, ByteOrderMark.UTF_32BE);
	     InputStreamReader sourceReader = new InputStreamReader(bomStrippedStream, charset)) {

		// Strict encoder: bad input raises an exception instead of being silently replaced.
		CharsetEncoder toUtf8 = CharsetDefs.UTF8.newEncoder()
				.onMalformedInput(CodingErrorAction.REPORT)
				.onUnmappableCharacter(CodingErrorAction.REPORT);
		char[] chunk = new char[bufferSize];

		for (int count = sourceReader.read(chunk); count > 0; count = sourceReader.read(chunk)) {
			// The server-side digest is computed over UTF-8, so every chunk is re-encoded first.
			ByteBuffer encoded = toUtf8.encode(CharBuffer.wrap(chunk, 0, count));

			// NOTE(review): the helper also receives the reader and encoder, so it may
			// consume further characters from the stream -- confirm against its definition.
			ByteBuffer digestInput = isRequireLineEndingConvert
					? findAndReplaceEncodedClientLineEndingIfRequireLineEndingCovert(
							sourceReader, toUtf8, encoded, clientLineEnding)
					: encoded;

			update(digestInput.array(), digestInput.arrayOffset(), digestInput.limit());
		}
	}
}
 
源代码30 项目: p4ic4idea   文件: SubmitAndSyncUtf16FileTypeTest.java
/**
 * Computes the size a UTF-16 test file has once its byte order mark is removed and the
 * remaining content is converted to UTF-8.
 *
 * @param testResourceFile file to read
 * @param utf16            source charset handed to the {@code CharsetConverter}
 * @return the {@code limit()} of the buffer produced by the conversion
 * @throws Exception if the file cannot be read or the conversion fails
 */
private long getUtf16FileSizeAfterRemoveBomAndEncodedByUtf8(File testResourceFile, Charset utf16) throws Exception {
    // include=false: a detected UTF-16LE/BE mark is excluded from the bytes read below.
    try (BOMInputStream bomFreeStream = new BOMInputStream(
            new FileInputStream(testResourceFile), false,
            ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE)) {
        ByteBuffer utf16Payload = ByteBuffer.wrap(IOUtils.toByteArray(bomFreeStream));
        CharsetConverter utf16ToUtf8 = new CharsetConverter(utf16, CharsetDefs.UTF8);
        ByteBuffer utf8Payload = utf16ToUtf8.convert(utf16Payload);
        return utf8Payload.limit();
    }
}
 
 类所在包
 类方法
 同包方法