下面列出了 org.apache.commons.io.LineIterator#nextLine() 的实例代码，您可以点击链接到 GitHub 查看源代码，也可以在右侧发表评论。
/**
 * Rewrites {@code file} into {@code targetFile}, inserting a distance column after the
 * first token of every line.
 *
 * <p>Each input line is split into tokens. The output line consists of the first token,
 * a tab, the distance (0 when the first token equals {@code startNode}, otherwise
 * {@code Node.INFINITE}), two tabs, the remaining tokens joined by tabs, and a newline.
 * Lines that contain no tokens are skipped.
 *
 * @param conf       configuration used to resolve the file system of {@code file}
 * @param file       input file, decoded as UTF-8
 * @param targetFile output file, created via the same file system and encoded as UTF-8
 * @param startNode  name of the node that receives distance 0
 * @throws IOException if reading the input or writing the output fails
 */
public static void createInputFile(Configuration conf, Path file, Path targetFile,
        String startNode)
        throws IOException {
    FileSystem fs = file.getFileSystem(conf);
    // try-with-resources guarantees the output is closed even when reading fails.
    try (OutputStream os = fs.create(targetFile)) {
        LineIterator iter = org.apache.commons.io.IOUtils
                .lineIterator(fs.open(file), "UTF8");
        try {
            while (iter.hasNext()) {
                String[] parts = StringUtils.split(iter.nextLine());
                if (parts.length == 0) {
                    // Blank line: there is no node name to emit.
                    continue;
                }
                int distance = startNode.equals(parts[0]) ? 0 : Node.INFINITE;
                // Write with the same encoding the input was read with.
                IOUtils.write(parts[0] + '\t' + distance + "\t\t", os, "UTF8");
                IOUtils.write(StringUtils.join(parts, '\t', 1, parts.length), os, "UTF8");
                IOUtils.write("\n", os, "UTF8");
            }
        } finally {
            // Closes the underlying input stream as well.
            LineIterator.closeQuietly(iter);
        }
    }
}
/**
 * Read in GOKRIMP sequences (sorted by compression benefit).
 *
 * <p>Every non-blank line is expected to look like {@code "<item> <item> ... #SUP: <benefit>"}:
 * the part before {@code #SUP:} is split on single spaces and parsed as integers, the part
 * after it is parsed as a double. Entries are stored in file order.
 *
 * @param output file to read
 * @return sequences mapped to their compression benefit, in file order
 * @throws IOException if the file cannot be opened
 * @deprecated gives slightly different results to reference implementation
 */
@Deprecated
public static LinkedHashMap<Sequence, Double> readGoKrimpSequencesSPMF(final File output) throws IOException {
    final LinkedHashMap<Sequence, Double> sequences = new LinkedHashMap<>();
    final LineIterator it = FileUtils.lineIterator(output);
    try {
        while (it.hasNext()) {
            final String line = it.nextLine();
            if (!line.trim().isEmpty()) {
                final String[] splitLine = line.split("#SUP:");
                final String[] items = splitLine[0].trim().split(" ");
                final Sequence seq = new Sequence();
                for (final String item : items) {
                    seq.add(Integer.parseInt(item.trim()));
                }
                final double compressionBenefit = Double.parseDouble(splitLine[1].trim());
                sequences.put(seq, compressionBenefit);
            }
        }
    } finally {
        // Release the file handle even when a line fails to parse.
        LineIterator.closeQuietly(it);
    }
    return sequences;
}
/**
 * Read in GoKrimp sequences (sorted by compression benefit).
 *
 * <p>Only lines whose first character is {@code '['} are parsed. Such a line is split on
 * single spaces; the last token is parsed as the sequence's worth and the tokens from index 1
 * up to (but excluding) the second-to-last token are parsed as the integer items.
 *
 * @param output file to read
 * @return sequences mapped to their worth, in file order
 * @throws IOException if the file cannot be opened
 */
public static LinkedHashMap<Sequence, Double> readGoKrimpSequences(final File output) throws IOException {
    final LinkedHashMap<Sequence, Double> sequences = new LinkedHashMap<>();
    final LineIterator it = FileUtils.lineIterator(output);
    try {
        while (it.hasNext()) {
            final String line = it.nextLine();
            if (!line.trim().isEmpty() && line.charAt(0) == '[') {
                final String[] splitLine = line.split(" ");
                final double worth = Double.parseDouble(splitLine[splitLine.length - 1]);
                final Sequence seq = new Sequence();
                for (int i = 1; i < splitLine.length - 2; i++) {
                    seq.add(Integer.parseInt(splitLine[i]));
                }
                sequences.put(seq, worth);
            }
        }
    } finally {
        // Release the file handle even when a line fails to parse.
        LineIterator.closeQuietly(it);
    }
    return sequences;
}
/**
 * Reloads the word list from {@code wordfilter} when the file's modification time is newer
 * than the recorded {@code lastModified}.
 *
 * <p>Every non-blank line is trimmed, lower-cased and added to {@code words}; entries that
 * are already present are left in place. An {@link IOException} while reading is printed and
 * otherwise ignored.
 */
private static void _CheckReload(){
    // Read the timestamp once so the check and the recorded value cannot diverge.
    final long stamp = wordfilter.lastModified();
    if (stamp <= lastModified) {
        return;
    }
    synchronized (SensitiveWord.class) {
        // Re-check under the lock: another thread may have reloaded while we were waiting.
        if (stamp <= lastModified) {
            return;
        }
        lastModified = stamp;
        LineIterator lines = null;
        try {
            lines = FileUtils.lineIterator(wordfilter, "utf-8");
            while (lines.hasNext()) {
                String line = lines.nextLine();
                if (StringUtils.isNotBlank(line)) {
                    words.add(StringUtils.trim(line).toLowerCase());
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Null-safe; releases the file handle on every path.
            LineIterator.closeQuietly(lines);
        }
    }
}
/**
 * Counts the lines of the file at {@code fileLocation} by walking a Commons IO
 * {@code LineIterator} and expects exactly ten of them.
 */
@Test
public void count_lines_text_apache() throws IOException {
    LineIterator cursor = FileUtils.lineIterator(
            Paths.get(fileLocation).toFile(),
            Charset.defaultCharset().toString());
    long seen = 0;
    try {
        // Advance one line per iteration; the content itself is irrelevant here.
        for (; cursor.hasNext(); cursor.nextLine()) {
            seen++;
        }
    } finally {
        LineIterator.closeQuietly(cursor);
    }
    assertEquals(10, seen);
}
/**
 * Loads a code-point mapping from a classpath resource.
 *
 * <p>The resource is read as UTF-8. Each line is split at the first tab; the first code point
 * of the left part becomes the key and the first code point of the right part the value.
 *
 * @param filename resource name, resolved relative to {@code ChineseCharacterConverter}
 * @return the mapping read from the resource
 * @throws Error wrapping the {@link IOException} if the resource cannot be read
 */
private static Map<Integer, Integer> loadDictionary(String filename) {
    HashMap<Integer, Integer> mapping = new HashMap<>();
    InputStream source = null;
    LineIterator rows = null;
    try {
        source = ChineseCharacterConverter.class.getResourceAsStream(filename);
        rows = IOUtils.lineIterator(source, "UTF-8");
        while (rows.hasNext()) {
            String[] pair = rows.nextLine().split("\t", 2);
            int from = pair[0].codePointAt(0);
            int to = pair[1].codePointAt(0);
            mapping.put(from, to);
        }
        return mapping;
    } catch (IOException e) {
        throw new Error(e);
    } finally {
        IOUtils.closeQuietly(source);
        if (rows != null) {
            rows.close();
        }
    }
}
/**
 * Collects the authors of all messages that mention the event's main term.
 *
 * <p>The event's temporal description is split on {@code ","} into a start and end day, which
 * are converted to time slices. For every slice in that range the {@code .text} and
 * {@code .author} files are read in lockstep (line <i>n</i> of one belongs to line <i>n</i> of
 * the other); the author is kept when the text contains the first space-separated word of the
 * event's textual description.
 *
 * <p>An {@link IOException} for a slice is logged and that slice is skipped. If an author file
 * has fewer lines than its text file, {@code nextLine()} fails with
 * {@code NoSuchElementException}.
 *
 * @param event the event whose term and time interval are used
 * @return the distinct authors found
 */
public HashSet<String> getAuthors(Event event){
    HashSet<String> authors = new HashSet<>();
    String[] interval = event.getTemporalDescription().split(",");
    int timeSliceA = convertDayToTimeSlice(Double.parseDouble(interval[0]));
    int timeSliceB = convertDayToTimeSlice(Double.parseDouble(interval[1]));
    String term = event.getTextualDescription().split(" ")[0];
    NumberFormat formatter = new DecimalFormat("00000000");
    for (int i = timeSliceA; i <= timeSliceB; i++) {
        String sliceBase = path+File.separator+preprocessing+File.separator+formatter.format(i);
        LineIterator textIter = null;
        LineIterator authorIter = null;
        try {
            textIter = FileUtils.lineIterator(new File(sliceBase+".text"));
            authorIter = FileUtils.lineIterator(new File(sliceBase+".author"));
            while (textIter.hasNext()) {
                String text = textIter.nextLine();
                String author = authorIter.nextLine();
                if (text.contains(term)) {
                    authors.add(author);
                }
            }
        } catch (IOException ex) {
            Logger.getLogger(Corpus.class.getName()).log(Level.SEVERE, null, ex);
        } finally {
            // Two file handles per slice would otherwise stay open; closeQuietly is null-safe.
            LineIterator.closeQuietly(textIter);
            LineIterator.closeQuietly(authorIter);
        }
    }
    return authors;
}
/**
 * Reads a transaction database from a UTF-8 text file.
 *
 * <p>Empty lines and lines starting with {@code #}, {@code %} or {@code @} (comments and
 * metadata) are skipped. Every other line is split on single spaces and converted with
 * {@code getTransaction}.
 *
 * @param inputFile the file to read
 * @return the transactions in file order
 * @throws IOException if the file cannot be opened
 */
public static TransactionList readTransactions(final File inputFile) throws IOException {
    final List<Transaction> transactions = new ArrayList<>();
    final LineIterator it = FileUtils.lineIterator(inputFile, "UTF-8");
    try {
        // for each line (transaction) until the end of file
        while (it.hasNext()) {
            final String line = it.nextLine();
            // if the line is a comment, is empty or is a
            // kind of metadata
            if (line.isEmpty() || line.charAt(0) == '#' || line.charAt(0) == '%' || line.charAt(0) == '@') {
                continue;
            }
            // split the transaction into items
            final String[] lineSplited = line.split(" ");
            // convert to Transaction class and add it to the structure
            transactions.add(getTransaction(lineSplited));
        }
    } finally {
        // close the input file, also when a line could not be converted
        LineIterator.closeQuietly(it);
    }
    return new TransactionList(transactions);
}
/**
 * Read in frequent sequences (sorted by support).
 *
 * <p>Every non-blank line is split at {@code #SUP:}. The left part is split on the
 * {@code -1} separator and each piece is parsed as an integer item; the right part is parsed
 * as the integer support. The result is ordered by descending support, ties broken by the
 * sequences' string representation.
 *
 * @param output file to read
 * @return an immutable map from sequence to support, sorted by support
 * @throws IOException if the file cannot be opened
 */
public static SortedMap<Sequence, Integer> readFrequentSequences(final File output) throws IOException {
    final HashMap<Sequence, Integer> sequences = new HashMap<>();
    final LineIterator it = FileUtils.lineIterator(output);
    try {
        while (it.hasNext()) {
            final String line = it.nextLine();
            if (!line.trim().isEmpty()) {
                final String[] splitLine = line.split("#SUP:");
                final String[] items = splitLine[0].trim().split("-1");
                final Sequence seq = new Sequence();
                for (final String item : items) {
                    seq.add(Integer.parseInt(item.trim()));
                }
                final int supp = Integer.parseInt(splitLine[1].trim());
                sequences.put(seq, supp);
            }
        }
    } finally {
        // Release the file handle even when a line fails to parse.
        LineIterator.closeQuietly(it);
    }
    // Sort sequences by support
    final Ordering<Sequence> comparator = Ordering.natural().reverse().onResultOf(Functions.forMap(sequences))
            .compound(Ordering.usingToString());
    return ImmutableSortedMap.copyOf(sequences, comparator);
}
/**
 * Reads a file from the data folder and hands its content to {@code MCDebug.paste}.
 *
 * <p>The file is read as UTF-8 and every line is terminated with {@code "\n"}. If reading
 * fails, the stack trace is printed and whatever was read up to that point is still pasted.
 *
 * @param fileName name of the file inside the data folder
 * @return {@code null} if the file does not exist, otherwise the result of
 *         {@code MCDebug.paste(fileName, content)}
 */
public String uploadFile(String fileName) {
    File file = new File(getDataFolder(), fileName);
    if (!file.exists()) {
        return null;
    }
    // StringBuilder avoids re-copying the whole content for every appended line.
    StringBuilder lines = new StringBuilder();
    try {
        LineIterator it = FileUtils.lineIterator(file, "UTF-8");
        try {
            while (it.hasNext()) {
                lines.append(it.nextLine()).append("\n");
            }
        } finally {
            it.close();
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return MCDebug.paste(fileName, lines.toString());
}
/**
 * Searches the contents of a file line by line with the shared {@code matcher}.
 *
 * <p>Reading stops at the first line that contains a NUL character (the file is then treated
 * as binary) or at the first line in which the matcher finds a match.
 *
 * @param file The file to search
 * @return {@code true} if a line matched; {@code false} if no line matched, the file looked
 *         binary, or the file could not be opened (the error is logged)
 */
private boolean searchFile(File file) {
    LineIterator rows;
    try {
        rows = FileUtils.lineIterator(file);
    } catch (IOException e) {
        logger.error("FileGrepper.searchFile: " + e.getLocalizedMessage());
        return false;
    }
    try {
        while (rows.hasNext()) {
            String current = rows.nextLine();
            boolean looksBinary = current.contains("\0");
            if (looksBinary) {
                // file contains binary content
                return false;
            }
            matcher.reset(current);
            if (matcher.find()) {
                return true;
            }
        }
        return false;
    } finally {
        rows.close();
    }
}
/**
 * Counts the lines of a file with a Commons IO {@code LineIterator}.
 *
 * @param fileName path of the file to count
 * @return the number of lines read; 0 if the file could not be opened (the stack trace is
 *         printed in that case)
 */
public static int getTotalNumberOfLinesUsingApacheCommonsIO(String fileName) {
    int lines = 0;
    LineIterator lineIterator = null;
    try {
        lineIterator = FileUtils.lineIterator(new File(fileName));
        while (lineIterator.hasNext()) {
            lineIterator.nextLine();
            lines++;
        }
    } catch (IOException ioe) {
        ioe.printStackTrace();
    } finally {
        // Null-safe; without this the file handle stays open until garbage collection.
        LineIterator.closeQuietly(lineIterator);
    }
    return lines;
}
/**
 * Feeds the stream {@code in} line by line to {@code outputObserver}.
 *
 * @return {@code true} as soon as the observer's {@code onReadLine} returns {@code true},
 *         {@code false} once the stream is exhausted without that happening
 */
@Override
public Boolean call() throws IOException, InterruptedException {
    try (Reader source = new InputStreamReader(in)) {
        for (LineIterator rows = IOUtils.lineIterator(source); rows.hasNext(); ) {
            boolean stopRequested = outputObserver.onReadLine(rows.nextLine());
            if (stopRequested) {
                return true; // interrupted by OutputObserver
            }
        }
        return false;
    }
}
/**
 * Loads every line of {@code file} into a set.
 *
 * <p>Loading is best effort: any exception is printed and the words collected up to that
 * point are returned.
 *
 * @param file the word list, one entry per line
 * @return the distinct lines read from the file
 */
private static Set<String> loadBadWord(File file) {
    Set<String> badWordSet = new HashSet<>();
    LineIterator it = null;
    try {
        it = FileUtils.lineIterator(file);
        while (it.hasNext()) {
            badWordSet.add(it.nextLine());
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // Null-safe; releases the file handle on every path.
        LineIterator.closeQuietly(it);
    }
    return badWordSet;
}
/**
 * Converts the data section of an ARFF file into a sequence database.
 *
 * <p>Lines up to and including the first one containing {@code @data} are ignored. For every
 * later line, the second comma-separated field is stripped of single quotes and split on
 * single spaces; each non-empty call is written as its dictionary ID followed by {@code " -1 "},
 * and the line is terminated with {@code -2}. Calls missing from {@code dictionary} are added
 * with IDs counting up from 0.
 *
 * <p>NOTE(review): new IDs start at 0 regardless of what {@code dictionary} already contains —
 * confirm callers always pass an empty dictionary.
 *
 * @param arffFile      path of the ARFF input file
 * @param dictionary    call-to-ID mapping, extended in place
 * @param transactionDB output file
 * @throws IOException if the input cannot be opened or the output cannot be created
 */
private static void generateTransactionDatabase(final String arffFile, final BiMap<String, Integer> dictionary,
        final File transactionDB) throws IOException {
    int mID = 0;
    boolean found = false;
    // try-with-resources closes the writer even when a data line cannot be parsed.
    try (PrintWriter out = new PrintWriter(transactionDB)) {
        final LineIterator it = FileUtils.lineIterator(new File(arffFile));
        try {
            while (it.hasNext()) {
                final String line = it.nextLine();
                if (found) {
                    for (final String raw_call : line.split(",")[1].replace("\'", "").split(" ")) {
                        final String call = raw_call.trim();
                        if (call.isEmpty()) { // skip empty strings
                            continue;
                        }
                        if (dictionary.containsKey(call)) {
                            final int ID = dictionary.get(call);
                            out.print(ID + " -1 ");
                        } else {
                            out.print(mID + " -1 ");
                            dictionary.put(call, mID);
                            mID++;
                        }
                    }
                    out.println("-2");
                }
                if (line.contains("@data")) {
                    found = true;
                }
            }
        } finally {
            LineIterator.closeQuietly(it);
        }
    }
}
/**
 * This method scans the input database to calculate the support of single
 * items.
 *
 * <p>Empty lines and lines starting with {@code #}, {@code %} or {@code @} are skipped. Every
 * other line is split on single spaces and each token is parsed as an integer; negative
 * values (end of itemset/sequence tags) are ignored. Each distinct item of a line adds one to
 * that item's count.
 *
 * @param inputFile
 *            the input file, read as UTF-8
 * @return a multiset for storing the support of each singleton
 * @throws IOException
 *             if the file cannot be opened
 */
public static Multiset<Sequence> scanDatabaseToDetermineFrequencyOfSingleItems(final File inputFile)
        throws IOException {
    final Multiset<Sequence> singletons = HashMultiset.create();
    final LineIterator it = FileUtils.lineIterator(inputFile, "UTF-8");
    try {
        // for each line (transaction) until the end of file
        while (it.hasNext()) {
            final String line = it.nextLine();
            // if the line is a comment, is empty or is a
            // kind of metadata
            if (line.isEmpty() || line.charAt(0) == '#' || line.charAt(0) == '%' || line.charAt(0) == '@') {
                continue;
            }
            // split the line into items
            final String[] lineSplit = line.split(" ");
            // for each item
            final HashSet<Sequence> seenItems = new HashSet<>();
            for (final String itemString : lineSplit) {
                final int item = Integer.parseInt(itemString);
                if (item >= 0) { // ignore end of itemset/sequence tags
                    final Sequence seq = new Sequence(item);
                    PAMCore.recursiveSetOccurrence(seq, seenItems); // set occurrence
                    seenItems.add(seq); // add item to seen
                }
            }
            singletons.addAll(seenItems); // increase the support of the items
        }
    } finally {
        // close the input file, also when a token is not a valid integer
        LineIterator.closeQuietly(it);
    }
    return singletons;
}
/**
 * Loads an in memory cache from the given input stream (sets syn0 and the vocab).
 *
 * <p>The first line is passed to {@code isHeader}; when it reports a header, that line is
 * dropped and parsing starts with the following one. Every parsed line is split on single
 * spaces: the first token is decoded with {@code ReadHelper.decodeB64} to obtain the word and
 * the remaining tokens are parsed as floats that form the word's vector. All vectors are
 * stacked into one matrix which is installed as syn0 of the returned lookup table.
 *
 * <p>NOTE(review): the stream is wrapped with {@code new InputStreamReader(inputStream)}, i.e.
 * without an explicit charset — confirm that the platform default is intended.
 *
 * <p>NOTE(review): for an empty stream {@code line} is still {@code null} when the do/while
 * body first runs, and for a stream that holds only a header the unguarded
 * {@code lines.nextLine()} has nothing left to read — confirm callers never pass such input.
 *
 * @param inputStream input stream
 * @return a {@link Pair} holding the lookup table and the vocab cache.
 * @throws RuntimeException wrapping the {@link IOException} raised while reading or closing
 *         the stream
 */
public static Pair<InMemoryLookupTable, VocabCache> loadTxt(@NonNull InputStream inputStream) {
AbstractCache<VocabWord> cache = new AbstractCache<>();
// Declared outside the try so the finally block can close it.
LineIterator lines = null;
try (InputStreamReader inputStreamReader = new InputStreamReader(inputStream);
BufferedReader reader = new BufferedReader(inputStreamReader)) {
lines = IOUtils.lineIterator(reader);
String line = null;
boolean hasHeader = false;
/* Check if first line is a header */
if (lines.hasNext()) {
line = lines.nextLine();
hasHeader = isHeader(line, cache);
}
if (hasHeader) {
log.debug("First line is a header");
// Skip the header: the first data line is the one after it.
line = lines.nextLine();
}
List<INDArray> arrays = new ArrayList<>();
// Row shape; the second dimension is filled in per line from the vector length.
long[] vShape = new long[]{ 1, -1 };
do {
String[] tokens = line.split(" ");
String word = ReadHelper.decodeB64(tokens[0]);
VocabWord vocabWord = new VocabWord(1.0, word);
// The index is the number of words already in the cache, so indices follow line order.
vocabWord.setIndex(cache.numWords());
cache.addToken(vocabWord);
cache.addWordToIndex(vocabWord.getIndex(), word);
cache.putVocabWord(word);
// Everything after the first token is a vector component.
float[] vector = new float[tokens.length - 1];
for (int i = 1; i < tokens.length; i++) {
vector[i - 1] = Float.parseFloat(tokens[i]);
}
vShape[1] = vector.length;
INDArray row = Nd4j.create(vector, vShape);
arrays.add(row);
// Advance to the next line, or end the loop when the input is exhausted.
line = lines.hasNext() ? lines.next() : null;
} while (line != null);
// Stack the per-word rows into a single matrix.
INDArray syn = Nd4j.vstack(arrays);
// The vector length is taken from the first row.
InMemoryLookupTable<VocabWord> lookupTable = new InMemoryLookupTable
.Builder<VocabWord>()
.vectorLength(arrays.get(0).columns())
.useAdaGrad(false)
.cache(cache)
.useHierarchicSoftmax(false)
.build();
lookupTable.setSyn0(syn);
return new Pair<>((InMemoryLookupTable) lookupTable, (VocabCache) cache);
} catch (IOException readeTextStreamException) {
throw new RuntimeException(readeTextStreamException);
} finally {
if (lines != null) {
lines.close();
}
}
}
/**
 * Returns the messages of an event's time interval that mention the event's main term and
 * satisfy a word filter.
 *
 * <p>The event's temporal description is split on {@code ","} into a start and end day, which
 * are converted to time slices. For every slice in that range the {@code .text},
 * {@code .time} and {@code .author} files are read in lockstep, so line <i>n</i> of each file
 * describes the same message. A message is a candidate when its text contains (ignoring case)
 * the first space-separated word of the event's textual description.
 *
 * <p>For a candidate, one flag per entry of {@code words} records whether the text contains
 * that word (ignoring case), and the flags are summed with
 * {@code ArrayUtils.sum(test, 0, test.length-1)} (presumably an inclusive end index — verify
 * against that helper). With {@code operator == 0} the message is kept when the sum equals
 * {@code words.length}; with {@code operator == 1} it is kept when the sum is positive.
 *
 * <p>An {@link IOException} for a slice is logged and that slice is skipped.
 *
 * @param event    the event whose term and time interval are used
 * @param words    filter words
 * @param operator 0 or 1, selecting the filter rule described above
 * @return the matching messages
 */
public ObservableList<Message> getFilteredMessages(Event event, String[] words, int operator){
    ObservableList<Message> messages = FXCollections.observableArrayList();
    String[] interval = event.getTemporalDescription().split(",");
    int timeSliceA = convertDayToTimeSlice(Double.parseDouble(interval[0]));
    int timeSliceB = convertDayToTimeSlice(Double.parseDouble(interval[1]));
    String term = event.getTextualDescription().split(" ")[0];
    NumberFormat formatter = new DecimalFormat("00000000");
    for (int i = timeSliceA; i <= timeSliceB; i++) {
        String sliceBase = path+File.separator+preprocessing+File.separator+formatter.format(i);
        LineIterator textIter = null;
        LineIterator timeIter = null;
        LineIterator authorIter = null;
        try {
            textIter = FileUtils.lineIterator(new File(sliceBase+".text"));
            timeIter = FileUtils.lineIterator(new File(sliceBase+".time"));
            authorIter = FileUtils.lineIterator(new File(sliceBase+".author"));
            while (textIter.hasNext()) {
                String text = textIter.nextLine();
                // Advance the companion files on EVERY line; advancing them only for matching
                // lines would pair a message with the author/time of an earlier line.
                String author = authorIter.nextLine();
                String time = timeIter.nextLine();
                if (!StringUtils.containsIgnoreCase(text,term)) {
                    continue;
                }
                short[] test = new short[words.length];
                for (int j = 0; j < words.length; j++) {
                    if (StringUtils.containsIgnoreCase(text,words[j])) {
                        test[j] = 1;
                    } else {
                        test[j] = 0;
                    }
                }
                int testSum = ArrayUtils.sum(test, 0, test.length-1);
                boolean allWordsRule = operator==0 && testSum == test.length;
                boolean anyWordRule = operator==1 && testSum > 0;
                if (allWordsRule || anyWordRule) {
                    messages.add(new Message(author,time,text));
                }
            }
        } catch (IOException ex) {
            Logger.getLogger(Corpus.class.getName()).log(Level.SEVERE, null, ex);
        } finally {
            // Three file handles per slice would otherwise stay open; closeQuietly is null-safe.
            LineIterator.closeQuietly(textIter);
            LineIterator.closeQuietly(timeIter);
            LineIterator.closeQuietly(authorIter);
        }
    }
    return messages;
}
/**
 * Clears the LCS cache and repopulates it from a tab-separated file.
 *
 * <p>Each line must have at least four columns: column 0 and column 1 are the short IDs of the
 * two classes of a pair and column 3 is the short ID of their ancestor class. Column 2 is not
 * read here. For each line the pair is marked as cached and the ancestor's index is stored,
 * always under the ordering "smaller class index first", so only half of the matrix is
 * populated.
 *
 * @param fileName path of the cache file
 * @throws IOException if the cache cannot be cleared, the file cannot be read, a line has
 *         fewer than four columns, or a line references a class without a class index
 */
@Override
public void loadLCSCache(String fileName) throws IOException {
    try {
        clearLCSCache();
    } catch (UnknownOWLClassException e) {
        // Keep the original failure as the cause instead of dropping it.
        throw new IOException("Cannot clear cache", e);
    }
    LOG.info("Loading LCS cache from "+fileName);
    // try-with-resources closes the stream even when a line is rejected.
    try (FileInputStream s = new FileInputStream(fileName)) {
        LineIterator itr = IOUtils.lineIterator(s, UTF_8);
        while (itr.hasNext()) {
            String line = itr.nextLine();
            String[] vals = line.split("\t");
            if (vals.length < 4) {
                throw new IOException("Malformed LCS cache line in "+fileName+": "+line);
            }
            OWLClass c1 = getOWLClassFromShortId(vals[0]);
            OWLClass c2 = getOWLClassFromShortId(vals[1]);
            OWLClass a = getOWLClassFromShortId(vals[3]);
            Integer cix = classIndex.get(c1);
            Integer dix = classIndex.get(c2);
            Integer aix = classIndex.get(a);
            if (cix == null) {
                LOG.error("Unknown class C: "+c1);
            }
            if (dix == null) {
                LOG.error("Unknown class D: "+c2);
            }
            if (aix == null) {
                LOG.error("Unknown ancestor class: "+a);
            }
            if (cix == null || dix == null || aix == null) {
                // Unboxing a missing index would fail with a NullPointerException below;
                // fail with a descriptive, declared exception instead.
                throw new IOException("Unknown class in LCS cache "+fileName+": "+line);
            }
            // Note that we only populate half the cache
            // Ensure the row index is not larger than the column index
            int row = Math.min(cix, dix);
            int col = Math.max(cix, dix);
            ciPairIsCached[row][col] = true;
            // TODO - set all IC caches
            ciPairLCS[row][col] = aix;
        }
    }
    LOG.info("Finished loading LCS cache from "+fileName);
    isLCSCacheFullyPopulated = true;
}
/**
 * Convert dataset from SPMF format to GoKrimp format.
 *
 * <p>Empty lines and lines starting with {@code #}, {@code %} or {@code @} are skipped. For
 * every other line the space-separated tokens are copied to the output, each followed by a
 * space, except for the {@code -1} (end of item) and {@code -2} (end of sequence) markers.
 * Once a {@code -2} marker has been seen, a {@code "\n"} is written before each subsequent
 * input line. A final line separator is written after the last line.
 *
 * @param inputDB  SPMF input file, read as UTF-8
 * @param outputDB GoKrimp output file
 * @throws IOException if the input cannot be opened or the output cannot be written
 */
private static void convertDatasetGoKrimpFormat(final File inputDB, final File outputDB) throws IOException {
    // Output DB; try-with-resources closes it even when reading or writing fails
    try (BufferedWriter db = new BufferedWriter(new FileWriter(outputDB))) {
        boolean newSeq = false;
        final LineIterator it = FileUtils.lineIterator(inputDB, "UTF-8");
        try {
            // for each line (transaction) until the end of file
            while (it.hasNext()) {
                final String line = it.nextLine();
                // if the line is a comment, is empty or is a
                // kind of metadata
                if (line.isEmpty() || line.charAt(0) == '#' || line.charAt(0) == '%' || line.charAt(0) == '@') {
                    continue;
                }
                // sequence separator
                if (newSeq) {
                    db.write("\n");
                }
                // split the transaction into items
                for (final String token : line.split(" ")) {
                    if (token.equals("-2")) { // end of sequence
                        newSeq = true;
                    } else if (!token.equals("-1")) { // skip end-of-item markers, copy item values
                        db.write(token + " ");
                    }
                }
            }
            db.newLine();
        } finally {
            // close the input file
            LineIterator.closeQuietly(it);
        }
    }
}