下面列出了 org.apache.commons.io.LineIterator#closeQuietly() 的实例代码，或者点击链接到 GitHub 查看源代码，也可以在右侧发表评论。
/**
 * Streams through a CSV file one line at a time using a Commons IO
 * {@link LineIterator}, calling {@code logMemory()} before and after the pass.
 * The lines themselves are read and discarded.
 */
@Test
public final void givenUsingApacheIo_whenStreamingThroughAFile_thenCorrect() throws IOException {
    // Swap the file name for "trainDataPositive.csv" to stream the other data set.
    final String inputDirectory = "G:\\full\\train\\input\\";
    final File csvFile = new File(inputDirectory + "trainDataNegative.csv");

    logMemory();

    final LineIterator lines = FileUtils.lineIterator(csvFile, "UTF-8");
    try {
        for (; lines.hasNext(); ) {
            // do something with the line
            lines.nextLine();
        }
    } finally {
        // Always release the underlying reader, even if iteration fails.
        LineIterator.closeQuietly(lines);
    }

    logMemory();
}
/**
 * Counts the lines of a file whose trimmed text contains {@code "class "}.
 * <p>
 * If the file cannot be opened the error is logged and the count gathered so
 * far (zero) is returned.
 *
 * @param file the file to scan, read with the platform default encoding
 * @return the number of matching lines
 */
public static int countClasses(File file) {
    int matches = 0;
    LineIterator lines = null;
    try {
        lines = FileUtils.lineIterator(file);
        while (lines.hasNext()) {
            final String trimmed = lines.nextLine().trim();
            if (!StringUtils.contains(trimmed, "class ")) {
                continue;
            }
            matches++;
        }
    } catch (IOException e) {
        LOG.error("Error determining class count for file " + file, e);
    } finally {
        // closeQuietly tolerates a null iterator (open failed).
        LineIterator.closeQuietly(lines);
    }
    return matches;
}
/**
 * Reads the document at {@code docLoc} and returns its text with every line
 * trimmed and terminated by a single {@code '\n'}.
 * <p>
 * If the document cannot be opened, the stack trace is printed and an empty
 * string is returned.
 *
 * @return the original document text
 */
public String getOriginal() {
    final StringBuilder doc = new StringBuilder();
    LineIterator iterator = null;
    try {
        iterator = FileUtils.lineIterator(docLoc);
        // Read inside the try so a failed open can never reach hasNext() on a null iterator.
        while (iterator.hasNext()) {
            doc.append(iterator.nextLine().trim()).append('\n');
        }
    } catch (final IOException e) {
        e.printStackTrace();
    } finally {
        // Null-safe; also runs when reading a line fails part-way through.
        LineIterator.closeQuietly(iterator);
    }
    return doc.toString();
}
/**
 * Import the GRISP general domains.
 * <p>
 * Each line of the {@code grispDomains} file is one domain label (tabs are
 * replaced by spaces and the result is trimmed). Labels are numbered from 0 in
 * file order and stored in both {@code domain2id} and {@code id2domain}, which
 * are re-created on every call.
 *
 * @throws IOException if the domain file cannot be opened
 */
private void importDomains() throws IOException {
    domain2id = new HashMap<String, Integer>();
    id2domain = new HashMap<Integer, String>();
    final LineIterator domainIterator = FileUtils.lineIterator(new File(grispDomains));
    try {
        int n = 0;
        while (domainIterator.hasNext()) {
            final String domain = domainIterator.next().replace('\t', ' ').trim();
            // Integer.valueOf replaces the deprecated Integer constructor.
            final Integer id = Integer.valueOf(n);
            domain2id.put(domain, id);
            id2domain.put(id, domain);
            n++;
        }
    } finally {
        // Release the file handle even if reading a line fails.
        LineIterator.closeQuietly(domainIterator);
    }
}
/**
 * Loads a stop-word list, one word per line, from a UTF-8 text file.
 * <p>
 * A {@code null} path yields an empty list. If the file cannot be opened the
 * failure is logged at SEVERE level and the (empty) list is returned.
 *
 * @param pathToStopwordsFile path of the stop-word file, or {@code null}
 * @return the lines of the file in order; never {@code null}
 */
static public LinkedList<String> readStopWords(String pathToStopwordsFile){
    final LinkedList<String> words = new LinkedList<>();
    if (pathToStopwordsFile == null) {
        return words;
    }

    LineIterator lines = null;
    try {
        lines = FileUtils.lineIterator(new File(pathToStopwordsFile), "UTF-8");
        while (lines.hasNext()) {
            final String word = lines.nextLine();
            words.add(word);
        }
    } catch (IOException ex) {
        Logger.getLogger(MABED.class.getName()).log(Level.SEVERE, null, ex);
    } finally {
        LineIterator.closeQuietly(lines);
    }
    return words;
}
/**
 * Counts the lines of the file at {@code fileLocation} with a Commons IO
 * {@link LineIterator} (platform default charset) and expects exactly 10.
 */
@Test
public void count_lines_text_apache() throws IOException {
    final File textFile = Paths.get(fileLocation).toFile();
    final String charsetName = Charset.defaultCharset().toString();
    final LineIterator lines = FileUtils.lineIterator(textFile, charsetName);

    long counted = 0;
    try {
        // Count first, then consume the line in the loop's update step.
        for (; lines.hasNext(); lines.nextLine()) {
            counted++;
        }
    } finally {
        LineIterator.closeQuietly(lines);
    }

    assertEquals(10, counted);
}
/**
 * Print useful statistics for the transaction database.
 * <p>
 * Each line is one transaction: every {@code "-2"} is removed and the rest is
 * split on {@code " -1 "} into integer items. Prints the number of distinct
 * items, the number of transactions and the average number of items per
 * transaction to standard output.
 *
 * @param dbFile the UTF-8 encoded transaction database
 * @throws IOException if the file cannot be opened
 * @throws NumberFormatException if an item is not an integer
 */
public static void printTransactionDBStats(final File dbFile) throws IOException {
    int noTransactions = 0;
    double sparsity = 0;
    final Set<Integer> singletons = new HashSet<>();

    final LineIterator it = FileUtils.lineIterator(dbFile, "UTF-8");
    try {
        while (it.hasNext()) {
            final String[] items = it.nextLine().replace("-2", "").split(" -1 ");
            for (final String item : items) {
                singletons.add(Integer.parseInt(item));
            }
            sparsity += items.length;
            noTransactions++;
        }
    } finally {
        // Close the file even when a malformed item aborts the scan.
        LineIterator.closeQuietly(it);
    }

    System.out.println("\nDatabase: " + dbFile);
    System.out.println("Items: " + singletons.size());
    System.out.println("Transactions: " + noTransactions);
    System.out.println("Avg. items per transaction: " + sparsity / noTransactions + "\n");
}
/**
 * Read the missing meta data report from a previous run.
 * <p>
 * A missing file is silently ignored. Reading stops at the first line starting
 * with {@code MARK_NO_MISSING_META_DATA}. A line starting with
 * {@code MARK_CLASS} sets the current class name; blank lines are skipped; any
 * other line is recorded, trimmed, under the current class.
 *
 * @param aReportFile the report file to read
 * @param aReportData receives one (class name, entry) pair per recorded line
 * @throws MojoExecutionException if an entry appears before any class line, or
 *         if the file cannot be read
 */
private void readMissingMetaDataReport(File aReportFile, Multimap<String, String> aReportData)
        throws MojoExecutionException {
    if (!aReportFile.exists()) {
        // Ignore if the file is missing
        return;
    }

    LineIterator lines = null;
    try {
        lines = IOUtils.lineIterator(new FileInputStream(aReportFile), encoding);
        String currentClass = null;
        while (lines.hasNext()) {
            final String line = lines.next();

            // Report says there is no missing meta data
            if (line.startsWith(MARK_NO_MISSING_META_DATA)) {
                return;
            }

            // Line containing class name
            if (line.startsWith(MARK_CLASS)) {
                currentClass = line.substring(MARK_CLASS.length()).trim();
                continue;
            }

            // Empty line, ignore
            if (StringUtils.isBlank(line)) {
                continue;
            }

            // Line containing a missing meta data instance
            if (currentClass == null) {
                throw new MojoExecutionException("Missing meta data report has invalid format.");
            }
            aReportData.put(currentClass, line.trim());
        }
    } catch (IOException e) {
        throw new MojoExecutionException("Unable to read missing meta data report: "
                + ExceptionUtils.getRootCauseMessage(e), e);
    } finally {
        LineIterator.closeQuietly(lines);
    }
}
/**
 * This method scans the input database to calculate the support of single
 * items.
 * <p>
 * Empty lines and lines starting with {@code '#'}, {@code '%'} or {@code '@'}
 * are skipped. Every other line is split on single spaces into integers;
 * negative values are ignored. Each distinct item seen on a line contributes
 * once to the returned multiset.
 *
 * @param inputFile
 *            the input file (read as UTF-8)
 * @return a multiset for storing the support of each singleton
 * @throws IOException
 *             if the input file cannot be opened
 * @throws NumberFormatException
 *             if a token on a data line is not an integer
 */
public static Multiset<Sequence> scanDatabaseToDetermineFrequencyOfSingleItems(final File inputFile)
        throws IOException {
    final Multiset<Sequence> singletons = HashMultiset.create();

    final LineIterator it = FileUtils.lineIterator(inputFile, "UTF-8");
    try {
        // for each line (transaction) until the end of file
        while (it.hasNext()) {
            final String line = it.nextLine();

            // skip comments, empty lines and metadata
            if (line.isEmpty() || line.charAt(0) == '#' || line.charAt(0) == '%' || line.charAt(0) == '@') {
                continue;
            }

            // split the line into items
            final String[] lineSplit = line.split(" ");

            // items already seen on this line, so each counts once per transaction
            final HashSet<Sequence> seenItems = new HashSet<>();
            for (final String itemString : lineSplit) {
                final int item = Integer.parseInt(itemString);
                if (item >= 0) { // ignore end of itemset/sequence tags
                    final Sequence seq = new Sequence(item);
                    PAMCore.recursiveSetOccurrence(seq, seenItems); // set occurrence
                    seenItems.add(seq); // add item to seen
                }
            }
            singletons.addAll(seenItems); // increase the support of the items
        }
    } finally {
        // close the input file, even if a line could not be parsed
        LineIterator.closeQuietly(it);
    }
    return singletons;
}
/**
 * Builds the executor for this step: it feeds the backup file through
 * {@code preScrubberService.preprocessOriginEntries}, writing to the
 * pre-scrubber file, and generates a pre-scrubber report when report data is
 * returned.
 *
 * @return an executor whose {@code execute()} returns {@code true} on
 *         completion and throws a {@link RuntimeException} on an I/O failure
 */
@Override
protected CustomBatchExecutor getCustomBatchExecutor() {
    return new CustomBatchExecutor() {
        public boolean execute() {
            final StopWatch timer = new StopWatch();
            timer.start();

            final String directoryPrefix = batchFileDirectoryName + File.separator;
            final String inputFile = directoryPrefix + GeneralLedgerConstants.BatchFileSystem.BACKUP_FILE + GeneralLedgerConstants.BatchFileSystem.EXTENSION;
            final String outputFile = directoryPrefix + GeneralLedgerConstants.BatchFileSystem.PRE_SCRUBBER_FILE + GeneralLedgerConstants.BatchFileSystem.EXTENSION;

            PreScrubberReportData reportData = null;
            LineIterator originEntries = null;
            try {
                originEntries = FileUtils.lineIterator(new File(inputFile));
                reportData = preScrubberService.preprocessOriginEntries(originEntries, outputFile);
            }
            catch (IOException e) {
                LOG.error("IO exception occurred during pre scrubbing.", e);
                throw new RuntimeException("IO exception occurred during pre scrubbing.", e);
            }
            finally {
                LineIterator.closeQuietly(originEntries);
            }

            if (reportData != null) {
                final PreScrubberReport report = new PreScrubberReport();
                report.generateReport(reportData, preScrubberReportWriterService);
            }

            timer.stop();
            if (LOG.isDebugEnabled()) {
                final double minutes = timer.getTotalTimeSeconds() / 60.0;
                LOG.debug("scrubber step of took " + (minutes) + " minutes to complete");
            }
            return true;
        }
    };
}
/**
 * Scrub this single group read only. This will only output the scrubber report. It won't output any other groups.
 * <p>
 * Steps: run the pre-scrubber over the unsorted file, optionally write the pre-scrubber report, sort the
 * pre-scrubber output, call {@code scrubEntries(true, documentNumber)}, then delete the sort, valid, error and
 * expired files. The {@code inputFile}, {@code validFile}, {@code errorFile}, {@code expiredFile},
 * {@code ledgerSummaryReport} and {@code runDate} fields are overwritten by this call.
 *
 * @param fileName path of the unsorted origin entry file that should be scrubbed
 * @param documentNumber the document number of any specific entries to scrub
 * @throws RuntimeException if the unsorted file cannot be read, or if deleting the output files throws
 */
@Override
public void scrubGroupReportOnly(String fileName, String documentNumber) {
LOG.debug("scrubGroupReportOnly() started");
String unsortedFile = fileName;
// The sorted copy lives next to the original, with a ".sort" suffix.
this.inputFile = fileName + ".sort";
this.validFile = batchFileDirectoryName + File.separator + GeneralLedgerConstants.BatchFileSystem.SCRUBBER_VALID_OUTPUT_FILE + GeneralLedgerConstants.BatchFileSystem.EXTENSION;
this.errorFile = batchFileDirectoryName + File.separator + GeneralLedgerConstants.BatchFileSystem.SCRUBBER_ERROR_OUTPUT_FILE + GeneralLedgerConstants.BatchFileSystem.EXTENSION;
this.expiredFile = batchFileDirectoryName + File.separator + GeneralLedgerConstants.BatchFileSystem.SCRUBBER_EXPIRED_OUTPUT_FILE + GeneralLedgerConstants.BatchFileSystem.EXTENSION;
String prescrubOutput = batchFileDirectoryName + File.separator + GeneralLedgerConstants.BatchFileSystem.PRE_SCRUBBER_FILE + GeneralLedgerConstants.BatchFileSystem.EXTENSION;
this.ledgerSummaryReport = new LedgerSummaryReport();
runDate = calculateRunDate(dateTimeService.getCurrentDate());
PreScrubberReportData preScrubberReportData = null;
// run pre-scrubber on the raw input into the sort process
LineIterator inputEntries = null;
try {
inputEntries = FileUtils.lineIterator(new File(unsortedFile));
preScrubberReportData = preScrubberService.preprocessOriginEntries(inputEntries, prescrubOutput);
}
catch (IOException e1) {
LOG.error("Error encountered trying to prescrub GLCP/LLCP document", e1);
throw new RuntimeException("Error encountered trying to prescrub GLCP/LLCP document", e1);
}
finally {
// Null-safe: also covers the case where the file could not be opened.
LineIterator.closeQuietly(inputEntries);
}
if (preScrubberReportData != null) {
preScrubberReportWriterService.setDocumentNumber(documentNumber);
((WrappingBatchService)preScrubberReportWriterService).initialize();
try {
new PreScrubberReport().generateReport(preScrubberReportData, preScrubberReportWriterService);
}
finally {
// Always pair initialize() with destroy(), even if report generation fails.
((WrappingBatchService)preScrubberReportWriterService).destroy();
}
}
// Presumably sorts prescrubOutput into inputFile (the ".sort" file) - confirm against BatchSortUtil.
BatchSortUtil.sortTextFileWithFields(prescrubOutput, inputFile, new ScrubberSortComparator());
scrubEntries(true, documentNumber);
// delete files
File deleteSortFile = new File(inputFile);
File deleteValidFile = new File(validFile);
File deleteErrorFile = new File(errorFile);
File deleteExpiredFile = new File(expiredFile);
try {
// NOTE(review): File.delete() reports failure through its boolean result, which is ignored here;
// the catch below only sees runtime exceptions such as SecurityException.
deleteSortFile.delete();
deleteValidFile.delete();
deleteErrorFile.delete();
deleteExpiredFile.delete();
} catch (Exception e){
LOG.error("scrubGroupReportOnly delete output files process Stopped: " + e.getMessage());
throw new RuntimeException("scrubGroupReportOnly delete output files process Stopped: " + e.getMessage(), e);
}
}
/**
 * Matches the desired inter-chromosomal contacts against a sorted contact file
 * and writes one tab-separated line per desired contact to {@code outWriter}.
 * <p>
 * A sorted copy of {@code fileToRead} ({@code fileToRead + ".sorted"}) is created
 * if it does not exist yet. Each line of that file is split on tabs into
 * position on the smaller chromosome, position on the larger chromosome and raw
 * contact value. For a position match the raw value is divided by the product of
 * the two normalization factors looked up at index {@code (pos / resolution) + 1};
 * a factor that is not a number is treated as 1 and a message is printed.
 * Contacts that are passed without a match, or whose normalized value is below
 * {@code minValue}, are written with {@code "-"} placeholders.
 * <p>
 * If a factor index falls outside its normalization vector, diagnostics are
 * printed and the JVM is terminated with exit code -1.
 *
 * @param fileToRead      path of the (unsorted) contact file
 * @param baseName        directory containing the normalization vectors
 * @param normMethod      normalization method, used as file extension of the vectors
 * @param chrSmaller      name of the smaller chromosome
 * @param chrLarger       name of the larger chromosome
 * @param contactsToCheck desired contacts; the loop advances through them in list
 *                        order, so they are presumably sorted like the contact file
 * @param resolution      resolution label, converted by {@code getNumericResolution}
 * @param minValue        minimum normalized value for a contact to be reported
 * @param outWriter       receives the result lines
 * @throws IOException if a file cannot be read or written
 */
private static void processNormalizedInterContactInformation(String fileToRead, String baseName, String normMethod, String chrSmaller, String chrLarger, ArrayList<DesiredChrContact> contactsToCheck, String resolution, double minValue, TextFile outWriter) throws IOException {

    //ReadIn normalization chr1
    TextFile inputNormChr1 = new TextFile(baseName + "\\chr" + chrSmaller + "_" + resolution + "." + normMethod, TextFile.R);
    ArrayList<String> normFactorSmallerChr = inputNormChr1.readAsArrayList();
    inputNormChr1.close();

    //ReadIn normalization chr2
    TextFile inputNormChr2 = new TextFile(baseName + "\\chr" + chrLarger + "_" + resolution + "." + normMethod, TextFile.R);
    ArrayList<String> normFactorLargerChr = inputNormChr2.readAsArrayList();
    inputNormChr2.close();

    if (!Gpio.exists(fileToRead + ".sorted")) {
        umcg.genetica.io.chrContacts.SortInterChrContacts.readNonSortedWriteSorted(fileToRead, fileToRead + ".sorted");
    }

    int numberToBeMatched = 0;

    LineIterator it = FileUtils.lineIterator(new File(fileToRead + ".sorted"), "UTF-8");

    try {
        while (it.hasNext()) {
            String[] parts = StringUtils.split(it.nextLine(), '\t');

            int posChr1 = org.apache.commons.lang.math.NumberUtils.createInteger(parts[0]);
            int posChr2 = org.apache.commons.lang.math.NumberUtils.createInteger(parts[1]);

            while (numberToBeMatched < contactsToCheck.size()) {
                if (posChr1 < contactsToCheck.get(numberToBeMatched).getChrLocationSmaller()) {
                    // File has not reached this desired contact yet: read the next line.
                    break;
                } else if (posChr1 == contactsToCheck.get(numberToBeMatched).getChrLocationSmaller()) {
                    if (posChr2 < contactsToCheck.get(numberToBeMatched).getChrLocationLarger()) {
                        break;
                    }
                    if (posChr2 == contactsToCheck.get(numberToBeMatched).getChrLocationLarger()) {

                        // Valid indices are 0 .. size() - 1, so an index equal to size() is
                        // already out of range (the guard used '>' and let it through to get()).
                        if (((posChr1 / getNumericResolution(resolution)) + 1) >= normFactorSmallerChr.size()) {
                            System.out.println(baseName);
                            System.out.println("Smaller");
                            System.out.println((posChr1 / getNumericResolution(resolution) + 1));
                            System.out.println(normFactorSmallerChr.size());
                            System.exit(-1);
                        }
                        if (((posChr2 / getNumericResolution(resolution)) + 1) >= normFactorLargerChr.size()) {
                            System.out.println(baseName);
                            System.out.println("Larger");
                            System.out.println((posChr2 / getNumericResolution(resolution)) + 1);
                            System.out.println(normFactorLargerChr.size());
                            System.exit(-1);
                        }

                        String factor1Base = normFactorSmallerChr.get((posChr1 / getNumericResolution(resolution)) + 1);
                        String factor2Base = normFactorLargerChr.get((posChr2 / getNumericResolution(resolution)) + 1);

                        // A factor that cannot be parsed keeps the neutral value 1.
                        double factor1 = 1.0;
                        double factor2 = 1.0;

                        if (NumberUtils.isNumber(factor1Base) && NumberUtils.isNumber(factor2Base)) {
                            factor1 = Double.parseDouble(factor1Base);
                            factor2 = Double.parseDouble(factor2Base);
                        } else if (NumberUtils.isNumber(factor1Base)) {
                            factor1 = Double.parseDouble(factor1Base);
                            System.out.println("Error in files.");
                            System.out.println("Base 2 is reset to 1");
                        } else if (NumberUtils.isNumber(factor2Base)) {
                            factor2 = Double.parseDouble(factor2Base);
                            System.out.println("Error in files.");
                            System.out.println("Base 1 is reset to 1");
                        }

                        double contact = org.apache.commons.lang.math.NumberUtils.createDouble(parts[2]) / (factor1 * factor2);
                        if (contact >= minValue) {
                            outWriter.writeln(contactsToCheck.get(numberToBeMatched).getSnpName() + "\t" + contactsToCheck.get(numberToBeMatched).getProbeName() + "\t" + posChr1 + "\t" + posChr2 + "\tContact\t" + contact + "\t" + org.apache.commons.lang.math.NumberUtils.createDouble(parts[2]));
                            numberToBeMatched++;
                        } else {
                            outWriter.writeln(contactsToCheck.get(numberToBeMatched).getSnpName() + "\t" + contactsToCheck.get(numberToBeMatched).getProbeName() + "\t" + posChr1 + "\t" + posChr2 + "\t-\t-\t-");
                            numberToBeMatched++;
                        }
                    } else if (posChr2 > contactsToCheck.get(numberToBeMatched).getChrLocationLarger()) {
                        // File moved past this desired contact without a match.
                        outWriter.writeln(contactsToCheck.get(numberToBeMatched).getSnpName() + "\t" + contactsToCheck.get(numberToBeMatched).getProbeName() + "\t" + posChr1 + "\t" + posChr2 + "\t-\t-\t-");
                        numberToBeMatched++;
                    }
                } else if (posChr1 > contactsToCheck.get(numberToBeMatched).getChrLocationSmaller()) {
                    outWriter.writeln(contactsToCheck.get(numberToBeMatched).getSnpName() + "\t" + contactsToCheck.get(numberToBeMatched).getProbeName() + "\t" + posChr1 + "\t" + posChr2 + "\t-\t-\t-");
                    numberToBeMatched++;
                }
            }
        }
    } finally {
        LineIterator.closeQuietly(it);
    }
}
/**
 * Reads a tab-separated mapping file from Wikipedia categories to GRISP domains.
 * <p>
 * Blank lines are skipped. The first token of a line is a category title that is
 * resolved through {@code wikipedia.getCategoryByTitle}; the remaining tokens are
 * domain labels resolved through {@code domain2id}. Unknown categories and
 * unknown domain labels are logged as warnings and skipped. A category that
 * appears more than once is logged and its earlier entry is replaced.
 *
 * @param mappingFilePath path of the mapping file
 * @return category id mapped to the list of domain ids found on its line
 * @throws IOException if the mapping file cannot be opened
 */
private Map<Integer, List<Integer>> readMapping(String mappingFilePath) throws IOException {
    final Map<Integer, List<Integer>> domains = new HashMap<Integer, List<Integer>>();
    final LineIterator iterator = FileUtils.lineIterator(new File(mappingFilePath));
    try {
        while (iterator.hasNext()) {
            String line = iterator.nextLine();
            if (isBlank(line)) {
                continue;
            }
            StringTokenizer st = new StringTokenizer(line, "\t");

            String category = null;
            int categoryId = -1;
            if (st.hasMoreTokens()) {
                category = st.nextToken();
                com.scienceminer.nerd.kb.model.Category theCategory = wikipedia.getCategoryByTitle(category);
                if (theCategory == null) {
                    LOGGER.warn(category + " is not a category found in Wikipedia.");
                } else {
                    categoryId = theCategory.getId();
                    if (domains.get(Integer.valueOf(categoryId)) != null) {
                        LOGGER.warn(category + " is already defined in " + mappingFilePath);
                    }
                }
            }

            // -1 is the sentinel for "no usable category on this line".
            if (categoryId != -1) {
                List<Integer> dom = new ArrayList<Integer>();
                while (st.hasMoreTokens()) {
                    String domain = st.nextToken();
                    Integer domainId = domain2id.get(domain);
                    if (domainId == null) {
                        LOGGER.warn(domain + " is an invalid GRISP domain label in " + mappingFilePath);
                    } else {
                        dom.add(domainId);
                    }
                }
                domains.put(Integer.valueOf(categoryId), dom);
            }
        }
    } finally {
        // Release the file even if a lookup or parse step throws.
        LineIterator.closeQuietly(iterator);
    }
    return domains;
}
/**
 * Generate transactions from set of interesting sequences.
 * <p>
 * Samples transactions with fixed random seeds until {@code noTransactions}
 * non-empty ones have been written to {@code outFile} (UTF-8). Each item is
 * written followed by {@code " -1 "} and each transaction ends with
 * {@code "-2"}. When {@code VERBOSE} is set the file is echoed to standard
 * output afterwards.
 *
 * @param sequences      the interesting sequences passed to the sampler
 * @param noTransactions number of non-empty transactions to write
 * @param outFile        destination file, overwritten
 * @return set of sequences added to transaction
 * @throws IOException if the output file cannot be created or re-read
 */
public static HashMap<Sequence, Double> generateTransactionDatabase(
        final Map<Sequence, Double> sequences, final int noTransactions,
        final File outFile) throws IOException {

    // Set random number seeds
    final Random random = new Random(1);
    final Random randomI = new Random(10);

    // Storage for sequences actually added
    final HashMap<Sequence, Double> addedSequences = new HashMap<>();

    // Generate transaction database; the writer is closed even if sampling throws
    try (PrintWriter out = new PrintWriter(outFile, "UTF-8")) {
        int count = 0;
        while (count < noTransactions) {

            // Generate transaction from distribution
            final Transaction transaction = sampleFromDistribution(random,
                    sequences, addedSequences, randomI);
            for (final int item : transaction) {
                out.print(item + " -1 ");
            }
            // Empty transactions are not counted and not terminated
            if (!transaction.isEmpty()) {
                out.print("-2");
                out.println();
                count++;
            }
        }
    }

    // Print file to screen
    if (VERBOSE) {
        // Read back with the same charset the file was written in.
        final LineIterator it = new LineIterator(
                new java.io.InputStreamReader(new java.io.FileInputStream(outFile), "UTF-8"));
        try {
            while (it.hasNext()) {
                System.out.println(it.nextLine());
            }
        } finally {
            LineIterator.closeQuietly(it);
        }
    }

    return addedSequences;
}
/**
 * Builds the executor for the labor pre-scrubber step: it feeds the labor backup
 * file through {@code laborPreScrubberService.preprocessOriginEntries}, writing
 * to the labor pre-scrubber file, and generates the pre-scrubber report when
 * report data is returned.
 *
 * @see org.kuali.kfs.sys.batch.AbstractWrappedBatchStep#getCustomBatchExecutor()
 */
@Override
protected CustomBatchExecutor getCustomBatchExecutor() {
    return new CustomBatchExecutor() {

        /**
         * @return {@code true} when the step completed
         * @throws RuntimeException if the input file cannot be read
         * @see org.kuali.kfs.sys.batch.service.WrappedBatchExecutorService.CustomBatchExecutor#execute()
         */
        public boolean execute() {
            StopWatch stopWatch = new StopWatch();
            stopWatch.start();

            String inputFile = batchFileDirectoryName + File.separator + LaborConstants.BatchFileSystem.BACKUP_FILE + GeneralLedgerConstants.BatchFileSystem.EXTENSION;
            String outputFile = batchFileDirectoryName + File.separator + LaborConstants.BatchFileSystem.PRE_SCRUBBER_FILE + GeneralLedgerConstants.BatchFileSystem.EXTENSION;

            PreScrubberReportData preScrubberReportData = null;
            LineIterator oeIterator = null;
            try {
                oeIterator = FileUtils.lineIterator(new File(inputFile));
                preScrubberReportData = laborPreScrubberService.preprocessOriginEntries(oeIterator, outputFile);
            }
            catch (IOException e) {
                LOG.error("IO exception occurred during pre scrubbing.", e);
                throw new RuntimeException("IO exception occurred during pre scrubbing.", e);
            }
            finally {
                LineIterator.closeQuietly(oeIterator);
            }

            if (preScrubberReportData != null) {
                ((WrappingBatchService) laborPreScrubberReportWriterService).initialize();
                try {
                    new PreScrubberReport().generateReport(preScrubberReportData, laborPreScrubberReportWriterService);
                }
                finally {
                    // Always pair initialize() with destroy(), even if report generation fails.
                    ((WrappingBatchService) laborPreScrubberReportWriterService).destroy();
                }
            }

            stopWatch.stop();
            if (LOG.isDebugEnabled()) {
                LOG.debug("labor pre-scrubber scrubber step took " + (stopWatch.getTotalTimeSeconds() / 60.0) + " minutes to complete");
            }
            return true;
        }
    };
}
/**
 * Decides whether the line referenced by {@code model} should be ignored.
 * <p>
 * The file is read line by line up to {@code model.getLineNumber()} (1-based).
 * That line, trimmed, is ignored when it is a {@code #} comment in a properties
 * file, when it contains "version" or "revision" (case-insensitive), or when
 * {@code isMavenVersionTag} says so. If the line is not reached, or reading
 * fails (logged as a warning), the result is {@code false}.
 *
 * @param context the graph context, forwarded to {@code isMavenVersionTag}
 * @param model the file location to inspect
 * @return {@code true} if the line should be ignored
 */
private boolean ignoreLine(GraphContext context, FileLocationModel model)
{
    final boolean propertiesFile = model.getFile() instanceof PropertiesModel;
    final int targetLine = model.getLineNumber();
    LineIterator lines = null;
    try
    {
        lines = FileUtils.lineIterator(model.getFile().asFile());
        for (int current = 1; lines.hasNext(); current++)
        {
            if (current < targetLine)
            {
                // seek: discard lines before the one of interest
                lines.next();
                continue;
            }
            if (current > targetLine)
            {
                LOG.warning("Did not find line: " + targetLine + " in file: " + model.getFile().getFileName());
                break;
            }

            // read the line to memory only if it is the line of interest
            final String text = StringUtils.trim(lines.next());

            // commented-out property, or
            // WINDUP-808 - matches with "version" or "revision" on the same line, or
            // a Maven version tag
            return (propertiesFile && StringUtils.startsWith(text, "#"))
                    || StringUtils.containsIgnoreCase(text, "version")
                    || StringUtils.containsIgnoreCase(text, "revision")
                    || isMavenVersionTag(context, model);
        }
    }
    catch (IOException | RuntimeException e)
    {
        LOG.log(Level.WARNING, "Exception reading properties from file: " + model.getFile().getFilePath(), e);
    }
    finally
    {
        LineIterator.closeQuietly(lines);
    }
    return false;
}
/**
 * Generate transactions from set of interesting sequences.
 * <p>
 * Builds one {@link EnumeratedIntegerDistribution} per sequence from its row in
 * {@code probabilities}, then samples transactions with fixed random seeds until
 * {@code noTransactions} non-empty ones have been written to {@code outFile}
 * (UTF-8). Each item is written followed by {@code " -1 "} and each transaction
 * ends with {@code "-2"}. When {@code VERBOSE} is set the file is echoed to
 * standard output afterwards.
 *
 * @param sequences      the interesting sequences passed to the sampler
 * @param probabilities  per sequence, a map from integer value to its probability
 * @param noTransactions number of non-empty transactions to write
 * @param outFile        destination file, overwritten
 * @return set of sequences added to transaction
 * @throws IOException if the output file cannot be created or re-read
 */
public static HashMap<Sequence, Double> generateTransactionDatabase(final Map<Sequence, Double> sequences,
        final Table<Sequence, Integer, Double> probabilities, final int noTransactions, final File outFile)
        throws IOException {

    // Set random number seeds
    final Random random = new Random(1);
    final Random randomI = new Random(10);
    final RandomGenerator randomC = new JDKRandomGenerator();
    randomC.setSeed(100);

    // Storage for sequences actually added
    final HashMap<Sequence, Double> addedSequences = new HashMap<>();

    // Add to distribution class for easy sampling
    final Map<Sequence, EnumeratedIntegerDistribution> dists = new HashMap<>();
    for (final Sequence seq : sequences.keySet()) {
        final List<Integer> singletons = new ArrayList<>();
        final List<Double> probs = new ArrayList<>();
        for (final Entry<Integer, Double> entry : probabilities.row(seq).entrySet()) {
            singletons.add(entry.getKey());
            probs.add(entry.getValue());
        }
        final EnumeratedIntegerDistribution dist = new EnumeratedIntegerDistribution(randomC,
                Ints.toArray(singletons), Doubles.toArray(probs));
        dists.put(seq, dist);
    }

    // Generate transaction database; the writer is closed even if sampling throws
    try (PrintWriter out = new PrintWriter(outFile, "UTF-8")) {
        int count = 0;
        while (count < noTransactions) {

            // Generate transaction from distribution
            final Transaction transaction = sampleFromDistribution(random, sequences, dists, addedSequences, randomI);
            for (final int item : transaction) {
                out.print(item + " -1 ");
            }
            // Empty transactions are not counted and not terminated
            if (!transaction.isEmpty()) {
                out.print("-2");
                out.println();
                count++;
            }
        }
    }

    // Print file to screen
    if (VERBOSE) {
        // Read back with the same charset the file was written in.
        final LineIterator it = new LineIterator(
                new java.io.InputStreamReader(new java.io.FileInputStream(outFile), "UTF-8"));
        try {
            while (it.hasNext()) {
                System.out.println(it.nextLine());
            }
        } finally {
            LineIterator.closeQuietly(it);
        }
    }

    return addedSequences;
}
/**
 * Walks a sorted contact file and flags every desired contact whose two
 * positions occur in the file with a raw value of at least {@code minValue}.
 * <p>
 * The sorted copy ({@code fileToRead + ".sorted"}) is created first if it does
 * not exist. The desired contacts are consumed in list order while the file is
 * read once, so they are presumably sorted the same way as the file.
 *
 * @param fileToRead      path of the (unsorted) contact file
 * @param minValue        minimum raw contact value for a contact to be flagged
 * @param contactsToCheck desired contacts; matches are marked via {@code setContact()}
 * @param intra           {@code true} to sort with the intra-chromosomal sorter,
 *                        {@code false} for the inter-chromosomal one
 * @throws IOException if a file cannot be read
 */
private static void processRawContactInformation(String fileToRead, double minValue, ArrayList<DesiredChrContact> contactsToCheck, boolean intra) throws IOException {

    //Check if sorted version is available
    //If not make sorted available.
    final String sortedFile = fileToRead + ".sorted";
    if (!Gpio.exists(sortedFile)) {
        if (intra) {
            umcg.genetica.io.chrContacts.SortIntraChrContacts.readNonSortedWriteSorted(fileToRead, sortedFile);
        } else {
            umcg.genetica.io.chrContacts.SortInterChrContacts.readNonSortedWriteSorted(fileToRead, sortedFile);
        }
    }

    int nextWanted = 0;
    final LineIterator lines = FileUtils.lineIterator(new File(sortedFile), "UTF-8");
    try {
        while (lines.hasNext()) {
            final String[] fields = StringUtils.split(lines.nextLine(), '\t');
            final int posChr1 = org.apache.commons.lang.math.NumberUtils.createInteger(fields[0]);
            final int posChr2 = org.apache.commons.lang.math.NumberUtils.createInteger(fields[1]);

            while (nextWanted < contactsToCheck.size()) {
                final DesiredChrContact wanted = contactsToCheck.get(nextWanted);

                if (posChr1 < wanted.getChrLocationSmaller()) {
                    // File has not reached this desired contact yet: read the next line.
                    break;
                }

                if (posChr1 == wanted.getChrLocationSmaller()) {
                    if (posChr2 < wanted.getChrLocationLarger()) {
                        break;
                    }
                    if (posChr2 == wanted.getChrLocationLarger()) {
                        final double contact = org.apache.commons.lang.math.NumberUtils.createDouble(fields[2]);
                        if (contact >= minValue) {
                            wanted.setContact();
                        }
                        // Matched position: move on whether or not the value was high enough.
                        nextWanted++;
                    } else if (posChr2 > wanted.getChrLocationLarger()) {
                        nextWanted++;
                    }
                } else if (posChr1 > wanted.getChrLocationSmaller()) {
                    // File moved past this desired contact without a match.
                    nextWanted++;
                }
            }
        }
    } finally {
        LineIterator.closeQuietly(lines);
    }
}
/**
 * Indexes the subject URIs of a whitespace-separated triple file (UTF-8).
 * <p>
 * Lines with fewer than three tokens are skipped. The first token is taken as
 * the subject; a leading {@code '<'} and trailing {@code '>'} are stripped.
 * When the subject differs from the subject of the previously processed triple,
 * its alternative writings are fetched from the retriever and each of them is
 * indexed (or the URI itself if none are returned). Consecutive triples with
 * the same subject are indexed only once. Progress is logged every 100000
 * triples. An I/O failure while opening the file is logged and ignored.
 *
 * @param indexer receives the URIs to index
 * @param file path of the triple file
 */
public static void index(Indexer indexer, String file) {
    UriEncodingHandlingSameAsRetriever retriever = new UriEncodingHandlingSameAsRetriever();
    LineIterator iterator = null;
    long size = 0, rounds = 0;
    try {
        iterator = FileUtils.lineIterator(new File(file), "UTF-8");
        String old = null;
        final long start = System.currentTimeMillis();
        // iterate over the lines
        while (iterator.hasNext()) {
            String[] split = iterator.next().split("\\s+");
            if (split.length > 2) {
                // get the subject of the triple
                String uri = split[0];
                if (uri.startsWith("<")) {
                    uri = uri.substring(1);
                }
                if (uri.endsWith(">")) {
                    uri = uri.substring(0, uri.length() - 1);
                }

                // if this subject is new
                if (!uri.equals(old)) {
                    // retrieve other writings of this URI
                    Set<String> uris = retriever.retrieveSameURIs(uri);
                    if (uris != null) {
                        for (String u : uris) {
                            indexer.index(u);
                        }
                    } else {
                        indexer.index(uri);
                    }
                    // Remember the subject; without this every triple was treated as new.
                    old = uri;
                }
                size++;
                if (size % 100000 == 0) {
                    rounds++;
                    String avgTime = DurationFormatUtils
                            .formatDurationHMS((System.currentTimeMillis() - start) / rounds);
                    LOGGER.info("Got 100000 entities...(Sum: {}, AvgTime: {})", size, avgTime);
                }
            }
        }
    } catch (IOException e) {
        LOGGER.error("Exception while reading file. It will be ignored.", e);
    } finally {
        LineIterator.closeQuietly(iterator);
    }
    LOGGER.info("Successfully indexed {} triples", size);
}
/**
 * Matches the desired intra-chromosomal contacts against a sorted contact file
 * and writes one tab-separated line per desired contact to {@code outWriter}.
 * <p>
 * A sorted copy of {@code fileToRead} ({@code fileToRead + ".sorted"}) is created
 * if it does not exist yet. Each line of that file is split on tabs into two
 * positions and a raw contact value. For a position match the raw value is
 * divided by the product of the two normalization factors looked up at index
 * {@code (pos / resolution) + 1} in the single normalization vector. If either
 * factor is not a number, "Error in files." is printed and the contact is
 * skipped without output. Contacts that are passed without a match, or whose
 * normalized value is below {@code minValue}, are written with {@code "-"}
 * placeholders.
 *
 * @param fileToRead      path of the (unsorted) contact file
 * @param baseName        directory containing the normalization vector
 * @param normMethod      normalization method, used as file extension of the vector
 * @param chrSmaller      name of the chromosome
 * @param contactsToCheck desired contacts; the loop advances through them in list
 *                        order, so they are presumably sorted like the contact file
 * @param resolution      resolution label, converted by {@code getNumericResolution}
 * @param minValue        minimum normalized value for a contact to be reported
 * @param outWriter       receives the result lines
 * @throws IOException if a file cannot be read or written
 */
private static void processNormalizedIntraContactInformation(String fileToRead, String baseName, String normMethod, String chrSmaller, ArrayList<DesiredChrContact> contactsToCheck, String resolution, double minValue, TextFile outWriter) throws IOException {

    //ReadIn normalization chr1
    TextFile inputNormChr1 = new TextFile(baseName + "\\chr" + chrSmaller + "_" + resolution + "." + normMethod, TextFile.R);
    ArrayList<String> normFactorSmallerChr = inputNormChr1.readAsArrayList();
    inputNormChr1.close();

    if (!Gpio.exists(fileToRead + ".sorted")) {
        umcg.genetica.io.chrContacts.SortIntraChrContacts.readNonSortedWriteSorted(fileToRead, fileToRead + ".sorted");
    }

    int numberToBeMatched = 0;

    LineIterator it = FileUtils.lineIterator(new File(fileToRead + ".sorted"), "UTF-8");

    try {
        while (it.hasNext()) {
            String[] parts = StringUtils.split(it.nextLine(), '\t');

            int posChr1 = org.apache.commons.lang.math.NumberUtils.createInteger(parts[0]);
            int posChr2 = org.apache.commons.lang.math.NumberUtils.createInteger(parts[1]);

            while (numberToBeMatched < contactsToCheck.size()) {
                if (posChr1 < contactsToCheck.get(numberToBeMatched).getChrLocationSmaller()) {
                    // File has not reached this desired contact yet: read the next line.
                    break;
                } else if (posChr1 == contactsToCheck.get(numberToBeMatched).getChrLocationSmaller()) {
                    if (posChr2 < contactsToCheck.get(numberToBeMatched).getChrLocationLarger()) {
                        break;
                    }
                    if (posChr2 == contactsToCheck.get(numberToBeMatched).getChrLocationLarger()) {

                        String factor1Base = normFactorSmallerChr.get((posChr1 / getNumericResolution(resolution)) + 1);
                        String factor2Base = normFactorSmallerChr.get((posChr2 / getNumericResolution(resolution)) + 1);

                        double factor1;
                        double factor2;

                        // isNumber accepts decimal factors such as "0.87"; StringUtils.isNumeric
                        // only accepts digit-only strings and rejected every fractional factor.
                        if (org.apache.commons.lang.math.NumberUtils.isNumber(factor1Base) && org.apache.commons.lang.math.NumberUtils.isNumber(factor2Base)) {
                            factor1 = org.apache.commons.lang.math.NumberUtils.createDouble(factor1Base);
                            factor2 = org.apache.commons.lang.math.NumberUtils.createDouble(factor2Base);

                            double contact = org.apache.commons.lang.math.NumberUtils.createDouble(parts[2]) / (factor1 * factor2);
                            if (contact >= minValue) {
                                outWriter.writeln(contactsToCheck.get(numberToBeMatched).getSnpName() + "\t" + contactsToCheck.get(numberToBeMatched).getProbeName() + "\t" + posChr1 + "\t" + posChr2 + "\tContact\t" + contact + "\t" + org.apache.commons.lang.math.NumberUtils.createDouble(parts[2]));
                                numberToBeMatched++;
                            } else {
                                outWriter.writeln(contactsToCheck.get(numberToBeMatched).getSnpName() + "\t" + contactsToCheck.get(numberToBeMatched).getProbeName() + "\t" + posChr1 + "\t" + posChr2 + "\t-\t-\t-");
                                numberToBeMatched++;
                            }
                        } else {
                            System.out.println("Error in files.");
                            numberToBeMatched++;
                        }
                    } else if (posChr2 > contactsToCheck.get(numberToBeMatched).getChrLocationLarger()) {
                        // File moved past this desired contact without a match.
                        outWriter.writeln(contactsToCheck.get(numberToBeMatched).getSnpName() + "\t" + contactsToCheck.get(numberToBeMatched).getProbeName() + "\t" + posChr1 + "\t" + posChr2 + "\t-\t-\t-");
                        numberToBeMatched++;
                    }
                } else if (posChr1 > contactsToCheck.get(numberToBeMatched).getChrLocationSmaller()) {
                    outWriter.writeln(contactsToCheck.get(numberToBeMatched).getSnpName() + "\t" + contactsToCheck.get(numberToBeMatched).getProbeName() + "\t" + posChr1 + "\t" + posChr2 + "\t-\t-\t-");
                    numberToBeMatched++;
                }
            }
        }
    } finally {
        LineIterator.closeQuietly(it);
    }
}