htsjdk.samtools.util.BlockCompressedInputStream 源码实例 Demo

下面列出了org.apache.hadoop.fs.Seekable#htsjdk.samtools.util.BlockCompressedInputStream 实例代码,或者点击链接到github查看源代码,也可以在右侧发表评论。

源代码1 项目: rtg-tools   文件: SamUtils.java
/**
 * Check whether a file looks like a BAM file: block compressed (BGZF) and the
 * decompressed content starts with the 4-byte BAM magic "BAM\1".
 * @param file the file to check.
 * @return true if this looks like a BAM file.
 * @throws IOException if an IO Error occurs
 */
public static boolean isBAMFile(final File file) throws IOException {
  try (BufferedInputStream bis = new BufferedInputStream(new FileInputStream(file))) {
    if (!BlockCompressedInputStream.isValidFile(bis)) {
      return false;
    }
    // Buffer up to one compressed block so we can decompress just the start of the file
    final int buffSize = BlockCompressedStreamConstants.MAX_COMPRESSED_BLOCK_SIZE;
    bis.mark(buffSize);
    final byte[] buffer = new byte[buffSize];
    final int len = IOUtils.readAmount(bis, buffer, 0, buffSize);
    bis.reset();
    // Decompress the buffered data and check for the BAM magic bytes.
    // try-with-resources: the original code leaked this stream.
    final byte[] magicBuf = new byte[4];
    final int magicLength;
    try (BlockCompressedInputStream bcis = new BlockCompressedInputStream(new ByteArrayInputStream(buffer, 0, len))) {
      magicLength = IOUtils.readAmount(bcis, magicBuf, 0, 4);
    }
    // Checks we read 4 bytes and they were "BAM\1" in ascii
    return magicLength == 4 && Arrays.equals(new byte[] {'B', 'A', 'M', 1}, magicBuf);
  }
}
 
源代码2 项目: rtg-tools   文件: TabixIndexMerge.java
/**
 * Merge indexes for files that will be concatenated.
 * @param output output index file
 * @param files <code>tabix</code> index files; must contain at least one file
 * @param dataFileSizes file size of corresponding data files
 * @throws IOException if an IO error occurs
 * @throws IllegalArgumentException if no index files are supplied
 */
public static void mergeTabixFiles(File output, List<File> files, List<Long> dataFileSizes) throws IOException {
  if (files.isEmpty()) {
    // Fail fast: with no inputs there is no header, and mergedHeader would be
    // dereferenced below while still null.
    throw new IllegalArgumentException("No tabix index files to merge");
  }
  long pointerAdjust = 0; // cumulative size of preceding data files, used to shift file pointers
  final SequenceIndex[][] indexesSquared = new SequenceIndex[files.size()][];
  final String[][] sequenceNames = new String[files.size()][];
  TabixHeader mergedHeader = null;
  for (int i = 0; i < files.size(); ++i) {
    final File tbiFile = files.get(i);
    try (BlockCompressedInputStream bcis = new BlockCompressedInputStream(tbiFile)) {
      final TabixHeader th = TabixHeader.readHeader(bcis);
      sequenceNames[i] = th.getSequenceNamesUnpacked();
      // First file seeds the merged header; subsequent headers are folded in
      mergedHeader = mergedHeader == null ? th : TabixHeader.mergeHeaders(mergedHeader, th);
      indexesSquared[i] = loadFileIndexes(bcis, th.getNumSequences(), pointerAdjust);
    }
    pointerAdjust += dataFileSizes.get(i);
  }
  final List<SequenceIndex> indexes = collapseIndexes(indexesSquared, sequenceNames);
  TabixIndexer.mergeChunks(indexes);
  try (BlockCompressedOutputStream fos = new BlockCompressedOutputStream(output)) {
    TabixIndexer.writeIndex(indexes, mergedHeader.getOptions(), Arrays.asList(mergedHeader.getSequenceNamesUnpacked()), fos);
  }
}
 
源代码3 项目: rtg-tools   文件: TabixIndexer.java
/**
 * Creates a <code>TABIX</code> index for given BED file and saves it.
 * Counts the number of leading comment/header lines so the indexer can skip them.
 * @throws IOException if an IO Error occurs.
 * @throws UnindexableDataException If data cannot be indexed because of properties of the data
 */
public void saveBedIndex() throws IOException, UnindexableDataException {
  int skip = 0;
  //by setting true we indicate we want to be able to read at least a portion from the start of the inputstream without affecting the main run
  mInputHandler.start(true);
  try {
    // NOTE(review): bcli is intentionally not closed here — presumably mInputHandler
    // owns the underlying stream and close() below resets it; confirm before changing.
    final BlockCompressedLineReader bcli = new BlockCompressedLineReader(new BlockCompressedInputStream(mInputHandler.getInputStream()));
    String line;
    // Count consecutive leading lines matching the BED skip pattern; stop at the first data line
    while ((line = bcli.readLine()) != null) {
      if (BED_SKIP_LINES.matcher(line).matches()) {
        ++skip;
      } else {
        break;
      }
    }
  } finally {
    //resets the stream
    mInputHandler.close();
  }
  // Build and save the index, telling it how many header lines to skip
  saveIndex(new BedIndexerFactory(skip));
}
 
源代码4 项目: rtg-tools   文件: VcfMergeTest.java
public void checkMerge(String id, String resourcea, String resourceb, String... argsIn) throws Exception {
  try (final TestDirectory dir = new TestDirectory("vcfmerge")) {
    final File snpsA = BgzipFileHelper.bytesToBgzipFile(FileHelper.resourceToString("com/rtg/vcf/resources/" + resourcea).getBytes(), new File(dir, "fileA.vcf.gz"));
    new TabixIndexer(snpsA, TabixIndexer.indexFileName(snpsA)).saveVcfIndex();
    final File snpsB = BgzipFileHelper.bytesToBgzipFile(FileHelper.resourceToString("com/rtg/vcf/resources/" + resourceb).getBytes(), new File(dir, "fileB.vcf.gz"));
    new TabixIndexer(snpsB, TabixIndexer.indexFileName(snpsB)).saveVcfIndex();
    final File output = new File(dir, "out.vcf.gz");
    final String out = checkMainInit(Utils.append(argsIn, "-o", output.toString(), "--stats", snpsA.toString(), snpsB.toString())).out();
    assertEquals(BlockCompressedInputStream.FileTermination.HAS_TERMINATOR_BLOCK, BlockCompressedInputStream.checkTermination(output));
    assertTrue(new File(dir, output.getName() + ".tbi").isFile());
    mNano.check("vcfmerge_out_" + id + ".vcf", TestUtils.sanitizeVcfHeader(FileHelper.gzFileToString(output)), false);
    mNano.check("vcfmerge_stats_" + id + ".txt", out);

    final File inlist = new File(dir, "infiles.txt");
    FileUtils.stringToFile(snpsA.getAbsolutePath() + StringUtils.LS + snpsB.getAbsolutePath() + StringUtils.LS, inlist);
    final File output2 = new File(dir, "out2.vcf.gz");
    checkMainInit(Utils.append(argsIn, "-o", output2.toString(), "--stats", "-I", inlist.toString()));
    mNano.check("vcfmerge_out_" + id + ".vcf", TestUtils.sanitizeVcfHeader(FileHelper.gzFileToString(output2)), false);
    mNano.check("vcfmerge_stats_" + id + ".txt", out);
  }
}
 
源代码5 项目: rtg-tools   文件: MendeliannessCheckerTest.java
// End-to-end test of the mendelianness checker CLI: runs with all three output files
// requested, then again with defaults, nano-checking outputs both times.
public void testOptions() throws IOException {
  try (TestDirectory dir = new TestDirectory("mendelianness")) {
    // Minimal reference SDF plus a trio VCF resource as input
    final File sdf = ReaderTestUtils.getDNADir(">chr21\nacgt", dir);
    final File file1 = FileHelper.resourceToFile("com/rtg/vcf/mendelian/resources/merge.vcf", new File(dir, "merge.vcf"));
    final File inconsistent = new File(dir, "failed.vcf.gz");
    final File consistent = new File(dir, "nonfailed.vcf.gz");
    final File annot = new File(dir, "checked.vcf.gz");
    final MainResult res = MainResult.run(getCli(), "-t", sdf.getPath(), "-i", file1.getPath(), "--all-records", "--output", annot.getPath(), "--output-inconsistent", inconsistent.getPath(), "--output-consistent", consistent.getPath());
    assertEquals(res.err(), 0, res.rc());
    // Normalise the machine-specific "Checking:" path before comparing with the reference
    final String s = res.out().replaceAll("Checking: [^\n]*\n", "Checking: \n");
    mNano.check("mendelian.out.txt", s);
    mNano.check("mendelian.annotated.vcf", TestUtils.sanitizeVcfHeader(FileHelper.gzFileToString(annot)));
    mNano.check("mendelian.inconsistent.vcf", TestUtils.sanitizeVcfHeader(FileHelper.gzFileToString(inconsistent)));
    mNano.check("mendelian.consistent.vcf", TestUtils.sanitizeVcfHeader(FileHelper.gzFileToString(consistent)));
    // Both split outputs must be properly terminated BGZF files
    assertEquals(BlockCompressedInputStream.FileTermination.HAS_TERMINATOR_BLOCK, BlockCompressedInputStream.checkTermination(inconsistent));
    assertEquals(BlockCompressedInputStream.FileTermination.HAS_TERMINATOR_BLOCK, BlockCompressedInputStream.checkTermination(consistent));

    // Second run with default options (no output files)
    final MainResult res2 = MainResult.run(getCli(), "-t", sdf.getPath(), "-i", file1.getPath());
    assertEquals(0, res2.rc());
    final String s3 = res2.out().replaceAll("Checking: [^\n]*\n", "Checking: \n");
    //System.err.println(s3);
    mNano.check("mendelian2.out.txt", s3);
  }

}
 
源代码6 项目: rtg-tools   文件: VcfDecomposerCliTest.java
// Shared driver: runs the decomposer CLI over a resource VCF and nano-checks
// both the command output and the resulting (non-header) records.
private void runResourceTest(String inResourceLoc, String expResourceLoc, boolean useRef, String... extrArgs) throws IOException {
  try (TestDirectory dir = new TestDirectory()) {
    // Copy the input resource into the test directory under its original file name
    final String inName = new File(Resources.getResource(inResourceLoc).getFile()).getName();
    final File in = FileHelper.resourceToFile(inResourceLoc, new File(dir, inName));
    final File out = new File(dir, "out.vcf.gz");

    String[] cmd = {"-i", in.getPath(), "-o", out.getPath()};
    if (useRef) {
      // Optionally supply a reference SDF via -t
      final File sdf = ReaderTestUtils.getDNASubDir(REF, dir);
      cmd = Utils.append(cmd, "-t", sdf.getPath());
    }
    cmd = Utils.append(cmd, extrArgs);

    final String cliOutput = checkMainInitOk(cmd);
    mNano.check(expResourceLoc + ".txt", cliOutput, true);

    // Result must be a correctly terminated BGZF file
    assertEquals(BlockCompressedInputStream.FileTermination.HAS_TERMINATOR_BLOCK, BlockCompressedInputStream.checkTermination(out));

    // Compare only the data records (lines not starting with '#') against the expected resource
    final String records = StringUtils.grep(FileHelper.gzFileToString(out), "^[^#]").replaceAll("[\r\n]+", "\n");
    mNano.check(expResourceLoc, records, true);
  }
}
 
源代码7 项目: rtg-tools   文件: TabixIndexMergeTest.java
// Merges four tabix indexes for consecutive SAM fragments and compares the merged
// index (in debug-string form) against a nano reference.
public void testSam() throws Exception {
  final File dir = FileUtils.createTempDir("indexmerge", "test");
  try {
    final ArrayList<File> files = new ArrayList<>();
    final ArrayList<Long> dataFileSizes = new ArrayList<>();
    // SAM_FILES / SAM_RESOURCE are constants defined elsewhere in this test class
    for (int i = 1; i <= 4; ++i) {
      final String samFileName = String.format(SAM_FILES, i);
      final File samFile = new File(dir, samFileName);
      final File tbiFile = new File(dir, samFileName + ".tbi");
      FileHelper.resourceToFile(String.format("%s/%s", SAM_RESOURCE, samFileName), samFile);
      FileHelper.resourceToFile(String.format("%s/%s.tbi", SAM_RESOURCE, samFileName), tbiFile);
      files.add(tbiFile);
      // Data file sizes are needed to adjust virtual file pointers during the merge
      dataFileSizes.add(samFile.length());
    }
    final File mergedIndex = new File(dir, "merged.sam.gz.tbi");
    TabixIndexMerge.mergeTabixFiles(mergedIndex, files, dataFileSizes);
    try (InputStream fis = new BlockCompressedInputStream(new FileInputStream(mergedIndex))) {
      final String indexDebug = IndexTestUtils.tbiIndexToUniqueString(fis);
      mNano.check("merged.sam.gz.tbi.debug", indexDebug);
    }
  } finally {
    assertTrue(FileHelper.deleteAll(dir));
  }
}
 
源代码8 项目: rtg-tools   文件: TabixHeaderTest.java
// Reads two tabix headers from resource files and verifies merging them
// concatenates the sequence name lists (with the shared boundary sequence deduplicated).
public void test() throws IOException {
  final TabixHeader th1;
  try (BlockCompressedInputStream is = new BlockCompressedInputStream(Resources.getResourceAsStream("com/rtg/tabix/resources/tabixmerge1.sam.gz.tbi"))) {
    th1 = TabixHeader.readHeader(is);
    assertEquals(4, th1.getNumSequences());
    checkOptions(th1.getOptions());
    assertTrue(Arrays.equals(new String[]{"simulatedSequence1", "simulatedSequence2", "simulatedSequence3", "simulatedSequence4"}, th1.getSequenceNamesUnpacked()));
  }
  final TabixHeader th2;
  try (BlockCompressedInputStream is2 = new BlockCompressedInputStream(Resources.getResourceAsStream("com/rtg/tabix/resources/tabixmerge2.sam.gz.tbi"))) {
    th2 = TabixHeader.readHeader(is2);
    assertEquals(5, th2.getNumSequences());
    checkOptions(th2.getOptions());
    assertTrue(Arrays.equals(new String[]{"simulatedSequence4", "simulatedSequence5", "simulatedSequence6", "simulatedSequence7", "simulatedSequence8"}, th2.getSequenceNamesUnpacked()));
    // Merge: 4 + 5 sequences with "simulatedSequence4" common to both → 8 unique
    final TabixHeader merged = TabixHeader.mergeHeaders(th1, th2);
    assertEquals(8, merged.getNumSequences());
    checkOptions(th2.getOptions());
    assertTrue(Arrays.equals(new String[]{"simulatedSequence1", "simulatedSequence2", "simulatedSequence3", "simulatedSequence4", "simulatedSequence5", "simulatedSequence6", "simulatedSequence7", "simulatedSequence8"}, merged.getSequenceNamesUnpacked()));
  }
}
 
源代码9 项目: rtg-tools   文件: BlockCompressedLineReaderTest.java
// Verifies random access: seeks the line reader to a known BGZF virtual offset and
// checks the lines, file pointers and line numbers observed from that position.
public void test() throws IOException {
  final File dir = FileUtils.createTempDir("bclr", "test");
  try {
    final File sam = FileHelper.resourceToFile("com/rtg/sam/resources/readerWindow1.sam.gz", new File(dir, "readerWindow1.sam.gz"));
    try (BlockCompressedLineReader bclr = new BlockCompressedLineReader(new BlockCompressedInputStream(sam))) {
      // BGZF virtual offset: compressed-block position in the upper bits,
      // offset within the uncompressed block in the low 16 bits
      final long firstSeekPos = (44947L << 16) | 22870;
      bclr.seek(firstSeekPos);
      assertEquals(firstSeekPos, bclr.getFilePointer());
      final String line = bclr.readLine();
      assertTrue(line.startsWith("857\t147\tsimulatedSequence2\t32834"));
      assertEquals(firstSeekPos, bclr.getLineFilePointer());
      // File pointer advances by line length plus the newline (same block, so simple addition holds)
      assertEquals(firstSeekPos + line.length() + 1, bclr.getFilePointer());
      final String line2 = bclr.readLine();
      assertTrue(line2.startsWith("251\t99\tsimulatedSequence2\t33229"));
      // peek must return the next character without consuming it
      assertEquals((int) '9', bclr.peek());
      final String line3 = bclr.readLine();
      assertTrue(line3.startsWith("91\t163\tsimulatedSequence2\t33238"));
      assertEquals(3, bclr.getLineNumber());
    }
  } finally {
    assertTrue(FileHelper.deleteAll(dir));
  }
}
 
源代码10 项目: rtg-tools   文件: BlockCompressedLineReaderTest.java
// Cross-checks a full linear read: every line from the block-compressed reader must
// equal the corresponding line from a plain gzip reader over the same file, and
// both readers must reach end-of-file together.
public void testLinearRead() throws IOException {
  final File dir = FileUtils.createTempDir("bclr", "test");
  try {
    final File sam = FileHelper.resourceToFile("com/rtg/sam/resources/readerWindow1.sam.gz", new File(dir, "readerWindow1.sam.gz"));
    try (BlockCompressedLineReader blockReader = new BlockCompressedLineReader(new BlockCompressedInputStream(sam));
         BufferedReader plainReader = new BufferedReader(new InputStreamReader(GzipUtils.createGzipInputStream(new FileInputStream(sam))))) {
      String expected = plainReader.readLine();
      String actual = blockReader.readLine();
      while (expected != null && actual != null) {
        assertEquals(expected, actual);
        expected = plainReader.readLine();
        actual = blockReader.readLine();
      }
      // Both must be exhausted — a length mismatch fails one of these
      assertNull(expected);
      assertNull(actual);
    }
  } finally {
    assertTrue(FileHelper.deleteAll(dir));
  }
}
 
源代码11 项目: rtg-tools   文件: SamPositionReaderTest.java
// Iterates a SamPositionReader over a mixed SAM resource and checks every record's
// position, length, bin and virtual offsets against expected tables.
public void testSomeMethod() throws IOException {
  try (InputStream is = Resources.getResourceAsStream("com/rtg/sam/resources/mixed.sam.gz")) {
    try (SamPositionReader spr = new SamPositionReader(new BlockCompressedLineReader(new BlockCompressedInputStream(is)), 0)) {
      // ENTRIES, EXP_REF_NAME, START, LENGTH, BINS, VIRTUAL_OFFSETS, VIRTUAL_OFFSET_ENDS
      // are expectation tables defined elsewhere in this test class.
      int ref = 0; // index of the current reference sequence
      int i = 0;   // record index within the current reference
      while (spr.hasNext()) {
        spr.next();
        // Advance to the next reference once this one's expected entry count is exhausted
        if (i >= ENTRIES[ref]) {
          i = 0;
          ++ref;
        }
        assertEquals(EXP_REF_NAME[ref], spr.getReferenceName());
        assertEquals(ref, spr.getReferenceId());
        assertEquals(START[ref][i], spr.getStartPosition());
        assertEquals(LENGTH[ref][i], spr.getLengthOnReference());
        assertEquals(BINS[ref], spr.getBinNum());
        assertEquals(VIRTUAL_OFFSETS[ref][i], spr.getVirtualOffset());
        assertEquals(VIRTUAL_OFFSET_ENDS[ref][i], spr.getNextVirtualOffset());
        assertTrue(spr.hasReference());
        assertTrue(spr.hasCoordinates());
        assertFalse(spr.isUnmapped());
        ++i;
      }
    }
  }
}
 
源代码12 项目: picard   文件: BclReader.java
/**
 * Seeks every underlying BCL stream to the virtual file pointer for the given tile.
 * @param files the BCL files corresponding (by position) to {@code streams}
 * @param tileIndex the tile index for the run
 * @param currentTile the tile to seek to
 * @return the number of clusters in the tile (from the last stream's index record)
 * @throws UnsupportedOperationException if any stream is not block compressed
 * @throws PicardException if tile counts disagree or seeking fails
 */
public int seek(final List<File> files, final TileIndex tileIndex, final int currentTile) {
    int count = 0;
    int numClustersInTile = 0;
    for (final InputStream inputStream : streams) {
        final TileIndex.TileIndexRecord tileIndexRecord = tileIndex.findTile(currentTile);
        final BclIndexReader bclIndexReader = new BclIndexReader(files.get(count));
        final long virtualFilePointer = bclIndexReader.get(tileIndexRecord.getZeroBasedTileNumber());
        // Guard clause (fixes "bzgf" typo in the original message)
        if (!(inputStream instanceof BlockCompressedInputStream)) {
            throw new UnsupportedOperationException("Seeking only allowed on bgzf");
        }
        try {
            // The tile index and the per-file BCL index must agree on tile count
            if (tileIndex.getNumTiles() != bclIndexReader.getNumTiles()) {
                throw new PicardException(String.format("%s.getNumTiles(%d) != %s.getNumTiles(%d)",
                        tileIndex.getFile().getAbsolutePath(), tileIndex.getNumTiles(), bclIndexReader.getBciFile().getAbsolutePath(), bclIndexReader.getNumTiles()));
            }
            ((BlockCompressedInputStream) inputStream).seek(virtualFilePointer);
            numClustersInTile = tileIndexRecord.getNumClustersInTile();
        } catch (final IOException e) {
            throw new PicardException("Problem seeking to " + virtualFilePointer, e);
        }
        count++;
    }
    return numClustersInTile;
}
 
源代码13 项目: picard   文件: CheckTerminatorBlock.java
/**
 * Checks the BGZF terminator block of INPUT and prints the result to stderr.
 * @return 100 if the file is defective (missing/partial terminator), 0 otherwise
 */
@Override protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    try {
        final FileTermination term = BlockCompressedInputStream.checkTermination(INPUT);
        System.err.println(term.name());
        // Non-zero exit code signals a truncated/corrupt BGZF file to calling scripts
        return term == FileTermination.DEFECTIVE ? 100 : 0;
    }
    catch (final IOException ioe) {
        // Preserve the underlying cause (the original discarded it)
        throw new PicardException("Exception reading terminator block of file: " + INPUT.getAbsolutePath(), ioe);
    }
}
 
源代码14 项目: cramtools   文件: BGZF_ReferenceSequenceFile.java
/**
 * Opens a block-compressed fasta file with its .fai index, building the sequence
 * dictionary from the index entries.
 * @param file the block-compressed fasta file; a readable "&lt;file&gt;.fai" must exist beside it
 * @throws FileNotFoundException if the index file cannot be opened by the scanner
 * @throws RuntimeException if the fasta or index file is missing/unreadable
 */
public BGZF_ReferenceSequenceFile(File file) throws FileNotFoundException {
	if (!file.canRead())
		throw new RuntimeException("Cannot find or read fasta file: " + file.getAbsolutePath());

	File indexFile = new File(file.getAbsolutePath() + ".fai");
	if (!indexFile.canRead())
		throw new RuntimeException("Cannot find or read fasta index file: " + indexFile.getAbsolutePath());

	int seqID = 0;
	dictionary = new SAMSequenceDictionary();
	// try-with-resources: the original leaked the Scanner if parsing threw mid-loop
	try (Scanner scanner = new Scanner(indexFile)) {
		while (scanner.hasNextLine()) {
			String line = scanner.nextLine();
			FAIDX_FastaIndexEntry entry = FAIDX_FastaIndexEntry.fromString(seqID++, line);
			index.put(entry.getName(), entry);
			dictionary.addSequence(new SAMSequenceRecord(entry.getName(), entry.getLen()));
		}
	}

	if (index.isEmpty())
		log.warn("No entries in the index: " + indexFile.getAbsolutePath());

	is = new BlockCompressedInputStream(new SeekableFileStream(file));
}
 
源代码15 项目: cramtools   文件: BGZF_FastaIndexer.java
/**
 * Writes a "&lt;file&gt;.fai" fasta index next to each block-compressed fasta file
 * named on the command line.
 * @param args command line, parsed into {@code Params} by JCommander
 * @throws IOException if an IO error occurs
 */
public static void main(String[] args) throws IOException {
	Params params = new Params();
	JCommander jc = new JCommander(params);
	jc.parse(args);

	for (File file : params.files) {
		log.info("Indexing file: " + file.getAbsolutePath());
		// try-with-resources: the original leaked both resources on exception
		try (BlockCompressedInputStream bcis = new BlockCompressedInputStream(new SeekableFileStream(file));
				PrintWriter writer = new PrintWriter(file.getAbsolutePath() + ".fai")) {
			bcis.available();
			BGZF_FastaIndexer mli = new BGZF_FastaIndexer(bcis);

			// Stop early if the writer hits an error; otherwise emit one index entry per sequence
			FAIDX_FastaIndexEntry e;
			while (!writer.checkError() && (e = mli.readNext()) != null) {
				writer.println(e);
			}
		}
	}
}
 
源代码16 项目: rtg-tools   文件: SamMultiRestrictingIterator.java
// Builds an iterator over SAM records restricted to a set of virtual-offset regions,
// positioning itself on the first region that actually contains data.
SamMultiRestrictingIterator(BlockCompressedInputStream stream, VirtualOffsets offsets, SequencesReader reference, SAMFileHeader header, SamReader.Type type, String label) throws IOException {
  // Stash the construction parameters
  mStream = stream;
  mOffsets = offsets;
  mReference = reference;
  mHeader = header;
  mType = type;
  mLabel = label;

  // No region is active until populateNext establishes one
  mCurrentOffset = 0;
  mCurrentTemplate = -1;
  mCurrentIt = null;

  // Set up the first region, skipping ahead past any regions with no data
  populateNext(true);
}
 
源代码17 项目: rtg-tools   文件: TabixHeader.java
// Reads a tabix (.tbi) header from the stream. Field offsets follow the tabix file
// format: little-endian int32 fields after the 4-byte magic.
// NOTE(review): FIXED_SIZE presumably covers the magic plus eight int32 fields (36 bytes)
// — confirm against its declaration; the magic bytes themselves are not validated here.
static TabixHeader readHeader(BlockCompressedInputStream is) throws IOException {
  final byte[] fixedData = new byte[FIXED_SIZE];
  IOUtils.readFully(is, fixedData, 0, FIXED_SIZE);
  final int numberReferences = ByteArrayIOUtils.bytesToIntLittleEndian(fixedData, 4); // n_ref
  final int format = ByteArrayIOUtils.bytesToIntLittleEndian(fixedData, 8); // format flags
  // Column numbers are 1-based on disk; convert to 0-based here
  final int seqCol = ByteArrayIOUtils.bytesToIntLittleEndian(fixedData, 12) - 1;
  final int begCol = ByteArrayIOUtils.bytesToIntLittleEndian(fixedData, 16) - 1;
  final int endCol = ByteArrayIOUtils.bytesToIntLittleEndian(fixedData, 20) - 1;
  final int meta = ByteArrayIOUtils.bytesToIntLittleEndian(fixedData, 24); // comment/meta character
  final int skip = ByteArrayIOUtils.bytesToIntLittleEndian(fixedData, 28); // header lines to skip
  final int sequenceNameLength = ByteArrayIOUtils.bytesToIntLittleEndian(fixedData, 32); // total bytes of concatenated names
  // Sequence names follow the fixed header as a NUL-separated block
  final byte[] sequenceNames = new byte[sequenceNameLength];
  IOUtils.readFully(is, sequenceNames, 0, sequenceNameLength);
  return new TabixHeader(numberReferences, new TabixIndexer.TabixOptions(format, seqCol, begCol, endCol, meta, skip), sequenceNames);
}
 
源代码18 项目: rtg-tools   文件: TabixLineReader.java
// Unrestricted variant: reads the whole file (no sequence/region limits).
SingleRestrictionLineReader(File input, TabixIndexReader tir) throws IOException {
  mSequence = null;
  mBeg = -1;
  mEnd = -1;
  final BlockCompressedLineReader lineReader = new BlockCompressedLineReader(new BlockCompressedInputStream(input));
  // Choose the position reader implementation by the indexed format
  if (tir.getOptions().mFormat == TabixIndexer.TabixOptions.FORMAT_VCF) {
    mBCPositionReader = new VcfPositionReader(lineReader, tir.getOptions().mSkip);
  } else {
    mBCPositionReader = new GenericPositionReader(lineReader, tir.getOptions());
  }
  // Virtual-offset range spanning the entire file
  mRange = new VirtualOffsets(0, 0xFFFFFFFFFFFFFFFFL, null);
}
 
源代码19 项目: rtg-tools   文件: TabixLineReader.java
// Region-restricted variant: positions the reader at the first block that may
// contain records within the given region.
SingleRestrictionLineReader(File input, TabixIndexReader tir, RegionRestriction region) throws IOException {
  // requireNonNull gives a diagnostic message rather than a bare NPE
  java.util.Objects.requireNonNull(region, "region");
  mSequence = region.getSequenceName();
  mBeg = region.getStart();
  mEnd = region.getEnd();
  final BlockCompressedLineReader bclr = new BlockCompressedLineReader(new BlockCompressedInputStream(input));
  // Position reader implementation depends on the indexed format
  mBCPositionReader = tir.getOptions().mFormat == TabixIndexer.TabixOptions.FORMAT_VCF ? new VcfPositionReader(bclr, tir.getOptions().mSkip) : new GenericPositionReader(bclr, tir.getOptions());
  mRange = tir.getFilePointers(region);
  if (mRange != null) {
    // Jump directly to the first candidate block for the region
    mBCPositionReader.seek(mRange.start(0));
  }
}
 
源代码20 项目: rtg-tools   文件: TabixLineReader.java
// Multi-region variant: iterates records across an arbitrary set of reference ranges.
MultiRestrictionLineReader(File input, TabixIndexReader tir, ReferenceRanges<String> ranges) throws IOException {
  // requireNonNull gives a diagnostic message rather than a bare NPE
  java.util.Objects.requireNonNull(ranges, "ranges");
  //Diagnostic.developerLog("Creating MultiRestrictionLineReader");
  final BlockCompressedLineReader bclr = new BlockCompressedLineReader(new BlockCompressedInputStream(new ClosedFileInputStream(input)));
  // Position reader implementation depends on the indexed format
  mReader = tir.getOptions().mFormat == TabixIndexer.TabixOptions.FORMAT_VCF ? new VcfPositionReader(bclr, tir.getOptions().mSkip) : new GenericPositionReader(bclr, tir.getOptions());
  final VirtualOffsets offsets = tir.getFilePointers(ranges);
  // No matching data yields an empty offset set rather than null
  mOffsets = offsets == null ? new VirtualOffsets() : offsets;
  mSequenceLookup = tir.mSequenceLookup;
  populateNext(true);
}
 
源代码21 项目: rtg-tools   文件: BlockCompressedLineReader.java
/**
 * Construct a line reader over the given block-compressed stream.
 * All pointer and buffer state starts at zero; real initialisation is deferred
 * until the first read.
 * @param stream create reader from given stream
 */
public BlockCompressedLineReader(BlockCompressedInputStream stream) {
  mStream = stream;
  mInit = false;
  // File-pointer state
  mFilePointer = 0;
  mLineFilePointer = 0;
  // Buffer state
  mPos = 0;
  mBufferUsed = 0;
  mLineBufferUsed = 0;
  // Line accounting
  mLineNumber = 0;
}
 
源代码22 项目: rtg-tools   文件: ExtractCli.java
/**
 * Writes the leading header of a block-compressed file to the output stream:
 * every line from the start of the file that is empty or begins with the meta
 * character, stopping at the first data line.
 * @param input block-compressed input file
 * @param metaChar character that marks a header line (e.g. '#')
 * @param out destination for the header text
 * @throws IOException if an IO error occurs
 */
private void extractHeader(File input, char metaChar, OutputStream out) throws IOException {
  try (BlockCompressedLineReader bclr = new BlockCompressedLineReader(new BlockCompressedInputStream(input))) {
    String line;
    while ((line = bclr.readLine()) != null && (line.isEmpty() || line.charAt(0) == metaChar)) {
      // NOTE(review): getBytes() uses the platform default charset — confirm whether UTF-8 should be forced
      out.write((line + StringUtils.LS).getBytes());
    }
  }
}
 
源代码23 项目: rtg-tools   文件: TabixIndexer.java
/**
 * check if given file is block compressed
 * @param file file to check
 * @return true iff file is block compressed
 * @throws IOException if an IO error occurs
 */
public static boolean isBlockCompressed(File file) throws IOException {
  // isValidFile requires mark support, which BufferedInputStream provides
  try (InputStream in = new BufferedInputStream(new FileInputStream(file))) {
    return BlockCompressedInputStream.isValidFile(in);
  }
}
 
源代码24 项目: rtg-tools   文件: BedReader.java
// Wraps a tabix line reader over a BED file; the file is opened a second time
// (independently of the record reader) just to parse out the header lines.
private BedReader(TabixLineReader reader, File bedFile, int minAnnotations) throws IOException {
  mMinAnnotations = minAnnotations;
  mIn = reader;
  try (BrLineReader headerSource = new BrLineReader(new BufferedReader(new InputStreamReader(new BlockCompressedInputStream(bedFile))))) {
    mHeader = parseHeader(headerSource);
  }
  // Prime the reader with the first record
  setNext();
}
 
源代码25 项目: rtg-tools   文件: VcfFilterCliTest.java
// Shared driver: runs vcffilter over a resource VCF with the given extra arguments
// and nano-checks both the command output and the resulting records.
private void runResourceTest(String inResourceLoc, String expResourceLoc, String... extraArgs) throws IOException {
  try (TestDirectory dir = new TestDirectory()) {
    // Copy the input resource into the working directory under its original file name
    final String resourceName = new File(Resources.getResource(inResourceLoc).getFile()).getName();
    final File in = FileHelper.resourceToFile(inResourceLoc, new File(dir, resourceName));
    final File out = new File(dir, "out.vcf.gz");

    final String cliOutput = checkMainInitOk(Utils.append(extraArgs, "-i", in.getPath(), "-o", out.getPath()));
    mNano.check(expResourceLoc + ".txt", cliOutput, true);

    // Output must be a correctly terminated BGZF file
    assertEquals(BlockCompressedInputStream.FileTermination.HAS_TERMINATOR_BLOCK, BlockCompressedInputStream.checkTermination(out));

    // Compare only the data records (lines not starting with '#') against the expected resource
    final String records = StringUtils.grep(FileHelper.gzFileToString(out), "^[^#]").replaceAll("[\r\n]+", "\n");
    mNano.check(expResourceLoc, records, true);
  }
}
 
源代码26 项目: rtg-tools   文件: VcfAnnotatorCliTest.java
// Annotates a small VCF with IDs taken from a second VCF (plus AN/AC fill and extra
// format annotations) and nano-checks the resulting records.
public void testNanoVcfIds() throws IOException {
  try (final TestDirectory dir = new TestDirectory()) {
    final File inVcf = FileUtils.stringToFile(mNano.loadReference("snpAnnotate_small.vcf"), new File(dir, "input.vcf"));
    final File idVcf = FileUtils.stringToFile(mNano.loadReference("snpAnnotate_small_ids_vcf.vcf"), new File(dir, "id.vcf"));
    final File outFile = new File(dir, "output.vcf.gz");

    final String str = checkMainInitOk("-i", inVcf.getPath(), "--vcf-ids", idVcf.getPath(), "-o", outFile.getPath(), "--fill-an-ac", "--annotation", "NAA,ZY,PD");
    // Successful runs produce no console output
    assertEquals("", str);
    assertTrue(outFile.isFile());
    // Output must be a correctly terminated BGZF file
    assertEquals(BlockCompressedInputStream.FileTermination.HAS_TERMINATOR_BLOCK, BlockCompressedInputStream.checkTermination(outFile));
    // Compare only the data records (lines not starting with '#')
    final String actual = StringUtils.grep(FileHelper.gzFileToString(outFile), "^[^#]").replaceAll("[\r\n]+", "\n");
    mNano.check("snpAnnotate_small_vcf_ids_exp.vcf", actual, false);
  }
}
 
源代码27 项目: rtg-tools   文件: VcfSubsetTest.java
// vcfsubset keeping only the AC and AN INFO fields; result checked against a nano reference.
public void testKeepInfoACAN() throws Exception {
  try (TestDirectory td = new TestDirectory()) {
    final File input = FileHelper.resourceToGzFile("com/rtg/vcf/resources/vcfsubset.vcf", new File(td, "vcf.vcf.gz"));
    final File output = new File(td, "out.vcf.gz");

    checkMainInitOk("-i", input.getPath(), "-o", output.getPath(), "--keep-info", "AC", "--keep-info", "AN");
    // Output must be a correctly terminated BGZF file
    assertEquals(BlockCompressedInputStream.FileTermination.HAS_TERMINATOR_BLOCK, BlockCompressedInputStream.checkTermination(output));
    mNano.check("vcfsubset-keepinfoACAN.vcf", TestUtils.sanitizeVcfHeader(FileHelper.gzFileToString(output)));
  }
}
 
源代码28 项目: rtg-tools   文件: VcfSubsetTest.java
// vcfsubset --keep-filter: once with an explicit FILTER value and once with PASS,
// each result checked against its own nano reference.
public void testKeepFilter() throws Exception {
  try (TestDirectory td = new TestDirectory()) {
    final File f = FileHelper.resourceToGzFile("com/rtg/vcf/resources/vcfsubset.vcf", new File(td, "vcf.vcf.gz"));
    final File out = new File(td, "out.vcf.gz");
    checkMainInitOk("-i", f.getPath(), "-o", out.getPath(), "--keep-filter", "YEA");
    // Output must be a correctly terminated BGZF file
    assertEquals(BlockCompressedInputStream.FileTermination.HAS_TERMINATOR_BLOCK, BlockCompressedInputStream.checkTermination(out));
    mNano.check("vcfsubset-keepfilter.vcf", TestUtils.sanitizeVcfHeader(FileHelper.gzFileToString(out)));

    // Second run: keep only PASS records
    final File out2 = new File(td, "out2.vcf.gz");
    checkMainInitOk("-i", f.getPath(), "-o", out2.getPath(), "--keep-filter", "PASS");
    assertEquals(BlockCompressedInputStream.FileTermination.HAS_TERMINATOR_BLOCK, BlockCompressedInputStream.checkTermination(out2));
    mNano.check("vcfsubset-keepfilter-pass.vcf", TestUtils.sanitizeVcfHeader(FileHelper.gzFileToString(out2)));
  }
}
 
源代码29 项目: rtg-tools   文件: GenericPositionReaderTest.java
// Builds a GenericPositionReader over in-memory block-compressed text, using
// generic tabix options (sequence in column 0, begin/end both in column 1).
private static GenericPositionReader makeGpr(String contents) throws IOException {
  // Block-compress the supplied text into a byte array
  final ByteArrayOutputStream compressed = new ByteArrayOutputStream();
  try (final BlockCompressedOutputStream bcos = new BlockCompressedOutputStream(compressed, (File) null)) {
    bcos.write(contents.getBytes());
  }
  final BlockCompressedLineReader lineReader = new BlockCompressedLineReader(new BlockCompressedInputStream(new ByteArrayInputStream(compressed.toByteArray())));
  final TabixIndexer.TabixOptions options = new TabixIndexer.TabixOptions(TabixIndexer.TabixOptions.FORMAT_GENERIC, 0, 1, 1, '#', 0, false);
  return new GenericPositionReader(lineReader, options);
}
 
源代码30 项目: rtg-tools   文件: TabixIndexerTest.java
// Builds a tabix index for a SAM resource and verifies its debug representation
// matches that of a pre-built reference index.
// NOTE(review): file(...) and lazyStream(...) are helpers defined elsewhere in this class.
public void test() throws Exception {
  final File index = file("index");
  final File sam = FileHelper.resourceToFile("com/rtg/sam/resources/test.sam.gz", file("test.sam.gz"));
  new TabixIndexer(sam, index).saveSamIndex();
  // Debug string of the freshly built index
  final String myBai = IndexTestUtils.tbiIndexToUniqueString(new BlockCompressedInputStream(lazyStream(index)));
  final String exp;
  // Debug string of the reference index shipped as a resource
  try (InputStream baiIs = new BlockCompressedInputStream(Resources.getResourceAsStream("com/rtg/sam/resources/test.sam.gz.tbi"))) {
    exp = IndexTestUtils.tbiIndexToUniqueString(baiIs);
  }

  assertEquals(exp, myBai);
}