下面列出了 org.apache.hadoop.io.compress.GzipCodec#createOutputStream() 的实例代码，或者点击链接到 GitHub 查看源代码，也可以在右侧发表评论。
/**
 * Writes the two-record FASTQ fixture through a gzip codec into {@code tempGz},
 * then reads it back with a {@code FastqRecordReader} and checks the key and
 * sequence of both records.
 */
@Test
public void testGzCompressedInput() throws IOException
{
    // Build the gzip-compressed fixture file, one stream layer at a time.
    GzipCodec gzip = new GzipCodec();
    FileOutputStream rawFile = new FileOutputStream(tempGz);
    BufferedOutputStream buffered = new BufferedOutputStream(gzip.createOutputStream(rawFile));
    PrintWriter writer = new PrintWriter(buffered);
    writer.write(twoFastq);
    writer.close();

    // Read the file back as a single split starting at offset 0.
    Path gzPath = new Path(tempGz.toURI().toString());
    split = new FileSplit(gzPath, 0, twoFastq.length(), null);
    FastqRecordReader reader = new FastqRecordReader(conf, split);

    // First record.
    String firstSequence = "TTGGATGATAGGGATTATTTGACTCGAATATTGGAAATAGCTGTTTATATTTTTTAAAAATGGTCTGTAACTGGTGACAGGACGCTTCGAT";
    assertTrue(reader.next(key, fragment));
    assertEquals("ERR020229.10880 HWI-ST168_161:1:1:1373:2042/1", key.toString());
    assertEquals(firstSequence, fragment.getSequence().toString());

    // Second record.
    String secondSequence = "TGAGCAGATGTGCTAAAGCTGCTTCTCCCCTAGGATCATTTGTACCTACCAGACTCAGGGAAAGGGGTGAGAATTGGGCCGTGGGGCAAGG";
    assertTrue(reader.next(key, fragment));
    assertEquals("ERR020229.10883 HWI-ST168_161:1:1:1796:2044/1", key.toString());
    assertEquals(secondSequence, fragment.getSequence().toString());
}
/**
 * Writes the two-record QSEQ fixture through a gzip codec into {@code tempGz},
 * then reads it back with a {@code QseqRecordReader} and checks the key and
 * sequence of both records.
 */
@Test
public void testGzCompressedInput() throws IOException
{
    // Build the gzip-compressed fixture file, one stream layer at a time.
    GzipCodec gzip = new GzipCodec();
    FileOutputStream rawFile = new FileOutputStream(tempGz);
    BufferedOutputStream buffered = new BufferedOutputStream(gzip.createOutputStream(rawFile));
    PrintWriter writer = new PrintWriter(buffered);
    writer.write(twoQseq);
    writer.close();

    // Read the file back as a single split starting at offset 0.
    Path gzPath = new Path(tempGz.toURI().toString());
    split = new FileSplit(gzPath, 0, twoQseq.length(), null);
    QseqRecordReader reader = new QseqRecordReader(conf, split);

    // First record.
    String firstSequence = "TTGGATGATAGGGATTATTTGACTCGAATATTGGAAATAGCTGTTTATATTTTTTAAAAATGGTCTGTAACTGGTGACAGGACGCTTCGAT";
    assertTrue(reader.next(key, fragment));
    assertEquals("ERR020229:10880:1:1:1373:2042:1", key.toString());
    assertEquals(firstSequence, fragment.getSequence().toString());

    // Second record.
    String secondSequence = "TGAGCAGATGTGCTAAAGCTGCTTCTCCCCTAGGATCATTTGTACCTACCAGACTCAGGGAAAGGGGTGAGAATTGGGCCGTGGGGCAAGG";
    assertTrue(reader.next(key, fragment));
    assertEquals("ERR020229:10883:1:1:1796:2044:2", key.toString());
    assertEquals(secondSequence, fragment.getSequence().toString());
}
/**
 * Creates a stream over one split of a table. Sets up the row iterator, resets
 * the buffer, wires a gzip codec output stream into an in-memory pipe, and
 * starts a thread that runs {@code streamAndCompressInput()}.
 *
 * @param table table name passed to {@code DataStoreStreaming.getSplit}
 * @param split split identifier; when {@code isEmptySplit(split)} is true no
 *              DataStore is opened and the row iterator is empty
 * @throws IOException declared for the DataStore lookup and pipe/codec setup
 */
private EmoSplitInputStream(String table, String split)
        throws IOException {
    if (!isEmptySplit(split)) {
        // Obtain the DataStore, hand it to the closer, and begin streaming the split's rows.
        CloseableDataStore store = HadoopDataStoreManager.getInstance().getDataStore(_uri, _apiKey, _metricRegistry);
        _closer.register(store);
        _rows = DataStoreStreaming.getSplit(store, table, split, false, ReadConsistency.STRONG).iterator();
    } else {
        _rows = Iterators.emptyIterator();
    }

    // Clear the buffer, then set its limit to 0.
    _buffer.clear();
    _buffer.limit(0);

    GzipCodec codec = new GzipCodec();
    codec.setConf(new Configuration());

    // Pipe wiring: the codec stream (_rawOut) writes into gzipSink, which feeds _gzipIn
    // through a 10 MiB pipe buffer. The input side is constructed (and thereby connected
    // to the sink) before the codec wraps the sink.
    // NOTE(review): that order looks required if the codec writes a header on creation - confirm.
    PipedOutputStream gzipSink = new PipedOutputStream();
    _gzipIn = new PipedInputStream(gzipSink, 10 * 1024 * 1024);
    _rawOut = codec.createOutputStream(gzipSink);
    _closer.register(_gzipIn);
    _closer.register(gzipSink);

    // Launch the asynchronous buffering thread.
    Runnable compressTask = new Runnable() {
        @Override
        public void run() {
            streamAndCompressInput();
        }
    };
    _bufferThread = new Thread(compressTask);
    _bufferThread.start();
}
/**
 * Expects constructing a {@code FastqRecordReader} over a gzip-compressed file
 * with a split starting at byte offset 10 to throw a {@link RuntimeException}.
 *
 * The expectation is scoped to the constructor call only, so a
 * {@code RuntimeException} raised while preparing the fixture fails the test
 * instead of being mistaken for the expected outcome.
 */
@Test
public void testCompressedSplit() throws IOException
{
    // write gzip-compressed data
    GzipCodec codec = new GzipCodec();
    PrintWriter fastqOut = new PrintWriter( new BufferedOutputStream( codec.createOutputStream( new FileOutputStream(tempGz) ) ) );
    fastqOut.write(twoFastq);
    fastqOut.close();
    // PrintWriter never throws on I/O failure; it only records an error flag.
    if (fastqOut.checkError()) {
        throw new IOException("failed to write gzip-compressed FASTQ fixture to " + tempGz);
    }

    // now try to read it starting from the middle
    split = new FileSplit(new Path(tempGz.toURI().toString()), 10, twoFastq.length(), null);
    try {
        new FastqRecordReader(conf, split);
    } catch (RuntimeException expected) {
        // This is the outcome under test.
        return;
    }
    throw new AssertionError("expected RuntimeException for a compressed split that does not start at offset 0");
}
/**
 * Expects constructing a {@code QseqRecordReader} over a gzip-compressed file
 * with a split starting at byte offset 10 to throw a {@link RuntimeException}.
 *
 * The expectation is scoped to the constructor call only, so a
 * {@code RuntimeException} raised while preparing the fixture fails the test
 * instead of being mistaken for the expected outcome.
 */
@Test
public void testCompressedSplit() throws IOException
{
    // write gzip-compressed data
    GzipCodec codec = new GzipCodec();
    PrintWriter qseqOut = new PrintWriter( new BufferedOutputStream( codec.createOutputStream( new FileOutputStream(tempGz) ) ) );
    qseqOut.write(twoQseq);
    qseqOut.close();
    // PrintWriter never throws on I/O failure; it only records an error flag.
    if (qseqOut.checkError()) {
        throw new IOException("failed to write gzip-compressed QSEQ fixture to " + tempGz);
    }

    // now try to read it starting from the middle
    split = new FileSplit(new Path(tempGz.toURI().toString()), 10, twoQseq.length(), null);
    try {
        new QseqRecordReader(conf, split);
    } catch (RuntimeException expected) {
        // This is the outcome under test.
        return;
    }
    throw new AssertionError("expected RuntimeException for a compressed split that does not start at offset 0");
}
/**
 * Writes gzip-compressed rows of random numbers to {@code file}.
 *
 * Each row holds {@code columnCount} values from {@link Math#random()}, joined
 * by {@code colSeparator} and terminated by {@code "\n"}.
 *
 * @param file         destination file
 * @param recordCounts number of rows to write
 * @param columnCount  number of values per row
 * @param colSeparator text placed between adjacent values of a row
 * @throws IOException if opening, writing, or closing the output fails
 */
@Override
public void writeTestData(File file, int recordCounts, int columnCount,
        String colSeparator) throws IOException {
    GzipCodec codec = new GzipCodec();
    CompressionOutputStream gzipOut = codec.createOutputStream(new FileOutputStream(file));
    BufferedWriter rows = new BufferedWriter(new OutputStreamWriter(gzipOut));
    try {
        int row = 0;
        while (row < recordCounts) {
            for (int col = 0; col < columnCount; col++) {
                // A separator goes before every column except the first.
                if (col > 0) {
                    rows.append(colSeparator);
                }
                rows.append(Double.toString(Math.random()));
            }
            rows.append("\n");
            row++;
        }
    } finally {
        // Close the writer first, then the codec stream it wraps.
        rows.close();
        gzipOut.close();
    }
}