Listed below are example usages of org.apache.hadoop.io.compress.Decompressor (collected under org.apache.hadoop.fs.Seekable). You can follow the source links to view the full code on GitHub.
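The snippets share one pattern: borrow a Decompressor from CodecPool for a given CompressionCodec, use it to wrap an input stream, and return it to the pool when finished. As a minimal, hedged sketch of that pattern (the class name DecompressorPoolExample, the printFile method, and the file path handling are illustrative assumptions, not from the original listing):

import java.io.InputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.compress.CodecPool;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.io.compress.Decompressor;

public class DecompressorPoolExample {
  public static void printFile(Configuration conf, Path path) throws Exception {
    FileSystem fs = FileSystem.get(conf);
    // Pick the codec from the file extension (e.g. .gz, .bz2); null means uncompressed.
    CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(path);
    if (codec == null) {
      IOUtils.copyBytes(fs.open(path), System.out, conf, true);
      return;
    }
    // Borrow a Decompressor from the pool and hand it back in a finally block.
    Decompressor decompressor = CodecPool.getDecompressor(codec);
    try (InputStream in = codec.createInputStream(fs.open(path), decompressor)) {
      IOUtils.copyBytes(in, System.out, conf, false);
    } finally {
      CodecPool.returnDecompressor(decompressor);
    }
  }
}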
public Decompressor getDecompressor() throws IOException {
  CompressionCodec codec = getCodec();
  if (codec != null) {
    Decompressor decompressor = CodecPool.getDecompressor(codec);
    if (decompressor != null) {
      if (decompressor.finished()) {
        // Somebody returned the decompressor to CodecPool but is still using it.
        LOG.warn("Decompressor obtained from CodecPool already finished()");
      } else {
        if (LOG.isDebugEnabled()) {
          LOG.debug("Got a decompressor: " + decompressor.hashCode());
        }
      }
      /**
       * Following statement is necessary to get around bugs in 0.18 where a
       * decompressor is referenced after being returned back to the codec pool.
       */
      decompressor.reset();
    }
    return decompressor;
  }
  return null;
}
@Test
public void testZlibCompressorDecompressorSetDictionary() {
  Configuration conf = new Configuration();
  conf.setBoolean(CommonConfigurationKeys.IO_NATIVE_LIB_AVAILABLE_KEY, true);
  if (ZlibFactory.isNativeZlibLoaded(conf)) {
    Compressor zlibCompressor = ZlibFactory.getZlibCompressor(conf);
    Decompressor zlibDecompressor = ZlibFactory.getZlibDecompressor(conf);
    checkSetDictionaryNullPointerException(zlibCompressor);
    checkSetDictionaryNullPointerException(zlibDecompressor);
    checkSetDictionaryArrayIndexOutOfBoundsException(zlibDecompressor);
    checkSetDictionaryArrayIndexOutOfBoundsException(zlibCompressor);
  } else {
    assertTrue("ZlibFactory is using native libs against request",
        ZlibFactory.isNativeZlibLoaded(conf));
  }
}
@Test
public void testZlibCompressorDecompressorWithConfiguration() {
  Configuration conf = new Configuration();
  conf.setBoolean(CommonConfigurationKeys.IO_NATIVE_LIB_AVAILABLE_KEY, true);
  if (ZlibFactory.isNativeZlibLoaded(conf)) {
    byte[] rawData;
    int tryNumber = 5;
    int BYTE_SIZE = 10 * 1024;
    Compressor zlibCompressor = ZlibFactory.getZlibCompressor(conf);
    Decompressor zlibDecompressor = ZlibFactory.getZlibDecompressor(conf);
    rawData = generate(BYTE_SIZE);
    try {
      for (int i = 0; i < tryNumber; i++)
        compressDecompressZlib(rawData, (ZlibCompressor) zlibCompressor,
            (ZlibDecompressor) zlibDecompressor);
      zlibCompressor.reinit(conf);
    } catch (Exception ex) {
      fail("testZlibCompressorDecompressorWithConfiguration ex error " + ex);
    }
  } else {
    assertTrue("ZlibFactory is using native libs against request",
        ZlibFactory.isNativeZlibLoaded(conf));
  }
}
private InputStream openFile(Path path) throws IOException {
  CompressionCodec codec = new CompressionCodecFactory(miniCluster.getConfig()).getCodec(path);
  FSDataInputStream fileIn = dfsCluster.getFileSystem().open(path);
  // check if compressed
  if (codec == null) { // uncompressed
    return fileIn;
  } else { // compressed
    Decompressor decompressor = CodecPool.getDecompressor(codec);
    this.openDecompressors.add(decompressor); // to be returned later using close
    if (codec instanceof SplittableCompressionCodec) {
      long end = dfsCluster.getFileSystem().getFileStatus(path).getLen();
      final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
          fileIn, decompressor, 0, end, SplittableCompressionCodec.READ_MODE.CONTINUOUS);
      return cIn;
    } else {
      return codec.createInputStream(fileIn, decompressor);
    }
  }
}
public InputStream openFile(Path path) throws IOException {
  CompressionCodec codec = compressionCodecs.getCodec(path);
  FSDataInputStream fileIn = fs.open(path);
  // check if compressed
  if (codec == null) { // uncompressed
    LOG.debug("Reading from an uncompressed file \"" + path + "\"");
    return fileIn;
  } else { // compressed
    Decompressor decompressor = CodecPool.getDecompressor(codec);
    this.openDecompressors.add(decompressor); // to be returned later using close
    if (codec instanceof SplittableCompressionCodec) {
      LOG.debug("Reading from a compressed file \"" + path + "\" with splittable compression codec");
      long end = fs.getFileStatus(path).getLen();
      return ((SplittableCompressionCodec) codec).createInputStream(
          fileIn, decompressor, 0, end, SplittableCompressionCodec.READ_MODE.CONTINUOUS);
    } else {
      LOG.debug("Reading from a compressed file \"" + path + "\" with non-splittable compression codec");
      return codec.createInputStream(fileIn, decompressor);
    }
  }
}
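The openFile variants above add each borrowed decompressor to openDecompressors so it can be returned to the pool later ("to be returned later using close"). A hypothetical close() counterpart is sketched below; the openDecompressors field (assumed here to be a collection of Decompressor populated by openFile) mirrors the snippet and is not shown in the original listing:

// Hypothetical counterpart to openFile(): return every borrowed Decompressor to the pool.
public void close() throws IOException {
  for (Decompressor decompressor : openDecompressors) {
    if (decompressor != null) {
      CodecPool.returnDecompressor(decompressor);
    }
  }
  openDecompressors.clear();
}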
public Decompressor getDecompressor() throws IOException {
  CompressionCodec codec = getCodec();
  if (codec != null) {
    Decompressor decompressor = CodecPool.getDecompressor(codec);
    if (decompressor != null) {
      if (decompressor.finished()) {
        // Somebody returned the decompressor to CodecPool but is still using it.
        LOG.warn("Decompressor obtained from CodecPool already finished()");
      } else {
        LOG.debug("Got a decompressor: " + decompressor.hashCode());
      }
      /**
       * Following statement is necessary to get around bugs in 0.18 where a
       * decompressor is referenced after being returned back to the codec pool.
       */
      decompressor.reset();
    }
    return decompressor;
  }
  return null;
}
private InputStream openFile(Path path) throws IOException {
  CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(path);
  FSDataInputStream fileIn = dfsCluster.getFileSystem().open(path);
  // check if compressed
  if (codec == null) { // uncompressed
    return fileIn;
  } else { // compressed
    Decompressor decompressor = CodecPool.getDecompressor(codec);
    this.openDecompressors.add(decompressor); // to be returned later using close
    if (codec instanceof SplittableCompressionCodec) {
      long end = dfsCluster.getFileSystem().getFileStatus(path).getLen();
      final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
          fileIn, decompressor, 0, end, SplittableCompressionCodec.READ_MODE.CONTINUOUS);
      return cIn;
    } else {
      return codec.createInputStream(fileIn, decompressor);
    }
  }
}
public static BufferedReader getBufferedReader(File file, MapredContext context)
    throws IOException {
  URI fileuri = file.toURI();
  Path path = new Path(fileuri);
  Configuration conf = context.getJobConf();
  CompressionCodecFactory ccf = new CompressionCodecFactory(conf);
  CompressionCodec codec = ccf.getCodec(path);
  if (codec == null) {
    return new BufferedReader(new FileReader(file));
  } else {
    Decompressor decompressor = CodecPool.getDecompressor(codec);
    FileInputStream fis = new FileInputStream(file);
    CompressionInputStream cis = codec.createInputStream(fis, decompressor);
    BufferedReader br = new BufferedReaderExt(new InputStreamReader(cis), decompressor);
    return br;
  }
}
public RBlockState(Algorithm compressionAlgo, FSDataInputStream fsin, BlockRegion region,
    Configuration conf, Reader r) throws IOException
{
  this.compressAlgo = compressionAlgo;
  Decompressor decompressor = compressionAlgo.getDecompressor();
  this.region = region;
  try {
    InputStream in = compressAlgo.createDecompressionStream(
        new BoundedRangeFileInputStream(fsin, region.getOffset(), region.getCompressedSize()),
        decompressor, DTFile.getFSInputBufferSize(conf));
    int l = 1;
    r.baos.reset();
    byte[] buf = new byte[DTFile.getFSInputBufferSize(conf)];
    while (l >= 0) {
      l = in.read(buf);
      if (l > 0) {
        r.baos.write(buf, 0, l);
      }
    }
    // keep decompressed data into cache
    byte[] blockData = r.baos.toByteArray();
    rbain = new ReusableByteArrayInputStream(blockData);
  } catch (IOException e) {
    compressAlgo.returnDecompressor(decompressor);
    throw e;
  }
}
@Test
public void testInternalErrorTranslation() throws Exception {
  String codecErrorMsg = "codec failure";
  CompressionInputStream mockCodecStream = mock(CompressionInputStream.class);
  when(mockCodecStream.read(any(byte[].class), anyInt(), anyInt()))
      .thenThrow(new InternalError(codecErrorMsg));
  Decompressor mockDecoder = mock(Decompressor.class);
  CompressionCodec mockCodec = mock(ConfigurableCodecForTest.class);
  when(mockCodec.createDecompressor()).thenReturn(mockDecoder);
  when(mockCodec.createInputStream(any(InputStream.class), any(Decompressor.class)))
      .thenReturn(mockCodecStream);
  byte[] header = new byte[] { (byte) 'T', (byte) 'I', (byte) 'F', (byte) 1 };
  try {
    ShuffleUtils.shuffleToMemory(new byte[1024], new ByteArrayInputStream(header),
        1024, 128, mockCodec, false, 0, mock(Logger.class), null);
    Assert.fail("shuffle was supposed to throw!");
  } catch (IOException e) {
    Assert.assertTrue(e.getCause() instanceof InternalError);
    Assert.assertTrue(e.getMessage().contains(codecErrorMsg));
  }
}
private ByteBuffer decompress(CompressionCodec compressor, InputStream cellBlockStream,
    int osInitialSize) throws IOException {
  // GZIPCodec fails w/ NPE if no configuration.
  if (compressor instanceof Configurable) {
    ((Configurable) compressor).setConf(this.conf);
  }
  Decompressor poolDecompressor = CodecPool.getDecompressor(compressor);
  CompressionInputStream cis = compressor.createInputStream(cellBlockStream, poolDecompressor);
  ByteBufferOutputStream bbos;
  try {
    // TODO: This is ugly. The buffer will be resized on us if we guess wrong.
    // TODO: Reuse buffers.
    bbos = new ByteBufferOutputStream(osInitialSize);
    IOUtils.copy(cis, bbos);
    bbos.close();
    return bbos.getByteBuffer();
  } finally {
    CodecPool.returnDecompressor(poolDecompressor);
  }
}
public Decompressor getDecompressor() {
  CompressionCodec codec = getCodec(conf);
  if (codec != null) {
    Decompressor decompressor = CodecPool.getDecompressor(codec);
    if (LOG.isTraceEnabled()) LOG.trace("Retrieved decompressor " + decompressor + " from pool.");
    if (decompressor != null) {
      if (decompressor.finished()) {
        // Somebody returned the decompressor to CodecPool but is still using it.
        LOG.warn("Decompressor obtained from CodecPool is already finished()");
      }
      decompressor.reset();
    }
    return decompressor;
  }
  return null;
}
public DecompressorStream(InputStream in, Decompressor decompressor, int bufferSize) {
  super(in);
  if (in == null || decompressor == null) {
    throw new NullPointerException();
  } else if (bufferSize <= 0) {
    throw new IllegalArgumentException("Illegal bufferSize");
  }
  this.decompressor = decompressor;
  buffer = new byte[bufferSize];
}
private void verifyCompressedFile(Path f, int expectedNumLines)
    throws IOException {
  Configuration conf = new Configuration();
  if (!BaseSqoopTestCase.isOnPhysicalCluster()) {
    conf.set(CommonArgs.FS_DEFAULT_NAME, CommonArgs.LOCAL_FS);
  }
  FileSystem fs = FileSystem.get(conf);
  InputStream is = fs.open(f);
  CompressionCodecFactory ccf = new CompressionCodecFactory(conf);
  CompressionCodec codec = ccf.getCodec(f);
  LOG.info("gzip check codec is " + codec);
  Decompressor decompressor = CodecPool.getDecompressor(codec);
  if (null == decompressor) {
    LOG.info("Verifying gzip sanity with null decompressor");
  } else {
    LOG.info("Verifying gzip sanity with decompressor: " + decompressor.toString());
  }
  is = codec.createInputStream(is, decompressor);
  BufferedReader r = new BufferedReader(new InputStreamReader(is));
  int numLines = 0;
  while (true) {
    String ln = r.readLine();
    if (ln == null) {
      break;
    }
    numLines++;
  }
  r.close();
  assertEquals("Did not read back correct number of lines",
      expectedNumLines, numLines);
  LOG.info("gzip sanity check returned " + numLines + " lines; ok.");
}
@Test
public void testMultipleClose() throws IOException {
  URL testFileUrl = getClass().getClassLoader().
      getResource("recordSpanningMultipleSplits.txt.bz2");
  assertNotNull("Cannot find recordSpanningMultipleSplits.txt.bz2",
      testFileUrl);
  File testFile = new File(testFileUrl.getFile());
  Path testFilePath = new Path(testFile.getAbsolutePath());
  long testFileSize = testFile.length();
  Configuration conf = new Configuration();
  conf.setInt(org.apache.hadoop.mapreduce.lib.input.
      LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
  FileSplit split = new FileSplit(testFilePath, 0, testFileSize,
      (String[]) null);
  LineRecordReader reader = new LineRecordReader(conf, split);
  LongWritable key = new LongWritable();
  Text value = new Text();
  //noinspection StatementWithEmptyBody
  while (reader.next(key, value)) ;
  reader.close();
  reader.close();
  BZip2Codec codec = new BZip2Codec();
  codec.setConf(conf);
  Set<Decompressor> decompressors = new HashSet<Decompressor>();
  for (int i = 0; i < 10; ++i) {
    decompressors.add(CodecPool.getDecompressor(codec));
  }
  assertEquals(10, decompressors.size());
}
@Test
public void testMultipleClose() throws IOException {
  URL testFileUrl = getClass().getClassLoader().
      getResource("recordSpanningMultipleSplits.txt.bz2");
  assertNotNull("Cannot find recordSpanningMultipleSplits.txt.bz2",
      testFileUrl);
  File testFile = new File(testFileUrl.getFile());
  Path testFilePath = new Path(testFile.getAbsolutePath());
  long testFileSize = testFile.length();
  Configuration conf = new Configuration();
  conf.setInt(org.apache.hadoop.mapreduce.lib.input.
      LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
  // read the data and check whether BOM is skipped
  FileSplit split = new FileSplit(testFilePath, 0, testFileSize, null);
  LineRecordReader reader = new LineRecordReader();
  reader.initialize(split, context);
  //noinspection StatementWithEmptyBody
  while (reader.nextKeyValue()) ;
  reader.close();
  reader.close();
  BZip2Codec codec = new BZip2Codec();
  codec.setConf(conf);
  Set<Decompressor> decompressors = new HashSet<Decompressor>();
  for (int i = 0; i < 10; ++i) {
    decompressors.add(CodecPool.getDecompressor(codec));
  }
  assertEquals(10, decompressors.size());
}
public DecompressorStream(InputStream in, Decompressor decompressor,
    int bufferSize) throws IOException {
  super(in);
  if (decompressor == null) {
    throw new NullPointerException();
  } else if (bufferSize <= 0) {
    throw new IllegalArgumentException("Illegal bufferSize");
  }
  this.decompressor = decompressor;
  buffer = new byte[bufferSize];
}
protected MrGeoRaster toNonWritable(byte[] val, CompressionCodec codec, Decompressor decompressor)
    throws IOException {
  DataInputBuffer dib = new DataInputBuffer();
  dib.reset(val, val.length);
  RasterWritable rw = new RasterWritable();
  rw.readFields(dib);
  if (codec == null || decompressor == null) {
    return RasterWritable.toMrGeoRaster(rw);
  }
  return RasterWritable.toMrGeoRaster(rw, codec, decompressor);
}
@Override
public synchronized InputStream createDecompressionStream(
    InputStream downStream, Decompressor decompressor,
    int downStreamBufferSize) throws IOException {
  // Set the internal buffer size to read from down stream.
  if (downStreamBufferSize > 0) {
    codec.getConf().setInt("io.file.buffer.size", downStreamBufferSize);
  }
  CompressionInputStream cis =
      codec.createInputStream(downStream, decompressor);
  BufferedInputStream bis2 = new BufferedInputStream(cis, DATA_IBUF_SIZE);
  return bis2;
}
@Override
public synchronized InputStream createDecompressionStream(
    InputStream downStream, Decompressor decompressor,
    int downStreamBufferSize) throws IOException {
  if (downStreamBufferSize > 0) {
    return new BufferedInputStream(downStream, downStreamBufferSize);
  }
  return downStream;
}
/**
 * Get a {@link Decompressor} for the given {@link CompressionCodec} from the
 * pool or a new one.
 *
 * @param codec
 *          the <code>CompressionCodec</code> for which to get the
 *          <code>Decompressor</code>
 * @return <code>Decompressor</code> for the given
 *         <code>CompressionCodec</code> from the pool or a new one
 */
public static Decompressor getDecompressor(CompressionCodec codec) {
  Decompressor decompressor = borrow(DECOMPRESSOR_POOL, codec.getDecompressorType());
  if (decompressor == null) {
    decompressor = codec.createDecompressor();
    LOG.info("Got brand-new decompressor [" + codec.getDefaultExtension() + "]");
  } else {
    if (LOG.isDebugEnabled()) {
      LOG.debug("Got recycled decompressor");
    }
  }
  return decompressor;
}
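As a quick illustration of the recycling branch above, returning a decompressor makes it available to the next caller for the same codec type. The sketch below is added here for illustration and is not part of the original listing; GzipCodec is just an example codec, and the identity check is what one would expect from a single-threaded caller:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CodecPool;
import org.apache.hadoop.io.compress.Decompressor;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.util.ReflectionUtils;

public class CodecPoolRecycleExample {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    GzipCodec codec = ReflectionUtils.newInstance(GzipCodec.class, conf);
    Decompressor first = CodecPool.getDecompressor(codec);   // "brand-new" on the first call
    CodecPool.returnDecompressor(first);
    Decompressor second = CodecPool.getDecompressor(codec);  // expected to hit the "recycled" branch
    System.out.println("recycled: " + (first == second));
  }
}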