Code examples for the org.apache.hadoop.io.compress.CompressionCodec class

The examples below show how to use the org.apache.hadoop.io.compress.CompressionCodec API; you can also follow each project's link to GitHub to view the original source.
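
Most of the examples below follow one of two patterns: resolving a codec from a file name with CompressionCodecFactory and wrapping the raw input stream via createInputStream, or instantiating a codec class directly and wrapping an output stream via createOutputStream. As orientation, here is a minimal, self-contained sketch of the read-side pattern; the class name CodecCatDemo and the command-line argument are illustrative only and do not come from any of the projects listed below.

import java.io.IOException;
import java.io.InputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;

public class CodecCatDemo {
  // Prints a possibly compressed file to stdout, choosing the codec from the
  // file extension (.gz, .bz2, .deflate, ...).
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    Path path = new Path(args[0]);                      // e.g. /data/part-r-00000.gz
    FileSystem fs = path.getFileSystem(conf);

    CompressionCodecFactory factory = new CompressionCodecFactory(conf);
    CompressionCodec codec = factory.getCodec(path);    // null if the extension is unknown

    InputStream in = fs.open(path);
    if (codec != null) {
      in = codec.createInputStream(in);                 // wrap with a decompressing stream
    }
    try {
      IOUtils.copyBytes(in, System.out, 4096, false);
    } finally {
      IOUtils.closeStream(in);
    }
  }
}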

Example 1  Project: big-c  File: FileOutputFormat.java

/**
 * Get the {@link CompressionCodec} for compressing the job outputs.
 * @param job the {@link Job} to look in
 * @param defaultValue the {@link CompressionCodec} to return if not set
 * @return the {@link CompressionCodec} to be used to compress the 
 *         job outputs
 * @throws IllegalArgumentException if the class was specified, but not found
 */
public static Class<? extends CompressionCodec> 
getOutputCompressorClass(JobContext job, 
                       Class<? extends CompressionCodec> defaultValue) {
  Class<? extends CompressionCodec> codecClass = defaultValue;
  Configuration conf = job.getConfiguration();
  String name = conf.get(FileOutputFormat.COMPRESS_CODEC);
  if (name != null) {
    try {
      codecClass = conf.getClassByName(name).asSubclass(CompressionCodec.class);
    } catch (ClassNotFoundException e) {
      throw new IllegalArgumentException("Compression codec " + name + 
                                         " was not found.", e);
    }
  }
  return codecClass;
}
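
For context, getOutputCompressorClass() only reads back what was configured on the job. Below is a short, hedged usage sketch of the write side using the standard org.apache.hadoop.mapreduce.lib.output.FileOutputFormat setters; the job name and the choice of GzipCodec are illustrative.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.DefaultCodec;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class OutputCompressionDemo {
  public static void main(String[] args) throws Exception {
    // Enable output compression and pick the codec on the configuring side ...
    Job job = Job.getInstance(new Configuration(), "compressed-output-demo");
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

    // ... which is what getOutputCompressorClass() above resolves later,
    // falling back to DefaultCodec when nothing was configured.
    Class<? extends CompressionCodec> codecClass =
        FileOutputFormat.getOutputCompressorClass(job, DefaultCodec.class);
    System.out.println("Resolved codec class: " + codecClass.getName());
  }
}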
 
Example 2  Project: incubator-tez  File: TezMerger.java

public static <K extends Object, V extends Object>
  TezRawKeyValueIterator merge(Configuration conf, FileSystem fs,
                          Class keyClass, Class valueClass,
                          CompressionCodec codec,
                          List<Segment> segments,
                          int mergeFactor, int inMemSegments, Path tmpDir,
                          RawComparator comparator, Progressable reporter,
                          boolean sortSegments,
                          TezCounter readsCounter,
                          TezCounter writesCounter,
                          TezCounter bytesReadCounter,
                          Progress mergePhase)
    throws IOException {
  return new MergeQueue(conf, fs, segments, comparator, reporter,
                         sortSegments, codec, false).merge(keyClass, valueClass,
                                             mergeFactor, inMemSegments,
                                             tmpDir,
                                             readsCounter, writesCounter,
                                             bytesReadCounter,
                                             mergePhase);
}
 

@Test
public void readBitcoinRawBlockInputFormatGzipCompressed() throws IOException, InterruptedException {
  Configuration conf = new Configuration(defaultConf);
  Job job = Job.getInstance(conf);
  CompressionCodec gzip = new GzipCodec();
  ReflectionUtils.setConf(gzip, conf);
  ClassLoader classLoader = getClass().getClassLoader();
  String fileName = "version4comp.blk.gz";
  String fileNameBlock = classLoader.getResource("testdata/" + fileName).getFile();
  Path file = new Path(fileNameBlock);
  FileInputFormat.setInputPaths(job, file);
  BitcoinRawBlockFileInputFormat format = new BitcoinRawBlockFileInputFormat();
  List<InputSplit> splits = format.getSplits(job);
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
  assertEquals(1, splits.size(), "Only one split generated for compressed block");
  RecordReader<BytesWritable, BytesWritable> reader = format.createRecordReader(splits.get(0), context);
  assertNotNull(reader, "Format returned null RecordReader");
  reader.initialize(splits.get(0), context);
  BytesWritable key = new BytesWritable();
  BytesWritable block = new BytesWritable();
  assertTrue(reader.nextKeyValue(), "Input Split for block version contains at least one block");
  block = reader.getCurrentValue();
  assertEquals(998039, block.getLength(), "Compressed block must have a size of 998,039 bytes");
  assertFalse(reader.nextKeyValue(), "No further blocks in compressed block");
  reader.close();
}
 
Example 4  Project: big-c  File: InMemoryMapOutput.java

public InMemoryMapOutput(Configuration conf, TaskAttemptID mapId,
                         MergeManagerImpl<K, V> merger,
                         int size, CompressionCodec codec,
                         boolean primaryMapOutput) {
  super(mapId, (long)size, primaryMapOutput);
  this.conf = conf;
  this.merger = merger;
  this.codec = codec;
  byteStream = new BoundedByteArrayOutputStream(size);
  memory = byteStream.getBuffer();
  if (codec != null) {
    decompressor = CodecPool.getDecompressor(codec);
  } else {
    decompressor = null;
  }
}
 

@Test
public void readBitcoinRawBlockInputFormatBzip2Compressed() throws IOException, InterruptedException {
  Configuration conf = new Configuration(defaultConf);
  Job job = Job.getInstance(conf);
  CompressionCodec bzip2 = new BZip2Codec();
  ReflectionUtils.setConf(bzip2, conf);
  ClassLoader classLoader = getClass().getClassLoader();
  String fileName = "version4comp.blk.bz2";
  String fileNameBlock = classLoader.getResource("testdata/" + fileName).getFile();
  Path file = new Path(fileNameBlock);
  FileInputFormat.setInputPaths(job, file);
  BitcoinRawBlockFileInputFormat format = new BitcoinRawBlockFileInputFormat();
  List<InputSplit> splits = format.getSplits(job);
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
  assertEquals(1, splits.size(), "Only one split generated for compressed block");
  RecordReader<BytesWritable, BytesWritable> reader = format.createRecordReader(splits.get(0), context);
  assertNotNull(reader, "Format returned null RecordReader");
  reader.initialize(splits.get(0), context);
  BytesWritable key = new BytesWritable();
  BytesWritable block = new BytesWritable();
  assertTrue(reader.nextKeyValue(), "Input Split for block version contains at least one block");
  block = reader.getCurrentValue();
  assertEquals(998039, block.getLength(), "Compressed block must have a size of 998,039 bytes");
  assertFalse(reader.nextKeyValue(), "No further blocks in compressed block");
  reader.close();
}
 
Example 6  Project: hadoop-gpu  File: SequenceFile.java

/**
 * Clones the attributes (like compression) of the input file and creates a
 * corresponding Writer.
 * @param inputFile the path of the input file whose attributes should be 
 * cloned
 * @param outputFile the path of the output file 
 * @param prog the Progressable to report status during the file write
 * @return Writer
 * @throws IOException
 */
public Writer cloneFileAttributes(Path inputFile, Path outputFile, 
                                  Progressable prog) 
throws IOException {
  FileSystem srcFileSys = inputFile.getFileSystem(conf);
  Reader reader = new Reader(srcFileSys, inputFile, 4096, conf, true);
  boolean compress = reader.isCompressed();
  boolean blockCompress = reader.isBlockCompressed();
  CompressionCodec codec = reader.getCompressionCodec();
  reader.close();

  Writer writer = createWriter(outputFile.getFileSystem(conf), conf, 
                               outputFile, keyClass, valClass, compress, 
                               blockCompress, codec, prog,
                               new Metadata());
  return writer;
}
 
Example 7  Project: hiped2  File: SequenceFileStoreFunc.java

@Override
public void setStoreLocation(String location, Job job)
    throws IOException {
  job.setOutputKeyClass(keyClass);
  job.setOutputValueClass(valueClass);
  if (compressionType != null && compressionCodecClass != null) {
    Class<? extends CompressionCodec> codecClass =
        FileOutputFormat.getOutputCompressorClass(job,
            DefaultCodec.class);
    SequenceFileOutputFormat.
        setOutputCompressorClass(job, codecClass);
    SequenceFileOutputFormat.setOutputCompressionType(job,
        SequenceFile.CompressionType.valueOf(compressionType));
  }
  FileOutputFormat.setOutputPath(job, new Path(location));
}
 
Example 8  Project: RDFS  File: TestSequenceFile.java

private static void writeTest(FileSystem fs, int count, int seed, Path file, 
                              CompressionType compressionType, CompressionCodec codec)
  throws IOException {
  fs.delete(file, true);
  LOG.info("creating " + count + " records with " + compressionType +
           " compression");
  SequenceFile.Writer writer = 
    SequenceFile.createWriter(fs, conf, file, 
                              RandomDatum.class, RandomDatum.class, compressionType, codec);
  RandomDatum.Generator generator = new RandomDatum.Generator(seed);
  for (int i = 0; i < count; i++) {
    generator.next();
    RandomDatum key = generator.getKey();
    RandomDatum value = generator.getValue();

    writer.append(key, value);
  }
  writer.close();
}
 
Example 9  Project: hadoop-gpu  File: Compression.java

public Compressor getCompressor() throws IOException {
  CompressionCodec codec = getCodec();
  if (codec != null) {
    Compressor compressor = CodecPool.getCompressor(codec);
    if (compressor != null) {
      if (compressor.finished()) {
        // Somebody returns the compressor to CodecPool but is still using
        // it.
        LOG.warn("Compressor obtained from CodecPool already finished()");
      } else {
        LOG.debug("Got a compressor: " + compressor.hashCode());
      }
      /**
       * Following statement is necessary to get around bugs in 0.18 where a
       * compressor is referenced after returned back to the codec pool.
       */
      compressor.reset();
    }
    return compressor;
  }
  return null;
}
 
Example 10  Project: big-c  File: Compression.java

public Compressor getCompressor() throws IOException {
  CompressionCodec codec = getCodec();
  if (codec != null) {
    Compressor compressor = CodecPool.getCompressor(codec);
    if (compressor != null) {
      if (compressor.finished()) {
        // Somebody returns the compressor to CodecPool but is still using
        // it.
        LOG.warn("Compressor obtained from CodecPool already finished()");
      } else {
        if(LOG.isDebugEnabled()) {
          LOG.debug("Got a compressor: " + compressor.hashCode());
        }
      }
      /**
       * Following statement is necessary to get around bugs in 0.18 where a
       * compressor is referenced after returned back to the codec pool.
       */
      compressor.reset();
    }
    return compressor;
  }
  return null;
}
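
Both Compression.getCompressor() variants above borrow a Compressor from CodecPool, and callers are expected to return it once the stream is closed. Below is a minimal sketch of that borrow-and-return pairing; the class name, output path, and choice of DefaultCodec are illustrative.

import java.io.IOException;
import java.io.OutputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CodecPool;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.Compressor;
import org.apache.hadoop.io.compress.DefaultCodec;
import org.apache.hadoop.util.ReflectionUtils;

public class CodecPoolDemo {
  // Writes one line of deflate-compressed text, borrowing the Compressor from
  // CodecPool and returning it in a finally block so it can be reused.
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    CompressionCodec codec = ReflectionUtils.newInstance(DefaultCodec.class, conf);

    Path out = new Path(args[0]);                       // e.g. /tmp/demo.deflate
    FileSystem fs = out.getFileSystem(conf);

    Compressor compressor = CodecPool.getCompressor(codec);
    try {
      OutputStream stream = codec.createOutputStream(fs.create(out), compressor);
      stream.write("hello, compressed world\n".getBytes("UTF-8"));
      stream.close();
    } finally {
      CodecPool.returnCompressor(compressor);           // always hand it back to the pool
    }
  }
}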
 
Example 11  Project: incubator-tez  File: TestIFile.java

private Writer writeTestFile(IFile.Writer writer, boolean rle, boolean repeatKeys,
    List<KVPair> data, CompressionCodec codec) throws IOException {
  assertNotNull(writer);

  Text previousKey = null;
  for (KVPair kvp : data) {
    if (repeatKeys && (previousKey != null && previousKey.compareTo(kvp.getKey()) == 0)) {
      //RLE is enabled in IFile when IFile.REPEAT_KEY is set
      writer.append(IFile.REPEAT_KEY, kvp.getvalue());
    } else {
      writer.append(kvp.getKey(), kvp.getvalue());
    }
    previousKey = kvp.getKey();
  }

  writer.close();

  LOG.info("Uncompressed: " + writer.getRawLength());
  LOG.info("CompressedSize: " + writer.getCompressedLength());

  return writer;
}
 
Example 12  Project: tajo  File: TestInsertQuery.java

@Test
public final void testInsertOverwriteWithCompression() throws Exception {
  String tableName = IdentifierUtil.normalizeIdentifier("testInsertOverwriteWithCompression");
  ResultSet res = executeFile("testInsertOverwriteWithCompression_ddl.sql");
  res.close();

  CatalogService catalog = testingCluster.getMaster().getCatalog();
  assertTrue(catalog.existsTable(getCurrentDatabase(), tableName));

  res = executeQuery();
  res.close();
  TableDesc desc = catalog.getTableDesc(getCurrentDatabase(), tableName);
  if (!testingCluster.isHiveCatalogStoreRunning()) {
    assertEquals(2, desc.getStats().getNumRows().intValue());
  }

  FileSystem fs = FileSystem.get(testingCluster.getConfiguration());
  assertTrue(fs.exists(new Path(desc.getUri())));
  CompressionCodecFactory factory = new CompressionCodecFactory(testingCluster.getConfiguration());

  for (FileStatus file : fs.listStatus(new Path(desc.getUri()))) {
    CompressionCodec codec = factory.getCodec(file.getPath());
    assertTrue(codec instanceof DeflateCodec);
  }
  executeString("DROP TABLE " + tableName + " PURGE");
}
 
Example 13  Project: hadoop  File: Compression.java

@Override
public synchronized boolean isSupported() {
  if (!checked) {
    checked = true;
    String extClazz =
        (conf.get(CONF_LZO_CLASS) == null ? System
            .getProperty(CONF_LZO_CLASS) : null);
    String clazz = (extClazz != null) ? extClazz : defaultClazz;
    try {
      LOG.info("Trying to load Lzo codec class: " + clazz);
      codec =
          (CompressionCodec) ReflectionUtils.newInstance(Class
              .forName(clazz), conf);
    } catch (ClassNotFoundException e) {
      // that is okay
    }
  }
  return codec != null;
}
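
Example 13 loads an LZO codec reflectively because it is not bundled with Hadoop. More commonly, extra codecs are made visible to CompressionCodecFactory by listing their class names under the io.compression.codecs key, in addition to the codecs discovered on the classpath. A small sketch follows; the codec list and file name are illustrative.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;

public class CodecRegistrationDemo {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // A third-party codec (for example an LZO implementation) would be listed
    // the same way, provided its jar is on the classpath.
    conf.set("io.compression.codecs",
        "org.apache.hadoop.io.compress.GzipCodec,"
            + "org.apache.hadoop.io.compress.BZip2Codec");

    CompressionCodecFactory factory = new CompressionCodecFactory(conf);
    CompressionCodec codec = factory.getCodec(new Path("data.bz2"));
    System.out.println(codec == null ? "no codec found" : codec.getClass().getName());
  }
}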
 
Example 14  Project: tez  File: IFile.java

public Writer(Configuration conf, FSDataOutputStream outputStream,
    Class keyClass, Class valueClass,
    CompressionCodec codec, TezCounter writesCounter, TezCounter serializedBytesCounter,
    boolean rle) throws IOException {
  this.rawOut = outputStream;
  this.writtenRecordsCounter = writesCounter;
  this.serializedUncompressedBytes = serializedBytesCounter;
  this.start = this.rawOut.getPos();
  this.rle = rle;

  setupOutputStream(codec);

  writeHeader(outputStream);

  if (keyClass != null) {
    this.closeSerializers = true;
    SerializationFactory serializationFactory =
      new SerializationFactory(conf);
    this.keySerializer = serializationFactory.getSerializer(keyClass);
    this.keySerializer.open(buffer);
    this.valueSerializer = serializationFactory.getSerializer(valueClass);
    this.valueSerializer.open(buffer);
  } else {
    this.closeSerializers = false;
  }
}
 
Example 15  Project: hadoop-gpu  File: SequenceFile.java

/**
 * Construct the preferred type of 'raw' SequenceFile Writer.
 * @param out The stream on top of which the writer is to be constructed.
 * @param keyClass The 'key' type.
 * @param valClass The 'value' type.
 * @param compress Compress data?
 * @param blockCompress Compress blocks?
 * @param metadata The metadata of the file.
 * @return Returns the handle to the constructed SequenceFile Writer.
 * @throws IOException
 */
private static Writer
  createWriter(Configuration conf, FSDataOutputStream out, 
               Class keyClass, Class valClass, boolean compress, boolean blockCompress,
               CompressionCodec codec, Metadata metadata)
  throws IOException {
  if (codec != null && (codec instanceof GzipCodec) && 
      !NativeCodeLoader.isNativeCodeLoaded() && 
      !ZlibFactory.isNativeZlibLoaded(conf)) {
    throw new IllegalArgumentException("SequenceFile doesn't work with " +
                                       "GzipCodec without native-hadoop code!");
  }

  Writer writer = null;

  if (!compress) {
    writer = new Writer(conf, out, keyClass, valClass, metadata);
  } else if (compress && !blockCompress) {
    writer = new RecordCompressWriter(conf, out, keyClass, valClass, codec, metadata);
  } else {
    writer = new BlockCompressWriter(conf, out, keyClass, valClass, codec, metadata);
  }
  
  return writer;
}
 
Example 16  Project: spliceengine  File: WholeTextInputFormat.java

@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
    if (currentPath>=split.getNumPaths()) {
        return false;
    }

    Path path = split.getPath(currentPath);
    currentPath++;

    CompressionCodecFactory factory = new CompressionCodecFactory(conf);
    CompressionCodec codec = factory.getCodec(path);
    key = path.toString();
    FSDataInputStream fileIn = fs.open(path);

    value = codec!=null?codec.createInputStream(fileIn):fileIn;
    return true;
}
 
Example 17  Project: hadoop-gpu  File: SequenceFile.java

/** Create the named file with write-progress reporter. */
public BlockCompressWriter(FileSystem fs, Configuration conf, Path name,
                           Class keyClass, Class valClass,
                           int bufferSize, short replication, long blockSize,
                           CompressionCodec codec,
                           Progressable progress, Metadata metadata)
  throws IOException {
  super.init(name, conf,
             fs.create(name, true, bufferSize, replication, blockSize, progress),
             keyClass, valClass, true, codec, metadata);
  init(conf.getInt("io.seqfile.compress.blocksize", 1000000));

  initializeFileHeader();
  writeFileHeader();
  finalizeFileHeader();
}
 
Example 18  Project: hadoop  File: Merger.java

public static <K extends Object, V extends Object>
RawKeyValueIterator merge(Configuration conf, FileSystem fs,
                          Class<K> keyClass, Class<V> valueClass,
                          CompressionCodec codec,
                          List<Segment<K, V>> segments,
                          int mergeFactor, Path tmpDir,
                          RawComparator<K> comparator, Progressable reporter,
                          boolean sortSegments,
                          Counters.Counter readsCounter,
                          Counters.Counter writesCounter,
                          Progress mergePhase,
                          TaskType taskType)
    throws IOException {
  return new MergeQueue<K, V>(conf, fs, segments, comparator, reporter,
                         sortSegments, codec,
                         taskType).merge(keyClass, valueClass,
                                             mergeFactor, tmpDir,
                                             readsCounter, writesCounter,
                                             mergePhase);
}
 
Example 19  Project: hadoop  File: CompressionEmulationUtil.java

/**
 * Returns a {@link OutputStream} for a file that might need 
 * compression.
 */
static OutputStream getPossiblyCompressedOutputStream(Path file, 
                                                      Configuration conf)
throws IOException {
  FileSystem fs = file.getFileSystem(conf);
  JobConf jConf = new JobConf(conf);
  if (org.apache.hadoop.mapred.FileOutputFormat.getCompressOutput(jConf)) {
    // get the codec class
    Class<? extends CompressionCodec> codecClass =
      org.apache.hadoop.mapred.FileOutputFormat
                              .getOutputCompressorClass(jConf, 
                                                        GzipCodec.class);
    // get the codec implementation
    CompressionCodec codec = ReflectionUtils.newInstance(codecClass, conf);

    // add the appropriate extension
    file = file.suffix(codec.getDefaultExtension());

    if (isCompressionEmulationEnabled(conf)) {
      FSDataOutputStream fileOut = fs.create(file, false);
      return new DataOutputStream(codec.createOutputStream(fileOut));
    }
  }
  return fs.create(file, false);
}
 
Example 20  Project: incubator-tez  File: ConfigUtils.java

public static Class<? extends CompressionCodec> getIntermediateOutputCompressorClass(
    Configuration conf, Class<DefaultCodec> defaultValue) {
  Class<? extends CompressionCodec> codecClass = defaultValue;
  String name = conf
      .get(TezJobConfig.TEZ_RUNTIME_COMPRESS_CODEC);
  if (name != null) {
    try {
      codecClass = conf.getClassByName(name).asSubclass(
          CompressionCodec.class);
    } catch (ClassNotFoundException e) {
      throw new IllegalArgumentException("Compression codec " + name
          + " was not found.", e);
    }
  }
  return codecClass;
}
 
Example 21  Project: big-c  File: TestDFSIO.java

@Override // Mapper
public void configure(JobConf conf) {
  super.configure(conf);

  // grab compression
  String compression = getConf().get("test.io.compression.class", null);
  Class<? extends CompressionCodec> codec;

  // try to initialize codec
  try {
    codec = (compression == null) ? null : 
      Class.forName(compression).asSubclass(CompressionCodec.class);
  } catch(Exception e) {
    throw new RuntimeException("Compression codec not found: ", e);
  }

  if(codec != null) {
    compressionCodec = (CompressionCodec)
        ReflectionUtils.newInstance(codec, getConf());
  }
}
 
Example 22  Project: RDFS  File: FileOutputFormat.java

/**
 * Get the {@link CompressionCodec} for compressing the job outputs.
 * @param job the {@link Job} to look in
 * @param defaultValue the {@link CompressionCodec} to return if not set
 * @return the {@link CompressionCodec} to be used to compress the 
 *         job outputs
 * @throws IllegalArgumentException if the class was specified, but not found
 */
public static Class<? extends CompressionCodec> 
getOutputCompressorClass(JobContext job, 
                       Class<? extends CompressionCodec> defaultValue) {
  Class<? extends CompressionCodec> codecClass = defaultValue;
  Configuration conf = job.getConfiguration();
  String name = conf.get("mapred.output.compression.codec");
  if (name != null) {
    try {
      codecClass = conf.getClassByName(name).asSubclass(CompressionCodec.class);
    } catch (ClassNotFoundException e) {
      throw new IllegalArgumentException("Compression codec " + name + 
                                         " was not found.", e);
    }
  }
  return codecClass;
}
 
Example 23  Project: hadoop  File: Merger.java

public static <K extends Object, V extends Object>
RawKeyValueIterator merge(Configuration conf, FileSystem fs,
                          Class<K> keyClass, Class<V> valueClass, 
                          CompressionCodec codec,
                          Path[] inputs, boolean deleteInputs, 
                          int mergeFactor, Path tmpDir,
                          RawComparator<K> comparator, Progressable reporter,
                          Counters.Counter readsCounter,
                          Counters.Counter writesCounter,
                          Progress mergePhase)
throws IOException {
  return 
    new MergeQueue<K, V>(conf, fs, inputs, deleteInputs, codec, comparator, 
                         reporter, null,
                         TaskType.REDUCE).merge(keyClass, valueClass,
                                         mergeFactor, tmpDir,
                                         readsCounter, writesCounter, 
                                         mergePhase);
}
 

private static List<String> readLines(Path location, Configuration conf) throws Exception {
    FileSystem fileSystem = FileSystem.get(location.toUri(), conf);
    CompressionCodecFactory factory = new CompressionCodecFactory(conf);
    FileStatus[] items = fileSystem.listStatus(location);
    if (items == null)
        return new ArrayList<String>();
    List<String> results = new ArrayList<String>();
    for (FileStatus item : items) {

        // ignoring files like _SUCCESS
        if (item.getPath().getName().startsWith("_")) {
            continue;
        }

        CompressionCodec codec = factory.getCodec(item.getPath());
        InputStream stream = null;

        // check if we have a compression codec we need to use
        if (codec != null) {
            stream = codec.createInputStream(fileSystem.open(item.getPath()));
        } else {
            stream = fileSystem.open(item.getPath());
        }

        StringWriter writer = new StringWriter();
        IOUtils.copy(stream, writer, "UTF-8");
        String raw = writer.toString();
        for (String str : raw.split("\n")) {
            results.add(str);
        }
    }
    return results;
}
 
Example 25  Project: hbase  File: Compression.java

@Override
CompressionCodec getCodec(Configuration conf) {
  if (bzipCodec == null) {
    synchronized (lock) {
      if (bzipCodec == null) {
        bzipCodec = buildCodec(conf);
      }
    }
  }
  return bzipCodec;
}
 
Example 26  Project: RDFS  File: SequenceFile.java

/**
 * Construct the preferred type of SequenceFile Writer.
 * @param fs The configured filesystem.
 * @param conf The configuration.
 * @param name The name of the file.
 * @param keyClass The 'key' type.
 * @param valClass The 'value' type.
 * @param bufferSize buffer size for the underlying output stream.
 * @param replication replication factor for the file.
 * @param blockSize block size for the file.
 * @param createParent create parent directory if non-existent
 * @param compressionType The compression type.
 * @param codec The compression codec.
 * @param progress The Progressable object to track progress.
 * @param metadata The metadata of the file.
 * @param forceSync set the forceSync flag for this file
 * @param doParallelWrites write replicas in parallel
 * @return Returns the handle to the constructed SequenceFile Writer.
 * @throws IOException
 */
public static Writer
  createWriter(FileSystem fs, Configuration conf, Path name,
               Class keyClass, Class valClass, int bufferSize,
               short replication, long blockSize, boolean createParent,
               CompressionType compressionType, CompressionCodec codec,
               Metadata metadata, boolean forceSync,
               boolean doParallelWrites) throws IOException {
  if ((codec instanceof GzipCodec) &&
      !NativeCodeLoader.isNativeCodeLoaded() &&
      !ZlibFactory.isNativeZlibLoaded(conf)) {
    throw new IllegalArgumentException("SequenceFile doesn't work with " +
                                       "GzipCodec without native-hadoop code!");
  }

  switch (compressionType) {
  case NONE:
    return new Writer(conf,
        fs.createNonRecursive(name, FsPermission.getDefault(),true,
            bufferSize, replication, blockSize, null,forceSync, doParallelWrites),
        keyClass, valClass, metadata).ownStream();
  case RECORD:
    return new RecordCompressWriter(conf,
        fs.createNonRecursive(name, FsPermission.getDefault(), true,
            bufferSize, replication, blockSize, null,forceSync, doParallelWrites),
        keyClass, valClass, codec, metadata).ownStream();
  case BLOCK:
    return new BlockCompressWriter(conf,
        fs.createNonRecursive(name, FsPermission.getDefault(), true,
            bufferSize, replication, blockSize, null, forceSync, doParallelWrites),
        keyClass, valClass, codec, metadata).ownStream();
  default:
    return null;
  }
}
 
Example 27  Project: Hadoop-BAM  File: QseqInputFormat.java

public QseqRecordReader(Configuration conf, FileSplit split) throws IOException
{
	setConf(conf);
	file = split.getPath();
	start = split.getStart();
	end = start + split.getLength();

	FileSystem fs = file.getFileSystem(conf);
	FSDataInputStream fileIn = fs.open(file);

	CompressionCodecFactory codecFactory = new CompressionCodecFactory(conf);
	CompressionCodec        codec        = codecFactory.getCodec(file);

	if (codec == null) // no codec.  Uncompressed file.
	{
		positionAtFirstRecord(fileIn);
		inputStream = fileIn;
	}
	else
	{ // compressed file
		if (start != 0)
			throw new RuntimeException("Start position for compressed file is not 0! (found " + start + ")");

		inputStream = codec.createInputStream(fileIn);
		end = Long.MAX_VALUE; // read until the end of the file
	}

	lineReader = new LineReader(inputStream);
}
 
Example 28  Project: tajo  File: TestInsertQuery.java

@Test
public final void testInsertOverwritePathWithNonFromQuery() throws Exception {
  ResultSet res = executeString("insert overwrite into location " +
      "'/tajo-data/testInsertOverwritePathWithNonFromQuery' " +
      "USING text WITH ('text.delimiter'='|','compression.codec'='org.apache.hadoop.io.compress.DeflateCodec') " +
      "select 1::INT4, 2.1::FLOAT4, 'test'");

  res.close();
  FileSystem fs = FileSystem.get(testingCluster.getConfiguration());
  Path path = new Path("/tajo-data/testInsertOverwritePathWithNonFromQuery");
  assertTrue(fs.exists(path));
  assertEquals(1, fs.listStatus(path).length);

  CompressionCodecFactory factory = new CompressionCodecFactory(testingCluster.getConfiguration());
  FileStatus file = fs.listStatus(path)[0];
  CompressionCodec codec = factory.getCodec(file.getPath());
  assertTrue(codec instanceof DeflateCodec);

  try (BufferedReader reader = new BufferedReader(
          new InputStreamReader(codec.createInputStream(fs.open(file.getPath()))))) {
    String line = reader.readLine();
    assertNotNull(line);

    String[] tokens = line.split("\\|");

    assertEquals(3, tokens.length);
    assertEquals("1", tokens[0]);
    assertEquals("2.1", tokens[1]);
    assertEquals("test", tokens[2]);
  }
}
 

@Override
public SequenceFileWriter<K, V> create(FSDataOutputStream out) throws IOException {
	org.apache.hadoop.fs.FSDataOutputStream stream = new org.apache.hadoop.fs.FSDataOutputStream(out, null);
	CompressionCodec compressionCodec = getCompressionCodec(serializableHadoopConfig.get(), compressionCodecName);
	SequenceFile.Writer writer = SequenceFile.createWriter(
		serializableHadoopConfig.get(),
		SequenceFile.Writer.stream(stream),
		SequenceFile.Writer.keyClass(keyClass),
		SequenceFile.Writer.valueClass(valueClass),
		SequenceFile.Writer.compression(compressionType, compressionCodec));
	return new SequenceFileWriter<>(writer);
}
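
The example above builds a compressed SequenceFile.Writer through the Writer.option API. Reading such a file back does not require knowing the codec in advance, because the SequenceFile header records it. Below is a minimal read-side sketch, assuming the file holds LongWritable keys and Text values; the class name and path argument are illustrative.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.CompressionCodec;

public class SequenceFileReadDemo {
  // Opens a (possibly compressed) SequenceFile and prints its codec and records.
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    Path path = new Path(args[0]);                      // e.g. /tmp/demo.seq

    try (SequenceFile.Reader reader =
             new SequenceFile.Reader(conf, SequenceFile.Reader.file(path))) {
      CompressionCodec codec = reader.getCompressionCodec();   // null if uncompressed
      System.out.println("codec = " + (codec == null ? "none" : codec.getClass().getName()));

      LongWritable key = new LongWritable();
      Text value = new Text();
      while (reader.next(key, value)) {
        System.out.println(key + "\t" + value);
      }
    }
  }
}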
 
Example 30  Project: hbase  File: NettyRpcDuplexHandler.java

public NettyRpcDuplexHandler(NettyRpcConnection conn, CellBlockBuilder cellBlockBuilder,
    Codec codec, CompressionCodec compressor) {
  this.conn = conn;
  this.cellBlockBuilder = cellBlockBuilder;
  this.codec = codec;
  this.compressor = compressor;

}
 