org.apache.hadoop.mapred.InputSplit#getLength ( )源码实例Demo

下面列出了org.apache.hadoop.mapred.InputSplit#getLength ( ) 实例代码,或者点击链接到github查看源代码,也可以在右侧发表评论。

源代码1 项目: pxf   文件: HdfsDataFragmenter.java
/**
 * Computes the input splits for the given path, dropping empty ones.
 *
 * @param path the data path to split
 * @return splits with a positive length; never {@code null}
 * @throws IOException if the underlying input format fails to split the path
 */
protected List<InputSplit> getSplits(Path path) throws IOException {
    PxfInputFormat inputFormat = new PxfInputFormat();
    PxfInputFormat.setInputPaths(jobConf, path);
    InputSplit[] rawSplits = inputFormat.getSplits(jobConf, 1);

    List<InputSplit> nonEmptySplits = new ArrayList<>();
    if (rawSplits == null) {
        return nonEmptySplits;
    }

    /*
     * HD-2547: an empty file yields an empty split (no locations, zero
     * length), so only splits with actual data are kept.
     */
    for (InputSplit candidate : rawSplits) {
        if (candidate.getLength() > 0) {
            nonEmptySplits.add(candidate);
        }
    }
    return nonEmptySplits;
}
 
源代码2 项目: pxf   文件: HdfsDataFragmenter.java
/**
 * Computes fragment statistics for the configured data path: the number
 * of non-empty splits, the size of the first split, and the total size.
 *
 * @return aggregated statistics; all-zero stats when there are no splits
 * @throws Exception if the path cannot be resolved or split
 */
@Override
public FragmentStats getFragmentStats() throws Exception {
    String dataUri = hcfsType.getDataUri(jobConf, context);
    List<InputSplit> splits = getSplits(new Path(dataUri));

    if (splits.isEmpty()) {
        // No data at all: report zero fragments, zero first-size, zero total.
        return new FragmentStats(0, 0, 0);
    }

    long totalSize = 0;
    for (InputSplit split : splits) {
        totalSize += split.getLength();
    }
    return new FragmentStats(splits.size(), splits.get(0).getLength(), totalSize);
}
 
源代码3 项目: hadoop   文件: CompositeInputSplit.java
/**
 * Add an InputSplit to this collection.
 * @throws IOException If capacity was not specified during construction
 *                     or if capacity has been reached.
 */
/**
 * Add an InputSplit to this collection, accumulating its length into the
 * running total size.
 * @throws IOException If capacity was not specified during construction
 *                     or if capacity has been reached.
 */
public void add(InputSplit s) throws IOException {
  if (splits == null) {
    throw new IOException("Uninitialized InputSplit");
  }
  if (splits.length == fill) {
    throw new IOException("Too many splits");
  }
  splits[fill] = s;
  fill++;
  totsize += s.getLength();
}
 
源代码4 项目: big-c   文件: CompositeInputSplit.java
/**
 * Add an InputSplit to this collection.
 * @throws IOException If capacity was not specified during construction
 *                     or if capacity has been reached.
 */
/**
 * Add an InputSplit to this collection.
 * @throws IOException If capacity was not specified during construction
 *                     or if capacity has been reached.
 */
public void add(InputSplit s) throws IOException {
  if (splits == null) {
    throw new IOException("Uninitialized InputSplit");
  }
  if (fill != splits.length) {
    // Store first, then fold the split's length into the running total.
    splits[fill++] = s;
    totsize += s.getLength();
  } else {
    throw new IOException("Too many splits");
  }
}
 
源代码5 项目: parquet-mr   文件: DeprecatedParquetInputFormat.java
/**
 * Wraps a new-API {@link ParquetRecordReader} behind the old mapred
 * RecordReader contract. The first record is read eagerly so the value
 * object type is available to callers immediately.
 *
 * @param oldSplit   the mapred split; must be a {@code ParquetInputSplitWrapper}
 *                   or a {@link FileSplit}
 * @param oldJobConf job configuration supplying read support and filter
 * @param reporter   progress reporter forwarded to the real reader
 * @throws IOException if reader initialization or the first read fails,
 *                     or if the thread is interrupted while reading
 * @throws IllegalArgumentException if the split is of an unsupported type
 */
public RecordReaderWrapper(
    InputSplit oldSplit, JobConf oldJobConf, Reporter reporter)
    throws IOException {
  splitLen = oldSplit.getLength();

  try {
    realReader = new ParquetRecordReader<V>(
        ParquetInputFormat.<V>getReadSupportInstance(oldJobConf),
        ParquetInputFormat.getFilter(oldJobConf));

    if (oldSplit instanceof ParquetInputSplitWrapper) {
      realReader.initialize(((ParquetInputSplitWrapper) oldSplit).realSplit, oldJobConf, reporter);
    } else if (oldSplit instanceof FileSplit) {
      realReader.initialize((FileSplit) oldSplit, oldJobConf, reporter);
    } else {
      throw new IllegalArgumentException(
          "Invalid split (not a FileSplit or ParquetInputSplitWrapper): " + oldSplit);
    }

    // read once to gain access to key and value objects
    if (realReader.nextKeyValue()) {
      firstRecord = true;
      valueContainer = new Container<V>();
      valueContainer.set(realReader.getCurrentValue());

    } else {
      eof = true;
    }
  } catch (InterruptedException e) {
    // Fix: Thread.interrupted() CLEARS the interrupt flag, silently
    // dropping the interruption. Re-assert it so callers can observe it.
    Thread.currentThread().interrupt();
    throw new IOException(e);
  }
}
源代码6 项目: parquet-mr   文件: DeprecatedParquetInputFormat.java（推测，与源代码5同一项目；编号6的标题在原文中缺失，此处补齐以保持序号连续）
/**
 * Wraps a {@link ParquetRecordReader} over {@code SimpleGroup} records for
 * streaming use; reads the first record eagerly and caches its string form.
 *
 * @param newInputFormat unused here; kept for signature compatibility
 * @param oldSplit       the mapred split; expected to be a
 *                       {@code StreamingParquetInputSplitWrapper}
 * @param oldJobConf     job configuration; also receives the input-file keys
 * @param reporter       progress reporter forwarded to the real reader
 * @throws IOException if reader initialization or the first read fails,
 *                     or if the thread is interrupted while reading
 */
public TextRecordReaderWrapper(ParquetInputFormat<SimpleGroup> newInputFormat,
                           InputSplit oldSplit,
                           JobConf oldJobConf,
                           Reporter reporter) throws IOException {

    splitLen = oldSplit.getLength();

    try {
        ReadSupport<SimpleGroup> rs = ParquetInputFormat.getReadSupportInstance(oldJobConf);
        realReader = new ParquetRecordReader<>(rs);
        realReader.initialize(((StreamingParquetInputSplitWrapper)oldSplit).realSplit, oldJobConf, reporter);

        // Expose the input file path under both the old and new conf keys.
        oldJobConf.set("map.input.file",((StreamingParquetInputSplitWrapper)oldSplit).realSplit.getPath().toString());
        oldJobConf.set("mapreduce.map.input.file",((StreamingParquetInputSplitWrapper)oldSplit).realSplit.getPath().toString());

        // read once to gain access to key and value objects
        if (realReader.nextKeyValue()) {

          firstRecord = true;
          valueContainer = new Container<>();
          SimpleGroup v = realReader.getCurrentValue();
          valueContainer.set(v);
          ls = groupToStrings(v);
        } else {

          eof = true;
        }
    } catch (InterruptedException e) {
        // Fix: Thread.interrupted() CLEARS the interrupt flag, silently
        // dropping the interruption. Re-assert it so callers can observe it.
        Thread.currentThread().interrupt();
        throw new IOException(e);
    }
}
 
源代码7 项目: RDFS   文件: CompositeInputSplit.java
/**
 * Add an InputSplit to this collection.
 * @throws IOException If capacity was not specified during construction
 *                     or if capacity has been reached.
 */
/**
 * Add an InputSplit to this collection.
 * @throws IOException If capacity was not specified during construction
 *                     or if capacity has been reached.
 */
public void add(InputSplit s) throws IOException {
  if (null == splits) {
    // Backing array was never allocated: capacity-less construction.
    throw new IOException("Uninitialized InputSplit");
  }
  if (fill == splits.length) {
    // All pre-allocated slots are used; this collection does not grow.
    throw new IOException("Too many splits");
  }
  splits[fill++] = s;
  // Maintain the aggregate byte length across all contained splits.
  totsize += s.getLength();
}
 
源代码8 项目: incubator-tez   文件: TezGroupedSplit.java
/**
 * Adds one wrapped split to this group and folds its length into the
 * group's total. A failure to read the length is rethrown unchecked.
 */
public void addSplit(InputSplit split) {
  wrappedSplits.add(split);
  long splitLength;
  try {
    splitLength = split.getLength();
  } catch (Exception e) {
    throw new TezUncheckedException(e);
  }
  length += splitLength;
}
 
源代码9 项目: Hive-XML-SerDe   文件: SplittableXmlInputFormat.java
/**
 * Creates a record reader for one XML file split.
 *
 * @param inputSplit the split to read; expected to be a {@link FileSplit}
 * @param job        the job configuration
 * @param reporter   progress reporter (unused here)
 * @return a reader producing the split's records between its start and end offsets
 * @throws IOException if the input stream cannot be opened
 */
@Override
public RecordReader<LongWritable, Text> getRecordReader(InputSplit inputSplit, JobConf job, Reporter reporter) throws IOException {

    FileSplit fileSplit = (FileSplit) inputSplit;
    InputStream inputStream;
    try {
        inputStream = getInputStream(job, fileSplit);
    } catch (ClassNotFoundException e) {
        // Fix: this was only printed via printStackTrace(), letting a null
        // stream flow into the reader and surface later as an NPE.
        // Fail fast instead, preserving the original cause.
        throw new IOException("Cannot open input stream for split: " + inputSplit, e);
    }
    long start = fileSplit.getStart();
    long end = start + fileSplit.getLength();

    return new HiveXmlRecordReader(job, inputStream, start, end);
}
 
源代码10 项目: tez   文件: TezGroupedSplit.java
/**
 * Appends a split to the wrapped list and grows the grouped length by the
 * split's own length; length-read failures surface as unchecked exceptions.
 */
public void addSplit(InputSplit split) {
  wrappedSplits.add(split);
  try {
    length = length + split.getLength();
  } catch (Exception e) {
    throw new TezUncheckedException(e);
  }
}
 
源代码11 项目: hadoop-gpu   文件: CompositeInputSplit.java
/**
 * Add an InputSplit to this collection.
 * @throws IOException If capacity was not specified during construction
 *                     or if capacity has been reached.
 */
/**
 * Add an InputSplit to this collection.
 * @throws IOException If capacity was not specified during construction
 *                     or if capacity has been reached.
 */
public void add(InputSplit s) throws IOException {
  if (splits == null) {
    throw new IOException("Uninitialized InputSplit");
  }
  if (splits.length == fill) {
    throw new IOException("Too many splits");
  }
  int slot = fill;
  fill = slot + 1;
  splits[slot] = s;
  totsize += s.getLength();
}