下面列出了 org.apache.hadoop.mapred.InputSplit#getLength() 的实例代码;也可以点击链接到 GitHub 查看源代码,或在右侧发表评论。
/**
 * Computes the input splits for {@code path} and drops any empty ones.
 *
 * @param path the data path to split
 * @return the non-empty splits for the path (possibly an empty list)
 * @throws IOException if the underlying input format fails to compute splits
 */
protected List<InputSplit> getSplits(Path path) throws IOException {
    PxfInputFormat format = new PxfInputFormat();
    PxfInputFormat.setInputPaths(jobConf, path);
    InputSplit[] rawSplits = format.getSplits(jobConf, 1);

    List<InputSplit> nonEmptySplits = new ArrayList<>();
    if (rawSplits == null) {
        return nonEmptySplits;
    }
    for (InputSplit candidate : rawSplits) {
        // HD-2547: an empty file yields a split with no locations and zero
        // length — filter those out so callers only see real data splits.
        if (candidate.getLength() > 0) {
            nonEmptySplits.add(candidate);
        }
    }
    return nonEmptySplits;
}
/**
 * Builds fragment statistics for the data path resolved from the current
 * request context: number of splits, size of the first split, and the sum of
 * all split lengths. Returns all-zero stats when there are no splits.
 *
 * @return aggregate statistics over the computed splits
 * @throws Exception if the data URI cannot be resolved or splits fail
 */
@Override
public FragmentStats getFragmentStats() throws Exception {
    String dataUri = hcfsType.getDataUri(jobConf, context);
    List<InputSplit> splits = getSplits(new Path(dataUri));
    if (splits.isEmpty()) {
        return new FragmentStats(0, 0, 0);
    }
    long firstSplitSize = splits.get(0).getLength();
    long totalSize = 0;
    for (InputSplit split : splits) {
        totalSize += split.getLength();
    }
    return new FragmentStats(splits.size(), firstSplitSize, totalSize);
}
/**
 * Stores a split in this collection and folds its length into the running
 * total size.
 *
 * @param s the split to append
 * @throws IOException if no capacity was specified during construction,
 *         or if the backing array is already full
 */
public void add(InputSplit s) throws IOException {
    if (splits == null) {
        throw new IOException("Uninitialized InputSplit");
    }
    if (splits.length == fill) {
        throw new IOException("Too many splits");
    }
    // Store first, then accumulate — getLength() may itself throw.
    splits[fill] = s;
    fill++;
    totsize += s.getLength();
}
/**
 * Appends an InputSplit to this collection, updating the cumulative size.
 *
 * @param split the split to add
 * @throws IOException when the collection was built without a capacity, or
 *         when the configured capacity has been exhausted
 */
public void add(InputSplit split) throws IOException {
    if (splits == null) {
        throw new IOException("Uninitialized InputSplit");
    }
    if (fill == splits.length) {
        throw new IOException("Too many splits");
    }
    splits[fill] = split;
    fill += 1;
    totsize += split.getLength();
}
/**
 * Adapts a mapreduce-API {@code ParquetRecordReader} to the old mapred split
 * types. Accepts either a {@code ParquetInputSplitWrapper} or a plain
 * {@code FileSplit}; anything else is rejected. Reads one record eagerly so
 * callers can obtain key/value instances immediately.
 *
 * @param oldSplit   the mapred split to read (wrapper or FileSplit)
 * @param oldJobConf job configuration used to build read support and filter
 * @param reporter   progress reporter passed through to the real reader
 * @throws IOException if initialization or the first read fails, or if the
 *         thread is interrupted (the interrupt status is preserved)
 * @throws IllegalArgumentException if the split is of an unsupported type
 */
public RecordReaderWrapper(
    InputSplit oldSplit, JobConf oldJobConf, Reporter reporter)
    throws IOException {
  splitLen = oldSplit.getLength();
  try {
    realReader = new ParquetRecordReader<V>(
        ParquetInputFormat.<V>getReadSupportInstance(oldJobConf),
        ParquetInputFormat.getFilter(oldJobConf));
    if (oldSplit instanceof ParquetInputSplitWrapper) {
      realReader.initialize(((ParquetInputSplitWrapper) oldSplit).realSplit, oldJobConf, reporter);
    } else if (oldSplit instanceof FileSplit) {
      realReader.initialize((FileSplit) oldSplit, oldJobConf, reporter);
    } else {
      throw new IllegalArgumentException(
          "Invalid split (not a FileSplit or ParquetInputSplitWrapper): " + oldSplit);
    }
    // Read once to gain access to key and value objects.
    if (realReader.nextKeyValue()) {
      firstRecord = true;
      valueContainer = new Container<V>();
      valueContainer.set(realReader.getCurrentValue());
    } else {
      eof = true;
    }
  } catch (InterruptedException e) {
    // BUGFIX: Thread.interrupted() *clears* the interrupt flag. Re-assert it
    // so callers up the stack can still observe that an interrupt occurred.
    Thread.currentThread().interrupt();
    throw new IOException(e);
  }
}
/**
 * Adapts a mapreduce-API Parquet reader to a mapred-style text record reader
 * over {@code SimpleGroup} values. Initializes the real reader from the
 * wrapped split, records the input file name in the job configuration, and
 * eagerly reads the first record so key/value objects are available.
 *
 * @param newInputFormat the Parquet input format (kept for interface parity;
 *                       read support is resolved from the job conf instead)
 * @param oldSplit       must be a {@code StreamingParquetInputSplitWrapper};
 *                       any other type fails with a ClassCastException
 * @param oldJobConf     job configuration; receives the input-file properties
 * @param reporter       progress reporter passed to the real reader
 * @throws IOException if initialization or the first read fails, or if the
 *         thread is interrupted (the interrupt status is preserved)
 */
public TextRecordReaderWrapper(ParquetInputFormat<SimpleGroup> newInputFormat,
                               InputSplit oldSplit,
                               JobConf oldJobConf,
                               Reporter reporter) throws IOException {
  splitLen = oldSplit.getLength();
  try {
    ReadSupport<SimpleGroup> rs = ParquetInputFormat.getReadSupportInstance(oldJobConf);
    realReader = new ParquetRecordReader<>(rs);
    // Cast once instead of three times; a wrong split type still fails here
    // with the same ClassCastException as before.
    StreamingParquetInputSplitWrapper wrapper = (StreamingParquetInputSplitWrapper) oldSplit;
    realReader.initialize(wrapper.realSplit, oldJobConf, reporter);
    String inputFile = wrapper.realSplit.getPath().toString();
    oldJobConf.set("map.input.file", inputFile);
    oldJobConf.set("mapreduce.map.input.file", inputFile);
    // Read once to gain access to key and value objects.
    if (realReader.nextKeyValue()) {
      firstRecord = true;
      valueContainer = new Container<>();
      SimpleGroup v = realReader.getCurrentValue();
      valueContainer.set(v);
      ls = groupToStrings(v);
    } else {
      eof = true;
    }
  } catch (InterruptedException e) {
    // BUGFIX: Thread.interrupted() *clears* the interrupt flag. Re-assert it
    // so callers up the stack can still observe that an interrupt occurred.
    Thread.currentThread().interrupt();
    throw new IOException(e);
  }
}
/**
 * Adds a split to this collection and accounts for its length in the total.
 *
 * @param s the split to record
 * @throws IOException if this collection was never given a capacity, or if
 *         it is already at capacity
 */
public void add(InputSplit s) throws IOException {
    if (null == splits) {
        throw new IOException("Uninitialized InputSplit");
    }
    boolean atCapacity = (fill == splits.length);
    if (atCapacity) {
        throw new IOException("Too many splits");
    }
    splits[fill++] = s;
    totsize = totsize + s.getLength();
}
/**
 * Adds a split to the wrapped list and accumulates its length into this
 * wrapper's total. Any failure raised by {@code getLength()} is rethrown as
 * an unchecked {@link TezUncheckedException}.
 *
 * @param split the split to wrap
 */
public void addSplit(InputSplit split) {
    wrappedSplits.add(split);
    long splitLength;
    try {
        splitLength = split.getLength();
    } catch (Exception e) {
        throw new TezUncheckedException(e);
    }
    length += splitLength;
}
/**
 * Creates a record reader over the given file split, spanning
 * [start, start + length) of the underlying stream.
 *
 * @param inputSplit the split to read; must be a {@code FileSplit}
 * @param job        the job configuration used to open the stream
 * @param reporter   progress reporter (unused here)
 * @return a reader positioned over the split's byte range
 * @throws IOException if the input stream cannot be opened
 */
@Override
public RecordReader<LongWritable, Text> getRecordReader(InputSplit inputSplit, JobConf job, Reporter reporter) throws IOException {
    FileSplit fileSplit = (FileSplit) inputSplit;
    InputStream inputStream;
    try {
        inputStream = getInputStream(job, fileSplit);
    } catch (ClassNotFoundException e) {
        // BUGFIX: previously this was printStackTrace() followed by a
        // fall-through with a null stream, deferring the failure to an NPE
        // inside the reader. Surface the root cause to the caller instead —
        // the method already declares IOException, so callers are unaffected.
        throw new IOException("Failed to open input stream for split: " + fileSplit, e);
    }
    long start = fileSplit.getStart();
    long end = start + inputSplit.getLength();
    return new HiveXmlRecordReader(job, inputStream, start, end);
}
/**
 * Registers a split with this wrapper, growing the aggregate length by the
 * split's own length. A checked failure from {@code getLength()} surfaces as
 * a {@link TezUncheckedException}.
 *
 * @param split the split to register
 */
public void addSplit(InputSplit split) {
    wrappedSplits.add(split);
    try {
        length = length + split.getLength();
    } catch (Exception e) {
        throw new TezUncheckedException(e);
    }
}
/**
 * Inserts a split at the next free slot and updates the cumulative size.
 *
 * @param s the split to insert
 * @throws IOException when no capacity was configured at construction time,
 *         or when the capacity has been reached
 */
public void add(InputSplit s) throws IOException {
    if (splits == null) {
        throw new IOException("Uninitialized InputSplit");
    }
    if (splits.length == fill) {
        throw new IOException("Too many splits");
    }
    int slot = fill;
    fill = slot + 1;
    splits[slot] = s;
    totsize += s.getLength();
}