下面列出了怎么用org.apache.hadoop.mapreduce.split.JobSplit.SplitMetaInfo的API类实例代码及写法,或者点击链接到github查看源代码。
public static <T extends InputSplit> void createSplitFiles(Path jobSubmitDir,
Configuration conf, FileSystem fs, T[] splits)
throws IOException, InterruptedException {
FSDataOutputStream out = createFile(fs,
JobSubmissionFiles.getJobSplitFile(jobSubmitDir), conf);
SplitMetaInfo[] info = writeNewSplits(conf, splits, out);
out.close();
writeJobSplitMetaInfo(fs,JobSubmissionFiles.getJobSplitMetaFile(jobSubmitDir),
new FsPermission(JobSubmissionFiles.JOB_FILE_PERMISSION), splitVersion,
info);
}
public static void createSplitFiles(Path jobSubmitDir,
Configuration conf, FileSystem fs,
org.apache.hadoop.mapred.InputSplit[] splits)
throws IOException {
FSDataOutputStream out = createFile(fs,
JobSubmissionFiles.getJobSplitFile(jobSubmitDir), conf);
SplitMetaInfo[] info = writeOldSplits(splits, out, conf);
out.close();
writeJobSplitMetaInfo(fs,JobSubmissionFiles.getJobSplitMetaFile(jobSubmitDir),
new FsPermission(JobSubmissionFiles.JOB_FILE_PERMISSION), splitVersion,
info);
}
@SuppressWarnings("unchecked")
private static <T extends InputSplit>
SplitMetaInfo[] writeNewSplits(Configuration conf,
T[] array, FSDataOutputStream out)
throws IOException, InterruptedException {
SplitMetaInfo[] info = new SplitMetaInfo[array.length];
if (array.length != 0) {
SerializationFactory factory = new SerializationFactory(conf);
int i = 0;
int maxBlockLocations = conf.getInt(MRConfig.MAX_BLOCK_LOCATIONS_KEY,
MRConfig.MAX_BLOCK_LOCATIONS_DEFAULT);
long offset = out.getPos();
for(T split: array) {
long prevCount = out.getPos();
Text.writeString(out, split.getClass().getName());
Serializer<T> serializer =
factory.getSerializer((Class<T>) split.getClass());
serializer.open(out);
serializer.serialize(split);
long currCount = out.getPos();
String[] locations = split.getLocations();
if (locations.length > maxBlockLocations) {
LOG.warn("Max block location exceeded for split: "
+ split + " splitsize: " + locations.length +
" maxsize: " + maxBlockLocations);
locations = Arrays.copyOf(locations, maxBlockLocations);
}
info[i++] =
new JobSplit.SplitMetaInfo(
locations, offset,
split.getLength());
offset += currCount - prevCount;
}
}
return info;
}
private static SplitMetaInfo[] writeOldSplits(
org.apache.hadoop.mapred.InputSplit[] splits,
FSDataOutputStream out, Configuration conf) throws IOException {
SplitMetaInfo[] info = new SplitMetaInfo[splits.length];
if (splits.length != 0) {
int i = 0;
long offset = out.getPos();
int maxBlockLocations = conf.getInt(MRConfig.MAX_BLOCK_LOCATIONS_KEY,
MRConfig.MAX_BLOCK_LOCATIONS_DEFAULT);
for(org.apache.hadoop.mapred.InputSplit split: splits) {
long prevLen = out.getPos();
Text.writeString(out, split.getClass().getName());
split.write(out);
long currLen = out.getPos();
String[] locations = split.getLocations();
if (locations.length > maxBlockLocations) {
LOG.warn("Max block location exceeded for split: "
+ split + " splitsize: " + locations.length +
" maxsize: " + maxBlockLocations);
locations = Arrays.copyOf(locations,maxBlockLocations);
}
info[i++] = new JobSplit.SplitMetaInfo(
locations, offset,
split.getLength());
offset += currLen - prevLen;
}
}
return info;
}
private static void writeJobSplitMetaInfo(FileSystem fs, Path filename,
FsPermission p, int splitMetaInfoVersion,
JobSplit.SplitMetaInfo[] allSplitMetaInfo)
throws IOException {
// write the splits meta-info to a file for the job tracker
FSDataOutputStream out =
FileSystem.create(fs, filename, p);
out.write(JobSplit.META_SPLIT_FILE_HEADER);
WritableUtils.writeVInt(out, splitMetaInfoVersion);
WritableUtils.writeVInt(out, allSplitMetaInfo.length);
for (JobSplit.SplitMetaInfo splitMetaInfo : allSplitMetaInfo) {
splitMetaInfo.write(out);
}
out.close();
}
public static <T extends InputSplit> void createSplitFiles(Path jobSubmitDir,
Configuration conf, FileSystem fs, T[] splits)
throws IOException, InterruptedException {
FSDataOutputStream out = createFile(fs,
JobSubmissionFiles.getJobSplitFile(jobSubmitDir), conf);
SplitMetaInfo[] info = writeNewSplits(conf, splits, out);
out.close();
writeJobSplitMetaInfo(fs,JobSubmissionFiles.getJobSplitMetaFile(jobSubmitDir),
new FsPermission(JobSubmissionFiles.JOB_FILE_PERMISSION), splitVersion,
info);
}
public static void createSplitFiles(Path jobSubmitDir,
Configuration conf, FileSystem fs,
org.apache.hadoop.mapred.InputSplit[] splits)
throws IOException {
FSDataOutputStream out = createFile(fs,
JobSubmissionFiles.getJobSplitFile(jobSubmitDir), conf);
SplitMetaInfo[] info = writeOldSplits(splits, out, conf);
out.close();
writeJobSplitMetaInfo(fs,JobSubmissionFiles.getJobSplitMetaFile(jobSubmitDir),
new FsPermission(JobSubmissionFiles.JOB_FILE_PERMISSION), splitVersion,
info);
}
@SuppressWarnings("unchecked")
private static <T extends InputSplit>
SplitMetaInfo[] writeNewSplits(Configuration conf,
T[] array, FSDataOutputStream out)
throws IOException, InterruptedException {
SplitMetaInfo[] info = new SplitMetaInfo[array.length];
if (array.length != 0) {
SerializationFactory factory = new SerializationFactory(conf);
int i = 0;
int maxBlockLocations = conf.getInt(MRConfig.MAX_BLOCK_LOCATIONS_KEY,
MRConfig.MAX_BLOCK_LOCATIONS_DEFAULT);
long offset = out.getPos();
for(T split: array) {
long prevCount = out.getPos();
Text.writeString(out, split.getClass().getName());
Serializer<T> serializer =
factory.getSerializer((Class<T>) split.getClass());
serializer.open(out);
serializer.serialize(split);
long currCount = out.getPos();
String[] locations = split.getLocations();
if (locations.length > maxBlockLocations) {
LOG.warn("Max block location exceeded for split: "
+ split + " splitsize: " + locations.length +
" maxsize: " + maxBlockLocations);
locations = Arrays.copyOf(locations, maxBlockLocations);
}
info[i++] =
new JobSplit.SplitMetaInfo(
locations, offset,
split.getLength());
offset += currCount - prevCount;
}
}
return info;
}
private static SplitMetaInfo[] writeOldSplits(
org.apache.hadoop.mapred.InputSplit[] splits,
FSDataOutputStream out, Configuration conf) throws IOException {
SplitMetaInfo[] info = new SplitMetaInfo[splits.length];
if (splits.length != 0) {
int i = 0;
long offset = out.getPos();
int maxBlockLocations = conf.getInt(MRConfig.MAX_BLOCK_LOCATIONS_KEY,
MRConfig.MAX_BLOCK_LOCATIONS_DEFAULT);
for(org.apache.hadoop.mapred.InputSplit split: splits) {
long prevLen = out.getPos();
Text.writeString(out, split.getClass().getName());
split.write(out);
long currLen = out.getPos();
String[] locations = split.getLocations();
if (locations.length > maxBlockLocations) {
LOG.warn("Max block location exceeded for split: "
+ split + " splitsize: " + locations.length +
" maxsize: " + maxBlockLocations);
locations = Arrays.copyOf(locations,maxBlockLocations);
}
info[i++] = new JobSplit.SplitMetaInfo(
locations, offset,
split.getLength());
offset += currLen - prevLen;
}
}
return info;
}
private static void writeJobSplitMetaInfo(FileSystem fs, Path filename,
FsPermission p, int splitMetaInfoVersion,
JobSplit.SplitMetaInfo[] allSplitMetaInfo)
throws IOException {
// write the splits meta-info to a file for the job tracker
FSDataOutputStream out =
FileSystem.create(fs, filename, p);
out.write(JobSplit.META_SPLIT_FILE_HEADER);
WritableUtils.writeVInt(out, splitMetaInfoVersion);
WritableUtils.writeVInt(out, allSplitMetaInfo.length);
for (JobSplit.SplitMetaInfo splitMetaInfo : allSplitMetaInfo) {
splitMetaInfo.write(out);
}
out.close();
}