下面列出了 org.apache.hadoop.mapreduce.lib.input.FileInputFormat#setMinInputSplitSize() 的实例代码;也可以点击链接到 GitHub 查看源代码,或在右侧发表评论。
public int run(String[] args) throws Exception {
    Job job = Job.getInstance(getConf());

    // Exactly two arguments are expected: <input path> <output path>.
    if (args.length != 2) {
        usage();
        return 1;
    }

    TeraInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setJobName("TeraValidate");
    job.setJarByClass(TeraValidate.class);
    job.setMapperClass(ValidateMapper.class);
    job.setReducerClass(ValidateReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // A single reducer sees all mapper output, enabling a global check.
    job.setNumReduceTasks(1);

    // A huge minimum split size collapses the input into one split.
    FileInputFormat.setMinInputSplitSize(job, Long.MAX_VALUE);
    job.setInputFormatClass(TeraInputFormat.class);

    // Block until the job finishes; 0 on success, 1 on failure.
    return job.waitForCompletion(true) ? 0 : 1;
}
public int run(String[] args) throws Exception {
    final Job job = Job.getInstance(getConf());
    if (args.length != 2) {
        usage();
        return 1; // wrong usage -> non-zero exit code
    }

    // Wire up input/output locations from the command line.
    TeraInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // Identify the job and the classes that drive it.
    job.setJobName("TeraValidate");
    job.setJarByClass(TeraValidate.class);
    job.setMapperClass(ValidateMapper.class);
    job.setReducerClass(ValidateReducer.class);

    // Both map and reduce emit Text/Text pairs.
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // Validation requires a single reducer and a single input split.
    job.setNumReduceTasks(1);
    FileInputFormat.setMinInputSplitSize(job, Long.MAX_VALUE);
    job.setInputFormatClass(TeraInputFormat.class);

    boolean succeeded = job.waitForCompletion(true);
    return succeeded ? 0 : 1;
}
/**
 * Configures and runs the TeraValidate job.
 *
 * @param args two elements: the input directory and the output directory
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws Exception if job submission or execution fails
 */
public int run(String[] args) throws Exception {
    Job job = Job.getInstance(getConf());

    if (args.length != 2) {
        usage();
        return 1;
    }

    TeraInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setJobName("TeraValidate");
    job.setJarByClass(TeraValidate.class);
    job.setMapperClass(ValidateMapper.class);
    job.setReducerClass(ValidateReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    // One reducer plus one split funnels the whole dataset through a
    // single validation pass.
    job.setNumReduceTasks(1);
    FileInputFormat.setMinInputSplitSize(job, Long.MAX_VALUE);
    job.setInputFormatClass(TeraInputFormat.class);

    return job.waitForCompletion(true) ? 0 : 1;
}
/**
 * Builds a fully configured Hadoop MapReduce job for the popular-words example.
 *
 * @return Configured Hadoop job instance, ready to submit.
 * @throws IOException If the job could not be created.
 */
@SuppressWarnings("deprecation")
private Job createConfigBasedHadoopJob() throws IOException {
    Job jobCfg = new Job();
    Configuration cfg = jobCfg.getConfiguration();

    // Layer in an explicit distributed-file-system configuration, if one exists.
    cfg.addResource(U.resolveIgniteUrl(DFS_CFG));

    jobCfg.setJobName("HadoopPopularWordExample");
    jobCfg.setJarByClass(HadoopPopularWords.class);
    jobCfg.setInputFormatClass(TextInputFormat.class);
    jobCfg.setOutputKeyClass(Text.class);
    jobCfg.setOutputValueClass(IntWritable.class);
    jobCfg.setMapperClass(TokenizingMapper.class);
    jobCfg.setReducerClass(TopNWordsReducer.class);

    FileInputFormat.setInputPaths(jobCfg, BOOKS_DFS_DIR);
    FileOutputFormat.setOutputPath(jobCfg, RESULT_DFS_DIR);

    // The local job tracker runs only one task per wave, but the text input
    // format derives the real task count from the configured split sizes.
    if ("local".equals(cfg.get("mapred.job.tracker", "local"))) {
        // Carve the input into tasks using a 32MB minimum split size.
        FileInputFormat.setMinInputSplitSize(jobCfg, 32L * 1024 * 1024);
        FileInputFormat.setMaxInputSplitSize(jobCfg, Long.MAX_VALUE);
    }

    return jobCfg;
}
/**
 * Computes the input splits for the configured format.
 *
 * <p>Both the minimum and maximum split sizes are pinned to
 * {@code desiredBundleSizeBytes} so every generated split is bounded by
 * that bundle size.
 *
 * @param desiredBundleSizeBytes target size of each split, in bytes
 * @return the splits produced by the input format
 */
protected List<InputSplit> computeSplits(long desiredBundleSizeBytes)
    throws IOException, IllegalAccessException, InstantiationException {
    final Job job = jobInstance();
    FileInputFormat.setMinInputSplitSize(job, desiredBundleSizeBytes);
    FileInputFormat.setMaxInputSplitSize(job, desiredBundleSizeBytes);
    final InputFormat<?, ?> format = createFormat(job);
    return format.getSplits(job);
}