org.apache.hadoop.mapreduce.lib.input.FileInputFormat#setMinInputSplitSize() 源码实例 Demo

下面列出了 org.apache.hadoop.mapreduce.lib.input.FileInputFormat#setMinInputSplitSize() 的实例代码，您可以点击链接到 GitHub 查看源代码，也可以在右侧发表评论。

源代码1 项目: hadoop   文件: TeraValidate.java
/**
 * Configures the TeraValidate job and runs it to completion.
 *
 * <p>Exactly two arguments are expected: {@code args[0]} is used as the input
 * path and {@code args[1]} as the output path. With any other argument count
 * the usage text is printed and the job is not submitted.
 *
 * @param args command-line arguments (input path, output path)
 * @return {@code 0} if the job completed successfully; {@code 1} if the
 *         argument count was wrong or the job did not succeed
 * @throws Exception if creating, configuring or running the job fails
 */
public int run(String[] args) throws Exception {
  final Job validateJob = Job.getInstance(getConf());
  if (args.length != 2) {
    usage();
    return 1;
  }

  // Where to read from and where to write to.
  TeraInputFormat.setInputPaths(validateJob, new Path(args[0]));
  FileOutputFormat.setOutputPath(validateJob, new Path(args[1]));

  // Job identity and the classes that do the work.
  validateJob.setJobName("TeraValidate");
  validateJob.setJarByClass(TeraValidate.class);
  validateJob.setMapperClass(ValidateMapper.class);
  validateJob.setReducerClass(ValidateReducer.class);
  validateJob.setOutputKeyClass(Text.class);
  validateJob.setOutputValueClass(Text.class);

  // A single reducer is forced.
  validateJob.setNumReduceTasks(1);

  // The largest possible minimum split size is requested to force a single split.
  FileInputFormat.setMinInputSplitSize(validateJob, Long.MAX_VALUE);
  validateJob.setInputFormatClass(TeraInputFormat.class);

  if (validateJob.waitForCompletion(true)) {
    return 0;
  }
  return 1;
}
 
源代码2 项目: big-c   文件: TeraValidate.java
/**
 * Sets up and executes the TeraValidate MapReduce job.
 *
 * <p>The first argument is taken as the input path and the second as the
 * output path. If the argument count is not two, usage information is shown
 * and the method returns without submitting the job.
 *
 * @param args command-line arguments: input path followed by output path
 * @return {@code 0} when the job succeeds, {@code 1} on a wrong argument
 *         count or an unsuccessful job
 * @throws Exception if job creation, configuration or execution fails
 */
public int run(String[] args) throws Exception {
  Job job = Job.getInstance(getConf());

  boolean hasInputAndOutput = args.length == 2;
  if (!hasInputAndOutput) {
    usage();
    return 1;
  }

  Path inputPath = new Path(args[0]);
  Path outputPath = new Path(args[1]);
  TeraInputFormat.setInputPaths(job, inputPath);
  FileOutputFormat.setOutputPath(job, outputPath);

  job.setJobName("TeraValidate");
  job.setJarByClass(TeraValidate.class);

  // Map and reduce stages, both emitting Text keys and Text values.
  job.setMapperClass(ValidateMapper.class);
  job.setReducerClass(ValidateReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);

  // Force a single reducer.
  job.setNumReduceTasks(1);

  // Force a single split by raising the minimum split size to its maximum.
  FileInputFormat.setMinInputSplitSize(job, Long.MAX_VALUE);
  job.setInputFormatClass(TeraInputFormat.class);

  boolean succeeded = job.waitForCompletion(true);
  return succeeded ? 0 : 1;
}
 
源代码3 项目: incubator-tez   文件: TeraValidate.java
/**
 * Entry point that builds the TeraValidate job from the command-line
 * arguments and waits for it to finish.
 *
 * <p>Two arguments are required: the input path and the output path. For any
 * other argument count the usage message is printed and nothing is submitted.
 *
 * @param args command-line arguments; {@code args[0]} is the input path and
 *             {@code args[1]} the output path
 * @return {@code 0} on successful completion, otherwise {@code 1}
 * @throws Exception if the job cannot be created, configured or run
 */
public int run(String[] args) throws Exception {
  Job teraJob = Job.getInstance(getConf());

  if (args.length == 2) {
    TeraInputFormat.setInputPaths(teraJob, new Path(args[0]));
    FileOutputFormat.setOutputPath(teraJob, new Path(args[1]));

    teraJob.setJobName("TeraValidate");
    teraJob.setJarByClass(TeraValidate.class);
    teraJob.setMapperClass(ValidateMapper.class);
    teraJob.setReducerClass(ValidateReducer.class);
    teraJob.setOutputKeyClass(Text.class);
    teraJob.setOutputValueClass(Text.class);

    // Only one reducer is used.
    teraJob.setNumReduceTasks(1);

    // Minimum split size is set to Long.MAX_VALUE to force a single split.
    FileInputFormat.setMinInputSplitSize(teraJob, Long.MAX_VALUE);
    teraJob.setInputFormatClass(TeraInputFormat.class);

    int exitCode = teraJob.waitForCompletion(true) ? 0 : 1;
    return exitCode;
  }

  // Wrong number of arguments: show usage and report failure.
  usage();
  return 1;
}
 
源代码4 项目: ignite   文件: HadoopPopularWords.java
/**
 * Creates and configures the Hadoop MapReduce job for the popular words example.
 * <p>
 * The job reads text input from {@code BOOKS_DFS_DIR}, writes its result to {@code RESULT_DFS_DIR}
 * and uses {@code TokenizingMapper} and {@code TopNWordsReducer} with {@code Text} keys and
 * {@code IntWritable} values. When the configured job tracker is {@code "local"} (also the default
 * when the property is not set), the input split size bounds are set explicitly.
 *
 * @return Configured instance of the Hadoop MapReduce job.
 * @throws IOException If failed.
 */
private Job createConfigBasedHadoopJob() throws IOException {
    // Factory method is used instead of the deprecated no-argument Job constructor.
    Job jobCfg = Job.getInstance();

    Configuration cfg = jobCfg.getConfiguration();

    // Use explicit configuration of distributed file system, if provided.
    cfg.addResource(U.resolveIgniteUrl(DFS_CFG));

    jobCfg.setJobName("HadoopPopularWordExample");
    jobCfg.setJarByClass(HadoopPopularWords.class);
    jobCfg.setInputFormatClass(TextInputFormat.class);
    jobCfg.setOutputKeyClass(Text.class);
    jobCfg.setOutputValueClass(IntWritable.class);
    jobCfg.setMapperClass(TokenizingMapper.class);
    jobCfg.setReducerClass(TopNWordsReducer.class);

    FileInputFormat.setInputPaths(jobCfg, BOOKS_DFS_DIR);
    FileOutputFormat.setOutputPath(jobCfg, RESULT_DFS_DIR);

    // Local job tracker allows only one task per wave, but the text input format
    // replaces that with a value calculated from the input split size options.
    if ("local".equals(cfg.get("mapred.job.tracker", "local"))) {
        // Split job into tasks using 32MB as the minimum split size and no upper bound.
        FileInputFormat.setMinInputSplitSize(jobCfg, 32L * 1024 * 1024);
        FileInputFormat.setMaxInputSplitSize(jobCfg, Long.MAX_VALUE);
    }

    return jobCfg;
}
 
源代码5 项目: components   文件: ConfigurableHDFSFileSource.java
/**
 * Computes the input splits using the given size as both the minimum and the
 * maximum input split size.
 *
 * <p>A job is obtained from {@code jobInstance()}, both split size bounds are
 * set to {@code desiredBundleSizeBytes}, and the format returned by
 * {@code createFormat(job)} is asked for its splits.
 *
 * @param desiredBundleSizeBytes value applied as both the minimum and maximum
 *        input split size, in bytes
 * @return the splits reported by the input format for the configured job
 * @throws IOException declared by this method; presumably raised while
 *         creating the job or listing the splits (confirm against helpers)
 * @throws IllegalAccessException declared by this method; presumably raised
 *         when the input format is instantiated (confirm against helpers)
 * @throws InstantiationException declared by this method; presumably raised
 *         when the input format is instantiated (confirm against helpers)
 */
protected List<InputSplit> computeSplits(long desiredBundleSizeBytes) throws IOException, IllegalAccessException,
        InstantiationException {
    final Job splitJob = jobInstance();

    // Same value for the upper and lower bound of the split size.
    FileInputFormat.setMaxInputSplitSize(splitJob, desiredBundleSizeBytes);
    FileInputFormat.setMinInputSplitSize(splitJob, desiredBundleSizeBytes);

    return createFormat(splitJob).getSplits(splitJob);
}