Listed below are example usages of org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; follow the links to view the full source code on GitHub.
public static void runSortJob(Configuration conf, Path input, Path outputPath)
throws Exception {
Job job = new Job(conf);
job.setJarByClass(Main.class);
job.setMapperClass(SortMapReduce.Map.class);
job.setReducerClass(SortMapReduce.Reduce.class);
job.setInputFormatClass(KeyValueTextInputFormat.class);
job.setMapOutputKeyClass(Person.class);
job.setMapOutputValueClass(Person.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setPartitionerClass(PersonNamePartitioner.class);
job.setSortComparatorClass(PersonComparator.class);
job.setGroupingComparatorClass(PersonNameComparator.class);
FileInputFormat.setInputPaths(job, input);
FileOutputFormat.setOutputPath(job, outputPath);
job.waitForCompletion(true);
}
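The job above wires in a custom partitioner and comparators that are not shown here. As a rough illustration (not the original source, and assuming Person exposes a getName() accessor), a partitioner that routes records by the person's name could look like this:
import org.apache.hadoop.mapreduce.Partitioner;

public class PersonNamePartitioner extends Partitioner<Person, Person> {
  @Override
  public int getPartition(Person key, Person value, int numPartitions) {
    // Partition on the name only, so all records for the same person reach the
    // same reducer regardless of the fields used by the secondary sort.
    return (key.getName().hashCode() & Integer.MAX_VALUE) % numPartitions;
  }
}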
/**
* Refer to MRCompactorAvroKeyDedupJobRunner#configureInputAndOutputPaths(Job).
* @return true if no valid input paths are present for the MR job to process, where a path is valid if it is
* a directory containing one or more files.
*
*/
protected boolean configureInputAndOutputPaths(Job job, FileSystemDataset dataset) throws IOException {
boolean emptyDirectoryFlag = false;
String mrOutputBase = this.state.getProp(MRCompactor.COMPACTION_JOB_DIR);
CompactionPathParser parser = new CompactionPathParser(this.state);
CompactionPathParser.CompactionParserResult rst = parser.parse(dataset);
this.mrOutputPath = concatPaths(mrOutputBase, rst.getDatasetName(), rst.getDstSubDir(), rst.getTimeString());
log.info("Cleaning temporary MR output directory: " + mrOutputPath);
this.fs.delete(mrOutputPath, true);
this.mapReduceInputPaths = getGranularInputPaths(dataset.datasetRoot());
if (this.mapReduceInputPaths.isEmpty()) {
this.mapReduceInputPaths.add(dataset.datasetRoot());
emptyDirectoryFlag = true;
}
this.oldFiles = new HashSet<>();
for (Path path : mapReduceInputPaths) {
oldFiles.add(this.fs.makeQualified(path).toString());
FileInputFormat.addInputPath(job, path);
}
FileOutputFormat.setOutputPath(job, mrOutputPath);
return emptyDirectoryFlag;
}
public void execute(String inputPath1, String inputPath2, String outputPath) throws Exception {
Configuration conf = new Configuration();
Job job = new Job(conf, "bigdiff");
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setMapperClass(Map.class);
job.setReducerClass(Reduce.class);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
FileInputFormat.addInputPath(job, new Path(inputPath1));
FileInputFormat.addInputPath(job, new Path(inputPath2));
FileOutputFormat.setOutputPath(job, new Path(outputPath));
job.waitForCompletion(true);
}
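A minimal usage sketch for the method above; BigDiffJob is a placeholder name for whatever class declares execute(), and the paths are hypothetical:
public static void main(String[] args) throws Exception {
  // Compare two datasets and write the differences to a third directory.
  new BigDiffJob().execute("/data/run-a", "/data/run-b", "/data/diff-out");
}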
private static void checkOuterConsistency(Job job, Path[] src)
throws IOException {
Path outf = FileOutputFormat.getOutputPath(job);
FileStatus[] outlist = cluster.getFileSystem().listStatus(outf, new
Utils.OutputFileUtils.OutputFilesFilter());
assertEquals("number of part files is more than 1. It is" + outlist.length,
1, outlist.length);
assertTrue("output file with zero length" + outlist[0].getLen(),
0 < outlist[0].getLen());
SequenceFile.Reader r =
new SequenceFile.Reader(cluster.getFileSystem(),
outlist[0].getPath(), job.getConfiguration());
IntWritable k = new IntWritable();
IntWritable v = new IntWritable();
while (r.next(k, v)) {
assertEquals("counts does not match", v.get(),
countProduct(k, src, job.getConfiguration()));
}
r.close();
}
public static void writeAvro(DataSet<Tuple2<Void, Person>> data, String outputPath) throws IOException {
// Set up the Hadoop Input Format
Job job = Job.getInstance();
// Set up Hadoop Output Format
HadoopOutputFormat hadoopOutputFormat = new HadoopOutputFormat(new AvroParquetOutputFormat(), job);
FileOutputFormat.setOutputPath(job, new Path(outputPath));
AvroParquetOutputFormat.setSchema(job, Person.getClassSchema());
ParquetOutputFormat.setCompression(job, CompressionCodecName.SNAPPY);
ParquetOutputFormat.setEnableDictionary(job, true);
// Output & Execute
data.output(hadoopOutputFormat);
}
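Note that with Flink's DataSet API, data.output(...) only registers the sink; nothing is written until the surrounding ExecutionEnvironment is executed. A hedged usage sketch, where readPeople is a hypothetical helper and the path is a placeholder:
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
// 'readPeople' stands in for whatever builds the DataSet<Tuple2<Void, Person>> earlier in the program.
DataSet<Tuple2<Void, Person>> people = readPeople(env);
writeAvro(people, "hdfs:///tmp/people-parquet");
// The Parquet files are only written when the Flink job actually runs.
env.execute("write Person records as Parquet/Avro");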
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
if (otherArgs.length != 2) {
System.err.println("Usage: MapperInputSplitInfo <in> <out>");
System.exit(2);
}
Job job = Job.getInstance(conf, MapperInputSplitInfo.class.getSimpleName());
job.setJarByClass(MapperInputSplitInfo.class);
job.setMapperClass(MyMapper.class);
job.setCombinerClass(MyReducer.class);
job.setReducerClass(MyReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
/**
* Gets a configured instance of the stored {@link FileOutputFormat} in the configuration.
*
* @param conf the configuration to reference the keys from.
* @return a configured instance of the stored {@link FileOutputFormat} in the configuration.
* @throws IOException if there's an issue getting an instance of a FileOutputFormat from the
* configuration.
*/
@SuppressWarnings("rawtypes")
public static FileOutputFormat getFileOutputFormat(Configuration conf) throws IOException {
// Ensure the BigQuery output information is valid.
getMandatoryConfig(conf, OUTPUT_FORMAT_CLASS);
Class<?> confClass = OUTPUT_FORMAT_CLASS.get(conf, conf::getClass);
// Fail if the default value was used, or the class isn't a FileOutputFormat.
if (confClass == null) {
throw new IOException(
"Unable to resolve value for the configuration key '"
+ OUTPUT_FORMAT_CLASS.getKey()
+ "'.");
} else if (!FileOutputFormat.class.isAssignableFrom(confClass)) {
throw new IOException("The class " + confClass.getName() + " is not a FileOutputFormat.");
}
Class<? extends FileOutputFormat> fileOutputClass =
confClass.asSubclass(FileOutputFormat.class);
// Create a new instance and configure it if it's configurable.
return ReflectionUtils.newInstance(fileOutputClass, conf);
}
protected void setOutput() throws IOException
{
JsonNode output = get(root, "output");
JsonNode params = output.get("params");
if (params == null)
params = mapper.createObjectNode();
Path outputPath = new Path(getText(output, "path"));
FileOutputFormat.setOutputPath(job, outputPath);
if (params.has("overwrite") && Boolean.parseBoolean(getText(params, "overwrite")))
{
fs.delete(outputPath, true);
}
BlockSchema schema = new BlockSchema(output.get("schema"));
Storage storage = StorageFactory.get(getText(output, "type"));
storage.prepareOutput(job, conf, params, schema, outputPath);
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
String[] otherArgs = new GenericOptionsParser(conf,args).getRemainingArgs();
// System.out.println(otherArgs);
if(otherArgs.length != 2) {
System.out.println("Usage:wordcount <in> <out>");
System.exit(2);
}
// if(args.length != 2) {
// System.out.println("param error!");
// System.exit(-1);
// }
Job job = new Job(conf, "word count");
job.setJarByClass(WordCount.class);
job.setMapperClass(TokenizerMapper.class);
job.setCombinerClass(IntSumReducer.class);
job.setReducerClass(IntSumReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
public Job call() throws IOException, InterruptedException,
ClassNotFoundException {
job.setMapperClass(GridmixMapper.class);
job.setReducerClass(GridmixReducer.class);
job.setNumReduceTasks(jobdesc.getNumberReduces());
job.setMapOutputKeyClass(GridmixKey.class);
job.setMapOutputValueClass(GridmixRecord.class);
job.setSortComparatorClass(GridmixKey.Comparator.class);
job.setGroupingComparatorClass(SpecGroupingComparator.class);
job.setInputFormatClass(GridmixInputFormat.class);
job.setOutputFormatClass(RawBytesOutputFormat.class);
job.setPartitionerClass(DraftPartitioner.class);
job.setJarByClass(GridmixJob.class);
job.getConfiguration().setInt("gridmix.job.seq", seq);
job.getConfiguration().set(ORIGNAME, null == jobdesc.getJobID()
? "<unknown>" : jobdesc.getJobID().toString());
job.getConfiguration().setBoolean("mapred.used.genericoptionsparser", true);
FileInputFormat.addInputPath(job, new Path("ignored"));
FileOutputFormat.setOutputPath(job, outdir);
job.submit();
return job;
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
if (otherArgs.length != 2) {
System.err.println("Usage: wordcount <in> <out>");
System.exit(2);
}
Job job = new Job(conf, "word count");
job.setJarByClass(WordCount.class);
job.setMapperClass(TokenizerMapper.class);
job.setCombinerClass(IntSumReducer.class);
job.setReducerClass(IntSumReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
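For reference, the TokenizerMapper and IntSumReducer referenced above are the mapper and reducer from the standard Hadoop WordCount tutorial; they look roughly like this:
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;

public static class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable> {
  private final static IntWritable one = new IntWritable(1);
  private Text word = new Text();

  public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
    StringTokenizer itr = new StringTokenizer(value.toString());
    while (itr.hasMoreTokens()) {
      word.set(itr.nextToken());
      context.write(word, one);   // emit (word, 1) for every token in the line
    }
  }
}

public static class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
  private IntWritable result = new IntWritable();

  public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
    int sum = 0;
    for (IntWritable val : values) {
      sum += val.get();           // sum the partial counts for this word
    }
    result.set(sum);
    context.write(key, result);
  }
}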
/**
* Creates a simple copy job.
*
* @param conf Configuration object
* @param outdir Output directory.
* @param indirs Comma separated input directories.
* @return Job initialized for a data copy job.
* @throws Exception If an error occurs creating job configuration.
*/
public static Job createCopyJob(Configuration conf, Path outdir,
Path... indirs) throws Exception {
conf.setInt(MRJobConfig.NUM_MAPS, 3);
Job theJob = Job.getInstance(conf);
theJob.setJobName("DataMoveJob");
FileInputFormat.setInputPaths(theJob, indirs);
theJob.setMapperClass(DataCopyMapper.class);
FileOutputFormat.setOutputPath(theJob, outdir);
theJob.setOutputKeyClass(Text.class);
theJob.setOutputValueClass(Text.class);
theJob.setReducerClass(DataCopyReducer.class);
theJob.setNumReduceTasks(1);
return theJob;
}
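A minimal usage sketch for the helper above (the paths are hypothetical):
Configuration conf = new Configuration();
Job copyJob = createCopyJob(conf, new Path("/tmp/copy-out"),
    new Path("/data/in1"), new Path("/data/in2"));
// Run the copy job and check whether it completed successfully.
boolean succeeded = copyJob.waitForCompletion(true);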
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
Configuration conf = new Configuration();
// A is an m-by-n matrix; B is an n-by-p matrix.
conf.set("m", args[0]);
conf.set("n", args[1]);
conf.set("p", args[2]);
Job job = new Job(conf, "Matrix_Multiplication");
job.setJarByClass(MatMulDriver.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setMapperClass(MatMulMap.class);
//Don't use combiner if there is no scope of combining the output. Otherwise the job will get stuck.
//job.setCombinerClass(MatMulModGenReduce.class);
job.setReducerClass(MatMulReduce.class);
//args[3] is the input path.
FileInputFormat.addInputPath(job, new Path(args[3]));
//args[4] is the output path.
FileOutputFormat.setOutputPath(job, new Path(args[4]));
System.exit(job.waitForCompletion(true)?0:1);
}
public Job createSubmittableJob(String[] args) throws IOException {
Path partitionsPath = new Path(destPath, PARTITIONS_FILE_NAME);
generatePartitions(partitionsPath);
Job job = Job.getInstance(getConf(),
getConf().get("mapreduce.job.name", "hashTable_" + tableHash.tableName));
Configuration jobConf = job.getConfiguration();
jobConf.setLong(HASH_BATCH_SIZE_CONF_KEY, tableHash.batchSize);
jobConf.setBoolean(IGNORE_TIMESTAMPS, tableHash.ignoreTimestamps);
job.setJarByClass(HashTable.class);
TableMapReduceUtil.initTableMapperJob(tableHash.tableName, tableHash.initScan(),
HashMapper.class, ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);
// use a TotalOrderPartitioner and reducers to group region output into hash files
job.setPartitionerClass(TotalOrderPartitioner.class);
TotalOrderPartitioner.setPartitionFile(jobConf, partitionsPath);
job.setReducerClass(Reducer.class); // identity reducer
job.setNumReduceTasks(tableHash.numHashFiles);
job.setOutputKeyClass(ImmutableBytesWritable.class);
job.setOutputValueClass(ImmutableBytesWritable.class);
job.setOutputFormatClass(MapFileOutputFormat.class);
FileOutputFormat.setOutputPath(job, new Path(destPath, HASH_DATA_DIR));
return job;
}
private Path doMapReduce(final String inputFile) throws Exception {
final FileSystem fileSystem = FileSystem.get(conf);
final Path inputPath = new Path(inputFile);
final Path outputPath = fileSystem.makeQualified(new Path("target/out"));
fileSystem.delete(outputPath, true);
final Job job = Job.getInstance(conf);
FileInputFormat.setInputPaths(job, inputPath);
job.setInputFormatClass(CRAMInputFormat.class);
job.setMapOutputKeyClass(LongWritable.class);
job.setMapOutputValueClass(SAMRecordWritable.class);
conf.set(CRAMTestNoHeaderOutputFormat.READ_HEADER_FROM_FILE, inputFile);
job.setOutputFormatClass(CRAMTestNoHeaderOutputFormat.class);
job.setOutputKeyClass(LongWritable.class);
job.setOutputValueClass(SAMRecordWritable.class);
job.setNumReduceTasks(0);
FileOutputFormat.setOutputPath(job, outputPath);
final boolean success = job.waitForCompletion(true);
assertTrue(success);
return outputPath;
}
protected Job doLoad(Configuration conf, TableDescriptor tableDescriptor) throws Exception {
Path outputDir = getTestDir(TEST_NAME, "load-output");
LOG.info("Load output dir: " + outputDir);
NMapInputFormat.setNumMapTasks(conf, conf.getInt(NUM_MAP_TASKS_KEY, NUM_MAP_TASKS_DEFAULT));
conf.set(TABLE_NAME_KEY, tableDescriptor.getTableName().getNameAsString());
Job job = Job.getInstance(conf);
job.setJobName(TEST_NAME + " Load for " + tableDescriptor.getTableName());
job.setJarByClass(this.getClass());
setMapperClass(job);
job.setInputFormatClass(NMapInputFormat.class);
job.setNumReduceTasks(0);
setJobScannerConf(job);
FileOutputFormat.setOutputPath(job, outputDir);
TableMapReduceUtil.addDependencyJars(job);
TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(), AbstractHBaseTool.class);
TableMapReduceUtil.initCredentials(job);
assertTrue(job.waitForCompletion(true));
return job;
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
if (otherArgs.length < 2) {
System.err.println("Usage: ElemValueCooccurrencesTest configFile outputDir");
System.exit(2);
}
Job job = Job.getInstance(conf);
job.setJarByClass(ElemValueCooccurrencesTest.class);
job.setInputFormatClass(ValueInputFormat.class);
job.setMapperClass(ElemCooccurrencesMapper.class);
job.setMapOutputKeyClass(LongWritable.class);
job.setMapOutputValueClass(Text.class);
job.setOutputFormatClass(TextOutputFormat.class);
FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
conf = job.getConfiguration();
conf.addResource(otherArgs[0]);
conf.setClass(MarkLogicConstants.INPUT_VALUE_CLASS, Text.class,
Writable.class);
conf.setClass(MarkLogicConstants.INPUT_LEXICON_FUNCTION_CLASS,
ElemValueCooccurrencesFunction.class, ElemValueCooccurrences.class);
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
public static void initializeCenters (Configuration conf, FileSystem fs, String pointsPath, String seqFilePath) throws Exception {
Path points = new Path (pointsPath);
Path seqFile = new Path (seqFilePath);
if (fs.exists(seqFile)) {
fs.delete(seqFile, true);
}
Job job = Job.getInstance(conf);
job.setMapperClass(CenterInitializer.class);
job.setReducerClass(Reducer.class);
job.setNumReduceTasks(0);
job.setMapOutputKeyClass(Centroid.class);
job.setMapOutputValueClass(Point.class);
job.setOutputKeyClass(Centroid.class);
job.setOutputValueClass(Point.class);
job.setOutputFormatClass(SequenceFileOutputFormat.class);
job.setInputFormatClass(TextInputFormat.class);
FileInputFormat.addInputPath(job, new Path(pointsPath));
FileOutputFormat.setOutputPath(job, seqFile);
job.waitForCompletion(true);
}
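A hedged usage sketch for the initializer above (file names are placeholders); the resulting SequenceFile of (Centroid, Point) pairs would then seed the first k-means iteration:
Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(conf);
// Turn the raw text points into the initial centers SequenceFile.
initializeCenters(conf, fs, "/kmeans/points.txt", "/kmeans/centers.seq");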
public static boolean run(Configuration config, Map<String, String> paths)
throws IOException, ClassNotFoundException, InterruptedException {
String jobName = "step6";
Job job = Job.getInstance(config, jobName);
job.setJarByClass(Step6.class);
job.setJar("export\\ItemCF.jar");
job.setMapperClass(Step6_Mapper.class);
job.setReducerClass(Step6_Reducer.class);
job.setMapOutputKeyClass(PairWritable.class);
job.setMapOutputValueClass(Text.class);
//job.setSortComparatorClass(ScoreSort.class); // custom sort comparator
job.setGroupingComparatorClass(UserGroup.class); // custom grouping comparator
Path inPath = new Path(paths.get("Step6Input"));
Path outpath = new Path(paths.get("Step6Output"));
FileInputFormat.addInputPath(job, inPath);
FileOutputFormat.setOutputPath(job, outpath);
FileSystem fs = FileSystem.get(config);
if (fs.exists(outpath)) {
fs.delete(outpath, true);
}
return job.waitForCompletion(true);
}
public static void main(String[] args) throws Exception {
String[] otherArgs = new GenericOptionsParser(args).getRemainingArgs();
if (otherArgs.length != 2) {
System.err.println("Usage: xflowdstipcount <in> <out>");
System.exit(2);
}
Job job = Job.getInstance();
job.setJobName("xflow dstip count");
job.setJarByClass(XflowDstIPCount.class);
job.setMapperClass(ParesDstIPMapper.class);
job.setCombinerClass(IntSumReducer.class);
job.setReducerClass(IntSumReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
Configuration conf=new Configuration();
// The test input for which you want to find the activity that the person should be doing
conf.set("test_input", args[0]);
Job job = new Job(conf);
job.setJarByClass(NBCDriver.class);
job.setJobName("Naive_Bayes_calssifier using Hadoop");
FileInputFormat.setInputPaths(job, new Path(args[1]));
FileOutputFormat.setOutputPath(job, new Path(args[2]));
job.setMapperClass(NBCMap.class);
job.setReducerClass(NBCReduce.class);
job.setMapOutputKeyClass(IntWritable.class);
job.setMapOutputValueClass(Text.class);
job.setOutputKeyClass(IntWritable.class);
job.setOutputValueClass(Text.class);
boolean success = job.waitForCompletion(true);
System.exit(success ? 0 : 1);
}
public static void calculateNextRanks (Configuration conf, FileSystem fs, String inputPath, String outputPath) throws Exception {
Path outFile = new Path (outputPath);
if (fs.exists(outFile)) {
fs.delete(outFile, true);
}
Job job = Job.getInstance(conf);
job.setJarByClass(PageRankMapper.class);
job.setMapperClass(PageRankMapper.class);
job.setReducerClass(PageRankReducer.class);
job.setMapOutputKeyClass(LongWritable.class);
job.setMapOutputValueClass(Message.class);
job.setOutputKeyClass(LongWritable.class);
job.setOutputValueClass(Message.class);
job.setInputFormatClass(SequenceFileInputFormat.class);
job.setOutputFormatClass(SequenceFileOutputFormat.class);
FileInputFormat.addInputPath(job, new Path(inputPath));
FileOutputFormat.setOutputPath(job, outFile);
job.waitForCompletion(true);
}
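Because each call reads the previous iteration's SequenceFile output and writes the next one, this method is typically driven from a loop. A hedged sketch (paths and iteration count are placeholders):
Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(conf);
for (int i = 0; i < 10; i++) {
  // Iteration i reads /pagerank/iter-i and produces /pagerank/iter-(i+1).
  calculateNextRanks(conf, fs, "/pagerank/iter-" + i, "/pagerank/iter-" + (i + 1));
}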
/**
* In the main method, the Patent job pipeline is set up: Mapper -> Combiner -> Reducer -> Partitioner.
*
* @param args
* @throws Exception
*/
public static void main(String[] args) throws Exception {
// Configure and initialize the Job
Job job = Job.getInstance(new Configuration());
// Specify the Job's main (jar) class
job.setJarByClass(PatentMainController.class);
// Specify the Job's Mapper
job.setMapperClass(PatentMapper.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
// Specify the Job's input path
FileInputFormat.setInputPaths(job, new Path(args[0]));
// Specify the Job's Combiner
job.setCombinerClass(InverseIndexByKeywordCombiner.class);
// Specify the Job's Reducer
job.setReducerClass(PatentReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
// Specify the Job's output path
FileOutputFormat.setOutputPath(job, new Path(args[1]));
job.setPartitionerClass(PatentPartitioner.class);
// Specify the maximum number of reduce tasks
job.setNumReduceTasks(ConfUtil.getMax());
// Submit the job and wait for it to complete
job.waitForCompletion(true);
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
if (otherArgs.length != 2) {
System.err.println("Usage: secondarysrot <in> <out>");
System.exit(2);
}
Job job = new Job(conf, "secondary sort");
job.setJarByClass(SecondarySort.class);
job.setMapperClass(MapClass.class);
job.setReducerClass(Reduce.class);
// group and partition by the first int in the pair
job.setPartitionerClass(FirstPartitioner.class);
job.setGroupingComparatorClass(FirstGroupingComparator.class);
// the map output is IntPair, IntWritable
job.setMapOutputKeyClass(IntPair.class);
job.setMapOutputValueClass(IntWritable.class);
// the reduce output is Text, IntWritable
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
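FirstPartitioner and FirstGroupingComparator come from the standard Hadoop SecondarySort example. As a rough sketch (assuming IntPair exposes getFirst()), the partitioner dispatches on the first integer only, so the sort comparator can order by the second integer within each partition:
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Partitioner;

public static class FirstPartitioner extends Partitioner<IntPair, IntWritable> {
  @Override
  public int getPartition(IntPair key, IntWritable value, int numPartitions) {
    // Partition only on the first int of the pair; records with the same first
    // value land in the same reducer and are then sorted by the second value.
    return Math.abs(key.getFirst() * 127) % numPartitions;
  }
}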
@Test
public void testNewApis() throws Exception {
Random r = new Random(System.currentTimeMillis());
Path tmpBaseDir = new Path("/tmp/wc-" + r.nextInt());
final Path inDir = new Path(tmpBaseDir, "input");
final Path outDir = new Path(tmpBaseDir, "output");
String input = "The quick brown fox\nhas many silly\nred fox sox\n";
FileSystem inFs = inDir.getFileSystem(conf);
FileSystem outFs = outDir.getFileSystem(conf);
outFs.delete(outDir, true);
if (!inFs.mkdirs(inDir)) {
throw new IOException("Mkdirs failed to create " + inDir.toString());
}
{
DataOutputStream file = inFs.create(new Path(inDir, "part-0"));
file.writeBytes(input);
file.close();
}
Job job = Job.getInstance(conf, "word count");
job.setJarByClass(TestLocalModeWithNewApis.class);
job.setMapperClass(TokenizerMapper.class);
job.setCombinerClass(IntSumReducer.class);
job.setReducerClass(IntSumReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
FileInputFormat.addInputPath(job, inDir);
FileOutputFormat.setOutputPath(job, outDir);
assertEquals(job.waitForCompletion(true), true);
String output = readOutput(outDir, conf);
assertEquals("The\t1\nbrown\t1\nfox\t2\nhas\t1\nmany\t1\n" +
"quick\t1\nred\t1\nsilly\t1\nsox\t1\n", output);
outFs.delete(tmpBaseDir, true);
}
/**
* Job configurator
*
* @param job job instance
* @param jarByClass class of the jar
* @param mapperClass mapper
* @param reducerClass reducer
* @param commaSeparatedInputFiles input paths
* @param outputPath output
* @throws IOException I/O exception
*/
public static void configureJob(Job job, Class<?> jarByClass,
Class<? extends Mapper> mapperClass, Class<? extends Reducer> reducerClass,
String commaSeparatedInputFiles, String outputPath)
throws IOException
{
job.setJarByClass(jarByClass);
job.setJobName(jarByClass.getName());
// mapper
job.setMapperClass(mapperClass);
// reducer
job.setReducerClass(reducerClass);
// input-output is warc
job.setInputFormatClass(WARCInputFormat.class);
// prevent producing empty files
LazyOutputFormat.setOutputFormatClass(job, WARCOutputFormat.class);
// intermediate data
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(WARCWritable.class);
// output data
job.setOutputKeyClass(NullWritable.class);
job.setOutputValueClass(WARCWritable.class);
// set output compression to GZip
FileOutputFormat.setCompressOutput(job, true);
FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
FileInputFormat.addInputPaths(job, commaSeparatedInputFiles);
FileOutputFormat.setOutputPath(job, new Path(outputPath));
}
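A hedged usage sketch for the configurator above (class names and paths are placeholders, not from the source):
Job job = Job.getInstance(new Configuration());
configureJob(job, MyWarcTool.class, MyWarcMapper.class, MyWarcReducer.class,
    "/warc/segment-1.warc.gz,/warc/segment-2.warc.gz", "/warc/processed");
// GZip-compressed WARC output appears under /warc/processed once the job finishes.
job.waitForCompletion(true);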
public int run(String[] args) throws Exception {
if (args.length != 5) {
usage();
return 2;
}
LOG.info("starting");
Path inputDir = new Path(args[0]);
Path outputDir = new Path(args[1]);
getConf().setStrings(INPUT_URI_STRING, args[2]);
getConf().setStrings(INPUT_SCOPE_NAME, args[3]);
getConf().setStrings(INPUT_STREAM_NAME, args[4]);
getConf().setStrings(INPUT_DESERIALIZER, TextSerializer.class.getName());
getConf().setInt(MRJobConfig.NUM_MAPS, 1);
Job job = Job.getInstance(getConf());
TeraInputFormat.setInputPaths(job, inputDir);
FileOutputFormat.setOutputPath(job, outputDir);
job.setJobName("TeraStreamValidate");
job.setJarByClass(TeraStreamValidate.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setMapperClass(TeraSortMapper.class);
job.setNumReduceTasks(1);
job.setInputFormatClass(PravegaInputFormat.class);
job.setOutputFormatClass(SequenceFileOutputFormat.class);
int ret = job.waitForCompletion(true) ? 0 : 1;
LOG.info("done");
return ret;
}
/**
* Run a test which creates a SequenceMapper / IdentityReducer
* job over a set of generated number files.
*/
private void doMultiReducerTest(int numMaps, int numReduces,
int parallelMaps, int parallelReduces) throws Exception {
Path in = getNumberDirPath();
Path out = getOutputPath();
// Clear data from any previous tests.
Configuration conf = new Configuration();
FileSystem fs = FileSystem.getLocal(conf);
if (fs.exists(out)) {
fs.delete(out, true);
}
if (fs.exists(in)) {
fs.delete(in, true);
}
for (int i = 0; i < numMaps; i++) {
makeNumberFile(i, 100);
}
Job job = Job.getInstance();
job.setNumReduceTasks(numReduces);
job.setMapperClass(SequenceMapper.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(NullWritable.class);
FileInputFormat.addInputPath(job, in);
FileOutputFormat.setOutputPath(job, out);
LocalJobRunner.setLocalMaxRunningMaps(job, parallelMaps);
LocalJobRunner.setLocalMaxRunningReduces(job, parallelReduces);
boolean result = job.waitForCompletion(true);
assertTrue("Job failed!!", result);
verifyNumberJob(numMaps);
}
/**
* The MapReduce driver - setup and launch the job.
*
* @param args the command-line arguments
* @return the process exit code
* @throws Exception if something goes wrong
*/
public int run(final String[] args) throws Exception {
Cli cli = Cli.builder().setArgs(args).addOptions(CliCommonOpts.MrIoOpts.values()).build();
int result = cli.runCmd();
if (result != 0) {
return result;
}
Path inputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.INPUT));
Path outputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.OUTPUT));
Configuration conf = super.getConf();
Job job = new Job(conf);
job.setJarByClass(AvroParquetMapReduce.class);
job.setInputFormatClass(AvroParquetInputFormat.class);
AvroParquetInputFormat.setInputPaths(job, inputPath);
job.setMapperClass(Map.class);
job.setReducerClass(Reduce.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(DoubleWritable.class);
job.setOutputFormatClass(AvroParquetOutputFormat.class);
FileOutputFormat.setOutputPath(job, outputPath);
AvroParquetOutputFormat.setSchema(job, StockAvg.SCHEMA$);
return job.waitForCompletion(true) ? 0 : 1;
}
public static void run() throws IOException, ClassNotFoundException,
InterruptedException {
String inputPath1 = ItemBasedCFDriver.path.get("step7InputPath1");
String inputPath2 = ItemBasedCFDriver.path.get("step7InputPath2");
String outputPath = ItemBasedCFDriver.path.get("step7OutputPath");
Configuration conf = new Configuration();
conf.set("mapred.textoutputformat.separator", ",");
Job job = Job.getInstance(conf);
HDFS hdfs = new HDFS(conf);
hdfs.rmr(outputPath);
job.setMapperClass(Step1_Mapper.class);
job.setJarByClass(IAndKMatrixMultiplicationStep1.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
FileInputFormat.setInputPaths(job, new Path(inputPath1), new Path(
inputPath2));
FileOutputFormat.setOutputPath(job, new Path(outputPath));
job.waitForCompletion(true);
}