public static void main(String[] args) throws Exception { if (args.length < 2) { System.err.println("Usage: WordCount "); return; } final String inputPath = args[0]; final String outputPath = args[1]; final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); // Set up the Hadoop Input Format Job job = Job.getInstance(); HadoopInputFormat hadoopInputFormat = new HadoopInputFormat(new TextInputFormat(), LongWritable.class, Text.class, job); TextInputFormat.addInputPath(job, new Path(inputPath)); // Create a Flink job with it DataSet> text = env.createInput(hadoopInputFormat); // Tokenize the line and convert from Writable "Text" to String for better handling DataSet> words = text.flatMap(new Tokenizer()); // Sum up the words DataSet> result = words.groupBy(0).aggregate(Aggregations.SUM, 1); // Convert String back to Writable "Text" for use with Hadoop Output Format DataSet> hadoopResult = result.map(new HadoopDatatypeMapper()); // Set up Hadoop Output Format HadoopOutputFormat hadoopOutputFormat = new HadoopOutputFormat(new TextOutputFormat(), job); hadoopOutputFormat.getConfiguration().set("mapreduce.output.textoutputformat.separator", " "); hadoopOutputFormat.getConfiguration().set("mapred.textoutputformat.separator", " "); // set the value for both, since this test TextOutputFormat.setOutputPath(job, new Path(outputPath)); // Output & Execute hadoopResult.output(hadoopOutputFormat); env.execute("Word Count"); }

类org.apache.hadoop.mapreduce.lib.output.TextOutputFormat源码实例Demo