The following examples show how to use the org.apache.hadoop.mapreduce.lib.reduce.LongSumReducer API class; you can also follow the links to view the full source code on GitHub.
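Before the real-world examples, here is a minimal, self-contained sketch of the usual pattern (the class, job, and path names here are illustrative, not taken from any of the projects below): a mapper emits (Text, LongWritable) pairs and LongSumReducer sums the values for each key. Because the sum is associative and commutative, the same class also works as the combiner.
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.reduce.LongSumReducer;

public class LongSumExample {
  // Emits (token, 1) for every whitespace-separated token in the input line.
  public static class TokenMapper extends Mapper<Object, Text, Text, LongWritable> {
    private static final LongWritable ONE = new LongWritable(1);
    private final Text word = new Text();

    @Override
    protected void map(Object key, Text value, Context context)
        throws IOException, InterruptedException {
      for (String token : value.toString().split("\\s+")) {
        if (!token.isEmpty()) {
          word.set(token);
          context.write(word, ONE);
        }
      }
    }
  }

  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration(), "long sum example");
    job.setJarByClass(LongSumExample.class);
    job.setMapperClass(TokenMapper.class);
    // LongSumReducer sums all LongWritable values per key; being associative
    // and commutative, it is safe to reuse as the combiner.
    job.setCombinerClass(LongSumReducer.class);
    job.setReducerClass(LongSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}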
/**
* Runs a MapReduce job, running as many maps as there are requested clients.
* Before the job starts, writes out an input file with one instruction line
* per client saying which row that client is to start on.
* @param cmd Command to run.
*/
private void doMapReduce(final Class<? extends Test> cmd)
throws IOException, InterruptedException, ClassNotFoundException {
Configuration conf = getConf();
Path inputDir = writeInputFile(conf);
conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
conf.set(EvaluationMapTask.PE_KEY, getClass().getName());
Job job = Job.getInstance(conf);
job.setJarByClass(PerformanceEvaluation.class);
job.setJobName("HBase Performance Evaluation");
job.setInputFormatClass(PeInputFormat.class);
PeInputFormat.setInputPaths(job, inputDir);
job.setOutputKeyClass(LongWritable.class);
job.setOutputValueClass(LongWritable.class);
job.setMapperClass(EvaluationMapTask.class);
job.setReducerClass(LongSumReducer.class);
job.setNumReduceTasks(1);
job.setOutputFormatClass(TextOutputFormat.class);
TextOutputFormat.setOutputPath(job, new Path(inputDir.getParent(), "outputs"));
TableMapReduceUtil.addDependencyJars(job);
TableMapReduceUtil.initCredentials(job);
job.waitForCompletion(true);
}
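The writeInputFile helper called above is not part of this listing. A rough sketch of the idea, under the assumption that each line simply tells one client map which row to start on (the class name, file name, and line format are hypothetical, not HBase's actual code):
import java.io.IOException;
import java.io.PrintStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class InputWriterSketch {
  // Hypothetical: writes one instruction line per client map task, each line
  // naming the row that client should start on.
  static Path writeInputFile(Configuration conf, int clients, int rowsPerClient)
      throws IOException {
    Path inputDir = new Path("pe_inputs");
    FileSystem fs = FileSystem.get(conf);
    try (PrintStream out =
        new PrintStream(fs.create(new Path(inputDir, "input.txt")))) {
      for (int i = 0; i < clients; i++) {
        out.println("startRow=" + (i * rowsPerClient)
            + ", perClientRunRows=" + rowsPerClient);
      }
    }
    return inputDir;
  }
}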
/**
* Builds and runs the Hadoop job.
* @return 0 if the Hadoop job completes successfully and 1 otherwise.
*/
@Override
public int run(String[] arg0) throws Exception {
Configuration conf = getConf();
Job job = Job.getInstance(conf);
job.setJarByClass(WATServerType.class);
job.setNumReduceTasks(1);
String inputPath = "data/*.warc.wat.gz";
//inputPath = "s3n://aws-publicdatasets/common-crawl/crawl-data/CC-MAIN-2013-48/segments/1386163035819/wet/CC-MAIN-20131204131715-00000-ip-10-33-133-15.ec2.internal.warc.wet.gz";
//inputPath = "s3n://aws-publicdatasets/common-crawl/crawl-data/CC-MAIN-2013-48/segments/1386163035819/wet/*.warc.wet.gz";
LOG.info("Input path: " + inputPath);
FileInputFormat.addInputPath(job, new Path(inputPath));
String outputPath = "/tmp/cc/";
FileSystem fs = FileSystem.newInstance(conf);
if (fs.exists(new Path(outputPath))) {
fs.delete(new Path(outputPath), true);
}
FileOutputFormat.setOutputPath(job, new Path(outputPath));
job.setInputFormatClass(WARCFileInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(LongWritable.class);
job.setMapperClass(ServerTypeMap.ServerMapper.class);
job.setReducerClass(LongSumReducer.class);
if (job.waitForCompletion(true)) {
return 0;
} else {
return 1;
}
}
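This job (and the two Common Crawl jobs that follow) registers no combiner. Because LongSumReducer's sum is associative and commutative, the same class could also pre-aggregate on the map side with one extra line, cutting shuffle volume (a suggestion, not part of the original code):
job.setCombinerClass(LongSumReducer.class);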
/**
* Builds and runs the Hadoop job.
* @return 0 if the Hadoop job completes successfully and 1 otherwise.
*/
@Override
public int run(String[] arg0) throws Exception {
Configuration conf = getConf();
Job job = Job.getInstance(conf);
job.setJarByClass(WETWordCount.class);
job.setNumReduceTasks(1);
String inputPath = "data/*.warc.wet.gz";
//inputPath = "s3n://aws-publicdatasets/common-crawl/crawl-data/CC-MAIN-2013-48/segments/1386163035819/wet/CC-MAIN-20131204131715-00000-ip-10-33-133-15.ec2.internal.warc.wet.gz";
//inputPath = "s3n://aws-publicdatasets/common-crawl/crawl-data/CC-MAIN-2013-48/segments/1386163035819/wet/*.warc.wet.gz";
LOG.info("Input path: " + inputPath);
FileInputFormat.addInputPath(job, new Path(inputPath));
String outputPath = "/tmp/cc/";
FileSystem fs = FileSystem.newInstance(conf);
if (fs.exists(new Path(outputPath))) {
fs.delete(new Path(outputPath), true);
}
FileOutputFormat.setOutputPath(job, new Path(outputPath));
job.setInputFormatClass(WARCFileInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(LongWritable.class);
job.setMapperClass(WordCounterMap.WordCountMapper.class);
// LongSumReducer sums the per-word counts, which is exactly what the word frequency task needs
job.setReducerClass(LongSumReducer.class);
if (job.waitForCompletion(true)) {
return 0;
} else {
return 1;
}
}
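WordCounterMap.WordCountMapper is defined elsewhere in the project; the sketch below shows the shape such a mapper has to have to feed LongSumReducer. The body is an assumption, not the project's code, and it pretends the input is plain text rather than the WARC record type the real mapper consumes:
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Assumed shape: emits (token, 1) so LongSumReducer can total the counts.
public class SketchWordCountMapper extends Mapper<Object, Text, Text, LongWritable> {
  private static final LongWritable ONE = new LongWritable(1);
  private final Text word = new Text();

  @Override
  protected void map(Object key, Text value, Context context)
      throws IOException, InterruptedException {
    for (String token : value.toString().toLowerCase().split("\\W+")) {
      if (!token.isEmpty()) {
        word.set(token);
        context.write(word, ONE);
      }
    }
  }
}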
/**
* Builds and runs the Hadoop job.
* @return 0 if the Hadoop job completes successfully and 1 otherwise.
*/
@Override
public int run(String[] arg0) throws Exception {
Configuration conf = getConf();
Job job = Job.getInstance(conf);
job.setJarByClass(WARCTagCounter.class);
job.setNumReduceTasks(1);
String inputPath = "data/*.warc.gz";
//inputPath = "s3n://aws-publicdatasets/common-crawl/crawl-data/CC-MAIN-2013-48/segments/1386163035819/wet/CC-MAIN-20131204131715-00000-ip-10-33-133-15.ec2.internal.warc.wet.gz";
//inputPath = "s3n://aws-publicdatasets/common-crawl/crawl-data/CC-MAIN-2013-48/segments/1386163035819/wet/*.warc.wet.gz";
LOG.info("Input path: " + inputPath);
FileInputFormat.addInputPath(job, new Path(inputPath));
String outputPath = "/tmp/cc/";
FileSystem fs = FileSystem.newInstance(conf);
if (fs.exists(new Path(outputPath))) {
fs.delete(new Path(outputPath), true);
}
FileOutputFormat.setOutputPath(job, new Path(outputPath));
job.setInputFormatClass(WARCFileInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(LongWritable.class);
job.setMapperClass(TagCounterMap.TagCounterMapper.class);
job.setReducerClass(LongSumReducer.class);
return job.waitForCompletion(true) ? 0 : 1;
}
@Override
public int run(String[] args) throws Exception {
Configuration conf = new Configuration();
String[] otherArgs = new GenericOptionsParser(conf, args)
.getRemainingArgs();
if (otherArgs.length != 2) {
System.err.println("Usage: wordcount <in> <out>");
return 2;
}
conf.set("nl.basjes.parse.apachehttpdlogline.format", logFormat);
// A comma-separated list of fields
conf.set("nl.basjes.parse.apachehttpdlogline.fields",
"STRING:request.status.last");
Job job = Job.getInstance(conf, "word count");
job.setJarByClass(Wordcount.class);
FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
job.setInputFormatClass(ApacheHttpdLogfileInputFormat.class);
job.setMapperClass(TokenizerMapper.class);
job.setCombinerClass(LongSumReducer.class);
job.setReducerClass(LongSumReducer.class);
// The configuration should contain a reference to your NameNode
FileSystem fs = FileSystem.get(conf);
// Delete any previous output; 'true' makes the delete recursive
Path outputPath = new Path(otherArgs[1]);
fs.delete(outputPath, true);
FileOutputFormat.setOutputPath(job, outputPath);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(LongWritable.class);
if (job.waitForCompletion(true)) {
return 0;
}
return 1;
}
@Override
// Usage: DBCountPageView [driverClass dburl]
public int run(String[] args) throws Exception {
String driverClassName = DRIVER_CLASS;
String url = DB_URL;
if(args.length > 1) {
driverClassName = args[0];
url = args[1];
}
initialize(driverClassName, url);
Configuration conf = getConf();
DBConfiguration.configureDB(conf, driverClassName, url);
Job job = Job.getInstance(conf);
job.setJobName("Count Pageviews of URLs");
job.setJarByClass(DBCountPageView.class);
job.setMapperClass(PageviewMapper.class);
job.setCombinerClass(LongSumReducer.class);
job.setReducerClass(PageviewReducer.class);
DBInputFormat.setInput(job, AccessRecord.class, "Access"
, null, "url", AccessFieldNames);
DBOutputFormat.setOutput(job, "Pageview", PageviewFieldNames);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(LongWritable.class);
job.setOutputKeyClass(PageviewRecord.class);
job.setOutputValueClass(NullWritable.class);
int ret;
try {
ret = job.waitForCompletion(true) ? 0 : 1;
boolean correct = verify();
if(!correct) {
throw new RuntimeException("Evaluation was not correct!");
}
} finally {
shutdown();
}
return ret;
}
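Here LongSumReducer serves only as the combiner, while PageviewReducer produces the final database records. A combiner must consume and re-emit the map output types unchanged; LongSumReducer<KEY> is declared as Reducer<KEY, LongWritable, KEY, LongWritable>, which is what makes it drop-in compatible. A compile-time sketch of that type relationship:
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.reduce.LongSumReducer;

// Compile-time check that LongSumReducer<Text> has the combiner-compatible
// signature Reducer<Text, LongWritable, Text, LongWritable>.
public class CombinerTypeCheck {
  static Reducer<Text, LongWritable, Text, LongWritable> combiner =
      new LongSumReducer<Text>();
}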
public int run(String[] args) throws Exception {
if (args.length < 3) {
System.out.println("Grep <inDir> <outDir> <regex> [<group>]");
ToolRunner.printGenericCommandUsage(System.out);
return 2;
}
Path tempDir =
new Path("grep-temp-"+
Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
Configuration conf = getConf();
conf.set(RegexMapper.PATTERN, args[2]);
if (args.length == 4)
conf.set(RegexMapper.GROUP, args[3]);
Job grepJob = Job.getInstance(conf);
try {
grepJob.setJobName("grep-search");
grepJob.setJarByClass(Grep.class);
FileInputFormat.setInputPaths(grepJob, args[0]);
grepJob.setMapperClass(RegexMapper.class);
grepJob.setCombinerClass(LongSumReducer.class);
grepJob.setReducerClass(LongSumReducer.class);
FileOutputFormat.setOutputPath(grepJob, tempDir);
grepJob.setOutputFormatClass(SequenceFileOutputFormat.class);
grepJob.setOutputKeyClass(Text.class);
grepJob.setOutputValueClass(LongWritable.class);
grepJob.waitForCompletion(true);
Job sortJob = Job.getInstance(conf);
sortJob.setJobName("grep-sort");
sortJob.setJarByClass(Grep.class);
FileInputFormat.setInputPaths(sortJob, tempDir);
sortJob.setInputFormatClass(SequenceFileInputFormat.class);
sortJob.setMapperClass(InverseMapper.class);
sortJob.setNumReduceTasks(1); // write a single file
FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
sortJob.setSortComparatorClass( // sort by decreasing freq
LongWritable.DecreasingComparator.class);
sortJob.waitForCompletion(true);
}
finally {
FileSystem.get(conf).delete(tempDir, true);
}
return 0;
}
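The second (grep-sort) job leans on org.apache.hadoop.mapreduce.lib.map.InverseMapper to swap each (match, count) pair into (count, match), so the shuffle sorts on the counts and the DecreasingComparator yields the highest frequencies first. Conceptually the mapper amounts to this:
import java.io.IOException;
import org.apache.hadoop.mapreduce.Mapper;

// Conceptual sketch of what InverseMapper does: swap key and value, so the
// grep counts (LongWritable) become the sort key of the second job.
public class SwapMapper<K, V> extends Mapper<K, V, V, K> {
  @Override
  protected void map(K key, V value, Context context)
      throws IOException, InterruptedException {
    context.write(value, key);
  }
}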
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
if (otherArgs.length != 3) {
System.err.println("Usage: JobChainingDriver <posts> <users> <out>");
System.exit(2);
}
Path postInput = new Path(otherArgs[0]);
Path userInput = new Path(otherArgs[1]);
Path outputDirIntermediate = new Path(otherArgs[2] + "_int");
Path outputDir = new Path(otherArgs[2]);
// Set up the first job, which counts posts per user
Job countingJob = Job.getInstance(conf, "JobChaining-Counting");
countingJob.setJarByClass(BasicJobChaining.class);
// Set our mapper and reducer; we can use the API's LongSumReducer as the combiner
countingJob.setMapperClass(UserIdCountMapper.class);
countingJob.setCombinerClass(LongSumReducer.class);
countingJob.setReducerClass(UserIdSumReducer.class);
countingJob.setOutputKeyClass(Text.class);
countingJob.setOutputValueClass(LongWritable.class);
countingJob.setInputFormatClass(TextInputFormat.class);
TextInputFormat.addInputPath(countingJob, postInput);
countingJob.setOutputFormatClass(TextOutputFormat.class);
TextOutputFormat.setOutputPath(countingJob, outputDirIntermediate);
// Execute job and grab exit code
int code = countingJob.waitForCompletion(true) ? 0 : 1;
if (code == 0) {
// Calculate the average posts per user by getting counter values
double numRecords = (double) countingJob.getCounters()
.findCounter(AVERAGE_CALC_GROUP, UserIdCountMapper.RECORDS_COUNTER_NAME)
.getValue();
double numUsers = (double) countingJob.getCounters()
.findCounter(AVERAGE_CALC_GROUP, UserIdSumReducer.USERS_COUNTER_NAME)
.getValue();
double averagePostsPerUser = numRecords / numUsers;
// Set up the binning job
Job binningJob = Job.getInstance(new Configuration(), "JobChaining-Binning");
binningJob.setJarByClass(BasicJobChaining.class);
// Set the mapper and pass it the average posts per user
binningJob.setMapperClass(UserIdBinningMapper.class);
UserIdBinningMapper.setAveragePostsPerUser(binningJob, averagePostsPerUser);
binningJob.setNumReduceTasks(0);
binningJob.setInputFormatClass(TextInputFormat.class);
TextInputFormat.addInputPath(binningJob, outputDirIntermediate);
// Add two named outputs for below/above average
MultipleOutputs.addNamedOutput(binningJob, MULTIPLE_OUTPUTS_BELOW_NAME,
TextOutputFormat.class, Text.class, Text.class);
MultipleOutputs.addNamedOutput(binningJob, MULTIPLE_OUTPUTS_ABOVE_NAME,
TextOutputFormat.class, Text.class, Text.class);
MultipleOutputs.setCountersEnabled(binningJob, true);
TextOutputFormat.setOutputPath(binningJob, outputDir);
// Add the user files to the DistributedCache
FileStatus[] userFiles = FileSystem.get(conf).listStatus(userInput);
for (FileStatus status : userFiles) {
DistributedCache.addCacheFile(status.getPath().toUri(),
binningJob.getConfiguration());
}
// Execute job and grab exit code
code = binningJob.waitForCompletion(true) ? 0 : 1;
}
// Clean up the intermediate output
FileSystem.get(conf).delete(outputDirIntermediate, true);
System.exit(code);
}
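UserIdBinningMapper.setAveragePostsPerUser hands the computed average to the second job through its Configuration, since mappers cannot see driver-side variables. A minimal sketch of that pattern (the key string and class name are hypothetical; the original code uses its own constant):
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class AverageHandoffSketch {
  // Hypothetical configuration key for passing the average to the mappers.
  private static final String AVG_KEY = "avg.posts.per.user";

  // Called by the driver after the counting job finishes.
  public static void setAveragePostsPerUser(Job job, double avg) {
    job.getConfiguration().set(AVG_KEY, Double.toString(avg));
  }

  // Called from the mapper's setup() via context.getConfiguration().
  public static double getAveragePostsPerUser(Configuration conf) {
    return Double.parseDouble(conf.get(AVG_KEY));
  }
}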