下面列出了怎么用org.apache.hadoop.mapreduce.lib.map.MultithreadedMapper的API类实例代码及写法,或者点击链接到github查看源代码。
@SuppressWarnings("unchecked")
public void configure(JobConf jobConf) {
int numberOfThreads =
jobConf.getInt(MultithreadedMapper.NUM_THREADS, 10);
if (LOG.isDebugEnabled()) {
LOG.debug("Configuring jobConf " + jobConf.getJobName() +
" to use " + numberOfThreads + " threads");
}
this.job = jobConf;
//increment processed counter only if skipping feature is enabled
this.incrProcCount = SkipBadRecords.getMapperMaxSkipRecords(job)>0 &&
SkipBadRecords.getAutoIncrMapperProcCount(job);
this.mapper = ReflectionUtils.newInstance(jobConf.getMapperClass(),
jobConf);
// Creating a threadpool of the configured size to execute the Mapper
// map method in parallel.
executorService = new ThreadPoolExecutor(numberOfThreads, numberOfThreads,
0L, TimeUnit.MILLISECONDS,
new BlockingArrayQueue
(numberOfThreads));
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
if (otherArgs.length < 2) {
System.err.println("Usage: MultithreadedZipContentLoader configFile inputDir threadCount");
System.exit(2);
}
Job job = Job.getInstance(conf);
job.setJarByClass(MultithreadedZipContentLoader.class);
job.setInputFormatClass(ZipContentInputFormat.class);
job.setMapperClass(MultithreadedMapper.class);
MultithreadedMapper.setMapperClass(job, ZipContentMapper.class);
MultithreadedMapper.setNumberOfThreads(job, Integer.parseInt(args[2]));
job.setMapOutputKeyClass(DocumentURI.class);
job.setMapOutputValueClass(Text.class);
job.setOutputFormatClass(ContentOutputFormat.class);
ZipContentInputFormat.setInputPaths(job, new Path(otherArgs[1]));
conf = job.getConfiguration();
conf.addResource(otherArgs[0]);
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
@SuppressWarnings("unchecked")
public void configure(JobConf jobConf) {
int numberOfThreads =
jobConf.getInt(MultithreadedMapper.NUM_THREADS, 10);
if (LOG.isDebugEnabled()) {
LOG.debug("Configuring jobConf " + jobConf.getJobName() +
" to use " + numberOfThreads + " threads");
}
this.job = jobConf;
//increment processed counter only if skipping feature is enabled
this.incrProcCount = SkipBadRecords.getMapperMaxSkipRecords(job)>0 &&
SkipBadRecords.getAutoIncrMapperProcCount(job);
this.mapper = ReflectionUtils.newInstance(jobConf.getMapperClass(),
jobConf);
// Creating a threadpool of the configured size to execute the Mapper
// map method in parallel.
executorService = new ThreadPoolExecutor(numberOfThreads, numberOfThreads,
0L, TimeUnit.MILLISECONDS,
new BlockingArrayQueue
(numberOfThreads));
}