Listed below are example usages of org.apache.hadoop.mapreduce.lib.output.TextOutputFormat, collected from open-source projects; follow each example's link to view the full source on GitHub.
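As a baseline for the examples that follow, here is a minimal sketch of the common TextOutputFormat wiring; WordCountMapper, WordCountReducer, and the paths are placeholders rather than code from any of the projects below.

// A minimal, hypothetical sketch of the wiring the examples below share.
// WordCountMapper, WordCountReducer, and the paths are placeholders.
Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "textoutputformat-sketch");
job.setJarByClass(WordCountMapper.class);
job.setMapperClass(WordCountMapper.class);
job.setReducerClass(WordCountReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
job.setOutputFormatClass(TextOutputFormat.class);
// TextOutputFormat writes "key<TAB>value" lines; the separator is configurable:
job.getConfiguration().set("mapreduce.output.textoutputformat.separator", ",");
FileInputFormat.addInputPath(job, new Path("/tmp/in"));
FileOutputFormat.setOutputPath(job, new Path("/tmp/out"));
System.exit(job.waitForCompletion(true) ? 0 : 1);

// Driver for a left-join job: reads text input and writes comma-separated text output.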
@Override
public int run(String[] args) throws Exception {
Configuration conf = getConf();
GenericOptionsParser optionparser = new GenericOptionsParser(conf, args);
conf = optionparser.getConfiguration();
Job job = Job.getInstance(conf, "leftjoin");
job.setJarByClass(LeftJoin.class);
FileInputFormat.addInputPaths(job, conf.get("input_dir"));
Path out = new Path(conf.get("output_dir"));
FileOutputFormat.setOutputPath(job, out);
job.setNumReduceTasks(conf.getInt("reduce_num", 1));
job.setMapperClass(LeftJoinMapper.class);
job.setReducerClass(LeftJoinReduce.class);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
conf.set("mapred.textoutputformat.separator", ",");
return (job.waitForCompletion(true) ? 0 : 1);
}
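// Map-only job that converts a SequenceFile of cluster centers to plain text via TextOutputFormat.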
public static void convertCentersSequenceFileToText(Configuration conf, FileSystem fs, String seqFilePath, String outputPath) throws Exception {
Path seqFile = new Path(seqFilePath);
Path output = new Path(outputPath);
if (fs.exists(output)) {
fs.delete(output, true);
}
Job job = Job.getInstance(conf);
job.setMapperClass(CenterSequenceToTextConverter.class);
job.setNumReduceTasks(0); // map-only job: with zero reduce tasks, no reducer class is needed
job.setMapOutputKeyClass(LongWritable.class);
job.setMapOutputValueClass(Text.class);
job.setOutputKeyClass(LongWritable.class);
job.setOutputValueClass(Text.class);
job.setOutputFormatClass(TextOutputFormat.class);
job.setInputFormatClass(SequenceFileInputFormat.class);
FileInputFormat.addInputPath(job, seqFile);
FileOutputFormat.setOutputPath(job, output);
job.waitForCompletion(true);
}
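// LinkCountHDFS driver: sums link counts and emits Text/IntWritable pairs as text.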
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
if (otherArgs.length < 2) {
System.err.println("Usage: LinkCountHDFS inputDir outputDir");
System.exit(2);
}
Job job = Job.getInstance(conf, "link count hdfs");
job.setJarByClass(LinkCountHDFS.class);
job.setInputFormatClass(HDFSInputFormat.class);
job.setMapperClass(RefMapper.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(IntWritable.class);
job.setReducerClass(IntSumReducer.class);
job.setOutputFormatClass(TextOutputFormat.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
HDFSInputFormat.setInputPaths(job, new Path(otherArgs[0]));
FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
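// Kylin MRUnit test: checks that the dimension reducer routes records to the configured named outputs.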
private void testNormalDim() throws IOException {
setConfigurations();
setMultipleOutputs(BatchConstants.CFG_OUTPUT_COLUMN, reduceDriver.getConfiguration(),
SequenceFileOutputFormat.class, NullWritable.class, Text.class);
setMultipleOutputs(BatchConstants.CFG_OUTPUT_DICT, reduceDriver.getConfiguration(),
SequenceFileOutputFormat.class, NullWritable.class, ArrayPrimitiveWritable.class);
setMultipleOutputs(BatchConstants.CFG_OUTPUT_PARTITION, reduceDriver.getConfiguration(), TextOutputFormat.class,
NullWritable.class, LongWritable.class);
int nDimReducers = cubeDesc.getRowkey().getRowKeyColumns().length;
setContextTaskId(nDimReducers - 1);
ByteBuffer tmpBuf = ByteBuffer.allocate(4096);
String val = "100";
tmpBuf.put(Bytes.toBytes(val));
Text outputKey1 = new Text();
outputKey1.set(tmpBuf.array(), 0, tmpBuf.position());
SelfDefineSortableKey key1 = new SelfDefineSortableKey();
key1.init(outputKey1, (byte) 0);
reduceDriver.setInput(key1, ImmutableList.of(new Text()));
List<Pair<NullWritable, Text>> result = reduceDriver.run();
assertEquals(0, result.size());
}
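// P1Q3 driver: text input, DoubleWritable map output, text output; the combiner is intentionally disabled.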
public static void main(final String[] args) throws Exception {
final Configuration conf = new Configuration();
final Job job = Job.getInstance(conf, "P1Q3");
job.setJarByClass(P1Q3.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(DoubleWritable.class);
job.setOutputKeyClass(IntWritable.class);
job.setOutputValueClass(DoubleWritable.class);
job.setMapperClass(P1Q3Map.class);
//job.setCombinerClass(P1Q3Reduce.class);
job.setReducerClass(P1Q3Reduce.class);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
job.waitForCompletion(true);
}
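// MarkLogic lexicon test: element values read through ValueInputFormat, written as text.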
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
if (otherArgs.length < 2) {
System.err.println("Usage: ElementValuesTest configFile outputDir");
System.exit(2);
}
Job job = Job.getInstance(conf);
job.setJarByClass(ElementValuesTest.class);
job.setInputFormatClass(ValueInputFormat.class);
job.setMapperClass(ElementValueMapper.class);
job.setMapOutputKeyClass(LongWritable.class);
job.setMapOutputValueClass(Text.class);
job.setOutputFormatClass(TextOutputFormat.class);
FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
conf = job.getConfiguration();
conf.addResource(otherArgs[0]);
conf.setClass(MarkLogicConstants.INPUT_VALUE_CLASS, Text.class,
Writable.class);
conf.setClass(MarkLogicConstants.INPUT_LEXICON_FUNCTION_CLASS,
ElementValuesFunction.class, ElementValues.class);
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
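// MarkLogic lexicon test: element-value match function, same wiring as above.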
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
if (otherArgs.length < 2) {
System.err.println("Usage: ElementValueMatchTest configFile outputDir");
System.exit(2);
}
Job job = Job.getInstance(conf);
job.setJarByClass(ElementValueMatchTest.class);
job.setInputFormatClass(ValueInputFormat.class);
job.setMapperClass(ElementValueMatchMapper.class);
job.setMapOutputKeyClass(LongWritable.class);
job.setMapOutputValueClass(Text.class);
job.setOutputFormatClass(TextOutputFormat.class);
FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
conf = job.getConfiguration();
conf.addResource(otherArgs[0]);
conf.setClass(MarkLogicConstants.INPUT_VALUE_CLASS, Text.class,
Writable.class);
conf.setClass(MarkLogicConstants.INPUT_LEXICON_FUNCTION_CLASS,
ElementValueMatchFunction.class, ElementValueMatch.class);
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
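// MarkLogic lexicon test: value match function.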
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
if (otherArgs.length < 2) {
System.err.println("Usage: ValueMatchTest configFile outputDir");
System.exit(2);
}
Job job = Job.getInstance(conf);
job.setJarByClass(ValueMatchTest.class);
job.setInputFormatClass(ValueInputFormat.class);
job.setMapperClass(ValueMatchMapper.class);
job.setMapOutputKeyClass(LongWritable.class);
job.setMapOutputValueClass(Text.class);
job.setOutputFormatClass(TextOutputFormat.class);
FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
conf = job.getConfiguration();
conf.addResource(otherArgs[0]);
conf.setClass(MarkLogicConstants.INPUT_VALUE_CLASS, Text.class,
Writable.class);
conf.setClass(MarkLogicConstants.INPUT_LEXICON_FUNCTION_CLASS,
ValueMatchFunction.class, ValueMatch.class);
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
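// MarkLogic lexicon test: words function.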
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
if (otherArgs.length < 2) {
System.err.println("Usage: WordsTest configFile outputDir");
System.exit(2);
}
Job job = Job.getInstance(conf);
job.setJarByClass(WordsTest.class);
job.setInputFormatClass(ValueInputFormat.class);
job.setMapperClass(WordsMapper.class);
job.setMapOutputKeyClass(LongWritable.class);
job.setMapOutputValueClass(Text.class);
job.setOutputFormatClass(TextOutputFormat.class);
FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
conf = job.getConfiguration();
conf.addResource(otherArgs[0]);
conf.setClass(MarkLogicConstants.INPUT_VALUE_CLASS, Text.class,
Writable.class);
conf.setClass(MarkLogicConstants.INPUT_LEXICON_FUNCTION_CLASS,
Words.class, Words.class);
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
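// MarkLogic lexicon test: element-value co-occurrences function.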
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
if (otherArgs.length < 2) {
System.err.println("Usage: ElemValueCooccurrencesTest configFile outputDir");
System.exit(2);
}
Job job = Job.getInstance(conf);
job.setJarByClass(ElemValueCooccurrencesTest.class);
job.setInputFormatClass(ValueInputFormat.class);
job.setMapperClass(ElemCooccurrencesMapper.class);
job.setMapOutputKeyClass(LongWritable.class);
job.setMapOutputValueClass(Text.class);
job.setOutputFormatClass(TextOutputFormat.class);
FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
conf = job.getConfiguration();
conf.addResource(otherArgs[0]);
conf.setClass(MarkLogicConstants.INPUT_VALUE_CLASS, Text.class,
Writable.class);
conf.setClass(MarkLogicConstants.INPUT_LEXICON_FUNCTION_CLASS,
ElemValueCooccurrencesFunction.class, ElemValueCooccurrences.class);
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
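// MarkLogic lexicon test: value co-occurrences function.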
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
if (otherArgs.length < 2) {
System.err.println("Usage: ValueCooccurrencesTest configFile outputDir");
System.exit(2);
}
Job job = Job.getInstance(conf);
job.setJarByClass(ValueCooccurrencesTest.class);
job.setInputFormatClass(ValueInputFormat.class);
job.setMapperClass(ValueCooccurrencesMapper.class);
job.setMapOutputKeyClass(LongWritable.class);
job.setMapOutputValueClass(Text.class);
job.setOutputFormatClass(TextOutputFormat.class);
FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
conf = job.getConfiguration();
conf.addResource(otherArgs[0]);
conf.setClass(MarkLogicConstants.INPUT_VALUE_CLASS, Text.class,
Writable.class);
conf.setClass(MarkLogicConstants.INPUT_LEXICON_FUNCTION_CLASS,
ValueCooccurrencesFunction.class, ValueCooccurrences.class);
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
/**
* Sets up the actual job.
*
* @param conf The current configuration.
* @param args The command line parameters.
* @return The newly created job.
* @throws IOException When setting up the job fails.
*/
public static Job createSubmittableJob(Configuration conf, String[] args)
throws IOException {
String tableName = args[0];
Path outputDir = new Path(args[1]);
String reportSeparatorString = (args.length > 2) ? args[2]: ":";
conf.set("ReportSeparator", reportSeparatorString);
Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
job.setJarByClass(CellCounter.class);
Scan scan = getConfiguredScanForJob(conf, args);
TableMapReduceUtil.initTableMapperJob(tableName, scan,
CellCounterMapper.class, ImmutableBytesWritable.class, Result.class, job);
job.setNumReduceTasks(1);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(IntWritable.class);
job.setOutputFormatClass(TextOutputFormat.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
FileOutputFormat.setOutputPath(job, outputDir);
job.setReducerClass(IntSumReducer.class);
return job;
}
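// P1 driver: reuses the reducer as a combiner and writes text output.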
public static void main(final String[] args) throws Exception {
final Configuration conf = new Configuration();
final Job job = Job.getInstance(conf, "P1");
job.setJarByClass(P1.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(DoubleWritable.class);
job.setMapperClass(P1Map.class);
job.setCombinerClass(P1Reduce.class);
job.setReducerClass(P1Reduce.class);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
job.waitForCompletion(true);
}
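// P1Q1 driver: same shape as P1 with its own mapper and reducer.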
public static void main(final String[] args) throws Exception {
final Configuration conf = new Configuration();
final Job job = Job.getInstance(conf, "P1Q1");
job.setJarByClass(P1Q1.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(DoubleWritable.class);
job.setMapperClass(P1Q1Map.class);
job.setCombinerClass(P1Q1Reduce.class);
job.setReducerClass(P1Q1Reduce.class);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
job.waitForCompletion(true);
}
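// Diff job: feeds two input paths into one job and writes the differences as text.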
public void execute(String inputPath1, String inputPath2, String outputPath) throws Exception {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "bigdiff");
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setMapperClass(Map.class);
job.setReducerClass(Reduce.class);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
FileInputFormat.addInputPath(job, new Path(inputPath1));
FileInputFormat.addInputPath(job, new Path(inputPath2));
FileOutputFormat.setOutputPath(job, new Path(outputPath));
job.waitForCompletion(true);
}
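// Tez: single-vertex data-generator DAG whose sink writes through TextOutputFormat.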
private DAG createDag(TezConfiguration tezConf, Path outPath, long outSize, int extraColumns, int numTasks)
throws IOException {
long largeOutSizePerTask = outSize / numTasks;
DAG dag = DAG.create("TopK DataGen");
Vertex genDataVertex = Vertex.create("datagen", ProcessorDescriptor.create(
GenDataProcessor.class.getName()).setUserPayload(
UserPayload.create(ByteBuffer.wrap(GenDataProcessor.createConfiguration(largeOutSizePerTask, extraColumns)))),
numTasks);
genDataVertex.addDataSink(OUTPUT,
MROutput.createConfigBuilder(new Configuration(tezConf),
TextOutputFormat.class, outPath.toUri().toString()).build());
dag.addVertex(genDataVertex);
return dag;
}
/**
* Sets task classes with related info if needed into configuration object.
*
* @param job Configuration to change.
* @param setMapper Option to set mapper and input format classes.
* @param setCombiner Option to set combiner class.
* @param setReducer Option to set reducer and output format classes.
*/
public static void setTasksClasses(Job job, boolean setMapper, boolean setCombiner, boolean setReducer,
boolean outputCompression) {
if (setMapper) {
job.setMapperClass(HadoopWordCount2Mapper.class);
job.setInputFormatClass(TextInputFormat.class);
}
if (setCombiner)
job.setCombinerClass(HadoopWordCount2Combiner.class);
if (setReducer) {
job.setReducerClass(HadoopWordCount2Reducer.class);
job.setOutputFormatClass(TextOutputFormat.class);
}
if (outputCompression) {
job.setOutputFormatClass(SequenceFileOutputFormat.class);
SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
SequenceFileOutputFormat.setCompressOutput(job, true);
job.getConfiguration().set(FileOutputFormat.COMPRESS_CODEC, SnappyCodec.class.getName());
}
}
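// Tez: join data generator with three TextOutputFormat sinks (stream, hash, expected output).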
private DAG createDag(TezConfiguration tezConf, Path largeOutPath, Path smallOutPath,
Path expectedOutputPath, int numTasks, long largeOutSize, long smallOutSize)
throws IOException {
long largeOutSizePerTask = largeOutSize / numTasks;
long smallOutSizePerTask = smallOutSize / numTasks;
DAG dag = DAG.create("JoinDataGen");
Vertex genDataVertex = Vertex.create("datagen", ProcessorDescriptor.create(
GenDataProcessor.class.getName()).setUserPayload(
UserPayload.create(ByteBuffer.wrap(GenDataProcessor.createConfiguration(largeOutSizePerTask,
smallOutSizePerTask)))), numTasks);
genDataVertex.addDataSink(STREAM_OUTPUT_NAME,
MROutput.createConfigBuilder(new Configuration(tezConf),
TextOutputFormat.class, largeOutPath.toUri().toString()).build());
genDataVertex.addDataSink(HASH_OUTPUT_NAME,
MROutput.createConfigBuilder(new Configuration(tezConf),
TextOutputFormat.class, smallOutPath.toUri().toString()).build());
genDataVertex.addDataSink(EXPECTED_OUTPUT_NAME,
MROutput.createConfigBuilder(new Configuration(tezConf),
TextOutputFormat.class, expectedOutputPath.toUri().toString()).build());
dag.addVertex(genDataVertex);
return dag;
}
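// Kylin: wires the fact-distinct-columns reducer and its named sequence/text outputs.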
private void setupReducer(Path output, CubeSegment cubeSeg)
throws IOException {
FactDistinctColumnsReducerMapping reducerMapping = new FactDistinctColumnsReducerMapping(cubeSeg.getCubeInstance());
int numberOfReducers = reducerMapping.getTotalReducerNum();
logger.info("{} uses {} reducers.", this.getClass().getName(), numberOfReducers);
if (numberOfReducers > 250) {
throw new IllegalArgumentException(
"The max reducer number for FactDistinctColumnsJob is 250, but now it is "
+ numberOfReducers
+ ", decrease 'kylin.engine.mr.uhc-reducer-count'");
}
job.setReducerClass(FactDistinctColumnsReducer.class);
job.setPartitionerClass(FactDistinctColumnPartitioner.class);
job.setNumReduceTasks(numberOfReducers);
// make each reducer output to respective dir
MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_COLUMN, SequenceFileOutputFormat.class, NullWritable.class, Text.class);
MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_DICT, SequenceFileOutputFormat.class, NullWritable.class, ArrayPrimitiveWritable.class);
MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_STATISTICS, SequenceFileOutputFormat.class, LongWritable.class, BytesWritable.class);
MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_PARTITION, TextOutputFormat.class, NullWritable.class, LongWritable.class);
FileOutputFormat.setOutputPath(job, output);
job.getConfiguration().set(BatchConstants.CFG_OUTPUT_PATH, output.toString());
// prevent creating a zero-sized default output
LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);
deletePath(job.getConfiguration(), output);
}
/**
* Get the {@link OutputFormat} class for the job.
*
* @return the {@link OutputFormat} class for the job.
*/
@SuppressWarnings("unchecked")
public Class<? extends OutputFormat<?,?>> getOutputFormatClass()
throws ClassNotFoundException {
return (Class<? extends OutputFormat<?,?>>)
conf.getClass(OUTPUT_FORMAT_CLASS_ATTR, TextOutputFormat.class);
}
private void testCommitterInternal(int version) throws Exception {
Job job = Job.getInstance();
FileOutputFormat.setOutputPath(job, outDir);
Configuration conf = job.getConfiguration();
conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
conf.setInt(FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, version);
JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
FileOutputCommitter committer = new SafeFileOutputCommitter(outDir, tContext);
// setup
committer.setupJob(jContext);
committer.setupTask(tContext);
// write output
TextOutputFormat theOutputFormat = new TextOutputFormat();
RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
writeOutput(theRecordWriter, tContext);
// do commit
committer.commitTask(tContext);
committer.commitJob(jContext);
// validate output
validateContent(outDir);
FileUtil.fullyDelete(new File(outDir.toString()));
}
/**
* The MapReduce driver - setup and launch the job.
*
* @param args the command-line arguments
* @return the process exit code
* @throws Exception if something goes wrong
*/
public int run(final String[] args) throws Exception {
Cli cli = Cli.builder().setArgs(args).addOptions(CliCommonOpts.MrIoOpts.values()).build();
int result = cli.runCmd();
if (result != 0) {
return result;
}
Path inputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.INPUT));
Path outputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.OUTPUT));
Configuration conf = super.getConf();
conf.set("key.value.separator.in.input.line", " ");
conf.set("xmlinput.start", "<property>");
conf.set("xmlinput.end", "</property>");
Job job = Job.getInstance(conf);
job.setJarByClass(XMLMapReduceReader.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setMapperClass(Map.class);
job.setInputFormatClass(XmlInputFormat.class);
job.setNumReduceTasks(0);
job.setOutputFormatClass(TextOutputFormat.class);
FileInputFormat.setInputPaths(job, inputPath);
FileOutputFormat.setOutputPath(job, outputPath);
if (job.waitForCompletion(true)) {
return 0;
}
return 1;
}
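// Flink word count bridged onto Hadoop's TextInputFormat and TextOutputFormat.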
public static void main(String[] args) throws Exception {
if (args.length < 2) {
System.err.println("Usage: WordCount <input path> <result path>");
return;
}
final String inputPath = args[0];
final String outputPath = args[1];
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
// Set up the Hadoop Input Format
Job job = Job.getInstance();
HadoopInputFormat<LongWritable, Text> hadoopInputFormat = new HadoopInputFormat<LongWritable, Text>(new TextInputFormat(), LongWritable.class, Text.class, job);
TextInputFormat.addInputPath(job, new Path(inputPath));
// Create a Flink job with it
DataSet<Tuple2<LongWritable, Text>> text = env.createInput(hadoopInputFormat);
// Tokenize the line and convert from Writable "Text" to String for better handling
DataSet<Tuple2<String, Integer>> words = text.flatMap(new Tokenizer());
// Sum up the words
DataSet<Tuple2<String, Integer>> result = words.groupBy(0).aggregate(Aggregations.SUM, 1);
// Convert String back to Writable "Text" for use with Hadoop Output Format
DataSet<Tuple2<Text, IntWritable>> hadoopResult = result.map(new HadoopDatatypeMapper());
// Set up Hadoop Output Format
HadoopOutputFormat<Text, IntWritable> hadoopOutputFormat = new HadoopOutputFormat<Text, IntWritable>(new TextOutputFormat<Text, IntWritable>(), job);
hadoopOutputFormat.getConfiguration().set("mapreduce.output.textoutputformat.separator", " ");
hadoopOutputFormat.getConfiguration().set("mapred.textoutputformat.separator", " "); // set the value for both, since this test
TextOutputFormat.setOutputPath(job, new Path(outputPath));
// Output & Execute
hadoopResult.output(hadoopOutputFormat);
env.execute("Word Count");
}
/**
* Allow the user to inject custom mapper, input, and output formats
* into the importTable() process.
*/
@Override
@SuppressWarnings("unchecked")
public void importTable(ImportJobContext context)
throws IOException, ImportException {
SqoopOptions options = context.getOptions();
Configuration conf = options.getConf();
Class<? extends Mapper> mapperClass = (Class<? extends Mapper>)
conf.getClass(MAPPER_KEY, Mapper.class);
Class<? extends InputFormat> ifClass = (Class<? extends InputFormat>)
conf.getClass(INPUT_FORMAT_KEY, TextInputFormat.class);
Class<? extends OutputFormat> ofClass = (Class<? extends OutputFormat>)
conf.getClass(OUTPUT_FORMAT_KEY, TextOutputFormat.class);
Class<? extends ImportJobBase> jobClass = (Class<? extends ImportJobBase>)
conf.getClass(IMPORT_JOB_KEY, ImportJobBase.class);
String tableName = context.getTableName();
// Instantiate the user's chosen ImportJobBase instance.
ImportJobBase importJob = ReflectionUtils.newInstance(jobClass, conf);
// And configure the dependencies to inject
importJob.setOptions(options);
importJob.setMapperClass(mapperClass);
importJob.setInputFormatClass(ifClass);
importJob.setOutputFormatClass(ofClass);
importJob.runImport(tableName, context.getJarFile(),
getSplitColumn(options, tableName), conf);
}
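// HBase verification job: scans a table and writes verifier output as text.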
private int doVerify(Path outputDir, int numReducers) throws IOException, InterruptedException,
ClassNotFoundException {
job = Job.getInstance(getConf());
job.setJobName("Link Verifier");
job.setNumReduceTasks(numReducers);
job.setJarByClass(getClass());
setJobScannerConf(job);
Scan scan = new Scan();
scan.addColumn(FAMILY_NAME, COLUMN_PREV);
scan.setCaching(10000);
scan.setCacheBlocks(false);
String[] split = labels.split(COMMA);
scan.setAuthorizations(new Authorizations(split[this.labelIndex * 2],
split[(this.labelIndex * 2) + 1]));
TableMapReduceUtil.initTableMapperJob(tableName.getName(), scan, VerifyMapper.class,
BytesWritable.class, BytesWritable.class, job);
TableMapReduceUtil.addDependencyJars(job.getConfiguration(), AbstractHBaseTool.class);
job.getConfiguration().setBoolean("mapreduce.map.speculative", false);
job.setReducerClass(VerifyReducer.class);
job.setOutputFormatClass(TextOutputFormat.class);
TextOutputFormat.setOutputPath(job, outputDir);
boolean success = job.waitForCompletion(true);
return success ? 0 : 1;
}
private DAG createDAG(FileSystem fs, TezConfiguration tezConf,
Map<String, LocalResource> localResources, Path stagingDir,
String inputPath, String outputPath) throws IOException {
Configuration inputConf = new Configuration(tezConf);
inputConf.set(FileInputFormat.INPUT_DIR, inputPath);
InputDescriptor id = new InputDescriptor(MRInput.class.getName())
.setUserPayload(MRInput.createUserPayload(inputConf,
TextInputFormat.class.getName(), true, true));
Configuration outputConf = new Configuration(tezConf);
outputConf.set(FileOutputFormat.OUTDIR, outputPath);
OutputDescriptor od = new OutputDescriptor(MROutput.class.getName())
.setUserPayload(MROutput.createUserPayload(
outputConf, TextOutputFormat.class.getName(), true));
Vertex tokenizerVertex = new Vertex("tokenizer", new ProcessorDescriptor(
TokenProcessor.class.getName()), -1, MRHelpers.getMapResource(tezConf));
tokenizerVertex.addInput("MRInput", id, MRInputAMSplitGenerator.class);
Vertex summerVertex = new Vertex("summer",
new ProcessorDescriptor(
SumProcessor.class.getName()), 1, MRHelpers.getReduceResource(tezConf));
summerVertex.addOutput("MROutput", od, MROutputCommitter.class);
OrderedPartitionedKVEdgeConfigurer edgeConf = OrderedPartitionedKVEdgeConfigurer
.newBuilder(Text.class.getName(), IntWritable.class.getName(),
HashPartitioner.class.getName(), null).build();
DAG dag = new DAG("WordCount");
dag.addVertex(tokenizerVertex)
.addVertex(summerVertex)
.addEdge(
new Edge(tokenizerVertex, summerVertex, edgeConf.createDefaultEdgeProperty()));
return dag;
}
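// Submits a deliberately failing map-only job and asserts on the tracking URL.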
protected Job runFailingMapperJob()
throws IOException, InterruptedException, ClassNotFoundException {
Configuration myConf = new Configuration(mrCluster.getConfig());
myConf.setInt(MRJobConfig.NUM_MAPS, 1);
myConf.setInt(MRJobConfig.MAP_MAX_ATTEMPTS, 2); //reduce the number of attempts
Job job = Job.getInstance(myConf);
job.setJarByClass(FailingMapper.class);
job.setJobName("failmapper");
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setInputFormatClass(RandomInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
job.setMapperClass(FailingMapper.class);
job.setNumReduceTasks(0);
FileOutputFormat.setOutputPath(job, new Path(OUTPUT_ROOT_DIR,
"failmapper-output"));
job.addFileToClassPath(APP_JAR); // The AppMaster jar itself.
job.submit();
String trackingUrl = job.getTrackingURL();
String jobId = job.getJobID().toString();
boolean succeeded = job.waitForCompletion(true);
Assert.assertFalse(succeeded);
Assert.assertTrue("Tracking URL was " + trackingUrl +
" but didn't match job ID " + jobId,
trackingUrl.endsWith(jobId.substring(jobId.lastIndexOf("_")) + "/"));
return job;
}
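// Identity map/reduce job demonstrating compressed map output and compressed text output.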
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Path inputFile = new Path(args[0]);
Path outputFile = new Path(args[1]);
FileSystem hdfs = outputFile.getFileSystem(conf);
hdfs.delete(outputFile, true);
Class<?> codecClass = Class.forName(args[2]);
conf.setBoolean("mapred.output.compress", true);
conf.setClass("mapred.output.compression.codec",
codecClass,
CompressionCodec.class);
conf.setBoolean("mapred.compress.map.output", true);
conf.setClass("mapred.map.output.compression.codec",
codecClass,
CompressionCodec.class);
Job job = Job.getInstance(conf);
job.setJarByClass(CompressedMapReduce.class);
job.setMapperClass(Mapper.class);
job.setReducerClass(Reducer.class);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
FileInputFormat.addInputPath(job, inputFile);
FileOutputFormat.setOutputPath(job, outputFile);
job.waitForCompletion(true);
}