The following are Java code examples of org.apache.hadoop.mapreduce.OutputFormat, extracted from open source projects. You can click through to view the full source of each example on GitHub.
@Override
public OutputFormat getOutputFormat()
{
  if (doBulk || forceBulk)
  {
    return new AccumuloMrsPyramidFileOutputFormat(zoomLevel, cv);
    //return new AccumuloMrsPyramidFileOutputFormat();
  }
  else
  {
    return new AccumuloMrsPyramidOutputFormat(zoomLevel, cv);
  }
}
@SuppressWarnings("deprecation")
public static boolean runJob(Configuration conf,
Class<? extends InputFormat<?,?>> inputFormatClass,
Class<? extends Mapper<?,?,?,?>> mapperClass,
Class<? extends OutputFormat<?,?>> outputFormatClass) throws IOException,
InterruptedException, ClassNotFoundException {
Job job = new Job(conf);
job.setInputFormatClass(inputFormatClass);
job.setMapperClass(mapperClass);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(NullWritable.class);
job.setOutputFormatClass(outputFormatClass);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(NullWritable.class);
boolean ret = job.waitForCompletion(true);
// Hadoop 1.0 (and 0.20) have nasty bug when job committer is not called in
// LocalJobRuner
if (isHadoop1()) {
callOutputCommitter(job, outputFormatClass);
}
return ret;
}
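Note that the snippet above still uses the deprecated new Job(conf) constructor (hence the @SuppressWarnings). For comparison, here is a minimal sketch of the same wiring on the current API using Job.getInstance; the driver class name and argument handling are invented for illustration:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class MinimalDriver {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Job.getInstance replaces the deprecated new Job(conf) constructor.
    Job job = Job.getInstance(conf, "minimal-driver");
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}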
@Override
public StoreFuncInterface createStoreFunc(POStore store)
    throws IOException {
  StoreFuncInterface storeFunc = store.getStoreFunc();
  // Call setStoreLocation on the storeFunc, giving it the Job. Typically
  // this results in the storeFunc's OutputFormat storing the output
  // location in the Job's Configuration. The PigOutputFormat.setLocation()
  // method will merge this modified Configuration into the configuration
  // of the Context we have.
  PigOutputFormat.setLocation(context, store);
  OutputFormat<?,?> outputFormat = storeFunc.getOutputFormat();
  // create a new record writer
  try {
    writer = outputFormat.getRecordWriter(context);
  } catch (InterruptedException e) {
    throw new IOException(e);
  }
  storeFunc.prepareToWrite(writer);
  return storeFunc;
}
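The getRecordWriter lifecycle that Pig drives here can also be exercised directly against any OutputFormat. Below is a minimal, self-contained sketch, assuming a local scratch directory and a made-up task attempt id, that obtains a writer from TextOutputFormat, writes one record, and closes it:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class RecordWriterSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // The output directory must be configured before getRecordWriter is called.
    conf.set("mapreduce.output.fileoutputformat.outputdir", "/tmp/record-writer-sketch");
    TaskAttemptContext ctx = new TaskAttemptContextImpl(
        conf, new TaskAttemptID("local", 1, TaskType.MAP, 0, 0));
    TextOutputFormat<Text, NullWritable> fmt = new TextOutputFormat<>();
    RecordWriter<Text, NullWritable> writer = fmt.getRecordWriter(ctx);
    try {
      writer.write(new Text("hello"), NullWritable.get());
    } finally {
      writer.close(ctx); // always close so buffered output is flushed
    }
  }
}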
/**
 * This test validates the functionality of the {@link
 * HadoopFormatIO.Write.Builder#withConfiguration(Configuration) withConfiguration(Configuration)}
 * method when the job id is not provided by the user in the configuration.
 */
@Test
public void testWriteValidationFailsMissingJobIDInConf() {
  Configuration configuration = new Configuration();
  configuration.setClass(
      HadoopFormatIO.OUTPUT_FORMAT_CLASS_ATTR, TextOutputFormat.class, OutputFormat.class);
  configuration.setClass(HadoopFormatIO.OUTPUT_KEY_CLASS, Text.class, Object.class);
  configuration.setClass(HadoopFormatIO.OUTPUT_VALUE_CLASS, Employee.class, Object.class);
  configuration.set(HadoopFormatIO.OUTPUT_DIR, tmpFolder.getRoot().getAbsolutePath());
  runValidationPipeline(configuration);
  thrown.expect(Pipeline.PipelineExecutionException.class);
  thrown.expectMessage("Configuration must contain \"mapreduce.job.id\"");
  p.run().waitUntilFinish();
}
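For a configuration that passes this validation, the job id key simply has to be present. A minimal sketch, with the key strings written out literally (they correspond to the MRJobConfig constants, and the value "1" is arbitrary):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class WriteConfSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    conf.setClass("mapreduce.job.outputformat.class",
        TextOutputFormat.class, OutputFormat.class);
    conf.setClass("mapreduce.job.output.key.class", Text.class, Object.class);
    conf.setClass("mapreduce.job.output.value.class", Text.class, Object.class);
    // Providing the job id avoids the PipelineExecutionException the test expects.
    conf.set("mapreduce.job.id", "1");
    System.out.println(conf.get("mapreduce.job.id"));
  }
}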
@Test(enabled = true)
public void testWriteChunkData() throws Exception {
  NullWritable nada = NullWritable.get();
  MneDurableOutputSession<DurableChunk<?>> sess =
      new MneDurableOutputSession<DurableChunk<?>>(m_tacontext, null,
          MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX);
  MneDurableOutputValue<DurableChunk<?>> mdvalue =
      new MneDurableOutputValue<DurableChunk<?>>(sess);
  OutputFormat<NullWritable, MneDurableOutputValue<DurableChunk<?>>> outputFormat =
      new MneOutputFormat<MneDurableOutputValue<DurableChunk<?>>>();
  RecordWriter<NullWritable, MneDurableOutputValue<DurableChunk<?>>> writer =
      outputFormat.getRecordWriter(m_tacontext);
  DurableChunk<?> dchunk = null;
  Checksum cs = new CRC32();
  cs.reset();
  for (int i = 0; i < m_reccnt; ++i) {
    dchunk = genupdDurableChunk(sess, cs);
    Assert.assertNotNull(dchunk);
    writer.write(nada, mdvalue.of(dchunk));
  }
  m_checksum = cs.getValue();
  writer.close(m_tacontext);
  sess.close();
}
@Test(enabled = true)
public void testWriteLongData() throws Exception {
  NullWritable nada = NullWritable.get();
  MneDurableOutputSession<Long> sess =
      new MneDurableOutputSession<Long>(m_tacontext, null,
          MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX);
  MneDurableOutputValue<Long> mdvalue =
      new MneDurableOutputValue<Long>(sess);
  OutputFormat<NullWritable, MneDurableOutputValue<Long>> outputFormat =
      new MneOutputFormat<MneDurableOutputValue<Long>>();
  RecordWriter<NullWritable, MneDurableOutputValue<Long>> writer =
      outputFormat.getRecordWriter(m_tacontext);
  Long val = null;
  for (int i = 0; i < m_reccnt; ++i) {
    val = m_rand.nextLong();
    m_sum += val;
    writer.write(nada, mdvalue.of(val));
  }
  writer.close(m_tacontext);
  sess.close();
}
@SuppressWarnings("unchecked")
private synchronized RecordWriter getNewRecordWriter(
TaskAttemptContext taskContext, String baseFileName)
throws IOException, InterruptedException {
// look for record-writer in the cache
RecordWriter writer = newRecordWriters.get(baseFileName);
// If not in cache, create a new one
if (writer == null) {
// get the record writer from context output format
taskContext.getConfiguration().set(
MRJobConfig.FILEOUTPUTFORMAT_BASE_OUTPUT_NAME, baseFileName);
try {
writer = ((OutputFormat) ReflectionUtils.newInstance(
taskContext.getOutputFormatClass(), taskContext.getConfiguration()))
.getRecordWriter(taskContext);
} catch (ClassNotFoundException e) {
throw new IOException(e);
}
// add the record-writer to the cache
newRecordWriters.put(baseFileName, writer);
}
return writer;
}
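The FILEOUTPUTFORMAT_BASE_OUTPUT_NAME key set here is the hook that MultipleOutputs uses to give each base name its own cached RecordWriter. From the user side, a hedged usage sketch (the mapper logic and base names are invented):

import java.io.IOException;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;

public class NamedOutputMapper extends Mapper<Object, Text, Text, NullWritable> {
  private MultipleOutputs<Text, NullWritable> mos;

  @Override
  protected void setup(Context context) {
    mos = new MultipleOutputs<>(context);
  }

  @Override
  protected void map(Object key, Text value, Context context)
      throws IOException, InterruptedException {
    // The baseOutputPath variant routes through the job's OutputFormat and
    // reuses a cached RecordWriter per distinct base name.
    String base = value.toString().startsWith("a") ? "alpha/part" : "other/part";
    mos.write(value, NullWritable.get(), base);
  }

  @Override
  protected void cleanup(Context context) throws IOException, InterruptedException {
    mos.close(); // closes every cached writer
  }
}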
private static OutputCommitter initOutputCommitter(
    OutputFormat<?, ?> outputFormatObj,
    Configuration conf,
    TaskAttemptContext taskAttemptContext)
    throws IllegalStateException {
  OutputCommitter outputCommitter;
  try {
    outputCommitter = outputFormatObj.getOutputCommitter(taskAttemptContext);
    if (outputCommitter != null) {
      outputCommitter.setupJob(new JobContextImpl(conf, taskAttemptContext.getJobID()));
    }
  } catch (Exception e) {
    throw new IllegalStateException("Unable to create OutputCommitter object: ", e);
  }
  return outputCommitter;
}
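setupJob is only the first step of the commit protocol. As a rough sketch of the full ordering a framework drives around an OutputFormat (the method names are the real OutputCommitter API; the wrapper class is invented):

import java.io.IOException;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

public final class CommitProtocolSketch {
  static <K, V> void runTask(OutputFormat<K, V> format,
                             JobContext jobCtx,
                             TaskAttemptContext taskCtx)
      throws IOException, InterruptedException {
    OutputCommitter committer = format.getOutputCommitter(taskCtx);
    committer.setupJob(jobCtx);       // once per job, before any task runs
    committer.setupTask(taskCtx);     // once per task attempt
    // ... records are written through the RecordWriter here ...
    if (committer.needsTaskCommit(taskCtx)) {
      committer.commitTask(taskCtx);  // promote this attempt's output
    }
    committer.commitJob(jobCtx);      // once, after all tasks succeed
  }
}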
private void testFailedJob(String fileName,
    Class<? extends OutputFormat> output, String[] exclude) throws Exception {
  Path outDir = getNewOutputDir();
  Job job = MapReduceTestUtil.createFailJob(conf, outDir, inDir);
  job.setOutputFormatClass(output);
  assertFalse("Job did not fail!", job.waitForCompletion(true));
  if (fileName != null) {
    Path testFile = new Path(outDir, fileName);
    assertTrue("File " + testFile + " missing for failed job " + job.getJobID(),
        fs.exists(testFile));
  }
  // check that files from the exclude set are not present
  for (String ex : exclude) {
    Path file = new Path(outDir, ex);
    assertFalse("File " + file + " should not be present for failed job "
        + job.getJobID(), fs.exists(file));
  }
}
@Test(enabled = true)
public void testWriteBufferData() throws Exception {
  NullWritable nada = NullWritable.get();
  MneDurableOutputSession<DurableBuffer<?>> sess =
      new MneDurableOutputSession<DurableBuffer<?>>(m_tacontext, null,
          MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX);
  MneDurableOutputValue<DurableBuffer<?>> mdvalue =
      new MneDurableOutputValue<DurableBuffer<?>>(sess);
  OutputFormat<NullWritable, MneDurableOutputValue<DurableBuffer<?>>> outputFormat =
      new MneOutputFormat<MneDurableOutputValue<DurableBuffer<?>>>();
  RecordWriter<NullWritable, MneDurableOutputValue<DurableBuffer<?>>> writer =
      outputFormat.getRecordWriter(m_tacontext);
  DurableBuffer<?> dbuf = null;
  Checksum cs = new CRC32();
  cs.reset();
  for (int i = 0; i < m_reccnt; ++i) {
    dbuf = genupdDurableBuffer(sess, cs);
    Assert.assertNotNull(dbuf);
    writer.write(nada, mdvalue.of(dbuf));
  }
  m_checksum = cs.getValue();
  writer.close(m_tacontext);
  sess.close();
}
MapRunner() throws IOException, InterruptedException,
    ClassNotFoundException {
  // instantiate the real mapper that does the work
  mapper = ReflectionUtils.newInstance(mapClass,
      outer.getConfiguration());
  @SuppressWarnings("unchecked")
  OutputFormat<K2, V2> outputFormat = (OutputFormat<K2, V2>)
      ReflectionUtils.newInstance(outer.getOutputFormatClass(),
          outer.getConfiguration());
  try {
    // outputFormat is not initialized here; we rely on everything it
    // needs being obtainable from the AssignmentManager singleton.
    writer = outputFormat.getRecordWriter(outer);
    subcontext = (Context) ReflectionUtil.createMapperContext(
        mapper, outer.getConfiguration(), outer.getTaskAttemptID(),
        new SubMapRecordReader(), writer,
        outer.getOutputCommitter(), new SubMapStatusReporter(),
        outer.getInputSplit());
  } catch (Exception e) {
    throw new IOException("Error creating mapper context", e);
  }
}
@SuppressWarnings("rawtypes")
@Override
public OutputFormat getOutputFormat() throws IOException {
AvroStorageLog.funcCall("getOutputFormat");
Properties property = getUDFProperties();
String allSchemaStr = property.getProperty(AVRO_OUTPUT_SCHEMA_PROPERTY);
Map<String, String> map = (allSchemaStr != null) ? parseSchemaMap(allSchemaStr) : null;
String key = getSchemaKey();
Schema schema = (map == null || !map.containsKey(key)) ? outputAvroSchema : Schema.parse(map.get(key));
if (schema == null)
throw new IOException("Output schema is null!");
AvroStorageLog.details("Output schema=" + schema);
return new PigAvroOutputFormat(schema);
}
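Schema.parse(String), used above, is deprecated in newer Avro releases in favor of Schema.Parser. A self-contained sketch of the replacement (the record schema here is invented):

import org.apache.avro.Schema;

public class SchemaParseSketch {
  public static void main(String[] args) {
    // Schema.Parser replaces the deprecated static Schema.parse(String).
    Schema schema = new Schema.Parser().parse(
        "{\"type\":\"record\",\"name\":\"Rec\",\"fields\":"
            + "[{\"name\":\"id\",\"type\":\"long\"}]}");
    System.out.println(schema.getName());
  }
}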
public void setupRunner(String jobName, Class<?> runnerClass,
    Class<? extends TableMapper<?, ?>> mapperClass,
    Class<? extends Reducer<?, ?, ?, ?>> reducerClass,
    Class<? extends WritableComparable<?>> outputKeyClass,
    Class<? extends Writable> outputValueClass,
    Class<? extends OutputFormat<?, ?>> outputFormatClass) {
  this.setupRunner(jobName, runnerClass, mapperClass, reducerClass,
      outputKeyClass, outputValueClass, outputKeyClass, outputValueClass,
      outputFormatClass);
}
@Test
public void shouldSplitFileAndWriteProperSplits() throws Exception {
  for (int numberOfSplits = 1; numberOfSplits < 10; numberOfSplits++) {
    final File testFile = new File(HadoopGraphProvider.PATHS.get(getInputFilename()));
    logger.info("Testing: {} (splits {})", testFile, numberOfSplits);
    final List<FileSplit> splits = generateFileSplits(testFile, numberOfSplits);
    final Class<? extends InputFormat<NullWritable, VertexWritable>> inputFormatClass = getInputFormat();
    final Class<? extends OutputFormat<NullWritable, VertexWritable>> outputFormatClass = getOutputFormat();
    final File outputDirectory = TestHelper.makeTestDataPath(inputFormatClass, "hadoop-record-reader-writer-test");
    final Configuration config = configure(outputDirectory);
    config.addResource(this.configuration);
    validateFileSplits(splits, config, inputFormatClass, Optional.of(outputFormatClass));
  }
}
private HadoopOutputFormat<String, Long> setupHadoopOutputFormat(
    OutputFormat<String, Long> outputFormat,
    Job job,
    RecordWriter<String, Long> recordWriter,
    OutputCommitter outputCommitter,
    Configuration configuration) {
  HadoopOutputFormat<String, Long> hadoopOutputFormat = new HadoopOutputFormat<>(outputFormat, job);
  hadoopOutputFormat.recordWriter = recordWriter;
  hadoopOutputFormat.outputCommitter = outputCommitter;
  hadoopOutputFormat.configuration = configuration;
  hadoopOutputFormat.configuration.set(MAPRED_OUTPUT_DIR_KEY, MAPRED_OUTPUT_PATH);
  return hadoopOutputFormat;
}
private static Configuration loadTestConfiguration(
    Class<?> outputFormatClassName, Class<?> keyClass, Class<?> valueClass) {
  Configuration conf = new Configuration();
  conf.setClass(MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, outputFormatClassName, OutputFormat.class);
  conf.setClass(MRJobConfig.OUTPUT_KEY_CLASS, keyClass, Object.class);
  conf.setClass(MRJobConfig.OUTPUT_VALUE_CLASS, valueClass, Object.class);
  conf.setInt(MRJobConfig.NUM_REDUCES, REDUCERS_COUNT);
  conf.set(MRJobConfig.ID, String.valueOf(1));
  return conf;
}
@Override
public OutputCommitter getOutputCommitter(TaskAttemptContext context)
    throws IOException, InterruptedException {
  List<OutputFormat> formats =
      getNewApiFormats(CompatHandler.taskAttemptContext(context).getConfiguration());
  List<OutputCommitter> committers = new ArrayList<OutputCommitter>();
  for (OutputFormat format : formats) {
    committers.add(format.getOutputCommitter(context));
  }
  return new MultiNewOutputCommitter(committers);
}
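MultiNewOutputCommitter is not shown here, but a committer that fans out to several delegates looks roughly like the following minimal sketch, assuming every delegate should see each lifecycle call (the class name is invented):

import java.io.IOException;
import java.util.List;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

class CompositeOutputCommitter extends OutputCommitter {
  private final List<OutputCommitter> delegates;

  CompositeOutputCommitter(List<OutputCommitter> delegates) {
    this.delegates = delegates;
  }

  @Override public void setupJob(JobContext ctx) throws IOException {
    for (OutputCommitter c : delegates) c.setupJob(ctx);
  }
  @Override public void setupTask(TaskAttemptContext ctx) throws IOException {
    for (OutputCommitter c : delegates) c.setupTask(ctx);
  }
  @Override public boolean needsTaskCommit(TaskAttemptContext ctx) throws IOException {
    // commit is needed if any delegate needs it
    for (OutputCommitter c : delegates) {
      if (c.needsTaskCommit(ctx)) return true;
    }
    return false;
  }
  @Override public void commitTask(TaskAttemptContext ctx) throws IOException {
    for (OutputCommitter c : delegates) {
      if (c.needsTaskCommit(ctx)) c.commitTask(ctx);
    }
  }
  @Override public void abortTask(TaskAttemptContext ctx) throws IOException {
    for (OutputCommitter c : delegates) c.abortTask(ctx);
  }
}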
@Override
protected Class<? extends OutputFormat> getOutputFormatClass()
    throws ClassNotFoundException {
  if (isHCatJob) {
    return SqoopHCatUtilities.getOutputFormatClass();
  } else {
    return RawKeyTextOutputFormat.class;
  }
}
/**
 * Setup task.
 *
 * @param outputFormat Output format.
 * @throws IOException In case of IO exception.
 * @throws InterruptedException In case of interrupt.
 */
protected void setup(@Nullable OutputFormat outputFormat) throws IOException, InterruptedException {
  if (hadoopCtx.writer() != null) {
    assert outputFormat != null;
    outputFormat.getOutputCommitter(hadoopCtx).setupTask(hadoopCtx);
  }
}
public JobBase(final SqoopOptions opts,
    final Class<? extends Mapper> mapperClass,
    final Class<? extends InputFormat> inputFormatClass,
    final Class<? extends OutputFormat> outputFormatClass) {
  this.options = opts;
  this.mapperClass = mapperClass;
  this.inputFormatClass = inputFormatClass;
  this.outputFormatClass = outputFormatClass;
  isHCatJob = options.getHCatTableName() != null;
}
@Override
public void checkOutputSpecs(FileSystem ignored, JobConf job) throws IOException {
  List<org.apache.hadoop.mapred.OutputFormat> formats = getOldApiFormats(job);
  for (org.apache.hadoop.mapred.OutputFormat format : formats) {
    format.checkOutputSpecs(ignored, job);
  }
}
public ImportJobBase(final SqoopOptions opts,
    final Class<? extends Mapper> mapperClass,
    final Class<? extends InputFormat> inputFormatClass,
    final Class<? extends OutputFormat> outputFormatClass,
    final ImportJobContext context) {
  super(opts, mapperClass, inputFormatClass, outputFormatClass, context);
}
/**
 * Allow the user to inject custom mapper, input, and output formats
 * into the importTable() process.
 */
@Override
@SuppressWarnings("unchecked")
public void importTable(ImportJobContext context)
    throws IOException, ImportException {
  SqoopOptions options = context.getOptions();
  Configuration conf = options.getConf();
  Class<? extends Mapper> mapperClass = (Class<? extends Mapper>)
      conf.getClass(MAPPER_KEY, Mapper.class);
  Class<? extends InputFormat> ifClass = (Class<? extends InputFormat>)
      conf.getClass(INPUT_FORMAT_KEY, TextInputFormat.class);
  Class<? extends OutputFormat> ofClass = (Class<? extends OutputFormat>)
      conf.getClass(OUTPUT_FORMAT_KEY, TextOutputFormat.class);
  Class<? extends ImportJobBase> jobClass = (Class<? extends ImportJobBase>)
      conf.getClass(IMPORT_JOB_KEY, ImportJobBase.class);
  String tableName = context.getTableName();
  // Instantiate the user's chosen ImportJobBase instance.
  ImportJobBase importJob = ReflectionUtils.newInstance(jobClass, conf);
  // And configure the dependencies to inject.
  importJob.setOptions(options);
  importJob.setMapperClass(mapperClass);
  importJob.setInputFormatClass(ifClass);
  importJob.setOutputFormatClass(ofClass);
  importJob.runImport(tableName, context.getJarFile(),
      getSplitColumn(options, tableName), conf);
}
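The injection mechanism above is just Configuration.getClass with a default. The same pattern in a self-contained sketch (the key string is hypothetical; Sqoop's real MAPPER_KEY/OUTPUT_FORMAT_KEY constants live in its own codebase):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class PluggableFormatSketch {
  // Hypothetical key name, for illustration only.
  static final String OUTPUT_FORMAT_KEY = "example.output.format.class";

  public static void main(String[] args) {
    Configuration conf = new Configuration();
    conf.setClass(OUTPUT_FORMAT_KEY, SequenceFileOutputFormat.class, OutputFormat.class);
    // getClass falls back to the supplied default when the key is unset.
    Class<?> chosen = conf.getClass(OUTPUT_FORMAT_KEY, TextOutputFormat.class);
    System.out.println(chosen.getName());
  }
}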
private void writeBadOutput(TaskAttempt attempt, Configuration conf)
    throws Exception {
  TaskAttemptContext tContext = new TaskAttemptContextImpl(conf,
      TypeConverter.fromYarn(attempt.getID()));
  TextOutputFormat<?, ?> theOutputFormat = new TextOutputFormat();
  RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
  NullWritable nullWritable = NullWritable.get();
  try {
    theRecordWriter.write(key2, val2);
    theRecordWriter.write(null, nullWritable);
    theRecordWriter.write(null, val2);
    theRecordWriter.write(nullWritable, val1);
    theRecordWriter.write(key1, nullWritable);
    theRecordWriter.write(key2, null);
    theRecordWriter.write(null, null);
    theRecordWriter.write(key1, val1);
  } finally {
    theRecordWriter.close(tContext);
  }
  OutputFormat outputFormat = ReflectionUtils.newInstance(
      tContext.getOutputFormatClass(), conf);
  OutputCommitter committer = outputFormat.getOutputCommitter(tContext);
  committer.commitTask(tContext);
}
/**
 * Get the {@link OutputFormat} class for the job.
 *
 * @return the {@link OutputFormat} class for the job.
 */
@SuppressWarnings("unchecked")
public Class<? extends OutputFormat<?,?>> getOutputFormatClass()
    throws ClassNotFoundException {
  return (Class<? extends OutputFormat<?,?>>)
      conf.getClass(OUTPUT_FORMAT_CLASS_ATTR, TextOutputFormat.class);
}
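OUTPUT_FORMAT_CLASS_ATTR is normally populated by Job.setOutputFormatClass, and the getter above falls back to TextOutputFormat when the key is unset. A small round-trip sketch:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;

public class OutputFormatConfigSketch {
  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration());
    // setOutputFormatClass stores the class name in the job configuration;
    // getOutputFormatClass reads it back, defaulting to TextOutputFormat.
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    Class<? extends OutputFormat<?, ?>> cls = job.getOutputFormatClass();
    System.out.println(cls.getName());
  }
}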
private void testKilledJob(String fileName,
    Class<? extends OutputFormat> output, String[] exclude) throws Exception {
  Path outDir = getNewOutputDir();
  Job job = MapReduceTestUtil.createKillJob(conf, outDir, inDir);
  job.setOutputFormatClass(output);
  job.submit();
  // wait for the setup to be completed
  while (job.setupProgress() != 1.0f) {
    UtilsForTests.waitFor(100);
  }
  job.killJob(); // kill the job
  assertFalse("Job did not get killed", job.waitForCompletion(true));
  if (fileName != null) {
    Path testFile = new Path(outDir, fileName);
    assertTrue("File " + testFile + " missing for job " + job.getJobID(),
        fs.exists(testFile));
  }
  // check that files from the exclude set are not present
  for (String ex : exclude) {
    Path file = new Path(outDir, ex);
    assertFalse("File " + file + " should not be present for killed job "
        + job.getJobID(), fs.exists(file));
  }
}
private void writeOutput(TaskAttempt attempt, Configuration conf)
    throws Exception {
  TaskAttemptContext tContext = new TaskAttemptContextImpl(conf,
      TypeConverter.fromYarn(attempt.getID()));
  TextOutputFormat<?, ?> theOutputFormat = new TextOutputFormat();
  RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
  NullWritable nullWritable = NullWritable.get();
  try {
    theRecordWriter.write(key1, val1);
    theRecordWriter.write(null, nullWritable);
    theRecordWriter.write(null, val1);
    theRecordWriter.write(nullWritable, val2);
    theRecordWriter.write(key2, nullWritable);
    theRecordWriter.write(key1, null);
    theRecordWriter.write(null, null);
    theRecordWriter.write(key2, val2);
  } finally {
    theRecordWriter.close(tContext);
  }
  OutputFormat outputFormat = ReflectionUtils.newInstance(
      tContext.getOutputFormatClass(), conf);
  OutputCommitter committer = outputFormat.getOutputCommitter(tContext);
  committer.commitTask(tContext);
}
/**
 * Commit task.
 *
 * @param outputFormat Output format.
 * @throws IgniteCheckedException In case of Grid exception.
 * @throws IOException In case of IO exception.
 * @throws InterruptedException In case of interrupt.
 */
protected void commit(@Nullable OutputFormat outputFormat) throws IgniteCheckedException, IOException, InterruptedException {
  if (hadoopCtx.writer() != null) {
    assert outputFormat != null;
    OutputCommitter outputCommitter = outputFormat.getOutputCommitter(hadoopCtx);
    if (outputCommitter.needsTaskCommit(hadoopCtx))
      outputCommitter.commitTask(hadoopCtx);
  }
}