下面列出了怎么用org.apache.hadoop.mapred.OutputFormat的API类实例代码及写法,或者点击链接到github查看源代码。
@Test
public void testOpen() throws Exception {
OutputFormat<String, Long> dummyOutputFormat = mock(DummyOutputFormat.class);
DummyOutputCommitter outputCommitter = mock(DummyOutputCommitter.class);
JobConf jobConf = Mockito.spy(new JobConf());
when(jobConf.getOutputCommitter()).thenReturn(outputCommitter);
HadoopOutputFormat<String, Long> outputFormat = new HadoopOutputFormat<>(dummyOutputFormat, jobConf);
outputFormat.open(1, 1);
verify(jobConf, times(2)).getOutputCommitter();
verify(outputCommitter, times(1)).setupJob(any(JobContext.class));
verify(dummyOutputFormat, times(1)).getRecordWriter(nullable(FileSystem.class), any(JobConf.class), anyString(), any(Progressable.class));
}
@Test
public void testCloseWithTaskCommit() throws Exception {
OutputFormat<String, Long> dummyOutputFormat = mock(DummyOutputFormat.class);
DummyOutputCommitter outputCommitter = mock(DummyOutputCommitter.class);
when(outputCommitter.needsTaskCommit(nullable(TaskAttemptContext.class))).thenReturn(true);
DummyRecordWriter recordWriter = mock(DummyRecordWriter.class);
JobConf jobConf = mock(JobConf.class);
HadoopOutputFormat<String, Long> outputFormat = new HadoopOutputFormat<>(dummyOutputFormat, jobConf);
outputFormat.recordWriter = recordWriter;
outputFormat.outputCommitter = outputCommitter;
outputFormat.close();
verify(recordWriter, times(1)).close(nullable(Reporter.class));
verify(outputCommitter, times(1)).commitTask(nullable(TaskAttemptContext.class));
}
@Test
public void testCloseWithoutTaskCommit() throws Exception {
OutputFormat<String, Long> dummyOutputFormat = mock(DummyOutputFormat.class);
DummyOutputCommitter outputCommitter = mock(DummyOutputCommitter.class);
when(outputCommitter.needsTaskCommit(any(TaskAttemptContext.class))).thenReturn(false);
DummyRecordWriter recordWriter = mock(DummyRecordWriter.class);
JobConf jobConf = mock(JobConf.class);
HadoopOutputFormat<String, Long> outputFormat = new HadoopOutputFormat<>(dummyOutputFormat, jobConf);
outputFormat.recordWriter = recordWriter;
outputFormat.outputCommitter = outputCommitter;
outputFormat.close();
verify(recordWriter, times(1)).close(any(Reporter.class));
verify(outputCommitter, times(0)).commitTask(any(TaskAttemptContext.class));
}
@Test
public void testOpen() throws Exception {
OutputFormat<String, Long> dummyOutputFormat = mock(DummyOutputFormat.class);
DummyOutputCommitter outputCommitter = mock(DummyOutputCommitter.class);
JobConf jobConf = Mockito.spy(new JobConf());
when(jobConf.getOutputCommitter()).thenReturn(outputCommitter);
HadoopOutputFormat<String, Long> outputFormat = new HadoopOutputFormat<>(dummyOutputFormat, jobConf);
outputFormat.open(1, 1);
verify(jobConf, times(2)).getOutputCommitter();
verify(outputCommitter, times(1)).setupJob(any(JobContext.class));
verify(dummyOutputFormat, times(1)).getRecordWriter(nullable(FileSystem.class), any(JobConf.class), anyString(), any(Progressable.class));
}
@Test
public void testCloseWithTaskCommit() throws Exception {
OutputFormat<String, Long> dummyOutputFormat = mock(DummyOutputFormat.class);
DummyOutputCommitter outputCommitter = mock(DummyOutputCommitter.class);
when(outputCommitter.needsTaskCommit(nullable(TaskAttemptContext.class))).thenReturn(true);
DummyRecordWriter recordWriter = mock(DummyRecordWriter.class);
JobConf jobConf = mock(JobConf.class);
HadoopOutputFormat<String, Long> outputFormat = new HadoopOutputFormat<>(dummyOutputFormat, jobConf);
outputFormat.recordWriter = recordWriter;
outputFormat.outputCommitter = outputCommitter;
outputFormat.close();
verify(recordWriter, times(1)).close(nullable(Reporter.class));
verify(outputCommitter, times(1)).commitTask(nullable(TaskAttemptContext.class));
}
@Test
public void testCloseWithoutTaskCommit() throws Exception {
OutputFormat<String, Long> dummyOutputFormat = mock(DummyOutputFormat.class);
DummyOutputCommitter outputCommitter = mock(DummyOutputCommitter.class);
when(outputCommitter.needsTaskCommit(any(TaskAttemptContext.class))).thenReturn(false);
DummyRecordWriter recordWriter = mock(DummyRecordWriter.class);
JobConf jobConf = mock(JobConf.class);
HadoopOutputFormat<String, Long> outputFormat = new HadoopOutputFormat<>(dummyOutputFormat, jobConf);
outputFormat.recordWriter = recordWriter;
outputFormat.outputCommitter = outputCommitter;
outputFormat.close();
verify(recordWriter, times(1)).close(any(Reporter.class));
verify(outputCommitter, times(0)).commitTask(any(TaskAttemptContext.class));
}
@Test(enabled = true)
public void testWriteChunkData() throws Exception {
NullWritable nada = NullWritable.get();
MneDurableOutputSession<DurableChunk<?>> sess =
new MneDurableOutputSession<DurableChunk<?>>(m_tacontext, null,
MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX);
MneDurableOutputValue<DurableChunk<?>> mdvalue =
new MneDurableOutputValue<DurableChunk<?>>(sess);
OutputFormat<NullWritable, MneDurableOutputValue<DurableChunk<?>>> outputFormat =
new MneOutputFormat<MneDurableOutputValue<DurableChunk<?>>>();
RecordWriter<NullWritable, MneDurableOutputValue<DurableChunk<?>>> writer =
outputFormat.getRecordWriter(null, m_conf, null, null);
DurableChunk<?> dchunk = null;
Checksum cs = new CRC32();
cs.reset();
for (int i = 0; i < m_reccnt; ++i) {
dchunk = genupdDurableChunk(sess, cs);
Assert.assertNotNull(dchunk);
writer.write(nada, mdvalue.of(dchunk));
}
m_checksum = cs.getValue();
writer.close(null);
sess.close();
}
@Test(enabled = true)
public void testWriteLongData() throws Exception {
NullWritable nada = NullWritable.get();
MneDurableOutputSession<Long> sess =
new MneDurableOutputSession<Long>(m_tacontext, null,
MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX);
MneDurableOutputValue<Long> mdvalue =
new MneDurableOutputValue<Long>(sess);
OutputFormat<NullWritable, MneDurableOutputValue<Long>> outputFormat =
new MneOutputFormat<MneDurableOutputValue<Long>>();
RecordWriter<NullWritable, MneDurableOutputValue<Long>> writer =
outputFormat.getRecordWriter(null, m_conf, null, null);
Long val = null;
for (int i = 0; i < m_reccnt; ++i) {
val = m_rand.nextLong();
m_sum += val;
writer.write(nada, mdvalue.of(val));
}
writer.close(null);
sess.close();
}
@Test(enabled = true)
public void testWritePersonData() throws Exception {
NullWritable nada = NullWritable.get();
MneDurableOutputSession<Person<Long>> sess =
new MneDurableOutputSession<Person<Long>>(m_tacontext, null,
MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX);
MneDurableOutputValue<Person<Long>> mdvalue =
new MneDurableOutputValue<Person<Long>>(sess);
OutputFormat<NullWritable, MneDurableOutputValue<Person<Long>>> outputFormat =
new MneOutputFormat<MneDurableOutputValue<Person<Long>>>();
RecordWriter<NullWritable, MneDurableOutputValue<Person<Long>>> writer =
outputFormat.getRecordWriter(null, m_conf, null, null);
Person<Long> person = null;
for (int i = 0; i < m_reccnt; ++i) {
person = sess.newDurableObjectRecord();
person.setAge((short) m_rand.nextInt(50));
person.setName(String.format("Name: [%s]", Utils.genRandomString()), true);
m_sumage += person.getAge();
writer.write(nada, mdvalue.of(person));
}
writer.close(null);
sess.close();
}
@Test(enabled = true)
public void testWriteBufferData() throws Exception {
NullWritable nada = NullWritable.get();
MneDurableOutputSession<DurableBuffer<?>> sess =
new MneDurableOutputSession<DurableBuffer<?>>(null, m_conf,
MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX);
MneDurableOutputValue<DurableBuffer<?>> mdvalue =
new MneDurableOutputValue<DurableBuffer<?>>(sess);
OutputFormat<NullWritable, MneDurableOutputValue<DurableBuffer<?>>> outputFormat =
new MneOutputFormat<MneDurableOutputValue<DurableBuffer<?>>>();
RecordWriter<NullWritable, MneDurableOutputValue<DurableBuffer<?>>> writer =
outputFormat.getRecordWriter(m_fs, m_conf, null, null);
DurableBuffer<?> dbuf = null;
Checksum cs = new CRC32();
cs.reset();
for (int i = 0; i < m_reccnt; ++i) {
dbuf = genupdDurableBuffer(sess, cs);
Assert.assertNotNull(dbuf);
writer.write(nada, mdvalue.of(dbuf));
}
m_checksum = cs.getValue();
writer.close(null);
sess.close();
}
/**
* @param jobConf Job configuration.
* @param taskCtx Task context.
* @param directWrite Direct write flag.
* @param fileName File name.
* @throws IOException In case of IO exception.
*/
HadoopV1OutputCollector(JobConf jobConf, HadoopTaskContext taskCtx, boolean directWrite,
@Nullable String fileName, TaskAttemptID attempt) throws IOException {
this.jobConf = jobConf;
this.taskCtx = taskCtx;
this.attempt = attempt;
if (directWrite) {
jobConf.set("mapreduce.task.attempt.id", attempt.toString());
OutputFormat outFormat = jobConf.getOutputFormat();
writer = outFormat.getRecordWriter(null, jobConf, fileName, Reporter.NULL);
}
else
writer = null;
}
@Test
public void testOpen() throws Exception {
OutputFormat<String, Long> dummyOutputFormat = mock(DummyOutputFormat.class);
DummyOutputCommitter outputCommitter = mock(DummyOutputCommitter.class);
JobConf jobConf = Mockito.spy(new JobConf());
when(jobConf.getOutputCommitter()).thenReturn(outputCommitter);
HadoopOutputFormat<String, Long> outputFormat = new HadoopOutputFormat<>(dummyOutputFormat, jobConf);
outputFormat.open(1, 1);
verify(jobConf, times(2)).getOutputCommitter();
verify(outputCommitter, times(1)).setupJob(any(JobContext.class));
verify(dummyOutputFormat, times(1)).getRecordWriter(nullable(FileSystem.class), any(JobConf.class), anyString(), any(Progressable.class));
}
@Test
public void testCloseWithTaskCommit() throws Exception {
OutputFormat<String, Long> dummyOutputFormat = mock(DummyOutputFormat.class);
DummyOutputCommitter outputCommitter = mock(DummyOutputCommitter.class);
when(outputCommitter.needsTaskCommit(nullable(TaskAttemptContext.class))).thenReturn(true);
DummyRecordWriter recordWriter = mock(DummyRecordWriter.class);
JobConf jobConf = mock(JobConf.class);
HadoopOutputFormat<String, Long> outputFormat = new HadoopOutputFormat<>(dummyOutputFormat, jobConf);
outputFormat.recordWriter = recordWriter;
outputFormat.outputCommitter = outputCommitter;
outputFormat.close();
verify(recordWriter, times(1)).close(nullable(Reporter.class));
verify(outputCommitter, times(1)).commitTask(nullable(TaskAttemptContext.class));
}
@Test
public void testCloseWithoutTaskCommit() throws Exception {
OutputFormat<String, Long> dummyOutputFormat = mock(DummyOutputFormat.class);
DummyOutputCommitter outputCommitter = mock(DummyOutputCommitter.class);
when(outputCommitter.needsTaskCommit(any(TaskAttemptContext.class))).thenReturn(false);
DummyRecordWriter recordWriter = mock(DummyRecordWriter.class);
JobConf jobConf = mock(JobConf.class);
HadoopOutputFormat<String, Long> outputFormat = new HadoopOutputFormat<>(dummyOutputFormat, jobConf);
outputFormat.recordWriter = recordWriter;
outputFormat.outputCommitter = outputCommitter;
outputFormat.close();
verify(recordWriter, times(1)).close(any(Reporter.class));
verify(outputCommitter, times(0)).commitTask(any(TaskAttemptContext.class));
}
@Test
public void testWriteRecord() throws Exception {
OutputFormat<String, Long> dummyOutputFormat = mock(DummyOutputFormat.class);
DummyRecordWriter recordWriter = mock(DummyRecordWriter.class);
JobConf jobConf = mock(JobConf.class);
HadoopOutputFormat<String, Long> outputFormat = new HadoopOutputFormat<>(dummyOutputFormat, jobConf);
outputFormat.recordWriter = recordWriter;
outputFormat.writeRecord(new Tuple2<>("key", 1L));
verify(recordWriter, times(1)).write(anyString(), anyLong());
}
@Test
public void testFinalizeGlobal() throws Exception {
OutputFormat<String, Long> dummyOutputFormat = mock(DummyOutputFormat.class);
DummyOutputCommitter outputCommitter = mock(DummyOutputCommitter.class);
JobConf jobConf = Mockito.spy(new JobConf());
when(jobConf.getOutputCommitter()).thenReturn(outputCommitter);
HadoopOutputFormat<String, Long> outputFormat = new HadoopOutputFormat<>(dummyOutputFormat, jobConf);
outputFormat.finalizeGlobal(1);
verify(outputCommitter, times(1)).commitJob(any(JobContext.class));
}
@Test
public void testWriteRecord() throws Exception {
OutputFormat<String, Long> dummyOutputFormat = mock(DummyOutputFormat.class);
DummyRecordWriter recordWriter = mock(DummyRecordWriter.class);
JobConf jobConf = mock(JobConf.class);
HadoopOutputFormat<String, Long> outputFormat = new HadoopOutputFormat<>(dummyOutputFormat, jobConf);
outputFormat.recordWriter = recordWriter;
outputFormat.writeRecord(new Tuple2<>("key", 1L));
verify(recordWriter, times(1)).write(anyString(), anyLong());
}
@Test
public void testFinalizeGlobal() throws Exception {
OutputFormat<String, Long> dummyOutputFormat = mock(DummyOutputFormat.class);
DummyOutputCommitter outputCommitter = mock(DummyOutputCommitter.class);
JobConf jobConf = Mockito.spy(new JobConf());
when(jobConf.getOutputCommitter()).thenReturn(outputCommitter);
HadoopOutputFormat<String, Long> outputFormat = new HadoopOutputFormat<>(dummyOutputFormat, jobConf);
outputFormat.finalizeGlobal(1);
verify(outputCommitter, times(1)).commitJob(any(JobContext.class));
}
HivePartitionWriter(JobConf jobConf, OutputFormat outputFormat, FileSinkOperator.RecordWriter recordWriter,
OutputCommitter outputCommitter) {
this.jobConf = jobConf;
this.outputFormat = outputFormat;
this.recordWriter = recordWriter;
this.outputCommitter = outputCommitter;
}
public OutputInfo(Class<? extends OutputFormat> formatCls,
Class<? extends Writable> keyCls, Class<? extends Writable> valueCls)
{
outputFormatClass=formatCls;
outputKeyClass=keyCls;
outputValueClass=valueCls;
}
/**
* Set the underlying output format for LazyOutputFormat.
* @param job the {@link JobConf} to modify
* @param theClass the underlying class
*/
@SuppressWarnings("unchecked")
public static void setOutputFormatClass(JobConf job,
Class<? extends OutputFormat> theClass) {
job.setOutputFormat(LazyOutputFormat.class);
job.setClass("mapreduce.output.lazyoutputformat.outputformat", theClass, OutputFormat.class);
}
@SuppressWarnings("unchecked")
private void getBaseOutputFormat(JobConf job) throws IOException {
baseOut = ReflectionUtils.newInstance(
job.getClass("mapreduce.output.lazyoutputformat.outputformat", null, OutputFormat.class),
job);
if (baseOut == null) {
throw new IOException("Ouput format not set for LazyOutputFormat");
}
}
public LazyRecordWriter(JobConf job, OutputFormat of, String name,
Progressable progress) throws IOException {
this.of = of;
this.job = job;
this.name = name;
this.progress = progress;
}
/**
* Set the underlying output format for LazyOutputFormat.
* @param job the {@link JobConf} to modify
* @param theClass the underlying class
*/
@SuppressWarnings("unchecked")
public static void setOutputFormatClass(JobConf job,
Class<? extends OutputFormat> theClass) {
job.setOutputFormat(LazyOutputFormat.class);
job.setClass("mapreduce.output.lazyoutputformat.outputformat", theClass, OutputFormat.class);
}
@SuppressWarnings("unchecked")
private void getBaseOutputFormat(JobConf job) throws IOException {
baseOut = ReflectionUtils.newInstance(
job.getClass("mapreduce.output.lazyoutputformat.outputformat", null, OutputFormat.class),
job);
if (baseOut == null) {
throw new IOException("Ouput format not set for LazyOutputFormat");
}
}
public LazyRecordWriter(JobConf job, OutputFormat of, String name,
Progressable progress) throws IOException {
this.of = of;
this.job = job;
this.name = name;
this.progress = progress;
}
public InputOutputInfo(Class<? extends InputFormat> formatClsIn, Class<? extends OutputFormat> formatClsOut,
Class<? extends Writable> keyCls, Class<? extends Writable> valueCls)
{
inputFormatClass = formatClsIn;
outputFormatClass = formatClsOut;
keyClass = keyCls;
valueClass = valueCls;
}
@Test
public void testWriteRecord() throws Exception {
OutputFormat<String, Long> dummyOutputFormat = mock(DummyOutputFormat.class);
DummyRecordWriter recordWriter = mock(DummyRecordWriter.class);
JobConf jobConf = mock(JobConf.class);
HadoopOutputFormat<String, Long> outputFormat = new HadoopOutputFormat<>(dummyOutputFormat, jobConf);
outputFormat.recordWriter = recordWriter;
outputFormat.writeRecord(new Tuple2<>("key", 1L));
verify(recordWriter, times(1)).write(anyString(), anyLong());
}
@Test
public void testFinalizeGlobal() throws Exception {
OutputFormat<String, Long> dummyOutputFormat = mock(DummyOutputFormat.class);
DummyOutputCommitter outputCommitter = mock(DummyOutputCommitter.class);
JobConf jobConf = Mockito.spy(new JobConf());
when(jobConf.getOutputCommitter()).thenReturn(outputCommitter);
HadoopOutputFormat<String, Long> outputFormat = new HadoopOutputFormat<>(dummyOutputFormat, jobConf);
outputFormat.finalizeGlobal(1);
verify(outputCommitter, times(1)).commitJob(any(JobContext.class));
}
@Override
public FileSinkOperator.RecordWriter getHiveRecordWriter(JobConf jobConf, Class outputFormatClz,
Class<? extends Writable> outValClz, boolean isCompressed, Properties tableProps, Path outPath) {
try {
Class utilClass = HiveFileFormatUtils.class;
OutputFormat outputFormat = (OutputFormat) outputFormatClz.newInstance();
Method utilMethod = utilClass.getDeclaredMethod("getRecordWriter", JobConf.class, OutputFormat.class,
Class.class, boolean.class, Properties.class, Path.class, Reporter.class);
return (FileSinkOperator.RecordWriter) utilMethod.invoke(null,
jobConf, outputFormat, outValClz, isCompressed, tableProps, outPath, Reporter.NULL);
} catch (Exception e) {
throw new CatalogException("Failed to create Hive RecordWriter", e);
}
}