Listed below are example usages of org.apache.hadoop.mapred.JobContext, drawn from open-source projects on GitHub.
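Before the examples, a minimal sketch of constructing a JobContext directly and reading job-level settings. The job name is a placeholder; the sketch assumes the public constructor of the new-API org.apache.hadoop.mapreduce.task.JobContextImpl, whereas the old-API org.apache.hadoop.mapred.JobContextImpl is normally obtained from the framework rather than constructed by hand.
// A hedged sketch, not taken from the examples below.
JobConf jobConf = new JobConf();
jobConf.setJobName("word-count-example"); // placeholder name
org.apache.hadoop.mapreduce.JobContext jobContext =
    new org.apache.hadoop.mapreduce.task.JobContextImpl(
        jobConf, new org.apache.hadoop.mapreduce.JobID());
System.out.println(jobContext.getJobName());        // word-count-example
System.out.println(jobContext.getNumReduceTasks()); // 1 by default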
@Test
public void testOpen() throws Exception {
  OutputFormat<String, Long> dummyOutputFormat = mock(DummyOutputFormat.class);
  DummyOutputCommitter outputCommitter = mock(DummyOutputCommitter.class);
  JobConf jobConf = Mockito.spy(new JobConf());
  when(jobConf.getOutputCommitter()).thenReturn(outputCommitter);
  HadoopOutputFormat<String, Long> outputFormat = new HadoopOutputFormat<>(dummyOutputFormat, jobConf);
  outputFormat.open(1, 1);
  verify(jobConf, times(2)).getOutputCommitter();
  verify(outputCommitter, times(1)).setupJob(any(JobContext.class));
  verify(dummyOutputFormat, times(1)).getRecordWriter(nullable(FileSystem.class), any(JobConf.class), anyString(), any(Progressable.class));
}
TextWriterProxy(Configuration config, String fileName) throws IOException {
  // fieldDelimiter, columns, conf (a JobConf), fileSystem and writer are
  // presumably fields of the enclosing class; they are not shown in this snippet.
  fieldDelimiter = config.getChar(Key.FIELD_DELIMITER);
  columns = config.getListConfiguration(Key.COLUMN);
  String compress = config.getString(Key.COMPRESS, null);
  SimpleDateFormat dateFormat = new SimpleDateFormat("yyyyMMddHHmm");
  String attempt = "attempt_" + dateFormat.format(new Date()) + "_0001_m_000000_0";
  Path outputPath = new Path(fileName);
  // TODO: the TASK_ATTEMPT_ID still needs to be verified
  conf.set(JobContext.TASK_ATTEMPT_ID, attempt);
  FileOutputFormat outFormat = new TextOutputFormat();
  FileOutputFormat.setOutputPath(conf, outputPath);
  FileOutputFormat.setWorkOutputPath(conf, outputPath);
  if (null != compress) {
    Class<? extends CompressionCodec> codecClass = getCompressCodec(compress);
    if (null != codecClass) {
      FileOutputFormat.setOutputCompressorClass(conf, codecClass);
    }
  }
  writer = outFormat.getRecordWriter(fileSystem, conf, outputPath.toString(), Reporter.NULL);
}
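The getCompressCodec helper used above is not part of the snippet. A minimal hypothetical sketch, assuming it maps a configured name such as "gzip" or "bzip2" to a Hadoop codec class:
private static Class<? extends CompressionCodec> getCompressCodec(String compress) {
  // Hypothetical helper: the real implementation is not shown in the snippet.
  if ("gzip".equalsIgnoreCase(compress)) {
    return org.apache.hadoop.io.compress.GzipCodec.class;
  }
  if ("bzip2".equalsIgnoreCase(compress)) {
    return org.apache.hadoop.io.compress.BZip2Codec.class;
  }
  return null; // unknown name: fall back to uncompressed output
}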
private void setHadoopJobConfigs(Job job, int numInputPaths) {
  job.getConfiguration().set(JobContext.JOB_NAME, this.getClass().getName());
  // Turn this on so that class paths the user specifies always take precedence.
  job.getConfiguration().set(MRJobConfig.MAPREDUCE_JOB_USER_CLASSPATH_FIRST, "true");
  // Turn this off since we don't need an empty _SUCCESS file in the output directory.
  job.getConfiguration().set(FileOutputCommitter.SUCCESSFUL_JOB_OUTPUT_DIR_MARKER, "false");
  job.setJarByClass(HadoopSegmentPreprocessingJob.class);
  String hadoopTokenFileLocation = System.getenv(UserGroupInformation.HADOOP_TOKEN_FILE_LOCATION);
  if (hadoopTokenFileLocation != null) {
    job.getConfiguration().set(MRJobConfig.MAPREDUCE_JOB_CREDENTIALS_BINARY, hadoopTokenFileLocation);
  }
  // Mapper configs.
  job.setMapperClass(SegmentPreprocessingMapper.class);
  job.setMapOutputKeyClass(AvroKey.class);
  job.setMapOutputValueClass(AvroValue.class);
  job.getConfiguration().setInt(JobContext.NUM_MAPS, numInputPaths);
  // Reducer configs.
  job.setReducerClass(SegmentPreprocessingReducer.class);
  job.setOutputKeyClass(AvroKey.class);
  job.setOutputValueClass(NullWritable.class);
}
@Test
public void testAbsentNotificationOnNotLastRetryUnregistrationFailure()
    throws Exception {
  HttpServer2 server = startHttpServer();
  MRApp app = spy(new MRAppWithCustomContainerAllocator(2, 2, false,
      this.getClass().getName(), true, 1, false));
  doNothing().when(app).sysexit();
  JobConf conf = new JobConf();
  conf.set(JobContext.MR_JOB_END_NOTIFICATION_URL,
      JobEndServlet.baseUrl + "jobend?jobid=$jobId&status=$jobStatus");
  JobImpl job = (JobImpl) app.submit(conf);
  app.waitForState(job, JobState.RUNNING);
  app.getContext().getEventHandler()
      .handle(new JobEvent(app.getJobId(), JobEventType.JOB_AM_REBOOT));
  app.waitForInternalState(job, JobStateInternal.REBOOT);
  // Now shut down.
  // Unregistration fails: isLastAMRetry is recalculated, and this is not
  // the last retry.
  app.shutDownJob();
  // Not the last AM attempt, so the user should see that the job is still running.
  app.waitForState(job, JobState.RUNNING);
  Assert.assertFalse(app.isLastAMRetry());
  Assert.assertEquals(0, JobEndServlet.calledTimes);
  Assert.assertNull(JobEndServlet.requestUri);
  Assert.assertNull(JobEndServlet.foundJobState);
  server.stop();
}
@Test
public void testNotificationOnLastRetryNormalShutdown() throws Exception {
  HttpServer2 server = startHttpServer();
  // Act like it is the second attempt. Default max attempts is 2.
  MRApp app = spy(new MRAppWithCustomContainerAllocator(
      2, 2, true, this.getClass().getName(), true, 2, true));
  doNothing().when(app).sysexit();
  JobConf conf = new JobConf();
  conf.set(JobContext.MR_JOB_END_NOTIFICATION_URL,
      JobEndServlet.baseUrl + "jobend?jobid=$jobId&status=$jobStatus");
  JobImpl job = (JobImpl) app.submit(conf);
  app.waitForInternalState(job, JobStateInternal.SUCCEEDED);
  // Unregistration succeeds: successfullyUnregistered is set.
  app.shutDownJob();
  Assert.assertTrue(app.isLastAMRetry());
  Assert.assertEquals(1, JobEndServlet.calledTimes);
  Assert.assertEquals("jobid=" + job.getID() + "&status=SUCCEEDED",
      JobEndServlet.requestUri.getQuery());
  Assert.assertEquals(JobState.SUCCEEDED.toString(),
      JobEndServlet.foundJobState);
  server.stop();
}
/** {@inheritDoc} */
@Override public void run(HadoopTaskContext taskCtx) throws IgniteCheckedException {
  HadoopV2TaskContext ctx = (HadoopV2TaskContext) taskCtx;
  JobContext jobCtx = ctx.jobContext();
  try {
    OutputCommitter committer = jobCtx.getJobConf().getOutputCommitter();
    if (abort)
      committer.abortJob(jobCtx, JobStatus.State.FAILED);
    else
      committer.commitJob(jobCtx);
  }
  catch (IOException e) {
    throw new IgniteCheckedException(e);
  }
}
/**
 * Create the temporary output file for the Hadoop RecordWriter.
 * @param taskNumber The number of the parallel instance.
 * @param numTasks The number of parallel tasks.
 * @throws java.io.IOException
 */
@Override
public void open(int taskNumber, int numTasks) throws IOException {
  // enforce sequential open() calls
  synchronized (OPEN_MUTEX) {
    if (Integer.toString(taskNumber + 1).length() > 6) {
      throw new IOException("Task id too large.");
    }
    TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_"
        + String.format("%" + (6 - Integer.toString(taskNumber + 1).length()) + "s", " ").replace(" ", "0")
        + Integer.toString(taskNumber + 1)
        + "_0");
    this.jobConf.set("mapred.task.id", taskAttemptID.toString());
    this.jobConf.setInt("mapred.task.partition", taskNumber + 1);
    // for hadoop 2.2
    this.jobConf.set("mapreduce.task.attempt.id", taskAttemptID.toString());
    this.jobConf.setInt("mapreduce.task.partition", taskNumber + 1);
    this.context = new TaskAttemptContextImpl(this.jobConf, taskAttemptID);
    this.outputCommitter = this.jobConf.getOutputCommitter();
    JobContext jobContext = new JobContextImpl(this.jobConf, new JobID());
    this.outputCommitter.setupJob(jobContext);
    this.recordWriter = this.mapredOutputFormat.getRecordWriter(null, this.jobConf, Integer.toString(taskNumber + 1), new HadoopDummyProgressable());
  }
}
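For illustration, the string building above zero-pads (taskNumber + 1) to six digits to form a parseable TaskAttemptID; a small sketch with an example value (the task number is hypothetical):
// taskNumber = 3: "%5s" formats a single space to width 5, replace(" ", "0")
// turns it into "00000", and "4" is appended, giving task id 000004.
TaskAttemptID id = TaskAttemptID.forName("attempt__0000_r_000004_0");
System.out.println(id); // attempt__0000_r_000004_0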
@Override
public void finalizeGlobal(int parallelism) throws IOException {
  try {
    JobContext jobContext = new JobContextImpl(this.jobConf, new JobID());
    OutputCommitter outputCommitter = this.jobConf.getOutputCommitter();
    // finalize HDFS output format
    outputCommitter.commitJob(jobContext);
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
}
@Test
public void testFinalizeGlobal() throws Exception {
  OutputFormat<String, Long> dummyOutputFormat = mock(DummyOutputFormat.class);
  DummyOutputCommitter outputCommitter = mock(DummyOutputCommitter.class);
  JobConf jobConf = Mockito.spy(new JobConf());
  when(jobConf.getOutputCommitter()).thenReturn(outputCommitter);
  HadoopOutputFormat<String, Long> outputFormat = new HadoopOutputFormat<>(dummyOutputFormat, jobConf);
  outputFormat.finalizeGlobal(1);
  verify(outputCommitter, times(1)).commitJob(any(JobContext.class));
}
private void commitJob(String location) throws IOException {
  jobConf.set(OUTDIR, location);
  JobContext jobContext = new JobContextImpl(this.jobConf, new JobID());
  OutputCommitter outputCommitter = this.jobConf.getOutputCommitter();
  // finalize HDFS output format
  outputCommitter.commitJob(jobContext);
}
@Test
public void testNotificationOnLastRetryUnregistrationFailure()
    throws Exception {
  HttpServer2 server = startHttpServer();
  MRApp app = spy(new MRAppWithCustomContainerAllocator(2, 2, false,
      this.getClass().getName(), true, 2, false));
  // Currently, isLastRetry is always false at the beginning of MRAppMaster,
  // unless the staging area exists or the commit has already started.
  // Manually set isLastRetry to true here; it should be reset to false when
  // unregistration fails.
  app.isLastAMRetry = true;
  doNothing().when(app).sysexit();
  JobConf conf = new JobConf();
  conf.set(JobContext.MR_JOB_END_NOTIFICATION_URL,
      JobEndServlet.baseUrl + "jobend?jobid=$jobId&status=$jobStatus");
  JobImpl job = (JobImpl) app.submit(conf);
  app.waitForState(job, JobState.RUNNING);
  app.getContext().getEventHandler()
      .handle(new JobEvent(app.getJobId(), JobEventType.JOB_AM_REBOOT));
  app.waitForInternalState(job, JobStateInternal.REBOOT);
  // Now shut down. The user should see the FAILED state.
  // Unregistration fails: isLastAMRetry is recalculated and is no longer true.
  // The reboot stops the service internally, so we don't need to shut down twice.
  app.waitForServiceToStop(10000);
  Assert.assertFalse(app.isLastAMRetry());
  // Since this is not the last retry, JobEndServlet was not called.
  Assert.assertEquals(0, JobEndServlet.calledTimes);
  Assert.assertNull(JobEndServlet.requestUri);
  Assert.assertNull(JobEndServlet.foundJobState);
  server.stop();
}
@SuppressWarnings({"rawtypes", "unchecked"})
@Test (timeout=10000)
public void testLoadMapper() throws Exception {
  Configuration conf = new Configuration();
  conf.setInt(JobContext.NUM_REDUCES, 2);
  CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
  conf.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);
  TaskAttemptID taskId = new TaskAttemptID();
  RecordReader<NullWritable, GridmixRecord> reader = new FakeRecordReader();
  LoadRecordGkGrWriter writer = new LoadRecordGkGrWriter();
  OutputCommitter committer = new CustomOutputCommitter();
  StatusReporter reporter = new TaskAttemptContextImpl.DummyReporter();
  LoadSplit split = getLoadSplit();
  MapContext<NullWritable, GridmixRecord, GridmixKey, GridmixRecord> mapContext =
      new MapContextImpl<NullWritable, GridmixRecord, GridmixKey, GridmixRecord>(
          conf, taskId, reader, writer, committer, reporter, split);
  // context
  Context ctx = new WrappedMapper<NullWritable, GridmixRecord, GridmixKey, GridmixRecord>()
      .getMapContext(mapContext);
  reader.initialize(split, ctx);
  ctx.getConfiguration().setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);
  CompressionEmulationUtil.setCompressionEmulationEnabled(
      ctx.getConfiguration(), true);
  LoadJob.LoadMapper mapper = new LoadJob.LoadMapper();
  // setup, map, clean
  mapper.run(ctx);
  Map<GridmixKey, GridmixRecord> data = writer.getData();
  // check result
  assertEquals(2, data.size());
}
@SuppressWarnings({"unchecked", "rawtypes"})
@Test (timeout=30000)
public void testSleepMapper() throws Exception {
  SleepJob.SleepMapper test = new SleepJob.SleepMapper();
  Configuration conf = new Configuration();
  conf.setInt(JobContext.NUM_REDUCES, 2);
  CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
  conf.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);
  TaskAttemptID taskId = new TaskAttemptID();
  FakeRecordLLReader reader = new FakeRecordLLReader();
  LoadRecordGkNullWriter writer = new LoadRecordGkNullWriter();
  OutputCommitter committer = new CustomOutputCommitter();
  StatusReporter reporter = new TaskAttemptContextImpl.DummyReporter();
  SleepSplit split = getSleepSplit();
  MapContext<LongWritable, LongWritable, GridmixKey, NullWritable> mapcontext =
      new MapContextImpl<LongWritable, LongWritable, GridmixKey, NullWritable>(
          conf, taskId, reader, writer, committer, reporter, split);
  Context context = new WrappedMapper<LongWritable, LongWritable, GridmixKey, NullWritable>()
      .getMapContext(mapcontext);
  long start = System.currentTimeMillis();
  LOG.info("start:" + start);
  LongWritable key = new LongWritable(start + 2000);
  LongWritable value = new LongWritable(start + 2000);
  // should sleep for 2 seconds
  test.map(key, value, context);
  LOG.info("finish:" + System.currentTimeMillis());
  assertTrue(System.currentTimeMillis() >= (start + 2000));
  test.cleanup(context);
  assertEquals(1, writer.getData().size());
}
@Override
public void setupJob(JobContext jobContext) throws IOException {
  writeFile(jobContext.getJobConf(), JOB_SETUP_FILE_NAME);
}
@Override
public void commitJob(JobContext jobContext) throws IOException {
  super.commitJob(jobContext);
  writeFile(jobContext.getJobConf(), JOB_COMMIT_FILE_NAME);
}
@Override
public void abortJob(JobContext jobContext, int status)
    throws IOException {
  super.abortJob(jobContext, status);
  writeFile(jobContext.getJobConf(), JOB_ABORT_FILE_NAME);
}
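The writeFile helper these overrides call is not included in the snippet. A minimal hypothetical sketch, assuming it drops an empty marker file into the job's output directory so the test can assert which committer callbacks ran:
private void writeFile(JobConf conf, String filename) throws IOException {
  // Hypothetical helper: create an empty marker file in the output directory.
  Path outputPath = FileOutputFormat.getOutputPath(conf);
  FileSystem fs = outputPath.getFileSystem(conf);
  fs.create(new Path(outputPath, filename)).close();
}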
/**
 * Parse the URL that needs to be notified of the end of the job, along
 * with the number of retries in case of failure, the amount of time to
 * wait between retries, and the proxy settings.
 * @param conf the configuration
 */
public void setConf(Configuration conf) {
  this.conf = conf;
  numTries = Math.min(
      conf.getInt(MRJobConfig.MR_JOB_END_RETRY_ATTEMPTS, 0) + 1,
      conf.getInt(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_ATTEMPTS, 1));
  waitInterval = Math.min(
      conf.getInt(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, 5000),
      conf.getInt(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_RETRY_INTERVAL, 5000));
  waitInterval = (waitInterval < 0) ? 5000 : waitInterval;
  timeout = conf.getInt(JobContext.MR_JOB_END_NOTIFICATION_TIMEOUT,
      JobContext.DEFAULT_MR_JOB_END_NOTIFICATION_TIMEOUT);
  userUrl = conf.get(MRJobConfig.MR_JOB_END_NOTIFICATION_URL);
  proxyConf = conf.get(MRJobConfig.MR_JOB_END_NOTIFICATION_PROXY);
  // Configure the proxy if it is set. It should be set like
  // proxyType@proxyHostname:port
  if (proxyConf != null && !proxyConf.equals("") &&
      proxyConf.lastIndexOf(":") != -1) {
    int typeIndex = proxyConf.indexOf("@");
    Proxy.Type proxyType = Proxy.Type.HTTP;
    if (typeIndex != -1 &&
        proxyConf.substring(0, typeIndex).compareToIgnoreCase("socks") == 0) {
      proxyType = Proxy.Type.SOCKS;
    }
    String hostname = proxyConf.substring(typeIndex + 1,
        proxyConf.lastIndexOf(":"));
    String portConf = proxyConf.substring(proxyConf.lastIndexOf(":") + 1);
    try {
      int port = Integer.parseInt(portConf);
      proxyToUse = new Proxy(proxyType,
          new InetSocketAddress(hostname, port));
      Log.info("Job end notification using proxy type \"" + proxyType +
          "\" hostname \"" + hostname + "\" and port \"" + port + "\"");
    } catch (NumberFormatException nfe) {
      Log.warn("Job end notification couldn't parse configured proxy's port "
          + portConf + ". Not going to use a proxy");
    }
  }
}
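A short usage sketch for the proxy format parsed above; the notification URL, host names, and port are placeholders, not values from the source:
// Hedged example: configure job-end notification through a SOCKS proxy.
Configuration conf = new Configuration();
conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_URL,
    "http://notify.example.com/jobend?jobid=$jobId&status=$jobStatus");
conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_PROXY, "socks@proxy.example.com:1080");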
public TestKeyFieldBasedComparator() throws IOException {
  super(HadoopTestCase.LOCAL_MR, HadoopTestCase.LOCAL_FS, 1, 1);
  conf = createJobConf();
  localConf = createJobConf();
  localConf.set(JobContext.MAP_OUTPUT_KEY_FIELD_SEPERATOR, " ");
}
public void configure(String keySpec, int expect) throws Exception {
  Path testdir = new Path(TEST_DIR.getAbsolutePath());
  Path inDir = new Path(testdir, "in");
  Path outDir = new Path(testdir, "out");
  FileSystem fs = getFileSystem();
  fs.delete(testdir, true);
  conf.setInputFormat(TextInputFormat.class);
  FileInputFormat.setInputPaths(conf, inDir);
  FileOutputFormat.setOutputPath(conf, outDir);
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(LongWritable.class);
  conf.setNumMapTasks(1);
  conf.setNumReduceTasks(1);
  conf.setOutputFormat(TextOutputFormat.class);
  conf.setOutputKeyComparatorClass(KeyFieldBasedComparator.class);
  conf.setKeyFieldComparatorOptions(keySpec);
  conf.setKeyFieldPartitionerOptions("-k1.1,1.1");
  conf.set(JobContext.MAP_OUTPUT_KEY_FIELD_SEPERATOR, " ");
  conf.setMapperClass(InverseMapper.class);
  conf.setReducerClass(IdentityReducer.class);
  if (!fs.mkdirs(testdir)) {
    throw new IOException("Mkdirs failed to create " + testdir.toString());
  }
  if (!fs.mkdirs(inDir)) {
    throw new IOException("Mkdirs failed to create " + inDir.toString());
  }
  // set up the input data: two lines in one file
  Path inFile = new Path(inDir, "part0");
  FileOutputStream fos = new FileOutputStream(inFile.toString());
  fos.write((line1 + "\n").getBytes());
  fos.write((line2 + "\n").getBytes());
  fos.close();
  JobClient jc = new JobClient(conf);
  RunningJob r_job = jc.submitJob(conf);
  while (!r_job.isComplete()) {
    Thread.sleep(1000);
  }
  if (!r_job.isSuccessful()) {
    fail("Oops! The job broke due to an unexpected error");
  }
  Path[] outputFiles = FileUtil.stat2Paths(
      getFileSystem().listStatus(outDir,
          new Utils.OutputFileUtils.OutputFilesFilter()));
  if (outputFiles.length > 0) {
    InputStream is = getFileSystem().open(outputFiles[0]);
    BufferedReader reader = new BufferedReader(new InputStreamReader(is));
    String line = reader.readLine();
    // make sure we get what we expect as the first line, and also
    // that we have two lines
    if (expect == 1) {
      assertTrue(line.startsWith(line1));
    } else if (expect == 2) {
      assertTrue(line.startsWith(line2));
    }
    line = reader.readLine();
    if (expect == 1) {
      assertTrue(line.startsWith(line2));
    } else if (expect == 2) {
      assertTrue(line.startsWith(line1));
    }
    reader.close();
  }
}