Listed below are code examples showing how org.apache.hadoop.mapreduce.task.JobContextImpl is constructed and used, often together with org.apache.hadoop.mapred.TaskAttemptContextImpl, in various open-source projects.
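Before the project-specific examples, here is a minimal, self-contained sketch of the pattern they all share: put job-level settings into a Hadoop Configuration, wrap it together with a JobID in a JobContextImpl, and hand the resulting JobContext to an InputFormat or OutputCommitter. The class name and the job identifier below are made up for illustration and are not taken from any of the projects listed.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.task.JobContextImpl;

public class JobContextImplSketch {

    // Builds a JobContext the same way the examples below do: a plain
    // Configuration plus a JobID, wrapped in a JobContextImpl.
    public static JobContext newJobContext() {
        Configuration conf = new Configuration();
        // Job-level settings go into the Configuration before the context is built,
        // e.g. the minimum split size used by several of the examples below.
        conf.setInt("mapreduce.input.fileinputformat.split.minsize", 1);
        return new JobContextImpl(conf, new JobID("example", 0));
    }
}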
@Override
public HadoopInputSplit[] createInputSplits(int minNumSplits)
        throws IOException {
    configuration.setInt("mapreduce.input.fileinputformat.split.minsize", minNumSplits);
    JobContext jobContext = new JobContextImpl(configuration, new JobID());
    jobContext.getCredentials().addAll(this.credentials);
    Credentials currentUserCreds = getCredentialsFromUGI(UserGroupInformation.getCurrentUser());
    if (currentUserCreds != null) {
        jobContext.getCredentials().addAll(currentUserCreds);
    }
    List<org.apache.hadoop.mapreduce.InputSplit> splits;
    try {
        splits = this.mapreduceInputFormat.getSplits(jobContext);
    } catch (InterruptedException e) {
        throw new IOException("Could not get Splits.", e);
    }
    HadoopInputSplit[] hadoopInputSplits = new HadoopInputSplit[splits.size()];
    for (int i = 0; i < hadoopInputSplits.length; i++) {
        hadoopInputSplits[i] = new HadoopInputSplit(i, splits.get(i), jobContext);
    }
    return hadoopInputSplits;
}
@Override
public HadoopInputSplit[] createInputSplits(int minNumSplits)
        throws IOException {
    configuration.setInt("mapreduce.input.fileinputformat.split.minsize", minNumSplits);
    JobContext jobContext = new JobContextImpl(configuration, new JobID());
    List<InputSplit> splits;
    try {
        splits = this.hCatInputFormat.getSplits(jobContext);
    } catch (InterruptedException e) {
        throw new IOException("Could not get Splits.", e);
    }
    HadoopInputSplit[] hadoopInputSplits = new HadoopInputSplit[splits.size()];
    for (int i = 0; i < hadoopInputSplits.length; i++) {
        hadoopInputSplits[i] = new HadoopInputSplit(i, splits.get(i), jobContext);
    }
    return hadoopInputSplits;
}
public void testEmptyOutput() throws Exception {
    Job job = Job.getInstance();
    FileOutputFormat.setOutputPath(job, outDir);
    Configuration conf = job.getConfiguration();
    conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
    JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
    TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
    FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);
    // setup
    committer.setupJob(jContext);
    committer.setupTask(tContext);
    // Do not write any output
    // do commit
    committer.commitTask(tContext);
    committer.commitJob(jContext);
    FileUtil.fullyDelete(new File(outDir.toString()));
}
@Test
public void testNoCommitAction() {
    TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config);
    JobContext jobContext = new JobContextImpl(taskAttemptContext.getConfiguration(),
            taskAttemptContext.getTaskAttemptID().getJobID());
    try {
        OutputCommitter committer = new CopyCommitter(null, taskAttemptContext);
        committer.commitJob(jobContext);
        Assert.assertEquals(taskAttemptContext.getStatus(), "Commit Successful");
        // Test for idempotent commit
        committer.commitJob(jobContext);
        Assert.assertEquals(taskAttemptContext.getStatus(), "Commit Successful");
    } catch (IOException e) {
        LOG.error("Exception encountered ", e);
        Assert.fail("Commit failed");
    }
}
@Before
public void setupCommitter() throws Exception {
    getConfiguration().set(
            "s3.multipart.committer.num-threads", String.valueOf(numThreads));
    getConfiguration().set(UPLOAD_UUID, UUID.randomUUID().toString());
    this.job = new JobContextImpl(getConfiguration(), JOB_ID);
    this.jobCommitter = new MockedS3Committer(S3_OUTPUT_PATH, job);
    jobCommitter.setupJob(job);
    this.uuid = job.getConfiguration().get(UPLOAD_UUID);
    this.tac = new TaskAttemptContextImpl(
            new Configuration(job.getConfiguration()), AID);
    // get the task's configuration copy so modifications take effect
    this.conf = tac.getConfiguration();
    conf.set("mapred.local.dir", "/tmp/local-0,/tmp/local-1");
    conf.setInt(UPLOAD_SIZE, 100);
    this.committer = new MockedS3Committer(S3_OUTPUT_PATH, tac);
}
@Before
public void setupJob() throws Exception {
    this.mockFS = mock(FileSystem.class);
    FileSystem s3 = new Path("s3://" + MockS3FileSystem.BUCKET + "/")
            .getFileSystem(CONF);
    if (s3 instanceof MockS3FileSystem) {
        ((MockS3FileSystem) s3).setMock(mockFS);
    } else {
        throw new RuntimeException("Cannot continue: S3 not mocked");
    }
    this.job = new JobContextImpl(CONF, JOB_ID);
    job.getConfiguration().set(UPLOAD_UUID, UUID.randomUUID().toString());
    this.results = new TestUtil.ClientResults();
    this.errors = new TestUtil.ClientErrors();
    this.mockClient = TestUtil.newMockClient(results, errors);
}
private static OutputCommitter initOutputCommitter(
        OutputFormat<?, ?> outputFormatObj,
        Configuration conf,
        TaskAttemptContext taskAttemptContext)
        throws IllegalStateException {
    OutputCommitter outputCommitter;
    try {
        outputCommitter = outputFormatObj.getOutputCommitter(taskAttemptContext);
        if (outputCommitter != null) {
            outputCommitter.setupJob(new JobContextImpl(conf, taskAttemptContext.getJobID()));
        }
    } catch (Exception e) {
        throw new IllegalStateException("Unable to create OutputCommitter object: ", e);
    }
    return outputCommitter;
}
public HadoopElementIterator(final HadoopGraph graph) {
    try {
        this.graph = graph;
        final Configuration configuration = ConfUtil.makeHadoopConfiguration(this.graph.configuration());
        final InputFormat<NullWritable, VertexWritable> inputFormat = ConfUtil.getReaderAsInputFormat(configuration);
        if (inputFormat instanceof FileInputFormat) {
            final Storage storage = FileSystemStorage.open(configuration);
            if (!this.graph.configuration().containsKey(Constants.GREMLIN_HADOOP_INPUT_LOCATION))
                return; // there is no input location and thus, no data (empty graph)
            if (!Constants.getSearchGraphLocation(this.graph.configuration().getInputLocation(), storage).isPresent())
                return; // there is no data at the input location (empty graph)
            configuration.set(Constants.MAPREDUCE_INPUT_FILEINPUTFORMAT_INPUTDIR, Constants.getSearchGraphLocation(this.graph.configuration().getInputLocation(), storage).get());
        }
        final List<InputSplit> splits = inputFormat.getSplits(new JobContextImpl(configuration, new JobID(UUID.randomUUID().toString(), 1)));
        for (final InputSplit split : splits) {
            this.readers.add(inputFormat.createRecordReader(split, new TaskAttemptContextImpl(configuration, new TaskAttemptID())));
        }
    } catch (final Exception e) {
        throw new IllegalStateException(e.getMessage(), e);
    }
}
@Before
public void setup() throws Exception {
    Configuration conf = new Configuration();
    input = ClassLoader.getSystemClassLoader().getResource("test.cram").getFile();
    reference = ClassLoader.getSystemClassLoader().getResource("auxf.fa").toURI().toString();
    String referenceIndex = ClassLoader.getSystemClassLoader().getResource("auxf.fa.fai")
            .toURI().toString();
    conf.set("mapred.input.dir", "file://" + input);
    URI hdfsRef = clusterUri.resolve("/tmp/auxf.fa");
    URI hdfsRefIndex = clusterUri.resolve("/tmp/auxf.fa.fai");
    Files.copy(Paths.get(URI.create(reference)), Paths.get(hdfsRef));
    Files.copy(Paths.get(URI.create(referenceIndex)), Paths.get(hdfsRefIndex));
    conf.set(CRAMInputFormat.REFERENCE_SOURCE_PATH_PROPERTY, hdfsRef.toString());
    taskAttemptContext = new TaskAttemptContextImpl(conf, mock(TaskAttemptID.class));
    jobContext = new JobContextImpl(conf, taskAttemptContext.getJobID());
}
/** Tests getSplits method of GsonBigQueryInputFormat when Bigquery connection error is thrown. */
@Test
public void testGetSplitsSecurityException() throws IOException {
    when(mockBigquery.tables()).thenReturn(mockBigqueryTables);
    // Write values to file.
    Path mockPath = new Path("gs://test_bucket/path/test");
    GsonRecordReaderTest.writeFile(ghfs, mockPath, (value1 + "\n" + value2 + "\n").getBytes(UTF_8));
    // Run getSplits method.
    GsonBigQueryInputFormat gsonBigQueryInputFormat =
            new GsonBigQueryInputFormatForTestGeneralSecurityException();
    config.set("mapreduce.input.fileinputformat.inputdir", "gs://test_bucket/path/test");
    JobContext jobContext = new JobContextImpl(config, new JobID());
    assertThrows(IOException.class, () -> gsonBigQueryInputFormat.getSplits(jobContext));
}
@Override
public BaseStatistics getStatistics(BaseStatistics cachedStats) throws IOException {
    // only gather base statistics for FileInputFormats
    if (!(mapreduceInputFormat instanceof FileInputFormat)) {
        return null;
    }
    JobContext jobContext = new JobContextImpl(configuration, null);
    final FileBaseStatistics cachedFileStats = (cachedStats instanceof FileBaseStatistics) ?
            (FileBaseStatistics) cachedStats : null;
    try {
        final org.apache.hadoop.fs.Path[] paths = FileInputFormat.getInputPaths(jobContext);
        return getFileStats(cachedFileStats, paths, new ArrayList<FileStatus>(1));
    } catch (IOException ioex) {
        if (LOG.isWarnEnabled()) {
            LOG.warn("Could not determine statistics due to an io error: "
                    + ioex.getMessage());
        }
    } catch (Throwable t) {
        if (LOG.isErrorEnabled()) {
            LOG.error("Unexpected problem while getting the file statistics: "
                    + t.getMessage(), t);
        }
    }
    // no statistics available
    return null;
}
private void testCommitterInternal(int version) throws Exception {
    Job job = Job.getInstance();
    FileOutputFormat.setOutputPath(job, outDir);
    Configuration conf = job.getConfiguration();
    conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
    conf.setInt(FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, version);
    JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
    TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
    FileOutputCommitter committer = new SafeFileOutputCommitter(outDir, tContext);
    // setup
    committer.setupJob(jContext);
    committer.setupTask(tContext);
    // write output
    TextOutputFormat theOutputFormat = new TextOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
    writeOutput(theRecordWriter, tContext);
    // do commit
    committer.commitTask(tContext);
    committer.commitJob(jContext);
    // validate output
    validateContent(outDir);
    FileUtil.fullyDelete(new File(outDir.toString()));
}
private void testMapFileOutputCommitterInternal(int version) throws Exception {
    Job job = Job.getInstance();
    FileOutputFormat.setOutputPath(job, outDir);
    Configuration conf = job.getConfiguration();
    conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
    conf.setInt(FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, version);
    JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
    TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
    FileOutputCommitter committer = new SafeFileOutputCommitter(outDir, tContext);
    // setup
    committer.setupJob(jContext);
    committer.setupTask(tContext);
    // write output
    MapFileOutputFormat theOutputFormat = new MapFileOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
    writeMapFileOutput(theRecordWriter, tContext);
    // do commit
    committer.commitTask(tContext);
    committer.commitJob(jContext);
    // validate output
    validateMapFileOutputContent(FileSystem.get(job.getConfiguration()), outDir);
    FileUtil.fullyDelete(new File(outDir.toString()));
}
private void testSafety(int commitVersion) throws Exception {
    Job job = Job.getInstance();
    FileOutputFormat.setOutputPath(job, outDir);
    Configuration conf = job.getConfiguration();
    conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
    conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 1);
    conf.setInt(FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, commitVersion);
    JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
    TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
    FileOutputCommitter committer = new SafeFileOutputCommitter(outDir, tContext);
    // setup
    committer.setupJob(jContext);
    committer.setupTask(tContext);
    // write output
    TextOutputFormat theOutputFormat = new TextOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
    writeOutput(theRecordWriter, tContext);
    // commit the job before the task is committed (files are still in the temporary dir),
    // which is expected to fail
    try {
        committer.commitJob(jContext);
        Assert.fail("Expected commit job to fail");
    } catch (Exception e) {
        committer.commitTask(tContext);
        committer.commitJob(jContext);
    }
    validateContent(outDir);
    FileUtil.fullyDelete(new File(outDir.toString()));
}
@Override
public void prepare(TSetContext ctx) {
    this.context = ctx;
    Configuration hadoopConf = this.wrappedConfiguration.getConfiguration();
    jconf = new JobConf(hadoopConf);
    try {
        format = inputClazz.newInstance();
        JobContext jobContext = new JobContextImpl(hadoopConf, new JobID(context.getId(),
                context.getIndex()));
        List<InputSplit> splits = format.getSplits(jobContext);
        for (int i = 0; i < splits.size(); i++) {
            if (i % context.getParallelism() == context.getIndex()) {
                assignedSplits.add(splits.get(i));
            }
        }
        if (assignedSplits.size() > 0) {
            TaskID taskID = new TaskID(context.getId(), context.getIndex(),
                    TaskType.MAP, context.getIndex());
            TaskAttemptID taskAttemptID = new TaskAttemptID(taskID, context.getIndex());
            TaskAttemptContextImpl taskAttemptContext =
                    new TaskAttemptContextImpl(jconf, taskAttemptID);
            currentReader = format.createRecordReader(assignedSplits.get(consumingSplit),
                    taskAttemptContext);
            currentReader.initialize(assignedSplits.get(consumingSplit), taskAttemptContext);
        }
    } catch (InstantiationException | IllegalAccessException
            | InterruptedException | IOException e) {
        throw new RuntimeException("Failed to initialize hadoop input", e);
    }
}
@SuppressWarnings("unchecked")
public void testCommitter() throws Exception {
    Job job = Job.getInstance();
    FileOutputFormat.setOutputPath(job, outDir);
    Configuration conf = job.getConfiguration();
    conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
    JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
    TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
    FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);
    // setup
    committer.setupJob(jContext);
    committer.setupTask(tContext);
    // write output
    TextOutputFormat theOutputFormat = new TextOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
    writeOutput(theRecordWriter, tContext);
    // do commit
    committer.commitTask(tContext);
    committer.commitJob(jContext);
    // validate output
    File expectedFile = new File(new Path(outDir, partFile).toString());
    StringBuffer expectedOutput = new StringBuffer();
    expectedOutput.append(key1).append('\t').append(val1).append("\n");
    expectedOutput.append(val1).append("\n");
    expectedOutput.append(val2).append("\n");
    expectedOutput.append(key2).append("\n");
    expectedOutput.append(key1).append("\n");
    expectedOutput.append(key2).append('\t').append(val2).append("\n");
    String output = UtilsForTests.slurp(expectedFile);
    assertEquals(output, expectedOutput.toString());
    FileUtil.fullyDelete(new File(outDir.toString()));
}
@SuppressWarnings("unchecked")
public void testAbort() throws IOException, InterruptedException {
    Job job = Job.getInstance();
    FileOutputFormat.setOutputPath(job, outDir);
    Configuration conf = job.getConfiguration();
    conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
    JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
    TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
    FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);
    // do setup
    committer.setupJob(jContext);
    committer.setupTask(tContext);
    // write output
    TextOutputFormat theOutputFormat = new TextOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
    writeOutput(theRecordWriter, tContext);
    // do abort
    committer.abortTask(tContext);
    File expectedFile = new File(new Path(committer.getWorkPath(), partFile)
            .toString());
    assertFalse("task temp dir still exists", expectedFile.exists());
    committer.abortJob(jContext, JobStatus.State.FAILED);
    expectedFile = new File(new Path(outDir, FileOutputCommitter.PENDING_DIR_NAME)
            .toString());
    assertFalse("job temp dir still exists", expectedFile.exists());
    assertEquals("Output directory not empty", 0, new File(outDir.toString())
            .listFiles().length);
    FileUtil.fullyDelete(new File(outDir.toString()));
}
private void testCommitterInternal(int version) throws Exception {
    Job job = Job.getInstance();
    FileOutputFormat.setOutputPath(job, outDir);
    Configuration conf = job.getConfiguration();
    conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
    conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION,
            version);
    JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
    TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
    FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);
    // setup
    committer.setupJob(jContext);
    committer.setupTask(tContext);
    // write output
    TextOutputFormat theOutputFormat = new TextOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
    writeOutput(theRecordWriter, tContext);
    // do commit
    committer.commitTask(tContext);
    committer.commitJob(jContext);
    // validate output
    validateContent(outDir);
    FileUtil.fullyDelete(new File(outDir.toString()));
}
private void testMapFileOutputCommitterInternal(int version)
        throws Exception {
    Job job = Job.getInstance();
    FileOutputFormat.setOutputPath(job, outDir);
    Configuration conf = job.getConfiguration();
    conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
    conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION,
            version);
    JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
    TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
    FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);
    // setup
    committer.setupJob(jContext);
    committer.setupTask(tContext);
    // write output
    MapFileOutputFormat theOutputFormat = new MapFileOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
    writeMapFileOutput(theRecordWriter, tContext);
    // do commit
    committer.commitTask(tContext);
    committer.commitJob(jContext);
    // validate output
    validateMapFileOutputContent(FileSystem.get(job.getConfiguration()), outDir);
    FileUtil.fullyDelete(new File(outDir.toString()));
}
@Test
public void testAtomicCommitMissingFinal() {
    TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config);
    JobContext jobContext = new JobContextImpl(taskAttemptContext.getConfiguration(),
            taskAttemptContext.getTaskAttemptID().getJobID());
    Configuration conf = jobContext.getConfiguration();
    String workPath = "/tmp1/" + String.valueOf(rand.nextLong());
    String finalPath = "/tmp1/" + String.valueOf(rand.nextLong());
    FileSystem fs = null;
    try {
        OutputCommitter committer = new CopyCommitter(null, taskAttemptContext);
        fs = FileSystem.get(conf);
        fs.mkdirs(new Path(workPath));
        conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, workPath);
        conf.set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, finalPath);
        conf.setBoolean(DistCpConstants.CONF_LABEL_ATOMIC_COPY, true);
        Assert.assertTrue(fs.exists(new Path(workPath)));
        Assert.assertFalse(fs.exists(new Path(finalPath)));
        committer.commitJob(jobContext);
        Assert.assertFalse(fs.exists(new Path(workPath)));
        Assert.assertTrue(fs.exists(new Path(finalPath)));
        // Test for idempotent commit
        committer.commitJob(jobContext);
        Assert.assertFalse(fs.exists(new Path(workPath)));
        Assert.assertTrue(fs.exists(new Path(finalPath)));
    } catch (IOException e) {
        LOG.error("Exception encountered while testing for preserve status", e);
        Assert.fail("Atomic commit failure");
    } finally {
        TestDistCpUtils.delete(fs, workPath);
        TestDistCpUtils.delete(fs, finalPath);
        conf.setBoolean(DistCpConstants.CONF_LABEL_ATOMIC_COPY, false);
    }
}