下面列出了org.apache.hadoop.mapreduce.Job#setMaxMapAttempts ( ) 实例代码,或者点击链接到github查看源代码,也可以在右侧发表评论。
/**
* Launch tests
* @param conf Configuration of the mapreduce job.
* @param inDir input path
* @param outDir output path
* @param input Input text
* @throws IOException
*/
public void launchTest(JobConf conf,
Path inDir,
Path outDir,
String input)
throws IOException, InterruptedException, ClassNotFoundException {
FileSystem outFs = outDir.getFileSystem(conf);
// Launch job with default option for temp dir.
// i.e. temp dir is ./tmp
Job job = Job.getInstance(conf);
job.addFileToClassPath(APP_JAR);
job.setJarByClass(TestMiniMRChildTask.class);
job.setMaxMapAttempts(1); // speed up failures
job.waitForCompletion(true);
boolean succeeded = job.waitForCompletion(true);
assertTrue(succeeded);
outFs.delete(outDir, true);
}
@Test (timeout = 60000)
public void testFailingJob() throws IOException, InterruptedException,
ClassNotFoundException {
LOG.info("\n\n\nStarting testFailingJob().");
if (!(new File(MiniTezCluster.APPJAR)).exists()) {
LOG.info("MRAppJar " + MiniTezCluster.APPJAR
+ " not found. Not running test.");
return;
}
Configuration sleepConf = new Configuration(mrrTezCluster.getConfig());
MRRSleepJob sleepJob = new MRRSleepJob();
sleepJob.setConf(sleepConf);
Job job = sleepJob.createJob(1, 1, 1, 1, 1,
1, 1, 1, 1, 1);
job.setJarByClass(MRRSleepJob.class);
job.setMaxMapAttempts(1); // speed up failures
job.getConfiguration().setBoolean(MRRSleepJob.MAP_FATAL_ERROR, true);
job.getConfiguration().set(MRRSleepJob.MAP_ERROR_TASK_IDS, "*");
job.submit();
boolean succeeded = job.waitForCompletion(true);
Assert.assertFalse(succeeded);
Assert.assertEquals(JobStatus.State.FAILED, job.getJobState());
// FIXME once counters and task progress can be obtained properly
// TODO verify failed task diagnostics
}
@Test (timeout = 60000)
public void testMRRSleepJob() throws IOException, InterruptedException,
ClassNotFoundException {
LOG.info("\n\n\nStarting testMRRSleepJob().");
if (!(new File(MiniTezCluster.APPJAR)).exists()) {
LOG.info("MRAppJar " + MiniTezCluster.APPJAR
+ " not found. Not running test.");
return;
}
Configuration sleepConf = new Configuration(mrrTezCluster.getConfig());
MRRSleepJob sleepJob = new MRRSleepJob();
sleepJob.setConf(sleepConf);
Job job = sleepJob.createJob(1, 1, 1, 1, 1,
1, 1, 1, 1, 1);
job.setJarByClass(MRRSleepJob.class);
job.setMaxMapAttempts(1); // speed up failures
job.submit();
String trackingUrl = job.getTrackingURL();
String jobId = job.getJobID().toString();
boolean succeeded = job.waitForCompletion(true);
Assert.assertTrue(succeeded);
Assert.assertEquals(JobStatus.State.SUCCEEDED, job.getJobState());
// There's one bug in YARN that there may be some suffix at the end of trackingURL (YARN-2246)
// After TEZ-1961, the tracking will change from http://localhost:53419/proxy/application_1430963524753_0005
// to http://localhost:53419/proxy/application_1430963524753_0005/ui/
// So here use String#contains to verify.
Assert.assertTrue("Tracking URL was " + trackingUrl +
" but didn't Match Job ID " + jobId ,
trackingUrl.contains(jobId.substring(jobId.indexOf("_"))));
// FIXME once counters and task progress can be obtained properly
// TODO use dag client to test counters and task progress?
// what about completed jobs?
}
private void testWithConf(Configuration conf) throws IOException,
InterruptedException, ClassNotFoundException, URISyntaxException {
// Create a temporary file of length 1.
Path first = createTempFile("distributed.first", "x");
// Create two jars with a single file inside them.
Path second =
makeJar(new Path(TEST_ROOT_DIR, "distributed.second.jar"), 2);
Path third =
makeJar(new Path(TEST_ROOT_DIR, "distributed.third.jar"), 3);
Path fourth =
makeJar(new Path(TEST_ROOT_DIR, "distributed.fourth.jar"), 4);
Job job = Job.getInstance(conf);
job.setMapperClass(DistributedCacheCheckerMapper.class);
job.setReducerClass(DistributedCacheCheckerReducer.class);
job.setOutputFormatClass(NullOutputFormat.class);
FileInputFormat.setInputPaths(job, first);
// Creates the Job Configuration
job.addCacheFile(
new URI(first.toUri().toString() + "#distributed.first.symlink"));
job.addFileToClassPath(second);
job.addArchiveToClassPath(third);
job.addCacheArchive(fourth.toUri());
job.setMaxMapAttempts(1); // speed up failures
job.submit();
assertTrue(job.waitForCompletion(false));
}
private Job runSpecTest(boolean mapspec, boolean redspec)
throws IOException, ClassNotFoundException, InterruptedException {
Path first = createTempFile("specexec_map_input1", "a\nz");
Path secnd = createTempFile("specexec_map_input2", "a\nz");
Configuration conf = mrCluster.getConfig();
conf.setBoolean(MRJobConfig.MAP_SPECULATIVE,mapspec);
conf.setBoolean(MRJobConfig.REDUCE_SPECULATIVE,redspec);
conf.setClass(MRJobConfig.MR_AM_TASK_ESTIMATOR,
TestSpecEstimator.class,
TaskRuntimeEstimator.class);
Job job = Job.getInstance(conf);
job.setJarByClass(TestSpeculativeExecution.class);
job.setMapperClass(SpeculativeMapper.class);
job.setReducerClass(SpeculativeReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
job.setNumReduceTasks(2);
FileInputFormat.setInputPaths(job, first);
FileInputFormat.addInputPath(job, secnd);
FileOutputFormat.setOutputPath(job, TEST_OUT_DIR);
// Delete output directory if it exists.
try {
localFs.delete(TEST_OUT_DIR,true);
} catch (IOException e) {
// ignore
}
// Creates the Job Configuration
job.addFileToClassPath(APP_JAR); // The AppMaster jar itself.
job.setMaxMapAttempts(2);
job.submit();
return job;
}
@Test (timeout = 60000)
public void testFailingAttempt() throws IOException, InterruptedException,
ClassNotFoundException {
LOG.info("\n\n\nStarting testFailingAttempt().");
if (!(new File(MiniTezCluster.APPJAR)).exists()) {
LOG.info("MRAppJar " + MiniTezCluster.APPJAR
+ " not found. Not running test.");
return;
}
Configuration sleepConf = new Configuration(mrrTezCluster.getConfig());
MRRSleepJob sleepJob = new MRRSleepJob();
sleepJob.setConf(sleepConf);
Job job = sleepJob.createJob(1, 1, 1, 1, 1,
1, 1, 1, 1, 1);
job.setJarByClass(MRRSleepJob.class);
job.setMaxMapAttempts(3); // speed up failures
job.getConfiguration().setBoolean(MRRSleepJob.MAP_THROW_ERROR, true);
job.getConfiguration().set(MRRSleepJob.MAP_ERROR_TASK_IDS, "0");
job.submit();
boolean succeeded = job.waitForCompletion(true);
Assert.assertTrue(succeeded);
Assert.assertEquals(JobStatus.State.SUCCEEDED, job.getJobState());
// FIXME once counters and task progress can be obtained properly
// TODO verify failed task diagnostics
}
@Test (timeout = 60000)
public void testFailingAttempt() throws IOException, InterruptedException,
ClassNotFoundException {
LOG.info("\n\n\nStarting testFailingAttempt().");
if (!(new File(MiniTezCluster.APPJAR)).exists()) {
LOG.info("MRAppJar " + MiniTezCluster.APPJAR
+ " not found. Not running test.");
return;
}
Configuration sleepConf = new Configuration(mrrTezCluster.getConfig());
MRRSleepJob sleepJob = new MRRSleepJob();
sleepJob.setConf(sleepConf);
Job job = sleepJob.createJob(1, 1, 1, 1, 1,
1, 1, 1, 1, 1);
job.setJarByClass(MRRSleepJob.class);
job.setMaxMapAttempts(3); // speed up failures
job.getConfiguration().setBoolean(MRRSleepJob.MAP_THROW_ERROR, true);
job.getConfiguration().set(MRRSleepJob.MAP_ERROR_TASK_IDS, "0");
job.submit();
boolean succeeded = job.waitForCompletion(true);
Assert.assertTrue(succeeded);
Assert.assertEquals(JobStatus.State.SUCCEEDED, job.getJobState());
// FIXME once counters and task progress can be obtained properly
// TODO verify failed task diagnostics
}
@Test (timeout = 60000)
public void testFailingJob() throws IOException, InterruptedException,
ClassNotFoundException {
LOG.info("\n\n\nStarting testFailingJob().");
if (!(new File(MiniTezCluster.APPJAR)).exists()) {
LOG.info("MRAppJar " + MiniTezCluster.APPJAR
+ " not found. Not running test.");
return;
}
Configuration sleepConf = new Configuration(mrrTezCluster.getConfig());
MRRSleepJob sleepJob = new MRRSleepJob();
sleepJob.setConf(sleepConf);
Job job = sleepJob.createJob(1, 1, 1, 1, 1,
1, 1, 1, 1, 1);
job.setJarByClass(MRRSleepJob.class);
job.setMaxMapAttempts(1); // speed up failures
job.getConfiguration().setBoolean(MRRSleepJob.MAP_FATAL_ERROR, true);
job.getConfiguration().set(MRRSleepJob.MAP_ERROR_TASK_IDS, "*");
job.submit();
boolean succeeded = job.waitForCompletion(true);
Assert.assertFalse(succeeded);
Assert.assertEquals(JobStatus.State.FAILED, job.getJobState());
// FIXME once counters and task progress can be obtained properly
// TODO verify failed task diagnostics
}
void runTestTaskEnv(JobConf conf, Path inDir, Path outDir, boolean oldConfigs)
throws IOException, InterruptedException, ClassNotFoundException {
String input = "The input";
configure(conf, inDir, outDir, input,
EnvCheckMapper.class, EnvCheckReducer.class);
// test
// - new SET of new var (MY_PATH)
// - set of old var (LANG)
// - append to an old var from modified env (LD_LIBRARY_PATH)
// - append to an old var from tt's env (PATH)
// - append to a new var (NEW_PATH)
String mapTaskEnvKey = JobConf.MAPRED_MAP_TASK_ENV;
String reduceTaskEnvKey = JobConf.MAPRED_MAP_TASK_ENV;
String mapTaskJavaOptsKey = JobConf.MAPRED_MAP_TASK_JAVA_OPTS;
String reduceTaskJavaOptsKey = JobConf.MAPRED_REDUCE_TASK_JAVA_OPTS;
String mapTaskJavaOpts = MAP_OPTS_VAL;
String reduceTaskJavaOpts = REDUCE_OPTS_VAL;
conf.setBoolean(OLD_CONFIGS, oldConfigs);
if (oldConfigs) {
mapTaskEnvKey = reduceTaskEnvKey = JobConf.MAPRED_TASK_ENV;
mapTaskJavaOptsKey = reduceTaskJavaOptsKey = JobConf.MAPRED_TASK_JAVA_OPTS;
mapTaskJavaOpts = reduceTaskJavaOpts = TASK_OPTS_VAL;
}
conf.set(
mapTaskEnvKey,
Shell.WINDOWS ? "MY_PATH=/tmp,LANG=en_us_8859_1,LD_LIBRARY_PATH=%LD_LIBRARY_PATH%;/tmp,"
+ "PATH=%PATH%;/tmp,NEW_PATH=%NEW_PATH%;/tmp"
: "MY_PATH=/tmp,LANG=en_us_8859_1,LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/tmp,"
+ "PATH=$PATH:/tmp,NEW_PATH=$NEW_PATH:/tmp");
conf.set(
reduceTaskEnvKey,
Shell.WINDOWS ? "MY_PATH=/tmp,LANG=en_us_8859_1,LD_LIBRARY_PATH=%LD_LIBRARY_PATH%;/tmp,"
+ "PATH=%PATH%;/tmp,NEW_PATH=%NEW_PATH%;/tmp"
: "MY_PATH=/tmp,LANG=en_us_8859_1,LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/tmp,"
+ "PATH=$PATH:/tmp,NEW_PATH=$NEW_PATH:/tmp");
conf.set("path", System.getenv("PATH"));
conf.set(mapTaskJavaOptsKey, mapTaskJavaOpts);
conf.set(reduceTaskJavaOptsKey, reduceTaskJavaOpts);
Job job = Job.getInstance(conf);
job.addFileToClassPath(APP_JAR);
job.setJarByClass(TestMiniMRChildTask.class);
job.setMaxMapAttempts(1); // speed up failures
job.waitForCompletion(true);
boolean succeeded = job.waitForCompletion(true);
assertTrue("The environment checker job failed.", succeeded);
}
private void testSleepJobInternal(boolean useRemoteJar) throws Exception {
LOG.info("\n\n\nStarting testSleepJob: useRemoteJar=" + useRemoteJar);
if (!(new File(MiniMRYarnCluster.APPJAR)).exists()) {
LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR
+ " not found. Not running test.");
return;
}
Configuration sleepConf = new Configuration(mrCluster.getConfig());
// set master address to local to test that local mode applied iff framework == local
sleepConf.set(MRConfig.MASTER_ADDRESS, "local");
SleepJob sleepJob = new SleepJob();
sleepJob.setConf(sleepConf);
// job with 3 maps (10s) and numReduces reduces (5s), 1 "record" each:
Job job = sleepJob.createJob(3, numSleepReducers, 10000, 1, 5000, 1);
job.addFileToClassPath(APP_JAR); // The AppMaster jar itself.
if (useRemoteJar) {
final Path localJar = new Path(
ClassUtil.findContainingJar(SleepJob.class));
ConfigUtil.addLink(job.getConfiguration(), "/jobjars",
localFs.makeQualified(localJar.getParent()).toUri());
job.setJar("viewfs:///jobjars/" + localJar.getName());
} else {
job.setJarByClass(SleepJob.class);
}
job.setMaxMapAttempts(1); // speed up failures
job.submit();
String trackingUrl = job.getTrackingURL();
String jobId = job.getJobID().toString();
boolean succeeded = job.waitForCompletion(true);
Assert.assertTrue(succeeded);
Assert.assertEquals(JobStatus.State.SUCCEEDED, job.getJobState());
Assert.assertTrue("Tracking URL was " + trackingUrl +
" but didn't Match Job ID " + jobId ,
trackingUrl.endsWith(jobId.substring(jobId.lastIndexOf("_")) + "/"));
verifySleepJobCounters(job);
verifyTaskProgress(job);
// TODO later: add explicit "isUber()" checks of some sort (extend
// JobStatus?)--compare against MRJobConfig.JOB_UBERTASK_ENABLE value
}
private void testJobClassloader(boolean useCustomClasses) throws IOException,
InterruptedException, ClassNotFoundException {
LOG.info("\n\n\nStarting testJobClassloader()"
+ " useCustomClasses=" + useCustomClasses);
if (!(new File(MiniMRYarnCluster.APPJAR)).exists()) {
LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR
+ " not found. Not running test.");
return;
}
final Configuration sleepConf = new Configuration(mrCluster.getConfig());
// set master address to local to test that local mode applied iff framework == local
sleepConf.set(MRConfig.MASTER_ADDRESS, "local");
sleepConf.setBoolean(MRJobConfig.MAPREDUCE_JOB_CLASSLOADER, true);
if (useCustomClasses) {
// to test AM loading user classes such as output format class, we want
// to blacklist them from the system classes (they need to be prepended
// as the first match wins)
String systemClasses = ApplicationClassLoader.SYSTEM_CLASSES_DEFAULT;
// exclude the custom classes from system classes
systemClasses = "-" + CustomOutputFormat.class.getName() + ",-" +
CustomSpeculator.class.getName() + "," +
systemClasses;
sleepConf.set(MRJobConfig.MAPREDUCE_JOB_CLASSLOADER_SYSTEM_CLASSES,
systemClasses);
}
sleepConf.set(MRJobConfig.IO_SORT_MB, TEST_IO_SORT_MB);
sleepConf.set(MRJobConfig.MR_AM_LOG_LEVEL, Level.ALL.toString());
sleepConf.set(MRJobConfig.MAP_LOG_LEVEL, Level.ALL.toString());
sleepConf.set(MRJobConfig.REDUCE_LOG_LEVEL, Level.ALL.toString());
sleepConf.set(MRJobConfig.MAP_JAVA_OPTS, "-verbose:class");
final SleepJob sleepJob = new SleepJob();
sleepJob.setConf(sleepConf);
final Job job = sleepJob.createJob(1, 1, 10, 1, 10, 1);
job.setMapperClass(ConfVerificationMapper.class);
job.addFileToClassPath(APP_JAR); // The AppMaster jar itself.
job.setJarByClass(SleepJob.class);
job.setMaxMapAttempts(1); // speed up failures
if (useCustomClasses) {
// set custom output format class and speculator class
job.setOutputFormatClass(CustomOutputFormat.class);
final Configuration jobConf = job.getConfiguration();
jobConf.setClass(MRJobConfig.MR_AM_JOB_SPECULATOR, CustomSpeculator.class,
Speculator.class);
// speculation needs to be enabled for the speculator to be loaded
jobConf.setBoolean(MRJobConfig.MAP_SPECULATIVE, true);
}
job.submit();
boolean succeeded = job.waitForCompletion(true);
Assert.assertTrue("Job status: " + job.getStatus().getFailureInfo(),
succeeded);
}
@Test (timeout = 60000)
public void testRandomWriter() throws IOException, InterruptedException,
ClassNotFoundException {
LOG.info("\n\n\nStarting testRandomWriter().");
if (!(new File(MiniMRYarnCluster.APPJAR)).exists()) {
LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR
+ " not found. Not running test.");
return;
}
RandomTextWriterJob randomWriterJob = new RandomTextWriterJob();
mrCluster.getConfig().set(RandomTextWriterJob.TOTAL_BYTES, "3072");
mrCluster.getConfig().set(RandomTextWriterJob.BYTES_PER_MAP, "1024");
Job job = randomWriterJob.createJob(mrCluster.getConfig());
Path outputDir = new Path(OUTPUT_ROOT_DIR, "random-output");
FileOutputFormat.setOutputPath(job, outputDir);
job.setSpeculativeExecution(false);
job.addFileToClassPath(APP_JAR); // The AppMaster jar itself.
job.setJarByClass(RandomTextWriterJob.class);
job.setMaxMapAttempts(1); // speed up failures
job.submit();
String trackingUrl = job.getTrackingURL();
String jobId = job.getJobID().toString();
boolean succeeded = job.waitForCompletion(true);
Assert.assertTrue(succeeded);
Assert.assertEquals(JobStatus.State.SUCCEEDED, job.getJobState());
Assert.assertTrue("Tracking URL was " + trackingUrl +
" but didn't Match Job ID " + jobId ,
trackingUrl.endsWith(jobId.substring(jobId.lastIndexOf("_")) + "/"));
// Make sure there are three files in the output-dir
RemoteIterator<FileStatus> iterator =
FileContext.getFileContext(mrCluster.getConfig()).listStatus(
outputDir);
int count = 0;
while (iterator.hasNext()) {
FileStatus file = iterator.next();
if (!file.getPath().getName()
.equals(FileOutputCommitter.SUCCEEDED_FILE_NAME)) {
count++;
}
}
Assert.assertEquals("Number of part files is wrong!", 3, count);
verifyRandomWriterCounters(job);
// TODO later: add explicit "isUber()" checks of some sort
}
@Test (timeout = 60000)
public void testMRRSleepJobWithCompression() throws IOException,
InterruptedException, ClassNotFoundException {
LOG.info("\n\n\nStarting testMRRSleepJobWithCompression().");
if (!(new File(MiniTezCluster.APPJAR)).exists()) {
LOG.info("MRAppJar " + MiniTezCluster.APPJAR
+ " not found. Not running test.");
return;
}
Configuration sleepConf = new Configuration(mrrTezCluster.getConfig());
MRRSleepJob sleepJob = new MRRSleepJob();
sleepJob.setConf(sleepConf);
Job job = sleepJob.createJob(1, 1, 2, 1, 1,
1, 1, 1, 1, 1);
job.setJarByClass(MRRSleepJob.class);
job.setMaxMapAttempts(1); // speed up failures
// enable compression
job.getConfiguration().setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);
job.getConfiguration().set(MRJobConfig.MAP_OUTPUT_COMPRESS_CODEC,
DefaultCodec.class.getName());
job.submit();
String trackingUrl = job.getTrackingURL();
String jobId = job.getJobID().toString();
boolean succeeded = job.waitForCompletion(true);
Assert.assertTrue(succeeded);
Assert.assertEquals(JobStatus.State.SUCCEEDED, job.getJobState());
Assert.assertTrue("Tracking URL was " + trackingUrl +
" but didn't Match Job ID " + jobId ,
trackingUrl.endsWith(jobId.substring(jobId.lastIndexOf("_")) + "/"));
// FIXME once counters and task progress can be obtained properly
// TODO use dag client to test counters and task progress?
// what about completed jobs?
}
void runTestTaskEnv(JobConf conf, Path inDir, Path outDir, boolean oldConfigs)
throws IOException, InterruptedException, ClassNotFoundException {
String input = "The input";
configure(conf, inDir, outDir, input,
EnvCheckMapper.class, EnvCheckReducer.class);
// test
// - new SET of new var (MY_PATH)
// - set of old var (LANG)
// - append to an old var from modified env (LD_LIBRARY_PATH)
// - append to an old var from tt's env (PATH)
// - append to a new var (NEW_PATH)
String mapTaskEnvKey = JobConf.MAPRED_MAP_TASK_ENV;
String reduceTaskEnvKey = JobConf.MAPRED_MAP_TASK_ENV;
String mapTaskJavaOptsKey = JobConf.MAPRED_MAP_TASK_JAVA_OPTS;
String reduceTaskJavaOptsKey = JobConf.MAPRED_REDUCE_TASK_JAVA_OPTS;
String mapTaskJavaOpts = MAP_OPTS_VAL;
String reduceTaskJavaOpts = REDUCE_OPTS_VAL;
conf.setBoolean(OLD_CONFIGS, oldConfigs);
if (oldConfigs) {
mapTaskEnvKey = reduceTaskEnvKey = JobConf.MAPRED_TASK_ENV;
mapTaskJavaOptsKey = reduceTaskJavaOptsKey = JobConf.MAPRED_TASK_JAVA_OPTS;
mapTaskJavaOpts = reduceTaskJavaOpts = TASK_OPTS_VAL;
}
conf.set(
mapTaskEnvKey,
Shell.WINDOWS ? "MY_PATH=/tmp,LANG=en_us_8859_1,LD_LIBRARY_PATH=%LD_LIBRARY_PATH%;/tmp,"
+ "PATH=%PATH%;/tmp,NEW_PATH=%NEW_PATH%;/tmp"
: "MY_PATH=/tmp,LANG=en_us_8859_1,LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/tmp,"
+ "PATH=$PATH:/tmp,NEW_PATH=$NEW_PATH:/tmp");
conf.set(
reduceTaskEnvKey,
Shell.WINDOWS ? "MY_PATH=/tmp,LANG=en_us_8859_1,LD_LIBRARY_PATH=%LD_LIBRARY_PATH%;/tmp,"
+ "PATH=%PATH%;/tmp,NEW_PATH=%NEW_PATH%;/tmp"
: "MY_PATH=/tmp,LANG=en_us_8859_1,LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/tmp,"
+ "PATH=$PATH:/tmp,NEW_PATH=$NEW_PATH:/tmp");
conf.set("path", System.getenv("PATH"));
conf.set(mapTaskJavaOptsKey, mapTaskJavaOpts);
conf.set(reduceTaskJavaOptsKey, reduceTaskJavaOpts);
Job job = Job.getInstance(conf);
job.addFileToClassPath(APP_JAR);
job.setJarByClass(TestMiniMRChildTask.class);
job.setMaxMapAttempts(1); // speed up failures
job.waitForCompletion(true);
boolean succeeded = job.waitForCompletion(true);
assertTrue("The environment checker job failed.", succeeded);
}
@Test (timeout = 60000)
public void testRandomWriter() throws IOException, InterruptedException,
ClassNotFoundException {
LOG.info("\n\n\nStarting testRandomWriter().");
if (!(new File(MiniTezCluster.APPJAR)).exists()) {
LOG.info("MRAppJar " + MiniTezCluster.APPJAR
+ " not found. Not running test.");
return;
}
RandomTextWriterJob randomWriterJob = new RandomTextWriterJob();
mrrTezCluster.getConfig().set(RandomTextWriterJob.TOTAL_BYTES, "3072");
mrrTezCluster.getConfig().set(RandomTextWriterJob.BYTES_PER_MAP, "1024");
Job job = randomWriterJob.createJob(mrrTezCluster.getConfig());
Path outputDir = new Path(OUTPUT_ROOT_DIR, "random-output");
FileOutputFormat.setOutputPath(job, outputDir);
job.setSpeculativeExecution(false);
job.setJarByClass(RandomTextWriterJob.class);
job.setMaxMapAttempts(1); // speed up failures
job.submit();
String trackingUrl = job.getTrackingURL();
String jobId = job.getJobID().toString();
boolean succeeded = job.waitForCompletion(true);
Assert.assertTrue(succeeded);
Assert.assertEquals(JobStatus.State.SUCCEEDED, job.getJobState());
Assert.assertTrue("Tracking URL was " + trackingUrl +
" but didn't Match Job ID " + jobId ,
trackingUrl.contains(jobId.substring(jobId.indexOf("_"))));
// Make sure there are three files in the output-dir
RemoteIterator<FileStatus> iterator =
FileContext.getFileContext(mrrTezCluster.getConfig()).listStatus(
outputDir);
int count = 0;
while (iterator.hasNext()) {
FileStatus file = iterator.next();
if (!file.getPath().getName()
.equals(FileOutputCommitter.SUCCEEDED_FILE_NAME)) {
count++;
}
}
Assert.assertEquals("Number of part files is wrong!", 3, count);
}
private void testJobClassloader(boolean useCustomClasses) throws IOException,
InterruptedException, ClassNotFoundException {
LOG.info("\n\n\nStarting testJobClassloader()"
+ " useCustomClasses=" + useCustomClasses);
if (!(new File(MiniMRYarnCluster.APPJAR)).exists()) {
LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR
+ " not found. Not running test.");
return;
}
final Configuration sleepConf = new Configuration(mrCluster.getConfig());
// set master address to local to test that local mode applied iff framework == local
sleepConf.set(MRConfig.MASTER_ADDRESS, "local");
sleepConf.setBoolean(MRJobConfig.MAPREDUCE_JOB_CLASSLOADER, true);
if (useCustomClasses) {
// to test AM loading user classes such as output format class, we want
// to blacklist them from the system classes (they need to be prepended
// as the first match wins)
String systemClasses = ApplicationClassLoader.SYSTEM_CLASSES_DEFAULT;
// exclude the custom classes from system classes
systemClasses = "-" + CustomOutputFormat.class.getName() + ",-" +
CustomSpeculator.class.getName() + "," +
systemClasses;
sleepConf.set(MRJobConfig.MAPREDUCE_JOB_CLASSLOADER_SYSTEM_CLASSES,
systemClasses);
}
sleepConf.set(MRJobConfig.IO_SORT_MB, TEST_IO_SORT_MB);
sleepConf.set(MRJobConfig.MR_AM_LOG_LEVEL, Level.ALL.toString());
sleepConf.set(MRJobConfig.MAP_LOG_LEVEL, Level.ALL.toString());
sleepConf.set(MRJobConfig.REDUCE_LOG_LEVEL, Level.ALL.toString());
sleepConf.set(MRJobConfig.MAP_JAVA_OPTS, "-verbose:class");
final SleepJob sleepJob = new SleepJob();
sleepJob.setConf(sleepConf);
final Job job = sleepJob.createJob(1, 1, 10, 1, 10, 1);
job.setMapperClass(ConfVerificationMapper.class);
job.addFileToClassPath(APP_JAR); // The AppMaster jar itself.
job.setJarByClass(SleepJob.class);
job.setMaxMapAttempts(1); // speed up failures
if (useCustomClasses) {
// set custom output format class and speculator class
job.setOutputFormatClass(CustomOutputFormat.class);
final Configuration jobConf = job.getConfiguration();
jobConf.setClass(MRJobConfig.MR_AM_JOB_SPECULATOR, CustomSpeculator.class,
Speculator.class);
// speculation needs to be enabled for the speculator to be loaded
jobConf.setBoolean(MRJobConfig.MAP_SPECULATIVE, true);
}
job.submit();
boolean succeeded = job.waitForCompletion(true);
Assert.assertTrue("Job status: " + job.getStatus().getFailureInfo(),
succeeded);
}
@Test (timeout = 60000)
public void testRandomWriter() throws IOException, InterruptedException,
ClassNotFoundException {
LOG.info("\n\n\nStarting testRandomWriter().");
if (!(new File(MiniMRYarnCluster.APPJAR)).exists()) {
LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR
+ " not found. Not running test.");
return;
}
RandomTextWriterJob randomWriterJob = new RandomTextWriterJob();
mrCluster.getConfig().set(RandomTextWriterJob.TOTAL_BYTES, "3072");
mrCluster.getConfig().set(RandomTextWriterJob.BYTES_PER_MAP, "1024");
Job job = randomWriterJob.createJob(mrCluster.getConfig());
Path outputDir = new Path(OUTPUT_ROOT_DIR, "random-output");
FileOutputFormat.setOutputPath(job, outputDir);
job.setSpeculativeExecution(false);
job.addFileToClassPath(APP_JAR); // The AppMaster jar itself.
job.setJarByClass(RandomTextWriterJob.class);
job.setMaxMapAttempts(1); // speed up failures
job.submit();
String trackingUrl = job.getTrackingURL();
String jobId = job.getJobID().toString();
boolean succeeded = job.waitForCompletion(true);
Assert.assertTrue(succeeded);
Assert.assertEquals(JobStatus.State.SUCCEEDED, job.getJobState());
Assert.assertTrue("Tracking URL was " + trackingUrl +
" but didn't Match Job ID " + jobId ,
trackingUrl.endsWith(jobId.substring(jobId.lastIndexOf("_")) + "/"));
// Make sure there are three files in the output-dir
RemoteIterator<FileStatus> iterator =
FileContext.getFileContext(mrCluster.getConfig()).listStatus(
outputDir);
int count = 0;
while (iterator.hasNext()) {
FileStatus file = iterator.next();
if (!file.getPath().getName()
.equals(FileOutputCommitter.SUCCEEDED_FILE_NAME)) {
count++;
}
}
Assert.assertEquals("Number of part files is wrong!", 3, count);
verifyRandomWriterCounters(job);
// TODO later: add explicit "isUber()" checks of some sort
}
public void _testDistributedCache(String jobJarPath) throws Exception {
if (!(new File(MiniMRYarnCluster.APPJAR)).exists()) {
LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR
+ " not found. Not running test.");
return;
}
// Create a temporary file of length 1.
Path first = createTempFile("distributed.first", "x");
// Create two jars with a single file inside them.
Path second =
makeJar(new Path(TEST_ROOT_DIR, "distributed.second.jar"), 2);
Path third =
makeJar(new Path(TEST_ROOT_DIR, "distributed.third.jar"), 3);
Path fourth =
makeJar(new Path(TEST_ROOT_DIR, "distributed.fourth.jar"), 4);
Job job = Job.getInstance(mrCluster.getConfig());
// Set the job jar to a new "dummy" jar so we can check that its extracted
// properly
job.setJar(jobJarPath);
// Because the job jar is a "dummy" jar, we need to include the jar with
// DistributedCacheChecker or it won't be able to find it
Path distributedCacheCheckerJar = new Path(
JarFinder.getJar(DistributedCacheChecker.class));
job.addFileToClassPath(distributedCacheCheckerJar.makeQualified(
localFs.getUri(), distributedCacheCheckerJar.getParent()));
job.setMapperClass(DistributedCacheChecker.class);
job.setOutputFormatClass(NullOutputFormat.class);
FileInputFormat.setInputPaths(job, first);
// Creates the Job Configuration
job.addCacheFile(
new URI(first.toUri().toString() + "#distributed.first.symlink"));
job.addFileToClassPath(second);
// The AppMaster jar itself
job.addFileToClassPath(
APP_JAR.makeQualified(localFs.getUri(), APP_JAR.getParent()));
job.addArchiveToClassPath(third);
job.addCacheArchive(fourth.toUri());
job.setMaxMapAttempts(1); // speed up failures
job.submit();
String trackingUrl = job.getTrackingURL();
String jobId = job.getJobID().toString();
Assert.assertTrue(job.waitForCompletion(false));
Assert.assertTrue("Tracking URL was " + trackingUrl +
" but didn't Match Job ID " + jobId ,
trackingUrl.endsWith(jobId.substring(jobId.lastIndexOf("_")) + "/"));
}
@Override
protected int run(CommandLine cmd) throws Exception {
if (!cmd.getArgList().isEmpty()) throw new HalyardExport.ExportException("Unknown arguments: " + cmd.getArgList().toString());
String source = cmd.getOptionValue('s');
String queryFiles = cmd.getOptionValue('q');
String target = cmd.getOptionValue('t');
if (!target.contains("{0}")) {
throw new HalyardExport.ExportException("Bulk export target must contain '{0}' to be replaced by stripped filename of the actual SPARQL query.");
}
getConf().set(SOURCE, source);
getConf().set(TARGET, target);
String driver = cmd.getOptionValue('c');
if (driver != null) {
getConf().set(JDBC_DRIVER, driver);
}
String props[] = cmd.getOptionValues('p');
if (props != null) {
for (int i=0; i<props.length; i++) {
props[i] = Base64.encodeBase64String(props[i].getBytes(StandardCharsets.UTF_8));
}
getConf().setStrings(JDBC_PROPERTIES, props);
}
if (cmd.hasOption('i')) getConf().set(HalyardBulkUpdate.ELASTIC_INDEX_URL, cmd.getOptionValue('i'));
TableMapReduceUtil.addDependencyJars(getConf(),
HalyardExport.class,
NTriplesUtil.class,
Rio.class,
AbstractRDFHandler.class,
RDFFormat.class,
RDFParser.class,
HTable.class,
HBaseConfiguration.class,
AuthenticationProtos.class,
Trace.class,
Gauge.class);
HBaseConfiguration.addHbaseResources(getConf());
String cp = cmd.getOptionValue('l');
if (cp != null) {
String jars[] = cp.split(":");
StringBuilder newCp = new StringBuilder();
for (int i=0; i<jars.length; i++) {
if (i > 0) newCp.append(':');
newCp.append(addTmpFile(jars[i])); //append clappspath entris to tmpfiles and trim paths from the classpath
}
getConf().set(JDBC_CLASSPATH, newCp.toString());
}
Job job = Job.getInstance(getConf(), "HalyardBulkExport " + source + " -> " + target);
job.setJarByClass(HalyardBulkExport.class);
job.setMaxMapAttempts(1);
job.setMapperClass(BulkExportMapper.class);
job.setMapOutputKeyClass(NullWritable.class);
job.setMapOutputValueClass(Void.class);
job.setNumReduceTasks(0);
job.setInputFormatClass(QueryInputFormat.class);
QueryInputFormat.setQueriesFromDirRecursive(job.getConfiguration(), queryFiles, false, 0);
job.setOutputFormatClass(NullOutputFormat.class);
TableMapReduceUtil.initCredentials(job);
if (job.waitForCompletion(true)) {
LOG.info("Bulk Export Completed..");
return 0;
}
return -1;
}
@Test (timeout = 60000)
public void testRandomWriter() throws IOException, InterruptedException,
ClassNotFoundException {
LOG.info("\n\n\nStarting testRandomWriter().");
if (!(new File(MiniTezCluster.APPJAR)).exists()) {
LOG.info("MRAppJar " + MiniTezCluster.APPJAR
+ " not found. Not running test.");
return;
}
RandomTextWriterJob randomWriterJob = new RandomTextWriterJob();
mrrTezCluster.getConfig().set(RandomTextWriterJob.TOTAL_BYTES, "3072");
mrrTezCluster.getConfig().set(RandomTextWriterJob.BYTES_PER_MAP, "1024");
Job job = randomWriterJob.createJob(mrrTezCluster.getConfig());
Path outputDir = new Path(OUTPUT_ROOT_DIR, "random-output");
FileOutputFormat.setOutputPath(job, outputDir);
job.setSpeculativeExecution(false);
job.setJarByClass(RandomTextWriterJob.class);
job.setMaxMapAttempts(1); // speed up failures
job.submit();
String trackingUrl = job.getTrackingURL();
String jobId = job.getJobID().toString();
boolean succeeded = job.waitForCompletion(true);
Assert.assertTrue(succeeded);
Assert.assertEquals(JobStatus.State.SUCCEEDED, job.getJobState());
Assert.assertTrue("Tracking URL was " + trackingUrl +
" but didn't Match Job ID " + jobId ,
trackingUrl.endsWith(jobId.substring(jobId.lastIndexOf("_")) + "/"));
// Make sure there are three files in the output-dir
RemoteIterator<FileStatus> iterator =
FileContext.getFileContext(mrrTezCluster.getConfig()).listStatus(
outputDir);
int count = 0;
while (iterator.hasNext()) {
FileStatus file = iterator.next();
if (!file.getPath().getName()
.equals(FileOutputCommitter.SUCCEEDED_FILE_NAME)) {
count++;
}
}
Assert.assertEquals("Number of part files is wrong!", 3, count);
}