Listed below are example usages of org.apache.hadoop.mapred.FileAlreadyExistsException (typically thrown from checkOutputSpecs() implementations alongside org.apache.hadoop.mapred.InvalidJobConfException), extracted from open source projects. Follow the links to view the full source on GitHub.
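Before the examples, here is a minimal sketch of how the exception typically surfaces; it is illustrative only (the class name OutputCollisionDemo and the path /tmp/existing-output are made up). FileOutputFormat.checkOutputSpecs() runs early during job submission and throws org.apache.hadoop.mapred.FileAlreadyExistsException if the configured output directory already exists:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileAlreadyExistsException;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class OutputCollisionDemo {
  public static void main(String[] args) throws Exception {
    // Mapper/reducer and input setup omitted; the output spec check runs
    // early in submission, so the collision is reported before anything else.
    Job job = Job.getInstance(new Configuration(), "collision-demo");
    job.setOutputFormatClass(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path("/tmp/existing-output"));
    try {
      job.waitForCompletion(true);
    } catch (FileAlreadyExistsException e) {
      System.err.println("Refusing to clobber output: " + e.getMessage());
    }
  }
}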
public void checkOutputSpecs(JobContext job)
    throws FileAlreadyExistsException, IOException {
  // Ensure that the output directory is set and not already there
  Path outDir = getOutputPath(job);
  if (outDir == null) {
    throw new InvalidJobConfException("Output directory not set.");
  }

  // get delegation token for outDir's file system
  TokenCache.obtainTokensForNamenodes(job.getCredentials(),
      new Path[] { outDir }, job.getConfiguration());

  if (outDir.getFileSystem(job.getConfiguration()).exists(outDir)) {
    throw new FileAlreadyExistsException("Output directory " + outDir +
        " already exists");
  }
}
/**
 * Overridden to avoid throwing an exception if the specified directory
 * for export already exists.
 */
@Override
public void checkOutputSpecs(JobContext job)
    throws FileAlreadyExistsException, IOException {
  Path outDir = getOutputPath(job);
  if (outDir == null) {
    throw new InvalidJobConfException("Output directory not set.");
  } else {
    TokenCache.obtainTokensForNamenodes(job.getCredentials(),
        new Path[] { outDir }, job.getConfiguration());
    /*
    if (outDir.getFileSystem(job.getConfiguration()).exists(outDir)) {
      System.out.println("Output dir already exists, no problem");
      throw new FileAlreadyExistsException("Output directory " + outDir + " already exists");
    }
    */
  }
}
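The override above tolerates an existing export directory by commenting out the existence check. When the output format's source cannot be edited, the same effect can be achieved by subclassing. A minimal sketch under that assumption; the class name LenientTextOutputFormat is hypothetical:

import java.io.IOException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.InvalidJobConfException;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.mapreduce.security.TokenCache;

// Keep the null check and token acquisition from FileOutputFormat,
// but skip the exists() check so pre-existing directories are accepted.
public class LenientTextOutputFormat<K, V> extends TextOutputFormat<K, V> {
  @Override
  public void checkOutputSpecs(JobContext job) throws IOException {
    Path outDir = getOutputPath(job);
    if (outDir == null) {
      throw new InvalidJobConfException("Output directory not set.");
    }
    TokenCache.obtainTokensForNamenodes(job.getCredentials(),
        new Path[] { outDir }, job.getConfiguration());
    // Deliberately no FileAlreadyExistsException here.
  }
}

Wire it into a job with job.setOutputFormatClass(LenientTextOutputFormat.class).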
@Override
public void checkOutputSpecs(FileSystem ignored, JobConf job)
    throws FileAlreadyExistsException, InvalidJobConfException, IOException {
  // Ensure that the output directory is set and not already there
  Path outDir = getOutputPath(job);
  if (outDir == null && job.getNumReduceTasks() != 0) {
    throw new InvalidJobConfException("Output directory not set in JobConf.");
  }
  if (outDir != null) {
    FileSystem fs = outDir.getFileSystem(job);
    // normalize the output directory
    outDir = fs.makeQualified(outDir);
    setOutputPath(job, outDir);

    // get delegation token for the outDir's file system
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), new Path[] { outDir }, job);

    String jobUuid = job.get("zephyr.job.uuid");
    if (jobUuid == null) {
      throw new InvalidJobConfException(
          "This output format REQUIRES the value zephyr.job.uuid to be specified in the job configuration!");
    }
    // // check its existence
    // if (fs.exists(outDir)) {
    //   throw new FileAlreadyExistsException("Output directory " + outDir
    //       + " already exists");
    // }
  }
}
@Override
public void checkOutputSpecs(JobContext job)
    throws InvalidJobConfException, IOException {
  // Ensure that the output directory is set
  Path outDir = getOutputPath(job);
  if (outDir == null) {
    throw new InvalidJobConfException("Output directory not set in JobConf.");
  }

  final Configuration jobConf = job.getConfiguration();

  // get delegation token for outDir's file system
  TokenCache.obtainTokensForNamenodes(job.getCredentials(),
      new Path[] { outDir }, jobConf);

  final FileSystem fs = outDir.getFileSystem(jobConf);
  if (fs.exists(outDir)) {
    // existing output dir is considered empty iff its only content is the
    // partition file
    final FileStatus[] outDirKids = fs.listStatus(outDir);
    boolean empty = false;
    if (outDirKids != null && outDirKids.length == 1) {
      final FileStatus st = outDirKids[0];
      final String fname = st.getPath().getName();
      empty = !st.isDirectory() && TeraInputFormat.PARTITION_FILENAME.equals(fname);
    }
    if (TeraSort.getUseSimplePartitioner(job) || !empty) {
      throw new FileAlreadyExistsException("Output directory " + outDir
          + " already exists");
    }
  }
}
@Override
public void checkOutputSpecs(JobContext job)
    throws InvalidJobConfException, IOException {
  // Ensure that the output directory is set
  Path outDir = getOutputPath(job);
  if (outDir == null) {
    throw new InvalidJobConfException("Output directory not set in JobConf.");
  }

  final Configuration jobConf = job.getConfiguration();

  // get delegation token for outDir's file system
  TokenCache.obtainTokensForNamenodes(job.getCredentials(),
      new Path[] { outDir }, jobConf);

  final FileSystem fs = outDir.getFileSystem(jobConf);
  try {
    // existing output dir is considered empty iff its only content is the
    // partition file
    final FileStatus[] outDirKids = fs.listStatus(outDir);
    boolean empty = false;
    if (outDirKids != null && outDirKids.length == 1) {
      final FileStatus st = outDirKids[0];
      final String fname = st.getPath().getName();
      empty = !st.isDirectory() && TeraInputFormat.PARTITION_FILENAME.equals(fname);
    }
    if (TeraSort.getUseSimplePartitioner(job) || !empty) {
      throw new FileAlreadyExistsException("Output directory " + outDir
          + " already exists");
    }
  } catch (FileNotFoundException ignored) {
    // output dir does not exist yet: nothing to check
  }
}
/**
 * Checks to make sure the configuration is valid, the output path doesn't
 * already exist, and that a connection to BigQuery can be established.
 */
@Override
public void checkOutputSpecs(JobContext job) throws FileAlreadyExistsException, IOException {
  Configuration conf = job.getConfiguration();

  // Validate the output configuration.
  BigQueryOutputConfiguration.validateConfiguration(conf);

  // Get the output path.
  Path outputPath = BigQueryOutputConfiguration.getGcsOutputPath(conf);
  logger.atInfo().log("Using output path '%s'.", outputPath);

  // Error if the output path already exists.
  FileSystem outputFileSystem = outputPath.getFileSystem(conf);
  if (outputFileSystem.exists(outputPath)) {
    throw new IOException("The output path '" + outputPath + "' already exists.");
  }

  // Error if compression is set, as there's mixed support in BigQuery.
  if (FileOutputFormat.getCompressOutput(job)) {
    throw new IOException("Compression isn't supported for this OutputFormat.");
  }

  // Error if unable to create a BigQuery helper.
  try {
    new BigQueryFactory().getBigQueryHelper(conf);
  } catch (GeneralSecurityException gse) {
    throw new IOException("Failed to create BigQuery client", gse);
  }

  // Let the delegate process its checks.
  getDelegate(conf).checkOutputSpecs(job);
}
private void checkOutputSpecsHelper(List<POStore> stores, JobContext jobcontext)
    throws IOException, InterruptedException {
  for (POStore store : stores) {
    // make a copy of the original JobContext so that
    // each OutputFormat gets a different copy
    JobContext jobContextCopy = HadoopShims.createJobContext(
        jobcontext.getConfiguration(), jobcontext.getJobID());

    // set output location
    PigOutputFormat.setLocation(jobContextCopy, store);
    StoreFuncInterface sFunc = store.getStoreFunc();
    OutputFormat of = sFunc.getOutputFormat();

    // The above call should have updated the conf in the JobContext
    // to have the output location - now call checkOutputSpecs()
    try {
      of.checkOutputSpecs(jobContextCopy);
    } catch (IOException ioe) {
      boolean shouldThrowException = true;
      if (sFunc instanceof OverwritableStoreFunc
          && ((OverwritableStoreFunc) sFunc).shouldOverwrite()
          && (ioe instanceof FileAlreadyExistsException
              || ioe instanceof org.apache.hadoop.fs.FileAlreadyExistsException)) {
        shouldThrowException = false;
      }
      if (shouldThrowException) {
        throw ioe;
      }
    }
  }
}
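Note that the helper above tests for two distinct classes: org.apache.hadoop.mapred.FileAlreadyExistsException and org.apache.hadoop.fs.FileAlreadyExistsException are unrelated siblings (both extend IOException), so tolerating an existing output path requires checking both. A standalone sketch of that catch pattern, assuming an OutputFormat of and a JobContext jobContextCopy in scope and an enclosing method that, like the helper above, declares InterruptedException:

try {
  of.checkOutputSpecs(jobContextCopy);
} catch (IOException ioe) {
  // Treat either variant as "output already exists"; rethrow anything else.
  boolean outputExists =
      ioe instanceof org.apache.hadoop.mapred.FileAlreadyExistsException
          || ioe instanceof org.apache.hadoop.fs.FileAlreadyExistsException;
  if (!outputExists) {
    throw ioe;
  }
  // fall through: the caller has opted in to overwriting
}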
@Test
public void testNativeMRJobSimpleFailure() throws Exception {
  try {
    // test if the correct return code is obtained when the query fails:
    // the native MR job is writing to an existing directory and should fail
    Collection<String> results = new HashSet<String>();
    results.add("(one,1)");
    results.add("(two,2)");
    results.add("(three,3)");

    pigServer.setBatchOn();
    pigServer.registerQuery("A = load '" + INPUT_FILE + "';");
    pigServer.registerQuery("B = mapreduce '" + jarFileName + "' " +
        "Store A into 'table_testNativeMRJobSimple_input' " +
        "Load 'table_testNativeMRJobSimple_output' " +
        "`org.apache.pig.test.utils.WordCount table_testNativeMRJobSimple_input " + INPUT_FILE + "`;");
    pigServer.registerQuery("Store B into 'table_testNativeMRJobSimpleDir';");
    pigServer.executeBatch();

    assertTrue("job failed", PigStats.get().getReturnCode() != 0);
  } catch (JobCreationException e) {
    // Running in Tez mode throws an exception instead
    assertTrue(e.getCause() instanceof FileAlreadyExistsException);
  } finally {
    // We have to manually delete intermediate mapreduce files
    Util.deleteFile(cluster, "table_testNativeMRJobSimple_input");
    Util.deleteFile(cluster, "table_testNativeMRJobSimpleDir");
  }
}
@Test
public void testAlreadyExistsOutputPath() {
  String tableName = "TABLE9";
  String outputPath = "/tmp/output/tabl9";
  try {
    Statement stmt = conn.createStatement();
    stmt.execute("CREATE TABLE " + tableName + "(ID INTEGER NOT NULL PRIMARY KEY, "
        + "FIRST_NAME VARCHAR, LAST_NAME VARCHAR)");

    // Pre-create the output path so the bulk load collides with it.
    FileSystem fs = FileSystem.get(getUtility().getConfiguration());
    fs.create(new Path(outputPath));

    FSDataOutputStream outputStream = fs.create(new Path("/tmp/input9.csv"));
    PrintWriter printWriter = new PrintWriter(outputStream);
    printWriter.println("1,FirstName 1,LastName 1");
    printWriter.println("2,FirstName 2,LastName 2");
    printWriter.close();

    CsvBulkLoadTool csvBulkLoadTool = new CsvBulkLoadTool();
    csvBulkLoadTool.setConf(getUtility().getConfiguration());
    csvBulkLoadTool.run(new String[] {
        "--input", "/tmp/input9.csv",
        "--output", outputPath,
        "--table", tableName,
        "--zookeeper", zkQuorum });
    fail(String.format("Output path %s already exists, hence the load should fail", outputPath));
  } catch (Exception ex) {
    assertTrue(ex instanceof FileAlreadyExistsException);
  }
}
@Test
public void testAlreadyExistsOutputPath() {
  String tableName = "TABLE9";
  String outputPath = "/tmp/output/tabl9";
  try {
    Statement stmt = conn.createStatement();
    stmt.execute("CREATE TABLE " + tableName + "(ID INTEGER NOT NULL PRIMARY KEY, "
        + "FIRST_NAME VARCHAR, LAST_NAME VARCHAR)");

    // Pre-create the output path so the bulk load collides with it.
    FileSystem fs = FileSystem.get(getUtility().getConfiguration());
    fs.create(new Path(outputPath));

    FSDataOutputStream outputStream = fs.create(new Path("/tmp/input9.csv"));
    PrintWriter printWriter = new PrintWriter(outputStream);
    printWriter.println("1,FirstName 1,LastName 1");
    printWriter.println("2,FirstName 2,LastName 2");
    printWriter.close();

    RegexBulkLoadTool regexBulkLoadTool = new RegexBulkLoadTool();
    regexBulkLoadTool.setConf(getUtility().getConfiguration());
    regexBulkLoadTool.run(new String[] {
        "--input", "/tmp/input9.csv",
        "--output", outputPath,
        "--table", tableName,
        "--regex", "([^,]*),([^,]*),([^,]*)",
        "--zookeeper", zkQuorum });
    fail(String.format("Output path %s already exists, hence the load should fail", outputPath));
  } catch (Exception ex) {
    assertTrue(ex instanceof FileAlreadyExistsException);
  }
}
public void checkOutputSpecs(JobContext job)
    throws FileAlreadyExistsException, IOException {
  // Ensure that the output directory is set and not already there
  Path outDir = getOutputPath(job);
  if (outDir == null) {
    throw new InvalidJobConfException("Output directory not set.");
  }
  if (outDir.getFileSystem(job.getConfiguration()).exists(outDir)) {
    throw new FileAlreadyExistsException("Output directory " + outDir +
        " already exists");
  }
}
@Test
public void testHadoopExceptionCreation() throws Exception {
  Object object = PigContext
      .instantiateFuncFromSpec("org.apache.hadoop.mapred.FileAlreadyExistsException");
  assertTrue(object instanceof FileAlreadyExistsException);
}
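The test above exercises Pig's reflective class loading. Roughly the same check can be written with plain Java reflection; a sketch that relies on the exception class having a no-argument constructor:

// Load the exception class by name and instantiate it reflectively.
Class<?> cls = Class.forName("org.apache.hadoop.mapred.FileAlreadyExistsException");
Object object = cls.getDeclaredConstructor().newInstance();
assertTrue(object instanceof org.apache.hadoop.mapred.FileAlreadyExistsException);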
@Override
public void checkOutputSpecs(JobContext job) throws FileAlreadyExistsException, IOException {
  // Deliberately empty: accept any output directory, even one that already exists.
}
/**
 * Override the default FileOutputFormat's checkOutputSpecs() to
 * allow the target directory to already exist.
 */
public void checkOutputSpecs(FileSystem ignored, JobConf job)
    throws FileAlreadyExistsException, InvalidJobConfException, IOException {
  // Deliberately empty: no output-existence check is performed.
}
public int submitAndMonitorJob() throws IOException {
  if (jar_ != null && isLocalHadoop()) {
    // getAbs became required when shell and subvm have different working dirs...
    File wd = new File(".").getAbsoluteFile();
    StreamUtil.unJar(new File(jar_), wd);
  }

  // if jobConf_ changes must recreate a JobClient
  jc_ = new JobClient(jobConf_);
  boolean error = true;
  running_ = null;
  String lastReport = null;
  try {
    running_ = jc_.submitJob(jobConf_);
    jobId_ = running_.getID();
    LOG.info("getLocalDirs(): " + Arrays.asList(jobConf_.getLocalDirs()));
    LOG.info("Running job: " + jobId_);
    jobInfo();

    while (!running_.isComplete()) {
      try {
        Thread.sleep(1000);
      } catch (InterruptedException e) {
      }
      running_ = jc_.getJob(jobId_);
      String report = " map " + Math.round(running_.mapProgress() * 100) + "% reduce "
          + Math.round(running_.reduceProgress() * 100) + "%";
      if (!report.equals(lastReport)) {
        LOG.info(report);
        lastReport = report;
      }
    }

    if (!running_.isSuccessful()) {
      jobInfo();
      LOG.error("Job not successful!");
      return 1;
    }
    LOG.info("Job complete: " + jobId_);
    LOG.info("Output: " + output_);
    error = false;
  } catch (FileNotFoundException fe) {
    LOG.error("Error launching job, bad input path: " + fe.getMessage());
    return 2;
  } catch (InvalidJobConfException je) {
    LOG.error("Error launching job, invalid job conf: " + je.getMessage());
    return 3;
  } catch (FileAlreadyExistsException fae) {
    LOG.error("Error launching job, output path already exists: " + fae.getMessage());
    return 4;
  } catch (IOException ioe) {
    LOG.error("Error launching job: " + ioe.getMessage());
    return 5;
  } finally {
    if (error && (running_ != null)) {
      LOG.info("killJob...");
      running_.killJob();
    }
    jc_.close();
  }
  return 0;
}
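The method above encodes each failure cause as a distinct return code, with 4 reserved for an existing output path. A hypothetical caller (streamJob stands in for whatever object exposes submitAndMonitorJob()) might surface the codes like this:

int rc = streamJob.submitAndMonitorJob();
switch (rc) {
  case 0: System.out.println("job succeeded"); break;
  case 1: System.err.println("job ran but was not successful"); break;
  case 2: System.err.println("bad input path"); break;
  case 3: System.err.println("invalid job configuration"); break;
  case 4: System.err.println("output path already exists"); break;
  case 5: System.err.println("I/O error launching job"); break;
  default: System.err.println("unexpected return code " + rc); break;
}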