Source Code Examples of the org.apache.hadoop.mapreduce.JobSubmissionFiles Class

The following examples show how to use the org.apache.hadoop.mapreduce.JobSubmissionFiles API in real projects; you can also follow the links to view the full source code on GitHub.
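
Before the project examples, here is a minimal self-contained sketch of the three JobSubmissionFiles calls the examples below rely on. It is not taken from any of the listed projects; the default Configuration and the per-job directory name are illustrative assumptions.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Cluster;
import org.apache.hadoop.mapreduce.JobSubmissionFiles;

public class JobSubmissionFilesSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    // Per-user staging directory; created with the expected ownership and
    // permissions if it does not already exist.
    Path stagingDir = JobSubmissionFiles.getStagingDir(new Cluster(conf), conf);

    // Split data and split metadata file locations inside a (hypothetical)
    // per-job submit directory under the staging directory.
    Path jobSubmitDir = new Path(stagingDir, "job_sketch_0001");
    Path splitFile = JobSubmissionFiles.getJobSplitFile(jobSubmitDir);         // .../job.split
    Path splitMetaFile = JobSubmissionFiles.getJobSplitMetaFile(jobSubmitDir); // .../job.splitmetainfo

    System.out.println(stagingDir + "\n" + splitFile + "\n" + splitMetaFile);
  }
}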

Example 1  Project: hadoop   File: TestExternalCall.java
/**
* Test the run and execute methods of the DistCp class; simple file copy.
* @throws Exception 
*/
 @Test
 public void testCleanup() throws Exception {

     Configuration conf = getConf();

     Path stagingDir = JobSubmissionFiles.getStagingDir(new Cluster(conf),
         conf);
     stagingDir.getFileSystem(conf).mkdirs(stagingDir);
     Path soure = createFile("tmp.txt");
     Path target = createFile("target.txt");

     DistCp distcp = new DistCp(conf, null);
     String[] arg = { soure.toString(), target.toString() };

     distcp.run(arg);
     Assert.assertTrue(fs.exists(target));

 
 }
 
Example 2  Project: hadoop   File: TestExternalCall.java
/**
 * Test the main method of DistCp; the method is expected to call System.exit().
 * 
 */
@Test
public void testCleanupTestViaToolRunner() throws IOException, InterruptedException {

  Configuration conf = getConf();

  Path stagingDir = JobSubmissionFiles.getStagingDir(new Cluster(conf), conf);
  stagingDir.getFileSystem(conf).mkdirs(stagingDir);
 
  Path soure = createFile("tmp.txt");
  Path target = createFile("target.txt");
  try {

    String[] arg = {target.toString(),soure.toString()};
    DistCp.main(arg);
    Assert.fail();

  } catch (ExitException t) {
    Assert.assertTrue(fs.exists(target));
    Assert.assertEquals(t.status, 0);
    Assert.assertEquals(
        stagingDir.getFileSystem(conf).listStatus(stagingDir).length, 0);
  }

}
 
Example 3  Project: hadoop   File: TestIntegration.java
@Test(timeout=100000)
public void testCleanup() {
  try {
    Path sourcePath = new Path("noscheme:///file");
    List<Path> sources = new ArrayList<Path>();
    sources.add(sourcePath);

    DistCpOptions options = new DistCpOptions(sources, target);

    Configuration conf = getConf();
    Path stagingDir = JobSubmissionFiles.getStagingDir(
            new Cluster(conf), conf);
    stagingDir.getFileSystem(conf).mkdirs(stagingDir);

    try {
      new DistCp(conf, options).execute();
    } catch (Throwable t) {
      Assert.assertEquals(stagingDir.getFileSystem(conf).
          listStatus(stagingDir).length, 0);
    }
  } catch (Exception e) {
    LOG.error("Exception encountered ", e);
    Assert.fail("testCleanup failed " + e.getMessage());
  }
}
 
Example 4  Project: big-c   File: TestExternalCall.java
/**
* Test the run and execute methods of the DistCp class; simple file copy.
* @throws Exception 
*/
 @Test
 public void testCleanup() throws Exception {

     Configuration conf = getConf();

     Path stagingDir = JobSubmissionFiles.getStagingDir(new Cluster(conf),
         conf);
     stagingDir.getFileSystem(conf).mkdirs(stagingDir);
     Path soure = createFile("tmp.txt");
     Path target = createFile("target.txt");

     DistCp distcp = new DistCp(conf, null);
     String[] arg = { soure.toString(), target.toString() };

     distcp.run(arg);
     Assert.assertTrue(fs.exists(target));

 
 }
 
Example 5  Project: big-c   File: TestExternalCall.java
/**
 * Test the main method of DistCp; the method is expected to call System.exit().
 * 
 */
@Test
public void testCleanupTestViaToolRunner() throws IOException, InterruptedException {

  Configuration conf = getConf();

  Path stagingDir = JobSubmissionFiles.getStagingDir(new Cluster(conf), conf);
  stagingDir.getFileSystem(conf).mkdirs(stagingDir);
 
  Path soure = createFile("tmp.txt");
  Path target = createFile("target.txt");
  try {

    String[] arg = {target.toString(),soure.toString()};
    DistCp.main(arg);
    Assert.fail();

  } catch (ExitException t) {
    Assert.assertTrue(fs.exists(target));
    Assert.assertEquals(t.status, 0);
    Assert.assertEquals(
        stagingDir.getFileSystem(conf).listStatus(stagingDir).length, 0);
  }

}
 
Example 6  Project: big-c   File: TestIntegration.java
@Test(timeout=100000)
public void testCleanup() {
  try {
    Path sourcePath = new Path("noscheme:///file");
    List<Path> sources = new ArrayList<Path>();
    sources.add(sourcePath);

    DistCpOptions options = new DistCpOptions(sources, target);

    Configuration conf = getConf();
    Path stagingDir = JobSubmissionFiles.getStagingDir(
            new Cluster(conf), conf);
    stagingDir.getFileSystem(conf).mkdirs(stagingDir);

    try {
      new DistCp(conf, options).execute();
    } catch (Throwable t) {
      Assert.assertEquals(stagingDir.getFileSystem(conf).
          listStatus(stagingDir).length, 0);
    }
  } catch (Exception e) {
    LOG.error("Exception encountered ", e);
    Assert.fail("testCleanup failed " + e.getMessage());
  }
}
 
Example 7  Project: incubator-tez   File: MRHelpers.java
/**
 * Generate new-api mapreduce InputFormat splits
 * @param jobContext JobContext required by InputFormat
 * @param inputSplitDir Directory in which to generate splits information
 *
 * @return InputSplitInfo containing the split files' information and the
 * location hints for each generated split, used to determine the parallelism of
 * the map stage.
 *
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
private static InputSplitInfoDisk writeNewSplits(JobContext jobContext,
    Path inputSplitDir) throws IOException, InterruptedException,
    ClassNotFoundException {
  
  org.apache.hadoop.mapreduce.InputSplit[] splits = 
      generateNewSplits(jobContext, null, 0);
  
  Configuration conf = jobContext.getConfiguration();

  JobSplitWriter.createSplitFiles(inputSplitDir, conf,
      inputSplitDir.getFileSystem(conf), splits);

  List<TaskLocationHint> locationHints =
      new ArrayList<TaskLocationHint>(splits.length);
  for (int i = 0; i < splits.length; ++i) {
    locationHints.add(
        new TaskLocationHint(new HashSet<String>(
            Arrays.asList(splits[i].getLocations())), null));
  }

  return new InputSplitInfoDisk(
      JobSubmissionFiles.getJobSplitFile(inputSplitDir),
      JobSubmissionFiles.getJobSplitMetaFile(inputSplitDir),
      splits.length, locationHints, jobContext.getCredentials());
}
 
Example 8  Project: incubator-tez   File: MRHelpers.java
/**
 * Generate old-api mapred InputFormat splits
 * @param jobConf JobConf required by InputFormat class
 * @param inputSplitDir Directory in which to generate splits information
 *
 * @return InputSplitInfo containing the split files' information and the
 * number of generated splits, used to determine the parallelism of
 * the map stage.
 *
 * @throws IOException
 */
private static InputSplitInfoDisk writeOldSplits(JobConf jobConf,
    Path inputSplitDir) throws IOException {
  
  org.apache.hadoop.mapred.InputSplit[] splits = 
      generateOldSplits(jobConf, null, 0);
  
  JobSplitWriter.createSplitFiles(inputSplitDir, jobConf,
      inputSplitDir.getFileSystem(jobConf), splits);

  List<TaskLocationHint> locationHints =
      new ArrayList<TaskLocationHint>(splits.length);
  for (int i = 0; i < splits.length; ++i) {
    locationHints.add(
        new TaskLocationHint(new HashSet<String>(
            Arrays.asList(splits[i].getLocations())), null));
  }

  return new InputSplitInfoDisk(
      JobSubmissionFiles.getJobSplitFile(inputSplitDir),
      JobSubmissionFiles.getJobSplitMetaFile(inputSplitDir),
      splits.length, locationHints, jobConf.getCredentials());
}
 
Example 9  Project: tez   File: MRInputHelpers.java
/**
 * Generate old-api mapred InputFormat splits
 * @param jobConf JobConf required by InputFormat class
 * @param inputSplitDir Directory in which to generate splits information
 *
 * @return InputSplitInfo containing the split files' information and the
 * number of generated splits, used to determine the parallelism of
 * the map stage.
 *
 * @throws IOException
 */
private static InputSplitInfoDisk writeOldSplits(JobConf jobConf,
                                                 Path inputSplitDir) throws IOException {

  org.apache.hadoop.mapred.InputSplit[] splits =
      generateOldSplits(jobConf, false, true, 0);

  JobSplitWriter.createSplitFiles(inputSplitDir, jobConf,
      inputSplitDir.getFileSystem(jobConf), splits);

  List<TaskLocationHint> locationHints =
      new ArrayList<TaskLocationHint>(splits.length);
  for (int i = 0; i < splits.length; ++i) {
    locationHints.add(
        TaskLocationHint.createTaskLocationHint(new HashSet<String>(
            Arrays.asList(splits[i].getLocations())), null)
    );
  }

  return new InputSplitInfoDisk(
      JobSubmissionFiles.getJobSplitFile(inputSplitDir),
      JobSubmissionFiles.getJobSplitMetaFile(inputSplitDir),
      splits.length, locationHints, jobConf.getCredentials());
}
 
Example 10  Project: circus-train   File: S3MapReduceCp.java
/**
 * Create a default working folder for the job, under the job staging directory
 *
 * @return the working folder path
 * @throws Exception if any error occurs
 */
private Path createMetaFolderPath() throws Exception {
  Configuration configuration = getConf();
  Path stagingDir = JobSubmissionFiles.getStagingDir(new Cluster(configuration), configuration);
  Path metaFolderPath = new Path(stagingDir, PREFIX + String.valueOf(rand.nextInt()));
  LOG.debug("Meta folder location: {}", metaFolderPath);
  configuration.set(S3MapReduceCpConstants.CONF_LABEL_META_FOLDER, metaFolderPath.toString());
  return metaFolderPath;
}
 
Example 11  Project: hadoop   File: SplitMetaInfoReader.java
public static JobSplit.TaskSplitMetaInfo[] readSplitMetaInfo(
    JobID jobId, FileSystem fs, Configuration conf, Path jobSubmitDir) 
throws IOException {
  long maxMetaInfoSize = conf.getLong(MRJobConfig.SPLIT_METAINFO_MAXSIZE,
      MRJobConfig.DEFAULT_SPLIT_METAINFO_MAXSIZE);
  Path metaSplitFile = JobSubmissionFiles.getJobSplitMetaFile(jobSubmitDir);
  String jobSplitFile = JobSubmissionFiles.getJobSplitFile(jobSubmitDir).toString();
  FileStatus fStatus = fs.getFileStatus(metaSplitFile);
  if (maxMetaInfoSize > 0 && fStatus.getLen() > maxMetaInfoSize) {
    throw new IOException("Split metadata size exceeded " +
        maxMetaInfoSize +". Aborting job " + jobId);
  }
  FSDataInputStream in = fs.open(metaSplitFile);
  byte[] header = new byte[JobSplit.META_SPLIT_FILE_HEADER.length];
  in.readFully(header);
  if (!Arrays.equals(JobSplit.META_SPLIT_FILE_HEADER, header)) {
    throw new IOException("Invalid header on split file");
  }
  int vers = WritableUtils.readVInt(in);
  if (vers != JobSplit.META_SPLIT_VERSION) {
    in.close();
    throw new IOException("Unsupported split version " + vers);
  }
  int numSplits = WritableUtils.readVInt(in); //TODO: check for insane values
  JobSplit.TaskSplitMetaInfo[] allSplitMetaInfo = 
    new JobSplit.TaskSplitMetaInfo[numSplits];
  for (int i = 0; i < numSplits; i++) {
    JobSplit.SplitMetaInfo splitMetaInfo = new JobSplit.SplitMetaInfo();
    splitMetaInfo.readFields(in);
    JobSplit.TaskSplitIndex splitIndex = new JobSplit.TaskSplitIndex(
        jobSplitFile, 
        splitMetaInfo.getStartOffset());
    allSplitMetaInfo[i] = new JobSplit.TaskSplitMetaInfo(splitIndex, 
        splitMetaInfo.getLocations(), 
        splitMetaInfo.getInputDataLength());
  }
  in.close();
  return allSplitMetaInfo;
}
 
Example 12  Project: hadoop   File: JobSplitWriter.java
public static <T extends InputSplit> void createSplitFiles(Path jobSubmitDir, 
    Configuration conf, FileSystem fs, T[] splits) 
throws IOException, InterruptedException {
  FSDataOutputStream out = createFile(fs, 
      JobSubmissionFiles.getJobSplitFile(jobSubmitDir), conf);
  SplitMetaInfo[] info = writeNewSplits(conf, splits, out);
  out.close();
  writeJobSplitMetaInfo(fs,JobSubmissionFiles.getJobSplitMetaFile(jobSubmitDir), 
      new FsPermission(JobSubmissionFiles.JOB_FILE_PERMISSION), splitVersion,
      info);
}
 
Example 13  Project: hadoop   File: JobSplitWriter.java
public static void createSplitFiles(Path jobSubmitDir, 
    Configuration conf, FileSystem fs, 
    org.apache.hadoop.mapred.InputSplit[] splits) 
throws IOException {
  FSDataOutputStream out = createFile(fs, 
      JobSubmissionFiles.getJobSplitFile(jobSubmitDir), conf);
  SplitMetaInfo[] info = writeOldSplits(splits, out, conf);
  out.close();
  writeJobSplitMetaInfo(fs,JobSubmissionFiles.getJobSplitMetaFile(jobSubmitDir), 
      new FsPermission(JobSubmissionFiles.JOB_FILE_PERMISSION), splitVersion,
      info);
}
 
Example 14  Project: hadoop   File: JobSplitWriter.java
private static FSDataOutputStream createFile(FileSystem fs, Path splitFile, 
    Configuration job)  throws IOException {
  FSDataOutputStream out = FileSystem.create(fs, splitFile, 
      new FsPermission(JobSubmissionFiles.JOB_FILE_PERMISSION));
  int replication = job.getInt(Job.SUBMIT_REPLICATION, 10);
  fs.setReplication(splitFile, (short)replication);
  writeSplitHeader(out);
  return out;
}
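
Examples 11 through 14 are two halves of one mechanism: JobSplitWriter writes the job.split and job.splitmetainfo files into the job submit directory, and SplitMetaInfoReader reads the metadata back. Below is a hedged round-trip sketch using the new-api overload from example 12; it is not part of the hadoop code base, and the local paths, host names, and job id are illustrative.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.split.JobSplit;
import org.apache.hadoop.mapreduce.split.JobSplitWriter;
import org.apache.hadoop.mapreduce.split.SplitMetaInfoReader;

public class SplitFilesRoundTrip {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);
    Path jobSubmitDir = fs.makeQualified(new Path("/tmp/sketch-submit-dir")); // illustrative

    // Hand-built new-api splits stand in for InputFormat.getSplits(...).
    InputSplit[] splits = {
        new FileSplit(new Path("/tmp/data/part-0"), 0L, 128L, new String[] { "host1" }),
        new FileSplit(new Path("/tmp/data/part-1"), 0L, 256L, new String[] { "host2" })
    };

    // Writes job.split and job.splitmetainfo under jobSubmitDir (example 12).
    JobSplitWriter.createSplitFiles(jobSubmitDir, conf, fs, splits);

    // Reads job.splitmetainfo back into per-task split metadata (example 11).
    JobSplit.TaskSplitMetaInfo[] meta = SplitMetaInfoReader.readSplitMetaInfo(
        new JobID("sketch", 1), fs, conf, jobSubmitDir);
    System.out.println("splits written and read back: " + meta.length);
  }
}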
 
Example 15  Project: hadoop   File: DistCp.java
/**
 * Create a default working folder for the job, under the
 * job staging directory
 *
 * @return the working folder path
 * @throws Exception if any error occurs
 */
private Path createMetaFolderPath() throws Exception {
  Configuration configuration = getConf();
  Path stagingDir = JobSubmissionFiles.getStagingDir(
          new Cluster(configuration), configuration);
  Path metaFolderPath = new Path(stagingDir, PREFIX + String.valueOf(rand.nextInt()));
  if (LOG.isDebugEnabled())
    LOG.debug("Meta folder location: " + metaFolderPath);
  configuration.set(DistCpConstants.CONF_LABEL_META_FOLDER, metaFolderPath.toString());    
  return metaFolderPath;
}
 
Example 16  Project: big-c   File: SplitMetaInfoReader.java
public static JobSplit.TaskSplitMetaInfo[] readSplitMetaInfo(
    JobID jobId, FileSystem fs, Configuration conf, Path jobSubmitDir) 
throws IOException {
  long maxMetaInfoSize = conf.getLong(MRJobConfig.SPLIT_METAINFO_MAXSIZE,
      MRJobConfig.DEFAULT_SPLIT_METAINFO_MAXSIZE);
  Path metaSplitFile = JobSubmissionFiles.getJobSplitMetaFile(jobSubmitDir);
  String jobSplitFile = JobSubmissionFiles.getJobSplitFile(jobSubmitDir).toString();
  FileStatus fStatus = fs.getFileStatus(metaSplitFile);
  if (maxMetaInfoSize > 0 && fStatus.getLen() > maxMetaInfoSize) {
    throw new IOException("Split metadata size exceeded " +
        maxMetaInfoSize +". Aborting job " + jobId);
  }
  FSDataInputStream in = fs.open(metaSplitFile);
  byte[] header = new byte[JobSplit.META_SPLIT_FILE_HEADER.length];
  in.readFully(header);
  if (!Arrays.equals(JobSplit.META_SPLIT_FILE_HEADER, header)) {
    throw new IOException("Invalid header on split file");
  }
  int vers = WritableUtils.readVInt(in);
  if (vers != JobSplit.META_SPLIT_VERSION) {
    in.close();
    throw new IOException("Unsupported split version " + vers);
  }
  int numSplits = WritableUtils.readVInt(in); //TODO: check for insane values
  JobSplit.TaskSplitMetaInfo[] allSplitMetaInfo = 
    new JobSplit.TaskSplitMetaInfo[numSplits];
  for (int i = 0; i < numSplits; i++) {
    JobSplit.SplitMetaInfo splitMetaInfo = new JobSplit.SplitMetaInfo();
    splitMetaInfo.readFields(in);
    JobSplit.TaskSplitIndex splitIndex = new JobSplit.TaskSplitIndex(
        jobSplitFile, 
        splitMetaInfo.getStartOffset());
    allSplitMetaInfo[i] = new JobSplit.TaskSplitMetaInfo(splitIndex, 
        splitMetaInfo.getLocations(), 
        splitMetaInfo.getInputDataLength());
  }
  in.close();
  return allSplitMetaInfo;
}
 
Example 17  Project: big-c   File: JobSplitWriter.java
public static <T extends InputSplit> void createSplitFiles(Path jobSubmitDir, 
    Configuration conf, FileSystem fs, T[] splits) 
throws IOException, InterruptedException {
  FSDataOutputStream out = createFile(fs, 
      JobSubmissionFiles.getJobSplitFile(jobSubmitDir), conf);
  SplitMetaInfo[] info = writeNewSplits(conf, splits, out);
  out.close();
  writeJobSplitMetaInfo(fs,JobSubmissionFiles.getJobSplitMetaFile(jobSubmitDir), 
      new FsPermission(JobSubmissionFiles.JOB_FILE_PERMISSION), splitVersion,
      info);
}
 
Example 18  Project: big-c   File: JobSplitWriter.java
public static void createSplitFiles(Path jobSubmitDir, 
    Configuration conf, FileSystem fs, 
    org.apache.hadoop.mapred.InputSplit[] splits) 
throws IOException {
  FSDataOutputStream out = createFile(fs, 
      JobSubmissionFiles.getJobSplitFile(jobSubmitDir), conf);
  SplitMetaInfo[] info = writeOldSplits(splits, out, conf);
  out.close();
  writeJobSplitMetaInfo(fs,JobSubmissionFiles.getJobSplitMetaFile(jobSubmitDir), 
      new FsPermission(JobSubmissionFiles.JOB_FILE_PERMISSION), splitVersion,
      info);
}
 
Example 19  Project: big-c   File: JobSplitWriter.java
private static FSDataOutputStream createFile(FileSystem fs, Path splitFile, 
    Configuration job)  throws IOException {
  FSDataOutputStream out = FileSystem.create(fs, splitFile, 
      new FsPermission(JobSubmissionFiles.JOB_FILE_PERMISSION));
  int replication = job.getInt(Job.SUBMIT_REPLICATION, 10);
  fs.setReplication(splitFile, (short)replication);
  writeSplitHeader(out);
  return out;
}
 
Example 20  Project: big-c   File: DistCp.java
/**
 * Create a default working folder for the job, under the
 * job staging directory
 *
 * @return the working folder path
 * @throws Exception if any error occurs
 */
private Path createMetaFolderPath() throws Exception {
  Configuration configuration = getConf();
  Path stagingDir = JobSubmissionFiles.getStagingDir(
          new Cluster(configuration), configuration);
  Path metaFolderPath = new Path(stagingDir, PREFIX + String.valueOf(rand.nextInt()));
  if (LOG.isDebugEnabled())
    LOG.debug("Meta folder location: " + metaFolderPath);
  configuration.set(DistCpConstants.CONF_LABEL_META_FOLDER, metaFolderPath.toString());    
  return metaFolderPath;
}
 
Example 21  Project: tez   File: MRInputHelpers.java
/**
 * Generate new-api mapreduce InputFormat splits
 * @param jobContext JobContext required by InputFormat
 * @param inputSplitDir Directory in which to generate splits information
 *
 * @return InputSplitInfo containing the split files' information and the
 * location hints for each generated split, used to determine the parallelism of
 * the map stage.
 *
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
private static InputSplitInfoDisk writeNewSplits(JobContext jobContext,
                                                 Path inputSplitDir) throws IOException, InterruptedException,
    ClassNotFoundException {

  org.apache.hadoop.mapreduce.InputSplit[] splits =
      generateNewSplits(jobContext, false, true, 0);

  Configuration conf = jobContext.getConfiguration();

  JobSplitWriter.createSplitFiles(inputSplitDir, conf,
      inputSplitDir.getFileSystem(conf), splits);

  List<TaskLocationHint> locationHints =
      new ArrayList<TaskLocationHint>(splits.length);
  for (int i = 0; i < splits.length; ++i) {
    locationHints.add(
        TaskLocationHint.createTaskLocationHint(new HashSet<String>(
            Arrays.asList(splits[i].getLocations())), null)
    );
  }

  return new InputSplitInfoDisk(
      JobSubmissionFiles.getJobSplitFile(inputSplitDir),
      JobSubmissionFiles.getJobSplitMetaFile(inputSplitDir),
      splits.length, locationHints, jobContext.getCredentials());
}
 
Example 22  Project: hbase   File: JobUtil.java
/**
 * Initializes the staging directory and returns the qualified path.
 *
 * @param conf system configuration
 * @return qualified staging directory path
 * @throws IOException if the ownership on the staging directory is not as expected
 * @throws InterruptedException if the thread getting the staging directory is interrupted
 */
public static Path getQualifiedStagingDir(Configuration conf)
  throws IOException, InterruptedException {
  Cluster cluster = new Cluster(conf);
  Path stagingDir = JobSubmissionFiles.getStagingDir(cluster, conf);
  return cluster.getFileSystem().makeQualified(stagingDir);
}
 
Example 23  Project: hbase   File: JobUtil.java
/**
 * Initializes the staging directory and returns the path.
 *
 * @param conf system configuration
 * @return staging directory path
 * @throws IOException if the ownership on the staging directory is not as expected
 * @throws InterruptedException if the thread getting the staging directory is interrupted
 */
public static Path getStagingDir(Configuration conf)
    throws IOException, InterruptedException {
  return JobSubmissionFiles.getStagingDir(new Cluster(conf), conf);
}
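
A short usage sketch for the two JobUtil helpers above. The caller code is hypothetical (not part of hbase); HBaseConfiguration.create() is assumed as the usual way an hbase tool builds its Configuration.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.mapreduce.JobUtil;

public class StagingDirProbe {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    // Fully qualified form, including the cluster file system scheme and authority.
    Path qualifiedStagingDir = JobUtil.getQualifiedStagingDir(conf);
    System.out.println("Job staging directory: " + qualifiedStagingDir);
  }
}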
 
Methods in the same package