The following are example usages of org.apache.hadoop.fs.Path#getFileSystem(), collected from open-source projects.
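All of the snippets below share one core pattern: Path#getFileSystem(Configuration) resolves the path's URI scheme and authority against the given configuration and returns the matching FileSystem instance. As a minimal, self-contained sketch of that pattern (the /tmp/example.txt path is a placeholder):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetFileSystemExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // A scheme-less path resolves against fs.defaultFS; a fully qualified
    // path such as hdfs://... selects its FileSystem by scheme.
    Path path = new Path("/tmp/example.txt");
    FileSystem fs = path.getFileSystem(conf);
    System.out.println("Resolved filesystem: " + fs.getUri());
    System.out.println("Path exists: " + fs.exists(path));
  }
}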
public boolean needsTaskCommit(TaskAttemptContext context)
    throws IOException {
  Path taskOutputPath = getTempTaskOutputPath(context);
  if (taskOutputPath != null) {
    context.getProgressible().progress();
    // Get the file-system for the task output directory
    FileSystem fs = taskOutputPath.getFileSystem(context.getJobConf());
    // since task output path is created on demand,
    // if it exists, task needs a commit
    if (fs.exists(taskOutputPath)) {
      return true;
    }
  }
  return false;
}
@Test
public void testJksProvider() throws Exception {
  Configuration conf = new Configuration();
  final Path jksPath = new Path(tmpDir.toString(), "test.jks");
  final String ourUrl =
      JavaKeyStoreProvider.SCHEME_NAME + "://file" + jksPath.toUri();
  File file = new File(tmpDir, "test.jks");
  file.delete();
  conf.set(CredentialProviderFactory.CREDENTIAL_PROVIDER_PATH, ourUrl);
  checkSpecificProvider(conf, ourUrl);
  Path path = ProviderUtils.unnestUri(new URI(ourUrl));
  FileSystem fs = path.getFileSystem(conf);
  FileStatus s = fs.getFileStatus(path);
  assertTrue(s.getPermission().toString().equals("rwx------"));
  assertTrue(file + " should exist", file.isFile());
  // check permission retention after explicit change
  fs.setPermission(path, new FsPermission("777"));
  checkPermissionRetention(conf, ourUrl, path);
}
/**
 * <p>
 * Returns the staging directory defined in the config under the property
 * name <code>TezConfiguration.TEZ_AM_STAGING_DIR</code>. If the property is
 * not set in the conf, Tez falls back to the value defined as
 * <code>TezConfiguration.TEZ_AM_STAGING_DIR_DEFAULT</code>. In addition, the
 * function ensures that the staging directory exists; if it does not, it
 * creates the directory with permission <code>TEZ_AM_DIR_PERMISSION</code>.
 * </p>
 *
 * @param conf
 *          TEZ configuration
 * @return fully qualified staging directory
 */
public static Path getTezBaseStagingPath(Configuration conf) {
  String stagingDirStr = conf.get(TezConfiguration.TEZ_AM_STAGING_DIR,
      TezConfiguration.TEZ_AM_STAGING_DIR_DEFAULT);
  Path baseStagingDir;
  try {
    Path p = new Path(stagingDirStr);
    FileSystem fs = p.getFileSystem(conf);
    if (!fs.exists(p)) {
      mkDirForAM(fs, p);
      LOG.info("Staging directory " + p + " did not exist and was created");
    }
    baseStagingDir = fs.resolvePath(p);
  } catch (IOException e) {
    throw new TezUncheckedException(e);
  }
  return baseStagingDir;
}
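A brief usage sketch for the helper above, assuming the tez-api classes are on the classpath; the staging location and the app_0001 subdirectory name are illustrative values only:

Configuration conf = new Configuration();
// assumption: explicit override; otherwise TEZ_AM_STAGING_DIR_DEFAULT applies
conf.set(TezConfiguration.TEZ_AM_STAGING_DIR, "/tmp/tez-staging");
Path baseStagingDir = getTezBaseStagingPath(conf);
// hypothetical per-application directory under the staging root
Path appStagingDir = new Path(baseStagingDir, "app_0001");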
public RecordWriter<Text, Text> getRecordWriter(TaskAttemptContext job)
    throws IOException {
  Path file = getDefaultWorkFile(job, "");
  FileSystem fs = file.getFileSystem(job.getConfiguration());
  FSDataOutputStream fileOut = fs.create(file);
  return new TeraRecordWriter(fileOut, job);
}
@Override
public void setConf(Configuration conf) {
  super.setConf(conf);
  Path rootDir;
  try {
    rootDir = CommonFSUtils.getRootDir(conf);
    // resolve the FileSystem for the root dir (result intentionally unused)
    rootDir.getFileSystem(conf);
  } catch (IOException ex) {
    throw new RuntimeException(ex);
  }
}
protected void setup(Context context) throws IOException,
    InterruptedException {
  conf = context.getConfiguration();
  iteration = Integer.parseInt(conf.get("iteration.number"));
  addIntercept = conf.getBoolean("add.intercept", false);
  rho = conf.getFloat("rho", DEFAULT_RHO);
  regularizationFactor = conf.getFloat("regularization.factor",
      DEFAULT_REGULARIZATION_FACTOR);
  previousIntermediateOutputLocation = conf
      .get("previous.intermediate.output.location");
  previousIntermediateOutputLocationPath = new Path(
      previousIntermediateOutputLocation);
  try {
    fs = previousIntermediateOutputLocationPath.getFileSystem(conf);
  } catch (IOException e) {
    LOG.info(e.toString());
  }
  lbfgs = new QNMinimizer();
  FileSplit split = (FileSplit) context.getInputSplit();
  splitId = split.getPath() + ":" + Long.toString(split.getStart())
      + " - " + Long.toString(split.getLength());
  splitId = removeIpFromHdfsFileName(splitId);
  inputSplitData = new LinkedList<Vector>();
}
public void cleanUp() {
  for (Codec codec : Codec.getCodecs()) {
    Path tmpdir = new Path(codec.tmpParityDirectory, this.getJobID());
    try {
      FileSystem fs = tmpdir.getFileSystem(jobconf);
      if (fs.exists(tmpdir)) {
        fs.delete(tmpdir, true);
      }
    } catch (IOException ioe) {
      LOG.error("Failed to delete " + tmpdir, ioe);
    }
  }
}
private static long getFileSize(String hdfsUrl) throws IOException {
  Configuration configuration = new Configuration();
  Path path = new Path(hdfsUrl);
  FileSystem fs = path.getFileSystem(configuration);
  ContentSummary contentSummary = fs.getContentSummary(path);
  return contentSummary.getLength();
}
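Since FileSystem#getContentSummary aggregates recursively, passing a directory URL returns the combined length of all files beneath it. A usage sketch; the namenode host, port, and file path are placeholders:

// hypothetical HDFS URL; host, port, and path are placeholders
long bytes = getFileSize("hdfs://namenode:8020/user/alice/data.txt");
System.out.println("Total length: " + bytes + " bytes");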
private static SequenceFile.Writer[] createWriters(Path testdir,
    JobConf conf, int srcs, Path[] src) throws IOException {
  for (int i = 0; i < srcs; ++i) {
    src[i] = new Path(testdir, Integer.toString(i + 10, 36));
  }
  SequenceFile.Writer[] out = new SequenceFile.Writer[srcs];
  for (int i = 0; i < srcs; ++i) {
    out[i] = new SequenceFile.Writer(testdir.getFileSystem(conf), conf,
        src[i], Text.class, Text.class);
  }
  return out;
}
public String[] getLocations() throws IOException {
  HashSet<String> hostSet = new HashSet<String>();
  for (Path file : getPaths()) {
    FileSystem fs = file.getFileSystem(getJob());
    FileStatus status = fs.getFileStatus(file);
    BlockLocation[] blkLocations = fs.getFileBlockLocations(status,
        0, status.getLen());
    if (blkLocations != null && blkLocations.length > 0) {
      addToSet(hostSet, blkLocations[0].getHosts());
    }
  }
  return hostSet.toArray(new String[hostSet.size()]);
}
private static SequenceFile.Writer[] createWriters(Path testdir,
    Configuration conf, int srcs, Path[] src) throws IOException {
  for (int i = 0; i < srcs; ++i) {
    src[i] = new Path(testdir, Integer.toString(i + 10, 36));
  }
  SequenceFile.Writer[] out = new SequenceFile.Writer[srcs];
  for (int i = 0; i < srcs; ++i) {
    out[i] = new SequenceFile.Writer(testdir.getFileSystem(conf), conf,
        src[i], IntWritable.class, IntWritable.class);
  }
  return out;
}
public void runTestAndValidate(String selection, String validationSelection,
    String inputTable, String outputFile, boolean sort) throws Exception {
  try {
    deleteTableIfExists(outputFile);
    test("use dfs_test");
    // test("ALTER SESSION SET \"planner.add_producer_consumer\" = false");
    String query = select(selection, inputTable, sort);
    System.out.println(outputFile);
    String create = "CREATE TABLE " + outputFile + " AS " + query;
    String validateQuery = select(validationSelection, outputFile, sort);
    test(create);
    test(validateQuery); // TODO: remove
    testBuilder()
        .unOrdered()
        .sqlQuery(validateQuery)
        .sqlBaselineQuery(query)
        .go();
    Configuration hadoopConf = new Configuration();
    Path output = new Path(getDfsTestTmpSchemaLocation(), outputFile);
    FileSystem fs = output.getFileSystem(hadoopConf);
    for (FileStatus file : fs.listStatus(output)) {
      ParquetMetadata footer = ParquetFileReader.readFooter(hadoopConf, file,
          SKIP_ROW_GROUPS);
      String version = footer.getFileMetaData().getKeyValueMetaData()
          .get(DREMIO_VERSION_PROPERTY);
      assertEquals(DremioVersionInfo.getVersion(), version);
      PageHeaderUtil.validatePageHeaders(file.getPath(), footer);
    }
  } finally {
    deleteTableIfExists(outputFile);
  }
}
/**
* Creates the cache loader for the status loading cache. This should be used
* to create an instance of the status cache that is passed into the
* FSDownload constructor.
*/
public static CacheLoader<Path, Future<FileStatus>>
    createStatusCacheLoader(final Configuration conf) {
  return new CacheLoader<Path, Future<FileStatus>>() {
    public Future<FileStatus> load(Path path) {
      try {
        FileSystem fs = path.getFileSystem(conf);
        return Futures.immediateFuture(fs.getFileStatus(path));
      } catch (Throwable th) {
        // report failures so they can be memoized
        return Futures.immediateFailedFuture(th);
      }
    }
  };
}
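Per the Javadoc above, the returned loader is meant to back a Guava loading cache that is then handed to FSDownload. A minimal wiring sketch, assuming Guava's com.google.common.cache.CacheBuilder and LoadingCache are available; sizing and expiry policies are left at their defaults here:

Configuration conf = new Configuration();
LoadingCache<Path, Future<FileStatus>> statusCache =
    CacheBuilder.newBuilder().build(createStatusCacheLoader(conf));
// Both successful statuses and failed Futures are memoized, so each path
// is probed against its FileSystem at most once.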
static boolean sync(DistCpOptions inputOptions, Configuration conf)
    throws IOException {
  List<Path> sourcePaths = inputOptions.getSourcePaths();
  if (sourcePaths.size() != 1) {
    // we only support one source dir, which must be a snapshottable directory
    throw new IllegalArgumentException(sourcePaths.size()
        + " source paths are provided");
  }
  final Path sourceDir = sourcePaths.get(0);
  final Path targetDir = inputOptions.getTargetPath();
  final FileSystem sfs = sourceDir.getFileSystem(conf);
  final FileSystem tfs = targetDir.getFileSystem(conf);
  // currently we require both the source and the target file system to be
  // DistributedFileSystem
  if (!(sfs instanceof DistributedFileSystem) ||
      !(tfs instanceof DistributedFileSystem)) {
    throw new IllegalArgumentException("Both file systems need to" +
        " be DistributedFileSystem for using snapshot-diff-based distcp");
  }
  final DistributedFileSystem sourceFs = (DistributedFileSystem) sfs;
  final DistributedFileSystem targetFs = (DistributedFileSystem) tfs;
  // make sure the target FS has seen no change between "from" and the
  // current state
  if (!checkNoChange(inputOptions, targetFs, targetDir)) {
    // set the source path using the snapshot path
    inputOptions.setSourcePaths(Arrays.asList(getSourceSnapshotPath(sourceDir,
        inputOptions.getToSnapshot())));
    return false;
  }
  Path tmpDir = null;
  try {
    tmpDir = createTargetTmpDir(targetFs, targetDir);
    DiffInfo[] diffs = getDiffs(inputOptions, sourceFs, sourceDir, targetDir);
    if (diffs == null) {
      return false;
    }
    // do the real sync work: deletion and rename
    syncDiff(diffs, targetFs, tmpDir);
    return true;
  } catch (Exception e) {
    DistCp.LOG.warn("Failed to use snapshot diff for distcp", e);
    return false;
  } finally {
    deleteTargetTmpDir(targetFs, tmpDir);
    // TODO: since we have a tmp directory, we can support "undo" on failures
    // set the source path using the snapshot path
    inputOptions.setSourcePaths(Arrays.asList(getSourceSnapshotPath(sourceDir,
        inputOptions.getToSnapshot())));
  }
}
private void writeFile(JobConf conf, String filename) throws IOException {
  System.out.println("writing file ----" + filename);
  Path outputPath = FileOutputFormat.getOutputPath(conf);
  FileSystem fs = outputPath.getFileSystem(conf);
  fs.create(new Path(outputPath, filename)).close();
}
public void copyFromLocalFile(Path local, Path destination, boolean overwrite)
    throws IOException {
  FileSystem fs = local.getFileSystem(configuration);
  fs.copyFromLocalFile(false, overwrite, local, destination);
}
public int run(String[] argv) throws Exception {
  JobConf job = new JobConf(getConf());
  job.setJarByClass(GenericMRLoadGenerator.class);
  job.setMapperClass(SampleMapper.class);
  job.setReducerClass(SampleReducer.class);
  if (!parseArgs(argv, job)) {
    return -1;
  }
  if (null == FileOutputFormat.getOutputPath(job)) {
    // No output dir? No writes
    job.setOutputFormat(NullOutputFormat.class);
  }
  if (0 == FileInputFormat.getInputPaths(job).length) {
    // No input dir? Generate random data
    System.err.println("No input path; ignoring InputFormat");
    confRandom(job);
  } else if (null != job.getClass(
      org.apache.hadoop.mapreduce.GenericMRLoadGenerator.INDIRECT_INPUT_FORMAT,
      null)) {
    // specified IndirectInputFormat? Build src list
    JobClient jClient = new JobClient(job);
    Path tmpDir = new Path(jClient.getFs().getHomeDirectory(), ".staging");
    Random r = new Random();
    Path indirInputFile = new Path(tmpDir,
        Integer.toString(r.nextInt(Integer.MAX_VALUE), 36) + "_files");
    job.set(
        org.apache.hadoop.mapreduce.GenericMRLoadGenerator.INDIRECT_INPUT_FILE,
        indirInputFile.toString());
    SequenceFile.Writer writer = SequenceFile.createWriter(
        tmpDir.getFileSystem(job), job, indirInputFile,
        LongWritable.class, Text.class,
        SequenceFile.CompressionType.NONE);
    try {
      for (Path p : FileInputFormat.getInputPaths(job)) {
        FileSystem fs = p.getFileSystem(job);
        Stack<Path> pathstack = new Stack<Path>();
        pathstack.push(p);
        while (!pathstack.empty()) {
          for (FileStatus stat : fs.listStatus(pathstack.pop())) {
            if (stat.isDirectory()) {
              if (!stat.getPath().getName().startsWith("_")) {
                pathstack.push(stat.getPath());
              }
            } else {
              writer.sync();
              writer.append(new LongWritable(stat.getLen()),
                  new Text(stat.getPath().toUri().toString()));
            }
          }
        }
      }
    } finally {
      writer.close();
    }
  }
  Date startTime = new Date();
  System.out.println("Job started: " + startTime);
  JobClient.runJob(job);
  Date endTime = new Date();
  System.out.println("Job ended: " + endTime);
  System.out.println("The job took " +
      (endTime.getTime() - startTime.getTime()) / 1000 +
      " seconds.");
  return 0;
}
public List<InputSplit> getSplits(JobContext job) throws IOException {
  boolean delimSplit = isSplitInput(job.getConfiguration());
  // if delimSplit is true, the size of each split is determined by
  // Math.max(minSize, Math.min(maxSize, blockSize)) in FileInputFormat
  List<InputSplit> splits = super.getSplits(job);
  if (!delimSplit) {
    return splits;
  }
  if (splits.size() >= SPLIT_COUNT_LIMIT) {
    // if #splits > 1 million, there is enough parallelism,
    // so there is no point in splitting further
    LOG.warn("Exceeding SPLIT_COUNT_LIMIT, input_split is off: "
        + SPLIT_COUNT_LIMIT);
    DefaultStringifier.store(job.getConfiguration(), false,
        ConfigConstants.CONF_SPLIT_INPUT);
    return splits;
  }
  // add header info into splits
  List<InputSplit> populatedSplits = new ArrayList<InputSplit>();
  LOG.info(splits.size() + " DelimitedSplits generated");
  Configuration conf = job.getConfiguration();
  char delimiter = 0;
  ArrayList<Text> hlist = new ArrayList<Text>();
  for (InputSplit file : splits) {
    FileSplit fsplit = (FileSplit) file;
    Path path = fsplit.getPath();
    FileSystem fs = path.getFileSystem(conf);
    if (fsplit.getStart() == 0) {
      // parse the input split, get the header
      FSDataInputStream fileIn = fs.open(path);
      String delimStr = conf.get(ConfigConstants.CONF_DELIMITER,
          ConfigConstants.DEFAULT_DELIMITER);
      if (delimStr.length() == 1) {
        delimiter = delimStr.charAt(0);
      } else {
        LOG.error("Incorrect delimiter: " + delimStr
            + ". Expects a single character.");
      }
      String encoding = conf.get(
          MarkLogicConstants.OUTPUT_CONTENT_ENCODING,
          MarkLogicConstants.DEFAULT_OUTPUT_CONTENT_ENCODING);
      InputStreamReader instream = new InputStreamReader(fileIn, encoding);
      CSVParser parser = new CSVParser(instream, CSVParserFormatter
          .getFormat(delimiter, DelimitedTextReader.encapsulator,
              true, true));
      Iterator<CSVRecord> it = parser.iterator();
      String[] header = null;
      if (it.hasNext()) {
        CSVRecord record = it.next();
        Iterator<String> recordIterator = record.iterator();
        int recordSize = record.size();
        header = new String[recordSize];
        for (int i = 0; i < recordSize; i++) {
          if (recordIterator.hasNext()) {
            header[i] = recordIterator.next();
          } else {
            throw new IOException("Record size doesn't match the real size");
          }
        }
        EncodingUtil.handleBOMUTF8(header, 0);
        hlist.clear();
        for (String s : header) {
          hlist.add(new Text(s));
        }
      }
      instream.close();
    }
    DelimitedSplit ds = new DelimitedSplit(new TextArrayWritable(
        hlist.toArray(new Text[hlist.size()])), path,
        fsplit.getStart(), fsplit.getLength(),
        fsplit.getLocations());
    populatedSplits.add(ds);
  }
  return populatedSplits;
}
@Override
public TableResponse createExternalTable(RpcController controller,
    CreateTableRequest request) throws ServiceException {
  try {
    Session session = context.getSessionManager()
        .getSession(request.getSessionId().getId());
    QueryContext queryContext = new QueryContext(conf, session);
    Path path = new Path(request.getPath());
    FileSystem fs = path.getFileSystem(conf);
    if (!fs.exists(path)) {
      throw new UnavailableTableLocationException(path.toString(),
          "no such directory");
    }
    Schema schema = null;
    if (request.hasSchema()) {
      schema = SchemaFactory.newV1(request.getSchema());
    }
    TableMeta meta = new TableMeta(request.getMeta());
    PartitionMethodDesc partitionDesc = null;
    if (request.hasPartition()) {
      partitionDesc = new PartitionMethodDesc(request.getPartition());
    }
    TableDesc desc = context.getGlobalEngine().getDDLExecutor()
        .getCreateTableExecutor().create(
            queryContext,
            request.getName(),
            null,
            schema,
            meta,
            path.toUri(),
            true,
            partitionDesc,
            false);
    return TableResponse.newBuilder()
        .setState(OK)
        .setTable(desc.getProto()).build();
  } catch (Throwable t) {
    printStackTraceIfError(LOG, t);
    return TableResponse.newBuilder()
        .setState(returnError(t))
        .build();
  }
}
/**
 * COPIED from CommonFSUtils.getRootDir
 *
 * @param c configuration
 * @return {@link Path} to the hbase root directory from the configuration,
 *     as a qualified Path
 * @throws IOException e
 */
public static Path getRootDir(final Configuration c) throws IOException {
  Path p = new Path(c.get(HConstants.HBASE_DIR));
  FileSystem fs = p.getFileSystem(c);
  return p.makeQualified(fs.getUri(), fs.getWorkingDirectory());
}
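A short usage sketch for the method above, assuming an hbase-site.xml that sets hbase.rootdir is on the classpath:

Configuration conf = HBaseConfiguration.create();
Path rootDir = getRootDir(conf);
// the qualified path carries its scheme and authority, e.g. hdfs://<nn>/hbase
FileSystem fs = rootDir.getFileSystem(conf);
System.out.println("HBase root: " + rootDir + ", exists: " + fs.exists(rootDir));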