The following examples show how to use the org.apache.hadoop.fs.Path API class; you can also follow the links to view the source code on GitHub.
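As a starting point, here is a minimal, self-contained sketch of typical Path usage; the directory name and file contents are illustrative only and do not come from the examples below.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class PathQuickStart {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Build paths by joining a parent Path and a child name.
    Path dir = new Path("/tmp/path-demo"); // illustrative location
    Path file = new Path(dir, "data.txt");
    // Obtain the FileSystem that owns the path and work through it.
    FileSystem fs = file.getFileSystem(conf);
    fs.mkdirs(dir);
    try (FSDataOutputStream out = fs.create(file, true)) {
      out.writeBytes("hello");
    }
    for (FileStatus status : fs.listStatus(dir)) {
      System.out.println(status.getPath() + " " + status.getLen());
    }
    fs.delete(dir, true);
  }
}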
/**
 * Check the metastore listing against the S3 listing and return any paths
 * missing from S3.
 *
 * @param metastoreListing file listing recorded in the metastore
 * @param s3Listing file listing returned by S3
 * @return paths present in the metastore listing but missing from S3
 */
private List<Path> checkListing(List<FileInfo> metastoreListing, FileStatus [] s3Listing) {
Map<String, FileStatus> s3paths = new HashMap<String, FileStatus>();
if(s3Listing != null) {
for (FileStatus fileStatus : s3Listing) {
s3paths.put(fileStatus.getPath().toUri().normalize().getSchemeSpecificPart(), fileStatus);
}
}
List<Path> missingPaths = new ArrayList<Path>();
for (FileInfo f : metastoreListing) {
if(f.isDeleted()) {
continue;
}
if (!s3paths.containsKey(f.getPath().toUri().normalize().getSchemeSpecificPart())) {
missingPaths.add(f.getPath());
}
}
return missingPaths;
}
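A small fragment, in the same style as the snippets on this page, illustrating the lookup key built above; the bucket and object names are invented. getSchemeSpecificPart() drops the scheme, so listings that differ only in scheme (for example s3 vs s3n) still map to the same key.
Path a = new Path("s3://bucket/table/part-00000");
Path b = new Path("s3n://bucket/table/part-00000");
// both yield "//bucket/table/part-00000"
String keyA = a.toUri().normalize().getSchemeSpecificPart();
String keyB = b.toUri().normalize().getSchemeSpecificPart();
System.out.println(keyA.equals(keyB)); // true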
private int traverseToLeaf(FileSystem fs, Path dirPath, int depth,
int expectedDepth, int expectedSpanCnt,
int expectedFileCnt, int perFileSizeInBytes)
throws IOException {
FileStatus[] fileStatuses = fs.listStatus(dirPath);
// check the number of peer directories; the root and leaf levels are
// excluded as both have fewer directories.
if (depth < expectedDepth - 1) {
verifyActualSpan(expectedSpanCnt, fileStatuses);
}
int actualNumFiles = 0;
for (FileStatus fileStatus : fileStatuses) {
if (fileStatus.isDirectory()) {
++depth;
return traverseToLeaf(fs, fileStatus.getPath(), depth, expectedDepth,
expectedSpanCnt, expectedFileCnt, perFileSizeInBytes);
} else {
Assert.assertEquals("Mismatches file len",
perFileSizeInBytes, fileStatus.getLen());
actualNumFiles++;
}
}
Assert.assertEquals("Mismatches files count in a directory",
expectedFileCnt, actualNumFiles);
return depth;
}
/**
* Configure a waiting job
*/
static void configureWaitingJobConf(JobConf jobConf, Path inDir,
Path outputPath, int numMaps, int numRed,
String jobName, String mapSignalFilename,
String redSignalFilename)
throws IOException {
jobConf.setJobName(jobName);
jobConf.setInputFormat(NonSplitableSequenceFileInputFormat.class);
jobConf.setOutputFormat(SequenceFileOutputFormat.class);
FileInputFormat.setInputPaths(jobConf, inDir);
FileOutputFormat.setOutputPath(jobConf, outputPath);
jobConf.setMapperClass(UtilsForTests.HalfWaitingMapper.class);
jobConf.setReducerClass(IdentityReducer.class);
jobConf.setOutputKeyClass(BytesWritable.class);
jobConf.setOutputValueClass(BytesWritable.class);
jobConf.setInputFormat(RandomInputFormat.class);
jobConf.setNumMapTasks(numMaps);
jobConf.setNumReduceTasks(numRed);
jobConf.setJar("build/test/mapred/testjar/testjob.jar");
jobConf.set(getTaskSignalParameter(true), mapSignalFilename);
jobConf.set(getTaskSignalParameter(false), redSignalFilename);
}
@Test
public void testGetSampleWithSpecialPath() throws Exception {
RecordSet rs = getSimpleTestData(0);
writeRandomCsvFile(mini.getFs(), "/user/test/Marketing Customer Contacts US.CSV", rs, "UTF-8");
String fileSpec = mini.getFs().getUri().resolve(new Path("/user/test/Marketing Customer Contacts US.CSV").toUri()).toString();
//the resolve() call above percent-encodes the space, so undo it here: the user supplies the path as-is and it should not be escaped
fileSpec = fileSpec.replace("%20", " ");
// Configure the component.
SimpleFileIODatasetProperties props = createDatasetProperties();
props.format.setValue(SimpleFileIOFormat.CSV);
props.path.setValue(fileSpec);
final List<IndexedRecord> actual = getSample(props,Integer.MAX_VALUE);
assertThat(actual, hasSize(10));
}
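A short fragment showing the escaping behaviour the comment in this test refers to; the file name is just an example. Path percent-encodes characters such as spaces when building its URI, while URI.getPath() returns the decoded form.
Path p = new Path("/user/test/Marketing Customer Contacts US.CSV");
System.out.println(p.toUri().toString()); // /user/test/Marketing%20Customer%20Contacts%20US.CSV
System.out.println(p.toUri().getPath()); // /user/test/Marketing Customer Contacts US.CSV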
public HiveTargetPathHelper(HiveDataset dataset) {
this.dataset = dataset;
this.relocateDataFiles = Boolean
.valueOf(this.dataset.getProperties().getProperty(RELOCATE_DATA_FILES_KEY, DEFAULT_RELOCATE_DATA_FILES));
this.targetTableRoot = this.dataset.getProperties().containsKey(COPY_TARGET_TABLE_ROOT)
? Optional.of(resolvePath(this.dataset.getProperties().getProperty(COPY_TARGET_TABLE_ROOT),
this.dataset.getTable().getDbName(), this.dataset.getTable().getTableName()))
: Optional.<Path> absent();
this.targetTablePrefixTobeReplaced =
this.dataset.getProperties().containsKey(COPY_TARGET_TABLE_PREFIX_TOBE_REPLACED)
? Optional.of(new Path(this.dataset.getProperties().getProperty(COPY_TARGET_TABLE_PREFIX_TOBE_REPLACED)))
: Optional.<Path> absent();
this.targetTablePrefixReplacement = this.dataset.getProperties().containsKey(COPY_TARGET_TABLE_PREFIX_REPLACEMENT)
? Optional.of(new Path(this.dataset.getProperties().getProperty(COPY_TARGET_TABLE_PREFIX_REPLACEMENT)))
: Optional.<Path> absent();
}
private FileSystem getCachingFileSystem(HdfsContext context, Path path)
throws IOException
{
HdfsConfigurationInitializer configurationInitializer = new HdfsConfigurationInitializer(config, ImmutableSet.of());
HiveHdfsConfiguration configuration = new HiveHdfsConfiguration(
configurationInitializer,
ImmutableSet.of(
rubixConfigInitializer,
(dynamicConfig, ignoredContext, ignoredUri) -> {
dynamicConfig.set("fs.file.impl", CachingLocalFileSystem.class.getName());
dynamicConfig.setBoolean("fs.gs.lazy.init.enable", true);
dynamicConfig.set("fs.azure.account.key", "Zm9vCg==");
dynamicConfig.set("fs.adl.oauth2.client.id", "test");
dynamicConfig.set("fs.adl.oauth2.refresh.url", "http://localhost");
dynamicConfig.set("fs.adl.oauth2.credential", "password");
}));
HdfsEnvironment environment = new HdfsEnvironment(configuration, config, new NoHdfsAuthentication());
return environment.getFileSystem(context, path);
}
@Test
public void testOpenErrorMessageHFileLink() throws IOException, IllegalStateException {
// Test file link exception
// Try to open nonsense hfilelink. Make sure exception is from HFileLink.
Path p = new Path("/hbase/test/0123/cf/testtb=4567-abcd");
try (FileSystem fs = FileSystem.get(TEST_UTIL.getConfiguration())) {
StoreFileInfo sfi = new StoreFileInfo(TEST_UTIL.getConfiguration(), fs, p, true);
try {
ReaderContext context = sfi.createReaderContext(false, 1000, ReaderType.PREAD);
sfi.createReader(context, null);
throw new IllegalStateException();
} catch (FileNotFoundException fnfe) {
assertTrue(fnfe.getMessage().contains(HFileLink.class.getSimpleName()));
}
}
}
private void writeAndReadFile(FileSystem fs,
String name,
Configuration conf,
long size
) throws IOException {
Path f = null;
try {
f = writeFile(fs, name, conf, size);
readFile(fs, f, name, conf);
} finally {
try {
if (f != null) {
fs.delete(f, true);
}
} catch (IOException ie) {
// IGNORE
}
}
}
@Override
public void abort() throws IOException {
List<String> errors = Lists.newArrayList();
for (Path location : cleanUpLocations) {
try {
if (fs.exists(location)) {
fs.delete(location, true);
logger.info("Aborting writer. Location [{}] on file system [{}] is deleted.",
location.toUri().getPath(), fs.getUri());
}
} catch (IOException e) {
errors.add(location.toUri().getPath());
logger.error("Failed to delete location [{}] on file system [{}].",
location, fs.getUri(), e);
}
}
if (!errors.isEmpty()) {
throw new IOException(String.format("Failed to delete the following locations %s on file system [%s]" +
" during aborting writer", errors, fs.getUri()));
}
}
public void readHftpFile(
boolean strictContentLength, boolean sendContentLength
)
throws IOException, URISyntaxException {
int bufSize = 128 * 1024;
byte[] buf = DFSTestUtil.generateSequentialBytes(0, bufSize);
final ByteArrayInputStream inputStream = new ByteArrayInputStream(buf);
final long contentLength = bufSize + 1;
Configuration conf = new Configuration();
conf.setBoolean(HftpFileSystem.STRICT_CONTENT_LENGTH, strictContentLength);
HftpFileSystem fileSystem =
new MockHftpFileSystem(
sendContentLength ? contentLength : null, inputStream, conf
);
FSDataInputStream dataInputStream = fileSystem.open(new Path("dont-care"));
byte[] readBuf = new byte[1024];
while (dataInputStream.read(readBuf) > -1) {
//nothing
}
dataInputStream.close();
}
@Test
public void testGlobFiles() throws Exception {
final Path sub1 = new Path(inputPath, "dir1");
final Path sub2 = new Path(inputPath, "dir2");
fs.mkdirs(sub1);
String fileName = "a";
createFile(inputPath, fs, sub1.getName(), fileName);
createFile(inputPath, fs, sub2.getName(), fileName);
createFile(inputPath, fs, sub1.getName(), "b"); // not part of result
final String glob = "dir{1,2}/a";
final FsShell shell = new FsShell(conf);
final List<String> originalPaths = lsr(shell, inputPath.toString(),
inputPath + "/" + glob);
System.out.println("originalPaths: " + originalPaths);
// make the archive:
final String fullHarPathStr = makeArchive(inputPath, glob);
// compare results:
final List<String> harPaths = lsr(shell, fullHarPathStr,
fullHarPathStr + "/" + glob);
Assert.assertEquals(originalPaths, harPaths);
}
@Test
public void testSingleFile() throws Exception {
final Path sub1 = new Path(inputPath, "dir1");
fs.mkdirs(sub1);
String singleFileName = "a";
createFile(inputPath, fs, sub1.getName(), singleFileName);
final FsShell shell = new FsShell(conf);
final List<String> originalPaths = lsr(shell, sub1.toString());
System.out.println("originalPaths: " + originalPaths);
// make the archive:
final String fullHarPathStr = makeArchive(sub1, singleFileName);
// compare results:
final List<String> harPaths = lsr(shell, fullHarPathStr);
Assert.assertEquals(originalPaths, harPaths);
}
@BeforeClass
public static void setUp() throws IOException {
JobConf conf = new JobConf();
fileSys = FileSystem.get(conf);
fileSys.delete(new Path(TEST_ROOT_DIR), true);
conf.set("mapred.job.tracker.handler.count", "1");
conf.set("mapred.job.tracker", "127.0.0.1:0");
conf.set("mapred.job.tracker.http.address", "127.0.0.1:0");
conf.set("mapred.task.tracker.http.address", "127.0.0.1:0");
conf.set(JHAdminConfig.MR_HISTORY_INTERMEDIATE_DONE_DIR, TEST_ROOT_DIR +
"/intermediate");
conf.set(org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter
.SUCCESSFUL_JOB_OUTPUT_DIR_MARKER, "true");
mr = new MiniMRCluster(1, "file:///", 1, null, null, conf);
inDir = new Path(TEST_ROOT_DIR, "test-input");
String input = "The quick brown fox\n" + "has many silly\n"
+ "red fox sox\n";
DataOutputStream file = fileSys.create(new Path(inDir, "part-" + 0));
file.writeBytes(input);
file.close();
emptyInDir = new Path(TEST_ROOT_DIR, "empty-input");
fileSys.mkdirs(emptyInDir);
}
/**
* Convert a nested URI to decode the underlying path. The translation takes
* the authority and parses it into the underlying scheme and authority.
* For example, "myscheme://hdfs@nn/my/path" is converted to
* "hdfs://nn/my/path".
* @param nestedUri the URI from the nested URI
* @return the unnested path
*/
public static Path unnestUri(URI nestedUri) {
String[] parts = nestedUri.getAuthority().split("@", 2);
StringBuilder result = new StringBuilder(parts[0]);
result.append("://");
if (parts.length == 2) {
result.append(parts[1]);
}
result.append(nestedUri.getPath());
if (nestedUri.getQuery() != null) {
result.append("?");
result.append(nestedUri.getQuery());
}
if (nestedUri.getFragment() != null) {
result.append("#");
result.append(nestedUri.getFragment());
}
return new Path(result.toString());
}
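A hedged usage sketch of unnestUri; the URI literal is the example from the javadoc, and the result follows directly from the string assembly above.
URI nested = new URI("myscheme://hdfs@nn/my/path");
Path unnested = unnestUri(nested);
System.out.println(unnested); // hdfs://nn/my/path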
private void testDeleteCreatesFakeParentDir() throws Exception {
Path grandparent = new Path("/testDeleteCreatesFakeParentDir");
Path parent = new Path(grandparent, "parent");
Path child = new Path(parent, "child");
ContractTestUtils.touch(fs, child);
rootItemCount++; // grandparent
// Verify that parent dir key does not exist
// Creating a child should not add parent keys to the bucket
try {
getKey(parent, true);
} catch (IOException ex) {
assertKeyNotFoundException(ex);
}
// Delete the child key
fs.delete(child, false);
// Deleting the only child should create the parent dir key if it does
// not exist
String parentKey = o3fs.pathToKey(parent) + "/";
OzoneKeyDetails parentKeyInfo = getKey(parent, true);
assertEquals(parentKey, parentKeyInfo.getName());
}
/** @throws Exception If failed. */
@Test
public void testSetPermissionIfOutputStreamIsNotClosed() throws Exception {
Path fsHome = new Path(primaryFsUri);
Path file = new Path(fsHome, "myFile");
FsPermission perm = new FsPermission((short)123);
FSDataOutputStream os = fs.create(file, EnumSet.noneOf(CreateFlag.class),
Options.CreateOpts.perms(FsPermission.getDefault()));
fs.setPermission(file, perm);
os.close();
assertEquals(perm, fs.getFileStatus(file).getPermission());
}
@Override
public FSDataOutputStream create(Path f, FsPermission permission,
boolean overwrite, int bufferSize, short replication, long blockSize,
Progressable progress) throws IOException {
if (exists(f) && !overwrite) {
throw new IOException("File already exists:"+f);
}
Path absolutePath = makeAbsolute(f);
String key = pathToKey(absolutePath);
return new FSDataOutputStream(new NativeS3FsOutputStream(getConf(), store,
key, progress, bufferSize), statistics);
}
private static SequenceFile.Writer[] createWriters(Path testdir,
JobConf conf, int srcs, Path[] src) throws IOException {
for (int i = 0; i < srcs; ++i) {
src[i] = new Path(testdir, Integer.toString(i + 10, 36));
}
SequenceFile.Writer out[] = new SequenceFile.Writer[srcs];
for (int i = 0; i < srcs; ++i) {
out[i] = new SequenceFile.Writer(testdir.getFileSystem(conf), conf,
src[i], Text.class, Text.class);
}
return out;
}
private Path getRealHDFSPath(String resourcePath) {
if (resourcePath.equals("/"))
return this.hdfsMetaPath;
if (resourcePath.startsWith("/") && resourcePath.length() > 1)
resourcePath = resourcePath.substring(1, resourcePath.length());
return new Path(this.hdfsMetaPath, resourcePath);
}
public static Path[] getSequenceFilePaths( FileSystem fs, Path file )
throws IOException
{
Path[] ret = null;
//Note on object stores: since the object store file system implementations
//only emulate a file system, the directory of a multi-part file does not
//exist physically and hence the isDirectory call returns false. Furthermore,
//the listStatus call returns all files with the given directory as prefix,
//including the mtd file, which therefore needs to be ignored.
if( fs.isDirectory(file)
|| IOUtilFunctions.isObjectStoreFileScheme(file) )
{
LinkedList<Path> tmp = new LinkedList<>();
FileStatus[] dStatus = fs.listStatus(file);
for( FileStatus fdStatus : dStatus )
if( !fdStatus.getPath().getName().startsWith("_") //skip internal files
&& !fdStatus.getPath().toString().equals(file.toString()+".mtd") ) //mtd file
tmp.add(fdStatus.getPath());
ret = tmp.toArray(new Path[0]);
}
else {
ret = new Path[]{ file };
}
return ret;
}
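A hypothetical call site for the helper above (the HDFS path is invented): for a multi-part file written as a directory it returns the part files, skipping names starting with "_" and the side-car "<file>.mtd" metadata file; for a single file it returns the file itself.
Configuration conf = new Configuration();
Path file = new Path("hdfs:///tmp/A"); // invented example path
FileSystem fs = file.getFileSystem(conf);
for (Path part : getSequenceFilePaths(fs, file)) {
System.out.println(part);
}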
@Test
public void testRegex() throws Exception {
String regex1 = "regex" + File.separator + "regex-small.txt";
File regexFile1 = new File(ClassLoader.getSystemClassLoader().getResource(regex1).getPath());
assertTrue(regex1 + " does not exist: " + regexFile1.getAbsolutePath(), regexFile1.exists());
Path input1 = new Path(tempDir, regex1);
addContentToFS(input1, Files.toByteArray(regexFile1));
String regex2 = "regex" + File.separator + "regex-small-2.txt";
File regexFile2 = new File(ClassLoader.getSystemClassLoader().getResource(regex2).getPath());
assertTrue(regex2 + " does not exist: " + regexFile2.getAbsolutePath(), regexFile2.exists());
Path input2 = new Path(tempDir, regex2);
addContentToFS(input2, Files.toByteArray(regexFile2));
String jobName = "testRegex";
String[] args = new JobArgs().withJobName(jobName).withClassname(RegexIngestMapper.class.getName())
.withCollection(DEFAULT_COLLECTION).withZkString(getBaseUrl())
.withInput(tempDir.toUri().toString() + File.separator + "regex" + File.separator +
"regex-small*")
.withDArgs("-D" + RegexIngestMapper.REGEX + "=\\w+", "-D" + RegexIngestMapper
.GROUPS_TO_FIELDS + "=0=match")
.getJobArgs();
int val = ToolRunner.run(conf, new IngestJob(), args);
assertEquals(0, val);
MockRecordWriter mockRecordWriter = IngestJobMockMapRedOutFormat.writers.get(jobName);
Assert.assertNotNull(mockRecordWriter);
assertEquals(2, mockRecordWriter.map.size());
}
@Override
public void commitJob(JobContext context) throws IOException {
Configuration conf = context.getConfiguration();
Path share = new Path(conf.get("share"));
FileSystem fs = FileSystem.get(conf);
while (true) {
if (fs.exists(share)) {
break;
}
UtilsForTests.waitFor(100);
}
super.commitJob(context);
}
public void append(HdfsConfig config, HdfsRule rule, Event event) {
try {
Configuration hadoopConfig = new Configuration();
FileSystem fileSystem = FileSystem.get(URI.create(this.address), hadoopConfig);
Path hdfsPath = new Path(fileName);
FSDataOutputStream fileOutputStream = null;
try {
if (fileSystem.exists(hdfsPath)) {
fileOutputStream = fileSystem.append(hdfsPath);
} else {
fileOutputStream = fileSystem.create(hdfsPath);
}
fileOutputStream.writeUTF(JSON.toJSONString(event));
} finally {
// close the output stream before the file system so buffered data is flushed
if (fileOutputStream != null) {
fileOutputStream.close();
}
if (fileSystem != null) {
fileSystem.close();
}
}
} catch (IOException e) {
// ignored: a failed append is silently dropped
}
}
/**
* Reads the schema from the base file.
*/
public static Schema readSchema(Configuration conf, Path filePath) {
try {
HoodieFileReader storageReader = HoodieFileReaderFactory.getFileReader(conf, filePath);
return storageReader.getSchema();
} catch (IOException e) {
throw new HoodieIOException("Failed to read schema from " + filePath, e);
}
}
private void assertSingleShardedWritingWorks(GATKPath inputBam, String referenceFile, String outputPath, String outputPartsPath, boolean writeBai, boolean writeSbi, long sbiGranularity) throws IOException {
JavaSparkContext ctx = SparkContextFactory.getTestSparkContext();
final GATKPath referencePath = referenceFile == null ? null : new GATKPath(referenceFile);
ReadsSparkSource readSource = new ReadsSparkSource(ctx);
JavaRDD<GATKRead> rddParallelReads = readSource.getParallelReads(inputBam, referencePath);
SAMFileHeader header = readSource.getHeader(inputBam, referencePath);
ReadsSparkSink.writeReads(ctx, outputPath, referencePath, rddParallelReads, header, ReadsWriteFormat.SINGLE, 0, outputPartsPath, writeBai, writeSbi, true, sbiGranularity);
// check that a bai file is created
if (new GATKPath(outputPath).isBam() && writeBai) {
Assert.assertTrue(Files.exists(IOUtils.getPath(outputPath + FileExtensions.BAI_INDEX)));
}
// check that a splitting bai file is created with correct granularity
if (new GATKPath(outputPath).isBam() && writeSbi) {
final java.nio.file.Path sbiPath = IOUtils.getPath(outputPath + FileExtensions.SBI);
Assert.assertTrue(Files.exists(sbiPath));
final SBIIndex sbi = SBIIndex.load(sbiPath);
Assert.assertEquals(sbi.getGranularity(), sbiGranularity);
}
JavaRDD<GATKRead> rddParallelReads2 = readSource.getParallelReads(new GATKPath(outputPath), referencePath);
final List<GATKRead> writtenReads = rddParallelReads2.collect();
assertReadsAreSorted(header, writtenReads);
Assert.assertEquals(rddParallelReads.count(), rddParallelReads2.count());
}
@Test
public void testValidateShardCountExtraDir() throws IOException {
File file = new File(TMPDIR, "ValidateShardCount-test");
rm(file);
Path path = new Path(file.toURI());
Configuration conf = new Configuration();
FileSystem fileSystem = path.getFileSystem(conf);
fileSystem.mkdirs(path);
int shardCount = 10;
createShardDirs(shardCount, fileSystem, path);
fileSystem.mkdirs(new Path(path, "logs"));
BlurUtil.validateShardCount(shardCount, fileSystem, path);
}
@Override
protected EventWriter createEventWriter(Path historyFilePath)
throws IOException {
if (mockHistoryProcessing) {
this.eventWriter = mock(EventWriter.class);
}
else {
this.eventWriter = super.createEventWriter(historyFilePath);
}
return this.eventWriter;
}
@Override
protected boolean isSplitable(JobContext context, Path file) {
final CompressionCodec codec =
new CompressionCodecFactory(context.getConfiguration()).getCodec(file);
if (null == codec) {
return true;
}
return codec instanceof SplittableCompressionCodec;
}
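A small sketch of how the codec lookup above plays out for common extensions; the file names are illustrative. GzipCodec does not implement SplittableCompressionCodec, while BZip2Codec does, so gzipped inputs end up as a single split.
Configuration conf = new Configuration();
CompressionCodecFactory factory = new CompressionCodecFactory(conf);
CompressionCodec gzip = factory.getCodec(new Path("logs/part-00000.gz"));
CompressionCodec bzip2 = factory.getCodec(new Path("logs/part-00000.bz2"));
System.out.println(gzip instanceof SplittableCompressionCodec); // false: one split per file
System.out.println(bzip2 instanceof SplittableCompressionCodec); // true: file may be split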
public static void main(String[] args) throws Exception {
if (args.length < 2) {
System.err.println("Usage: WordCount <input path> <result path>");
return;
}
final String inputPath = args[0];
final String outputPath = args[1];
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
// Set up the Hadoop Input Format
Job job = Job.getInstance();
HadoopInputFormat<LongWritable, Text> hadoopInputFormat = new HadoopInputFormat<LongWritable, Text>(new TextInputFormat(), LongWritable.class, Text.class, job);
TextInputFormat.addInputPath(job, new Path(inputPath));
// Create a Flink job with it
DataSet<Tuple2<LongWritable, Text>> text = env.createInput(hadoopInputFormat);
// Tokenize the line and convert from Writable "Text" to String for better handling
DataSet<Tuple2<String, Integer>> words = text.flatMap(new Tokenizer());
// Sum up the words
DataSet<Tuple2<String, Integer>> result = words.groupBy(0).aggregate(Aggregations.SUM, 1);
// Convert String back to Writable "Text" for use with Hadoop Output Format
DataSet<Tuple2<Text, IntWritable>> hadoopResult = result.map(new HadoopDatatypeMapper());
// Set up Hadoop Output Format
HadoopOutputFormat<Text, IntWritable> hadoopOutputFormat = new HadoopOutputFormat<Text, IntWritable>(new TextOutputFormat<Text, IntWritable>(), job);
hadoopOutputFormat.getConfiguration().set("mapreduce.output.textoutputformat.separator", " ");
hadoopOutputFormat.getConfiguration().set("mapred.textoutputformat.separator", " "); // set the value for both, since this test
TextOutputFormat.setOutputPath(job, new Path(outputPath));
// Output & Execute
hadoopResult.output(hadoopOutputFormat);
env.execute("Word Count");
}
@Test
public void checkDeleteWithRefresh() {
List<String> allSegmentsInCluster = new ArrayList<>();
allSegmentsInCluster.add("mytable_0");
allSegmentsInCluster.add("mytable_1");
allSegmentsInCluster.add("mytable_2");
List<Path> currentSegments = new ArrayList<>();
currentSegments.add(new Path("mytable_0"));
SegmentTarPushJob segmentTarPushJob = new SegmentTarPushJob(_defaultProperties);
List<String> segmentsToDelete = segmentTarPushJob.getSegmentsToDelete(allSegmentsInCluster, currentSegments);
Assert.assertEquals(segmentsToDelete.size(), 2);
Assert.assertFalse(segmentsToDelete.contains("mytable_0"));
}