Source code example demos for the class org.apache.hadoop.fs.Path

The following lists example code and usage patterns for the org.apache.hadoop.fs.Path API class; you can also follow the links to view the original source code on GitHub.
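
Before the project examples, here is a minimal, hedged sketch of the most common Path operations (all paths and class names below are hypothetical, chosen only for illustration):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class PathDemo {
  public static void main(String[] args) throws IOException {
    // build a child path relative to a parent directory
    Path parent = new Path("/tmp/path-demo");
    Path file = new Path(parent, "data.txt");

    System.out.println(file.getName());    // data.txt
    System.out.println(file.getParent());  // /tmp/path-demo
    System.out.println(file.toUri());      // /tmp/path-demo/data.txt

    // a Path only names a location; I/O goes through a concrete FileSystem
    FileSystem fs = FileSystem.get(new Configuration());
    fs.mkdirs(parent);
    System.out.println(fs.exists(file));   // false until something creates the file
  }
}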

Example 1  Project: s3mper   File: ConsistentListingAspect.java
/**
 * Check the metastore listing against the s3 listing and return any paths 
 * missing from s3.
 * 
 * @param metastoreListing
 * @param s3Listing
 * @return 
 */
private List<Path> checkListing(List<FileInfo> metastoreListing, FileStatus [] s3Listing) {
    Map<String, FileStatus> s3paths = new HashMap<String, FileStatus>();
        
    if(s3Listing != null) {
        for (FileStatus fileStatus : s3Listing) {
            s3paths.put(fileStatus.getPath().toUri().normalize().getSchemeSpecificPart(), fileStatus);
        }
    }

    List<Path> missingPaths = new ArrayList<Path>();

    for (FileInfo f : metastoreListing) {
        if(f.isDeleted()) {
            continue;
        }
        
        if (!s3paths.containsKey(f.getPath().toUri().normalize().getSchemeSpecificPart())) {
            missingPaths.add(f.getPath());
        }
    }
    
    return missingPaths;
}
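
The comparison above keys each entry on the scheme-specific part of its normalized URI, so two paths that differ only in scheme map to the same key; a small sketch of that Path/URI behavior (bucket and key names are made up):

import org.apache.hadoop.fs.Path;

public class SchemeSpecificPartDemo {
  public static void main(String[] args) {
    Path a = new Path("s3://bucket/dir/file");
    Path b = new Path("s3n://bucket/dir/file");
    // both print "//bucket/dir/file": the scheme is dropped from the key
    System.out.println(a.toUri().normalize().getSchemeSpecificPart());
    System.out.println(b.toUri().normalize().getSchemeSpecificPart());
  }
}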
 
Example 2  Project: hadoop-ozone   File: TestHadoopDirTreeGenerator.java
private int traverseToLeaf(FileSystem fs, Path dirPath, int depth,
                           int expectedDepth, int expectedSpanCnt,
                           int expectedFileCnt, int perFileSizeInBytes)
        throws IOException {
  FileStatus[] fileStatuses = fs.listStatus(dirPath);
  // check the number of peer directories, except for root and leaf,
  // as both have fewer dirs.
  if (depth < expectedDepth - 1) {
    verifyActualSpan(expectedSpanCnt, fileStatuses);
  }
  int actualNumFiles = 0;
  for (FileStatus fileStatus : fileStatuses) {
    if (fileStatus.isDirectory()) {
      ++depth;
      return traverseToLeaf(fs, fileStatus.getPath(), depth, expectedDepth,
              expectedSpanCnt, expectedFileCnt, perFileSizeInBytes);
    } else {
      Assert.assertEquals("Mismatches file len",
              perFileSizeInBytes, fileStatus.getLen());
      actualNumFiles++;
    }
  }
  Assert.assertEquals("Mismatches files count in a directory",
          expectedFileCnt, actualNumFiles);
  return depth;
}
 
Example 3  Project: hadoop   File: UtilsForTests.java
/**
 * Configure a waiting job
 */
static void configureWaitingJobConf(JobConf jobConf, Path inDir,
                                    Path outputPath, int numMaps, int numRed,
                                    String jobName, String mapSignalFilename,
                                    String redSignalFilename)
throws IOException {
  jobConf.setJobName(jobName);
  jobConf.setInputFormat(NonSplitableSequenceFileInputFormat.class);
  jobConf.setOutputFormat(SequenceFileOutputFormat.class);
  FileInputFormat.setInputPaths(jobConf, inDir);
  FileOutputFormat.setOutputPath(jobConf, outputPath);
  jobConf.setMapperClass(UtilsForTests.HalfWaitingMapper.class);
  jobConf.setReducerClass(IdentityReducer.class);
  jobConf.setOutputKeyClass(BytesWritable.class);
  jobConf.setOutputValueClass(BytesWritable.class);
  jobConf.setInputFormat(RandomInputFormat.class); // overrides the SequenceFile input format set above
  jobConf.setNumMapTasks(numMaps);
  jobConf.setNumReduceTasks(numRed);
  jobConf.setJar("build/test/mapred/testjar/testjob.jar");
  jobConf.set(getTaskSignalParameter(true), mapSignalFilename);
  jobConf.set(getTaskSignalParameter(false), redSignalFilename);
}
 
@Test
public void testGetSampleWithSpecialPath() throws Exception {
    RecordSet rs = getSimpleTestData(0);
    writeRandomCsvFile(mini.getFs(), "/user/test/Marketing Customer Contacts US.CSV", rs, "UTF-8");
    String fileSpec = mini.getFs().getUri().resolve(new Path("/user/test/Marketing Customer Contacts US.CSV").toUri()).toString();
    // resolve() above percent-encodes the space, so restore it here: the customer set this path and it should not stay escaped
    fileSpec = fileSpec.replace("%20", " ");
    
    // Configure the component.
    SimpleFileIODatasetProperties props = createDatasetProperties();
    props.format.setValue(SimpleFileIOFormat.CSV);
    props.path.setValue(fileSpec);

    final List<IndexedRecord> actual = getSample(props, Integer.MAX_VALUE);

    assertThat(actual, hasSize(10));
}
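
The %20 replacement in the test exists because converting a Path to a URI percent-encodes characters such as spaces, while Path.toString() keeps the readable form; a standalone sketch of that behavior:

import org.apache.hadoop.fs.Path;

public class PathEscapingDemo {
  public static void main(String[] args) {
    Path p = new Path("/user/test/Marketing Customer Contacts US.CSV");
    System.out.println(p);          // /user/test/Marketing Customer Contacts US.CSV
    System.out.println(p.toUri());  // /user/test/Marketing%20Customer%20Contacts%20US.CSV
  }
}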
 
Example 5  Project: incubator-gobblin   File: HiveTargetPathHelper.java
public HiveTargetPathHelper(HiveDataset dataset) {

    this.dataset = dataset;
    this.relocateDataFiles = Boolean
        .valueOf(this.dataset.getProperties().getProperty(RELOCATE_DATA_FILES_KEY, DEFAULT_RELOCATE_DATA_FILES));
    this.targetTableRoot = this.dataset.getProperties().containsKey(COPY_TARGET_TABLE_ROOT)
        ? Optional.of(resolvePath(this.dataset.getProperties().getProperty(COPY_TARGET_TABLE_ROOT),
        this.dataset.getTable().getDbName(), this.dataset.getTable().getTableName()))
        : Optional.<Path> absent();

    this.targetTablePrefixTobeReplaced =
        this.dataset.getProperties().containsKey(COPY_TARGET_TABLE_PREFIX_TOBE_REPLACED)
            ? Optional.of(new Path(this.dataset.getProperties().getProperty(COPY_TARGET_TABLE_PREFIX_TOBE_REPLACED)))
            : Optional.<Path> absent();

    this.targetTablePrefixReplacement = this.dataset.getProperties().containsKey(COPY_TARGET_TABLE_PREFIX_REPLACEMENT)
        ? Optional.of(new Path(this.dataset.getProperties().getProperty(COPY_TARGET_TABLE_PREFIX_REPLACEMENT)))
        : Optional.<Path> absent();
  }
 
Example 6  Project: presto   File: TestRubixCaching.java
private FileSystem getCachingFileSystem(HdfsContext context, Path path)
        throws IOException
{
    HdfsConfigurationInitializer configurationInitializer = new HdfsConfigurationInitializer(config, ImmutableSet.of());
    HiveHdfsConfiguration configuration = new HiveHdfsConfiguration(
            configurationInitializer,
            ImmutableSet.of(
                    rubixConfigInitializer,
                    (dynamicConfig, ignoredContext, ignoredUri) -> {
                        dynamicConfig.set("fs.file.impl", CachingLocalFileSystem.class.getName());
                        dynamicConfig.setBoolean("fs.gs.lazy.init.enable", true);
                        dynamicConfig.set("fs.azure.account.key", "Zm9vCg==");
                        dynamicConfig.set("fs.adl.oauth2.client.id", "test");
                        dynamicConfig.set("fs.adl.oauth2.refresh.url", "http://localhost");
                        dynamicConfig.set("fs.adl.oauth2.credential", "password");
                    }));
    HdfsEnvironment environment = new HdfsEnvironment(configuration, config, new NoHdfsAuthentication());
    return environment.getFileSystem(context, path);
}
 
Example 7  Project: hbase   File: TestStoreFileInfo.java
@Test
public void testOpenErrorMessageHFileLink() throws IOException, IllegalStateException {
  // Test file link exception
  // Try to open nonsense hfilelink. Make sure exception is from HFileLink.
  Path p = new Path("/hbase/test/0123/cf/testtb=4567-abcd");
  try (FileSystem fs = FileSystem.get(TEST_UTIL.getConfiguration())) {
    StoreFileInfo sfi = new StoreFileInfo(TEST_UTIL.getConfiguration(), fs, p, true);
    try {
      ReaderContext context = sfi.createReaderContext(false, 1000, ReaderType.PREAD);
      sfi.createReader(context, null);
      throw new IllegalStateException();
    } catch (FileNotFoundException fnfe) {
      assertTrue(fnfe.getMessage().contains(HFileLink.class.getSimpleName()));
    }
  }
}
 
Example 8  Project: hadoop   File: BenchmarkThroughput.java
private void writeAndReadFile(FileSystem fs,
                                     String name,
                                     Configuration conf,
                                     long size
                                     ) throws IOException {
  Path f = null;
  try {
    f = writeFile(fs, name, conf, size);
    readFile(fs, f, name, conf);
  } finally {
    try {
      if (f != null) {
        fs.delete(f, true);
      }
    } catch (IOException ie) {
      // IGNORE
    }
  }
}
 
Example 9  Project: Bats   File: ParquetRecordWriter.java
@Override
public void abort() throws IOException {
  List<String> errors = Lists.newArrayList();
  for (Path location : cleanUpLocations) {
    try {
      if (fs.exists(location)) {
        fs.delete(location, true);
        logger.info("Aborting writer. Location [{}] on file system [{}] is deleted.",
            location.toUri().getPath(), fs.getUri());
      }
    } catch (IOException e) {
      errors.add(location.toUri().getPath());
      logger.error("Failed to delete location [{}] on file system [{}].",
          location, fs.getUri(), e);
    }
  }
  if (!errors.isEmpty()) {
    throw new IOException(String.format("Failed to delete the following locations %s on file system [%s]" +
        " during aborting writer", errors, fs.getUri()));
  }
}
 
Example 10  Project: RDFS   File: TestHftpFileSystem.java
public void readHftpFile(
  boolean strictContentLength, boolean sendContentLength
)
  throws IOException, URISyntaxException {
  int bufSize = 128 * 1024;
  byte[] buf = DFSTestUtil.generateSequentialBytes(0, bufSize);
  final ByteArrayInputStream inputStream = new ByteArrayInputStream(buf);
  final long contentLength = bufSize + 1;
  Configuration conf = new Configuration();

  conf.setBoolean(HftpFileSystem.STRICT_CONTENT_LENGTH, strictContentLength);

  HftpFileSystem fileSystem =
    new MockHftpFileSystem(
      sendContentLength ? contentLength : null, inputStream, conf
    );
  FSDataInputStream dataInputStream = fileSystem.open(new Path("dont-care"));
  byte[] readBuf = new byte[1024];

  while (dataInputStream.read(readBuf) > -1) {
    //nothing
  }

  dataInputStream.close();
}
 
Example 11  Project: hadoop   File: TestHadoopArchives.java
@Test
public void testGlobFiles() throws Exception {
  final Path sub1 = new Path(inputPath, "dir1");
  final Path sub2 = new Path(inputPath, "dir2");
  fs.mkdirs(sub1);
  String fileName = "a";
  createFile(inputPath, fs, sub1.getName(), fileName);
  createFile(inputPath, fs, sub2.getName(), fileName);
  createFile(inputPath, fs, sub1.getName(), "b"); // not part of result

  final String glob =  "dir{1,2}/a";
  final FsShell shell = new FsShell(conf);
  final List<String> originalPaths = lsr(shell, inputPath.toString(),
      inputPath + "/" + glob);
  System.out.println("originalPaths: " + originalPaths);

  // make the archive:
  final String fullHarPathStr = makeArchive(inputPath, glob);

  // compare results:
  final List<String> harPaths = lsr(shell, fullHarPathStr,
      fullHarPathStr + "/" + glob);
  Assert.assertEquals(originalPaths, harPaths);
}
 
Example 12  Project: big-c   File: TestHadoopArchives.java
@Test
public void testSingleFile() throws Exception {
  final Path sub1 = new Path(inputPath, "dir1");
  fs.mkdirs(sub1);
  String singleFileName = "a";
  createFile(inputPath, fs, sub1.getName(), singleFileName);
  final FsShell shell = new FsShell(conf);

  final List<String> originalPaths = lsr(shell, sub1.toString());
  System.out.println("originalPaths: " + originalPaths);

  // make the archive:
  final String fullHarPathStr = makeArchive(sub1, singleFileName);

  // compare results:
  final List<String> harPaths = lsr(shell, fullHarPathStr);
  Assert.assertEquals(originalPaths, harPaths);
}
 
Example 13  Project: big-c   File: TestJobCleanup.java
@BeforeClass
public static void setUp() throws IOException {
  JobConf conf = new JobConf();
  fileSys = FileSystem.get(conf);
  fileSys.delete(new Path(TEST_ROOT_DIR), true);
  conf.set("mapred.job.tracker.handler.count", "1");
  conf.set("mapred.job.tracker", "127.0.0.1:0");
  conf.set("mapred.job.tracker.http.address", "127.0.0.1:0");
  conf.set("mapred.task.tracker.http.address", "127.0.0.1:0");
  conf.set(JHAdminConfig.MR_HISTORY_INTERMEDIATE_DONE_DIR, TEST_ROOT_DIR +
    "/intermediate");
  conf.set(org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter
    .SUCCESSFUL_JOB_OUTPUT_DIR_MARKER, "true");

  mr = new MiniMRCluster(1, "file:///", 1, null, null, conf);
  inDir = new Path(TEST_ROOT_DIR, "test-input");
  String input = "The quick brown fox\n" + "has many silly\n"
      + "red fox sox\n";
  DataOutputStream file = fileSys.create(new Path(inDir, "part-" + 0));
  file.writeBytes(input);
  file.close();
  emptyInDir = new Path(TEST_ROOT_DIR, "empty-input");
  fileSys.mkdirs(emptyInDir);
}
 
Example 14  Project: hadoop   File: ProviderUtils.java
/**
 * Convert a nested URI to decode the underlying path. The translation takes
 * the authority and parses it into the underlying scheme and authority.
 * For example, "myscheme://hdfs@nn/my/path" is converted to
 * "hdfs://nn/my/path".
 * @param nestedUri the URI from the nested URI
 * @return the unnested path
 */
public static Path unnestUri(URI nestedUri) {
  String[] parts = nestedUri.getAuthority().split("@", 2);
  StringBuilder result = new StringBuilder(parts[0]);
  result.append("://");
  if (parts.length == 2) {
    result.append(parts[1]);
  }
  result.append(nestedUri.getPath());
  if (nestedUri.getQuery() != null) {
    result.append("?");
    result.append(nestedUri.getQuery());
  }
  if (nestedUri.getFragment() != null) {
    result.append("#");
    result.append(nestedUri.getFragment());
  }
  return new Path(result.toString());
}
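
A quick usage sketch of the conversion described in the javadoc, assuming the class lives at org.apache.hadoop.security.ProviderUtils and using a made-up host name:

import java.net.URI;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.security.ProviderUtils;

public class UnnestUriDemo {
  public static void main(String[] args) throws Exception {
    // the outer scheme ("jceks") wraps the real scheme inside the authority
    URI nested = new URI("jceks://hdfs@nn.example.com:9000/my/creds.jceks");
    Path unnested = ProviderUtils.unnestUri(nested);
    System.out.println(unnested);  // hdfs://nn.example.com:9000/my/creds.jceks
  }
}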
 
Example 15  Project: hadoop-ozone   File: TestOzoneFileSystem.java
private void testDeleteCreatesFakeParentDir() throws Exception {
  Path grandparent = new Path("/testDeleteCreatesFakeParentDir");
  Path parent = new Path(grandparent, "parent");
  Path child = new Path(parent, "child");
  ContractTestUtils.touch(fs, child);
  rootItemCount++; // grandparent

  // Verify that parent dir key does not exist
  // Creating a child should not add parent keys to the bucket
  try {
    getKey(parent, true);
  } catch (IOException ex) {
    assertKeyNotFoundException(ex);
  }

  // Delete the child key
  fs.delete(child, false);

  // Deleting the only child should create the parent dir key if it does
  // not exist
  String parentKey = o3fs.pathToKey(parent) + "/";
  OzoneKeyDetails parentKeyInfo = getKey(parent, true);
  assertEquals(parentKey, parentKeyInfo.getName());
}
 
/** @throws Exception If failed. */
@Test
public void testSetPermissionIfOutputStreamIsNotClosed() throws Exception {
    Path fsHome = new Path(primaryFsUri);
    Path file = new Path(fsHome, "myFile");

    FsPermission perm = new FsPermission((short)123);

    FSDataOutputStream os = fs.create(file, EnumSet.noneOf(CreateFlag.class),
        Options.CreateOpts.perms(FsPermission.getDefault()));

    fs.setPermission(file, perm);

    os.close();

    assertEquals(perm, fs.getFileStatus(file).getPermission());
}
 
Example 17  Project: RDFS   File: NativeS3FileSystem.java
@Override
public FSDataOutputStream create(Path f, FsPermission permission,
    boolean overwrite, int bufferSize, short replication, long blockSize,
    Progressable progress) throws IOException {

  if (exists(f) && !overwrite) {
    throw new IOException("File already exists:"+f);
  }
  Path absolutePath = makeAbsolute(f);
  String key = pathToKey(absolutePath);
  return new FSDataOutputStream(new NativeS3FsOutputStream(getConf(), store,
      key, progress, bufferSize), statistics);
}
 
Example 18  Project: RDFS   File: TestDataJoin.java
private static SequenceFile.Writer[] createWriters(Path testdir,
    JobConf conf, int srcs, Path[] src) throws IOException {
  for (int i = 0; i < srcs; ++i) {
    src[i] = new Path(testdir, Integer.toString(i + 10, 36));
  }
  SequenceFile.Writer out[] = new SequenceFile.Writer[srcs];
  for (int i = 0; i < srcs; ++i) {
    out[i] = new SequenceFile.Writer(testdir.getFileSystem(conf), conf,
        src[i], Text.class, Text.class);
  }
  return out;
}
 
Example 19  Project: kylin-on-parquet-v2   File: HDFSResourceStore.java
private Path getRealHDFSPath(String resourcePath) {
    if (resourcePath.equals("/"))
        return this.hdfsMetaPath;
    if (resourcePath.startsWith("/") && resourcePath.length() > 1)
        resourcePath = resourcePath.substring(1, resourcePath.length());
    return new Path(this.hdfsMetaPath, resourcePath);
}
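
The lookup above relies on the two-argument Path constructor resolving a relative child against a parent; a minimal sketch with hypothetical paths:

import org.apache.hadoop.fs.Path;

public class ChildPathDemo {
  public static void main(String[] args) {
    Path metaRoot = new Path("hdfs://nn:8020/kylin/metadata");
    // "/" would map to the metadata root itself; anything else resolves under it
    System.out.println(new Path(metaRoot, "table/DEFAULT.MY_TABLE.json"));
    // prints hdfs://nn:8020/kylin/metadata/table/DEFAULT.MY_TABLE.json
  }
}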
 
Example 20  Project: systemds   File: IOUtilFunctions.java
public static Path[] getSequenceFilePaths( FileSystem fs, Path file ) 
	throws IOException
{
	Path[] ret = null;
	
	//Note on object stores: Since the object store file system implementations 
	//only emulate a file system, the directory of a multi-part file does not
	//exist physically and hence the isDirectory call returns false. Furthermore,
	//listStatus call returns all files with the given directory as prefix, which
	//includes the mtd file which needs to be ignored accordingly.
	
	if( fs.isDirectory(file) 
		|| IOUtilFunctions.isObjectStoreFileScheme(file) )
	{
		LinkedList<Path> tmp = new LinkedList<>();
		FileStatus[] dStatus = fs.listStatus(file);
		for( FileStatus fdStatus : dStatus )
			if( !fdStatus.getPath().getName().startsWith("_") //skip internal files
				&& !fdStatus.getPath().toString().equals(file.toString()+".mtd") ) //mtd file
				tmp.add(fdStatus.getPath());
		ret = tmp.toArray(new Path[0]);
	}
	else {
		ret = new Path[]{ file };
	}
	
	return ret;
}
 
Example 21  Project: hadoop-solr   File: IngestJobTest.java
@Test
public void testRegex() throws Exception {
  String regex1 = "regex" + File.separator + "regex-small.txt";
  File regexFile1 = new File(ClassLoader.getSystemClassLoader().getResource(regex1).getPath());
  assertTrue(regex1 + " does not exist: " + regexFile1.getAbsolutePath(), regexFile1.exists());
  Path input1 = new Path(tempDir, regex1);
  addContentToFS(input1, Files.toByteArray(regexFile1));

  String regex2 = "regex" + File.separator + "regex-small-2.txt";
  File regexFile2 = new File(ClassLoader.getSystemClassLoader().getResource(regex2).getPath());
  assertTrue(regex2 + " does not exist: " + regexFile2.getAbsolutePath(), regexFile2.exists());
  Path input2 = new Path(tempDir, regex2);
  addContentToFS(input2, Files.toByteArray(regexFile2));

  String jobName = "testRegex";

  String[] args = new JobArgs().withJobName(jobName).withClassname(RegexIngestMapper.class.getName())
      .withCollection(DEFAULT_COLLECTION).withZkString(getBaseUrl())
      .withInput(tempDir.toUri().toString() + File.separator + "regex" + File.separator +
          "regex-small*")
      .withDArgs("-D" + RegexIngestMapper.REGEX + "=\\w+", "-D" + RegexIngestMapper
          .GROUPS_TO_FIELDS + "=0=match")
      .getJobArgs();

  int val = ToolRunner.run(conf, new IngestJob(), args);
  assertEquals(0, val);
  MockRecordWriter mockRecordWriter = IngestJobMockMapRedOutFormat.writers.get(jobName);
  Assert.assertNotNull(mockRecordWriter);
  assertEquals(2, mockRecordWriter.map.size());
}
 
Example 22  Project: RDFS   File: TestEmptyJob.java
@Override
public void commitJob(JobContext context) throws IOException {
  Configuration conf = context.getConfiguration();
  Path share = new Path(conf.get("share"));
  FileSystem fs = FileSystem.get(conf);

  
  while (true) {
    if (fs.exists(share)) {
      break;
    }
    UtilsForTests.waitFor(100);
  }
  super.commitJob(context);
}
 
Example 23  Project: tunnel   File: HdfsClient.java
public void append(HdfsConfig config, HdfsRule rule, Event event) {
    try {
        Configuration hadoopConfig = new Configuration();
        FileSystem fileSystem = FileSystem.get(URI.create(this.address), hadoopConfig);
        Path hdfsPath = new Path(fileName);
        FSDataOutputStream fileOutputStream = null;
        try {
            if (fileSystem.exists(hdfsPath)) {
                fileOutputStream = fileSystem.append(hdfsPath);
            } else {
                fileOutputStream = fileSystem.create(hdfsPath);
            }
            fileOutputStream.writeUTF(JSON.toJSONString(event));

        } finally {
            // close the stream before the file system that created it
            if (fileOutputStream != null) {
                fileOutputStream.close();
            }
            if (fileSystem != null) {
                fileSystem.close();
            }
        }
    } catch (IOException e) {
        // ignore write failures
    }

}
 
Example 24  Project: hudi   File: HoodieRealtimeRecordReaderUtils.java
/**
 * Reads the schema from the base file.
 */
public static Schema readSchema(Configuration conf, Path filePath) {
  try {
    HoodieFileReader storageReader = HoodieFileReaderFactory.getFileReader(conf, filePath);
    return storageReader.getSchema();
  } catch (IOException e) {
    throw new HoodieIOException("Failed to read schema from " + filePath, e);
  }
}
 
Example 25  Project: gatk   File: ReadsSparkSinkUnitTest.java
private void assertSingleShardedWritingWorks(GATKPath inputBam, String referenceFile, String outputPath, String outputPartsPath, boolean writeBai, boolean writeSbi, long sbiGranularity) throws IOException {
    JavaSparkContext ctx = SparkContextFactory.getTestSparkContext();

    final GATKPath referencePath = referenceFile == null ? null : new GATKPath(referenceFile);

    ReadsSparkSource readSource = new ReadsSparkSource(ctx);
    JavaRDD<GATKRead> rddParallelReads = readSource.getParallelReads(inputBam, referencePath);
    SAMFileHeader header = readSource.getHeader(inputBam, referencePath);

    ReadsSparkSink.writeReads(ctx, outputPath, referencePath, rddParallelReads, header, ReadsWriteFormat.SINGLE, 0, outputPartsPath, writeBai, writeSbi, true, sbiGranularity);

    // check that a bai file is created
    if (new GATKPath(outputPath).isBam() && writeBai) {
        Assert.assertTrue(Files.exists(IOUtils.getPath(outputPath + FileExtensions.BAI_INDEX)));
    }
    // check that a splitting bai file is created with correct granularity
    if (new GATKPath(outputPath).isBam() && writeSbi) {
        final java.nio.file.Path sbiPath = IOUtils.getPath(outputPath + FileExtensions.SBI);
        Assert.assertTrue(Files.exists(sbiPath));
        final SBIIndex sbi = SBIIndex.load(sbiPath);
        Assert.assertEquals(sbi.getGranularity(), sbiGranularity);
    }

    JavaRDD<GATKRead> rddParallelReads2 = readSource.getParallelReads(new GATKPath(outputPath), referencePath);
    final List<GATKRead> writtenReads = rddParallelReads2.collect();

    assertReadsAreSorted(header, writtenReads);
    Assert.assertEquals(rddParallelReads.count(), rddParallelReads2.count());
}
 
Example 26  Project: incubator-retired-blur   File: BlurUtilsTest.java
@Test
public void testValidateShardCountExtraDir() throws IOException {
  File file = new File(TMPDIR, "ValidateShardCount-test");
  rm(file);
  Path path = new Path(file.toURI());
  Configuration conf = new Configuration();
  FileSystem fileSystem = path.getFileSystem(conf);
  fileSystem.mkdirs(path);
  int shardCount = 10;
  createShardDirs(shardCount, fileSystem, path);
  fileSystem.mkdirs(new Path(path, "logs"));
  BlurUtil.validateShardCount(shardCount, fileSystem, path);
}
 
Example 27  Project: big-c   File: TestJobHistoryEventHandler.java
@Override
protected EventWriter createEventWriter(Path historyFilePath)
    throws IOException {
  if (mockHistoryProcessing) {
    this.eventWriter = mock(EventWriter.class);
  }
  else {
    this.eventWriter = super.createEventWriter(historyFilePath);
  }
  return this.eventWriter;
}
 
Example 28  Project: hadoop   File: KeyValueTextInputFormat.java
@Override
protected boolean isSplitable(JobContext context, Path file) {
  final CompressionCodec codec =
    new CompressionCodecFactory(context.getConfiguration()).getCodec(file);
  if (null == codec) {
    return true;
  }
  return codec instanceof SplittableCompressionCodec;
}
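
A hedged sketch of the codec lookup this method relies on (file names are made up): files with no codec are splittable, gzip is not, and bzip2 is, because BZip2Codec implements SplittableCompressionCodec:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.io.compress.SplittableCompressionCodec;

public class SplitableDemo {
  public static void main(String[] args) {
    CompressionCodecFactory factory = new CompressionCodecFactory(new Configuration());
    for (String name : new String[] {"data.txt", "data.gz", "data.bz2"}) {
      CompressionCodec codec = factory.getCodec(new Path(name));
      boolean splitable = (codec == null) || (codec instanceof SplittableCompressionCodec);
      System.out.println(name + " splitable? " + splitable);  // true, false, true
    }
  }
}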
 
Example 29  Project: flink   File: WordCount.java
public static void main(String[] args) throws Exception {
	if (args.length < 2) {
		System.err.println("Usage: WordCount <input path> <result path>");
		return;
	}

	final String inputPath = args[0];
	final String outputPath = args[1];

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// Set up the Hadoop Input Format
	Job job = Job.getInstance();
	HadoopInputFormat<LongWritable, Text> hadoopInputFormat = new HadoopInputFormat<LongWritable, Text>(new TextInputFormat(), LongWritable.class, Text.class, job);
	TextInputFormat.addInputPath(job, new Path(inputPath));

	// Create a Flink job with it
	DataSet<Tuple2<LongWritable, Text>> text = env.createInput(hadoopInputFormat);

	// Tokenize the line and convert from Writable "Text" to String for better handling
	DataSet<Tuple2<String, Integer>> words = text.flatMap(new Tokenizer());

	// Sum up the words
	DataSet<Tuple2<String, Integer>> result = words.groupBy(0).aggregate(Aggregations.SUM, 1);

	// Convert String back to Writable "Text" for use with Hadoop Output Format
	DataSet<Tuple2<Text, IntWritable>> hadoopResult = result.map(new HadoopDatatypeMapper());

	// Set up Hadoop Output Format
	HadoopOutputFormat<Text, IntWritable> hadoopOutputFormat = new HadoopOutputFormat<Text, IntWritable>(new TextOutputFormat<Text, IntWritable>(), job);
	hadoopOutputFormat.getConfiguration().set("mapreduce.output.textoutputformat.separator", " ");
	hadoopOutputFormat.getConfiguration().set("mapred.textoutputformat.separator", " "); // set the value for both, since this test
	TextOutputFormat.setOutputPath(job, new Path(outputPath));

	// Output & Execute
	hadoopResult.output(hadoopOutputFormat);
	env.execute("Word Count");
}
 
@Test
public void checkDeleteWithRefresh() {
  List<String> allSegmentsInCluster = new ArrayList<>();
  allSegmentsInCluster.add("mytable_0");
  allSegmentsInCluster.add("mytable_1");
  allSegmentsInCluster.add("mytable_2");

  List<Path> currentSegments = new ArrayList<>();
  currentSegments.add(new Path("mytable_0"));
  SegmentTarPushJob segmentTarPushJob = new SegmentTarPushJob(_defaultProperties);
  List<String> segmentsToDelete = segmentTarPushJob.getSegmentsToDelete(allSegmentsInCluster, currentSegments);
  Assert.assertEquals(segmentsToDelete.size(), 2);
  Assert.assertFalse(segmentsToDelete.contains("mytable_0"));
}
 