org.apache.hadoop.fs.ContentSummary#getLength() Source Code Examples

The following examples show how org.apache.hadoop.fs.ContentSummary#getLength() is used in open-source projects; each snippet lists the project and file it was taken from, where the full source can be viewed on GitHub.
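
Before the project snippets, here is a minimal, self-contained sketch of the pattern they all share: obtain a FileSystem for a Path, call getContentSummary(), and read the total size in bytes with getLength(). This sketch is illustrative only; the class name and the "/tmp/demo-data" path are placeholders rather than code from any project below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ContentSummaryLengthDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Placeholder path; getContentSummary works for both files and directories.
        Path path = new Path(args.length > 0 ? args[0] : "/tmp/demo-data");
        FileSystem fs = path.getFileSystem(conf);
        ContentSummary summary = fs.getContentSummary(path);
        // getLength() is the total length in bytes of the files under the path,
        // not counting block replication (getSpaceConsumed() includes replication).
        System.out.println("Total bytes under " + path + ": " + summary.getLength());
    }
}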

Example 1  Project: kylin-on-parquet-v2   File: ColumnToRowJob.java
private int calReducerNum(Path input) {
    try {
        long bytesPerReducer = DEFAULT_SIZE_PER_REDUCER;
        FileSystem fs = FileSystem.get(job.getConfiguration());
        ContentSummary cs = fs.getContentSummary(input);
        long totalInputFileSize = cs.getLength();

        int reducers = (int) ((totalInputFileSize + bytesPerReducer - 1) / bytesPerReducer);
        reducers = Math.max(1, reducers);
        reducers = Math.min(MAX_REDUCERS, reducers);
        logger.info("BytesPerReducer={}, maxReducers={}, totalInputFileSize={}, setReducers={}", bytesPerReducer,
                MAX_REDUCERS, totalInputFileSize, reducers);
        return reducers;
    } catch (IOException e) {
        logger.error("error when calculate reducer number", e);
    }
    return 1;
}
 
Example 2  Project: kylin   File: ColumnToRowJob.java
private int calReducerNum(Path input) {
    try {
        long bytesPerReducer = DEFAULT_SIZE_PER_REDUCER;
        FileSystem fs = FileSystem.get(job.getConfiguration());
        ContentSummary cs = fs.getContentSummary(input);
        long totalInputFileSize = cs.getLength();

        int reducers = (int) ((totalInputFileSize + bytesPerReducer - 1) / bytesPerReducer);
        reducers = Math.max(1, reducers);
        reducers = Math.min(MAX_REDUCERS, reducers);
        logger.info("BytesPerReducer={}, maxReducers={}, totalInputFileSize={}, setReducers={}", bytesPerReducer,
                MAX_REDUCERS, totalInputFileSize, reducers);
        return reducers;
    } catch (IOException e) {
        logger.error("error when calculate reducer number", e);
    }
    return 1;
}
 
Example 3  Project: jumbune   File: DataValidationInputFormat.java
/**
 *  Finds files inside directories recursively and adds them to fileStatusList
 * @param job refers to JobContext that is being used to read the configurations of the job that ran
 * @param minSize refers to the minimum file block size.
 * @param maxSize refers to the maximum file block size.
 * @param splits refers  to a list of splits that are being generated.
 * @param fileStatusList list of FileStatus
 * @throws IOException Signals that an I/O exception has occurred.
 */
public void setData(JobContext job, long minSize, long maxSize,
		List<InputSplit> splits, List<FileStatus> fileStatusList) throws IOException {
	for(FileStatus file:fileStatusList) {
		if (file.isDirectory()) {
			Path dirPath = file.getPath();
			FileStatus [] fileArray = dirPath.getFileSystem(job.getConfiguration()).listStatus(dirPath);
			setData(job, minSize, maxSize, splits, Arrays.asList(fileArray));
		} else {
			//Checking whether file is empty or not
			Path path  = file.getPath();
			FileSystem fs = path.getFileSystem(job.getConfiguration());
			ContentSummary cs = fs.getContentSummary(path);
			if (cs.getLength() > 0) {
				generateSplits(job, minSize, maxSize, splits, file);	
			} 
	    }
	}
}
 
Example 4  Project: incubator-tajo   File: LogicalPlanner.java
private void updatePhysicalInfo(TableDesc desc) {
  if (desc.getPath() != null) {
    try {
      FileSystem fs = desc.getPath().getFileSystem(new Configuration());
      FileStatus status = fs.getFileStatus(desc.getPath());
      if (desc.getStats() != null && (status.isDirectory() || status.isFile())) {
        ContentSummary summary = fs.getContentSummary(desc.getPath());
        if (summary != null) {
          long volume = summary.getLength();
          desc.getStats().setNumBytes(volume);
        }
      }
    } catch (Throwable t) {
      LOG.warn(t);
    }
  }
}
 
Example 5  Project: kylin-on-parquet-v2   File: MRHiveDictUtil.java
private static long getFileSize(String hdfsUrl) throws IOException {
    Configuration configuration = new Configuration();
    Path path = new Path(hdfsUrl);
    FileSystem fs = path.getFileSystem(configuration);
    ContentSummary contentSummary = fs.getContentSummary(path);
    return contentSummary.getLength();
}
 
Example 6  Project: kylin-on-parquet-v2   File: CreateFlatHiveTableStep.java
private long getFileSize(String hdfsUrl) throws IOException {
    Configuration configuration = new Configuration();
    Path path = new Path(hdfsUrl);
    FileSystem fs = path.getFileSystem(configuration);
    ContentSummary contentSummary = fs.getContentSummary(path);
    long length = contentSummary.getLength();
    return length;
}
 
Example 7  Project: kylin-on-parquet-v2   File: StorageCleanupJob.java
private void cleanUnusedHdfsFiles() throws IOException {

    UnusedHdfsFileCollector collector = new UnusedHdfsFileCollector();
    collectUnusedHdfsFiles(collector);

    if (collector.list.isEmpty()) {
        logger.info("No HDFS files to clean up");
        return;
    }

    long garbageBytes = 0;
    List<String> garbageList = new ArrayList<>();

    for (Pair<FileSystem, String> entry : collector.list) {
        FileSystem fs = entry.getKey();
        String path = entry.getValue();
        try {
            garbageList.add(path);
            ContentSummary sum = fs.getContentSummary(new Path(path));
            if (sum != null)
                garbageBytes += sum.getLength();

            if (delete) {
                logger.info("Deleting HDFS path " + path);
                fs.delete(new Path(path), true);
            } else {
                logger.info("Dry run, pending delete HDFS path " + path);
            }
        } catch (IOException e) {
            logger.error("Error dealing unused HDFS path " + path, e);
        }
    }

    hdfsGarbageFileBytes = garbageBytes;
    hdfsGarbageFiles = garbageList;
}
 
Example 8  Project: kylin   File: MRHiveDictUtil.java
private static long getFileSize(String hdfsUrl) throws IOException {
    Configuration configuration = new Configuration();
    Path path = new Path(hdfsUrl);
    FileSystem fs = path.getFileSystem(configuration);
    ContentSummary contentSummary = fs.getContentSummary(path);
    return contentSummary.getLength();
}
 
Example 9  Project: kylin   File: CreateFlatHiveTableStep.java
private long getFileSize(String hdfsUrl) throws IOException {
    Configuration configuration = new Configuration();
    Path path = new Path(hdfsUrl);
    FileSystem fs = path.getFileSystem(configuration);
    ContentSummary contentSummary = fs.getContentSummary(path);
    long length = contentSummary.getLength();
    return length;
}
 
Example 10  Project: kylin   File: StorageCleanupJob.java
private void cleanUnusedHdfsFiles() throws IOException {

    UnusedHdfsFileCollector collector = new UnusedHdfsFileCollector();
    collectUnusedHdfsFiles(collector);

    if (collector.list.isEmpty()) {
        logger.info("No HDFS files to clean up");
        return;
    }

    long garbageBytes = 0;
    List<String> garbageList = new ArrayList<>();

    for (Pair<FileSystem, String> entry : collector.list) {
        FileSystem fs = entry.getKey();
        String path = entry.getValue();
        try {
            garbageList.add(path);
            ContentSummary sum = fs.getContentSummary(new Path(path));
            if (sum != null)
                garbageBytes += sum.getLength();

            if (delete) {
                logger.info("Deleting HDFS path " + path);
                fs.delete(new Path(path), true);
            } else {
                logger.info("Dry run, pending delete HDFS path " + path);
            }
        } catch (IOException e) {
            logger.error("Error dealing unused HDFS path " + path, e);
        }
    }

    hdfsGarbageFileBytes = garbageBytes;
    hdfsGarbageFiles = garbageList;
}
 
Example 11
@Override
public long getOnDiskSize() throws IOException {
  Path hdfsDirPath = _shardContext.getHdfsDirPath();
  Configuration configuration = _tableContext.getConfiguration();
  FileSystem fileSystem = hdfsDirPath.getFileSystem(configuration);
  ContentSummary contentSummary = fileSystem.getContentSummary(hdfsDirPath);
  return contentSummary.getLength();
}
 
Example 12  Project: tajo   File: Query.java
public static long getTableVolume(TajoConf systemConf, Path tablePath) throws IOException {
  FileSystem fs = tablePath.getFileSystem(systemConf);
  ContentSummary directorySummary = fs.getContentSummary(tablePath);
  return directorySummary.getLength();
}
 
Example 13  Project: incubator-tajo   File: Query.java
private long getTableVolume(TajoConf systemConf, Path tablePath) throws IOException {
  FileSystem fs = tablePath.getFileSystem(systemConf);
  ContentSummary directorySummary = fs.getContentSummary(tablePath);
  return directorySummary.getLength();
}