The following code examples show how org.apache.hadoop.fs.ContentSummary#getLength() is used in open source projects.
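ContentSummary#getLength() returns the total logical size in bytes of the files under a path; for a directory this is the combined length of every file in its subtree. Note that this is the pre-replication size, unlike ContentSummary#getSpaceConsumed(), which accounts for replication. Before the project-specific examples, here is a minimal self-contained sketch of the basic call pattern; the path /tmp/data is hypothetical:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ContentSummaryLengthExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path path = new Path("/tmp/data"); // hypothetical path
        FileSystem fs = path.getFileSystem(conf);
        // getContentSummary() walks the entire subtree, so it can be
        // expensive for very large directories.
        ContentSummary summary = fs.getContentSummary(path);
        System.out.println("Total bytes under " + path + ": " + summary.getLength());
    }
}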
private int calReducerNum(Path input) {
    try {
        long bytesPerReducer = DEFAULT_SIZE_PER_REDUCER;
        FileSystem fs = FileSystem.get(job.getConfiguration());
        ContentSummary cs = fs.getContentSummary(input);
        long totalInputFileSize = cs.getLength();
        // Ceiling division: any remainder gets its own reducer, then clamp to [1, MAX_REDUCERS]
        int reducers = (int) ((totalInputFileSize + bytesPerReducer - 1) / bytesPerReducer);
        reducers = Math.max(1, reducers);
        reducers = Math.min(MAX_REDUCERS, reducers);
        logger.info("BytesPerReducer={}, maxReducers={}, totalInputFileSize={}, setReducers={}", bytesPerReducer,
                MAX_REDUCERS, totalInputFileSize, reducers);
        return reducers;
    } catch (IOException e) {
        logger.error("error when calculating reducer number", e);
    }
    return 1;
}
/**
 * Recursively finds files inside directories and generates splits for the non-empty ones.
 * @param job the JobContext used to read the configuration of the running job
 * @param minSize the minimum file block size
 * @param maxSize the maximum file block size
 * @param splits the list of splits being generated
 * @param fileStatusList the list of FileStatus entries to process
 * @throws IOException Signals that an I/O exception has occurred.
 */
public void setData(JobContext job, long minSize, long maxSize,
        List<InputSplit> splits, List<FileStatus> fileStatusList) throws IOException {
    for (FileStatus file : fileStatusList) {
        if (file.isDirectory()) {
            // Recurse into subdirectories
            Path dirPath = file.getPath();
            FileStatus[] fileArray = dirPath.getFileSystem(job.getConfiguration()).listStatus(dirPath);
            setData(job, minSize, maxSize, splits, Arrays.asList(fileArray));
        } else {
            // Check whether the file is empty; only non-empty files produce splits
            Path path = file.getPath();
            FileSystem fs = path.getFileSystem(job.getConfiguration());
            ContentSummary cs = fs.getContentSummary(path);
            if (cs.getLength() > 0) {
                generateSplits(job, minSize, maxSize, splits, file);
            }
        }
    }
}
private void updatePhysicalInfo(TableDesc desc) {
    if (desc.getPath() != null) {
        try {
            FileSystem fs = desc.getPath().getFileSystem(new Configuration());
            FileStatus status = fs.getFileStatus(desc.getPath());
            if (desc.getStats() != null && (status.isDirectory() || status.isFile())) {
                ContentSummary summary = fs.getContentSummary(desc.getPath());
                if (summary != null) {
                    long volume = summary.getLength();
                    desc.getStats().setNumBytes(volume);
                }
            }
        } catch (Throwable t) {
            LOG.warn(t);
        }
    }
}
private static long getFileSize(String hdfsUrl) throws IOException {
    Configuration configuration = new Configuration();
    Path path = new Path(hdfsUrl);
    FileSystem fs = path.getFileSystem(configuration);
    ContentSummary contentSummary = fs.getContentSummary(path);
    return contentSummary.getLength();
}
private long getFileSize(String hdfsUrl) throws IOException {
    Configuration configuration = new Configuration();
    Path path = new Path(hdfsUrl);
    FileSystem fs = path.getFileSystem(configuration);
    ContentSummary contentSummary = fs.getContentSummary(path);
    long length = contentSummary.getLength();
    return length;
}
private void cleanUnusedHdfsFiles() throws IOException {
    UnusedHdfsFileCollector collector = new UnusedHdfsFileCollector();
    collectUnusedHdfsFiles(collector);
    if (collector.list.isEmpty()) {
        logger.info("No HDFS files to clean up");
        return;
    }
    long garbageBytes = 0;
    List<String> garbageList = new ArrayList<>();
    for (Pair<FileSystem, String> entry : collector.list) {
        FileSystem fs = entry.getKey();
        String path = entry.getValue();
        try {
            garbageList.add(path);
            // Record the size of the garbage before (optionally) deleting it
            ContentSummary sum = fs.getContentSummary(new Path(path));
            if (sum != null) {
                garbageBytes += sum.getLength();
            }
            if (delete) {
                logger.info("Deleting HDFS path " + path);
                fs.delete(new Path(path), true);
            } else {
                logger.info("Dry run, pending delete HDFS path " + path);
            }
        } catch (IOException e) {
            logger.error("Error dealing with unused HDFS path " + path, e);
        }
    }
    hdfsGarbageFileBytes = garbageBytes;
    hdfsGarbageFiles = garbageList;
}
@Override
public long getOnDiskSize() throws IOException {
    Path hdfsDirPath = _shardContext.getHdfsDirPath();
    Configuration configuration = _tableContext.getConfiguration();
    FileSystem fileSystem = hdfsDirPath.getFileSystem(configuration);
    ContentSummary contentSummary = fileSystem.getContentSummary(hdfsDirPath);
    return contentSummary.getLength();
}
public static long getTableVolume(TajoConf systemConf, Path tablePath) throws IOException {
    FileSystem fs = tablePath.getFileSystem(systemConf);
    ContentSummary directorySummary = fs.getContentSummary(tablePath);
    return directorySummary.getLength();
}
private long getTableVolume(TajoConf systemConf, Path tablePath) throws IOException {
    FileSystem fs = tablePath.getFileSystem(systemConf);
    ContentSummary directorySummary = fs.getContentSummary(tablePath);
    return directorySummary.getLength();
}