Listed below is example code for org.apache.hadoop.fs.VolumeId used together with org.apache.hadoop.fs.BlockStorageLocation; the full source can be viewed on GitHub.
/**
 * Helper method to combine a list of {@link LocatedBlock} with associated
 * {@link VolumeId} information to form a list of {@link BlockStorageLocation}.
 */
static BlockStorageLocation[] convertToVolumeBlockLocations(
    List<LocatedBlock> blocks,
    Map<LocatedBlock, List<VolumeId>> blockVolumeIds) throws IOException {
  // Construct the final return value of VolumeBlockLocation[]
  BlockLocation[] locations = DFSUtil.locatedBlocks2Locations(blocks);
  List<BlockStorageLocation> volumeBlockLocs =
      new ArrayList<BlockStorageLocation>(locations.length);
  for (int i = 0; i < locations.length; i++) {
    LocatedBlock locBlock = blocks.get(i);
    List<VolumeId> volumeIds = blockVolumeIds.get(locBlock);
    BlockStorageLocation bsLoc = new BlockStorageLocation(locations[i],
        volumeIds.toArray(new VolumeId[0]));
    volumeBlockLocs.add(bsLoc);
  }
  return volumeBlockLocs.toArray(new BlockStorageLocation[] {});
}
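The helper assumes that blockVolumeIds holds one entry per replica, keyed by the same LocatedBlock instances that appear in blocks. A minimal sketch of building such a map before the call; the wrapper method convertWithUnknownVolumes is hypothetical, and in the real client the map comes from BlockStorageLocationUtil.associateVolumeIdsWithBlocks:

// Hypothetical sketch: build the per-block VolumeId map the helper expects,
// with one slot per replica. In the real client this map is produced by
// BlockStorageLocationUtil.associateVolumeIdsWithBlocks.
static BlockStorageLocation[] convertWithUnknownVolumes(
    List<LocatedBlock> blocks) throws IOException {
  Map<LocatedBlock, List<VolumeId>> blockVolumeIds =
      new HashMap<LocatedBlock, List<VolumeId>>();
  for (LocatedBlock b : blocks) {
    // One null slot per replica marks volumes that could not be determined.
    blockVolumeIds.put(b, new ArrayList<VolumeId>(
        Collections.<VolumeId>nCopies(b.getLocations().length, null)));
  }
  return convertToVolumeBlockLocations(blocks, blockVolumeIds);
}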
private void printBlockMetadata(BlockLocation blockLocation, String[] dataDirs)
    throws IOException {
  System.out.println(" Offset: " + blockLocation.getOffset());
  System.out.println(" Length: " + blockLocation.getLength());

  String[] cachedHosts = blockLocation.getCachedHosts();
  if (cachedHosts.length == 0) {
    System.out.println(" No cached hosts");
  }

  System.out.println(" Replicas:");
  VolumeId[] volumeIds = blockLocation instanceof BlockStorageLocation
      ? ((BlockStorageLocation) blockLocation).getVolumeIds() : null;
  String[] hosts = blockLocation.getHosts();
  String[] names = blockLocation.getNames();
  String[] topologyPaths = blockLocation.getTopologyPaths();
  for (int i = 0; i < topologyPaths.length; i++) {
    int diskId = volumeIds != null
        ? DistributedFileSystemMetadata.getDiskId(volumeIds[i]) : -1;

    System.out.println(" Replica (" + i + "):");
    System.out.println(" Host: " + hosts[i]);

    if (diskId == -1) {
      System.out.println(" DiskId: unknown");
    } else if (dataDirs != null && diskId < dataDirs.length) {
      System.out.println(" Location: " + dataDirs[diskId] + " (DiskId: " + diskId + ")");
    } else {
      System.out.println(" DiskId: " + diskId);
    }

    System.out.println(" Name: " + names[i]);
    System.out.println(" TopologyPaths: " + topologyPaths[i]);
  }

  if (cachedHosts.length > 0) {
    System.out.println(" Cached hosts:");
    for (String cachedHost : cachedHosts) {
      System.out.println(" Host: " + cachedHost);
    }
  }
}
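A possible call site for the printer, assuming a previously collected list of locations and an optional array of datanode data directories; the method name and inputs here are hypothetical:

// Hypothetical driver: dataDirs would mirror the datanodes'
// dfs.datanode.data.dir entries, or null when unknown.
private void printAll(List<BlockLocation> blockLocations, String[] dataDirs)
    throws IOException {
  for (BlockLocation location : blockLocations) {
    System.out.println("Block:");
    printBlockMetadata(location, dataDirs);
  }
}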
@Test
public void computeHostsDiskIdsCount() throws IOException {
  List<BlockLocation> blockStorageLocations = new LinkedList<>();
  blockStorageLocations.add(new BlockStorageLocation(
      new BlockLocation(null, new String[]{"host1", "host2"}, 0, 0),
      new VolumeId[]{new TVolumeId("3"), new TVolumeId("4")}));
  blockStorageLocations.add(new BlockStorageLocation(
      new BlockLocation(null, new String[]{"host2", "host3"}, 0, 0),
      new VolumeId[]{new TVolumeId("4"), new TVolumeId("5")}));
  blockStorageLocations.add(new BlockStorageLocation(
      new BlockLocation(null, new String[]{"host10", "host2"}, 0, 0),
      new VolumeId[]{new TVolumeId("3"), new TVolumeId("4")}));
  blockStorageLocations.add(new BlockStorageLocation(
      new BlockLocation(null, new String[]{"host10", "host3"}, 0, 0),
      new VolumeId[]{new TVolumeId("8"), new TVolumeId("5")}));
  // A plain BlockLocation carries no VolumeIds, so its replicas are
  // expected to be counted under disk id -1.
  blockStorageLocations.add(new BlockLocation(null,
      new String[]{"host10", "host3", "host3"}, 0, 0));

  HashMap<String, HashMap<Integer, Integer>> hosts_diskids =
      DistributedFileSystemMetadata.computeHostsDiskIdsCount(blockStorageLocations);

  Assert.assertEquals(1, hosts_diskids.get("host1").get(3).intValue());
  Assert.assertEquals(3, hosts_diskids.get("host2").get(4).intValue());
  Assert.assertEquals(2, hosts_diskids.get("host3").get(5).intValue());
  Assert.assertEquals(2, hosts_diskids.get("host3").get(-1).intValue());
  Assert.assertEquals(1, hosts_diskids.get("host10").get(3).intValue());
  Assert.assertEquals(1, hosts_diskids.get("host10").get(8).intValue());
  Assert.assertEquals(1, hosts_diskids.get("host10").get(-1).intValue());
}
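The test depends on a TVolumeId stub that is not shown in this listing. A minimal sketch of what such a stub could look like, assuming VolumeId in this Hadoop version is a plain Comparable<VolumeId> interface:

// Hypothetical test stub: wraps a string id so the test can fabricate
// distinguishable VolumeIds without contacting real datanodes.
static class TVolumeId implements VolumeId {
  private final String id;

  TVolumeId(String id) {
    this.id = id;
  }

  @Override
  public int compareTo(VolumeId other) {
    return toString().compareTo(other.toString());
  }

  @Override
  public boolean equals(Object o) {
    return o instanceof TVolumeId && id.equals(((TVolumeId) o).id);
  }

  @Override
  public int hashCode() {
    return id.hashCode();
  }

  @Override
  public String toString() {
    return id;
  }
}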
/**
 * Get block location information about a list of {@link HdfsBlockLocation}.
 * Used by {@link DistributedFileSystem#getFileBlockStorageLocations(List)} to
 * get {@link BlockStorageLocation}s for blocks returned by
 * {@link DistributedFileSystem#getFileBlockLocations(org.apache.hadoop.fs.FileStatus, long, long)}.
 *
 * This is done by making a round of RPCs to the associated datanodes, asking
 * for the volume of each block replica. The returned array of
 * {@link BlockStorageLocation} exposes this information as a
 * {@link VolumeId}.
 *
 * @param blockLocations
 *          target blocks on which to query volume location information
 * @return volumeBlockLocations original block array augmented with additional
 *         volume location information for each replica.
 */
public BlockStorageLocation[] getBlockStorageLocations(
    List<BlockLocation> blockLocations) throws IOException,
    UnsupportedOperationException, InvalidBlockTokenException {
  if (!getConf().getHdfsBlocksMetadataEnabled) {
    throw new UnsupportedOperationException("Datanode-side support for " +
        "getVolumeBlockLocations() must also be enabled in the client " +
        "configuration.");
  }

  // Downcast blockLocations and fetch out required LocatedBlock(s)
  List<LocatedBlock> blocks = new ArrayList<LocatedBlock>();
  for (BlockLocation loc : blockLocations) {
    if (!(loc instanceof HdfsBlockLocation)) {
      throw new ClassCastException("DFSClient#getVolumeBlockLocations " +
          "expected to be passed HdfsBlockLocations");
    }
    HdfsBlockLocation hdfsLoc = (HdfsBlockLocation) loc;
    blocks.add(hdfsLoc.getLocatedBlock());
  }

  // Re-group the LocatedBlocks to be grouped by datanodes, with the values
  // a list of the LocatedBlocks on the datanode.
  Map<DatanodeInfo, List<LocatedBlock>> datanodeBlocks =
      new LinkedHashMap<DatanodeInfo, List<LocatedBlock>>();
  for (LocatedBlock b : blocks) {
    for (DatanodeInfo info : b.getLocations()) {
      if (!datanodeBlocks.containsKey(info)) {
        datanodeBlocks.put(info, new ArrayList<LocatedBlock>());
      }
      List<LocatedBlock> l = datanodeBlocks.get(info);
      l.add(b);
    }
  }

  // Make RPCs to the datanodes to get volume locations for their replicas
  TraceScope scope =
      Trace.startSpan("getBlockStorageLocations", traceSampler);
  Map<DatanodeInfo, HdfsBlocksMetadata> metadatas;
  try {
    metadatas = BlockStorageLocationUtil.
        queryDatanodesForHdfsBlocksMetadata(conf, datanodeBlocks,
            getConf().getFileBlockStorageLocationsNumThreads,
            getConf().getFileBlockStorageLocationsTimeoutMs,
            getConf().connectToDnViaHostname);
    if (LOG.isTraceEnabled()) {
      LOG.trace("metadata returned: "
          + Joiner.on("\n").withKeyValueSeparator("=").join(metadatas));
    }
  } finally {
    scope.close();
  }

  // Regroup the returned VolumeId metadata to again be grouped by
  // LocatedBlock rather than by datanode
  Map<LocatedBlock, List<VolumeId>> blockVolumeIds = BlockStorageLocationUtil
      .associateVolumeIdsWithBlocks(blocks, metadatas);

  // Combine original BlockLocations with new VolumeId information
  BlockStorageLocation[] volumeBlockLocations = BlockStorageLocationUtil
      .convertToVolumeBlockLocations(blocks, blockVolumeIds);

  return volumeBlockLocations;
}
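The UnsupportedOperationException above fires when the feature flag is off. A short example of enabling it on the client side, using the property name quoted in the getFileBlockStorageLocations javadoc below; the same key must also be set to true on the datanodes:

Configuration conf = new Configuration();
// Required on both the datanodes and the client; without it the call above
// throws UnsupportedOperationException.
conf.setBoolean("dfs.datanode.hdfs-blocks-metadata.enabled", true);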
public LinkedList<BlockLocation> getBlockLocations(Path path) throws IOException {
  LOG.info("Collecting block locations...");

  LinkedList<BlockLocation> blockLocations = new LinkedList<BlockLocation>();
  RemoteIterator<LocatedFileStatus> statuses = listFiles(path, true);
  int hasNextCode = hasNextCode(statuses);
  while (hasNextCode > 0) {
    if (hasNextCode > 1) {
      hasNextCode = hasNextCode(statuses);
      continue;
    }

    LocatedFileStatus fileStatus = statuses.next();
    if (fileStatus.isFile()) {
      BlockLocation[] blockLocations_tmp =
          getFileBlockLocations(fileStatus, 0, fileStatus.getLen());
      blockLocations.addAll(Arrays.asList(blockLocations_tmp));
    }

    int size = blockLocations.size();
    if (size > 0 && size % 5000 == 0) {
      LOG.info("Collected " + size + " locations. Still in progress...");
    }
    if (size >= MAX_NUMBER_OF_LOCATIONS) {
      LOG.info("Reached max number of locations to collect. "
          + "The amount will be representative enough.");
      break;
    }

    hasNextCode = hasNextCode(statuses);
  }
  LOG.info("Collected " + blockLocations.size() + " locations.");

  if (isHdfsBlocksMetadataEnabled()) {
    BlockStorageLocation[] blockStorageLocations =
        getFileBlockStorageLocations(blockLocations);
    blockLocations.clear();
    blockLocations.addAll(Arrays.asList(blockStorageLocations));
  } else {
    LOG.error("VolumeId/DiskId cannot be collected since "
        + "dfs.datanode.hdfs-blocks-metadata.enabled is not enabled.");
  }

  return blockLocations;
}
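A possible invocation of the collector, assuming the method lives on a DistributedFileSystem subclass with a no-arg constructor; the class name, URI, and path are placeholders:

// Hypothetical usage: FileSystem.initialize is the standard way to bind a
// FileSystem instance to a cluster URI and configuration.
DistributedFileSystemMetadata fs = new DistributedFileSystemMetadata();
fs.initialize(URI.create("hdfs://namenode:8020"), conf);
List<BlockLocation> locations = fs.getBlockLocations(new Path("/user/data"));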
/**
 * Used to query storage location information for a list of blocks. This list
 * of blocks is normally constructed via a series of calls to
 * {@link DistributedFileSystem#getFileBlockLocations(Path, long, long)} to
 * get the blocks for ranges of a file.
 *
 * The returned array of {@link BlockStorageLocation} augments
 * {@link BlockLocation} with a {@link VolumeId} per block replica. The
 * VolumeId specifies the volume on the datanode on which the replica resides.
 * The VolumeId associated with a replica may be null because volume
 * information can be unavailable if the corresponding datanode is down or
 * if the requested block is not found.
 *
 * This API is unstable, and datanode-side support is disabled by default. It
 * can be enabled by setting "dfs.datanode.hdfs-blocks-metadata.enabled" to
 * true.
 *
 * @param blocks
 *          List of target BlockLocations to query volume location information
 * @return volumeBlockLocations Augmented array of
 *         {@link BlockStorageLocation}s containing additional volume location
 *         information for each replica of each block.
 */
@InterfaceStability.Unstable
public BlockStorageLocation[] getFileBlockStorageLocations(
    List<BlockLocation> blocks) throws IOException,
    UnsupportedOperationException, InvalidBlockTokenException {
  return dfs.getBlockStorageLocations(blocks);
}
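To tie the pieces together, a hedged end-to-end sketch: collect the BlockLocations for one file, then upgrade them to BlockStorageLocations. The path and cluster details are placeholders:

DistributedFileSystem dfs = (DistributedFileSystem) FileSystem.get(conf);
FileStatus status = dfs.getFileStatus(new Path("/user/data/part-00000"));
List<BlockLocation> blocks = Arrays.asList(
    dfs.getFileBlockLocations(status, 0, status.getLen()));
BlockStorageLocation[] storageLocations =
    dfs.getFileBlockStorageLocations(blocks);
for (BlockStorageLocation loc : storageLocations) {
  // Each location now carries one VolumeId per replica; entries may be null
  // when a datanode was down or the block was not found.
  VolumeId[] volumeIds = loc.getVolumeIds();
}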