The following lists example usages of the org.apache.hadoop.hbase.io.hfile.HFile API class, or you can follow the links to view the source code on GitHub.
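As a condensed sketch of the pattern most of the snippets below share (create a writer through HFile.getWriterFactory with an HFileContext, append cells in key order, then read the file back through HFile.createReader), the following minimal example writes and reads a single cell against an HBase 2.x style API. The /tmp path, row key, and values are placeholders chosen for illustration; they do not come from any of the projects quoted below.

// Minimal write/read round trip with the HFile API; the path and cell contents are illustrative.
private static void writeAndReadExample(Configuration conf, FileSystem fs) throws IOException {
  Path path = new Path("/tmp/example.hfile"); // placeholder location
  HFileContext context = new HFileContextBuilder().build();
  HFile.Writer writer = HFile.getWriterFactory(conf, new CacheConfig(conf))
      .withPath(fs, path)
      .withFileContext(context)
      .create();
  try {
    // Cells must be appended in key order.
    writer.append(new KeyValue(Bytes.toBytes("row1"), Bytes.toBytes("f"),
        Bytes.toBytes("q"), System.currentTimeMillis(), Bytes.toBytes("value1")));
  } finally {
    writer.close();
  }
  HFile.Reader reader = HFile.createReader(fs, path, new CacheConfig(conf), true, conf);
  try {
    HFileScanner scanner = reader.getScanner(false, false);
    scanner.seekTo(); // position at the first cell
    System.out.println(scanner.getCell());
  } finally {
    reader.close();
  }
}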
/**
 * This is borrowed from DefaultCompactor.
 *
 * @param compression the compression algorithm to use, or null to fall back to the default
 * @param includeMVCCReadpoint whether to include the MVCC read point in the new file
 * @param includesTag whether cells in the new file carry tags
 * @param cryptoContext the encryption context for the new file
 * @return the HFileContext to use when creating the writer
 */
private HFileContext createFileContext(Compression.Algorithm compression,
boolean includeMVCCReadpoint, boolean includesTag, Encryption.Context cryptoContext) {
if (compression == null) {
compression = HFile.DEFAULT_COMPRESSION_ALGORITHM;
}
HFileContext hFileContext = new HFileContextBuilder()
.withIncludesMvcc(includeMVCCReadpoint)
.withIncludesTags(includesTag)
.withCompression(compression)
.withCompressTags(store.getColumnFamilyDescriptor().isCompressTags())
.withChecksumType(HStore.getChecksumType(conf))
.withBytesPerCheckSum(HStore.getBytesPerChecksum(conf))
.withBlockSize(store.getColumnFamilyDescriptor().getBlocksize())
.withHBaseCheckSum(true)
.withDataBlockEncoding(store.getColumnFamilyDescriptor().getDataBlockEncoding())
.withEncryptionContext(cryptoContext)
.withCreateTime(EnvironmentEdgeManager.currentTime())
.build();
return hFileContext;
}
public HFileSortedOplogWriter() throws IOException {
writer = HFile.getWriterFactory(hconf, hcache)
.withPath(fs, path)
.withBlockSize(sopConfig.getBlockSize())
.withBytesPerChecksum(sopConfig.getBytesPerChecksum())
.withChecksumType(HFileSortedOplogFactory.convertChecksum(sopConfig.getChecksum()))
// .withComparator(sopConfig.getComparator())
.withCompression(HFileSortedOplogFactory.convertCompression(sopConfig.getCompression()))
.withDataBlockEncoder(HFileSortedOplogFactory.convertEncoding(sopConfig.getKeyEncoding()))
.create();
bfw = sopConfig.isBloomFilterEnabled() ?
// BloomFilterFactory.createGeneralBloomAtWrite(hconf, hcache, BloomType.ROW,
// 0, writer, sopConfig.getComparator())
BloomFilterFactory.createGeneralBloomAtWrite(hconf, hcache, BloomType.ROW,
0, writer)
: null;
}
public HFileSortedOplogWriter(int keys) throws IOException {
try {
int hfileBlockSize = Integer.getInteger(
HoplogConfig.HFILE_BLOCK_SIZE_CONF, (1 << 16));
Algorithm compress = Algorithm.valueOf(System.getProperty(HoplogConfig.COMPRESSION,
HoplogConfig.COMPRESSION_DEFAULT));
// ByteComparator bc = new ByteComparator();
writer = HFile.getWriterFactory(conf, cacheConf)
.withPath(fsProvider.getFS(), path)
.withBlockSize(hfileBlockSize)
// .withComparator(bc)
.withCompression(compress)
.create();
bfw = BloomFilterFactory.createGeneralBloomAtWrite(conf, cacheConf, BloomType.ROW, keys,
writer);
logger.fine("Created hoplog writer with compression " + compress);
} catch (IOException e) {
logger.fine("IO Error while creating writer");
throw e;
}
}
@Override
public void setUpCluster() throws Exception {
util = getTestingUtil(null);
Configuration conf = util.getConfiguration();
if (!util.isDistributedCluster()) {
// Inject required configuration if we are not running in distributed mode
conf.setInt(HFile.FORMAT_VERSION_KEY, 3);
conf.set(HConstants.CRYPTO_KEYPROVIDER_CONF_KEY, KeyProviderForTesting.class.getName());
conf.set(HConstants.CRYPTO_MASTERKEY_NAME_CONF_KEY, "hbase");
conf.setClass("hbase.regionserver.hlog.reader.impl", SecureProtobufLogReader.class,
Reader.class);
conf.setClass("hbase.regionserver.hlog.writer.impl", SecureProtobufLogWriter.class,
Writer.class);
conf.setBoolean(HConstants.ENABLE_WAL_ENCRYPTION, true);
}
// Check if the cluster configuration can support this test
try {
EncryptionTest.testEncryption(conf, "AES", null);
} catch (Exception e) {
LOG.warn("Encryption configuration test did not pass, skipping test", e);
return;
}
super.setUpCluster();
initialized = true;
}
@Override
public void start(CoprocessorEnvironment env) throws IOException {
this.conf = env.getConfiguration();
authorizationEnabled = AccessChecker.isAuthorizationSupported(conf);
if (!authorizationEnabled) {
LOG.warn("The VisibilityController has been loaded with authorization checks disabled.");
}
if (HFile.getFormatVersion(conf) < HFile.MIN_FORMAT_VERSION_WITH_TAGS) {
throw new RuntimeException("A minimum HFile version of " + HFile.MIN_FORMAT_VERSION_WITH_TAGS
+ " is required to persist visibility labels. Consider setting " + HFile.FORMAT_VERSION_KEY
+ " accordingly.");
}
// Do not create for master CPs
if (!(env instanceof MasterCoprocessorEnvironment)) {
visibilityLabelService = VisibilityLabelServiceManager.getInstance()
.getVisibilityLabelService(this.conf);
}
}
/**
 * Creates a new Delete Family Bloom filter at the time of
 * {@link org.apache.hadoop.hbase.regionserver.HStoreFile} writing.
 * @param conf the configuration to read Bloom filter settings from
 * @param cacheConf the cache configuration for the store file being written
 * @param maxKeys an estimate of the number of keys we expect to insert.
 *          Irrelevant if compound Bloom filters are enabled.
 * @param writer the HFile writer
 * @return the new Bloom filter, or null if Bloom filters are disabled
 *         or one could not be created
 */
public static BloomFilterWriter createDeleteBloomAtWrite(Configuration conf,
CacheConfig cacheConf, int maxKeys, HFile.Writer writer) {
if (!isDeleteFamilyBloomEnabled(conf)) {
LOG.info("Delete Bloom filters are disabled by configuration for "
+ writer.getPath()
+ (conf == null ? " (configuration is null)" : ""));
return null;
}
float err = getErrorRate(conf);
int maxFold = getMaxFold(conf);
// In case of compound Bloom filters we ignore the maxKeys hint.
CompoundBloomFilterWriter bloomWriter = new CompoundBloomFilterWriter(getBloomBlockSize(conf),
err, Hash.getHashType(conf), maxFold, cacheConf.shouldCacheBloomsOnWrite(),
null, BloomType.ROW);
writer.addInlineBlockWriter(bloomWriter);
return bloomWriter;
}
@Override
public void perform() throws Exception {
getLogger().info("Start deleting data files");
FileSystem fs = CommonFSUtils.getRootDirFileSystem(getConf());
Path rootDir = CommonFSUtils.getRootDir(getConf());
Path defaultDir = rootDir.suffix("/data/default");
RemoteIterator<LocatedFileStatus> iterator = fs.listFiles(defaultDir, true);
while (iterator.hasNext()){
LocatedFileStatus status = iterator.next();
if(!HFile.isHFileFormat(fs, status.getPath())){
continue;
}
if(RandomUtils.nextFloat(0, 100) > chance){
continue;
}
fs.delete(status.getPath(), true);
getLogger().info("Deleting {}", status.getPath());
}
getLogger().info("Done deleting data files");
}
private HFileContext createFileContext(Compression.Algorithm compression,
boolean includeMVCCReadpoint, boolean includesTag, Encryption.Context cryptoContext) {
if (compression == null) {
compression = HFile.DEFAULT_COMPRESSION_ALGORITHM;
}
HFileContext hFileContext = new HFileContextBuilder()
.withIncludesMvcc(includeMVCCReadpoint)
.withIncludesTags(includesTag)
.withCompression(compression)
.withCompressTags(family.isCompressTags())
.withChecksumType(checksumType)
.withBytesPerCheckSum(bytesPerChecksum)
.withBlockSize(blocksize)
.withHBaseCheckSum(true)
.withDataBlockEncoding(family.getDataBlockEncoding())
.withEncryptionContext(cryptoContext)
.withCreateTime(EnvironmentEdgeManager.currentTime())
.withColumnFamily(family.getName())
.withTableName(region.getTableDescriptor()
.getTableName().getName())
.withCellComparator(this.comparator)
.build();
return hFileContext;
}
private void createHFile(Path path,
byte[] family, byte[] qualifier,
byte[] startKey, byte[] endKey, int numRows) throws IOException {
HFile.Writer writer = null;
long now = System.currentTimeMillis();
try {
HFileContext context = new HFileContextBuilder().build();
writer = HFile.getWriterFactory(conf, new CacheConfig(conf)).withPath(fs, path)
.withFileContext(context).create();
// subtract 2 since numRows doesn't include boundary keys
for (byte[] key : Bytes.iterateOnSplits(startKey, endKey, true, numRows - 2)) {
KeyValue kv = new KeyValue(key, family, qualifier, now, key);
writer.append(kv);
}
} finally {
if (writer != null) {
writer.close();
}
}
}
@Override
void setUp() throws Exception {
HFileContextBuilder builder = new HFileContextBuilder()
.withCompression(HFileWriterImpl.compressionByName(codec))
.withBlockSize(RFILE_BLOCKSIZE);
if ("aes".equals(cipher)) {
byte[] cipherKey = new byte[AES.KEY_LENGTH];
new SecureRandom().nextBytes(cipherKey);
builder.withEncryptionContext(Encryption.newContext(conf)
.setCipher(Encryption.getCipher(conf, cipher))
.setKey(cipherKey));
} else if (!"none".equals(cipher)) {
throw new IOException("Cipher " + cipher + " not supported.");
}
HFileContext hFileContext = builder.build();
writer = HFile.getWriterFactoryNoCache(conf)
.withPath(fs, mf)
.withFileContext(hFileContext)
.create();
}
private static void createHFile(Configuration conf, FileSystem fs, Path path, byte[] family,
byte[] qualifier) throws IOException {
HFileContext context = new HFileContextBuilder().build();
HFile.Writer writer = HFile.getWriterFactory(conf, new CacheConfig(conf)).withPath(fs, path)
.withFileContext(context).create();
long now = System.currentTimeMillis();
try {
for (int i = 1; i <= 9; i++) {
KeyValue kv =
new KeyValue(Bytes.toBytes(i + ""), family, qualifier, now, Bytes.toBytes(i + ""));
writer.append(kv);
}
} finally {
writer.close();
}
}
/**
* Create an HFile with the given number of rows, each carrying the specified value.
*/
public static void createHFile(FileSystem fs, Path path, byte[] family,
byte[] qualifier, byte[] value, int numRows) throws IOException {
HFileContext context = new HFileContextBuilder().withBlockSize(BLOCKSIZE)
.withCompression(COMPRESSION)
.build();
HFile.Writer writer = HFile
.getWriterFactory(conf, new CacheConfig(conf))
.withPath(fs, path)
.withFileContext(context)
.create();
long now = System.currentTimeMillis();
try {
// write numRows cells, one per generated row key
for (int i = 0; i < numRows; i++) {
KeyValue kv = new KeyValue(rowkey(i), family, qualifier, now, value);
writer.append(kv);
}
writer.appendFileInfo(BULKLOAD_TIME_KEY, Bytes.toBytes(now));
} finally {
writer.close();
}
}
private String createHFileForFamilies(byte[] family) throws IOException {
HFile.WriterFactory hFileFactory = HFile.getWriterFactoryNoCache(conf);
// TODO We need a way to do this without creating files
File hFileLocation = testFolder.newFile();
FSDataOutputStream out = new FSDataOutputStream(new FileOutputStream(hFileLocation), null);
try {
hFileFactory.withOutputStream(out);
hFileFactory.withFileContext(new HFileContextBuilder().build());
HFile.Writer writer = hFileFactory.create();
try {
writer.append(new KeyValue(ExtendedCellBuilderFactory.create(CellBuilderType.DEEP_COPY)
.setRow(randomBytes)
.setFamily(family)
.setQualifier(randomBytes)
.setTimestamp(0L)
.setType(KeyValue.Type.Put.getCode())
.setValue(randomBytes)
.build()));
} finally {
writer.close();
}
} finally {
out.close();
}
return hFileLocation.getAbsoluteFile().getAbsolutePath();
}
/**
 * Checks a path to see if it is a valid hfile.
 *
 * @param p full Path to an HFile
 * @throws IOException if a connectivity-related error occurs
 */
protected void checkHFile(Path p) throws IOException {
HFile.Reader r = null;
try {
r = HFile.createReader(fs, p, cacheConf, true, conf);
} catch (CorruptHFileException che) {
LOG.warn("Found corrupt HFile " + p, che);
corrupted.add(p);
if (inQuarantineMode) {
Path dest = createQuarantinePath(p);
LOG.warn("Quarantining corrupt HFile " + p + " into " + dest);
boolean success = fs.mkdirs(dest.getParent());
success = success ? fs.rename(p, dest): false;
if (!success) {
failures.add(p);
} else {
quarantined.add(dest);
}
}
return;
} catch (FileNotFoundException fnfe) {
LOG.warn("HFile " + p + " was missing. Likely removed due to compaction/split?");
missing.add(p);
} finally {
hfilesChecked.addAndGet(1);
if (r != null) {
r.close(true);
}
}
}
/**
* Creates an inner HFileScanner object for a given HFile path
*/
public static HFileScanner createScanner(FileSystem fs, Path path) throws IOException {
Configuration config = fs.getConf();
HFile.Reader reader = HFile.createReader(fs, path, getCacheConfig(config), config);
HFileScanner scanner = reader.getScanner(false, false);
scanner.seekTo();
return scanner;
}
public HFileReader() throws IOException {
try {
FileSystem fs = fsProvider.getFS();
reader = HFile.createReader(fs, path, cacheConf);
fileInfo = reader.loadFileInfo();
closed = new AtomicBoolean(false);
validate();
if (reader.getComparator() instanceof DelegatingSerializedComparator) {
loadComparators((DelegatingSerializedComparator) reader.getComparator());
}
// read the old HLL if it exists so that a CardinalityMergeException will trigger a Major Compaction
byte[] hll = fileInfo.get(Meta.LOCAL_CARDINALITY_ESTIMATE.toBytes());
if (hll != null) {
entryCountEstimate = estimator = HyperLogLog.Builder.build(hll);
} else if ((hll = fileInfo.get(Meta.LOCAL_CARDINALITY_ESTIMATE_V2.toBytes())) != null) {
entryCountEstimate = estimator = HyperLogLog.Builder.build(hll);
} else {
estimator = new HyperLogLog(HdfsSortedOplogOrganizer.HLL_CONSTANT);
}
previousFS = fs;
} catch (IOException e) {
logger.fine("IO Error while creating reader", e);
throw e;
}
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
CacheConfig cacheConf = new CacheConfig(conf);
FileSystem fs = FileSystem.newInstance(conf);
FileStatus[] paths = fs.globStatus(new Path(args[0]));
long bytes = 0L;
long cells = 0L;
for (FileStatus status: paths) {
try {
HFile.Reader reader = HFile.createReader(fs, status.getPath(), cacheConf, conf);
bytes += reader.length();
cells += reader.getEntries();
System.out.println(status.getPath() + " >>> " + reader.length() + " bytes " + reader.getEntries() + " cells");
reader.close();
} catch (Exception e) {
continue;
}
}
System.out.println("TOTAL: " + cells + " cells " + bytes + " bytes " + (bytes/(double) cells) + " bytes/cell");
long ts = System.currentTimeMillis();
System.out.println(ts * 1000 + "// hbase.bytes{} " + bytes);
System.out.println(ts * 1000 + "// hbase.datapoints{} " + cells);
}
/**
 * Validates that the first non-empty partition hfile was written with the right partitioning
 * function. It reads several keys, then calculates the partition according to the partitioning
 * function the client provides. If the calculated partition number differs from the actual
 * partition number, an exception is thrown. If all partition hfiles are empty, an exception is
 * thrown.
 *
 * @param parts full absolute paths for all partitions
 * @param partitionerType type of partitioning function
 * @param numShards total number of partitions
 * @throws IOException if something goes wrong when reading the hfiles
 * @throws IllegalArgumentException if the partitioner type is wrong or all partitions are empty
 */
public void validate(List<Path> parts, PartitionerType partitionerType, int numShards)
throws IOException {
boolean hasNonEmptyPartition = false;
HColumnDescriptor columnDescriptor = new HColumnDescriptor();
// Disable block cache to ensure it reads the actual file content.
columnDescriptor.setBlockCacheEnabled(false);
for (int shardIndex = 0; shardIndex < parts.size(); shardIndex++) {
Path fileToBeValidated = parts.get(shardIndex);
HFile.Reader reader = null;
try {
FileSystem fs = FileSystem.newInstance(fileToBeValidated.toUri(), conf);
CacheConfig cc = new CacheConfig(conf, columnDescriptor);
reader = HFile.createReader(fs, fileToBeValidated, cc);
Partitioner partitioner = PartitionerFactory.getPartitioner(partitionerType);
byte[] rowKey = reader.getFirstRowKey();
if (rowKey == null) {
LOG.warn(String.format("empty partition %s", fileToBeValidated.toString()));
reader.close();
continue;
}
hasNonEmptyPartition = true;
BytesWritable key = new BytesWritable(rowKey);
int partition = partitioner.getPartition(key, null, numShards);
if (partition != shardIndex) {
throw new IllegalArgumentException(
String.format("wrong partition type %s for key %s in partition %d, expected %d",
partitionerType.toString(), new String(key.getBytes()), shardIndex, partition)
);
}
} finally {
if (reader != null) {
reader.close();
}
}
}
if (!hasNonEmptyPartition) {
throw new IllegalArgumentException("all partitions are empty");
}
}
public HFileReader(FileSystem fs,
String path,
CacheConfig cacheConf,
FuturePool futurePool) throws IOException {
this.reader = HFile.createReader(fs, new TerrapinPath(path), cacheConf);
this.futurePool = futurePool;
this.fileSet = TerrapinUtil.extractFileSetFromPath(path);
setUpStatsKeys();
}
@Test
public void testGenerateHFiles() throws IOException {
Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(conf);
int numOfPart = 10;
int numOfKeys = 1000;
HFileGenerator.generateHFiles(fs, conf, outputDir,
PartitionerType.CASCADING, numOfPart, numOfKeys);
FilenameFilter hfileFilter = new FilenameFilter() {
@Override
public boolean accept(File dir, String name) {
return name.startsWith(Constants.FILE_PREFIX);
}
};
File[] hfiles = outputDir.listFiles(hfileFilter);
assertEquals(numOfPart, hfiles.length);
int count = 0;
for(File hfile : hfiles) {
HColumnDescriptor columnDescriptor = new HColumnDescriptor();
columnDescriptor.setBlockCacheEnabled(false);
HFile.Reader reader =
HFile.createReader(fs, new Path(hfile.toURI()), new CacheConfig(conf, columnDescriptor));
count += reader.getEntries();
reader.close();
}
assertEquals(numOfKeys, count);
}
private List<Path> getFilesRecursively(String fileBackupDir)
throws IllegalArgumentException, IOException {
FileSystem fs = FileSystem.get((new Path(fileBackupDir)).toUri(), new Configuration());
List<Path> list = new ArrayList<>();
RemoteIterator<LocatedFileStatus> it = fs.listFiles(new Path(fileBackupDir), true);
while (it.hasNext()) {
Path p = it.next().getPath();
if (HFile.isHFileFormat(fs, p)) {
list.add(p);
}
}
return list;
}
private void addAllHFiles() throws IOException {
Path masterProcDir = new Path(CommonFSUtils.getRootDir(conf), MasterRegionFactory.MASTER_STORE_DIR);
Path tableDir = CommonFSUtils.getTableDir(masterProcDir, MasterRegionFactory.TABLE_NAME);
FileSystem fs = tableDir.getFileSystem(conf);
Path regionDir =
fs.listStatus(tableDir, p -> RegionInfo.isEncodedRegionName(Bytes.toBytes(p.getName())))[0]
.getPath();
List<Path> regionFiles = HFile.getStoreFiles(fs, regionDir);
files.addAll(regionFiles);
}
/**
 * Checks the configured HFile format version before running MOB compaction.
 * @throws IOException if the configured HFile version is too low to support MOB
 */
private void checkHFileFormatVersionForMob() throws IOException {
if (HFile.getFormatVersion(master.getConfiguration()) < HFile.MIN_FORMAT_VERSION_WITH_TAGS) {
LOG.error("A minimum HFile version of " + HFile.MIN_FORMAT_VERSION_WITH_TAGS
+ " is required for MOB compaction. Compaction will not run.");
throw new IOException("A minimum HFile version of " + HFile.MIN_FORMAT_VERSION_WITH_TAGS
+ " is required for MOB feature. Consider setting " + HFile.FORMAT_VERSION_KEY
+ " accordingly.");
}
}
public static void doSmokeTest(FileSystem fs, Path path, String codec)
throws Exception {
Configuration conf = HBaseConfiguration.create();
HFileContext context = new HFileContextBuilder()
.withCompression(HFileWriterImpl.compressionByName(codec)).build();
HFile.Writer writer = HFile.getWriterFactoryNoCache(conf)
.withPath(fs, path)
.withFileContext(context)
.create();
// Write any-old Cell...
final byte [] rowKey = Bytes.toBytes("compressiontestkey");
Cell c = ExtendedCellBuilderFactory.create(CellBuilderType.DEEP_COPY)
.setRow(rowKey)
.setFamily(HConstants.EMPTY_BYTE_ARRAY)
.setQualifier(HConstants.EMPTY_BYTE_ARRAY)
.setTimestamp(HConstants.LATEST_TIMESTAMP)
.setType(KeyValue.Type.Maximum.getCode())
.setValue(Bytes.toBytes("compressiontestval"))
.build();
writer.append(c);
writer.appendFileInfo(Bytes.toBytes("compressioninfokey"), Bytes.toBytes("compressioninfoval"));
writer.close();
Cell cc = null;
HFile.Reader reader = HFile.createReader(fs, path, CacheConfig.DISABLED, true, conf);
try {
HFileScanner scanner = reader.getScanner(false, true);
scanner.seekTo(); // position to the start of file
// Scanner does not do Cells yet. Do below for now till fixed.
cc = scanner.getCell();
if (CellComparator.getInstance().compareRows(c, cc) != 0) {
throw new Exception("Read back incorrect result: " + c.toString() + " vs " + cc.toString());
}
} finally {
reader.close();
}
}
/**
 * Instantiates the correct Bloom filter class based on the version provided
 * in the meta block data.
 *
 * @param meta the {@link DataInput} holding the Bloom filter's metadata, including
 *          version information
 * @param reader the {@link HFile} reader to use to lazily load Bloom filter
 *          blocks
 * @return an instance of the correct type of Bloom filter
 * @throws IllegalArgumentException if the Bloom filter format version is not recognized
 * @throws IOException if the metadata cannot be read
 */
public static BloomFilter
createFromMeta(DataInput meta, HFile.Reader reader)
throws IllegalArgumentException, IOException {
int version = meta.readInt();
switch (version) {
case CompoundBloomFilterBase.VERSION:
return new CompoundBloomFilter(meta, reader);
default:
throw new IllegalArgumentException(
"Bad bloom filter format version " + version
);
}
}
/**
 * Creates a new general (Row or RowCol) Bloom filter at the time of
 * {@link org.apache.hadoop.hbase.regionserver.HStoreFile} writing.
 *
 * @param conf the configuration to read Bloom filter settings from
 * @param cacheConf the cache configuration for the store file being written
 * @param bloomType the type of Bloom filter to create (for example ROW or ROWCOL)
 * @param maxKeys an estimate of the number of keys we expect to insert.
 *          Irrelevant if compound Bloom filters are enabled.
 * @param writer the HFile writer
 * @return the new Bloom filter, or null if Bloom filters are disabled
 *         or one could not be created
 */
public static BloomFilterWriter createGeneralBloomAtWrite(Configuration conf,
CacheConfig cacheConf, BloomType bloomType, int maxKeys,
HFile.Writer writer) {
if (!isGeneralBloomEnabled(conf)) {
LOG.trace("Bloom filters are disabled by configuration for "
+ writer.getPath()
+ (conf == null ? " (configuration is null)" : ""));
return null;
} else if (bloomType == BloomType.NONE) {
LOG.trace("Bloom filter is turned off for the column family");
return null;
}
float err = getErrorRate(conf);
// In case of row/column Bloom filter lookups, each lookup is an OR of two
// separate lookups. Therefore, if each lookup's false positive rate is p,
// the resulting false positive rate is err = 1 - (1 - p)^2, and
// p = 1 - sqrt(1 - err).
if (bloomType == BloomType.ROWCOL) {
err = (float) (1 - Math.sqrt(1 - err));
}
int maxFold = conf.getInt(IO_STOREFILE_BLOOM_MAX_FOLD,
MAX_ALLOWED_FOLD_FACTOR);
// Do we support compound bloom filters?
// In case of compound Bloom filters we ignore the maxKeys hint.
CompoundBloomFilterWriter bloomWriter = new CompoundBloomFilterWriter(getBloomBlockSize(conf),
err, Hash.getHashType(conf), maxFold, cacheConf.shouldCacheBloomsOnWrite(),
bloomType == BloomType.ROWCOL ? CellComparatorImpl.COMPARATOR : null, bloomType);
writer.addInlineBlockWriter(bloomWriter);
return bloomWriter;
}
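The comment above the ROWCOL branch derives the per-lookup false positive rate from the combined one: err = 1 - (1 - p)^2, hence p = 1 - sqrt(1 - err). As a purely illustrative check of that arithmetic (not HBase code), with an assumed configured error rate of 0.01 the per-lookup rate handed to the Bloom writer works out to roughly 0.005:

// Illustrative check of the ROWCOL adjustment used above; the 0.01 error rate is an assumption.
float err = 0.01f;                                  // combined (row + column) false positive rate
float p = (float) (1 - Math.sqrt(1 - err));         // per-lookup rate passed to the Bloom writer
System.out.println(p);                              // ~0.0050126
System.out.println(1 - Math.pow(1 - p, 2));         // recombines to ~0.01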
@Override
public void perform() throws Exception {
getLogger().info("Start corrupting data files");
FileSystem fs = CommonFSUtils.getRootDirFileSystem(getConf());
Path rootDir = CommonFSUtils.getRootDir(getConf());
Path defaultDir = rootDir.suffix("/data/default");
RemoteIterator<LocatedFileStatus> iterator = fs.listFiles(defaultDir, true);
while (iterator.hasNext()){
LocatedFileStatus status = iterator.next();
if(!HFile.isHFileFormat(fs, status.getPath())){
continue;
}
if(RandomUtils.nextFloat(0, 100) > chance){
continue;
}
FSDataOutputStream out = fs.create(status.getPath(), true);
try {
out.write(0);
} finally {
out.close();
}
getLogger().info("Corrupting {}", status.getPath());
}
getLogger().info("Done corrupting data files");
}