类org.apache.hadoop.util.hash.Hash源码实例Demo

下面列出了 org.apache.hadoop.util.hash.Hash 类的 API 使用示例代码及写法，您也可以点击链接到 GitHub 查看源代码。

源代码1 项目: hadoop   文件: HashFunction.java
/**
 * Creates a hash function backed by the {@link Hash} implementation that
 * {@code hashType} selects.
 * <p>
 * The arguments are validated in declaration order and the first invalid one
 * is reported.
 * @param maxValue The upper limit for the returned hash values; must be &gt; 0.
 * @param nbHash The number of hash values to produce; must be &gt; 0.
 * @param hashType type of the hashing function (see {@link Hash}).
 * @throws IllegalArgumentException if {@code maxValue} or {@code nbHash} is
 *         not strictly positive, or if {@code Hash.getInstance} returns
 *         {@code null} for {@code hashType}.
 */
public HashFunction(int maxValue, int nbHash, int hashType) {
  // Reject non-positive bounds up front.
  if (maxValue < 1) {
    throw new IllegalArgumentException("maxValue must be > 0");
  }
  if (nbHash < 1) {
    throw new IllegalArgumentException("nbHash must be > 0");
  }

  this.maxValue = maxValue;
  this.nbHash = nbHash;

  // A null lookup result is treated as an unknown hash type.
  this.hashFunction = Hash.getInstance(hashType);
  if (null == this.hashFunction) {
    throw new IllegalArgumentException("hashType must be known");
  }
}
 
源代码2 项目: hadoop   文件: TestBloomFilters.java
/**
 * Runs the shared tester strategies against a Jenkins-hash
 * {@link DynamicBloomFilter} and then checks that its string form is
 * non-null.
 */
@Test
public void testDynamicBloomFilter() {
  final int hashId = Hash.JENKINS_HASH;
  final Filter dynamicFilter =
      new DynamicBloomFilter(bitSize, hashFunctionNumber, hashId, 3);

  BloomFilterCommonTester.of(hashId, numInsertions)
      .withFilterInstance(dynamicFilter)
      .withTestCases(ImmutableSet.of(
          BloomFilterTestStrategy.KEY_TEST_STRATEGY,
          BloomFilterTestStrategy.ADD_KEYS_STRATEGY,
          BloomFilterTestStrategy.EXCEPTIONS_CHECK_STRATEGY,
          BloomFilterTestStrategy.WRITE_READ_STRATEGY,
          BloomFilterTestStrategy.ODD_EVEN_ABSENT_STRATEGY))
      .test();

  final String rendered = dynamicFilter.toString();
  assertNotNull("testDynamicBloomFilter error ", rendered);
}
 
源代码3 项目: hadoop   文件: TestBloomFilters.java
/**
 * Runs the shared tester strategies against a {@link BloomFilter} and a
 * {@link RetouchedBloomFilter} that both use the Jenkins hash.
 */
@Test
public void testFiltersWithJenkinsHash() {
  final int jenkins = Hash.JENKINS_HASH;
  final BloomFilter plainFilter =
      new BloomFilter(bitSize, hashFunctionNumber, jenkins);
  final RetouchedBloomFilter retouchedFilter =
      new RetouchedBloomFilter(bitSize, hashFunctionNumber, jenkins);

  BloomFilterCommonTester.of(jenkins, numInsertions)
      .withFilterInstance(plainFilter)
      .withFilterInstance(retouchedFilter)
      .withTestCases(ImmutableSet.of(
          BloomFilterTestStrategy.KEY_TEST_STRATEGY,
          BloomFilterTestStrategy.ADD_KEYS_STRATEGY,
          BloomFilterTestStrategy.EXCEPTIONS_CHECK_STRATEGY,
          BloomFilterTestStrategy.ODD_EVEN_ABSENT_STRATEGY,
          BloomFilterTestStrategy.WRITE_READ_STRATEGY,
          BloomFilterTestStrategy.FILTER_OR_STRATEGY,
          BloomFilterTestStrategy.FILTER_AND_STRATEGY,
          BloomFilterTestStrategy.FILTER_XOR_STRATEGY))
      .test();
}
 
源代码4 项目: hadoop   文件: TestBloomFilters.java
/**
 * Runs the shared tester strategies against a {@link BloomFilter} and a
 * {@link RetouchedBloomFilter} that both use the Murmur hash.
 */
@Test
public void testFiltersWithMurmurHash() {
  final int murmur = Hash.MURMUR_HASH;
  final BloomFilter plainFilter =
      new BloomFilter(bitSize, hashFunctionNumber, murmur);
  final RetouchedBloomFilter retouchedFilter =
      new RetouchedBloomFilter(bitSize, hashFunctionNumber, murmur);

  BloomFilterCommonTester.of(murmur, numInsertions)
      .withFilterInstance(plainFilter)
      .withFilterInstance(retouchedFilter)
      .withTestCases(ImmutableSet.of(
          BloomFilterTestStrategy.KEY_TEST_STRATEGY,
          BloomFilterTestStrategy.ADD_KEYS_STRATEGY,
          BloomFilterTestStrategy.EXCEPTIONS_CHECK_STRATEGY,
          BloomFilterTestStrategy.ODD_EVEN_ABSENT_STRATEGY,
          BloomFilterTestStrategy.WRITE_READ_STRATEGY,
          BloomFilterTestStrategy.FILTER_OR_STRATEGY,
          BloomFilterTestStrategy.FILTER_AND_STRATEGY,
          BloomFilterTestStrategy.FILTER_XOR_STRATEGY))
      .test();
}
 
源代码5 项目: hadoop   文件: BloomFilterCommonTester.java
/**
 * Creates a tester for the given hash type and insertion count, and installs
 * a {@code PreAssertionHelper} that supplies the hard-coded false-positive
 * keys for each supported hash.
 *
 * @param hashId hash type identifier (a {@link Hash} constant)
 * @param numInsertions number of insertions stored on this tester
 */
private BloomFilterCommonTester(int hashId, int numInsertions) {
  this.hashType = hashId;
  this.numInsertions = numInsertions;

  this.preAssertionHelper = new PreAssertionHelper() {

    @Override
    public ImmutableSet<Integer> falsePositives(int hashId) {
      // Per the original notes, each set lists the false positives for odd
      // and even keys under 1000.
      if (hashId == Hash.JENKINS_HASH) {
        return ImmutableSet.of(99, 963);
      }
      if (hashId == Hash.MURMUR_HASH) {
        return ImmutableSet.of(769, 772, 810, 874);
      }

      // Fail fast on an unknown hash; the return only satisfies the compiler.
      Assert.assertFalse("unknown hash error", true);
      return ImmutableSet.of();
    }
  };
}
 
源代码6 项目: big-c   文件: HashFunction.java
/**
 * Creates a hash function backed by the {@link Hash} implementation that
 * {@code hashType} selects.
 * <p>
 * The arguments are validated in declaration order and the first invalid one
 * is reported.
 * @param maxValue The upper limit for the returned hash values; must be &gt; 0.
 * @param nbHash The number of hash values to produce; must be &gt; 0.
 * @param hashType type of the hashing function (see {@link Hash}).
 * @throws IllegalArgumentException if {@code maxValue} or {@code nbHash} is
 *         not strictly positive, or if {@code Hash.getInstance} returns
 *         {@code null} for {@code hashType}.
 */
public HashFunction(int maxValue, int nbHash, int hashType) {
  // Reject non-positive bounds up front.
  if (maxValue < 1) {
    throw new IllegalArgumentException("maxValue must be > 0");
  }
  if (nbHash < 1) {
    throw new IllegalArgumentException("nbHash must be > 0");
  }

  this.maxValue = maxValue;
  this.nbHash = nbHash;

  // A null lookup result is treated as an unknown hash type.
  this.hashFunction = Hash.getInstance(hashType);
  if (null == this.hashFunction) {
    throw new IllegalArgumentException("hashType must be known");
  }
}
 
源代码7 项目: big-c   文件: TestBloomFilters.java
/**
 * Runs the shared tester strategies against a Jenkins-hash
 * {@link DynamicBloomFilter} and then checks that its string form is
 * non-null.
 */
@Test
public void testDynamicBloomFilter() {
  final int hashId = Hash.JENKINS_HASH;
  final Filter dynamicFilter =
      new DynamicBloomFilter(bitSize, hashFunctionNumber, hashId, 3);

  BloomFilterCommonTester.of(hashId, numInsertions)
      .withFilterInstance(dynamicFilter)
      .withTestCases(ImmutableSet.of(
          BloomFilterTestStrategy.KEY_TEST_STRATEGY,
          BloomFilterTestStrategy.ADD_KEYS_STRATEGY,
          BloomFilterTestStrategy.EXCEPTIONS_CHECK_STRATEGY,
          BloomFilterTestStrategy.WRITE_READ_STRATEGY,
          BloomFilterTestStrategy.ODD_EVEN_ABSENT_STRATEGY))
      .test();

  final String rendered = dynamicFilter.toString();
  assertNotNull("testDynamicBloomFilter error ", rendered);
}
 
源代码8 项目: big-c   文件: TestBloomFilters.java
/**
 * Runs the shared tester strategies against a {@link BloomFilter} and a
 * {@link RetouchedBloomFilter} that both use the Jenkins hash.
 */
@Test
public void testFiltersWithJenkinsHash() {
  final int jenkins = Hash.JENKINS_HASH;
  final BloomFilter plainFilter =
      new BloomFilter(bitSize, hashFunctionNumber, jenkins);
  final RetouchedBloomFilter retouchedFilter =
      new RetouchedBloomFilter(bitSize, hashFunctionNumber, jenkins);

  BloomFilterCommonTester.of(jenkins, numInsertions)
      .withFilterInstance(plainFilter)
      .withFilterInstance(retouchedFilter)
      .withTestCases(ImmutableSet.of(
          BloomFilterTestStrategy.KEY_TEST_STRATEGY,
          BloomFilterTestStrategy.ADD_KEYS_STRATEGY,
          BloomFilterTestStrategy.EXCEPTIONS_CHECK_STRATEGY,
          BloomFilterTestStrategy.ODD_EVEN_ABSENT_STRATEGY,
          BloomFilterTestStrategy.WRITE_READ_STRATEGY,
          BloomFilterTestStrategy.FILTER_OR_STRATEGY,
          BloomFilterTestStrategy.FILTER_AND_STRATEGY,
          BloomFilterTestStrategy.FILTER_XOR_STRATEGY))
      .test();
}
 
源代码9 项目: big-c   文件: TestBloomFilters.java
/**
 * Runs the shared tester strategies against a {@link BloomFilter} and a
 * {@link RetouchedBloomFilter} that both use the Murmur hash.
 */
@Test
public void testFiltersWithMurmurHash() {
  final int murmur = Hash.MURMUR_HASH;
  final BloomFilter plainFilter =
      new BloomFilter(bitSize, hashFunctionNumber, murmur);
  final RetouchedBloomFilter retouchedFilter =
      new RetouchedBloomFilter(bitSize, hashFunctionNumber, murmur);

  BloomFilterCommonTester.of(murmur, numInsertions)
      .withFilterInstance(plainFilter)
      .withFilterInstance(retouchedFilter)
      .withTestCases(ImmutableSet.of(
          BloomFilterTestStrategy.KEY_TEST_STRATEGY,
          BloomFilterTestStrategy.ADD_KEYS_STRATEGY,
          BloomFilterTestStrategy.EXCEPTIONS_CHECK_STRATEGY,
          BloomFilterTestStrategy.ODD_EVEN_ABSENT_STRATEGY,
          BloomFilterTestStrategy.WRITE_READ_STRATEGY,
          BloomFilterTestStrategy.FILTER_OR_STRATEGY,
          BloomFilterTestStrategy.FILTER_AND_STRATEGY,
          BloomFilterTestStrategy.FILTER_XOR_STRATEGY))
      .test();
}
 
源代码10 项目: big-c   文件: BloomFilterCommonTester.java
/**
 * Creates a tester for the given hash type and insertion count, and installs
 * a {@code PreAssertionHelper} that supplies the hard-coded false-positive
 * keys for each supported hash.
 *
 * @param hashId hash type identifier (a {@link Hash} constant)
 * @param numInsertions number of insertions stored on this tester
 */
private BloomFilterCommonTester(int hashId, int numInsertions) {
  this.hashType = hashId;
  this.numInsertions = numInsertions;

  this.preAssertionHelper = new PreAssertionHelper() {

    @Override
    public ImmutableSet<Integer> falsePositives(int hashId) {
      // Per the original notes, each set lists the false positives for odd
      // and even keys under 1000.
      if (hashId == Hash.JENKINS_HASH) {
        return ImmutableSet.of(99, 963);
      }
      if (hashId == Hash.MURMUR_HASH) {
        return ImmutableSet.of(769, 772, 810, 874);
      }

      // Fail fast on an unknown hash; the return only satisfies the compiler.
      Assert.assertFalse("unknown hash error", true);
      return ImmutableSet.of();
    }
  };
}
 
源代码11 项目: streaminer   文件: HashFunction.java
/**
 * Creates a hash function backed by the {@link Hash} implementation that
 * {@code hashType} selects.
 * <p>
 * The arguments are validated in declaration order and the first invalid one
 * is reported.
 * @param maxValue The upper limit for the returned hash values; must be &gt; 0.
 * @param nbHash The number of hash values to produce; must be &gt; 0.
 * @param hashType type of the hashing function (see {@link Hash}).
 * @throws IllegalArgumentException if {@code maxValue} or {@code nbHash} is
 *         not strictly positive, or if {@code Hash.getInstance} returns
 *         {@code null} for {@code hashType}.
 */
public HashFunction(int maxValue, int nbHash, int hashType) {
  // Reject non-positive bounds up front.
  if (maxValue < 1) {
    throw new IllegalArgumentException("maxValue must be > 0");
  }
  if (nbHash < 1) {
    throw new IllegalArgumentException("nbHash must be > 0");
  }

  this.maxValue = maxValue;
  this.nbHash = nbHash;

  // A null lookup result is treated as an unknown hash type.
  this.hashFunction = Hash.getInstance(hashType);
  if (null == this.hashFunction) {
    throw new IllegalArgumentException("hashType must be known");
  }
}
 
源代码12 项目: RDFS   文件: HashFunction.java
/**
 * Creates a hash function backed by the {@link Hash} implementation that
 * {@code hashType} selects.
 * <p>
 * The arguments are validated in declaration order and the first invalid one
 * is reported.
 * @param maxValue The upper limit for the returned hash values; must be &gt; 0.
 * @param nbHash The number of hash values to produce; must be &gt; 0.
 * @param hashType type of the hashing function (see {@link Hash}).
 * @throws IllegalArgumentException if {@code maxValue} or {@code nbHash} is
 *         not strictly positive, or if {@code Hash.getInstance} returns
 *         {@code null} for {@code hashType}.
 */
public HashFunction(int maxValue, int nbHash, int hashType) {
  // Reject non-positive bounds up front.
  if (maxValue < 1) {
    throw new IllegalArgumentException("maxValue must be > 0");
  }
  if (nbHash < 1) {
    throw new IllegalArgumentException("nbHash must be > 0");
  }

  this.maxValue = maxValue;
  this.nbHash = nbHash;

  // A null lookup result is treated as an unknown hash type.
  this.hashFunction = Hash.getInstance(hashType);
  if (null == this.hashFunction) {
    throw new IllegalArgumentException("hashType must be known");
  }
}
 
源代码13 项目: hadoop-gpu   文件: HashFunction.java
/**
 * Creates a hash function backed by the {@link Hash} implementation that
 * {@code hashType} selects.
 * <p>
 * The arguments are validated in declaration order and the first invalid one
 * is reported.
 * @param maxValue The upper limit for the returned hash values; must be &gt; 0.
 * @param nbHash The number of hash values to produce; must be &gt; 0.
 * @param hashType type of the hashing function (see {@link Hash}).
 * @throws IllegalArgumentException if {@code maxValue} or {@code nbHash} is
 *         not strictly positive, or if {@code Hash.getInstance} returns
 *         {@code null} for {@code hashType}.
 */
public HashFunction(int maxValue, int nbHash, int hashType) {
  // Reject non-positive bounds up front.
  if (maxValue < 1) {
    throw new IllegalArgumentException("maxValue must be > 0");
  }
  if (nbHash < 1) {
    throw new IllegalArgumentException("nbHash must be > 0");
  }

  this.maxValue = maxValue;
  this.nbHash = nbHash;

  // A null lookup result is treated as an unknown hash type.
  this.hashFunction = Hash.getInstance(hashType);
  if (null == this.hashFunction) {
    throw new IllegalArgumentException("hashType must be known");
  }
}
 
源代码14 项目: hadoop   文件: Filter.java
/**
 * Restores the filter parameters from {@code in} and rebuilds the hash
 * function from them.
 * <p>
 * The leading int is either a positive hash count (old unversioned format,
 * which implies the Jenkins hash) or {@code VERSION}, in which case the hash
 * count and a one-byte hash type follow. The vector size is read last in
 * both formats.
 *
 * @param in source of the serialized fields
 * @throws IOException if the leading int is neither positive nor
 *         {@code VERSION}, or if reading from {@code in} fails
 */
@Override
public void readFields(DataInput in) throws IOException {
  final int ver = in.readInt();

  // Anything that is neither the legacy form nor the current version is
  // rejected before any further bytes are consumed.
  if (ver <= 0 && ver != VERSION) {
    throw new IOException("Unsupported version: " + ver);
  }

  if (ver > 0) {
    // Old unversioned format: the leading int is the hash count itself.
    this.nbHash = ver;
    this.hashType = Hash.JENKINS_HASH;
  } else {
    this.nbHash = in.readInt();
    this.hashType = in.readByte();
  }

  this.vectorSize = in.readInt();
  this.hash = new HashFunction(this.vectorSize, this.nbHash, this.hashType);
}
 
源代码15 项目: hadoop   文件: BloomMapFile.java
/**
 * Sizes and creates the dynamic Bloom filter from configuration.
 * <p>
 * Reads the key count from {@code io.mapfile.bloom.size} (default
 * 1024 * 1024) and the error rate from {@code io.mapfile.bloom.error.rate}
 * (default 0.005), derives the vector size from them, and builds a
 * {@link DynamicBloomFilter} with the hash type that
 * {@code Hash.getHashType(conf)} returns.
 *
 * @param conf configuration supplying the two settings above and the hash
 *             type
 */
private synchronized void initBloomFilter(Configuration conf) {
  numKeys = conf.getInt("io.mapfile.bloom.size", 1024 * 1024);
  // vector size should be <code>-kn / (ln(1 - c^(1/k)))</code> bits for
  // single key, where <code>k</code> is the number of hash functions,
  // <code>n</code> is the number of keys and <code>c</code> is the desired
  // max. error rate.
  // Our desired error rate is by default 0.005, i.e. 0.5%
  float errorRate = conf.getFloat("io.mapfile.bloom.error.rate", 0.005f);
  // Multiply in double: an integer product -k * n silently wraps around for
  // a large configured key count and would yield a bogus vector size. The
  // final (int) cast saturates at Integer.MAX_VALUE instead.
  vectorSize = (int) Math.ceil((-HASH_COUNT * (double) numKeys) /
      Math.log(1.0 - Math.pow(errorRate, 1.0 / HASH_COUNT)));
  bloomFilter = new DynamicBloomFilter(vectorSize, HASH_COUNT,
      Hash.getHashType(conf), numKeys);
}
 
源代码16 项目: hadoop   文件: TestBloomFilters.java
/**
 * Checks add/delete bookkeeping of a Jenkins-hash
 * {@link CountingBloomFilter} on a single key, then runs the shared tester
 * strategies against the same filter instance.
 */
@Test
public void testCountingBloomFilter() {
  final int jenkins = Hash.JENKINS_HASH;
  final CountingBloomFilter counting =
      new CountingBloomFilter(bitSize, hashFunctionNumber, jenkins);
  final Key probe = new Key(new byte[] { 48, 48 });

  // First insertion: key is present with a count of one.
  counting.add(probe);
  assertTrue("CountingBloomFilter.membership error ",
      counting.membershipTest(probe));
  assertTrue("CountingBloomFilter.approximateCount error",
      counting.approximateCount(probe) == 1);

  // Second insertion bumps the count to two.
  counting.add(probe);
  assertTrue("CountingBloomFilter.approximateCount error",
      counting.approximateCount(probe) == 2);

  // One deletion still leaves the key present.
  counting.delete(probe);
  assertTrue("CountingBloomFilter.membership error ",
      counting.membershipTest(probe));

  // The second deletion removes it entirely.
  counting.delete(probe);
  assertFalse("CountingBloomFilter.membership error ",
      counting.membershipTest(probe));
  assertTrue("CountingBloomFilter.approximateCount error",
      counting.approximateCount(probe) == 0);

  BloomFilterCommonTester.of(jenkins, numInsertions)
      .withFilterInstance(counting)
      .withTestCases(ImmutableSet.of(
          BloomFilterTestStrategy.KEY_TEST_STRATEGY,
          BloomFilterTestStrategy.ADD_KEYS_STRATEGY,
          BloomFilterTestStrategy.EXCEPTIONS_CHECK_STRATEGY,
          BloomFilterTestStrategy.ODD_EVEN_ABSENT_STRATEGY,
          BloomFilterTestStrategy.WRITE_READ_STRATEGY,
          BloomFilterTestStrategy.FILTER_OR_STRATEGY,
          BloomFilterTestStrategy.FILTER_XOR_STRATEGY))
      .test();
}
 
源代码17 项目: hadoop   文件: TestBloomFilters.java
/**
 * For both the Murmur and the Jenkins hash, runs
 * {@code checkOnAbsentFalsePositive} on a {@link RetouchedBloomFilter} for
 * every combination of remove scheme (MAXIMUM_FP, MINIMUM_FN, RATIO) and
 * digit parity (ODD, then EVEN).
 */
@Test
public void testRetouchedBloomFilterSpecific() {
  int numInsertions = 1000;
  int hashFunctionNumber = 5;
  final Digits[] parities = { Digits.ODD, Digits.EVEN };

  for (Integer hashId : ImmutableSet.of(Hash.MURMUR_HASH, Hash.JENKINS_HASH)) {
    RetouchedBloomFilter filter =
        new RetouchedBloomFilter(bitSize, hashFunctionNumber, hashId);

    // After every check the filter is AND-ed with a freshly constructed
    // filter (presumably to reset it before the next round).
    for (Digits parity : parities) {
      checkOnAbsentFalsePositive(hashId, numInsertions, filter, parity,
          RemoveScheme.MAXIMUM_FP);
      filter.and(new RetouchedBloomFilter(bitSize, hashFunctionNumber, hashId));
    }

    for (Digits parity : parities) {
      checkOnAbsentFalsePositive(hashId, numInsertions, filter, parity,
          RemoveScheme.MINIMUM_FN);
      filter.and(new RetouchedBloomFilter(bitSize, hashFunctionNumber, hashId));
    }

    for (Digits parity : parities) {
      checkOnAbsentFalsePositive(hashId, numInsertions, filter, parity,
          RemoveScheme.RATIO);
      filter.and(new RetouchedBloomFilter(bitSize, hashFunctionNumber, hashId));
    }
  }
}
 
源代码18 项目: hadoop   文件: TestBloomFilters.java
/**
 * Checks that {@code BloomFilter#not} leaves no bit in common with the bit
 * set the filter held before the call.
 */
@Test
public void testNot() {
  final BloomFilter filter = new BloomFilter(8, 1, Hash.JENKINS_HASH);
  // Seed the filter with the bit pattern 0x95.
  filter.bits = BitSet.valueOf(new byte[] { (byte) 0x95 });
  final BitSet before = (BitSet) filter.bits.clone();

  filter.not();

  final boolean sharesBits = filter.bits.intersects(before);
  assertFalse("BloomFilter#not should have inverted all bits", sharesBits);
}
 
源代码19 项目: big-c   文件: Filter.java
/**
 * Restores the filter parameters from {@code in} and rebuilds the hash
 * function from them.
 * <p>
 * The leading int is either a positive hash count (old unversioned format,
 * which implies the Jenkins hash) or {@code VERSION}, in which case the hash
 * count and a one-byte hash type follow. The vector size is read last in
 * both formats.
 *
 * @param in source of the serialized fields
 * @throws IOException if the leading int is neither positive nor
 *         {@code VERSION}, or if reading from {@code in} fails
 */
@Override
public void readFields(DataInput in) throws IOException {
  final int ver = in.readInt();

  // Anything that is neither the legacy form nor the current version is
  // rejected before any further bytes are consumed.
  if (ver <= 0 && ver != VERSION) {
    throw new IOException("Unsupported version: " + ver);
  }

  if (ver > 0) {
    // Old unversioned format: the leading int is the hash count itself.
    this.nbHash = ver;
    this.hashType = Hash.JENKINS_HASH;
  } else {
    this.nbHash = in.readInt();
    this.hashType = in.readByte();
  }

  this.vectorSize = in.readInt();
  this.hash = new HashFunction(this.vectorSize, this.nbHash, this.hashType);
}
 
源代码20 项目: big-c   文件: BloomMapFile.java
/**
 * Sizes and creates the dynamic Bloom filter from configuration.
 * <p>
 * Reads the key count from {@code io.mapfile.bloom.size} (default
 * 1024 * 1024) and the error rate from {@code io.mapfile.bloom.error.rate}
 * (default 0.005), derives the vector size from them, and builds a
 * {@link DynamicBloomFilter} with the hash type that
 * {@code Hash.getHashType(conf)} returns.
 *
 * @param conf configuration supplying the two settings above and the hash
 *             type
 */
private synchronized void initBloomFilter(Configuration conf) {
  numKeys = conf.getInt("io.mapfile.bloom.size", 1024 * 1024);
  // vector size should be <code>-kn / (ln(1 - c^(1/k)))</code> bits for
  // single key, where <code>k</code> is the number of hash functions,
  // <code>n</code> is the number of keys and <code>c</code> is the desired
  // max. error rate.
  // Our desired error rate is by default 0.005, i.e. 0.5%
  float errorRate = conf.getFloat("io.mapfile.bloom.error.rate", 0.005f);
  // Multiply in double: an integer product -k * n silently wraps around for
  // a large configured key count and would yield a bogus vector size. The
  // final (int) cast saturates at Integer.MAX_VALUE instead.
  vectorSize = (int) Math.ceil((-HASH_COUNT * (double) numKeys) /
      Math.log(1.0 - Math.pow(errorRate, 1.0 / HASH_COUNT)));
  bloomFilter = new DynamicBloomFilter(vectorSize, HASH_COUNT,
      Hash.getHashType(conf), numKeys);
}
 
源代码21 项目: big-c   文件: TestBloomFilters.java
/**
 * Checks add/delete bookkeeping of a Jenkins-hash
 * {@link CountingBloomFilter} on a single key, then runs the shared tester
 * strategies against the same filter instance.
 */
@Test
public void testCountingBloomFilter() {
  final int jenkins = Hash.JENKINS_HASH;
  final CountingBloomFilter counting =
      new CountingBloomFilter(bitSize, hashFunctionNumber, jenkins);
  final Key probe = new Key(new byte[] { 48, 48 });

  // First insertion: key is present with a count of one.
  counting.add(probe);
  assertTrue("CountingBloomFilter.membership error ",
      counting.membershipTest(probe));
  assertTrue("CountingBloomFilter.approximateCount error",
      counting.approximateCount(probe) == 1);

  // Second insertion bumps the count to two.
  counting.add(probe);
  assertTrue("CountingBloomFilter.approximateCount error",
      counting.approximateCount(probe) == 2);

  // One deletion still leaves the key present.
  counting.delete(probe);
  assertTrue("CountingBloomFilter.membership error ",
      counting.membershipTest(probe));

  // The second deletion removes it entirely.
  counting.delete(probe);
  assertFalse("CountingBloomFilter.membership error ",
      counting.membershipTest(probe));
  assertTrue("CountingBloomFilter.approximateCount error",
      counting.approximateCount(probe) == 0);

  BloomFilterCommonTester.of(jenkins, numInsertions)
      .withFilterInstance(counting)
      .withTestCases(ImmutableSet.of(
          BloomFilterTestStrategy.KEY_TEST_STRATEGY,
          BloomFilterTestStrategy.ADD_KEYS_STRATEGY,
          BloomFilterTestStrategy.EXCEPTIONS_CHECK_STRATEGY,
          BloomFilterTestStrategy.ODD_EVEN_ABSENT_STRATEGY,
          BloomFilterTestStrategy.WRITE_READ_STRATEGY,
          BloomFilterTestStrategy.FILTER_OR_STRATEGY,
          BloomFilterTestStrategy.FILTER_XOR_STRATEGY))
      .test();
}
 
源代码22 项目: big-c   文件: TestBloomFilters.java
/**
 * For both the Murmur and the Jenkins hash, runs
 * {@code checkOnAbsentFalsePositive} on a {@link RetouchedBloomFilter} for
 * every combination of remove scheme (MAXIMUM_FP, MINIMUM_FN, RATIO) and
 * digit parity (ODD, then EVEN).
 */
@Test
public void testRetouchedBloomFilterSpecific() {
  int numInsertions = 1000;
  int hashFunctionNumber = 5;
  final Digits[] parities = { Digits.ODD, Digits.EVEN };

  for (Integer hashId : ImmutableSet.of(Hash.MURMUR_HASH, Hash.JENKINS_HASH)) {
    RetouchedBloomFilter filter =
        new RetouchedBloomFilter(bitSize, hashFunctionNumber, hashId);

    // After every check the filter is AND-ed with a freshly constructed
    // filter (presumably to reset it before the next round).
    for (Digits parity : parities) {
      checkOnAbsentFalsePositive(hashId, numInsertions, filter, parity,
          RemoveScheme.MAXIMUM_FP);
      filter.and(new RetouchedBloomFilter(bitSize, hashFunctionNumber, hashId));
    }

    for (Digits parity : parities) {
      checkOnAbsentFalsePositive(hashId, numInsertions, filter, parity,
          RemoveScheme.MINIMUM_FN);
      filter.and(new RetouchedBloomFilter(bitSize, hashFunctionNumber, hashId));
    }

    for (Digits parity : parities) {
      checkOnAbsentFalsePositive(hashId, numInsertions, filter, parity,
          RemoveScheme.RATIO);
      filter.and(new RetouchedBloomFilter(bitSize, hashFunctionNumber, hashId));
    }
  }
}
 
源代码23 项目: big-c   文件: TestBloomFilters.java
/**
 * Checks that {@code BloomFilter#not} leaves no bit in common with the bit
 * set the filter held before the call.
 */
@Test
public void testNot() {
  final BloomFilter filter = new BloomFilter(8, 1, Hash.JENKINS_HASH);
  // Seed the filter with the bit pattern 0x95.
  filter.bits = BitSet.valueOf(new byte[] { (byte) 0x95 });
  final BitSet before = (BitSet) filter.bits.clone();

  filter.not();

  final boolean sharesBits = filter.bits.intersects(before);
  assertFalse("BloomFilter#not should have inverted all bits", sharesBits);
}
 
源代码24 项目: incubator-hivemall   文件: BloomFilterUtils.java
/**
 * Creates a Murmur-hash {@link BloomFilter} whose vector size is derived
 * from the expected element count, the error rate and the number of hash
 * functions.
 *
 * @param expectedNumberOfElements number of keys the filter is sized for
 * @param errorRate desired maximum error rate
 * @param nbHash number of hash functions
 * @return a new filter sized by the formula in the body
 */
@Nonnull
public static BloomFilter newBloomFilter(@Nonnegative final int expectedNumberOfElements,
        @Nonnegative final float errorRate, @Nonnegative final int nbHash) {
    // vector size should be `-kn / (ln(1 - c^(1/k)))` bits for
    // single key, where `k` is the number of hash functions,
    // `n` is the number of keys and `c` is the desired max error rate.
    // The product k * n is formed in double: as an int product it wraps
    // around for large inputs. The (int) cast saturates at Integer.MAX_VALUE.
    int vectorSize = (int) Math.ceil((-nbHash * (double) expectedNumberOfElements)
            / Math.log(1.d - Math.pow(errorRate, 1.d / nbHash)));
    return new BloomFilter(vectorSize, nbHash, Hash.MURMUR_HASH);
}
 
源代码25 项目: incubator-hivemall   文件: BloomFilterUtils.java
/**
 * Creates a Murmur-hash {@link DynamicBloomFilter} whose vector size is
 * derived from the expected element count, the error rate and the number of
 * hash functions. The expected element count is also passed to the filter
 * as its last constructor argument.
 *
 * @param expectedNumberOfElements number of keys the filter is sized for
 * @param errorRate desired maximum error rate
 * @param nbHash number of hash functions
 * @return a new dynamic filter sized by the formula in the body
 */
@Nonnull
public static DynamicBloomFilter newDynamicBloomFilter(
        @Nonnegative final int expectedNumberOfElements, @Nonnegative final float errorRate,
        @Nonnegative final int nbHash) {
    // Vector size is `-kn / (ln(1 - c^(1/k)))` bits, with `k` hash
    // functions, `n` keys and desired max error rate `c`. The product k * n
    // is formed in double: as an int product it wraps around for large
    // inputs. The (int) cast saturates at Integer.MAX_VALUE.
    int vectorSize = (int) Math.ceil((-nbHash * (double) expectedNumberOfElements)
            / Math.log(1.d - Math.pow(errorRate, 1.d / nbHash)));
    return new DynamicBloomFilter(vectorSize, nbHash, Hash.MURMUR_HASH,
        expectedNumberOfElements);
}
 
源代码26 项目: hudi   文件: InternalFilter.java
/**
 * Restores the filter parameters from {@code in} and rebuilds the hash
 * function from them.
 * <p>
 * The leading int is either a positive hash count (old unversioned format,
 * which implies the Jenkins hash) or {@code VERSION}, in which case the hash
 * count and a one-byte hash type follow. The vector size is read last in
 * both formats.
 *
 * @param in source of the serialized fields
 * @throws IOException if the leading int is neither positive nor
 *         {@code VERSION}, or if reading from {@code in} fails
 */
@Override
public void readFields(DataInput in) throws IOException {
  final int ver = in.readInt();

  // Anything that is neither the legacy form nor the current version is
  // rejected before any further bytes are consumed.
  if (ver <= 0 && ver != VERSION) {
    throw new IOException("Unsupported version: " + ver);
  }

  if (ver > 0) {
    // Old unversioned format: the leading int is the hash count itself.
    this.nbHash = ver;
    this.hashType = Hash.JENKINS_HASH;
  } else {
    this.nbHash = in.readInt();
    this.hashType = in.readByte();
  }

  this.vectorSize = in.readInt();
  this.hash = new HashFunction(this.vectorSize, this.nbHash, this.hashType);
}
 
源代码27 项目: hudi   文件: BloomFilterFactory.java
/**
 * Builds the {@link BloomFilter} implementation named by
 * {@code bloomFilterTypeCode}; the name is compared case-insensitively.
 *
 * @param numEntries          total number of entries
 * @param errorRate           max allowed error rate
 * @param maxNumberOfEntries  max number of entries, passed only to the dynamic filter
 * @param bloomFilterTypeCode bloom filter type code
 * @return the {@link BloomFilter} thus created
 * @throws IllegalArgumentException if the type code matches neither SIMPLE nor DYNAMIC_V0
 */
public static BloomFilter createBloomFilter(int numEntries, double errorRate, int maxNumberOfEntries,
                                            String bloomFilterTypeCode) {
  if (bloomFilterTypeCode.equalsIgnoreCase(BloomFilterTypeCode.SIMPLE.name())) {
    return new SimpleBloomFilter(numEntries, errorRate, Hash.MURMUR_HASH);
  }

  if (bloomFilterTypeCode.equalsIgnoreCase(BloomFilterTypeCode.DYNAMIC_V0.name())) {
    return new HoodieDynamicBoundedBloomFilter(numEntries, errorRate, Hash.MURMUR_HASH, maxNumberOfEntries);
  }

  throw new IllegalArgumentException("Bloom Filter type code not recognizable " + bloomFilterTypeCode);
}
 
源代码28 项目: hudi   文件: TestInternalDynamicBloomFilter.java
/**
 * Adds random keys in batches to a {@code HoodieDynamicBoundedBloomFilter}
 * and checks the length of its serialized form after each batch: it must
 * grow up to and including batch index 3, and stay equal afterwards.
 */
@Test
public void testBoundedSize() {
  final int[] batchSizes = {1000, 10000, 10000, 100000, 100000, 10000};
  final int indexForMaxGrowth = 3;
  final int maxSize = batchSizes[0] * 100;
  final BloomFilter filter =
      new HoodieDynamicBoundedBloomFilter(batchSizes[0], 0.000001, Hash.MURMUR_HASH, maxSize);

  int lastKnownBloomSize = 0;
  for (int batch = 0; batch < batchSizes.length; batch++) {
    for (int added = 0; added < batchSizes[batch]; added++) {
      filter.add(UUID.randomUUID().toString());
    }

    final int curLength = filter.serializeToString().length();
    // The first batch has nothing to compare against, so it is only recorded.
    if (batch > indexForMaxGrowth) {
      assertEquals(curLength, lastKnownBloomSize, "Length should not increase after hitting max entries");
    } else if (batch != 0) {
      assertTrue(curLength > lastKnownBloomSize, "Length should increase until max entries are reached");
    }
    lastKnownBloomSize = curLength;
  }
}
 
源代码29 项目: compiler   文件: DistinctAggregator.java
/**
 * {@inheritDoc}
 * <p>
 * After delegating to the superclass, installs a fresh Murmur-hash
 * {@link DynamicBloomFilter} built from this aggregator's vector size and
 * its argument.
 */
@Override
public void start(final EmitKey key) {
	super.start(key);

	// The aggregator argument, narrowed to int, is the filter's last constructor argument.
	final int filterArg = (int) this.getArg();
	this.filter = new DynamicBloomFilter(this.vectorSize, HASH_COUNT, Hash.MURMUR_HASH, filterArg);
}
 
源代码30 项目: spork   文件: BuildBloomBase.java
/**
 * Maps a textual hash name onto the matching {@link Hash} type constant.
 * <p>
 * The name is lower-cased and tested for the substrings "jenkins" and then
 * "murmur". Lower-casing uses {@code Locale.ROOT} so the result does not
 * depend on the JVM default locale; under a Turkish locale, for example,
 * the default lower-casing of "JENKINS" contains a dotless i and would not
 * match.
 *
 * @param hashType name of the hash function; must not be {@code null}
 * @return {@code Hash.JENKINS_HASH} or {@code Hash.MURMUR_HASH}
 * @throws RuntimeException if the name contains neither "jenkins" nor "murmur"
 */
private int convertHashType(String hashType) {
    final String normalized = hashType.toLowerCase(java.util.Locale.ROOT);
    if (normalized.contains("jenkins")) {
        return Hash.JENKINS_HASH;
    }
    if (normalized.contains("murmur")) {
        return Hash.MURMUR_HASH;
    }
    throw new RuntimeException("Unknown hash type " + hashType +
        ".  Valid values are jenkins and murmur.");
}
 
 类所在包
 类方法
 同包方法