类org.apache.hadoop.util.bloom.Key源码实例Demo

下面列出了如何使用 org.apache.hadoop.util.bloom.Key 的 API 类实例代码及写法，也可以点击链接到 GitHub 查看源代码。

源代码1 项目: incubator-hivemall   文件: BloomContainsUDFTest.java
/**
 * Builds a dynamic Bloom filter (created via
 * {@code BloomFilterUtils.newDynamicBloomFilter(30)}) and adds {@code size} keys to it.
 *
 * <p>Each key carries the bytes of a {@code Text} wrapping the hexadecimal string form of
 * one Gaussian sample drawn from a {@link Random} seeded with {@code seed}. A single
 * {@code Key} instance is re-set and re-added on every iteration.
 *
 * @param seed seed for the random generator
 * @param size number of keys to add
 * @return the populated filter
 */
@Nonnull
private static DynamicBloomFilter createBloomFilter(long seed, int size) {
    final DynamicBloomFilter filter = BloomFilterUtils.newDynamicBloomFilter(30);
    final Key reusableKey = new Key();
    final Random gaussians = new Random(seed);

    int added = 0;
    while (added < size) {
        final String hex = Double.toHexString(gaussians.nextGaussian());
        reusableKey.set(new Text(hex).copyBytes(), 1.0);
        filter.add(reusableKey);
        added++;
    }

    return filter;
}
 
源代码2 项目: incubator-hivemall   文件: BloomNotUDFTest.java
/**
 * Creates a dynamic Bloom filter (via
 * {@code BloomFilterUtils.newDynamicBloomFilter(3000)}) and fills it with {@code size}
 * keys derived from a seeded random sequence.
 *
 * <p>Each key holds the bytes of the hexadecimal string form of one Gaussian sample. The
 * string is encoded with the no-argument {@code String#getBytes()}, i.e. the platform
 * default charset.
 *
 * @param seed seed for the random generator
 * @param size number of keys to add
 * @return the populated filter
 */
@Nonnull
private static DynamicBloomFilter createBloomFilter(long seed, int size) {
    final DynamicBloomFilter filter = BloomFilterUtils.newDynamicBloomFilter(3000);
    final Key reusableKey = new Key();
    final Random gaussians = new Random(seed);

    int added = 0;
    while (added < size) {
        final byte[] encoded = Double.toHexString(gaussians.nextGaussian()).getBytes();
        reusableKey.set(encoded, 1.0);
        filter.add(reusableKey);
        added++;
    }

    return filter;
}
 
源代码3 项目: incubator-hivemall   文件: BloomOrUDFTest.java
/**
 * Produces a dynamic Bloom filter (from
 * {@code BloomFilterUtils.newDynamicBloomFilter(3000)}) containing {@code size} keys.
 *
 * <p>The keys are generated from a {@link Random} seeded with {@code seed}: each Gaussian
 * sample is rendered as a hexadecimal string and its bytes (no-argument
 * {@code String#getBytes()}, platform default charset) become the key value.
 *
 * @param seed seed for the random generator
 * @param size number of keys to add
 * @return the populated filter
 */
@Nonnull
private static DynamicBloomFilter createBloomFilter(long seed, int size) {
    final DynamicBloomFilter result = BloomFilterUtils.newDynamicBloomFilter(3000);
    final Key scratch = new Key();
    final Random rng = new Random(seed);

    for (int remaining = size; remaining > 0; remaining--) {
        final double sample = rng.nextGaussian();
        final byte[] raw = Double.toHexString(sample).getBytes();
        scratch.set(raw, 1.0);
        result.add(scratch);
    }

    return result;
}
 
源代码4 项目: incubator-hivemall   文件: BloomAndUDFTest.java
/**
 * Returns a dynamic Bloom filter (obtained from
 * {@code BloomFilterUtils.newDynamicBloomFilter(3000)}) populated with {@code size} keys.
 *
 * <p>Key values are the bytes of {@code Double.toHexString} applied to successive Gaussian
 * samples of a {@link Random} seeded with {@code seed}. The bytes come from the
 * no-argument {@code String#getBytes()}, which uses the platform default charset.
 *
 * @param seed seed for the random generator
 * @param size number of keys to add
 * @return the populated filter
 */
@Nonnull
private static DynamicBloomFilter createBloomFilter(long seed, int size) {
    final DynamicBloomFilter populated = BloomFilterUtils.newDynamicBloomFilter(3000);
    final Key sharedKey = new Key();
    final Random source = new Random(seed);

    int count = 0;
    while (count++ < size) {
        final String hexForm = Double.toHexString(source.nextGaussian());
        sharedKey.set(hexForm.getBytes(), 1.0);
        populated.add(sharedKey);
    }

    return populated;
}
 
源代码5 项目: hudi   文件: InternalDynamicBloomFilter.java
/**
 * Adds a key to the currently active standard Bloom filter and bumps the record counter.
 *
 * <p>When {@code getActiveStandardBF()} reports no active filter, a new row is appended
 * with {@code addRow()}, the last row of {@code matrix} becomes the target, and the
 * record counter restarts from zero before the key is added.
 *
 * @param key the key to add
 * @throws NullPointerException if {@code key} is {@code null}
 */
@Override
public void add(Key key) {
  if (key == null) {
    throw new NullPointerException("Key can not be null");
  }

  final org.apache.hadoop.util.bloom.BloomFilter target;
  final org.apache.hadoop.util.bloom.BloomFilter active = getActiveStandardBF();
  if (active != null) {
    target = active;
  } else {
    // No active filter: grow the matrix by one row and start counting afresh.
    addRow();
    target = matrix[matrix.length - 1];
    currentNbRecord = 0;
  }

  target.add(key);
  currentNbRecord += 1;
}
 
源代码6 项目: compiler   文件: UniqueAggregator.java
/** {@inheritDoc} */
@Override
public void aggregate(final String data, final String metadata) throws IOException, InterruptedException {
	// wrap the bytes of the data (platform default charset) in a bloom filter key
	final Key candidate = new Key(data.getBytes());

	// only values the filter has not reported before are processed
	if (!this.filter.membershipTest(candidate)) {
		// remember the value so later occurrences are skipped
		this.filter.add(candidate);

		// when combining, forward the value itself; otherwise just count it
		if (this.isCombining()) {
			this.collect(data);
		} else {
			this.total++;
		}
	}
}
 
源代码7 项目: compiler   文件: DistinctAggregator.java
/** {@inheritDoc} */
@Override
public void aggregate(final String data, final String metadata) throws IOException, InterruptedException {
	// wrap the bytes of the data (platform default charset) in a bloom filter key
	final Key candidate = new Key(data.getBytes());

	// values the filter has not reported before are recorded and then emitted
	if (!this.filter.membershipTest(candidate)) {
		this.filter.add(candidate);
		this.collect(data);
	}
}
 
源代码8 项目: spork   文件: BuildBloom.java
/**
 * Builds a fresh Bloom filter containing one key taken from the first tuple of the
 * bag held in field 0 of {@code input}, and returns it wrapped in a new tuple.
 *
 * @param input tuple whose first field is a {@code DataBag}
 * @return a tuple wrapping {@code bloomOut()}, or {@code null} when {@code input} is
 *         {@code null} or has no fields
 * @throws IOException declared by the overridden method
 */
@Override
public Tuple exec(Tuple input) throws IOException {
    if (input == null || input.size() == 0) {
        return null;
    }

    // Unwrap the outer bag and take its first tuple.
    final DataBag bag = (DataBag) input.get(0);
    final Tuple first = bag.iterator().next();

    // A single-field tuple is keyed on that field alone; a multi-field
    // tuple is serialized as a whole.
    final byte[] serialized = (first.size() == 1)
            ? DataType.toBytes(first.get(0))
            : DataType.toBytes(first, DataType.TUPLE);

    final Key entry = new Key(serialized);
    filter = new BloomFilter(vSize, numHash, hType);
    filter.add(entry);

    return TupleFactory.getInstance().newTuple(bloomOut());
}
 
源代码9 项目: hadoop-map-reduce-patterns   文件: BloomFilter.java
/**
 * Writes the input record once if any whitespace-delimited token of its "Text"
 * attribute passes the Bloom filter's membership test.
 *
 * <p>Records whose "Text" attribute is null or empty (per {@code isNullOrEmpty}) are
 * skipped. Tokens are encoded with the no-argument {@code String#getBytes()}.
 *
 * @param key     unused input key
 * @param value   the raw record, parsed with {@code transformXmlToMap}
 * @param context output context; receives {@code value} with a {@code NullWritable} value
 */
@Override
public void map(Object key, Text value, Context context)
		throws IOException, InterruptedException {
	final String body = transformXmlToMap(value.toString()).get("Text");
	if (isNullOrEmpty(body)) {
		return;
	}

	for (StringTokenizer words = new StringTokenizer(body); words.hasMoreTokens();) {
		final Key probe = new Key(words.nextToken().getBytes());
		if (filter.membershipTest(probe)) {
			// One matching word is enough; emit the record and stop scanning.
			context.write(value, NullWritable.get());
			return;
		}
	}
}
 
源代码10 项目: incubator-hivemall   文件: BloomOrUDFTest.java
/**
 * Asserts that two filters give the same membership answer for each of {@code size}
 * keys generated from a {@link Random} seeded with {@code seed}.
 *
 * <p>Keys are the bytes (no-argument {@code String#getBytes()}) of the hexadecimal string
 * form of successive Gaussian samples.
 *
 * @param expected filter providing the reference answers
 * @param actual   filter under test
 * @param seed     seed for the random generator
 * @param size     number of keys to probe
 */
private static void assertEquals(@Nonnull Filter expected, @Nonnull Filter actual, long seed,
        int size) {
    final Key probe = new Key();
    final Random gaussians = new Random(seed);

    for (int remaining = size; remaining > 0; remaining--) {
        final byte[] raw = Double.toHexString(gaussians.nextGaussian()).getBytes();
        probe.set(raw, 1.0);

        final boolean inExpected = expected.membershipTest(probe);
        final boolean inActual = actual.membershipTest(probe);
        Assert.assertEquals(inExpected, inActual);
    }
}
 
源代码11 项目: incubator-hivemall   文件: BloomAndUDFTest.java
/**
 * Probes both filters with {@code size} keys generated from a {@link Random} seeded with
 * {@code seed} and asserts that they return the same membership answer for every key.
 *
 * <p>Note: despite the method name, the only check performed is that
 * {@code expected} and {@code actual} agree on each key.
 *
 * @param expected filter providing the reference answers
 * @param actual   filter under test
 * @param seed     seed for the random generator
 * @param size     number of keys to probe
 */
private static void assertNotContains(@Nonnull Filter expected, @Nonnull Filter actual,
        long seed, int size) {
    final Key probe = new Key();
    final Random source = new Random(seed);

    int checked = 0;
    while (checked < size) {
        final String hexForm = Double.toHexString(source.nextGaussian());
        probe.set(hexForm.getBytes(), 1.0);

        final boolean reference = expected.membershipTest(probe);
        final boolean observed = actual.membershipTest(probe);
        Assert.assertEquals(reference, observed);
        checked++;
    }
}
 
源代码12 项目: hudi   文件: InternalFilter.java
/**
 * Adds a list of keys to <i>this</i> filter.
 *
 * @param keys The list of keys; each element is passed to {@link #add(Key)} in
 *             iteration order.
 * @throws IllegalArgumentException if {@code keys} is {@code null}
 */
public void add(List<Key> keys) {
  if (keys == null) {
    // The parameter type is List, so the message names List rather than ArrayList.
    throw new IllegalArgumentException("List<Key> may not be null");
  }

  for (Key key : keys) {
    add(key);
  }
}
 
源代码13 项目: hudi   文件: InternalFilter.java
/**
 * Adds every key of a collection to <i>this</i> filter by calling the single-key
 * {@code add} for each element.
 *
 * @param keys The collection of keys.
 * @throws IllegalArgumentException if {@code keys} is {@code null}
 */
public void add(Collection<Key> keys) {
  if (keys == null) {
    throw new IllegalArgumentException("Collection<Key> may not be null");
  }
  keys.forEach(this::add);
}
 
源代码14 项目: hudi   文件: InternalFilter.java
/**
 * Adds every key of an array to <i>this</i> filter, in index order, by calling the
 * single-key {@code add} for each element.
 *
 * @param keys The array of keys.
 * @throws IllegalArgumentException if {@code keys} is {@code null}
 */
public void add(Key[] keys) {
  if (keys == null) {
    throw new IllegalArgumentException("Key[] may not be null");
  }
  final int count = keys.length;
  for (int index = 0; index < count; index++) {
    add(keys[index]);
  }
}
 
源代码15 项目: hudi   文件: InternalDynamicBloomFilter.java
/**
 * Tests the key against every Bloom filter in {@code matrix} and reports a hit as soon
 * as one of them does.
 *
 * @param key the key to test
 * @return {@code true} if {@code key} is {@code null} or any filter in {@code matrix}
 *         reports membership; {@code false} otherwise
 */
@Override
public boolean membershipTest(Key key) {
  // A null key is reported as present rather than rejected.
  if (key == null) {
    return true;
  }

  final BloomFilter[] rows = matrix;
  for (int row = 0; row < rows.length; row++) {
    if (rows[row].membershipTest(key)) {
      return true;
    }
  }

  return false;
}
 
源代码16 项目: hudi   文件: SimpleBloomFilter.java
/**
 * Adds a string key to the underlying filter, using its UTF-8 bytes as the key value.
 *
 * @param key the key to add
 * @throws NullPointerException if {@code key} is {@code null}
 */
@Override
public void add(String key) {
  if (key == null) {
    throw new NullPointerException("Key cannot be null");
  }
  filter.add(new Key(key.getBytes(StandardCharsets.UTF_8)));
}
 
源代码17 项目: hudi   文件: SimpleBloomFilter.java
/**
 * Tests a string key against the underlying filter, using its UTF-8 bytes as the key
 * value.
 *
 * @param key the key to test
 * @return the result of the underlying filter's membership test
 * @throws NullPointerException if {@code key} is {@code null}
 */
@Override
public boolean mightContain(String key) {
  if (key == null) {
    throw new NullPointerException("Key cannot be null");
  }
  return filter.membershipTest(new Key(key.getBytes(StandardCharsets.UTF_8)));
}
 
源代码18 项目: hiped2   文件: BloomJoin.java
/**
 * Forwards the key/value pair only when the key passes the Bloom filter's membership
 * test. The key is also printed to standard output for every record.
 *
 * @param key     record key; its string form (platform default charset bytes) is tested
 * @param value   record value, written unchanged on a hit
 * @param context output context
 */
@Override
protected void map(Text key, Text value, Context context)
    throws IOException, InterruptedException {
  System.out.println("K[" + key + "]");

  final Key probe = new Key(key.toString().getBytes());
  if (!filter.membershipTest(probe)) {
    return;
  }
  context.write(key, value);
}
 
源代码19 项目: hiped2   文件: BloomFilterCreator.java
/**
 * Adds the record key to the Bloom filter when the value, parsed as a decimal integer
 * age, is greater than 30, and remembers the output collector in {@code collector}.
 * The key is also printed to standard output for every record.
 *
 * @param key      record key; its string form (platform default charset bytes) is added
 * @param value    record value holding the age as text
 * @param output   collector stored in the {@code collector} field
 * @param reporter unused
 * @throws NumberFormatException if {@code value} is not a parsable integer
 */
@Override
public void map(Text key, Text value,
                OutputCollector<NullWritable, BloomFilter> output,
                Reporter reporter) throws IOException {

  System.out.println("K[" + key + "]");

  final int age = Integer.parseInt(value.toString());
  final boolean olderThanThirty = age > 30;
  if (olderThanThirty) {
    final Key entry = new Key(key.toString().getBytes());
    filter.add(entry);
  }
  collector = output;
}
 
源代码20 项目: hiped2   文件: BloomJoin.java
/**
 * Emits the record, keyed by user name, only when that user name passes the Bloom
 * filter's membership test.
 *
 * <p>The emitted value is a {@code Tuple} carrying the result of {@code getDataset()}
 * and the record's string form.
 *
 * @param offset  unused input key
 * @param value   the raw record; the user name is extracted with {@code getUsername}
 * @param context output context
 */
@Override
protected void map(LongWritable offset, Text value, Context context)
    throws IOException, InterruptedException {
  final String user = getUsername(value);
  final Key probe = new Key(user.getBytes());
  if (!filter.membershipTest(probe)) {
    return;
  }

  final Tuple joined = new Tuple();
  joined.setInt(ValueFields.DATASET, getDataset());
  joined.setString(ValueFields.DATA, value.toString());

  context.write(new Text(user), joined);
}
 
源代码21 项目: hiped2   文件: BloomFilterCreator.java
/**
 * Adds the user's name to the Bloom filter when the user's state is "CA".
 *
 * @param key     unused input key
 * @param value   the raw record, parsed with {@code User.fromText}
 * @param context unused output context
 */
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
  final User user = User.fromText(value);
  if (!"CA".equals(user.getState())) {
    return;
  }
  final Key entry = new Key(user.getName().getBytes());
  filter.add(entry);
}
 
源代码22 项目: spork   文件: Bloom.java
/**
 * Tests the input tuple against the Bloom filter, calling {@code init()} first if the
 * filter has not been set yet.
 *
 * <p>A single-field tuple is keyed on that field alone; a multi-field tuple is
 * serialized as a whole.
 *
 * @param input the tuple to test
 * @return the result of the filter's membership test
 * @throws IOException declared by the overridden method
 */
@Override
public Boolean exec(Tuple input) throws IOException {
    if (filter == null) {
        init();
    }

    final byte[] serialized = (input.size() == 1)
            ? DataType.toBytes(input.get(0))
            : DataType.toBytes(input, DataType.TUPLE);

    return filter.membershipTest(new Key(serialized));
}
 
/**
 * Emits the record, keyed by its "UserId" attribute and prefixed with "B", only when
 * that user id passes the Bloom filter's membership test.
 *
 * <p>Records without a "UserId" attribute are skipped.
 *
 * @param key     unused input key
 * @param value   the raw record, parsed with {@code MRDPUtils.transformXmlToMap}
 * @param context output context; receives {@code outkey} and {@code outvalue}
 */
public void map(Object key, Text value, Context context)
		throws IOException, InterruptedException {
	final String record = value.toString();
	final String userId = MRDPUtils.transformXmlToMap(record).get("UserId");

	if (userId != null && bfilter.membershipTest(new Key(userId.getBytes()))) {
		outkey.set(userId);
		outvalue.set("B" + value.toString());
		context.write(outkey, outvalue);
	}
}
 
源代码24 项目: incubator-hivemall   文件: BloomFilterUDAF.java
/**
 * Resets this instance's state: installs a new dynamic Bloom filter from
 * {@code BloomFilterUtils.newDynamicBloomFilter()} and a new empty {@code Key}.
 */
@Override
public void init() {
    filter = BloomFilterUtils.newDynamicBloomFilter();
    key = new Key();
}
 
源代码25 项目: hudi   文件: HoodieDynamicBoundedBloomFilter.java
/**
 * Adds a string key to the internal dynamic Bloom filter, using its UTF-8 bytes as the
 * key value.
 *
 * @param key the key to add
 * @throws NullPointerException if {@code key} is {@code null}
 */
@Override
public void add(String key) {
  // Fail with an explicit message instead of a bare NPE from key.getBytes(...).
  if (key == null) {
    throw new NullPointerException("Key cannot be null");
  }
  internalDynamicBloomFilter.add(new Key(key.getBytes(StandardCharsets.UTF_8)));
}
 
源代码26 项目: hudi   文件: HoodieDynamicBoundedBloomFilter.java
/**
 * Tests a string key against the internal dynamic Bloom filter, using its UTF-8 bytes
 * as the key value.
 *
 * @param key the key to test
 * @return the result of the internal filter's membership test
 * @throws NullPointerException if {@code key} is {@code null}
 */
@Override
public boolean mightContain(String key) {
  // Fail with an explicit message instead of a bare NPE from key.getBytes(...).
  if (key == null) {
    throw new NullPointerException("Key cannot be null");
  }
  return internalDynamicBloomFilter.membershipTest(new Key(key.getBytes(StandardCharsets.UTF_8)));
}
 
源代码27 项目: hudi   文件: InternalFilter.java
/**
 * Adds a key to <i>this</i> filter.
 *
 * <p>Abstract: each concrete filter supplies its own implementation.
 *
 * @param key The key to add.
 */
public abstract void add(Key key);
 
源代码28 项目: hudi   文件: InternalFilter.java
/**
 * Determines whether a specified key belongs to <i>this</i> filter.
 *
 * <p>Abstract: each concrete filter supplies its own implementation.
 *
 * @param key The key to test.
 * @return boolean True if the specified key belongs to <i>this</i> filter. False otherwise.
 */
public abstract boolean membershipTest(Key key);
 
 类所在包
 类方法
 同包方法