下面列出了如何使用 org.apache.hadoop.util.bloom.Key 类的 API 示例代码及写法，也可以点击链接到 GitHub 查看源代码。
/**
 * Builds a dynamic Bloom filter filled with {@code size} pseudo-random keys.
 *
 * <p>Every key is the hexadecimal string form of a Gaussian sample drawn from a
 * {@link Random} seeded with {@code seed}, wrapped in a {@code Text} and copied
 * out as bytes. The same arguments therefore always yield the same key sequence.
 *
 * @param seed seed of the random generator that produces the keys
 * @param size number of keys to insert; nothing is inserted when it is not positive
 * @return the populated filter
 */
@Nonnull
private static DynamicBloomFilter createBloomFilter(long seed, int size) {
    final DynamicBloomFilter filter = BloomFilterUtils.newDynamicBloomFilter(30);
    // A single Key instance is reused; its content is overwritten on each pass.
    final Key reusableKey = new Key();
    final Random random = new Random(seed);
    for (int remaining = size; remaining > 0; remaining--) {
        final String hex = Double.toHexString(random.nextGaussian());
        final Text wrapped = new Text(hex);
        reusableKey.set(wrapped.copyBytes(), 1.0);
        filter.add(reusableKey);
    }
    return filter;
}
/**
 * Creates a dynamic Bloom filter and inserts {@code size} reproducible keys.
 *
 * <p>Keys are the bytes of {@link Double#toHexString(double)} applied to
 * successive Gaussian samples of a {@link Random} seeded with {@code seed}.
 *
 * @param seed seed for the key-generating random source
 * @param size how many keys to add
 * @return the filter containing the generated keys
 */
@Nonnull
private static DynamicBloomFilter createBloomFilter(long seed, int size) {
    final DynamicBloomFilter result = BloomFilterUtils.newDynamicBloomFilter(3000);
    final Key scratch = new Key();
    final Random generator = new Random(seed);
    int inserted = 0;
    while (inserted < size) {
        // The same Key object is refilled and re-added on every iteration.
        scratch.set(Double.toHexString(generator.nextGaussian()).getBytes(), 1.0);
        result.add(scratch);
        inserted++;
    }
    return result;
}
/**
 * Produces a dynamic Bloom filter holding {@code size} deterministic keys.
 *
 * <p>A {@link Random} seeded with {@code seed} supplies Gaussian samples; the
 * hexadecimal string of each sample, converted to bytes, becomes one key.
 *
 * @param seed seed that fixes the generated key sequence
 * @param size number of keys to generate and add
 * @return the filled filter
 */
@Nonnull
private static DynamicBloomFilter createBloomFilter(long seed, int size) {
    final DynamicBloomFilter bloom = BloomFilterUtils.newDynamicBloomFilter(3000);
    final Key sharedKey = new Key();
    final Random source = new Random(seed);
    for (int remaining = size; remaining > 0; remaining--) {
        final double sample = source.nextGaussian();
        final byte[] encoded = Double.toHexString(sample).getBytes();
        sharedKey.set(encoded, 1.0);
        bloom.add(sharedKey);
    }
    return bloom;
}
/**
 * Fills a new dynamic Bloom filter with keys derived from a seeded random source.
 *
 * <p>For each of the {@code size} iterations, a Gaussian sample is rendered with
 * {@link Double#toHexString(double)} and the bytes of that string are added as a key.
 *
 * @param seed seed passed to {@link Random}
 * @param size count of keys to insert
 * @return the resulting filter
 */
@Nonnull
private static DynamicBloomFilter createBloomFilter(long seed, int size) {
    final DynamicBloomFilter target = BloomFilterUtils.newDynamicBloomFilter(3000);
    final Key buffer = new Key();
    final Random prng = new Random(seed);
    int index = 0;
    while (index < size) {
        final String hexSample = Double.toHexString(prng.nextGaussian());
        buffer.set(hexSample.getBytes(), 1.0);
        target.add(buffer);
        index += 1;
    }
    return target;
}
/**
 * Adds a key to the currently active standard Bloom filter.
 *
 * <p>When {@code getActiveStandardBF()} reports no active filter, a new row is
 * appended, the last row of {@code matrix} becomes the target, and the record
 * counter restarts from zero before the key is counted.
 *
 * @param key the key to add; must not be {@code null}
 * @throws NullPointerException if {@code key} is {@code null}
 */
@Override
public void add(Key key) {
    if (key == null) {
        throw new NullPointerException("Key can not be null");
    }

    org.apache.hadoop.util.bloom.BloomFilter target = getActiveStandardBF();
    final boolean needsNewRow = (target == null);
    if (needsNewRow) {
        // No active filter available: grow the matrix and switch to the fresh row.
        addRow();
        target = matrix[matrix.length - 1];
        currentNbRecord = 0;
    }

    target.add(key);
    currentNbRecord += 1;
}
/**
 * {@inheritDoc}
 *
 * <p>Each distinct {@code data} value is processed once: values the Bloom filter
 * already reports as present are dropped. A new value is recorded in the filter
 * and then either collected (when combining) or counted in {@code total}.
 *
 * @param data the value to aggregate
 * @param metadata not used by this implementation
 */
@Override
public void aggregate(final String data, final String metadata) throws IOException, InterruptedException {
    // Encode with a fixed charset. The platform default can map distinct
    // strings to identical bytes (unmappable characters become '?'), which
    // would make different values look like duplicates.
    final Key key = new Key(data.getBytes(java.nio.charset.StandardCharsets.UTF_8));

    // if the key is already in the filter, forget about it
    if (this.filter.membershipTest(key)) {
        return;
    }

    // add the key to the bloom filter
    this.filter.add(key);

    if (this.isCombining()) {
        this.collect(data);
    } else {
        this.total++;
    }
}
/**
 * {@inheritDoc}
 *
 * <p>Collects each {@code data} value the first time it is seen. Values the
 * Bloom filter already reports as present are dropped.
 *
 * @param data the value to aggregate
 * @param metadata not used by this implementation
 */
@Override
public void aggregate(final String data, final String metadata) throws IOException, InterruptedException {
    // Encode with a fixed charset. The platform default can map distinct
    // strings to identical bytes (unmappable characters become '?'), which
    // would make different values look like duplicates.
    final Key key = new Key(data.getBytes(java.nio.charset.StandardCharsets.UTF_8));

    // if the key is already in the filter, forget it
    if (this.filter.membershipTest(key)) {
        return;
    }

    // add the key to the bloom filter
    this.filter.add(key);

    // and collect it
    this.collect(data);
}
/**
 * Builds a Bloom filter containing a single key taken from the first tuple of
 * the bag held in field 0 of {@code input}, and returns it wrapped in a tuple.
 *
 * @param input tuple whose first field is a {@code DataBag}
 * @return a tuple wrapping {@code bloomOut()}, or {@code null} when
 *         {@code input} is {@code null} or empty
 * @throws IOException if reading or serializing a field fails
 */
@Override
public Tuple exec(Tuple input) throws IOException {
    if (input == null || input.size() == 0) {
        return null;
    }

    // Strip off the initial level of bag and take its first tuple.
    final DataBag bag = (DataBag) input.get(0);
    final Tuple first = bag.iterator().next();

    // A single-field tuple is serialized as that field alone; a tuple with any
    // other number of fields is serialized as a whole.
    final byte[] serialized = (first.size() == 1)
            ? DataType.toBytes(first.get(0))
            : DataType.toBytes(first, DataType.TUPLE);

    final Key entry = new Key(serialized);
    filter = new BloomFilter(vSize, numHash, hType);
    filter.add(entry);
    return TupleFactory.getInstance().newTuple(bloomOut());
}
/**
 * Emits the input record once if any whitespace-separated token of its
 * {@code "Text"} attribute is reported as present by the Bloom filter.
 *
 * @param key input key (unused)
 * @param value the raw record; written unchanged as the output key on a match
 * @param context the job context used for output
 */
@Override
public void map(Object key, Text value, Context context)
        throws IOException, InterruptedException {
    final String body = transformXmlToMap(value.toString()).get("Text");
    if (isNullOrEmpty(body)) {
        return;
    }

    for (StringTokenizer tokens = new StringTokenizer(body); tokens.hasMoreTokens();) {
        final Key candidate = new Key(tokens.nextToken().getBytes());
        if (filter.membershipTest(candidate)) {
            // One matching token is enough; emit the record and stop scanning.
            context.write(value, NullWritable.get());
            return;
        }
    }
}
/**
 * Asserts that two filters give the same membership answer for every key of a
 * reproducible key sequence.
 *
 * <p>Keys are the bytes of the hexadecimal string of successive Gaussian
 * samples from a {@link Random} seeded with {@code seed}.
 *
 * @param expected the reference filter
 * @param actual the filter under test
 * @param seed seed that fixes the probed key sequence
 * @param size number of keys to probe
 */
private static void assertEquals(@Nonnull Filter expected, @Nonnull Filter actual, long seed,
        int size) {
    final Key probe = new Key();
    final Random random = new Random(seed);
    for (int remaining = size; remaining > 0; remaining--) {
        final String hex = Double.toHexString(random.nextGaussian());
        probe.set(hex.getBytes(), 1.0);
        final boolean expectedHit = expected.membershipTest(probe);
        final boolean actualHit = actual.membershipTest(probe);
        Assert.assertEquals(expectedHit, actualHit);
    }
}
/**
 * Probes both filters with a reproducible key sequence and asserts that their
 * membership answers agree for every key.
 *
 * <p>NOTE(review): despite the name, this asserts that {@code expected} and
 * {@code actual} agree; it does not assert that keys are absent. Confirm the
 * intended check with the test author.
 *
 * @param expected the reference filter
 * @param actual the filter under test
 * @param seed seed that fixes the probed key sequence
 * @param size number of keys to probe
 */
private static void assertNotContains(@Nonnull Filter expected, @Nonnull Filter actual,
        long seed, int size) {
    final Key probe = new Key();
    final Random generator = new Random(seed);
    int checked = 0;
    while (checked < size) {
        final double sample = generator.nextGaussian();
        probe.set(Double.toHexString(sample).getBytes(), 1.0);
        final boolean inExpected = expected.membershipTest(probe);
        final boolean inActual = actual.membershipTest(probe);
        Assert.assertEquals(inExpected, inActual);
        checked++;
    }
}
/**
 * Adds a list of keys to <i>this</i> filter.
 *
 * <p>Each element is passed to {@code add(Key)} in list order.
 *
 * @param keys The list of keys.
 * @throws IllegalArgumentException if {@code keys} is {@code null}
 */
public void add(List<Key> keys) {
    if (keys == null) {
        // The message names the declared parameter type (List, not ArrayList).
        throw new IllegalArgumentException("List<Key> may not be null");
    }

    for (Key key : keys) {
        add(key);
    }
}
/**
 * Adds a collection of keys to <i>this</i> filter.
 *
 * <p>Each element is passed to {@code add(Key)} in the collection's iteration order.
 *
 * @param keys The collection of keys.
 * @throws IllegalArgumentException if {@code keys} is {@code null}
 */
public void add(Collection<Key> keys) {
    if (keys != null) {
        for (Key element : keys) {
            add(element);
        }
        return;
    }
    throw new IllegalArgumentException("Collection<Key> may not be null");
}
/**
 * Adds an array of keys to <i>this</i> filter.
 *
 * <p>Each element is passed to {@code add(Key)} in index order.
 *
 * @param keys The array of keys.
 * @throws IllegalArgumentException if {@code keys} is {@code null}
 */
public void add(Key[] keys) {
    if (keys == null) {
        throw new IllegalArgumentException("Key[] may not be null");
    }

    final int count = keys.length;
    for (int i = 0; i < count; i++) {
        add(keys[i]);
    }
}
/**
 * Tests whether any filter in {@code matrix} reports the key as present.
 *
 * @param key the key to test
 * @return {@code true} if {@code key} is {@code null} or at least one filter in
 *         {@code matrix} reports it as a member; {@code false} otherwise
 */
@Override
public boolean membershipTest(Key key) {
    // A null key is reported as a member without consulting any filter.
    if (key != null) {
        for (BloomFilter row : matrix) {
            if (row.membershipTest(key)) {
                return true;
            }
        }
        return false;
    }
    return true;
}
/**
 * Adds the given string to the underlying filter.
 *
 * <p>The string is encoded as UTF-8 and wrapped in a {@code Key} before insertion.
 *
 * @param key the string to add; must not be {@code null}
 * @throws NullPointerException if {@code key} is {@code null}
 */
@Override
public void add(String key) {
    if (key == null) {
        throw new NullPointerException("Key cannot be null");
    }
    filter.add(new Key(key.getBytes(StandardCharsets.UTF_8)));
}
/**
 * Asks the underlying filter whether the given string may have been added.
 *
 * <p>The string is encoded as UTF-8 and wrapped in a {@code Key} before the test.
 *
 * @param key the string to look up; must not be {@code null}
 * @return the result of the filter's membership test for the encoded key
 * @throws NullPointerException if {@code key} is {@code null}
 */
@Override
public boolean mightContain(String key) {
    if (key == null) {
        throw new NullPointerException("Key cannot be null");
    }
    return filter.membershipTest(new Key(key.getBytes(StandardCharsets.UTF_8)));
}
/**
 * Forwards the input pair when the Bloom filter reports the key as present.
 *
 * <p>The key is also printed to standard output on every call.
 *
 * @param key record key; its string bytes are tested against the filter
 * @param value record value, written unchanged on a match
 * @param context the job context used for output
 */
@Override
protected void map(Text key, Text value, Context context)
        throws IOException, InterruptedException {
    System.out.println("K[" + key + "]");

    final Key probe = new Key(key.toString().getBytes());
    final boolean mayBePresent = filter.membershipTest(probe);
    if (!mayBePresent) {
        return;
    }
    context.write(key, value);
}
/**
 * Adds the record key to the Bloom filter when the record value, parsed as an
 * integer age, is greater than 30.
 *
 * <p>The key is printed to standard output on every call, and the supplied
 * collector is stored in the {@code collector} field.
 *
 * @param key record key; its string bytes are added to the filter on a match
 * @param value record value holding a decimal integer
 * @param output collector remembered in the {@code collector} field
 * @param reporter not used by this implementation
 * @throws NumberFormatException if {@code value} is not a parsable integer
 */
@Override
public void map(Text key, Text value,
        OutputCollector<NullWritable, BloomFilter> output,
        Reporter reporter) throws IOException {
    System.out.println("K[" + key + "]");

    // parseInt returns the primitive directly; valueOf would box and then unbox.
    final int age = Integer.parseInt(value.toString());
    if (age > 30) {
        filter.add(new Key(key.toString().getBytes()));
    }

    // NOTE(review): presumably kept so the filter can be emitted later — confirm.
    collector = output;
}
/**
 * Emits the record keyed by its user name when the Bloom filter reports that
 * user name as present.
 *
 * <p>The emitted value is a {@code Tuple} carrying the dataset identifier from
 * {@code getDataset()} and the raw record text.
 *
 * @param offset input key (unused)
 * @param value the raw record
 * @param context the job context used for output
 */
@Override
protected void map(LongWritable offset, Text value, Context context)
        throws IOException, InterruptedException {
    final String username = getUsername(value);
    final Key probe = new Key(username.getBytes());
    if (!filter.membershipTest(probe)) {
        return;
    }

    final Tuple payload = new Tuple();
    payload.setInt(ValueFields.DATASET, getDataset());
    payload.setString(ValueFields.DATA, value.toString());
    context.write(new Text(username), payload);
}
/**
 * Adds the name of every user whose state is {@code "CA"} to the Bloom filter.
 *
 * @param key input key (unused)
 * @param value text parsed into a {@code User}
 * @param context the job context (nothing is written here)
 */
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    final User parsed = User.fromText(value);
    final boolean inCalifornia = "CA".equals(parsed.getState());
    if (!inCalifornia) {
        return;
    }
    final byte[] nameBytes = parsed.getName().getBytes();
    filter.add(new Key(nameBytes));
}
/**
 * Tests whether the input is reported as present by the Bloom filter,
 * initializing the filter through {@code init()} on first use.
 *
 * <p>A single-field tuple is serialized as that field alone; a tuple with any
 * other number of fields is serialized as a whole.
 *
 * @param input the tuple to test
 * @return the filter's membership answer for the serialized input
 * @throws IOException if serialization or field access fails
 */
@Override
public Boolean exec(Tuple input) throws IOException {
    if (filter == null) {
        init();
    }

    final byte[] serialized = (input.size() == 1)
            ? DataType.toBytes(input.get(0))
            : DataType.toBytes(input, DataType.TUPLE);

    final Key probe = new Key(serialized);
    return filter.membershipTest(probe);
}
/**
 * Emits the record, keyed by its {@code "UserId"} attribute and prefixed with
 * {@code "B"}, when the Bloom filter reports that user id as present.
 *
 * <p>Records without a {@code "UserId"} attribute are skipped.
 *
 * @param key input key (unused)
 * @param value the raw XML record
 * @param context the job context used for output
 */
public void map(Object key, Text value, Context context)
        throws IOException, InterruptedException {
    final Map<String, String> attributes = MRDPUtils.transformXmlToMap(value.toString());
    final String userId = attributes.get("UserId");
    if (userId == null) {
        return;
    }

    final Key probe = new Key(userId.getBytes());
    if (!bfilter.membershipTest(probe)) {
        return;
    }

    outkey.set(userId);
    outvalue.set("B" + value.toString());
    context.write(outkey, outvalue);
}
/**
 * Initializes this instance's state: a new dynamic Bloom filter from
 * {@code BloomFilterUtils} and a fresh {@code Key}.
 */
@Override
public void init() {
    filter = BloomFilterUtils.newDynamicBloomFilter();
    key = new Key();
}
/**
 * Adds the given string to the internal dynamic Bloom filter.
 *
 * @param key the string to add; its UTF-8 bytes form the filter key
 */
@Override
public void add(String key) {
    final byte[] utf8 = key.getBytes(StandardCharsets.UTF_8);
    final Key entry = new Key(utf8);
    internalDynamicBloomFilter.add(entry);
}
/**
 * Asks the internal dynamic Bloom filter whether the given string may have been added.
 *
 * @param key the string to look up; its UTF-8 bytes form the filter key
 * @return the filter's membership answer for that key
 */
@Override
public boolean mightContain(String key) {
    final byte[] utf8 = key.getBytes(StandardCharsets.UTF_8);
    final Key probe = new Key(utf8);
    return internalDynamicBloomFilter.membershipTest(probe);
}
/**
 * Adds a key to <i>this</i> filter.
 *
 * <p>This method is abstract; each concrete filter supplies its own implementation.
 *
 * @param key The key to add.
 */
public abstract void add(Key key);
/**
 * Determines whether a specified key belongs to <i>this</i> filter.
 *
 * <p>This method is abstract; each concrete filter supplies its own implementation.
 *
 * @param key The key to test.
 * @return {@code true} if the specified key belongs to <i>this</i> filter,
 *         {@code false} otherwise.
 */
public abstract boolean membershipTest(Key key);