Listed below are example usages of com.google.common.collect.ConcurrentHashMultiset#create(); you can follow the links to view the full source on GitHub.
public static void main(String[] args) {
// Split the text into separate words
String INPUT_TEXT = "Hello World! Hello All! Hi World!";
// Create Multiset
Multiset<String> multiset = ConcurrentHashMultiset.create(Arrays.asList(INPUT_TEXT.split(" ")));
// Print each word with its count
System.out.println(multiset); // prints [Hi, Hello x 2, World! x 2, All!] - element order is not guaranteed
// Print all unique words
System.out.println(multiset.elementSet()); // prints [Hi, Hello, World!, All!] - element order is not guaranteed
// Print the number of occurrences of each word
System.out.println("Hello = " + multiset.count("Hello")); // prints 2
System.out.println("World = " + multiset.count("World!")); // prints 2
System.out.println("All = " + multiset.count("All!")); // prints 1
System.out.println("Hi = " + multiset.count("Hi")); // prints 1
System.out.println("Empty = " + multiset.count("Empty")); // prints 0
// Print the total number of words
System.out.println(multiset.size()); // prints 6
// Print the number of unique words
System.out.println(multiset.elementSet().size()); // prints 4
}
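ConcurrentHashMultiset is mainly useful when several threads update the same counts. The sketch below is not taken from any of the projects on this page; the class name and thread counts are illustrative. It shows four tasks counting the same word list against one shared multiset, with no external synchronization:

import com.google.common.collect.ConcurrentHashMultiset;
import com.google.common.collect.Multiset;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

public class ConcurrentWordCount {
    public static void main(String[] args) throws InterruptedException {
        // Thread-safe multiset shared by all tasks
        Multiset<String> counts = ConcurrentHashMultiset.create();
        List<String> words = Arrays.asList("Hello World! Hello All! Hi World!".split(" "));
        ExecutorService pool = Executors.newFixedThreadPool(4);
        // Each task adds the same words; ConcurrentHashMultiset handles the concurrent updates
        for (int i = 0; i < 4; i++) {
            pool.submit(() -> words.forEach(counts::add));
        }
        pool.shutdown();
        pool.awaitTermination(10, TimeUnit.SECONDS);
        System.out.println("Hello = " + counts.count("Hello")); // prints 8 (2 occurrences x 4 tasks)
    }
}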
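// Gathers ngrams matching the template from the given documents' term vectors and keeps only those whose count reaches minDF.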
public static Multiset<Ngram> gatherNgram(ESIndex index, String[] ids, NgramTemplate template, int minDF){
Multiset<Ngram> multiset = ConcurrentHashMultiset.create();
String field = template.getField();
Arrays.stream(ids).parallel().forEach(id -> {
Map<Integer,String> termVector = index.getTermVectorFromIndex(field, id);
add(termVector,multiset,template);
});
Multiset<Ngram> filtered = ConcurrentHashMultiset.create();
for (Multiset.Entry<Ngram> entry : multiset.entrySet()){
Ngram ngram = entry.getElement();
int count = entry.getCount();
if (count>=minDF){
filtered.add(ngram,count);
}
}
return filtered;
}
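// Builds every ngram defined by the template offsets for a single document; each distinct ngram is counted at most once per document (setCount(ngram, 1)), so the shared multiset accumulates document frequencies.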
private static void add(List<String> source, Multiset<Ngram> multiset, String field, int slop, List<Integer> template){
Multiset<Ngram> multiSetForDoc = ConcurrentHashMultiset.create();
for (int i=0;i<source.size();i++){
if(i+template.get(template.size()-1)<source.size()){
List<String> list = new ArrayList<>();
for (int j: template){
list.add(source.get(i+j));
}
Ngram ngram = new Ngram();
ngram.setNgram(Ngram.toNgramString(list));
ngram.setSlop(slop);
ngram.setField(field);
ngram.setInOrder(true);
multiSetForDoc.setCount(ngram,1);
}
}
multiset.addAll(multiSetForDoc);
}
/**
* Predicts a multi-label from sampled multi-labels by estimating their empirical probabilities.
* @param numClasses the number of classes
* @param samples sampled multi-labels; can have duplicates; their empirical probabilities will be estimated
* @return the multi-label predicted from the empirical distribution of the samples
*/
public MultiLabel predict(int numClasses, List<MultiLabel> samples){
Multiset<MultiLabel> multiset = ConcurrentHashMultiset.create();
for (MultiLabel multiLabel: samples){
multiset.add(multiLabel);
}
int sampleSize = samples.size();
List<MultiLabel> uniqueOnes = new ArrayList<>();
List<Double> probs = new ArrayList<>();
for (Multiset.Entry<MultiLabel> entry: multiset.entrySet()){
uniqueOnes.add(entry.getElement());
probs.add((double)entry.getCount()/sampleSize);
}
return predict(numClasses,uniqueOnes,probs);
}
/**
* Log the top N action rewind events and clear the history of failed actions' lost inputs and
* rewind plans.
*/
void reset(ExtendedEventHandler eventHandler) {
ImmutableList<ActionRewindEvent> topActionRewindEvents =
rewindPlansStats.stream()
.collect(
greatest(
MAX_ACTION_REWIND_EVENTS, comparing(RewindPlanStats::invalidatedNodesCount)))
.stream()
.map(ActionRewindingStats::toActionRewindEventProto)
.collect(toImmutableList());
ActionRewindingStats rewindingStats =
new ActionRewindingStats(lostInputRecords.size(), topActionRewindEvents);
eventHandler.post(rewindingStats);
lostInputRecords = ConcurrentHashMultiset.create();
rewindPlansStats = new ConcurrentLinkedQueue<>();
}
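// Gathers all ngrams matching the template from the given documents' term vectors, with no frequency filtering.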
public static Multiset<Ngram> gatherNgram(ESIndex index, String[] ids, NgramTemplate template){
Multiset<Ngram> multiset = ConcurrentHashMultiset.create();
String field = template.getField();
Arrays.stream(ids).parallel().forEach(id -> {
Map<Integer,String> termVector = index.getTermVectorFromIndex(field, id);
add(termVector,multiset,template);
});
return multiset;
}
private static void test8(){
NgramTemplate template = new NgramTemplate("body",3,1);
Multiset<Ngram> multiset = ConcurrentHashMultiset.create();
List<String> source = new ArrayList<>();
for (int i=0;i<10;i++){
source.add(""+i);
}
NgramEnumerator.add(source,multiset,template);
System.out.println(multiset.elementSet().stream().map(Ngram::getNgram).collect(Collectors.toList()));
}
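// Collects candidate ngrams for every configured field, n, and slop combination, keeping only those that pass the interesting(...) check and the document-frequency threshold (optionally excluding ngrams with duplicate words), and returns the set of unique ngrams.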
static Set<Ngram> gather(Config config, ESIndex index,
String[] ids, Logger logger) throws Exception{
File metaDataFolder = new File(config.getString("output.folder"),"meta_data");
metaDataFolder.mkdirs();
Multiset<Ngram> allNgrams = ConcurrentHashMultiset.create();
List<Integer> ns = config.getIntegers("train.feature.ngram.n");
double minDf = config.getDouble("train.feature.ngram.minDf");
int minDFrequency = (int)Math.floor(ids.length*minDf);
List<String> fields = config.getStrings("train.feature.ngram.extractionFields");
List<Integer> slops = config.getIntegers("train.feature.ngram.slop");
boolean inorder = config.getBoolean("train.feature.ngram.inOrder");
boolean allowDuplicates = config.getBoolean("train.feature.ngram.allowDuplicateWords");
for (String field: fields){
for (int n: ns){
for (int slop:slops){
logger.info("gathering "+n+ "-grams from field "+field+" with slop "+slop+" and minDf "+minDf+ ", (actual frequency threshold = "+minDFrequency+")");
NgramTemplate template = new NgramTemplate(field,n,slop);
Multiset<Ngram> ngrams = NgramEnumerator.gatherNgram(index, ids, template, minDFrequency);
logger.info("gathered "+ngrams.elementSet().size()+ " ngrams");
int newCounter = 0;
for (Multiset.Entry<Ngram> entry: ngrams.entrySet()){
Ngram ngram = entry.getElement();
ngram.setInOrder(inorder);
int count = entry.getCount();
if (interesting(allNgrams,ngram,count)){
if (allowDuplicates) {
allNgrams.add(ngram, count);
newCounter += 1;
}else{
if (!ngram.hasDuplicate()){
allNgrams.add(ngram, count);
newCounter += 1;
}
}
}
}
logger.info(newCounter+" are really new");
}
}
}
logger.info("there are "+allNgrams.elementSet().size()+" ngrams in total");
// BufferedWriter bufferedWriter = new BufferedWriter(new FileWriter(new File(metaDataFolder,"all_ngrams.txt")));
// for (Multiset.Entry<Ngram> ngramEntry: allNgrams.entrySet()){
// bufferedWriter.write(ngramEntry.getElement().toString());
// bufferedWriter.write("\t");
// bufferedWriter.write(""+ngramEntry.getCount());
// bufferedWriter.newLine();
// }
//
// bufferedWriter.close();
//
// //for serialization
// Set<Ngram> uniques = new HashSet<>();
// uniques.addAll(allNgrams.elementSet());
// Serialization.serialize(uniques, new File(metaDataFolder, "all_ngrams.ser"));
return allNgrams.elementSet();
}
@Test
public void testConcurrentRace_AllSameSizedCombinations() throws Exception {
// When we have n values
int n = 10;
ImmutableSet.Builder<String> valsBuilder = ImmutableSet.builder();
for (int i = 0; i < n; i++) {
valsBuilder.add("val-" + i);
}
ImmutableSet<String> vals = valsBuilder.build();
int k = 5;
// And we have all combinations of size k of these n values
Set<Set<String>> combinations = Sets.combinations(vals, k);
int numCombinations = combinations.size();
// And we have a MultisetSemaphore
final MultisetSemaphore<String> multisetSemaphore = MultisetSemaphore.newBuilder()
// with k max num unique values,
.maxNumUniqueValues(k)
.build();
// And an ExecutorService with nCk threads,
ExecutorService executorService = Executors.newFixedThreadPool(numCombinations);
// And a recorder for thrown exceptions,
ThrowableRecordingRunnableWrapper wrapper =
new ThrowableRecordingRunnableWrapper("testConcurrentRace_AllSameSizedCombinations");
// And a ConcurrentHashMultiset for counting the multiplicities of the values ourselves,
ConcurrentHashMultiset<String> counts = ConcurrentHashMultiset.create();
for (Set<String> combination : combinations) {
// And, for each of the nCk combinations, we submit a Runnable, that
@SuppressWarnings("unused")
Future<?> possiblyIgnoredError =
executorService.submit(
wrapper.wrap(
new Runnable() {
@Override
public void run() {
try {
// Tries to acquire permits for its set of k values,
multisetSemaphore.acquireAll(combination);
// And then verifies that the multiplicities are as expected,
combination.forEach(counts::add);
assertThat(counts.entrySet().size()).isAtMost(k);
combination.forEach(counts::remove);
// And then releases the permits.
multisetSemaphore.releaseAll(combination);
} catch (InterruptedException e) {
throw new IllegalStateException(e);
}
}
}));
}
// Then all of our Runnables completed (without deadlock!), as expected,
boolean interrupted = ExecutorUtil.interruptibleShutdown(executorService);
// And also none of them threw any Exceptions.
assertThat(wrapper.getFirstThrownError()).isNull();
if (interrupted) {
Thread.currentThread().interrupt();
throw new InterruptedException();
}
}