下面列出了 com.google.common.collect.Multiset#entrySet() 的实例代码，您可以点击链接到 GitHub 查看源代码，也可以在右侧发表评论。
/**
 * Reports whether a single {@link Tree.Kind} accounts for at least {@code atLeastM} of the
 * expressions found at index {@code column} across {@code rows}.
 *
 * <p>Rows that have no entry at {@code column} are ignored.
 *
 * @param rows the rows of expressions to inspect
 * @param column the zero-based index examined in every row
 * @param atLeastM the minimum number of same-kind expressions required
 * @return {@code true} if any kind occurs at least {@code atLeastM} times in the column
 */
private static boolean expressionsAreParallel(
        List<List<ExpressionTree>> rows, int column, int atLeastM) {
    Multiset<Tree.Kind> kindCounts = HashMultiset.create();
    for (List<? extends ExpressionTree> row : rows) {
        if (column < row.size()) {
            kindCounts.add(row.get(column).getKind());
        }
    }
    // One sufficiently frequent kind is enough.
    return kindCounts.entrySet().stream()
            .anyMatch(kindCount -> kindCount.getCount() >= atLeastM);
}
/**
 * Verifies that no metadata key is used more than once across the passwords and the GCE
 * metadata items of the given spec.
 *
 * @param spec the deployment package spec whose metadata keys are checked
 * @throws IllegalArgumentException if any metadata key occurs more than once
 */
private static void validateMetadataKeyUniqueness(SingleVmDeploymentPackageSpec spec) {
    // Tally every key from both sources before looking for repeats.
    Multiset<String> keyUsage = HashMultiset.create();
    for (PasswordSpec passwordSpec : spec.getPasswordsList()) {
        keyUsage.add(passwordSpec.getMetadataKey());
    }
    for (GceMetadataItem item : spec.getGceMetadataItemsList()) {
        keyUsage.add(item.getKey());
    }

    for (Multiset.Entry<String> usage : keyUsage.entrySet()) {
        if (usage.getCount() <= 1) {
            continue;
        }
        String message = String.format("Metadata key '%s' is not unique", usage.getElement());
        throw new IllegalArgumentException(message);
    }
}
/**
 * Stores under {@code word} the distinct categories of {@code countForCategory}, ordered by
 * the {@code comparator} defined outside this method.
 *
 * @param countForCategory the categories (with counts) observed for the word
 * @param result the dictionary that receives the ordered category list
 * @param word the key under which the categories are stored
 */
private static void addEntryForWord(Multiset<Category> countForCategory,
        Map<String, Collection<Category>> result, String word) {
    // Copy the entries into a list so they can be sorted.
    List<Entry<Category>> sortedEntries =
            new ArrayList<Entry<Category>>(countForCategory.entrySet());
    Collections.sort(sortedEntries, comparator);

    // Keep only the categories, in sorted order.
    List<Category> orderedCategories = new ArrayList<Category>();
    for (Entry<Category> categoryEntry : sortedEntries) {
        orderedCategories.add(categoryEntry.getElement());
    }
    result.put(word, orderedCategories);
}
/**
 * Handles containers that have completed.
 *
 * <p>Each status is logged and handed to {@code runningContainers} together with a multiset
 * that collects the runnables to restart. One container request is then queued per collected
 * runnable, and the request time of those runnables is updated.
 *
 * @param completedContainersStatuses the statuses of the completed containers
 */
private void handleCompleted(List<YarnContainerStatus> completedContainersStatuses) {
    Multiset<String> runnablesToRestart = HashMultiset.create();
    for (YarnContainerStatus completed : completedContainersStatuses) {
        LOG.info("Container {} completed with {}:{}.",
                 completed.getContainerId(), completed.getState(), completed.getDiagnostics());
        runningContainers.handleCompleted(completed, runnablesToRestart);
    }

    for (Multiset.Entry<String> restart : runnablesToRestart.entrySet()) {
        String runnableName = restart.getElement();
        int instances = restart.getCount();
        LOG.info("Re-request container for {} with {} instances.", runnableName, instances);
        runnableContainerRequests.add(createRunnableContainerRequest(runnableName, instances));
    }

    // Refresh the expected-count timestamp of every runnable that re-requests containers,
    // so that the EventHandler is triggered with the right expiration timestamp.
    expectedContainers.updateRequestTime(runnablesToRestart.elementSet());
}
/**
 * Gathers ngrams for the given documents and keeps those that are frequent enough.
 *
 * <p>For every id, the term vector of the template's field is fetched from the index and passed
 * to {@code add}, which accumulates ngrams into a shared multiset. Only ngrams whose
 * accumulated count is at least {@code minDF} are returned, with their counts.
 *
 * @param index the index from which term vectors are read
 * @param ids the ids of the documents to scan
 * @param template the ngram template; its field selects the term vector that is read
 * @param minDF the minimum accumulated count an ngram needs in order to be kept
 * @return a new multiset holding the retained ngrams and their counts
 */
public static Multiset<Ngram> gatherNgram(ESIndex index, String[] ids, NgramTemplate template, int minDF){
    // The ids are processed by a parallel stream, so the accumulator must tolerate concurrent adds.
    Multiset<Ngram> multiset = ConcurrentHashMultiset.create();
    String field = template.getField();
    Arrays.stream(ids).parallel().forEach(id -> {
        Map<Integer,String> termVector = index.getTermVectorFromIndex(field, id);
        add(termVector,multiset,template);
    });

    Multiset<Ngram> filtered = ConcurrentHashMultiset.create();
    // Typed entries: no raw Multiset.Entry and no unchecked cast of the element.
    for (Multiset.Entry<Ngram> entry : multiset.entrySet()) {
        int count = entry.getCount();
        if (count >= minDF) {
            filtered.add(entry.getElement(), count);
        }
    }
    return filtered;
}
/**
 * Builds a scorer context from a list of words.
 *
 * <p>The words are counted, and for every distinct word that has an embedding in {@code kb}
 * its vector is appended to one flat buffer and its count to a parallel list. Words for which
 * {@code kb.getWordEmbeddings} returns {@code null} are skipped.
 *
 * @param words the words to build the context from
 * @return the context created from the concatenated vectors and the matching word counts
 */
public ScorerContext context(List<String> words) {
    Multiset<String> wordOccurrences = TreeMultiset.create();
    wordOccurrences.addAll(words);

    int dimension = kb.getEmbeddingsSize();
    // Concatenation of the vectors of all embedded words. It is sized by the total number of
    // occurrences, so any slots that are never written keep their default value of zero.
    float[] concatenatedVectors = new float[wordOccurrences.size() * dimension];
    IntArrayList occurrenceCounts = new IntArrayList();
    int embeddedWords = 0;
    for (Multiset.Entry<String> occurrence : wordOccurrences.entrySet()) {
        short[] embedding = kb.getWordEmbeddings(occurrence.getElement());
        if (embedding == null) {
            continue;
        }
        occurrenceCounts.add(occurrence.getCount());
        int offset = embeddedWords * dimension;
        for (int i = 0; i < kb.getEmbeddingsSize(); i++) {
            concatenatedVectors[offset + i] = embedding[i];
        }
        embeddedWords++;
    }
    occurrenceCounts.trim();
    return create_context(concatenatedVectors, occurrenceCounts.elements());
}
/**
 * Builds the dictionary that maps each word to its sorted categories.
 *
 * <p>A word that occurs more than {@code MIN_OCCURENCES_OF_WORD} times gets its own entry.
 * The category counts of all other words are pooled and stored under {@code OTHER_WORDS}.
 *
 * @param wordCounts how often each word occurs
 * @param wordToCatToCount for each word, how often it occurs with each category
 * @return an immutable copy of the resulting dictionary
 */
private static Map<String, Collection<Category>> makeDict(final Multiset<String> wordCounts,
        final Map<String, Multiset<Category>> wordToCatToCount) {
    final Map<String, Collection<Category>> dictionary = new HashMap<>();
    final Multiset<Category> rareWordCategoryCounts = HashMultiset.create();

    for (final Entry<String> wordEntry : wordCounts.entrySet()) {
        final String word = wordEntry.getElement();
        final Multiset<Category> categoryCounts = wordToCatToCount.get(word);
        final boolean frequent = wordEntry.getCount() > MIN_OCCURENCES_OF_WORD;
        if (frequent) {
            addEntryForWord(categoryCounts, dictionary, word);
            continue;
        }
        // Rare word: fold its statistics into the shared bucket.
        for (final Entry<Category> categoryEntry : categoryCounts.entrySet()) {
            rareWordCategoryCounts.add(categoryEntry.getElement(), categoryEntry.getCount());
        }
    }

    addEntryForWord(rareWordCategoryCounts, dictionary, OTHER_WORDS);
    return ImmutableMap.copyOf(dictionary);
}
/**
 * Reports an error on {@code protoclass} when two attributes of {@code type} share a name.
 *
 * <p>Lazy attributes are tallied under a key with a {@code $lazy} suffix, so they are only
 * compared with other lazy attributes. The suffix is removed again before reporting.
 *
 * @param type the value type whose attributes are checked
 * @param protoclass the protoclass used to report the error
 */
private void checkAttributeNamesForDuplicates(ValueType type, Protoclass protoclass) {
    if (type.attributes.isEmpty()) {
        return;
    }

    Multiset<String> nameCounts = HashMultiset.create(type.attributes.size());
    for (ValueAttribute attribute : type.attributes) {
        // Lazy attributes live in their own comparison scope.
        String key = attribute.isGenerateLazy
                ? attribute.name() + "$lazy"
                : attribute.name();
        nameCounts.add(key);
    }

    List<String> duplicates = Lists.newArrayList();
    for (Multiset.Entry<String> nameCount : nameCounts.entrySet()) {
        if (nameCount.getCount() > 1) {
            duplicates.add(nameCount.getElement().replace("$lazy", ""));
        }
    }

    if (duplicates.isEmpty()) {
        return;
    }
    protoclass.report()
        .error("Duplicate attribute names %s. You should check if correct @Value.Style applied",
            duplicates);
}
/**
 * Renders the counts of a multiset as a bar chart, one bar per distinct symbol.
 *
 * @param counts the symbols and their counts; each count becomes a bar labelled with the symbol
 * @param outFile the file the chart is rendered to
 * @param renderer the renderer that produces the output
 * @param chartTitle the title of the chart
 * @param xAxisLabel the label of the x axis
 * @param yAxisLabel the label of the y axis
 * @throws IOException declared for the rendering step
 */
static void writeToChart(final Multiset<Symbol> counts, final File outFile,
        final GnuPlotRenderer renderer,
        final String chartTitle, final String xAxisLabel, final String yAxisLabel)
        throws IOException {
    final Axis xAxis = Axis.xAxis().setLabel(xAxisLabel).rotateLabels().build();
    final Axis yAxis = Axis.yAxis().setLabel(yAxisLabel).build();
    final BarChart.Builder chart = BarChart.builder()
            .setTitle(chartTitle)
            .setXAxis(xAxis)
            .setYAxis(yAxis)
            .hideKey();

    for (final Multiset.Entry<Symbol> symbolCount : counts.entrySet()) {
        final String label = symbolCount.getElement().toString();
        chart.addBar(BarChart.Bar.builder(symbolCount.getCount()).setLabel(label).build());
    }

    renderer.renderTo(chart.build(), outFile);
}
/**
 * Scores every alternative identifier name against the given ngrams.
 *
 * <p>For each alternative, the wildcard token of every ngram is substituted with the
 * alternative, and the base-2 log of the ngram's score, weighted by the ngram's count, is
 * summed up. The sum is subtracted from the scope prior and divided by the size of
 * {@code ngrams}. An ngram whose scoring fails is logged and skipped.
 *
 * @param ngrams the ngrams (with counts) that contain the wildcard token
 * @param alternatives the candidate identifier names
 * @param scope the scope passed to the scope prior and to each renaming
 * @return the renamings, in the sorted order of the returned set
 */
@Override
public SortedSet<Renaming> calculateScores(
        final Multiset<NGram<String>> ngrams,
        final Set<String> alternatives, final Scope scope) {
    final SortedSet<Renaming> renamings = Sets.newTreeSet();
    for (final String candidate : alternatives) {
        double logScoreSum = 0;
        for (final Entry<NGram<String>> ngramEntry : ngrams.entrySet()) {
            try {
                final NGram<String> substituted = NGram.substituteTokenWith(
                        ngramEntry.getElement(), WILDCARD_TOKEN, candidate);
                logScoreSum += DoubleMath.log2(scoreNgram(substituted)) * ngramEntry.getCount();
            } catch (final Throwable e) {
                // Best effort: a failing ngram is logged and does not contribute to the sum.
                LOGGER.warning(ExceptionUtils.getFullStackTrace(e));
            }
        }
        final double normalizedScore =
                (addScopePriors(candidate, scope) - logScoreSum) / ngrams.size();
        renamings.add(new Renaming(candidate, normalizedScore,
                ngrams.size() / ngramLM.getN(), scope));
    }
    return renamings;
}
/**
 * Keeps the cumulated distance for all the common raw super types of the given references.
 * Interfaces that are more directly implemented will get a lower total count than more general
 * interfaces.
 *
 * <p>For each reference, its own distances are computed by {@code initializeDistance}. Types
 * that are absent from those distances are removed from {@code cumulatedDistance}, and the
 * counts of the types that remain are added to it.
 *
 * @param references the references whose distances are accumulated
 * @param all passed through to {@code initializeDistance}
 * @param cumulatedDistance the running totals; modified in place
 */
protected void cumulateDistance(final List<LightweightTypeReference> references, Multimap<JvmType, LightweightTypeReference> all,
        Multiset<JvmType> cumulatedDistance) {
    for (LightweightTypeReference reference : references) {
        Multiset<JvmType> referenceDistance = LinkedHashMultiset.create();
        initializeDistance(reference, all, referenceDistance);

        // Only types shared with this reference stay in the running totals.
        cumulatedDistance.retainAll(referenceDistance);

        for (Multiset.Entry<JvmType> distanceEntry : referenceDistance.entrySet()) {
            JvmType type = distanceEntry.getElement();
            if (cumulatedDistance.contains(type)) {
                cumulatedDistance.add(type, distanceEntry.getCount());
            }
        }
    }
}
/**
 * Converts a multiset into a JSON object that has one field per distinct element: the field
 * name is the element's {@code toString()} and the field value is the element's count.
 *
 * @param counts the multiset to convert
 * @return the JSON object holding the counts
 */
public static <T> ObjectNode toJson(Multiset<T> counts) {
    ObjectNode json = newObject();
    for (Multiset.Entry<T> elementCount : counts.entrySet()) {
        String fieldName = elementCount.getElement().toString();
        int occurrences = elementCount.getCount();
        json.put(fieldName, occurrences);
    }
    return json;
}
/**
 * Writes a multiset of strings to {@code output}: first the number of distinct elements as an
 * int, then, for every distinct element, the element as a UTF string followed by its count as
 * an int.
 *
 * @param collection the multiset to serialize
 * @param output the sink that receives the serialized form
 */
private void serializeMultiset(Multiset<String> collection, ByteArrayDataOutput output) {
    int distinctElements = collection.elementSet().size();
    output.writeInt(distinctElements);

    for (Multiset.Entry<String> elementCount : collection.entrySet()) {
        String element = elementCount.getElement();
        int occurrences = elementCount.getCount();
        output.writeUTF(element);
        output.writeInt(occurrences);
    }
}
/**
 * Asserts that the messages returned by {@code dao} for the uids of {@code msgs} are exactly
 * {@code msgs}, in any order. Each distinct uid is queried once.
 *
 * @param msgs the messages expected to be found
 */
private void check(List<Msg> msgs) {
    // Collect the uids; only the distinct values are queried below.
    Multiset<Integer> uids = HashMultiset.create();
    for (Msg msg : msgs) {
        uids.add(msg.getUid());
    }

    List<Msg> dbMsgs = new ArrayList<Msg>();
    for (Integer uid : uids.elementSet()) {
        dbMsgs.addAll(dao.getMsgs(uid));
    }

    assertThat(dbMsgs, hasSize(msgs.size()));
    assertThat(dbMsgs, containsInAnyOrder(msgs.toArray()));
}
/**
 * Picks a random element from a multiset, weighted by count.
 *
 * <p>A random position below the multiset's total size is drawn with
 * {@code RandomUtils.nextInt}; the entries are then walked in iteration order, summing their
 * counts, and the first element whose running total exceeds the position is returned. An
 * element with a higher count therefore covers more positions.
 *
 * @param set the multiset to draw from; must not be {@code null}
 * @return the selected element, or {@code null} if no entry covers the drawn position
 */
public static <T> T getRandomElement(final Multiset<T> set) {
    final int target = RandomUtils.nextInt(checkNotNull(set).size());
    int covered = 0;
    for (final Multiset.Entry<T> entry : set.entrySet()) {
        covered += entry.getCount();
        if (covered > target) {
            return entry.getElement();
        }
    }
    return null;
}
/**
 * Counts the characters of the lower-cased {@code sentence} with a {@code HashMultiset},
 * prints every character with its count, and checks that "e" occurs 7 times.
 */
@Test
public void most_frequent_char_guava() throws IOException {
    // Split the sentence into one-character strings and tally them.
    Iterable<String> characters = Splitter.fixedLength(1).split(sentence.toLowerCase());
    Multiset<String> frequentCharacters = HashMultiset.create(characters);

    for (Entry<String> characterCount : frequentCharacters.entrySet()) {
        String line = characterCount.getElement() + ":" + characterCount.getCount();
        System.out.println(line);
    }

    assertEquals(7, frequentCharacters.count("e"), 0);
}
/**
 * Picks a random element from a multiset, weighted by count.
 *
 * <p>A random position below the multiset's total size is drawn with
 * {@code RandomUtils.nextInt}; the entries are then walked in iteration order, summing their
 * counts, and the first element whose running total exceeds the position is returned. An
 * element with a higher count therefore covers more positions.
 *
 * @param set the multiset to draw from; must not be {@code null}
 * @return the selected element, or {@code null} if no entry covers the drawn position
 */
public static <T> T getRandomElement(final Multiset<T> set) {
    final int target = RandomUtils.nextInt(checkNotNull(set).size());
    int covered = 0;
    for (final Multiset.Entry<T> entry : set.entrySet()) {
        covered += entry.getCount();
        if (covered > target) {
            return entry.getElement();
        }
    }
    return null;
}
/**
 * Gathers the ngram features configured under {@code train.feature.ngram.*}.
 *
 * <p>For every combination of extraction field, n, and slop, the ngrams that reach the
 * frequency threshold are gathered with {@code NgramEnumerator.gatherNgram}. An ngram is added
 * to the overall collection when {@code interesting} accepts it and, unless duplicate words
 * are allowed by the configuration, it has no duplicate.
 *
 * @param config the configuration supplying the output folder and the ngram settings
 * @param index the index the ngrams are gathered from
 * @param ids the ids of the documents to scan; their number scales the frequency threshold
 * @param logger the logger that receives progress messages
 * @return the distinct ngrams that were collected
 * @throws Exception declared by the original signature
 */
static Set<Ngram> gather(Config config, ESIndex index,
        String[] ids, Logger logger) throws Exception{
    File metaDataFolder = new File(config.getString("output.folder"),"meta_data");
    metaDataFolder.mkdirs();

    Multiset<Ngram> allNgrams = ConcurrentHashMultiset.create();
    List<Integer> ns = config.getIntegers("train.feature.ngram.n");
    double minDf = config.getDouble("train.feature.ngram.minDf");
    // Turn the relative threshold into an absolute count.
    int minDFrequency = (int)Math.floor(ids.length*minDf);
    List<String> fields = config.getStrings("train.feature.ngram.extractionFields");
    List<Integer> slops = config.getIntegers("train.feature.ngram.slop");
    boolean inorder = config.getBoolean("train.feature.ngram.inOrder");
    boolean allowDuplicates = config.getBoolean("train.feature.ngram.allowDuplicateWords");

    for (String field : fields) {
        for (int n : ns) {
            for (int slop : slops) {
                logger.info("gathering " + n + "-grams from field " + field
                        + " with slop " + slop + " and minDf " + minDf
                        + ", (actual frequency threshold = " + minDFrequency + ")");
                NgramTemplate template = new NgramTemplate(field, n, slop);
                Multiset<Ngram> ngrams =
                        NgramEnumerator.gatherNgram(index, ids, template, minDFrequency);
                logger.info("gathered " + ngrams.elementSet().size() + " ngrams");

                int added = 0;
                for (Multiset.Entry<Ngram> candidate : ngrams.entrySet()) {
                    Ngram ngram = candidate.getElement();
                    ngram.setInOrder(inorder);
                    int count = candidate.getCount();
                    // Accept when it is interesting and, if duplicate words are not allowed,
                    // when it has no duplicate.
                    boolean accepted = interesting(allNgrams, ngram, count)
                            && (allowDuplicates || !ngram.hasDuplicate());
                    if (accepted) {
                        allNgrams.add(ngram, count);
                        added += 1;
                    }
                }
                logger.info(added + " are really new");
            }
        }
    }

    logger.info("there are " + allNgrams.elementSet().size() + " ngrams in total");
    // NOTE: code that dumped all ngrams to "all_ngrams.txt" and serialized them to
    // "all_ngrams.ser" inside metaDataFolder used to live here, commented out.
    return allNgrams.elementSet();
}
/**
 * Returns the entry set of the multiset supplied by {@code get()}, or the entry set of
 * {@code EMPTY_MULTISET} when {@code get()} returns {@code null}.
 */
@Override
public Set<com.google.common.collect.Multiset.Entry<E>> entrySet() {
    final Multiset<E> delegate = get();
    if (delegate != null) {
        return delegate.entrySet();
    }
    return EMPTY_MULTISET.entrySet();
}
/**
 * Finds the set of categories used for each word in a corpus.
 *
 * <p>A word that occurs more than {@code MIN_OCCURENCES_OF_WORD} times gets its own sorted
 * category list. The category counts of all other words are pooled and stored under
 * {@code OTHER_WORDS}.
 *
 * @param input the sentences, each providing its words and the gold category of every word
 * @return an immutable map from word to its categories
 */
public static Map<String, Collection<Category>> makeDict(Iterable<InputToParser> input) {
    Multiset<String> wordCounts = HashMultiset.create();
    Map<String, Multiset<Category>> wordToCatToCount = new HashMap<String, Multiset<Category>>();

    // Pass 1: count how often each word occurs, overall and with each category.
    for (InputToParser sentence : input) {
        for (int position = 0; position < sentence.getInputWords().size(); position++) {
            String word = sentence.getInputWords().get(position).word;
            Category goldCategory = sentence.getGoldCategories().get(position);
            wordCounts.add(word);

            Multiset<Category> categoriesOfWord = wordToCatToCount.get(word);
            if (categoriesOfWord == null) {
                categoriesOfWord = HashMultiset.create();
                wordToCatToCount.put(word, categoriesOfWord);
            }
            categoriesOfWord.add(goldCategory);
        }
    }

    // Pass 2: store a sorted category list per frequent word, pooling the rare ones.
    Multiset<Category> rareWordCategoryCounts = HashMultiset.create();
    Map<String, Collection<Category>> dictionary = new HashMap<String, Collection<Category>>();
    for (Entry<String> wordEntry : wordCounts.entrySet()) {
        String word = wordEntry.getElement();
        Multiset<Category> categoryCounts = wordToCatToCount.get(word);
        if (wordEntry.getCount() > MIN_OCCURENCES_OF_WORD) {
            addEntryForWord(categoryCounts, dictionary, word);
            continue;
        }
        for (Entry<Category> categoryEntry : categoryCounts.entrySet()) {
            rareWordCategoryCounts.add(categoryEntry.getElement(), categoryEntry.getCount());
        }
    }

    addEntryForWord(rareWordCategoryCounts, dictionary, OTHER_WORDS);
    return ImmutableMap.copyOf(dictionary);
}