The following examples show how to use the org.apache.lucene.search.LeafCollector API, with sample code and common usage patterns; you can also follow the links to view the full source code on GitHub.
/**
 * Records the current doc ({@code collectDocID}) as a full hit: it is forwarded to
 * the main collector, to the optional drill-down collector, and to every
 * per-dimension sideways collector.
 */
private void collectHit(LeafCollector collector, DocsAndCost[] dims) throws IOException {
  collector.collect(collectDocID);
  if (drillDownCollector != null) {
    drillDownLeafCollector.collect(collectDocID);
  }
  // TODO: the sideways counts could instead be "fixed up" at the end by taking
  // the union of the drill-down hits.
  // Tally sideways counts:
  for (int i = 0; i < dims.length; i++) {
    dims[i].sidewaysLeafCollector.collect(collectDocID);
  }
}
/**
 * Adapts a {@link TermsCollector} to the {@code GenericTermsCollector} interface.
 * The wrapped collector does not track scores, so {@code getScoresPerTerm()}
 * always throws {@link UnsupportedOperationException}.
 */
static GenericTermsCollector wrap(final TermsCollector<?> collector) {
  return new GenericTermsCollector() {

    @Override
    public BytesRefHash getCollectedTerms() {
      return collector.getCollectorTerms();
    }

    @Override
    public float[] getScoresPerTerm() {
      throw new UnsupportedOperationException("scores are not available for "+collector);
    }

    @Override
    public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
      return collector.getLeafCollector(context);
    }

    @Override
    public org.apache.lucene.search.ScoreMode scoreMode() {
      return collector.scoreMode();
    }
  };
}
/**
 * Wraps {@code collector} so that a single-valued numeric field without docValues
 * can still be collected on: each leaf reader is wrapped via {@link Insanity} for
 * the given field. Any other kind of field is collected unchanged.
 */
private Collector getInsanityWrapper(final String field, Collector collector) {
  final SchemaField sf = searcher.getSchema().getFieldOrNull(field);
  final boolean singleValuedNumericWithoutDV =
      sf != null && !sf.hasDocValues() && !sf.multiValued() && sf.getType().getNumberType() != null;
  if (!singleValuedNumericWithoutDV) {
    return collector;
  }
  // it's a single-valued numeric field: we must currently create insanity :(
  // there isn't a GroupedFacetCollector that works on numerics right now...
  return new FilterCollector(collector) {
    @Override
    public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
      final LeafReader insane = Insanity.wrapInsanity(context.reader(), field);
      return in.getLeafCollector(insane.getContext());
    }
  };
}
@Override
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
  // One bit set per segment, sized to the segment's maxDoc, stored by segment ord.
  final FixedBitSet segmentBits = new FixedBitSet(context.reader().maxDoc());
  this.sets[context.ord] = segmentBits;
  return new LeafCollector() {
    @Override
    public void setScorer(Scorable scorer) throws IOException {
      // scores are not needed
    }

    @Override
    public void collect(int docId) throws IOException {
      ++totalHits;
      segmentBits.set(docId);
    }
  };
}
@Override
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
  final Sort segmentSort = context.reader().getMetaData().getSort();
  if (segmentSort == null) {
    // Unsorted segment: no early termination is possible, collect normally.
    return super.getLeafCollector(context);
  }
  if (!canEarlyTerminate(sort, segmentSort)) {
    throw new IllegalStateException("Cannot early terminate with sort order " + sort + " if segments are sorted with " + segmentSort);
  }
  // segment is sorted, can early-terminate
  return new FilterLeafCollector(super.getLeafCollector(context)) {
    private int numCollected;

    @Override
    public void collect(int doc) throws IOException {
      super.collect(doc);
      // Once enough docs are collected, abort this segment.
      if (++numCollected >= numDocsToCollect) {
        terminatedEarly.set(true);
        throw new CollectionTerminatedException();
      }
    }
  };
}
@Override
public LeafCollector getLeafCollector(LeafReaderContext context)
    throws IOException {
  // Fold the previous segment's size into the running total before switching segments.
  prevReaderCumulativeSize += currentReaderSize; // not current any more
  // NOTE(review): maxDoc() - 1 looks like an off-by-one, but it is preserved here
  // because the cumulative size is only used to report the termination position.
  currentReaderSize = context.reader().maxDoc() - 1;
  return new FilterLeafCollector(super.getLeafCollector(context)) {
    @Override
    public void collect(int doc) throws IOException {
      super.collect(doc);
      numCollected++;
      // Abort the whole collection once the doc budget is exhausted.
      if (numCollected >= maxDocsToCollect) {
        throw new EarlyTerminatingCollectorException
            (numCollected, prevReaderCumulativeSize + (doc + 1));
      }
    }
  };
}
/**
 * Collects hits by reading the numeric doc values of the {@code "sort_i"} field and
 * recording each (global docID, value-as-score) pair into {@code list}. Documents
 * with no value for the field are recorded with a value of 0.
 */
@Override
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
  final int base = context.docBase;
  final NumericDocValues values = DocValues.getNumeric(context.reader(), "sort_i");
  return new LeafCollector() {
    @Override
    public void setScorer(Scorable scorer) throws IOException {}

    @Override // fix: annotation was missing on this LeafCollector method
    public void collect(int doc) throws IOException {
      // Default to 0 when the document has no value for the field.
      final long value = values.advanceExact(doc) ? values.longValue() : 0L;
      list.add(new ScoreDoc(doc + base, (float) value));
    }
  };
}
/**
 * Resolves the per-segment doc values instance for every configured field, choosing
 * the docValues flavor from the {@code groups} table, and collects on this instance.
 */
@Override
public LeafCollector getLeafCollector(LeafReaderContext lrc) throws IOException {
  final LeafReader reader = lrc.reader();
  for (int i = 0; i < fields.length; i++) {
    // groups[i][0] >= 1 appears to select the numeric variants and groups[i][1] == 1
    // the "%"-prefixed variants — NOTE(review): confirm against the indexing side.
    final boolean numeric = groups[i][0] >= 1;
    final boolean percentPrefixed = groups[i][1] == 1;
    if (numeric) {
      if (percentPrefixed) {
        values[i] = reader.getSortedNumericDocValues("%" + fields[i]);
      } else {
        values[i] = reader.getNumericDocValues("#" + fields[i]);
      }
    } else {
      if (percentPrefixed) {
        values[i] = reader.getSortedSetDocValues("%" + fields[i]);
      } else {
        values[i] = reader.getSortedDocValues("#" + fields[i]);
      }
    }
  }
  // This object is itself the leaf collector.
  return this;
}
@Override
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
  final SortedDocValues values = globalIfd.load(context).getOrdinalsValues(parentType);
  if (values == null) {
    // No ordinals for this segment: nothing can match here.
    throw new CollectionTerminatedException();
  }
  return new LeafCollector() {
    Scorer scorer;

    @Override
    public void setScorer(Scorer scorer) throws IOException {
      this.scorer = scorer;
    }

    @Override
    public void collect(int doc) throws IOException {
      final long globalOrdinal = values.getOrd(doc);
      if (globalOrdinal == SortedSetDocValues.NO_MORE_ORDS) {
        return; // doc has no ordinal
      }
      final long parentIdx = parentIdxs.add(globalOrdinal);
      if (parentIdx < 0) {
        // Already recorded: a parent should only be seen once.
        assert false : "parent id should only match once, since there can only be one parent doc";
        return;
      }
      scores = bigArrays.grow(scores, parentIdx + 1);
      scores.set(parentIdx, scorer.score());
    }
  };
}
@Override
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
  final Scorer filterScorer = filter.scorer(context);
  final LeafCollector delegate = collector.getLeafCollector(context);
  // Random-access view over the filter's matches for this segment.
  final Bits filterBits = Lucene.asSequentialAccessBits(context.reader().maxDoc(), filterScorer);
  return new FilterLeafCollector(delegate) {
    @Override
    public void collect(int doc) throws IOException {
      // Forward only the docs the filter accepts.
      if (filterBits.get(doc)) {
        super.collect(doc);
      }
    }
  };
}
@Override
public LeafCollector getLeafCollector(LeafReaderContext context)
    throws IOException {
  // Fresh bit set per segment; this collector itself acts as the leaf collector.
  final FixedBitSet segmentSet = new FixedBitSet(context.reader().maxDoc());
  set = segmentSet;
  sets.add(segmentSet);
  return this;
}
@Override
public LeafCollector getLeafCollector(LeafReaderContext context)
    throws IOException {
  // Allocate a per-segment bit set, remember it, and collect on this instance.
  final int maxDoc = context.reader().maxDoc();
  set = new FixedBitSet(maxDoc);
  sets.add(set);
  return this;
}
@Override
public int score(LeafCollector collector, Bits acceptDocs, int min, int max) throws IOException {
  // Only TopSuggestDocsCollector can receive suggester lookups.
  if (collector instanceof TopSuggestDocsCollector) {
    suggester.lookup(this, acceptDocs, (TopSuggestDocsCollector) collector);
    return max;
  }
  throw new IllegalArgumentException("collector is not of type TopSuggestDocsCollector");
}
@Override
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
  final SortedDocValues docTermOrds = DocValues.getSorted(context.reader(), field);
  if (ordinalMap == null) {
    // No cross-segment map: collect with segment-local ordinals.
    return new SegmentOrdinalCollector(docTermOrds);
  }
  final LongValues segmentOrdToGlobalOrdLookup = ordinalMap.getGlobalOrds(context.ord);
  return new OrdinalMapCollector(docTermOrds, segmentOrdToGlobalOrdLookup);
}
@Override
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
  final SortedDocValues docTermOrds = DocValues.getSorted(context.reader(), field);
  // With an ordinal map, translate segment ordinals to global ones; otherwise
  // collect with segment-local ordinals.
  if (ordinalMap == null) {
    return new SegmentOrdinalCollector(docTermOrds);
  }
  return new OrdinalMapCollector(docTermOrds, ordinalMap.getGlobalOrds(context.ord));
}
@Override
public LeafCollector getLeafCollector(LeafReaderContext context)
    throws IOException {
  final int docBase = context.docBase;
  return new FilterLeafCollector(super.getLeafCollector(context)) {
    @Override
    public void collect(int doc) throws IOException {
      // Count every candidate, but only forward docs the filter accepts.
      matches++;
      final int globalDoc = doc + docBase;
      if (filter.exists(globalDoc)) {
        super.collect(doc);
      }
    }
  };
}
/**
 * Pushes every doc of a globally-sorted {@code DocSet} through {@code collector},
 * advancing segment-by-segment and translating global docIDs to per-segment ones.
 *
 * @throws IllegalStateException if the DocSet turns out not to be sorted
 */
public static void collectSortedDocSet(DocSet docs, IndexReader reader, Collector collector) throws IOException {
  // TODO add SortedDocSet sub-interface and take that.
  // TODO collectUnsortedDocSet: iterate segment, then all docSet per segment.
  final Iterator<LeafReaderContext> leafIt = reader.leaves().iterator();
  int leafBase = 0;
  int leafEnd = 0; // exclusive end (in global docIDs) of the current leaf
  LeafCollector leafCollector = null;
  final DocIterator docIt = docs.iterator();
  while (docIt.hasNext()) {
    final int doc = docIt.nextDoc();
    if (doc >= leafEnd) {
      // Advance to the leaf containing this doc and open its collector.
      LeafReaderContext leaf;
      do {
        leaf = leafIt.next();
        leafBase = leaf.docBase;
        leafEnd = leafBase + leaf.reader().maxDoc();
      } while (doc >= leafEnd);
      leafCollector = collector.getLeafCollector(leaf);
    }
    if (doc < leafBase) {
      throw new IllegalStateException("algorithm expects sorted DocSet but wasn't: " + docs.getClass());
    }
    leafCollector.collect(doc - leafBase); // per-seg collectors
  }
}
/**
 * Resolves the per-segment numeric doc values for every configured field, choosing
 * the flavor from the {@code groups} table, and collects on this instance.
 */
@Override
public LeafCollector getLeafCollector(LeafReaderContext lrc) throws IOException {
  final LeafReader reader = lrc.reader();
  for (int i = 0; i < fields.length; i++) {
    // groups[i][1] == 1 appears to select the "%"-prefixed sorted-numeric variant —
    // NOTE(review): confirm against the indexing side.
    if (groups[i][1] != 1) {
      values[i] = reader.getNumericDocValues("#" + fields[i]);
    } else {
      values[i] = reader.getSortedNumericDocValues("%" + fields[i]);
    }
  }
  // This object is itself the leaf collector.
  return this;
}
@Override
public LeafCollector getLeafCollector(LeafReaderContext context) {
  return new LeafCollector() {
    @Override
    public void setScorer(Scorable scorer) throws IOException {
      // scoring is unused; nothing to do
    }

    @Override
    public void collect(int doc) throws IOException {
      // Translate the Lucene docID to a Pinot docID before recording it.
      final int pinotDocId = _docIdTranslator.getPinotDocId(doc);
      _docIds.add(pinotDocId);
    }
  };
}
@Override
public LeafCollector getLeafCollector(LeafReaderContext context) {
  return new LeafCollector() {
    @Override
    public void setScorer(Scorable scorer) throws IOException {
      // no-op: scores are never used
    }

    @Override
    public void collect(int docId) throws IOException {
      // Record the segment-local docID directly.
      _docIds.add(docId);
    }
  };
}
@Override
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
  // Pure delegation: the wrapped collector does all of the work.
  final LeafCollector leaf = collector.getLeafCollector(context);
  return leaf;
}
@Override
public LeafCollector getLeafCollector(LeafReaderContext context)
    throws IOException {
  // Delegate leaf collection entirely to the main collector.
  final LeafCollector mainLeaf = mainCollector.getLeafCollector(context);
  return mainLeaf;
}
public LeafCollector getLeafCollector(LeafReaderContext context) {
  // Remember where this segment starts so collect() can globalize docIDs,
  // then collect on this instance.
  this.docBase = context.docBase;
  return this;
}
/** Returns the grouped results. Returns null if the
 * number of groups collected is &lt;= groupOffset.
 *
 * <p><b>NOTE</b>: This collector is unable to compute
 * the groupValue per group so it will always be null.
 * This is normally not a problem, as you can obtain the
 * value just like you obtain other values for each
 * matching document (eg, via stored fields, via
 * DocValues, etc.)
 *
 * @param withinGroupSort The {@link Sort} used to sort
 * documents within each group.
 * @param groupOffset Which group to start from
 * @param withinGroupOffset Which document to start from
 * within each group
 * @param maxDocsPerGroup How many top documents to keep
 * within each group.
 */
public TopGroups<?> getTopGroups(Sort withinGroupSort, int groupOffset, int withinGroupOffset, int maxDocsPerGroup) throws IOException {
//if (queueFull) {
//System.out.println("getTopGroups groupOffset=" + groupOffset + " topNGroups=" + topNGroups);
//}
// Flush any partially-accumulated group before reading the queue.
if (subDocUpto != 0) {
processGroup();
}
if (groupOffset >= groupQueue.size()) {
return null;
}
int totalGroupedHitCount = 0;
// Fake scorer used to replay buffered per-doc scores into the collectors below.
final ScoreAndDoc fakeScorer = new ScoreAndDoc();
float maxScore = Float.MIN_VALUE;
@SuppressWarnings({"unchecked","rawtypes"})
final GroupDocs<Object>[] groups = new GroupDocs[groupQueue.size() - groupOffset];
// The queue pops groups in reverse rank order, so fill the result array back-to-front.
for(int downTo=groupQueue.size()-groupOffset-1;downTo>=0;downTo--) {
final OneGroup og = groupQueue.pop();
// At this point we hold all docs w/ in each group,
// unsorted; we now sort them:
final TopDocsCollector<?> collector;
if (withinGroupSort.equals(Sort.RELEVANCE)) {
// Sort by score
if (!needsScores) {
throw new IllegalArgumentException("cannot sort by relevance within group: needsScores=false");
}
collector = TopScoreDocCollector.create(maxDocsPerGroup, Integer.MAX_VALUE);
} else {
// Sort by fields
collector = TopFieldCollector.create(withinGroupSort, maxDocsPerGroup, Integer.MAX_VALUE); // TODO: disable exact counts?
}
float groupMaxScore = needsScores ? Float.NEGATIVE_INFINITY : Float.NaN;
LeafCollector leafCollector = collector.getLeafCollector(og.readerContext);
leafCollector.setScorer(fakeScorer);
// Replay the group's buffered docs (and scores, if tracked) into the collector.
for(int docIDX=0;docIDX<og.count;docIDX++) {
final int doc = og.docs[docIDX];
fakeScorer.doc = doc;
if (needsScores) {
fakeScorer.score = og.scores[docIDX];
groupMaxScore = Math.max(groupMaxScore, fakeScorer.score);
}
leafCollector.collect(doc);
}
totalGroupedHitCount += og.count;
// Snapshot this group's sort values from the comparators' slot.
final Object[] groupSortValues;
groupSortValues = new Comparable<?>[comparators.length];
for(int sortFieldIDX=0;sortFieldIDX<comparators.length;sortFieldIDX++) {
groupSortValues[sortFieldIDX] = comparators[sortFieldIDX].value(og.comparatorSlot);
}
final TopDocs topDocs = collector.topDocs(withinGroupOffset, maxDocsPerGroup);
// TODO: we could aggregate scores across children
// by Sum/Avg instead of passing NaN:
groups[downTo] = new GroupDocs<>(Float.NaN,
groupMaxScore,
new TotalHits(og.count, TotalHits.Relation.EQUAL_TO),
topDocs.scoreDocs,
null, // groupValue is unknown for this collector (see NOTE in the javadoc)
groupSortValues);
maxScore = Math.max(maxScore, groupMaxScore);
}
/*
while (groupQueue.size() != 0) {
final OneGroup og = groupQueue.pop();
//System.out.println(" leftover: og ord=" + og.groupOrd + " count=" + og.count);
totalGroupedHitCount += og.count;
}
*/
return new TopGroups<>(new TopGroups<>(groupSort.getSort(),
withinGroupSort.getSort(),
totalHitCount, totalGroupedHitCount, groups, maxScore),
totalGroupCount);
}
/** Used when base query is highly constraining vs the
 * drilldowns, or when the docs must be scored at once
 * (i.e., like BooleanScorer2, not BooleanScorer). In
 * this case we just .next() on base and .advance() on
 * the dim filters. */
private void doQueryFirstScoring(Bits acceptDocs, LeafCollector collector, DocsAndCost[] dims) throws IOException {
//if (DEBUG) {
// System.out.println(" doQueryFirstScoring");
//}
int docID = baseScorer.docID();
nextDoc: while (docID != PostingsEnum.NO_MORE_DOCS) {
// Skip docs rejected by the live-docs / accept bits.
if (acceptDocs != null && acceptDocs.get(docID) == false) {
docID = baseIterator.nextDoc();
continue;
}
// Tracks the single dimension (if any) that failed to match this doc;
// one failure makes the doc a "near miss" for sideways counting.
LeafCollector failedCollector = null;
for (DocsAndCost dim : dims) {
// TODO: should we sort this 2nd dimension of
// docsEnums from most frequent to least?
if (dim.approximation.docID() < docID) {
dim.approximation.advance(docID);
}
boolean matches = false;
if (dim.approximation.docID() == docID) {
if (dim.twoPhase == null) {
matches = true;
} else {
// Approximation matched; confirm with the two-phase check.
matches = dim.twoPhase.matches();
}
}
if (matches == false) {
if (failedCollector != null) {
// More than one dim fails on this document, so
// it's neither a hit nor a near-miss; move to
// next doc:
docID = baseIterator.nextDoc();
continue nextDoc;
} else {
failedCollector = dim.sidewaysLeafCollector;
}
}
}
collectDocID = docID;
// TODO: we could score on demand instead since we are
// daat here:
collectScore = baseScorer.score();
if (failedCollector == null) {
// Hit passed all filters, so it's "real":
collectHit(collector, dims);
} else {
// Hit missed exactly one filter:
collectNearMiss(failedCollector);
}
docID = baseIterator.nextDoc();
}
}
/**
 * Records the current doc ({@code collectDocID}) into the sideways collector of the
 * single dimension it failed to match.
 */
private void collectNearMiss(LeafCollector sidewaysCollector) throws IOException {
  sidewaysCollector.collect(collectDocID);
}
@Override
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
  // Simply forward to the main collector.
  final LeafCollector delegate = mainCollector.getLeafCollector(context);
  return delegate;
}
@Override
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
  // Run the kill-check before opening the segment, then wrap the delegate's leaf
  // collector so the check also runs during collection.
  raiseIfKilled.run();
  final LeafCollector inner = delegate.getLeafCollector(context);
  return new KillableLeafCollector(inner, raiseIfKilled);
}
/**
 * Wraps {@code delegate} with a cancellation hook.
 *
 * @param delegate the leaf collector to forward to
 * @param raiseIfKilled run to check for cancellation; presumably throws when the
 *                      search has been killed — see its use in getLeafCollector
 */
public KillableLeafCollector(LeafCollector delegate, Runnable raiseIfKilled) {
this.delegate = delegate;
this.raiseIfKilled = raiseIfKilled;
}
@Override
public LeafCollector getLeafCollector(LeafReaderContext context) {
  // One collector per segment, tagged with the reader index and backed by the reservoir.
  final ReservoirLeafCollector leaf = new ReservoirLeafCollector(reservoir, readerIdx, context);
  return leaf;
}