以下列出了 org.apache.lucene.search.TopFieldCollector 类的 API 使用示例代码及写法;您也可以点击链接到 GitHub 查看源代码。
/**
 * Creates a re-rank collector whose first-pass ("main") collector gathers
 * {@code max(reRankDocs, length)} hits.
 *
 * <p>When the command carries no sort, hits are collected by score; otherwise the sort is
 * rewritten against {@code searcher} and hits are collected by field.
 *
 * @param reRankDocs      number of documents to re-rank
 * @param length          number of documents requested
 * @param reRankQuery     query stored for the re-rank phase
 * @param reRankWeight    weight stored for the re-rank phase
 * @param cmd             query command supplying the (possibly null) sort
 * @param searcher        searcher used to rewrite the sort; also stored on this instance
 * @param boostedPriority boosted document priorities, stored as-is
 * @param scale           scale flag, stored as-is
 * @throws IOException declared by the signature; presumably raised by the sort rewrite or collector creation (TODO confirm)
 */
public ReRankCollector(int reRankDocs,
                       int length,
                       Query reRankQuery,
                       double reRankWeight,
                       QueryCommand cmd,
                       IndexSearcher searcher,
                       Map<BytesRef, Integer> boostedPriority,
                       boolean scale) throws IOException {
    super(null);

    // Plain state captured from the arguments.
    this.reRankDocs = reRankDocs;
    this.length = length;
    this.reRankQuery = reRankQuery;
    this.reRankWeight = reRankWeight;
    this.searcher = searcher;
    this.boostedPriority = boostedPriority;
    this.scale = scale;

    // The first pass must collect enough hits for both the re-rank window and the page.
    final int numHits = Math.max(this.reRankDocs, length);
    final Sort requestedSort = cmd.getSort();
    if (requestedSort != null) {
        final Sort rewrittenSort = requestedSort.rewrite(searcher);
        this.mainCollector = TopFieldCollector.create(rewrittenSort, numHits, null, false, true, true);
    } else {
        this.mainCollector = TopScoreDocCollector.create(numHits, null);
    }
}
/**
 * Builds the first-pass collector: a top-docs collector (by score or by the within-group sort),
 * optionally paired with a max-score collector, and wrapped in a filter on the doc set of
 * {@code query}.
 *
 * @return the filter collector, which is also stored in {@code collector}
 */
@Override
protected Collector createFirstPassCollector() throws IOException {
    final DocSet filterDocs = searcher.getDocSet(query);
    final int numHits = getMax(groupOffset, docsPerGroup, maxDoc);

    final Collector delegate;
    if (withinGroupSort != null && !withinGroupSort.equals(Sort.RELEVANCE)) {
        topCollector = TopFieldCollector.create(searcher.weightSort(withinGroupSort), numHits, Integer.MAX_VALUE);
        if (needScores) {
            // A separate collector tracks the max score alongside the field-sorted hits.
            maxScoreCollector = new MaxScoreCollector();
            delegate = MultiCollector.wrap(topCollector, maxScoreCollector);
        } else {
            delegate = topCollector;
        }
    } else {
        // No sort, or plain relevance: collect by score.
        topCollector = TopScoreDocCollector.create(numHits, Integer.MAX_VALUE);
        delegate = topCollector;
    }

    collector = new FilterCollector(filterDocs, delegate);
    return collector;
}
/**
 * Turns the collected top docs into a single {@code GroupDocs} and publishes it, either as the
 * main result or as a doc list added to the common response.
 *
 * <p>The max score is the first hit's score when sorting by relevance (NaN if there are no
 * hits), the max-score collector's value when scores were requested for a field sort, and NaN
 * otherwise.
 */
@Override
protected void finish() throws IOException {
    final TopDocs collected = topCollector.topDocs();
    final boolean byRelevance = withinGroupSort == null || withinGroupSort.equals(Sort.RELEVANCE);

    float maxScore = Float.NaN;
    if (byRelevance) {
        if (collected.scoreDocs.length != 0) {
            maxScore = collected.scoreDocs[0].score;
        }
    } else if (needScores) {
        // use top-level query to populate the scores
        TopFieldCollector.populateScores(collected.scoreDocs, searcher, Grouping.this.query);
        maxScore = maxScoreCollector.getMaxScore();
    }

    GroupDocs<String> groupDocs =
        new GroupDocs<>(Float.NaN, maxScore, collected.totalHits, collected.scoreDocs, query.toString(), null);

    if (!main) {
        NamedList rsp = commonResponse();
        addDocList(rsp, groupDocs);
    } else {
        mainResult = getDocList(groupDocs);
    }
}
/**
 * Materialises {@code topGroups} after collection.
 *
 * <p>With no first-phase groups an empty {@code TopGroups} is stored. Otherwise the second-pass
 * collector's groups are used, converted through {@code GroupConverter.fromMutable} when the
 * field's type reports a number type. When scores are needed, every group's score docs are
 * populated from {@code query}.
 *
 * @param searcher searcher used to populate scores
 */
@Override
@SuppressWarnings({"unchecked", "rawtypes"})
public void postCollect(IndexSearcher searcher) throws IOException {
    if (firstPhaseGroups.isEmpty()) {
        topGroups = new TopGroups<>(groupSort.getSort(), withinGroupSort.getSort(), 0, 0, new GroupDocs[0], Float.NaN);
        return;
    }

    final boolean numericField = field.getType().getNumberType() != null;
    if (!numericField) {
        topGroups = secondPassCollector.getTopGroups(0);
    } else {
        topGroups = GroupConverter.fromMutable(field, secondPassCollector.getTopGroups(0));
    }

    if (!needScores) {
        return;
    }
    for (GroupDocs<?> groupDocs : topGroups.groups) {
        TopFieldCollector.populateScores(groupDocs.scoreDocs, searcher, query);
    }
}
/**
 * Creates the collector chain for this command: a top-docs collector (by score, or by
 * {@code sort}), optionally combined with a max-score collector, wrapped in a filter collector
 * over {@code docSet}.
 *
 * @return a single-element list holding the filter collector
 */
@Override
public List<Collector> create() throws IOException {
    final Collector delegate;
    if (sort != null && !sort.equals(Sort.RELEVANCE)) {
        topDocsCollector = TopFieldCollector.create(sort, docsToCollect, Integer.MAX_VALUE);
        if (needScores) {
            // Field-sorted hits: track the max score with a dedicated collector.
            maxScoreCollector = new MaxScoreCollector();
            delegate = MultiCollector.wrap(topDocsCollector, maxScoreCollector);
        } else {
            delegate = topDocsCollector;
        }
    } else {
        // No sort, or plain relevance: collect by score.
        topDocsCollector = TopScoreDocCollector.create(docsToCollect, Integer.MAX_VALUE);
        delegate = topDocsCollector;
    }

    filterCollector = new FilterCollector(docSet, delegate);
    return Arrays.asList((Collector) filterCollector);
}
/**
 * Creates a re-rank collector driven by a {@code Rescorer}. The first-pass ("main") collector
 * gathers {@code max(reRankDocs, length)} hits, by score when the command has no sort and by the
 * searcher-rewritten sort otherwise.
 *
 * @param reRankDocs          number of documents to re-rank
 * @param length              number of documents requested
 * @param reRankQueryRescorer rescorer stored for the re-rank phase
 * @param cmd                 query command supplying the query, sort and min exact count
 * @param searcher            searcher used to rewrite the sort; also stored on this instance
 * @param boostedPriority     boosted documents, stored as-is
 * @throws IOException declared by the signature; presumably raised by the sort rewrite or collector creation (TODO confirm)
 */
@SuppressWarnings({"unchecked"})
public ReRankCollector(int reRankDocs,
                       int length,
                       Rescorer reRankQueryRescorer,
                       QueryCommand cmd,
                       IndexSearcher searcher,
                       Set<BytesRef> boostedPriority) throws IOException {
    super(null);
    this.reRankDocs = reRankDocs;
    this.length = length;
    this.boostedPriority = boostedPriority;
    this.searcher = searcher;
    this.reRankQueryRescorer = reRankQueryRescorer;
    this.query = cmd.getQuery();

    final int numHits = Math.max(this.reRankDocs, length);
    final Sort requestedSort = cmd.getSort();
    if (requestedSort != null) {
        final Sort rewrittenSort = requestedSort.rewrite(searcher);
        this.sort = rewrittenSort;
        //scores are needed for Rescorer (regardless of whether sort needs it)
        this.mainCollector = TopFieldCollector.create(rewrittenSort, numHits, cmd.getMinExactCount());
    } else {
        this.sort = null;
        this.mainCollector = TopScoreDocCollector.create(numHits, cmd.getMinExactCount());
    }
}
/**
 * Runs the first batch of the ordered search.
 *
 * <p>On the first call with a batch size above {@code OPTIMIZE_BATCH_SIZE_THRESHOLD}, the batch
 * size is capped at the query's hit count plus one. Collector expressions are then primed,
 * memory for the batch is accounted, and the search is delegated to {@code doSearch}.
 *
 * @return the rows produced by {@code doSearch} for the first batch
 */
private KeyIterable<ShardId, Row> initialSearch() throws IOException {
    if (!batchSizeReduced && batchSize > OPTIMIZE_BATCH_SIZE_THRESHOLD) {
        batchSizeReduced = true;
        // + 1 because TopFieldCollector doesn't work with size=0 and we need to set the `exhausted` flag properly.
        batchSize = Math.min(batchSize, searcher.count(query) + 1);
    }

    for (LuceneCollectorExpression<?> collectorExpression : expressions) {
        collectorExpression.startCollect(collectorContext);
        collectorExpression.setScorer(scorer);
    }

    ramAccounting.addBytes(batchSize * FIELD_DOC_SIZE);
    final TopFieldCollector firstBatchCollector =
        TopFieldCollector.create(sort, batchSize, 0 /* do not process any hits */);
    return doSearch(firstBatchCollector, minScore, query);
}
/**
 * Fetches the next batch, continuing after {@code lastDoc}.
 *
 * @return an empty iterable when the search is already exhausted, otherwise the rows produced
 *         by {@code doSearch} for the next batch
 */
private KeyIterable<ShardId, Row> searchMore() throws IOException {
    if (!exhausted()) {
        if (LOGGER.isDebugEnabled()) {
            LOGGER.debug("searchMore from [{}]", lastDoc);
        }
        ramAccounting.addBytes(batchSize * FIELD_DOC_SIZE);
        final TopFieldCollector nextBatchCollector =
            TopFieldCollector.create(sort, batchSize, lastDoc, 0 /* do not process any hits */);
        return doSearch(nextBatchCollector, minScore, query(lastDoc));
    }

    LOGGER.trace("searchMore but EXHAUSTED");
    return empty();
}
/**
 * Configures the per-group collector supplier.
 *
 * <p>Scores are needed when max scores were requested or when the within-group sort itself
 * needs them. A relevance sort gets a score-based collector; any other sort gets a field-based
 * collector, paired with a max-score collector only when {@code getMaxScores} is set.
 *
 * @param withinGroupSort sort applied to the documents of each group; must not be null
 * @param maxDocsPerGroup number of top documents to keep per group
 * @param getMaxScores    whether the max score of each group should be tracked
 */
TopDocsReducer(Sort withinGroupSort,
               int maxDocsPerGroup, boolean getMaxScores) {
    this.needsScores = getMaxScores || withinGroupSort.needsScores();
    // Compare by value, not identity: a Sort built independently but equal to Sort.RELEVANCE
    // must take the score-collector path too.
    if (Sort.RELEVANCE.equals(withinGroupSort)) {
        supplier = () -> new TopDocsAndMaxScoreCollector(true, TopScoreDocCollector.create(maxDocsPerGroup, Integer.MAX_VALUE), null);
    } else {
        supplier = () -> {
            TopFieldCollector topDocsCollector = TopFieldCollector.create(withinGroupSort, maxDocsPerGroup, Integer.MAX_VALUE); // TODO: disable exact counts?
            MaxScoreCollector maxScoreCollector = getMaxScores ? new MaxScoreCollector() : null;
            return new TopDocsAndMaxScoreCollector(false, topDocsCollector, maxScoreCollector);
        };
    }
}
/**
 * Populates the scores of every group's score docs from {@code query}, but only when scores
 * are needed; otherwise does nothing.
 */
protected void populateScoresIfNecessary() throws IOException {
    if (!needScores) {
        return;
    }
    for (GroupDocs<?> groupDocs : result.groups) {
        TopFieldCollector.populateScores(groupDocs.scoreDocs, searcher, query);
    }
}
/**
 * Captures the collected top docs and, when scores are needed, populates them from the main
 * query.
 *
 * @param searcher searcher used to populate scores
 */
@Override
public void postCollect(IndexSearcher searcher) throws IOException {
    topDocs = topDocsCollector.topDocs();
    if (!needScores) {
        return;
    }
    // use mainQuery to populate the scores
    TopFieldCollector.populateScores(topDocs.scoreDocs, searcher, mainQuery);
}
/**
 * Test for both constant score and boolean query; the other tests only use the constant score
 * mode. Runs the same inclusive int range under each rewrite method and checks the hit count
 * and the values of the first and last hit in index order.
 */
private void testRange(int precisionStep) throws Exception {
    final String field = "field" + precisionStep;
    final int count = 3000;
    final int lower = (distance*3/2) + startOffset;
    final int upper = lower + count*distance + (distance/3);
    LegacyNumericRangeQuery<Integer> q = LegacyNumericRangeQuery.newIntRange(field, precisionStep, lower, upper, true, true);

    for (int pass = 0; pass < 2; pass++) {
        final String type;
        if (pass == 0) {
            type = " (constant score filter rewrite)";
            q.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_REWRITE);
        } else {
            type = " (constant score boolean rewrite)";
            q.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_REWRITE);
        }

        TopFieldCollector collector = TopFieldCollector.create(Sort.INDEXORDER, noDocs, Integer.MAX_VALUE);
        searcher.search(q, collector);
        ScoreDoc[] sd = collector.topDocs().scoreDocs;
        assertNotNull(sd);
        assertEquals("Score doc count"+type, count, sd.length );

        Document firstDoc = searcher.doc(sd[0].doc);
        assertEquals("First doc"+type, 2*distance+startOffset, firstDoc.getField(field).numericValue().intValue());
        Document lastDoc = searcher.doc(sd[sd.length-1].doc);
        assertEquals("Last doc"+type, (1+count)*distance+startOffset, lastDoc.getField(field).numericValue().intValue());
    }
}
/**
 * Checks an int range with no upper bound, once with the upper bound flagged inclusive and once
 * exclusive. Both runs are expected to yield the same hit count and the same first and last
 * values in index order.
 */
private void testRightOpenRange(int precisionStep) throws Exception {
    final String field = "field" + precisionStep;
    final int count = 3000;
    final int lower = (count-1)*distance + (distance/3) + startOffset;

    for (boolean maxInclusive : new boolean[] {true, false}) {
        LegacyNumericRangeQuery<Integer> q = LegacyNumericRangeQuery.newIntRange(field, precisionStep, lower, null, true, maxInclusive);
        TopFieldCollector collector = TopFieldCollector.create(Sort.INDEXORDER, noDocs, Integer.MAX_VALUE);
        searcher.search(q, collector);

        ScoreDoc[] sd = collector.topDocs().scoreDocs;
        assertNotNull(sd);
        assertEquals("Score doc count", noDocs-count, sd.length );

        Document firstDoc = searcher.doc(sd[0].doc);
        assertEquals("First doc", count*distance+startOffset, firstDoc.getField(field).numericValue().intValue());
        Document lastDoc = searcher.doc(sd[sd.length-1].doc);
        assertEquals("Last doc", (noDocs-1)*distance+startOffset, lastDoc.getField(field).numericValue().intValue());
    }
}
/**
 * Test for constant score + boolean query + filter; the other tests only use the constant score
 * mode. Runs the same inclusive long range under each rewrite method and checks the hit count
 * and the values of the first and last hit in index order.
 */
private void testRange(int precisionStep) throws Exception {
    final String field = "field" + precisionStep;
    final int count = 3000;
    final long lower = (distance*3/2) + startOffset;
    final long upper = lower + count*distance + (distance/3);
    LegacyNumericRangeQuery<Long> q = LegacyNumericRangeQuery.newLongRange(field, precisionStep, lower, upper, true, true);

    for (int pass = 0; pass < 2; pass++) {
        final String type;
        if (pass == 0) {
            type = " (constant score filter rewrite)";
            q.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_REWRITE);
        } else {
            type = " (constant score boolean rewrite)";
            q.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_REWRITE);
        }

        TopFieldCollector collector = TopFieldCollector.create(Sort.INDEXORDER, noDocs, Integer.MAX_VALUE);
        searcher.search(q, collector);
        ScoreDoc[] sd = collector.topDocs().scoreDocs;
        assertNotNull(sd);
        assertEquals("Score doc count"+type, count, sd.length );

        Document firstDoc = searcher.doc(sd[0].doc);
        assertEquals("First doc"+type, 2*distance+startOffset, firstDoc.getField(field).numericValue().longValue() );
        Document lastDoc = searcher.doc(sd[sd.length-1].doc);
        assertEquals("Last doc"+type, (1+count)*distance+startOffset, lastDoc.getField(field).numericValue().longValue() );
    }
}
/**
 * Creates a new field-sorted collector wrapped in a {@code StopExecutionCollector} bound to
 * {@code _running}, and additionally in a {@code SlowCollector} when {@code _runSlow} is set.
 *
 * @return the outermost wrapping collector
 */
@Override
public Collector newCollector() throws IOException {
    final TopFieldCollector topFieldCollector =
        TopFieldCollector.create(_sort, _numHitsToCollect, _after, true, true, false, true);
    final Collector stoppable = new StopExecutionCollector(topFieldCollector, _running);
    if (!_runSlow) {
        return stoppable;
    }
    return new SlowCollector(stoppable);
}
/**
 * Unwraps {@code SlowCollector} and {@code StopExecutionCollector} layers until the underlying
 * {@code TopFieldCollector} is reached.
 *
 * @param collector the possibly wrapped collector
 * @return the innermost {@code TopFieldCollector}
 * @throws RuntimeException if a layer is of any other type (including {@code null})
 */
private TopFieldCollector getTopFieldCollector(Collector collector) {
    Collector current = collector;
    while (true) {
        if (current instanceof SlowCollector) {
            current = ((SlowCollector) current).getCollector();
        } else if (current instanceof StopExecutionCollector) {
            current = ((StopExecutionCollector) current).getCollector();
        } else if (current instanceof TopFieldCollector) {
            return (TopFieldCollector) current;
        } else {
            throw new RuntimeException("Collector type [" + current + "] not supported.");
        }
    }
}
/**
 * Runs {@code q} sorted by the {@code "category"} sorted-set field, loads the stored document
 * of every returned hit, and reports the total hit count.
 *
 * <p>Search and fetch durations are measured but deliberately unused.
 *
 * @param indexReader reader to search
 * @param count       number of top hits to collect
 * @param q           query to execute
 * @return the collector's total hit count
 */
private static int runQuery(IndexReader indexReader, int count, Query q) throws IOException {
    final long searchStarted = System.currentTimeMillis();
    final IndexSearcher searcher = new IndexSearcher(indexReader);

    final Sort byCategory = new Sort();
    byCategory.setSort(new SortedSetSortField("category", false));

    final TopFieldCollector collector = TopFieldCollector.create(byCategory, count, null, true, true, true);
    searcher.search(q, collector);
    final ScoreDoc[] hits = collector.topDocs().scoreDocs;
    final int totalHits = collector.getTotalHits();
    @SuppressWarnings("unused") long searchTime = System.currentTimeMillis() - searchStarted;

    final long fetchStarted = System.currentTimeMillis();
    final List<String> ids = new ArrayList<>();
    for (int i = 0; i < hits.length; i++) {
        final Document stored = searcher.doc(hits[i].doc);
        ids.add(stored.get("uid"));
    }
    @SuppressWarnings("unused") long fetchTime = System.currentTimeMillis() - fetchStarted;

    return totalHits;
}
/**
 * Executes {@code query} with the given top-field collector, optionally filtered by a minimum
 * score and always wrapped so that {@code raiseIfKilled} is consulted during collection.
 *
 * @param topFieldCollector collector that accumulates the sorted hits
 * @param minScore          minimum score filter, or {@code null} for none
 * @param query             query to run
 * @return the collected score docs converted by {@code scoreDocToIterable}
 */
private KeyIterable<ShardId, Row> doSearch(TopFieldCollector topFieldCollector,
                                           Float minScore,
                                           Query query) throws IOException {
    final Collector base = topFieldCollector;
    final Collector scoreFiltered;
    if (minScore == null) {
        scoreFiltered = base;
    } else {
        scoreFiltered = new MinimumScoreCollector(base, minScore);
    }
    final Collector killable = new KillableCollector(scoreFiltered, this::raiseIfKilled);
    searcher.search(query, killable);

    // Results are read from the inner collector, not from the wrappers.
    final ScoreDoc[] hits = topFieldCollector.topDocs().scoreDocs;
    if (doDocsScores) {
        TopFieldCollector.populateScores(hits, searcher, query);
    }
    return scoreDocToIterable(hits);
}
/**
 * Returns up to {@code num} transactions whose cascade flag equals 1, ordered by
 * {@code FIELD_TXID}.
 *
 * <p>Hits pass through a {@code TxnCacheFilter} built on {@code cleanCascadeCache}; every
 * returned transaction id is then recorded in that cache. The searcher reference obtained from
 * the core is released in all cases.
 *
 * @param num maximum number of transactions to return
 * @return the matching transactions with id and commit time set
 */
@Override
public List<Transaction> getCascades(int num) throws IOException
{
    RefCounted<SolrIndexSearcher> searcherRef = null;
    try
    {
        searcherRef = this.core.getSearcher();
        SolrIndexSearcher searcher = searcherRef.get();

        TopFieldCollector topFieldCollector = TopFieldCollector.create(
            new Sort(new SortField(FIELD_TXID, SortField.Type.LONG)), num, null, false, false, false);

        LegacyNumericRangeQuery q = LegacyNumericRangeQuery.newIntRange(FIELD_CASCADE_FLAG, 1, 1, true, true);

        // The cache filter sits in front of the top-field collector.
        DelegatingCollector cacheFilter = new TxnCacheFilter(cleanCascadeCache);
        cacheFilter.setLastDelegate(topFieldCollector);
        Collector collector = cacheFilter;
        searcher.search(q, collector);

        ScoreDoc[] hits = topFieldCollector.topDocs().scoreDocs;

        // Only these stored fields are loaded for each hit.
        Set<String> storedFields = new HashSet<>();
        storedFields.add(FIELD_S_TXID);
        storedFields.add(FIELD_S_TXCOMMITTIME);

        List<Transaction> transactions = new ArrayList<>(hits.length);
        for (ScoreDoc hit : hits)
        {
            Document doc = searcher.doc(hit.doc, storedFields);
            Transaction transaction = new Transaction();

            long txnID = doc.getField(FIELD_S_TXID).numericValue().longValue();
            cleanCascadeCache.put(txnID, null);
            transaction.setId(txnID);

            long commitTimeMs = doc.getField(FIELD_S_TXCOMMITTIME).numericValue().longValue();
            transaction.setCommitTimeMs(commitTimeMs);

            transactions.add(transaction);
        }
        return transactions;
    }
    finally
    {
        if (searcherRef != null)
        {
            searcherRef.decref();
        }
    }
}
/** Returns the grouped results.  Returns null if the
 *  number of groups collected is &lt;= groupOffset.
 *
 * <p><b>NOTE</b>: This collector is unable to compute
 * the groupValue per group so it will always be null.
 * This is normally not a problem, as you can obtain the
 * value just like you obtain other values for each
 * matching document (eg, via stored fields, via
 * DocValues, etc.)
 *
 * @param withinGroupSort The {@link Sort} used to sort
 *   documents within each group.
 * @param groupOffset Which group to start from
 * @param withinGroupOffset Which document to start from
 *   within each group
 * @param maxDocsPerGroup How many top documents to keep
 *   within each group.
 * @return the top groups, or {@code null} when
 *   {@code groupOffset} is at or beyond the number of collected groups
 * @throws IllegalArgumentException if {@code withinGroupSort} is the
 *   relevance sort but this collector does not track scores
 * @throws IOException declared by the signature; presumably raised by
 *   the per-group re-collection (TODO confirm)
 */
public TopGroups<?> getTopGroups(Sort withinGroupSort, int groupOffset, int withinGroupOffset, int maxDocsPerGroup) throws IOException {

    // Flush the group that was still being accumulated, if any.
    if (subDocUpto != 0) {
        processGroup();
    }

    if (groupOffset >= groupQueue.size()) {
        return null;
    }

    int totalGroupedHitCount = 0;

    final ScoreAndDoc fakeScorer = new ScoreAndDoc();

    // Seed with NEGATIVE_INFINITY, not Float.MIN_VALUE: MIN_VALUE is the smallest *positive*
    // float and would win over any real maximum that is <= 0. If scores are not tracked every
    // groupMaxScore is NaN and Math.max propagates NaN, so the result is NaN either way.
    float maxScore = Float.NEGATIVE_INFINITY;

    @SuppressWarnings({"unchecked","rawtypes"})
    final GroupDocs<Object>[] groups = new GroupDocs[groupQueue.size() - groupOffset];
    // Groups are popped from the queue and written into the result array back to front.
    for(int downTo=groupQueue.size()-groupOffset-1;downTo>=0;downTo--) {
        final OneGroup og = groupQueue.pop();

        // At this point we hold all docs w/ in each group,
        // unsorted; we now sort them:
        final TopDocsCollector<?> collector;
        if (withinGroupSort.equals(Sort.RELEVANCE)) {
            // Sort by score
            if (!needsScores) {
                throw new IllegalArgumentException("cannot sort by relevance within group: needsScores=false");
            }
            collector = TopScoreDocCollector.create(maxDocsPerGroup, Integer.MAX_VALUE);
        } else {
            // Sort by fields
            collector = TopFieldCollector.create(withinGroupSort, maxDocsPerGroup, Integer.MAX_VALUE); // TODO: disable exact counts?
        }

        float groupMaxScore = needsScores ? Float.NEGATIVE_INFINITY : Float.NaN;
        LeafCollector leafCollector = collector.getLeafCollector(og.readerContext);
        leafCollector.setScorer(fakeScorer);
        // Replay the group's buffered docs (and scores, when tracked) into the sorting collector.
        for(int docIDX=0;docIDX<og.count;docIDX++) {
            final int doc = og.docs[docIDX];
            fakeScorer.doc = doc;
            if (needsScores) {
                fakeScorer.score = og.scores[docIDX];
                groupMaxScore = Math.max(groupMaxScore, fakeScorer.score);
            }
            leafCollector.collect(doc);
        }
        totalGroupedHitCount += og.count;

        final Object[] groupSortValues;

        groupSortValues = new Comparable<?>[comparators.length];
        for(int sortFieldIDX=0;sortFieldIDX<comparators.length;sortFieldIDX++) {
            groupSortValues[sortFieldIDX] = comparators[sortFieldIDX].value(og.comparatorSlot);
        }

        final TopDocs topDocs = collector.topDocs(withinGroupOffset, maxDocsPerGroup);

        // TODO: we could aggregate scores across children
        // by Sum/Avg instead of passing NaN:
        groups[downTo] = new GroupDocs<>(Float.NaN,
                                         groupMaxScore,
                                         new TotalHits(og.count, TotalHits.Relation.EQUAL_TO),
                                         topDocs.scoreDocs,
                                         null,
                                         groupSortValues);
        maxScore = Math.max(maxScore, groupMaxScore);
    }

    // Groups skipped by groupOffset stay in the queue; their hits are not added to
    // totalGroupedHitCount.

    return new TopGroups<>(new TopGroups<>(groupSort.getSort(),
                                           withinGroupSort.getSort(),
                                           totalHitCount, totalGroupedHitCount, groups, maxScore),
                           totalGroupCount);
}