下面列出了org.apache.lucene.search.DocIdSet#iterator ( ) 实例代码,或者点击链接到github查看源代码,也可以在右侧发表评论。
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
DocIdSet childrenDocSet = childrenFilter.getDocIdSet(context, null);
// we forcefully apply live docs here so that deleted children don't give matching parents
childrenDocSet = BitsFilteredDocIdSet.wrap(childrenDocSet, context.reader().getLiveDocs());
if (Lucene.isEmpty(childrenDocSet)) {
return null;
}
final DocIdSetIterator childIterator = childrenDocSet.iterator();
if (childIterator == null) {
return null;
}
SortedDocValues bytesValues = globalIfd.load(context).getOrdinalsValues(parentType);
if (bytesValues == null) {
return null;
}
return new ChildScorer(this, parentIdxs, scores, childIterator, bytesValues);
}
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
DocIdSet childrenDocIdSet = childrenFilter.getDocIdSet(context, null);
if (Lucene.isEmpty(childrenDocIdSet)) {
return null;
}
SortedDocValues globalValues = globalIfd.load(context).getOrdinalsValues(parentType);
if (globalValues != null) {
// we forcefully apply live docs here so that deleted children don't give matching parents
childrenDocIdSet = BitsFilteredDocIdSet.wrap(childrenDocIdSet, context.reader().getLiveDocs());
DocIdSetIterator innerIterator = childrenDocIdSet.iterator();
if (innerIterator != null) {
ChildrenDocIdIterator childrenDocIdIterator = new ChildrenDocIdIterator(
innerIterator, parentOrds, globalValues
);
return ConstantScorer.create(childrenDocIdIterator, this, queryWeight);
}
}
return null;
}
@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
return new ConstantScoreWeight(this, boost) {
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
DocIdSet docSet = getDocIdSet(context);
if (docSet == null) {
return null;
}
DocIdSetIterator disi = docSet.iterator();
if (disi == null) {
return null;
}
return new ConstantScoreScorer(this, score(), scoreMode, disi);
}
@Override
public boolean isCacheable(LeafReaderContext ctx) {
return true;
}
};
}
private void testOr(float load) throws IOException {
final int numBits = 1 + random().nextInt(100000);
BitSet set1 = new JavaUtilBitSet(randomSet(numBits, 0), numBits); // empty
T set2 = copyOf(set1, numBits);
final int iterations = atLeast(10);
for (int iter = 0; iter < iterations; ++iter) {
DocIdSet otherSet = randomCopy(new JavaUtilBitSet(randomSet(numBits, load), numBits), numBits);
DocIdSetIterator otherIterator = otherSet.iterator();
if (otherIterator != null) {
set1.or(otherIterator);
set2.or(otherSet.iterator());
assertEquals(set1, set2, numBits);
}
}
}
private void assertEquals(DocIdSet d1, DocIdSet d2) throws IOException {
if (d1 == null) {
if (d2 != null) {
assertEquals(DocIdSetIterator.NO_MORE_DOCS, d2.iterator().nextDoc());
}
} else if (d2 == null) {
assertEquals(DocIdSetIterator.NO_MORE_DOCS, d1.iterator().nextDoc());
} else {
DocIdSetIterator i1 = d1.iterator();
DocIdSetIterator i2 = d2.iterator();
for (int doc = i1.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = i1.nextDoc()) {
assertEquals(doc, i2.nextDoc());
}
assertEquals(DocIdSetIterator.NO_MORE_DOCS, i2.nextDoc());
}
}
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
if (filter == null) {
filter = getDocSet().getTopFilter();
}
DocIdSet readerSet = filter.getDocIdSet(context, null);
if (readerSet == null) {
return null;
}
DocIdSetIterator readerSetIterator = readerSet.iterator();
if (readerSetIterator == null) {
return null;
}
return new ConstantScoreScorer(this, score(), scoreMode, readerSetIterator);
}
private DocIdSet docIdSetToCache(DocIdSet docIdSet, AtomicReader reader, String segmentName, Directory directory)
throws IOException {
if (docIdSet == null) {
// this is better than returning null, as the nonnull result can be cached
return DocIdSet.EMPTY_DOCIDSET;
} else if (docIdSet.isCacheable()) {
return docIdSet;
} else {
final DocIdSetIterator it = docIdSet.iterator();
// null is allowed to be returned by iterator(),
// in this case we wrap with the empty set,
// which is cacheable.
if (it == null) {
return DocIdSet.EMPTY_DOCIDSET;
} else {
final IndexFileBitSet bits = new IndexFileBitSet(reader.maxDoc(), _id, segmentName, directory);
if (!bits.exists()) {
bits.create(it);
}
bits.load();
return bits;
}
}
}
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
if (remaining == 0) {
return null;
}
if (shortCircuitFilter != null) {
DocIdSet docIdSet = shortCircuitFilter.getDocIdSet(context, null);
if (!Lucene.isEmpty(docIdSet)) {
DocIdSetIterator iterator = docIdSet.iterator();
if (iterator != null) {
return ConstantScorer.create(iterator, this, queryWeight);
}
}
return null;
}
DocIdSet parentDocIdSet = this.parentFilter.getDocIdSet(context, null);
if (!Lucene.isEmpty(parentDocIdSet)) {
// We can't be sure of the fact that liveDocs have been applied, so we apply it here. The "remaining"
// count down (short circuit) logic will then work as expected.
parentDocIdSet = BitsFilteredDocIdSet.wrap(parentDocIdSet, context.reader().getLiveDocs());
DocIdSetIterator innerIterator = parentDocIdSet.iterator();
if (innerIterator != null) {
LongBitSet parentOrds = collector.parentOrds;
SortedDocValues globalValues = globalIfd.load(context).getOrdinalsValues(parentType);
if (globalValues != null) {
DocIdSetIterator parentIdIterator = new ParentOrdIterator(innerIterator, parentOrds, globalValues, this);
return ConstantScorer.create(parentIdIterator, this, queryWeight);
}
}
}
return null;
}
/**
* Drive the collection of reduction data. This includes overall data as well as faceted data.
*
* @param manager of the request to drive
* @param searcher the results of the query
* @param filter that represents the overall query
* @param queryRequest used for the search request
* @throws IOException if an error occurs while reading from Solr
*/
public static void drive(AnalyticsRequestManager manager, SolrIndexSearcher searcher, Filter filter, SolrQueryRequest queryRequest) throws IOException {
StreamingInfo streamingInfo = manager.getStreamingFacetInfo();
Iterable<StreamingFacet> streamingFacets = streamingInfo.streamingFacets;
ReductionCollectionManager collectionManager = streamingInfo.streamingCollectionManager;
Iterable<FacetValueQueryExecuter> facetExecuters = manager.getFacetExecuters(filter, queryRequest);
// Streaming phase (Overall results & Value/Pivot Facets)
// Loop through all documents and collect reduction data for streaming facets and overall results
if (collectionManager.needsCollection()) {
List<LeafReaderContext> contexts = searcher.getTopReaderContext().leaves();
for (int leafNum = 0; leafNum < contexts.size(); leafNum++) {
LeafReaderContext context = contexts.get(leafNum);
DocIdSet dis = filter.getDocIdSet(context, null); // solr docsets already exclude any deleted docs
if (dis == null) {
continue;
}
DocIdSetIterator disi = dis.iterator();
if (disi != null) {
collectionManager.doSetNextReader(context);
int doc = disi.nextDoc();
while( doc != DocIdSetIterator.NO_MORE_DOCS){
// Add a document to the statistics being generated
collectionManager.collect(doc);
streamingFacets.forEach( facet -> facet.addFacetValueCollectionTargets() );
collectionManager.apply();
doc = disi.nextDoc();
}
}
}
}
// Executing phase (Query/Range Facets)
// Send additional Solr Queries to compute facet values
for (FacetValueQueryExecuter executer : facetExecuters) {
executer.execute(searcher);
}
}
private Scorer scorer(DocIdSet set) throws IOException {
if (set == null) {
return null;
}
final DocIdSetIterator disi = set.iterator();
if (disi == null) {
return null;
}
return new ConstantScoreScorer(this, score(), scoreMode, disi);
}
@Override
public final Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
return new ConstantScoreWeight(this, boost) {
Filter filter;
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
if (filter == null) {
DocSet set = getDocSet(searcher);
filter = set.getTopFilter();
}
// Although this set only includes live docs, other filters can be pushed down to queries.
DocIdSet readerSet = filter.getDocIdSet(context, null);
if (readerSet == null) {
return null;
}
DocIdSetIterator readerSetIterator = readerSet.iterator();
if (readerSetIterator == null) {
return null;
}
return new ConstantScoreScorer(this, score(), scoreMode, readerSetIterator);
}
@Override
public boolean isCacheable(LeafReaderContext ctx) {
return true;
}
};
}
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
DocIdSet docIdSet = filter instanceof SolrFilter ? ((SolrFilter)filter).getDocIdSet(this.context, context, null) : filter.getDocIdSet(context, null);
if (docIdSet == null) {
return null;
}
DocIdSetIterator iterator = docIdSet.iterator();
if (iterator == null) {
return null;
}
return new ConstantScoreScorer(this, score(), scoreMode, iterator);
}
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
if (filter == null) {
resultSet = getDocSet();
filter = resultSet.getTopFilter();
}
DocIdSet readerSet = filter.getDocIdSet(context,context.reader().getLiveDocs());
// create a scrorer on the result set, if results from right query are empty, use empty iterator.
return new GraphScorer(this, readerSet == null ? DocIdSetIterator.empty() : readerSet.iterator(), 1);
}
public void doTestIteratorEqual(DocIdSet a, DocIdSet b) throws IOException {
DocIdSetIterator ia = a.iterator();
DocIdSetIterator ib = b.iterator();
// test for next() equivalence
for(;;) {
int da = ia.nextDoc();
int db = ib.nextDoc();
assertEquals(da, db);
assertEquals(ia.docID(), ib.docID());
if (da==DocIdSetIterator.NO_MORE_DOCS) break;
}
for (int i=0; i<10; i++) {
// test random skipTo() and next()
ia = a.iterator();
ib = b.iterator();
int doc = -1;
for (;;) {
int da,db;
if (rand.nextBoolean()) {
da = ia.nextDoc();
db = ib.nextDoc();
} else {
int target = doc + rand.nextInt(10) + 1; // keep in mind future edge cases like probing (increase if necessary)
da = ia.advance(target);
db = ib.advance(target);
}
assertEquals(da, db);
assertEquals(ia.docID(), ib.docID());
if (da==DocIdSetIterator.NO_MORE_DOCS) break;
doc = da;
}
}
}
@SuppressWarnings("unchecked")
private static boolean isFiltered(int notAdjustedDocId, IndexReader reader, Filter filter) throws IOException {
if (filter == null) {
return false;
}
if (reader instanceof BaseCompositeReader) {
BaseCompositeReader<IndexReader> indexReader = (BaseCompositeReader<IndexReader>) reader;
List<? extends IndexReader> sequentialSubReaders = BaseCompositeReaderUtil.getSequentialSubReaders(indexReader);
int readerIndex = BaseCompositeReaderUtil.readerIndex(indexReader, notAdjustedDocId);
int readerBase = BaseCompositeReaderUtil.readerBase(indexReader, readerIndex);
int docId = notAdjustedDocId - readerBase;
IndexReader orgReader = sequentialSubReaders.get(readerIndex);
SegmentReader sReader = AtomicReaderUtil.getSegmentReader(orgReader);
if (sReader != null) {
SegmentReader segmentReader = (SegmentReader) sReader;
DocIdSet docIdSet = filter.getDocIdSet(segmentReader.getContext(), segmentReader.getLiveDocs());
DocIdSetIterator iterator = docIdSet.iterator();
if (iterator == null) {
return true;
}
if (iterator.advance(docId) == docId) {
return false;
}
return true;
}
throw new RuntimeException("Reader has to be a SegmentReader [" + orgReader + "]");
} else {
throw new RuntimeException("Reader has to be a BaseCompositeReader [" + reader + "]");
}
}
private static OpenBitSet getMask(DocIdSet docIdSet, int primeDocRowId, int numberOfDocsInRow) throws IOException {
OpenBitSet mask = new OpenBitSet(numberOfDocsInRow);
DocIdSetIterator iterator = docIdSet.iterator();
if (iterator == null) {
return mask;
}
int docId = iterator.advance(primeDocRowId);
int end = numberOfDocsInRow + primeDocRowId;
while (docId < end) {
mask.set(docId - primeDocRowId);
docId = iterator.nextDoc();
}
return mask;
}
@Override
protected void collectDocs() throws IOException {
int domainSize = fcontext.base.size();
if (nTerms <= 0 || domainSize < effectiveMincount) { // TODO: what about allBuckets? missing bucket?
return;
}
// TODO: refactor some of this logic into a base class
boolean countOnly = collectAcc==null && allBucketsAcc==null;
boolean fullRange = startTermIndex == 0 && endTermIndex == si.getValueCount();
// Are we expecting many hits per bucket?
// FUTURE: pro-rate for nTerms?
// FUTURE: better take into account number of values in multi-valued fields. This info is available for indexed fields.
// FUTURE: take into account that bigger ord maps are more expensive than smaller ones
// One test: 5M doc index, faceting on a single-valued field with almost 1M unique values, crossover point where global counting was slower
// than per-segment counting was a domain of 658k docs. At that point, top 10 buckets had 6-7 matches each.
// this was for heap docvalues produced by UninvertingReader
// Since these values were randomly distributed, lets round our domain multiplier up to account for less random real world data.
long domainMultiplier = multiValuedField ? 4L : 2L;
boolean manyHitsPerBucket = domainSize * domainMultiplier > (si.getValueCount() + 3); // +3 to increase test coverage with small tests
// If we're only calculating counts, we're not prefixing, and we expect to collect many documents per unique value,
// then collect per-segment before mapping to global ords at the end. This will save redundant seg->global ord mappings.
// FUTURE: there are probably some other non "countOnly" cases where we can use this as well (i.e. those where
// the docid is not used)
boolean canDoPerSeg = countOnly && fullRange;
boolean accumSeg = manyHitsPerBucket && canDoPerSeg;
if (freq.perSeg != null) accumSeg = canDoPerSeg && freq.perSeg; // internal - override perSeg heuristic
final List<LeafReaderContext> leaves = fcontext.searcher.getIndexReader().leaves();
Filter filter = fcontext.base.getTopFilter();
for (int subIdx = 0; subIdx < leaves.size(); subIdx++) {
LeafReaderContext subCtx = leaves.get(subIdx);
setNextReaderFirstPhase(subCtx);
DocIdSet dis = filter.getDocIdSet(subCtx, null); // solr docsets already exclude any deleted docs
DocIdSetIterator disi = dis.iterator();
SortedDocValues singleDv = null;
SortedSetDocValues multiDv = null;
if (multiValuedField) {
// TODO: get sub from multi?
multiDv = subCtx.reader().getSortedSetDocValues(sf.getName());
if (multiDv == null) {
multiDv = DocValues.emptySortedSet();
}
// some codecs may optimize SortedSet storage for single-valued fields
// this will be null if this is not a wrapped single valued docvalues.
if (unwrap_singleValued_multiDv) {
singleDv = DocValues.unwrapSingleton(multiDv);
}
} else {
singleDv = subCtx.reader().getSortedDocValues(sf.getName());
if (singleDv == null) {
singleDv = DocValues.emptySorted();
}
}
LongValues toGlobal = ordinalMap == null ? null : ordinalMap.getGlobalOrds(subIdx);
if (singleDv != null) {
if (accumSeg) {
collectPerSeg(singleDv, disi, toGlobal);
} else {
if (canDoPerSeg && toGlobal != null) {
collectCounts(singleDv, disi, toGlobal);
} else {
collectDocs(singleDv, disi, toGlobal);
}
}
} else {
if (accumSeg) {
collectPerSeg(multiDv, disi, toGlobal);
} else {
if (canDoPerSeg && toGlobal != null) {
collectCounts(multiDv, disi, toGlobal);
} else {
collectDocs(multiDv, disi, toGlobal);
}
}
}
}
reuse = null; // better GC
}
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
if (filter == null) {
boolean debug = rb != null && rb.isDebug();
RTimer timer = (debug ? new RTimer() : null);
resultSet = getDocSet();
if (timer != null) timer.stop();
if (debug) {
SimpleOrderedMap<Object> dbg = new SimpleOrderedMap<>();
dbg.add("time", (long) timer.getTime());
dbg.add("fromSetSize", fromSetSize); // the input
dbg.add("toSetSize", resultSet.size()); // the output
dbg.add("fromTermCount", fromTermCount);
dbg.add("fromTermTotalDf", fromTermTotalDf);
dbg.add("fromTermDirectCount", fromTermDirectCount);
dbg.add("fromTermHits", fromTermHits);
dbg.add("fromTermHitsTotalDf", fromTermHitsTotalDf);
dbg.add("toTermHits", toTermHits);
dbg.add("toTermHitsTotalDf", toTermHitsTotalDf);
dbg.add("toTermDirectCount", toTermDirectCount);
dbg.add("smallSetsDeferred", smallSetsDeferred);
dbg.add("toSetDocsAdded", resultListDocs);
// TODO: perhaps synchronize addDebug in the future...
rb.addDebug(dbg, "join", JoinQuery.this.toString());
}
filter = resultSet.getTopFilter();
}
// Although this set only includes live docs, other filters can be pushed down to queries.
DocIdSet readerSet = filter.getDocIdSet(context, null);
if (readerSet == null) {
return null;
}
DocIdSetIterator readerSetIterator = readerSet.iterator();
if (readerSetIterator == null) {
return null;
}
return new ConstantScoreScorer(this, score(), scoreMode, readerSetIterator);
}