org.apache.lucene.search.DocIdSet#iterator() Source Code Examples

Listed below are example usages of org.apache.lucene.search.DocIdSet#iterator(), drawn from several open-source projects; each example links back to the full source on GitHub.
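Before the individual examples, here is a minimal, self-contained sketch of the usage pattern they all share (the class and method names in the sketch are illustrative, not taken from any of the projects below). Under the Lucene contract, both a Filter's getDocIdSet(...) and DocIdSet#iterator() may return null to signal "no documents", so careful callers check both before iterating:

import java.io.IOException;

import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;

public final class DocIdSetIteratorDemo {

  /** Prints every doc id in the set; tolerates null sets and null iterators. */
  static void printAll(DocIdSet set) throws IOException {
    if (set == null) {
      return; // a null DocIdSet means no matching documents
    }
    DocIdSetIterator it = set.iterator();
    if (it == null) {
      return; // iterator() is likewise allowed to return null for an empty set
    }
    for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
      System.out.println(doc);
    }
  }
}

The examples below all apply this null-check-then-iterate pattern, typically wrapping the surviving iterator in a Scorer such as ConstantScoreScorer.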

Example 1 Project: Elasticsearch File: ParentQuery.java
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
    DocIdSet childrenDocSet = childrenFilter.getDocIdSet(context, null);
    // we forcefully apply live docs here so that deleted children don't give matching parents
    childrenDocSet = BitsFilteredDocIdSet.wrap(childrenDocSet, context.reader().getLiveDocs());
    if (Lucene.isEmpty(childrenDocSet)) {
        return null;
    }
    final DocIdSetIterator childIterator = childrenDocSet.iterator();
    if (childIterator == null) {
        return null;
    }
    SortedDocValues bytesValues = globalIfd.load(context).getOrdinalsValues(parentType);
    if (bytesValues == null) {
        return null;
    }

    return new ChildScorer(this, parentIdxs, scores, childIterator, bytesValues);
}
 
Example 2 Project: Elasticsearch File: ParentConstantScoreQuery.java
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
    DocIdSet childrenDocIdSet = childrenFilter.getDocIdSet(context, null);
    if (Lucene.isEmpty(childrenDocIdSet)) {
        return null;
    }

    SortedDocValues globalValues = globalIfd.load(context).getOrdinalsValues(parentType);
    if (globalValues != null) {
        // we forcefully apply live docs here so that deleted children don't give matching parents
        childrenDocIdSet = BitsFilteredDocIdSet.wrap(childrenDocIdSet, context.reader().getLiveDocs());
        DocIdSetIterator innerIterator = childrenDocIdSet.iterator();
        if (innerIterator != null) {
            ChildrenDocIdIterator childrenDocIdIterator = new ChildrenDocIdIterator(
                    innerIterator, parentOrds, globalValues
            );
            return ConstantScorer.create(childrenDocIdIterator, this, queryWeight);
        }
    }
    return null;
}
 
Example 3 Project: lucene-solr File: AbstractPrefixTreeQuery.java
@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
  return new ConstantScoreWeight(this, boost) {
    @Override
    public Scorer scorer(LeafReaderContext context) throws IOException {
      DocIdSet docSet = getDocIdSet(context);
      if (docSet == null) {
        return null;
      }
      DocIdSetIterator disi = docSet.iterator();
      if (disi == null) {
        return null;
      }
      return new ConstantScoreScorer(this, score(), scoreMode, disi);
    }

    @Override
    public boolean isCacheable(LeafReaderContext ctx) {
      return true;
    }
  };
}
 
Example 4 Project: lucene-solr File: BaseBitSetTestCase.java
private void testOr(float load) throws IOException {
  final int numBits = 1 + random().nextInt(100000);
  BitSet set1 = new JavaUtilBitSet(randomSet(numBits, 0), numBits); // empty
  T set2 = copyOf(set1, numBits);
  
  final int iterations = atLeast(10);
  for (int iter = 0; iter < iterations; ++iter) {
    DocIdSet otherSet = randomCopy(new JavaUtilBitSet(randomSet(numBits, load), numBits), numBits);
    DocIdSetIterator otherIterator = otherSet.iterator();
    if (otherIterator != null) {
      set1.or(otherIterator);
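      // Pull a fresh iterator for set2: DocIdSetIterators are forward-only, and set1.or() has consumed the first one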
      set2.or(otherSet.iterator());
      assertEquals(set1, set2, numBits);
    }
  }
}
 
Example 5 Project: lucene-solr File: TestDocIdSetBuilder.java
private void assertEquals(DocIdSet d1, DocIdSet d2) throws IOException {
  if (d1 == null) {
    if (d2 != null) {
      assertEquals(DocIdSetIterator.NO_MORE_DOCS, d2.iterator().nextDoc());
    }
  } else if (d2 == null) {
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, d1.iterator().nextDoc());
  } else {
    DocIdSetIterator i1 = d1.iterator();
    DocIdSetIterator i2 = d2.iterator();
    for (int doc = i1.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = i1.nextDoc()) {
      assertEquals(doc, i2.nextDoc());
    }
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, i2.nextDoc());
  }
}
 
Example 6 Project: lucene-solr File: CrossCollectionJoinQuery.java
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
  if (filter == null) {
    filter = getDocSet().getTopFilter();
  }

  DocIdSet readerSet = filter.getDocIdSet(context, null);
  if (readerSet == null) {
    return null;
  }
  DocIdSetIterator readerSetIterator = readerSet.iterator();
  if (readerSetIterator == null) {
    return null;
  }
  return new ConstantScoreScorer(this, score(), scoreMode, readerSetIterator);
}
 
Example 7 Project: incubator-retired-blur File: FilterCache.java
private DocIdSet docIdSetToCache(DocIdSet docIdSet, AtomicReader reader, String segmentName, Directory directory)
    throws IOException {
  if (docIdSet == null) {
    // this is better than returning null, as the nonnull result can be cached
    return DocIdSet.EMPTY_DOCIDSET;
  } else if (docIdSet.isCacheable()) {
    return docIdSet;
  } else {
    final DocIdSetIterator it = docIdSet.iterator();
    // null is allowed to be returned by iterator(),
    // in this case we wrap with the empty set,
    // which is cacheable.
    if (it == null) {
      return DocIdSet.EMPTY_DOCIDSET;
    } else {
      final IndexFileBitSet bits = new IndexFileBitSet(reader.maxDoc(), _id, segmentName, directory);
      if (!bits.exists()) {
        bits.create(it);
      }
      bits.load();
      return bits;
    }
  }
}
 
Example 8 Project: Elasticsearch File: ChildrenConstantScoreQuery.java
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
    if (remaining == 0) {
        return null;
    }

    if (shortCircuitFilter != null) {
        DocIdSet docIdSet = shortCircuitFilter.getDocIdSet(context, null);
        if (!Lucene.isEmpty(docIdSet)) {
            DocIdSetIterator iterator = docIdSet.iterator();
            if (iterator != null) {
                return ConstantScorer.create(iterator, this, queryWeight);
            }
        }
        return null;
    }

    DocIdSet parentDocIdSet = this.parentFilter.getDocIdSet(context, null);
    if (!Lucene.isEmpty(parentDocIdSet)) {
        // We can't be sure of the fact that liveDocs have been applied, so we apply it here. The "remaining"
        // count down (short circuit) logic will then work as expected.
        parentDocIdSet = BitsFilteredDocIdSet.wrap(parentDocIdSet, context.reader().getLiveDocs());
        DocIdSetIterator innerIterator = parentDocIdSet.iterator();
        if (innerIterator != null) {
            LongBitSet parentOrds = collector.parentOrds;
            SortedDocValues globalValues = globalIfd.load(context).getOrdinalsValues(parentType);
            if (globalValues != null) {
                DocIdSetIterator parentIdIterator = new ParentOrdIterator(innerIterator, parentOrds, globalValues, this);
                return ConstantScorer.create(parentIdIterator, this, queryWeight);
            }
        }
    }
    return null;
}
 
Example 9 Project: lucene-solr File: AnalyticsDriver.java
/**
 * Drive the collection of reduction data. This includes overall data as well as faceted data.
 *
 * @param manager of the request to drive
 * @param searcher the searcher used to execute the query
 * @param filter that represents the overall query
 * @param queryRequest used for the search request
 * @throws IOException if an error occurs while reading from Solr
 */
public static void drive(AnalyticsRequestManager manager, SolrIndexSearcher searcher, Filter filter, SolrQueryRequest queryRequest) throws IOException {
  StreamingInfo streamingInfo = manager.getStreamingFacetInfo();
  Iterable<StreamingFacet> streamingFacets = streamingInfo.streamingFacets;
  ReductionCollectionManager collectionManager = streamingInfo.streamingCollectionManager;

  Iterable<FacetValueQueryExecuter> facetExecuters = manager.getFacetExecuters(filter, queryRequest);

  // Streaming phase (Overall results & Value/Pivot Facets)
  // Loop through all documents and collect reduction data for streaming facets and overall results
  if (collectionManager.needsCollection()) {
    List<LeafReaderContext> contexts = searcher.getTopReaderContext().leaves();
    for (int leafNum = 0; leafNum < contexts.size(); leafNum++) {
      LeafReaderContext context = contexts.get(leafNum);
      DocIdSet dis = filter.getDocIdSet(context, null); // solr docsets already exclude any deleted docs
      if (dis == null) {
        continue;
      }
      DocIdSetIterator disi = dis.iterator();
      if (disi != null) {
        collectionManager.doSetNextReader(context);
        int doc = disi.nextDoc();
        while (doc != DocIdSetIterator.NO_MORE_DOCS) {
          // Add a document to the statistics being generated
          collectionManager.collect(doc);
          streamingFacets.forEach(facet -> facet.addFacetValueCollectionTargets());
          collectionManager.apply();
          doc = disi.nextDoc();
        }
      }
    }
  }

  // Executing phase (Query/Range Facets)
  // Send additional Solr Queries to compute facet values
  for (FacetValueQueryExecuter executer : facetExecuters) {
    executer.execute(searcher);
  }
}
 
Example 10 Project: lucene-solr File: SolrRangeQuery.java
private Scorer scorer(DocIdSet set) throws IOException {
  if (set == null) {
    return null;
  }
  final DocIdSetIterator disi = set.iterator();
  if (disi == null) {
    return null;
  }
  return new ConstantScoreScorer(this, score(), scoreMode, disi);
}
 
Example 11 Project: lucene-solr File: GraphTermsQParserPlugin.java
@Override
public final Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
  return new ConstantScoreWeight(this, boost) {
    Filter filter;

    @Override
    public Scorer scorer(LeafReaderContext context) throws IOException {
      if (filter == null) {
        DocSet set = getDocSet(searcher);
        filter = set.getTopFilter();
      }

      // Although this set only includes live docs, other filters can be pushed down to queries.
      DocIdSet readerSet = filter.getDocIdSet(context, null);
      if (readerSet == null) {
        return null;
      }
      DocIdSetIterator readerSetIterator = readerSet.iterator();
      if (readerSetIterator == null) {
        return null;
      }
      return new ConstantScoreScorer(this, score(), scoreMode, readerSetIterator);
    }

    @Override
    public boolean isCacheable(LeafReaderContext ctx) {
      return true;
    }
  };
}
 
Example 12 Project: lucene-solr File: SolrConstantScoreQuery.java
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
  DocIdSet docIdSet = filter instanceof SolrFilter
      ? ((SolrFilter) filter).getDocIdSet(this.context, context, null)
      : filter.getDocIdSet(context, null);
  if (docIdSet == null) {
    return null;
  }
  DocIdSetIterator iterator = docIdSet.iterator();
  if (iterator == null) {
    return null;
  }
  return new ConstantScoreScorer(this, score(), scoreMode, iterator);
}
 
Example 13 Project: lucene-solr File: GraphQuery.java
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
  if (filter == null) {
    resultSet = getDocSet();
    filter = resultSet.getTopFilter();
  }
  DocIdSet readerSet = filter.getDocIdSet(context,context.reader().getLiveDocs());
  // Create a scorer over the result set; if the results from the right-hand query are empty, use an empty iterator.
  return new GraphScorer(this, readerSet == null ? DocIdSetIterator.empty() : readerSet.iterator(), 1);
}
 
Example 14 Project: lucene-solr File: TestDocSet.java
public void doTestIteratorEqual(DocIdSet a, DocIdSet b) throws IOException {
  DocIdSetIterator ia = a.iterator();
  DocIdSetIterator ib = b.iterator();

  // test for next() equivalence
  for(;;) {
    int da = ia.nextDoc();
    int db = ib.nextDoc();
    assertEquals(da, db);
    assertEquals(ia.docID(), ib.docID());
    if (da==DocIdSetIterator.NO_MORE_DOCS) break;
  }

  for (int i=0; i<10; i++) {
    // test random skipTo() and next()
    ia = a.iterator();
    ib = b.iterator();
    int doc = -1;
    for (;;) {
      int da,db;
      if (rand.nextBoolean()) {
        da = ia.nextDoc();
        db = ib.nextDoc();
      } else {
        int target = doc + rand.nextInt(10) + 1;  // keep in mind future edge cases like probing (increase if necessary)
        da = ia.advance(target);
        db = ib.advance(target);
      }

      assertEquals(da, db);
      assertEquals(ia.docID(), ib.docID());
      if (da==DocIdSetIterator.NO_MORE_DOCS) break;
      doc = da;
    }
  }
}
 
Example 15 Project: incubator-retired-blur File: IndexManager.java
@SuppressWarnings("unchecked")
private static boolean isFiltered(int notAdjustedDocId, IndexReader reader, Filter filter) throws IOException {
  if (filter == null) {
    return false;
  }
  if (reader instanceof BaseCompositeReader) {
    BaseCompositeReader<IndexReader> indexReader = (BaseCompositeReader<IndexReader>) reader;
    List<? extends IndexReader> sequentialSubReaders = BaseCompositeReaderUtil.getSequentialSubReaders(indexReader);
    int readerIndex = BaseCompositeReaderUtil.readerIndex(indexReader, notAdjustedDocId);
    int readerBase = BaseCompositeReaderUtil.readerBase(indexReader, readerIndex);
    int docId = notAdjustedDocId - readerBase;
    IndexReader orgReader = sequentialSubReaders.get(readerIndex);
    SegmentReader sReader = AtomicReaderUtil.getSegmentReader(orgReader);
    if (sReader != null) {
      SegmentReader segmentReader = (SegmentReader) sReader;
      DocIdSet docIdSet = filter.getDocIdSet(segmentReader.getContext(), segmentReader.getLiveDocs());
      if (docIdSet == null) {
        return true; // a null DocIdSet means the filter matches nothing, so the doc is filtered out
      }
      DocIdSetIterator iterator = docIdSet.iterator();
      if (iterator == null) {
        return true;
      }
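      // advance() lands on the first doc >= docId; equality means the filter accepts this doc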
      if (iterator.advance(docId) == docId) {
        return false;
      }
      return true;
    }
    throw new RuntimeException("Reader has to be a SegmentReader [" + orgReader + "]");
  } else {
    throw new RuntimeException("Reader has to be a BaseCompositeReader [" + reader + "]");
  }
}
 
Example 16 Project: incubator-retired-blur File: BlurUtil.java
private static OpenBitSet getMask(DocIdSet docIdSet, int primeDocRowId, int numberOfDocsInRow) throws IOException {
  OpenBitSet mask = new OpenBitSet(numberOfDocsInRow);
  DocIdSetIterator iterator = docIdSet.iterator();
  if (iterator == null) {
    return mask;
  }
  int docId = iterator.advance(primeDocRowId);
  int end = numberOfDocsInRow + primeDocRowId;
  while (docId < end) {
    mask.set(docId - primeDocRowId);
    docId = iterator.nextDoc();
  }
  return mask;
}
 
Example 17 Project: lucene-solr File: FacetFieldProcessorByArrayDV.java
@Override
protected void collectDocs() throws IOException {
  int domainSize = fcontext.base.size();

  if (nTerms <= 0 || domainSize < effectiveMincount) { // TODO: what about allBuckets? missing bucket?
    return;
  }

  // TODO: refactor some of this logic into a base class
  boolean countOnly = collectAcc==null && allBucketsAcc==null;
  boolean fullRange = startTermIndex == 0 && endTermIndex == si.getValueCount();

  // Are we expecting many hits per bucket?
  // FUTURE: pro-rate for nTerms?
  // FUTURE: better take into account number of values in multi-valued fields.  This info is available for indexed fields.
  // FUTURE: take into account that bigger ord maps are more expensive than smaller ones
  // One test: 5M doc index, faceting on a single-valued field with almost 1M unique values, crossover point where global counting was slower
  // than per-segment counting was a domain of 658k docs.  At that point, top 10 buckets had 6-7 matches each.
  // this was for heap docvalues produced by UninvertingReader
  // Since these values were randomly distributed, let's round our domain multiplier up to account for less-random real-world data.
  long domainMultiplier = multiValuedField ? 4L : 2L;
  boolean manyHitsPerBucket = domainSize * domainMultiplier > (si.getValueCount() + 3);  // +3 to increase test coverage with small tests
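  // Worked example with the numbers above: a single-valued field (multiplier 2L) over the
  // 658k-doc crossover domain gives 1,316,000, exceeding the ~1M unique values (+3), so
  // manyHitsPerBucket comes out true and per-segment counting is preferred.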

  // If we're only calculating counts, we're not prefixing, and we expect to collect many documents per unique value,
  // then collect per-segment before mapping to global ords at the end.  This will save redundant seg->global ord mappings.
  // FUTURE: there are probably some other non "countOnly" cases where we can use this as well (i.e. those where
  // the docid is not used)
  boolean canDoPerSeg = countOnly && fullRange;
  boolean accumSeg = manyHitsPerBucket && canDoPerSeg;

  if (freq.perSeg != null) accumSeg = canDoPerSeg && freq.perSeg;  // internal - override perSeg heuristic

  final List<LeafReaderContext> leaves = fcontext.searcher.getIndexReader().leaves();
  Filter filter = fcontext.base.getTopFilter();

  for (int subIdx = 0; subIdx < leaves.size(); subIdx++) {
    LeafReaderContext subCtx = leaves.get(subIdx);

    setNextReaderFirstPhase(subCtx);

    DocIdSet dis = filter.getDocIdSet(subCtx, null); // solr docsets already exclude any deleted docs
    DocIdSetIterator disi = dis.iterator();

    SortedDocValues singleDv = null;
    SortedSetDocValues multiDv = null;
    if (multiValuedField) {
      // TODO: get sub from multi?
      multiDv = subCtx.reader().getSortedSetDocValues(sf.getName());
      if (multiDv == null) {
        multiDv = DocValues.emptySortedSet();
      }
      // some codecs may optimize SortedSet storage for single-valued fields
      // this will be null if this is not a wrapped single valued docvalues.
      if (unwrap_singleValued_multiDv) {
        singleDv = DocValues.unwrapSingleton(multiDv);
      }
    } else {
      singleDv = subCtx.reader().getSortedDocValues(sf.getName());
      if (singleDv == null) {
        singleDv = DocValues.emptySorted();
      }
    }

    LongValues toGlobal = ordinalMap == null ? null : ordinalMap.getGlobalOrds(subIdx);

    if (singleDv != null) {
      if (accumSeg) {
        collectPerSeg(singleDv, disi, toGlobal);
      } else {
        if (canDoPerSeg && toGlobal != null) {
          collectCounts(singleDv, disi, toGlobal);
        } else {
          collectDocs(singleDv, disi, toGlobal);
        }
      }
    } else {
      if (accumSeg) {
        collectPerSeg(multiDv, disi, toGlobal);
      } else {
        if (canDoPerSeg && toGlobal != null) {
          collectCounts(multiDv, disi, toGlobal);
        } else {
          collectDocs(multiDv, disi, toGlobal);
        }
      }
    }
  }

  reuse = null;  // better GC
}
 
Example 18 Project: lucene-solr File: JoinQuery.java
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
  if (filter == null) {
    boolean debug = rb != null && rb.isDebug();
    RTimer timer = (debug ? new RTimer() : null);
    resultSet = getDocSet();
    if (timer != null) timer.stop();

    if (debug) {
      SimpleOrderedMap<Object> dbg = new SimpleOrderedMap<>();
      dbg.add("time", (long) timer.getTime());
      dbg.add("fromSetSize", fromSetSize);  // the input
      dbg.add("toSetSize", resultSet.size());    // the output

      dbg.add("fromTermCount", fromTermCount);
      dbg.add("fromTermTotalDf", fromTermTotalDf);
      dbg.add("fromTermDirectCount", fromTermDirectCount);
      dbg.add("fromTermHits", fromTermHits);
      dbg.add("fromTermHitsTotalDf", fromTermHitsTotalDf);
      dbg.add("toTermHits", toTermHits);
      dbg.add("toTermHitsTotalDf", toTermHitsTotalDf);
      dbg.add("toTermDirectCount", toTermDirectCount);
      dbg.add("smallSetsDeferred", smallSetsDeferred);
      dbg.add("toSetDocsAdded", resultListDocs);

      // TODO: perhaps synchronize  addDebug in the future...
      rb.addDebug(dbg, "join", JoinQuery.this.toString());
    }

    filter = resultSet.getTopFilter();
  }

  // Although this set only includes live docs, other filters can be pushed down to queries.
  DocIdSet readerSet = filter.getDocIdSet(context, null);
  if (readerSet == null) {
    return null;
  }
  DocIdSetIterator readerSetIterator = readerSet.iterator();
  if (readerSetIterator == null) {
    return null;
  }
  return new ConstantScoreScorer(this, score(), scoreMode, readerSetIterator);
}
 