Source code examples: org.apache.lucene.search.SimpleCollector with org.apache.lucene.util.Bits

Listed below are example usages of org.apache.lucene.util.Bits, drawn from open-source projects that use it alongside Lucene collectors and scorers. You can also follow the project links to view the full source on GitHub.
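
Before the examples, here is a minimal sketch, written for this page rather than taken from any of the projects below and assuming a recent (8.x) Lucene API, of how a SimpleCollector typically consults a Bits instance: the live docs are fetched once per leaf, and a null Bits means every document is accepted.

import java.io.IOException;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.SimpleCollector;
import org.apache.lucene.util.Bits;

public class LiveDocCountingCollector extends SimpleCollector {
  private Bits liveDocs; // null when the current segment has no deletions
  private int count;

  @Override
  protected void doSetNextReader(LeafReaderContext context) throws IOException {
    liveDocs = context.reader().getLiveDocs();
  }

  @Override
  public void collect(int doc) throws IOException {
    // Standard convention throughout Lucene: a null Bits accepts all docs.
    if (liveDocs == null || liveDocs.get(doc)) {
      count++;
    }
  }

  @Override
  public ScoreMode scoreMode() {
    return ScoreMode.COMPLETE_NO_SCORES;
  }

  public int getCount() {
    return count;
  }
}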

Example 1 (project: lucene-solr, file: Tagger.java)
public Tagger(Terms terms, Bits liveDocs, TokenStream tokenStream,
              TagClusterReducer tagClusterReducer, boolean skipAltTokens,
              boolean ignoreStopWords) throws IOException {
  this.terms = terms;
  this.liveDocs = liveDocs;
  this.tokenStream = tokenStream;
  this.skipAltTokens = skipAltTokens;
  this.ignoreStopWords = ignoreStopWords;
  byteRefAtt = tokenStream.addAttribute(TermToBytesRefAttribute.class);
  posIncAtt = tokenStream.addAttribute(PositionIncrementAttribute.class);
  offsetAtt = tokenStream.addAttribute(OffsetAttribute.class);
  taggingAtt = tokenStream.addAttribute(TaggingAttribute.class);
  tokenStream.reset();

  this.tagClusterReducer = tagClusterReducer;
}
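
As a hedged usage sketch (not part of the project; the field name "name_tag", the analyzer, and the reducer are assumptions), the constructor's arguments would typically be obtained from a single LeafReader:

static Tagger newTagger(LeafReader leafReader, Analyzer analyzer, String text,
                        TagClusterReducer reducer) throws IOException {
  Terms terms = leafReader.terms("name_tag");  // postings for the indexed tag field (assumed name)
  Bits liveDocs = leafReader.getLiveDocs();    // null when the segment has no deletions
  TokenStream stream = analyzer.tokenStream("name_tag", text);
  // The constructor above registers its attributes and calls stream.reset() itself.
  return new Tagger(terms, liveDocs, stream, reducer,
      false /* skipAltTokens */, false /* ignoreStopWords */);
}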
 
Example 2 (project: Elasticsearch, file: MissingAggregator.java)
@Override
public LeafBucketCollector getLeafCollector(LeafReaderContext ctx,
        final LeafBucketCollector sub) throws IOException {

    final Bits docsWithValue;
    if (valuesSource != null) {
        docsWithValue = valuesSource.docsWithValue(ctx);
    } else {
        docsWithValue = new Bits.MatchNoBits(ctx.reader().maxDoc());
    }
    return new LeafBucketCollectorBase(sub, docsWithValue) {
        @Override
        public void collect(int doc, long bucket) throws IOException {
            // Collect only documents that are missing a value for the field.
            if (docsWithValue != null && !docsWithValue.get(doc)) {
                collectBucket(sub, doc, bucket);
            }
        }
    };
}
 
Example 3 (project: lucene-solr, file: Weight.java)
/** Specialized method to bulk-score all hits; we
 *  separate this from {@link #scoreRange} to help out
 *  HotSpot.
 *  See <a href="https://issues.apache.org/jira/browse/LUCENE-5487">LUCENE-5487</a> */
static void scoreAll(LeafCollector collector, DocIdSetIterator iterator, TwoPhaseIterator twoPhase, Bits acceptDocs) throws IOException {
  if (twoPhase == null) {
    for (int doc = iterator.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.nextDoc()) {
      if (acceptDocs == null || acceptDocs.get(doc)) {
        collector.collect(doc);
      }
    }
  } else {
    // The scorer has an approximation, so run the approximation first, then check acceptDocs, then confirm
    final DocIdSetIterator approximation = twoPhase.approximation();
    for (int doc = approximation.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = approximation.nextDoc()) {
      if ((acceptDocs == null || acceptDocs.get(doc)) && twoPhase.matches()) {
        collector.collect(doc);
      }
    }
  }
}
 
Example 4 (project: SolrTextTagger, file: Tagger.java)
public Tagger(Terms terms, Bits liveDocs, TokenStream tokenStream,
              TagClusterReducer tagClusterReducer, boolean skipAltTokens,
              boolean ignoreStopWords) throws IOException {
  this.terms = terms;
  this.liveDocs = liveDocs;
  this.tokenStream = tokenStream;
  this.skipAltTokens = skipAltTokens;
  this.ignoreStopWords = ignoreStopWords;
  byteRefAtt = tokenStream.addAttribute(TermToBytesRefAttribute.class);
  posIncAtt = tokenStream.addAttribute(PositionIncrementAttribute.class);
  offsetAtt = tokenStream.addAttribute(OffsetAttribute.class);
  taggingAtt = tokenStream.addAttribute(TaggingAttribute.class);
  tokenStream.reset();

  this.tagClusterReducer = tagClusterReducer;
}
 
Example 5 (project: lucene-solr, file: TestTransactionRollback.java)
private void checkExpecteds(BitSet expecteds) throws Exception {
  IndexReader r = DirectoryReader.open(dir);

  //Perhaps not the most efficient approach but meets our
  //needs here.
  final Bits liveDocs = MultiBits.getLiveDocs(r);
  for (int i = 0; i < r.maxDoc(); i++) {
    if (liveDocs == null || liveDocs.get(i)) {
      String sval = r.document(i).get(FIELD_RECORD_ID);
      if (sval != null) {
        int val = Integer.parseInt(sval);
        assertTrue("Did not expect document #" + val, expecteds.get(val));
        expecteds.set(val, false);
      }
    }
  }
  r.close();
  assertEquals("Should have 0 docs remaining", 0, expecteds.cardinality());
}
 
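Example 6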
/**
 * Check the behaviour of compareBottom when docsWithField is null (this happens when all documents contain the
 * field).
 */
@Test
public void testCompareBottom_nullDocsWithField()
{
    // Set docsWithField to null to simulate all documents containing the field.
    Bits oldValue = textSortFieldComparator.docsWithField;
    textSortFieldComparator.docsWithField = null;

    // Set up the document to have an empty term.
    when(mockDocTerms.get(DOC)).thenReturn(new BytesRef());

    // Call the method under test.
    textSortFieldComparator.compareBottom(DOC);

    // Expect the EMPTY_TERM to be compared
    verify(mockCollator).compare(BOTTOM_STRING, "");

    // Reset docsWithField with the mock after the test.
    textSortFieldComparator.docsWithField = oldValue;
}
 
Example 7 (project: lucene-solr, file: ContainsPrefixTreeQuery.java)
/** Get prefix & leaf docs at this cell. */
private SmallDocSet getDocs(Cell cell, Bits acceptContains) throws IOException {
  assert indexedCell.compareToNoLeaf(cell) == 0;
  //called when we've reached detailLevel.
  if (indexedCell.isLeaf()) {//only a leaf
    SmallDocSet result = collectDocs(acceptContains);
    nextTerm();
    return result;
  } else {
    SmallDocSet docsAtPrefix = collectDocs(acceptContains);
    if (!nextTerm()) {
      return docsAtPrefix;
    }
    //collect leaf too
    if (indexedCell.isLeaf() && indexedCell.compareToNoLeaf(cell) == 0) {
      SmallDocSet docsAtLeaf = collectDocs(acceptContains);
      nextTerm();
      return union(docsAtPrefix, docsAtLeaf);
    } else {
      return docsAtPrefix;
    }
  }
}
 
Example 8 (project: lucene-solr, file: DocSetUtil.java)
private static DocSet createSmallSet(List<LeafReaderContext> leaves, PostingsEnum[] postList, int maxPossible, int firstReader) throws IOException {
  int[] docs = new int[maxPossible];
  int sz = 0;
  for (int i = firstReader; i < postList.length; i++) {
    PostingsEnum postings = postList[i];
    if (postings == null) continue;
    LeafReaderContext ctx = leaves.get(i);
    Bits liveDocs = ctx.reader().getLiveDocs();
    int base = ctx.docBase;
    for (; ; ) {
      int subId = postings.nextDoc();
      if (subId == DocIdSetIterator.NO_MORE_DOCS) break;
      if (liveDocs != null && !liveDocs.get(subId)) continue;
      int globalId = subId + base;
      docs[sz++] = globalId;
    }
  }

  return new SortedIntDocSet(docs, sz);
}
 
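Example 9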
public static DocIdSet getFullySetDocIdSet(int maxDoc) {
  Bits bits = getFullySetBits(maxDoc);
  return new DocIdSet() {
    @Override
    public DocIdSetIterator iterator() throws IOException {
      return getFullySetDocIdSetIterator(maxDoc);
    }

    @Override
    public Bits bits() throws IOException {
      return bits;
    }

    @Override
    public boolean isCacheable() {
      return true;
    }
  };
}
 
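Example 10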
/**
 * Creates a new {@link SoftDeletesRetentionMergePolicy}
 * @param field the soft deletes field
 * @param retentionQuerySupplier a query supplier for the retention query
 * @param in the wrapped MergePolicy
 */
public SoftDeletesRetentionMergePolicy(String field, Supplier<Query> retentionQuerySupplier, MergePolicy in) {
  super(in, toWrap -> new MergePolicy.OneMerge(toWrap.segments) {
    @Override
    public CodecReader wrapForMerge(CodecReader reader) throws IOException {
      CodecReader wrapped = toWrap.wrapForMerge(reader);
      Bits liveDocs = reader.getLiveDocs();
      if (liveDocs == null) { // no deletes - just keep going
        return wrapped;
      }
      return applyRetentionQuery(field, retentionQuerySupplier.get(), wrapped);
    }
  });
  Objects.requireNonNull(field, "field must not be null");
  Objects.requireNonNull(retentionQuerySupplier, "retentionQuerySupplier must not be null");
  this.field = field;
  this.retentionQuerySupplier = retentionQuerySupplier;
}
 
Example 11 (project: lucene-solr, file: TestStressIndexing2.java)
private static void printDocs(DirectoryReader r) throws Throwable {
  for (LeafReaderContext ctx : r.leaves()) {
    // TODO: improve this
    LeafReader sub = ctx.reader();
    Bits liveDocs = sub.getLiveDocs();
    System.out.println("  " + ((SegmentReader) sub).getSegmentInfo());
    for (int docID = 0; docID < sub.maxDoc(); docID++) {
      Document doc = sub.document(docID);
      if (liveDocs == null || liveDocs.get(docID)) {
        System.out.println("    docID=" + docID + " id:" + doc.get("id"));
      } else {
        System.out.println("    DEL docID=" + docID + " id:" + doc.get("id"));
      }
    }
  }
}
 
Example 12 (project: lucene-solr, file: MultiBits.java)
/** Returns a single {@link Bits} instance for this
 *  reader, merging live Documents on the
 *  fly.  This method will return null if the reader
 *  has no deletions.
 *
 *  <p><b>NOTE</b>: this is a very slow way to access live docs.
 *  For example, each Bits access will require a binary search.
 *  It's better to get the sub-readers and iterate through them
 *  yourself. */
public static Bits getLiveDocs(IndexReader reader) {
  if (reader.hasDeletions()) {
    final List<LeafReaderContext> leaves = reader.leaves();
    final int size = leaves.size();
    assert size > 0 : "A reader with deletions must have at least one leaf";
    if (size == 1) {
      return leaves.get(0).reader().getLiveDocs();
    }
    final Bits[] liveDocs = new Bits[size];
    final int[] starts = new int[size + 1];
    for (int i = 0; i < size; i++) {
      // record all liveDocs, even if they are null
      final LeafReaderContext ctx = leaves.get(i);
      liveDocs[i] = ctx.reader().getLiveDocs();
      starts[i] = ctx.docBase;
    }
    starts[size] = reader.maxDoc();
    return new MultiBits(liveDocs, starts, true);
  } else {
    return null;
  }
}
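
Following the advice in the note above, here is a minimal sketch of the recommended per-segment pattern: iterate the sub-readers yourself so that each Bits lookup stays segment-local instead of paying MultiBits' per-access binary search.

static int countLiveDocs(IndexReader reader) {
  int count = 0;
  for (LeafReaderContext ctx : reader.leaves()) {
    Bits liveDocs = ctx.reader().getLiveDocs(); // per-segment Bits; no docBase translation needed
    int maxDoc = ctx.reader().maxDoc();
    for (int doc = 0; doc < maxDoc; doc++) {
      if (liveDocs == null || liveDocs.get(doc)) {
        count++;
      }
    }
  }
  return count;
}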
 
Example 13 (project: lucene-solr, file: TestPrefixCompletionQuery.java)
@Override
public Bits getBits(final LeafReaderContext context) throws IOException {
  final int maxDoc = context.reader().maxDoc();
  FixedBitSet bits = new FixedBitSet(maxDoc);
  final SortedNumericDocValues values = DocValues.getSortedNumeric(context.reader(), field);
  int docID;
  while ((docID = values.nextDoc()) != NO_MORE_DOCS) {
    final int count = values.docValueCount();
    for (int i = 0; i < count; ++i) {
      final long v = values.nextValue();
      if (v >= min && v <= max) {
        bits.set(docID);
        break;
      }
    }
  }
  return bits;
}
 
Example 14 (project: Elasticsearch, file: CrateDocCollector.java)
@Override
public int score(LeafCollector collector, Bits acceptDocs, int min, int max) throws IOException {
    // TODO: figure out if min/max can be used to optimize this and still work correctly with pause/resume
    // and also check if twoPhaseIterator can be used
    collector.setScorer(scorer);
    for (int doc = iterator.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.nextDoc()) {
        if (acceptDocs == null || acceptDocs.get(doc)) {
            collector.collect(doc);
        }
    }
    return DocIdSetIterator.NO_MORE_DOCS;
}
 
Example 15 (project: lucene-solr, file: AssertingBulkScorer.java)
@Override
public void score(LeafCollector collector, Bits acceptDocs) throws IOException {
  assert max == 0;
  collector = new AssertingLeafCollector(collector, 0, PostingsEnum.NO_MORE_DOCS);
  if (random.nextBoolean()) {
    try {
      final int next = score(collector, acceptDocs, 0, PostingsEnum.NO_MORE_DOCS);
      assert next == DocIdSetIterator.NO_MORE_DOCS;
    } catch (UnsupportedOperationException e) {
      in.score(collector, acceptDocs);
    }
  } else {
    in.score(collector, acceptDocs);
  }
}
 
Example 16 (project: Elasticsearch, file: ValuesSource.java)
@Override
public Bits docsWithValue(LeafReaderContext context) {
    final MultiGeoPointValues geoPoints = geoPointValues(context);
    if (org.elasticsearch.index.fielddata.FieldData.unwrapSingleton(geoPoints) != null) {
        return org.elasticsearch.index.fielddata.FieldData.unwrapSingletonBits(geoPoints);
    } else {
        return org.elasticsearch.index.fielddata.FieldData.docsWithValue(geoPoints, context.reader().maxDoc());
    }
}
 
Example 17 (project: lucene-solr, file: TestBooleanOr.java)
private static BulkScorer scorer(int... matches) {
  return new BulkScorer() {
    final ScoreAndDoc scorer = new ScoreAndDoc();
    int i = 0;
    @Override
    public int score(LeafCollector collector, Bits acceptDocs, int min, int max) throws IOException {
      collector.setScorer(scorer);
      while (i < matches.length && matches[i] < min) {
        i += 1;
      }
      while (i < matches.length && matches[i] < max) {
        scorer.doc = matches[i];
        if (acceptDocs == null || acceptDocs.get(scorer.doc)) {
          collector.collect(scorer.doc);
        }
        i += 1;
      }
      if (i == matches.length) {
        return DocIdSetIterator.NO_MORE_DOCS;
      }
      return RandomNumbers.randomIntBetween(random(), max, matches[i]);
    }
    @Override
    public long cost() {
      return matches.length;
    }
  };
}
 
Example 18 (project: incubator-retired-blur, file: FacetQuery.java)
private Scorer[] getScorers(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer,
    Bits acceptDocs) throws IOException {
  Scorer[] scorers = new Scorer[_facets.length];
  for (int i = 0; i < scorers.length; i++) {
    scorers[i] = _facets[i].scorer(context, scoreDocsInOrder, topScorer, acceptDocs);
  }
  return scorers;
}
 
Example 19 (project: Elasticsearch, file: BinaryDVAtomicFieldData.java)
@Override
public SortedBinaryDocValues getBytesValues() {
    try {
        final BinaryDocValues values = DocValues.getBinary(reader, field);
        final Bits docsWithField = DocValues.getDocsWithField(reader, field);
        return FieldData.singleton(values, docsWithField);
    } catch (IOException e) {
        throw new IllegalStateException("Cannot load doc values", e);
    }
}
 
Example 20 (project: lucene-solr, file: DocumentsWriterPerThread.java)
private FixedBitSet sortLiveDocs(Bits liveDocs, Sorter.DocMap sortMap) {
  assert liveDocs != null && sortMap != null;
  FixedBitSet sortedLiveDocs = new FixedBitSet(liveDocs.length());
  sortedLiveDocs.set(0, liveDocs.length());
  for (int i = 0; i < liveDocs.length(); i++) {
    if (liveDocs.get(i) == false) {
      sortedLiveDocs.clear(sortMap.oldToNew(i));
    }
  }
  return sortedLiveDocs;
}
 
Example 21 (project: incubator-retired-blur, file: BlurUtil.java)
private static OpenBitSet getDocsToFetch(AtomicReader atomicReader, Selector selector, int primeDocRowId,
    int numberOfDocsInRow, Bits liveDocs, Filter filter, AtomicInteger totalRecords) throws IOException {
  Set<String> alreadyProcessed = new HashSet<String>();
  OpenBitSet bits = new OpenBitSet(numberOfDocsInRow);
  OpenBitSet mask = null;
  if (filter != null) {
    DocIdSet docIdSet = filter.getDocIdSet(atomicReader.getContext(), liveDocs);
    mask = getMask(docIdSet, primeDocRowId, numberOfDocsInRow);
  }
  Set<String> columnFamiliesToFetch = selector.getColumnFamiliesToFetch();
  boolean fetchAll = true;
  if (columnFamiliesToFetch != null) {
    fetchAll = false;
    applyFamilies(alreadyProcessed, bits, columnFamiliesToFetch, atomicReader, primeDocRowId, numberOfDocsInRow,
        liveDocs);
  }
  Map<String, Set<String>> columnsToFetch = selector.getColumnsToFetch();
  if (columnsToFetch != null) {
    fetchAll = false;
    applyColumns(alreadyProcessed, bits, columnsToFetch, atomicReader, primeDocRowId, numberOfDocsInRow, liveDocs);
  }
  if (fetchAll) {
    bits.set(0, numberOfDocsInRow);
  }
  if (mask != null) {
    bits.intersect(mask);
  }
  totalRecords.set((int) bits.cardinality());
  return bits;
}
 
Example 22 (project: Elasticsearch, file: FilteredCollector.java)
@Override
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
    final Scorer filterScorer = filter.scorer(context);
    final LeafCollector in = collector.getLeafCollector(context);
    final Bits bits = Lucene.asSequentialAccessBits(context.reader().maxDoc(), filterScorer);

    return new FilterLeafCollector(in) {
        @Override
        public void collect(int doc) throws IOException {
            if (bits.get(doc)) {
                in.collect(doc);
            }
        }
    };
}
 
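Example 23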
@Override
public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException
{
    docTerms = DocValues.getBinary(context.reader(), field);
    docsWithField = DocValues.getDocsWithField(context.reader(), field);
    if (docsWithField instanceof Bits.MatchAllBits) {
      docsWithField = null;
    }
    return this;
}
 
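Example 24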
@Override
public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException
{
    docTerms = DocValues.getBinary(context.reader(), field);
    docsWithField = DocValues.getDocsWithField(context.reader(), field);
    if (docsWithField instanceof Bits.MatchAllBits)
    {
        docsWithField = null;
    }
    return this;
}
 
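Example 25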
@Test
public void testQueryFilterWrap1() throws IOException {
  IndexReader r = getIndexReader();
  AccessControlFactory accessControlFactory = new FilterAccessControlFactory();
  Collection<String> readAuthorizations = new ArrayList<String>();
  Collection<String> discoverAuthorizations = new ArrayList<String>();
  Set<String> discoverableFields = new HashSet<String>(Arrays.asList("rowid"));
  BlurSecureIndexSearcher blurSecureIndexSearcher = new BlurSecureIndexSearcher(r, null, accessControlFactory,
      readAuthorizations, discoverAuthorizations, discoverableFields, null);
  Query wrapFilter;
  Query query = new TermQuery(new Term("a", "b"));
  Filter filter = new Filter() {
    @Override
    public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
      throw new RuntimeException("Not implemented.");
    }
  };
  {
    Term primeDocTerm = new Term(BlurConstants.PRIME_DOC, BlurConstants.PRIME_DOC_VALUE);
    ScoreType scoreType = ScoreType.SUPER;
    SuperQuery superQuery = new SuperQuery(query, scoreType, primeDocTerm);
    wrapFilter = blurSecureIndexSearcher.wrapFilter(superQuery, filter);
    System.out.println(wrapFilter);
  }
  {
    assertTrue(wrapFilter instanceof SuperQuery);
    SuperQuery sq = (SuperQuery) wrapFilter;
    Query inner = sq.getQuery();
    assertTrue(inner instanceof FilteredQuery);
    FilteredQuery filteredQuery = (FilteredQuery) inner;
    Query innerFilteredQuery = filteredQuery.getQuery();
    assertEquals(innerFilteredQuery, query);
    assertTrue(filteredQuery.getFilter() == filter);
  }
}
 
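Example 26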
public static Bits getFullyEmptyBits(int maxDoc) {
  return new Bits() {
    @Override
    public boolean get(int index) {
      return false;
    }

    @Override
    public int length() {
      return maxDoc;
    }
  };
}
 
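Example 27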
public static Bits getMatchAll(final int length) {
  return new Bits() {

    @Override
    public int length() {
      return length;
    }

    @Override
    public boolean get(int index) {
      return true;
    }
  };
}
 
Example 28 (project: lucene-solr, file: ContainsPrefixTreeQuery.java)
/** This is the primary algorithm; it is recursive. Returns null if it finds none. */
private SmallDocSet visit(Cell cell, Bits acceptContains) throws IOException {

  if (thisTerm == null)//signals all done
    return null;

  // Get the AND of all child results (into combinedSubResults)
  SmallDocSet combinedSubResults = null;
  //   Optimization: use null subCellsFilter when we know cell is within the query shape.
  Shape subCellsFilter = queryShape;
  if (cell.getLevel() != 0 && ((cell.getShapeRel() == null || cell.getShapeRel() == SpatialRelation.WITHIN))) {
    subCellsFilter = null;
    assert cell.getShape().relate(queryShape) == SpatialRelation.WITHIN;
  }
  CellIterator subCells = cell.getNextLevelCells(subCellsFilter);
  while (subCells.hasNext()) {
    Cell subCell = subCells.next();
    if (!seek(subCell)) {
      combinedSubResults = null;
    } else if (subCell.getLevel() == detailLevel) {
      combinedSubResults = getDocs(subCell, acceptContains);
    } else if (!multiOverlappingIndexedShapes &&
        subCell.getShapeRel() == SpatialRelation.WITHIN) {
      combinedSubResults = getLeafDocs(subCell, acceptContains);
    } else {
      //OR the leaf docs with all child results
      SmallDocSet leafDocs = getLeafDocs(subCell, acceptContains);
      SmallDocSet subDocs = visit(subCell, acceptContains); //recursion
      combinedSubResults = union(leafDocs, subDocs);
    }

    if (combinedSubResults == null)
      break;
    acceptContains = combinedSubResults;//has the 'AND' effect on next iteration
  }

  return combinedSubResults;
}
 
Example 29 (project: lucene-solr, file: Lucene50LiveDocsFormat.java)
@Override
public Bits readLiveDocs(Directory dir, SegmentCommitInfo info, IOContext context) throws IOException {
  long gen = info.getDelGen();
  String name = IndexFileNames.fileNameFromGeneration(info.info.name, EXTENSION, gen);
  final int length = info.info.maxDoc();
  try (ChecksumIndexInput input = dir.openChecksumInput(name, context)) {
    Throwable priorE = null;
    try {
      CodecUtil.checkIndexHeader(input, CODEC_NAME, VERSION_START, VERSION_CURRENT, 
                                   info.info.getId(), Long.toString(gen, Character.MAX_RADIX));
      long[] data = new long[FixedBitSet.bits2words(length)];
      for (int i = 0; i < data.length; i++) {
        data[i] = input.readLong();
      }
      FixedBitSet fbs = new FixedBitSet(data, length);
      if (fbs.length() - fbs.cardinality() != info.getDelCount()) {
        throw new CorruptIndexException("bits.deleted=" + (fbs.length() - fbs.cardinality()) + 
                                        " info.delcount=" + info.getDelCount(), input);
      }
      return fbs.asReadOnlyBits();
    } catch (Throwable exception) {
      priorE = exception;
    } finally {
      CodecUtil.checkFooter(input, priorE);
    }
  }
  throw new AssertionError();
}
 
Example 30 (project: lucene-solr, file: FacetHeatmap.java)
private Bits getTopAcceptDocs(DocSet docSet, SolrIndexSearcher searcher) throws IOException {
  if (docSet.size() == searcher.numDocs()) {
    return null; // means match everything (all live docs). This can speed things up a lot.
  } else if (docSet.size() == 0) {
    return new Bits.MatchNoBits(searcher.maxDoc()); // can speed things up a lot
  } else {
    return docSet.getBits();
  }
}