org.apache.lucene.index.IndexReader#leaves() source code examples

Listed below is example code for org.apache.lucene.index.IndexReader#leaves(), collected from several open-source projects; the full source of each example can be viewed on GitHub.
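
Before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects below) of the pattern they all share: IndexReader#leaves() returns one LeafReaderContext per segment, and all per-segment work goes through each context's reader. The index path and the "contents" field name are placeholder assumptions.

import java.io.IOException;
import java.nio.file.Paths;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Terms;
import org.apache.lucene.store.FSDirectory;

public class LeavesDemo {
  public static void main(String[] args) throws IOException {
    // Placeholder index location; point this at a real index directory.
    try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get("/path/to/index")))) {
      // leaves() exposes one LeafReaderContext per segment, in index order.
      for (LeafReaderContext context : reader.leaves()) {
        Terms terms = context.reader().terms("contents"); // null if the segment has no such field
        long termCount = terms == null ? 0 : terms.size(); // size() may be -1 if the codec cannot count cheaply
        // ord identifies the segment; docBase converts segment-local doc IDs to top-level doc IDs.
        System.out.printf("segment ord=%d docBase=%d maxDoc=%d terms=%d%n",
            context.ord, context.docBase, context.reader().maxDoc(), termCount);
      }
    }
  }
}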

Example 1  Project: crate   File: VersionsAndSeqNoResolver.java
/**
 * Load the internal doc ID and version for the uid from the reader, returning<ul>
 * <li>null if the uid wasn't found,
 * <li>a doc ID and a version otherwise
 * </ul>
 */
public static DocIdAndVersion loadDocIdAndVersion(IndexReader reader, Term term, boolean loadSeqNo) throws IOException {
    PerThreadIDVersionAndSeqNoLookup[] lookups = getLookupState(reader, term.field());
    List<LeafReaderContext> leaves = reader.leaves();
    // iterate backwards to optimize for the frequently updated documents
    // which are likely to be in the last segments
    for (int i = leaves.size() - 1; i >= 0; i--) {
        final LeafReaderContext leaf = leaves.get(i);
        PerThreadIDVersionAndSeqNoLookup lookup = lookups[leaf.ord];
        DocIdAndVersion result = lookup.lookupVersion(term.bytes(), loadSeqNo, leaf);
        if (result != null) {
            return result;
        }
    }
    return null;
}
 
Example 2  Project: incubator-retired-blur   File: MutatableAction.java
private IterableRow getIterableRow(String rowId, IndexSearcherCloseable searcher) throws IOException {
  IndexReader indexReader = searcher.getIndexReader();
  BytesRef rowIdRef = new BytesRef(rowId);
  List<AtomicReaderTermsEnum> possibleRowIds = new ArrayList<AtomicReaderTermsEnum>();
  for (AtomicReaderContext atomicReaderContext : indexReader.leaves()) {
    AtomicReader atomicReader = atomicReaderContext.reader();
    Fields fields = atomicReader.fields();
    if (fields == null) {
      continue;
    }
    Terms terms = fields.terms(BlurConstants.ROW_ID);
    if (terms == null) {
      continue;
    }
    TermsEnum termsEnum = terms.iterator(null);
    if (!termsEnum.seekExact(rowIdRef, true)) {
      continue;
    }
    // need atomic read as well...
    possibleRowIds.add(new AtomicReaderTermsEnum(atomicReader, termsEnum));
  }
  if (possibleRowIds.isEmpty()) {
    return null;
  }
  return new IterableRow(rowId, getRecords(possibleRowIds));
}
 
Example 3  Project: lucene-solr   File: LongValueFacetCounts.java
private void countAllMultiValued(IndexReader reader, String field) throws IOException {

    for (LeafReaderContext context : reader.leaves()) {

      SortedNumericDocValues values = context.reader().getSortedNumericDocValues(field);
      if (values == null) {
        // this field has no doc values for this segment
        continue;
      }
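      // if the field is actually single-valued in this segment, use the cheaper single-valued counting path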
      NumericDocValues singleValues = DocValues.unwrapSingleton(values);
      if (singleValues != null) {
        countAllOneSegment(singleValues);
      } else {
        int doc;
        while ((doc = values.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
          int limit = values.docValueCount();
          totCount += limit;
          for (int i = 0; i < limit; i++) {
            increment(values.nextValue());
          }
        }
      }
    }
  }
 
Example 4  Project: crate   File: Engine.java
protected final DocsStats docsStats(IndexReader indexReader) {
    long numDocs = 0;
    long numDeletedDocs = 0;
    long sizeInBytes = 0;
    // we don't wait for pending refreshes here since it's a stats call; instead we only mark it as accessed,
    // which will cause the next scheduled refresh to go through and refresh the stats as well
    for (LeafReaderContext readerContext : indexReader.leaves()) {
        // we go on the segment level here to get accurate numbers
        final SegmentReader segmentReader = Lucene.segmentReader(readerContext.reader());
        SegmentCommitInfo info = segmentReader.getSegmentInfo();
        numDocs += readerContext.reader().numDocs();
        numDeletedDocs += readerContext.reader().numDeletedDocs();
        try {
            sizeInBytes += info.sizeInBytes();
        } catch (IOException e) {
            logger.trace(() -> new ParameterizedMessage("failed to get size for [{}]", info.info.name), e);
        }
    }
    return new DocsStats(numDocs, numDeletedDocs, sizeInBytes);
}
 
Example 5  Project: lucene-solr   File: NearestFuzzyQuery.java
private Query newTermQuery(IndexReader reader, Term term) throws IOException {
  // we build an artificial TermStates that will give an overall df and ttf
  // equal to 1
  TermStates termStates = new TermStates(reader.getContext());
  for (LeafReaderContext leafContext : reader.leaves()) {
    Terms terms = leafContext.reader().terms(term.field());
    if (terms != null) {
      TermsEnum termsEnum = terms.iterator();
      if (termsEnum.seekExact(term.bytes())) {
        int freq = 1 - termStates.docFreq(); // we want the total df and ttf to be 1
        termStates.register(termsEnum.termState(), leafContext.ord, freq, freq);
      }
    }
  }
  return new TermQuery(term, termStates);
}
 
Example 6  Project: lucene-solr   File: TestMergePolicyConfig.java
/**
 * Given an IndexReader, asserts that there is at least one AtomicReader leaf,
 * and that all LeafReader leaves are SegmentReaders that have a compound
 * file status that matches the expected input.
 */
private static void assertCompoundSegments(IndexReader reader, 
                                           boolean compound) {

  assertNotNull("Null leaves", reader.leaves());
  assertTrue("no leaves", 0 < reader.leaves().size());

  for (LeafReaderContext atomic : reader.leaves()) {
    assertTrue("not a segment reader: " + atomic.reader().toString(), 
               atomic.reader() instanceof SegmentReader);
    
    assertEquals("Compound status incorrect for: " + 
                 atomic.reader().toString(),
                 compound,
                 ((SegmentReader)atomic.reader()).getSegmentInfo().info.getUseCompoundFile());
  }
}
 
Example 7  Project: rdf4j   File: LuceneIndex.java
/**
 * Returns a Document representing the specified document ID (combination of resource and context), or null when no
 * such Document exists yet.
 */
private Document getDocument(Term idTerm) throws IOException {
	IndexReader reader = getIndexReader();
	List<LeafReaderContext> leaves = reader.leaves();
	int size = leaves.size();
	for (int i = 0; i < size; i++) {
		LeafReader lreader = leaves.get(i).reader();
		Document document = getDocument(lreader, idTerm);
		if (document != null) {
			return document;
		}
	}
	// no such Document
	return null;
}
 
Example 8  Project: rdf4j   File: LuceneIndex.java
/**
 * Returns a list of Documents representing the specified Resource (empty when no such Document exists yet). Each
 * document represents a set of statements with the specified Resource as a subject, which are stored in a specific
 * context.
 */
private List<Document> getDocuments(Term uriTerm) throws IOException {
	List<Document> result = new ArrayList<>();

	IndexReader reader = getIndexReader();
	List<LeafReaderContext> leaves = reader.leaves();
	int size = leaves.size();
	for (int i = 0; i < size; i++) {
		LeafReader lreader = leaves.get(i).reader();
		addDocuments(lreader, uriTerm, result);
	}

	return result;
}
 
Example 9  Project: lucene-solr   File: CommonTermsQuery.java
@Override
public Query rewrite(IndexReader reader) throws IOException {
  if (this.terms.isEmpty()) {
    return new MatchNoDocsQuery("CommonTermsQuery with no terms");
  } else if (this.terms.size() == 1) {
    return newTermQuery(this.terms.get(0), null);
  }
  final List<LeafReaderContext> leaves = reader.leaves();
  final int maxDoc = reader.maxDoc();
  final TermStates[] contextArray = new TermStates[terms.size()];
  final Term[] queryTerms = this.terms.toArray(new Term[0]);
  collectTermStates(reader, leaves, contextArray, queryTerms);
  return buildQuery(maxDoc, contextArray, queryTerms);
}
 
Example 10  Project: lucene-solr   File: LongValueFacetCounts.java
private void countAll(LongValuesSource valueSource, String field, IndexReader reader) throws IOException {

    for (LeafReaderContext context : reader.leaves()) {
      LongValues fv = valueSource.getValues(context, null);
      int maxDoc = context.reader().maxDoc();

      for (int doc = 0; doc < maxDoc; doc++) {
        // Skip missing docs:
        if (fv.advanceExact(doc)) {
          increment(fv.longValue());
          totCount++;
        }
      }
    }
  }
 
Example 11  Project: lucene-solr   File: CheckJoinIndex.java
/**
 * Check that the given index is good to use for block joins.
 * @throws IllegalStateException if the index does not have an appropriate structure
 */
public static void check(IndexReader reader, BitSetProducer parentsFilter) throws IOException {
  for (LeafReaderContext context : reader.leaves()) {
    if (context.reader().maxDoc() == 0) {
      continue;
    }
    final BitSet parents = parentsFilter.getBitSet(context);
    if (parents == null || parents.cardinality() == 0) {
      throw new IllegalStateException("Every segment should have at least one parent, but " + context.reader() + " does not have any");
    }
    if (parents.get(context.reader().maxDoc() - 1) == false) {
      throw new IllegalStateException("The last document of a segment must always be a parent, but " + context.reader() + " has a child as a last doc");
    }
    final Bits liveDocs = context.reader().getLiveDocs();
    if (liveDocs != null) {
      int prevParentDoc = -1;
      DocIdSetIterator it = new BitSetIterator(parents, 0L);
      for (int parentDoc = it.nextDoc(); parentDoc != DocIdSetIterator.NO_MORE_DOCS; parentDoc = it.nextDoc()) {
        final boolean parentIsLive = liveDocs.get(parentDoc);
        for (int child = prevParentDoc + 1; child != parentDoc; child++) {
          final boolean childIsLive = liveDocs.get(child);
          if (parentIsLive != childIsLive) {
            if (childIsLive) {
              throw new IllegalStateException("Parent doc " + parentDoc + " of segment " + context.reader() + " is live but has a deleted child document " + child);
            } else {
              throw new IllegalStateException("Parent doc " + parentDoc + " of segment " + context.reader() + " is deleted but has a live child document " + child);
            }
          }
        }
        prevParentDoc = parentDoc;
      }
    }
  }
}
 
Example 12  Project: lucene-solr   File: TestBlockJoin.java
private Document getParentDoc(IndexReader reader, BitSetProducer parents, int childDocID) throws IOException {
  final List<LeafReaderContext> leaves = reader.leaves();
  final int subIndex = ReaderUtil.subIndex(childDocID, leaves);
  final LeafReaderContext leaf = leaves.get(subIndex);
  final BitSet bits = parents.getBitSet(leaf);
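  // childDocID is a top-level doc ID; subtracting leaf.docBase makes it segment-local,
  // and the next set parent bit at or after it is the child's parent doc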
  return leaf.reader().document(bits.nextSetBit(childDocID - leaf.docBase));
}
 
Example 13  Project: lucene-solr   File: QueryIndex.java
QueryTermFilter(IndexReader reader) throws IOException {
  for (LeafReaderContext ctx : reader.leaves()) {
    for (FieldInfo fi : ctx.reader().getFieldInfos()) {
      BytesRefHash terms = termsHash.computeIfAbsent(fi.name, f -> new BytesRefHash());
      Terms t = ctx.reader().terms(fi.name);
      if (t != null) {
        TermsEnum te = t.iterator();
        BytesRef term;
        while ((term = te.next()) != null) {
          terms.add(term);
        }
      }
    }
  }
}
 
Example 14  Project: crate   File: MultiPhrasePrefixQuery.java
private void getPrefixTerms(ObjectHashSet<Term> terms, final Term prefix, final IndexReader reader) throws IOException {
    // SlowCompositeReaderWrapper could be used... but this would merge all terms from each segment into one terms
    // instance, which is very expensive. Therefore I think it is better to iterate over each leaf individually.
    List<LeafReaderContext> leaves = reader.leaves();
    for (LeafReaderContext leaf : leaves) {
        Terms _terms = leaf.reader().terms(field);
        if (_terms == null) {
            continue;
        }

        TermsEnum termsEnum = _terms.iterator();
        TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(prefix.bytes());
        if (TermsEnum.SeekStatus.END == seekStatus) {
            continue;
        }

        for (BytesRef term = termsEnum.term(); term != null; term = termsEnum.next()) {
            if (!StringHelper.startsWith(term, prefix.bytes())) {
                break;
            }

            terms.add(new Term(field, BytesRef.deepCopyOf(term)));
            if (terms.size() >= maxExpansions) {
                return;
            }
        }
    }
}
 
Example 15  Project: Elasticsearch   File: FilterableTermsEnum.java
public FilterableTermsEnum(IndexReader reader, String field, int docsEnumFlag, @Nullable Query filter) throws IOException {
    if ((docsEnumFlag != PostingsEnum.FREQS) && (docsEnumFlag != PostingsEnum.NONE)) {
        throw new IllegalArgumentException("invalid docsEnumFlag of " + docsEnumFlag);
    }
    this.docsEnumFlag = docsEnumFlag;
    if (filter == null) {
        // Important - need to use the doc count that includes deleted docs
        // or we have this issue: https://github.com/elasticsearch/elasticsearch/issues/7951
        numDocs = reader.maxDoc();
    }
    List<LeafReaderContext> leaves = reader.leaves();
    List<Holder> enums = new ArrayList<>(leaves.size());
    final Weight weight;
    if (filter == null) {
        weight = null;
    } else {
        final IndexSearcher searcher = new IndexSearcher(reader);
        searcher.setQueryCache(null);
        weight = searcher.createNormalizedWeight(filter, false);
    }
    for (LeafReaderContext context : leaves) {
        Terms terms = context.reader().terms(field);
        if (terms == null) {
            continue;
        }
        TermsEnum termsEnum = terms.iterator();
        if (termsEnum == null) {
            continue;
        }
        BitSet bits = null;
        if (weight != null) {
            Scorer scorer = weight.scorer(context);
            if (scorer == null) {
                // fully filtered, none matching, no need to iterate on this
                continue;
            }
            DocIdSetIterator docs = scorer.iterator();

            // we want to force apply deleted docs
            final Bits liveDocs = context.reader().getLiveDocs();
            if (liveDocs != null) {
                docs = new FilteredDocIdSetIterator(docs) {
                    @Override
                    protected boolean match(int doc) {
                        return liveDocs.get(doc);
                    }
                };
            }

            BitDocIdSet.Builder builder = new BitDocIdSet.Builder(context.reader().maxDoc());
            builder.or(docs);
            bits = builder.build().bits();

            // Count how many docs are in our filtered set
            // TODO make this lazy-loaded only for those that need it?
            numDocs += bits.cardinality();
        }
        enums.add(new Holder(termsEnum, bits));
    }
    this.enums = enums.toArray(new Holder[enums.size()]);
}
 
Example 16  Project: lucene-solr   File: CompletionQuery.java
@Override
public Query rewrite(IndexReader reader) throws IOException {
  byte type = 0;
  boolean first = true;
  Terms terms;
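  // scan every segment to check that the suggest field is indexed with a single, consistent type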
  for (LeafReaderContext context : reader.leaves()) {
    LeafReader leafReader = context.reader();
    try {
      if ((terms = leafReader.terms(getField())) == null) {
        continue;
      }
    } catch (IOException e) {
      continue;
    }
    if (terms instanceof CompletionTerms) {
      CompletionTerms completionTerms = (CompletionTerms) terms;
      byte t = completionTerms.getType();
      if (first) {
        type = t;
        first = false;
      } else if (type != t) {
        throw new IllegalStateException(getField() + " has values of multiple types");
      }
    }
  }

  if (first == false) {
    if (this instanceof ContextQuery) {
      if (type == SuggestField.TYPE) {
        throw new IllegalStateException(this.getClass().getSimpleName()
            + " can not be executed against a non context-enabled SuggestField: "
            + getField());
      }
    } else {
      if (type == ContextSuggestField.TYPE) {
        return new ContextQuery(this);
      }
    }
  }
  return super.rewrite(reader);
}
 
Example 17  Project: lucene-solr   File: SpellChecker.java
/**
 * Indexes the data from the given {@link Dictionary}.
 * @param dict Dictionary to index
 * @param config {@link IndexWriterConfig} to use
 * @param fullMerge whether or not the spellcheck index should be fully merged
 * @throws AlreadyClosedException if the Spellchecker is already closed
 * @throws IOException If there is a low-level I/O error.
 */
public final void indexDictionary(Dictionary dict, IndexWriterConfig config, boolean fullMerge) throws IOException {
  synchronized (modifyCurrentIndexLock) {
    ensureOpen();
    final Directory dir = this.spellIndex;
    final IndexWriter writer = new IndexWriter(dir, config);
    IndexSearcher indexSearcher = obtainSearcher();
    final List<TermsEnum> termsEnums = new ArrayList<>();

    final IndexReader reader = searcher.getIndexReader();
    if (reader.maxDoc() > 0) {
      for (final LeafReaderContext ctx : reader.leaves()) {
        Terms terms = ctx.reader().terms(F_WORD);
        if (terms != null)
          termsEnums.add(terms.iterator());
      }
    }
    
    boolean isEmpty = termsEnums.isEmpty();

    try { 
      BytesRefIterator iter = dict.getEntryIterator();
      BytesRef currentTerm;
      
      terms: while ((currentTerm = iter.next()) != null) {

        String word = currentTerm.utf8ToString();
        int len = word.length();
        if (len < 3) {
          continue; // too short we bail but "too long" is fine...
        }

        if (!isEmpty) {
          for (TermsEnum te : termsEnums) {
            if (te.seekExact(currentTerm)) {
              continue terms;
            }
          }
        }

        // ok index the word
        Document doc = createDocument(word, getMin(len), getMax(len));
        writer.addDocument(doc);
      }
    } finally {
      releaseSearcher(indexSearcher);
    }
    if (fullMerge) {
      writer.forceMerge(1);
    }
    // close writer
    writer.close();
    // TODO: this isn't that great, maybe in the future SpellChecker should take
    // IWC in its ctor / keep its writer open?
    
    // also re-open the spell index to see our own changes when the next suggestion
    // is fetched:
    swapSearcher(dir);
  }
}
 
Example 18  Project: crate   File: VersionsAndSeqNoResolver.java
private static PerThreadIDVersionAndSeqNoLookup[] getLookupState(IndexReader reader, String uidField) throws IOException {
    // We cache on the top level
    // This means cache entries have a shorter lifetime, maybe as low as 1s with the
    // default refresh interval and a steady indexing rate, but on the other hand it
    // proved to be cheaper than having to perform a CHM and a TL get for every segment.
    // See https://github.com/elastic/elasticsearch/pull/19856.
    IndexReader.CacheHelper cacheHelper = reader.getReaderCacheHelper();
    CloseableThreadLocal<PerThreadIDVersionAndSeqNoLookup[]> ctl = LOOKUP_STATES.get(cacheHelper.getKey());
    if (ctl == null) {
        // First time we are seeing this reader's core; make a new CTL:
        ctl = new CloseableThreadLocal<>();
        CloseableThreadLocal<PerThreadIDVersionAndSeqNoLookup[]> other = LOOKUP_STATES.putIfAbsent(cacheHelper.getKey(), ctl);
        if (other == null) {
            // Our CTL won, we must remove it when the reader is closed:
            cacheHelper.addClosedListener(REMOVE_LOOKUP_STATE);
        } else {
            // Another thread beat us to it: just use their CTL:
            ctl = other;
        }
    }

    PerThreadIDVersionAndSeqNoLookup[] lookupState = ctl.get();
    if (lookupState == null) {
        lookupState = new PerThreadIDVersionAndSeqNoLookup[reader.leaves().size()];
        for (LeafReaderContext leaf : reader.leaves()) {
            lookupState[leaf.ord] = new PerThreadIDVersionAndSeqNoLookup(leaf.reader(), uidField);
        }
        ctl.set(lookupState);
    }

    if (lookupState.length != reader.leaves().size()) {
        throw new AssertionError("Mismatched numbers of leaves: " + lookupState.length + " != " + reader.leaves().size());
    }

    if (lookupState.length > 0 && Objects.equals(lookupState[0].uidField, uidField) == false) {
        throw new AssertionError("Index does not consistently use the same uid field: ["
                + uidField + "] != [" + lookupState[0].uidField + "]");
    }

    return lookupState;
}
 
Example 19  Project: lucene-solr   File: TestUtil.java
/** This runs the CheckIndex tool on the Reader.  If any
 *  issues are hit, a RuntimeException is thrown */
public static void checkReader(IndexReader reader) throws IOException {
  for (LeafReaderContext context : reader.leaves()) {
    checkReader(context.reader(), true);
  }
}
 
Example 20  Project: lucene-solr   File: IndexSizeEstimatorTest.java
@Test
public void testEstimator() throws Exception {
  JettySolrRunner jetty = cluster.getRandomJetty(random());
  String randomCoreName = jetty.getCoreContainer().getAllCoreNames().iterator().next();
  SolrCore core = jetty.getCoreContainer().getCore(randomCoreName);
  RefCounted<SolrIndexSearcher> searcherRef = core.getSearcher();
  try {
    SolrIndexSearcher searcher = searcherRef.get();
    // limit the max length
    IndexSizeEstimator estimator = new IndexSizeEstimator(searcher.getRawReader(), 20, 50, true, true);
    IndexSizeEstimator.Estimate estimate = estimator.estimate();
    Map<String, Long> fieldsBySize = estimate.getFieldsBySize();
    assertFalse("empty fieldsBySize", fieldsBySize.isEmpty());
    assertEquals(fieldsBySize.toString(), fields.size(), fieldsBySize.size());
    fieldsBySize.forEach((k, v) -> assertTrue("unexpected size of " + k + ": " + v, v > 0));
    Map<String, Long> typesBySize = estimate.getTypesBySize();
    assertFalse("empty typesBySize", typesBySize.isEmpty());
    assertTrue("expected at least 8 types: " + typesBySize.toString(), typesBySize.size() >= 8);
    typesBySize.forEach((k, v) -> assertTrue("unexpected size of " + k + ": " + v, v > 0));
    Map<String, Object> summary = estimate.getSummary();
    assertNotNull("summary", summary);
    assertFalse("empty summary", summary.isEmpty());
    assertEquals(summary.keySet().toString(), fields.size(), summary.keySet().size());
    Map<String, Object> details = estimate.getDetails();
    assertNotNull("details", details);
    assertFalse("empty details", details.isEmpty());
    // by type
    assertEquals(details.keySet().toString(), 6, details.keySet().size());

    // check sampling
    estimator.setSamplingThreshold(searcher.getRawReader().maxDoc() / 2);
    IndexSizeEstimator.Estimate sampledEstimate = estimator.estimate();
    Map<String, Long> sampledFieldsBySize = sampledEstimate.getFieldsBySize();
    assertFalse("empty fieldsBySize", sampledFieldsBySize.isEmpty());
    // verify that the sampled values are within 50% of the original values
    fieldsBySize.forEach((field, size) -> {
      Long sampledSize = sampledFieldsBySize.get(field);
      assertNotNull("sampled size for " + field + " is missing in " + sampledFieldsBySize, sampledSize);
      double delta = (double) size * 0.5;
      assertEquals("sampled size of " + field + " is wildly off", (double)size, (double)sampledSize, delta);
    });
    // verify the reader is still usable - SOLR-13694
    IndexReader reader = searcher.getRawReader();
    for (LeafReaderContext context : reader.leaves()) {
      LeafReader leafReader = context.reader();
      assertTrue("unexpected LeafReader class: " + leafReader.getClass().getName(), leafReader instanceof CodecReader);
      Bits liveDocs = leafReader.getLiveDocs();
      CodecReader codecReader = (CodecReader) leafReader;
      StoredFieldsReader storedFieldsReader = codecReader.getFieldsReader();
      StoredFieldVisitor visitor = new DocumentStoredFieldVisitor();
      assertNotNull(storedFieldsReader);
      for (int docId = 0; docId < leafReader.maxDoc(); docId++) {
        if (liveDocs != null && !liveDocs.get(docId)) {
          continue;
        }
        storedFieldsReader.visitDocument(docId, visitor);
      }
    }
  } finally {
    searcherRef.decref();
    core.close();
  }
}