Listed below are example snippets that use org.apache.lucene.index.IndexReader#leaves().
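All of these examples share one pattern: a top-level IndexReader is composed of per-segment readers, and reader.leaves() exposes one LeafReaderContext per segment. As a minimal sketch of that pattern (the field name "myField" is a placeholder, not taken from any snippet below):

// Minimal sketch of the canonical leaves() loop; Lucene 5+ API assumed.
for (LeafReaderContext context : reader.leaves()) {
    LeafReader leaf = context.reader();   // the per-segment reader
    Terms terms = leaf.terms("myField");  // may be null if the segment lacks the field
    if (terms == null) {
        continue;
    }
    // context.docBase maps segment-local doc IDs to top-level doc IDs:
    // topLevelDoc = segmentDoc + context.docBase
}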
/**
 * Load the internal doc ID and version for the uid from the reader, returning<ul>
 * <li>null if the uid wasn't found,
 * <li>a doc ID and a version otherwise
 * </ul>
 */
public static DocIdAndVersion loadDocIdAndVersion(IndexReader reader, Term term, boolean loadSeqNo) throws IOException {
    PerThreadIDVersionAndSeqNoLookup[] lookups = getLookupState(reader, term.field());
    List<LeafReaderContext> leaves = reader.leaves();
    // iterate backwards to optimize for the frequently updated documents
    // which are likely to be in the last segments
    for (int i = leaves.size() - 1; i >= 0; i--) {
        final LeafReaderContext leaf = leaves.get(i);
        PerThreadIDVersionAndSeqNoLookup lookup = lookups[leaf.ord];
        DocIdAndVersion result = lookup.lookupVersion(term.bytes(), loadSeqNo, leaf);
        if (result != null) {
            return result;
        }
    }
    return null;
}
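A hedged usage sketch for the method above (the "_id" field name and the Uid.encodeId helper are assumptions about how the uid term is built in Elasticsearch, shown for illustration only):

// Illustrative only: resolve the current version of a document by its uid term.
Term uidTerm = new Term("_id", Uid.encodeId("my-doc-id")); // field name and helper are assumed
DocIdAndVersion docIdAndVersion = loadDocIdAndVersion(reader, uidTerm, true);
if (docIdAndVersion != null) {
    long version = docIdAndVersion.version; // also carries the internal doc ID
}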
private IterableRow getIterableRow(String rowId, IndexSearcherCloseable searcher) throws IOException {
    IndexReader indexReader = searcher.getIndexReader();
    BytesRef rowIdRef = new BytesRef(rowId);
    List<AtomicReaderTermsEnum> possibleRowIds = new ArrayList<AtomicReaderTermsEnum>();
    for (AtomicReaderContext atomicReaderContext : indexReader.leaves()) {
        AtomicReader atomicReader = atomicReaderContext.reader();
        Fields fields = atomicReader.fields();
        if (fields == null) {
            continue;
        }
        Terms terms = fields.terms(BlurConstants.ROW_ID);
        if (terms == null) {
            continue;
        }
        TermsEnum termsEnum = terms.iterator(null);
        if (!termsEnum.seekExact(rowIdRef, true)) {
            continue;
        }
        // need atomic read as well...
        possibleRowIds.add(new AtomicReaderTermsEnum(atomicReader, termsEnum));
    }
    if (possibleRowIds.isEmpty()) {
        return null;
    }
    return new IterableRow(rowId, getRecords(possibleRowIds));
}
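Note that this snippet targets the Lucene 4.x API, where leaves are AtomicReaderContexts, terms are reached through AtomicReader.fields(), and both the reuse argument of terms.iterator(null) and the useCache flag of seekExact(rowIdRef, true) still existed. On Lucene 5+ the same per-segment seek looks roughly like this (a sketch, not Blur code):

// Modern-API sketch of the same exact seek per segment (Lucene 5+).
for (LeafReaderContext context : indexReader.leaves()) {
    Terms terms = context.reader().terms(BlurConstants.ROW_ID);
    if (terms == null) {
        continue;
    }
    TermsEnum termsEnum = terms.iterator(); // reuse argument was removed
    if (termsEnum.seekExact(rowIdRef)) {    // useCache flag was removed
        // this segment contains the row id
    }
}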
private void countAllMultiValued(IndexReader reader, String field) throws IOException {
    for (LeafReaderContext context : reader.leaves()) {
        SortedNumericDocValues values = context.reader().getSortedNumericDocValues(field);
        if (values == null) {
            // this field has no doc values for this segment
            continue;
        }
        NumericDocValues singleValues = DocValues.unwrapSingleton(values);
        if (singleValues != null) {
            countAllOneSegment(singleValues);
        } else {
            int doc;
            while ((doc = values.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                int limit = values.docValueCount();
                totCount += limit;
                for (int i = 0; i < limit; i++) {
                    increment(values.nextValue());
                }
            }
        }
    }
}
protected final DocsStats docsStats(IndexReader indexReader) {
    long numDocs = 0;
    long numDeletedDocs = 0;
    long sizeInBytes = 0;
    // we don't wait for pending refreshes here since it's a stats call; instead we mark it as accessed only,
    // which will cause the next scheduled refresh to go through and refresh the stats as well
    for (LeafReaderContext readerContext : indexReader.leaves()) {
        // we go down to the segment level here to get accurate numbers
        final SegmentReader segmentReader = Lucene.segmentReader(readerContext.reader());
        SegmentCommitInfo info = segmentReader.getSegmentInfo();
        numDocs += readerContext.reader().numDocs();
        numDeletedDocs += readerContext.reader().numDeletedDocs();
        try {
            sizeInBytes += info.sizeInBytes();
        } catch (IOException e) {
            logger.trace(() -> new ParameterizedMessage("failed to get size for [{}]", info.info.name), e);
        }
    }
    return new DocsStats(numDocs, numDeletedDocs, sizeInBytes);
}
private Query newTermQuery(IndexReader reader, Term term) throws IOException {
    // we build an artificial TermStates that will give an overall df and ttf
    // equal to 1
    TermStates termStates = new TermStates(reader.getContext());
    for (LeafReaderContext leafContext : reader.leaves()) {
        Terms terms = leafContext.reader().terms(term.field());
        if (terms != null) {
            TermsEnum termsEnum = terms.iterator();
            if (termsEnum.seekExact(term.bytes())) {
                int freq = 1 - termStates.docFreq(); // we want the total df and ttf to be 1
                termStates.register(termsEnum.termState(), leafContext.ord, freq, freq);
            }
        }
    }
    return new TermQuery(term, termStates);
}
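The register call accumulates statistics, so the arithmetic above pins the totals at exactly 1 no matter how many segments contain the term:

// Worked trace of the df/ttf arithmetic (illustrative comments, not library code):
// first matching segment:  freq = 1 - 0 = 1  -> registered df and ttf become 1
// later matching segments: freq = 1 - 1 = 0  -> totals stay at 1
// net effect: the TermQuery scores with df == ttf == 1, flattening IDF across terms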
/**
 * Given an IndexReader, asserts that there is at least one AtomicReader leaf,
 * and that all LeafReader leaves are SegmentReaders that have a compound
 * file status matching the expected input.
 */
private static void assertCompoundSegments(IndexReader reader, boolean compound) {
    assertNotNull("Null leaves", reader.leaves());
    assertTrue("no leaves", 0 < reader.leaves().size());
    for (LeafReaderContext atomic : reader.leaves()) {
        assertTrue("not a segment reader: " + atomic.reader().toString(),
                   atomic.reader() instanceof SegmentReader);
        assertEquals("Compound status incorrect for: " + atomic.reader().toString(),
                     compound,
                     ((SegmentReader) atomic.reader()).getSegmentInfo().info.getUseCompoundFile());
    }
}
/**
 * Returns a Document representing the specified document ID (combination of resource and context), or null when no
 * such Document exists yet.
 */
private Document getDocument(Term idTerm) throws IOException {
    IndexReader reader = getIndexReader();
    List<LeafReaderContext> leaves = reader.leaves();
    int size = leaves.size();
    for (int i = 0; i < size; i++) {
        LeafReader lreader = leaves.get(i).reader();
        Document document = getDocument(lreader, idTerm);
        if (document != null) {
            return document;
        }
    }
    // no such Document
    return null;
}
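The per-leaf getDocument(LeafReader, Term) helper is not shown here. Assuming the id term matches at most one live document per segment, it could be sketched with a postings lookup like this (an assumption-laden sketch, not the original helper):

// Hedged sketch of the per-leaf lookup; assumes the id term matches at most one live doc.
private static Document getDocument(LeafReader reader, Term term) throws IOException {
    PostingsEnum postings = reader.postings(term); // null if the field or term is absent
    if (postings == null) {
        return null;
    }
    Bits liveDocs = reader.getLiveDocs();
    int docId;
    while ((docId = postings.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
        if (liveDocs == null || liveDocs.get(docId)) {
            return reader.document(docId);
        }
    }
    return null;
}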
/**
 * Returns a list of Documents representing the specified Resource (empty when no such Document exists yet). Each
 * document represents a set of statements with the specified Resource as a subject, which are stored in a specific
 * context.
 */
private List<Document> getDocuments(Term uriTerm) throws IOException {
    List<Document> result = new ArrayList<>();
    IndexReader reader = getIndexReader();
    List<LeafReaderContext> leaves = reader.leaves();
    int size = leaves.size();
    for (int i = 0; i < size; i++) {
        LeafReader lreader = leaves.get(i).reader();
        addDocuments(lreader, uriTerm, result);
    }
    return result;
}
@Override
public Query rewrite(IndexReader reader) throws IOException {
    if (this.terms.isEmpty()) {
        return new MatchNoDocsQuery("CommonTermsQuery with no terms");
    } else if (this.terms.size() == 1) {
        return newTermQuery(this.terms.get(0), null);
    }
    final List<LeafReaderContext> leaves = reader.leaves();
    final int maxDoc = reader.maxDoc();
    final TermStates[] contextArray = new TermStates[terms.size()];
    final Term[] queryTerms = this.terms.toArray(new Term[0]);
    collectTermStates(reader, leaves, contextArray, queryTerms);
    return buildQuery(maxDoc, contextArray, queryTerms);
}
private void countAll(LongValuesSource valueSource, String field, IndexReader reader) throws IOException {
    for (LeafReaderContext context : reader.leaves()) {
        LongValues fv = valueSource.getValues(context, null);
        int maxDoc = context.reader().maxDoc();
        for (int doc = 0; doc < maxDoc; doc++) {
            // Skip missing docs:
            if (fv.advanceExact(doc)) {
                increment(fv.longValue());
                totCount++;
            }
        }
    }
}
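A possible invocation, assuming the counted values live in a numeric doc-values field (LongValuesSource.fromLongField is standard Lucene API; the "price" field name is illustrative):

// Illustrative call: count every value of a long doc-values field named "price".
LongValuesSource source = LongValuesSource.fromLongField("price");
countAll(source, "price", reader);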
/**
 * Check that the given index is good to use for block joins.
 * @throws IllegalStateException if the index does not have an appropriate structure
 */
public static void check(IndexReader reader, BitSetProducer parentsFilter) throws IOException {
    for (LeafReaderContext context : reader.leaves()) {
        if (context.reader().maxDoc() == 0) {
            continue;
        }
        final BitSet parents = parentsFilter.getBitSet(context);
        if (parents == null || parents.cardinality() == 0) {
            throw new IllegalStateException("Every segment should have at least one parent, but " + context.reader() + " does not have any");
        }
        if (parents.get(context.reader().maxDoc() - 1) == false) {
            throw new IllegalStateException("The last document of a segment must always be a parent, but " + context.reader() + " has a child as a last doc");
        }
        final Bits liveDocs = context.reader().getLiveDocs();
        if (liveDocs != null) {
            int prevParentDoc = -1;
            DocIdSetIterator it = new BitSetIterator(parents, 0L);
            for (int parentDoc = it.nextDoc(); parentDoc != DocIdSetIterator.NO_MORE_DOCS; parentDoc = it.nextDoc()) {
                final boolean parentIsLive = liveDocs.get(parentDoc);
                for (int child = prevParentDoc + 1; child != parentDoc; child++) {
                    final boolean childIsLive = liveDocs.get(child);
                    if (parentIsLive != childIsLive) {
                        if (childIsLive) {
                            throw new IllegalStateException("Parent doc " + parentDoc + " of segment " + context.reader() + " is deleted but has a live child document " + child);
                        } else {
                            throw new IllegalStateException("Parent doc " + parentDoc + " of segment " + context.reader() + " is live but has a deleted child document " + child);
                        }
                    }
                }
                prevParentDoc = parentDoc;
            }
        }
    }
}
private Document getParentDoc(IndexReader reader, BitSetProducer parents, int childDocID) throws IOException {
    final List<LeafReaderContext> leaves = reader.leaves();
    final int subIndex = ReaderUtil.subIndex(childDocID, leaves);
    final LeafReaderContext leaf = leaves.get(subIndex);
    final BitSet bits = parents.getBitSet(leaf);
    return leaf.reader().document(bits.nextSetBit(childDocID - leaf.docBase));
}
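ReaderUtil.subIndex does a binary search over the leaves' docBase values to find the segment that owns a top-level doc ID; the reverse mapping is a plain addition. A small sketch (globalDocId is a placeholder variable):

// Mapping between top-level and segment-local doc IDs (sketch):
int subIndex = ReaderUtil.subIndex(globalDocId, reader.leaves()); // which leaf owns it
LeafReaderContext leaf = reader.leaves().get(subIndex);
int localDocId = globalDocId - leaf.docBase;  // top-level -> segment-local
int backToGlobal = localDocId + leaf.docBase; // segment-local -> top-level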
QueryTermFilter(IndexReader reader) throws IOException {
    for (LeafReaderContext ctx : reader.leaves()) {
        for (FieldInfo fi : ctx.reader().getFieldInfos()) {
            BytesRefHash terms = termsHash.computeIfAbsent(fi.name, f -> new BytesRefHash());
            Terms t = ctx.reader().terms(fi.name);
            if (t != null) {
                TermsEnum te = t.iterator();
                BytesRef term;
                while ((term = te.next()) != null) {
                    terms.add(term);
                }
            }
        }
    }
}
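Once built, the per-field BytesRefHash sets can be probed to ask whether the index contains a term at all; a hedged example (the "body" field name and direct termsHash access are assumptions, and BytesRefHash.find returns a non-negative id when the bytes are present):

// Hedged usage: check whether any segment indexed the term bytes for a field.
BytesRefHash bodyTerms = termsHash.get("body"); // "body" is a placeholder field name
boolean indexed = bodyTerms != null && bodyTerms.find(new BytesRef("lucene")) >= 0;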
private void getPrefixTerms(ObjectHashSet<Term> terms, final Term prefix, final IndexReader reader) throws IOException {
    // SlowCompositeReaderWrapper could be used... but this would merge all terms from each segment into one terms
    // instance, which is very expensive. Therefore I think it is better to iterate over each leaf individually.
    List<LeafReaderContext> leaves = reader.leaves();
    for (LeafReaderContext leaf : leaves) {
        Terms _terms = leaf.reader().terms(field);
        if (_terms == null) {
            continue;
        }
        TermsEnum termsEnum = _terms.iterator();
        TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(prefix.bytes());
        if (TermsEnum.SeekStatus.END == seekStatus) {
            continue;
        }
        for (BytesRef term = termsEnum.term(); term != null; term = termsEnum.next()) {
            if (!StringHelper.startsWith(term, prefix.bytes())) {
                break;
            }
            terms.add(new Term(field, BytesRef.deepCopyOf(term)));
            if (terms.size() >= maxExpansions) {
                return;
            }
        }
    }
}
public FilterableTermsEnum(IndexReader reader, String field, int docsEnumFlag, @Nullable Query filter) throws IOException {
    if ((docsEnumFlag != PostingsEnum.FREQS) && (docsEnumFlag != PostingsEnum.NONE)) {
        throw new IllegalArgumentException("invalid docsEnumFlag of " + docsEnumFlag);
    }
    this.docsEnumFlag = docsEnumFlag;
    if (filter == null) {
        // Important - need to use the doc count that includes deleted docs
        // or we have this issue: https://github.com/elasticsearch/elasticsearch/issues/7951
        numDocs = reader.maxDoc();
    }
    List<LeafReaderContext> leaves = reader.leaves();
    List<Holder> enums = new ArrayList<>(leaves.size());
    final Weight weight;
    if (filter == null) {
        weight = null;
    } else {
        final IndexSearcher searcher = new IndexSearcher(reader);
        searcher.setQueryCache(null);
        weight = searcher.createNormalizedWeight(filter, false);
    }
    for (LeafReaderContext context : leaves) {
        Terms terms = context.reader().terms(field);
        if (terms == null) {
            continue;
        }
        TermsEnum termsEnum = terms.iterator();
        if (termsEnum == null) {
            continue;
        }
        BitSet bits = null;
        if (weight != null) {
            Scorer scorer = weight.scorer(context);
            if (scorer == null) {
                // fully filtered, none matching, no need to iterate on this
                continue;
            }
            DocIdSetIterator docs = scorer.iterator();
            // we want to force apply deleted docs
            final Bits liveDocs = context.reader().getLiveDocs();
            if (liveDocs != null) {
                docs = new FilteredDocIdSetIterator(docs) {
                    @Override
                    protected boolean match(int doc) {
                        return liveDocs.get(doc);
                    }
                };
            }
            BitDocIdSet.Builder builder = new BitDocIdSet.Builder(context.reader().maxDoc());
            builder.or(docs);
            bits = builder.build().bits();
            // Count how many docs are in our filtered set
            // TODO make this lazy-loaded only for those that need it?
            numDocs += bits.cardinality();
        }
        enums.add(new Holder(termsEnum, bits));
    }
    this.enums = enums.toArray(new Holder[enums.size()]);
}
@Override
public Query rewrite(IndexReader reader) throws IOException {
    byte type = 0;
    boolean first = true;
    Terms terms;
    for (LeafReaderContext context : reader.leaves()) {
        LeafReader leafReader = context.reader();
        try {
            if ((terms = leafReader.terms(getField())) == null) {
                continue;
            }
        } catch (IOException e) {
            continue;
        }
        if (terms instanceof CompletionTerms) {
            CompletionTerms completionTerms = (CompletionTerms) terms;
            byte t = completionTerms.getType();
            if (first) {
                type = t;
                first = false;
            } else if (type != t) {
                throw new IllegalStateException(getField() + " has values of multiple types");
            }
        }
    }
    if (first == false) {
        if (this instanceof ContextQuery) {
            if (type == SuggestField.TYPE) {
                throw new IllegalStateException(this.getClass().getSimpleName()
                        + " can not be executed against a non context-enabled SuggestField: "
                        + getField());
            }
        } else {
            if (type == ContextSuggestField.TYPE) {
                return new ContextQuery(this);
            }
        }
    }
    return super.rewrite(reader);
}
/**
 * Indexes the data from the given {@link Dictionary}.
 * @param dict Dictionary to index
 * @param config {@link IndexWriterConfig} to use
 * @param fullMerge whether or not the spellcheck index should be fully merged
 * @throws AlreadyClosedException if the Spellchecker is already closed
 * @throws IOException If there is a low-level I/O error.
 */
public final void indexDictionary(Dictionary dict, IndexWriterConfig config, boolean fullMerge) throws IOException {
    synchronized (modifyCurrentIndexLock) {
        ensureOpen();
        final Directory dir = this.spellIndex;
        final IndexWriter writer = new IndexWriter(dir, config);
        IndexSearcher indexSearcher = obtainSearcher();
        final List<TermsEnum> termsEnums = new ArrayList<>();
        final IndexReader reader = searcher.getIndexReader();
        if (reader.maxDoc() > 0) {
            for (final LeafReaderContext ctx : reader.leaves()) {
                Terms terms = ctx.reader().terms(F_WORD);
                if (terms != null)
                    termsEnums.add(terms.iterator());
            }
        }
        boolean isEmpty = termsEnums.isEmpty();
        try {
            BytesRefIterator iter = dict.getEntryIterator();
            BytesRef currentTerm;
            terms: while ((currentTerm = iter.next()) != null) {
                String word = currentTerm.utf8ToString();
                int len = word.length();
                if (len < 3) {
                    continue; // too short we bail but "too long" is fine...
                }
                if (!isEmpty) {
                    for (TermsEnum te : termsEnums) {
                        if (te.seekExact(currentTerm)) {
                            continue terms;
                        }
                    }
                }
                // ok index the word
                Document doc = createDocument(word, getMin(len), getMax(len));
                writer.addDocument(doc);
            }
        } finally {
            releaseSearcher(indexSearcher);
        }
        if (fullMerge) {
            writer.forceMerge(1);
        }
        // close writer
        writer.close();
        // TODO: this isn't that great, maybe in the future SpellChecker should take
        // IWC in its ctor / keep its writer open?
        // also re-open the spell index to see our own changes when the next suggestion
        // is fetched:
        swapSearcher(dir);
    }
}
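A typical invocation might look like the following (the spellIndexDirectory variable and words.txt path are illustrative; SpellChecker and PlainTextDictionary come from Lucene's spellchecker/suggest module):

// Illustrative usage: build the spellcheck index from a word list, one word per line.
SpellChecker spellChecker = new SpellChecker(spellIndexDirectory); // an existing Directory
try (Reader words = Files.newBufferedReader(Paths.get("words.txt"))) {
    spellChecker.indexDictionary(new PlainTextDictionary(words), new IndexWriterConfig(), true);
}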
private static PerThreadIDVersionAndSeqNoLookup[] getLookupState(IndexReader reader, String uidField) throws IOException {
    // We cache on the top level.
    // This means cache entries have a shorter lifetime, maybe as low as 1s with the
    // default refresh interval and a steady indexing rate, but on the other hand it
    // proved to be cheaper than having to perform a CHM and a TL get for every segment.
    // See https://github.com/elastic/elasticsearch/pull/19856.
    IndexReader.CacheHelper cacheHelper = reader.getReaderCacheHelper();
    CloseableThreadLocal<PerThreadIDVersionAndSeqNoLookup[]> ctl = LOOKUP_STATES.get(cacheHelper.getKey());
    if (ctl == null) {
        // First time we are seeing this reader's core; make a new CTL:
        ctl = new CloseableThreadLocal<>();
        CloseableThreadLocal<PerThreadIDVersionAndSeqNoLookup[]> other = LOOKUP_STATES.putIfAbsent(cacheHelper.getKey(), ctl);
        if (other == null) {
            // Our CTL won, we must remove it when the reader is closed:
            cacheHelper.addClosedListener(REMOVE_LOOKUP_STATE);
        } else {
            // Another thread beat us to it: just use their CTL:
            ctl = other;
        }
    }
    PerThreadIDVersionAndSeqNoLookup[] lookupState = ctl.get();
    if (lookupState == null) {
        lookupState = new PerThreadIDVersionAndSeqNoLookup[reader.leaves().size()];
        for (LeafReaderContext leaf : reader.leaves()) {
            lookupState[leaf.ord] = new PerThreadIDVersionAndSeqNoLookup(leaf.reader(), uidField);
        }
        ctl.set(lookupState);
    }
    if (lookupState.length != reader.leaves().size()) {
        throw new AssertionError("Mismatched numbers of leaves: " + lookupState.length + " != " + reader.leaves().size());
    }
    if (lookupState.length > 0 && Objects.equals(lookupState[0].uidField, uidField) == false) {
        throw new AssertionError("Index does not consistently use the same uid field: ["
                + uidField + "] != [" + lookupState[0].uidField + "]");
    }
    return lookupState;
}
/** This runs the CheckIndex tool on the Reader. If any
 *  issues are hit, a RuntimeException is thrown */
public static void checkReader(IndexReader reader) throws IOException {
    for (LeafReaderContext context : reader.leaves()) {
        checkReader(context.reader(), true);
    }
}
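Typical test usage, assuming dir is a Directory holding the index under test (a sketch; the method throws rather than returning a status):

// Illustrative test usage: validate every segment of a freshly opened reader.
try (IndexReader reader = DirectoryReader.open(dir)) { // dir is an assumed Directory
    checkReader(reader); // throws RuntimeException if CheckIndex finds a problem
}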
@Test
public void testEstimator() throws Exception {
    JettySolrRunner jetty = cluster.getRandomJetty(random());
    String randomCoreName = jetty.getCoreContainer().getAllCoreNames().iterator().next();
    SolrCore core = jetty.getCoreContainer().getCore(randomCoreName);
    RefCounted<SolrIndexSearcher> searcherRef = core.getSearcher();
    try {
        SolrIndexSearcher searcher = searcherRef.get();
        // limit the max length
        IndexSizeEstimator estimator = new IndexSizeEstimator(searcher.getRawReader(), 20, 50, true, true);
        IndexSizeEstimator.Estimate estimate = estimator.estimate();
        Map<String, Long> fieldsBySize = estimate.getFieldsBySize();
        assertFalse("empty fieldsBySize", fieldsBySize.isEmpty());
        assertEquals(fieldsBySize.toString(), fields.size(), fieldsBySize.size());
        fieldsBySize.forEach((k, v) -> assertTrue("unexpected size of " + k + ": " + v, v > 0));
        Map<String, Long> typesBySize = estimate.getTypesBySize();
        assertFalse("empty typesBySize", typesBySize.isEmpty());
        assertTrue("expected at least 8 types: " + typesBySize.toString(), typesBySize.size() >= 8);
        typesBySize.forEach((k, v) -> assertTrue("unexpected size of " + k + ": " + v, v > 0));
        Map<String, Object> summary = estimate.getSummary();
        assertNotNull("summary", summary);
        assertFalse("empty summary", summary.isEmpty());
        assertEquals(summary.keySet().toString(), fields.size(), summary.keySet().size());
        Map<String, Object> details = estimate.getDetails();
        assertNotNull("details", details);
        assertFalse("empty details", details.isEmpty());
        // by type
        assertEquals(details.keySet().toString(), 6, details.keySet().size());
        // check sampling
        estimator.setSamplingThreshold(searcher.getRawReader().maxDoc() / 2);
        IndexSizeEstimator.Estimate sampledEstimate = estimator.estimate();
        Map<String, Long> sampledFieldsBySize = sampledEstimate.getFieldsBySize();
        assertFalse("empty fieldsBySize", sampledFieldsBySize.isEmpty());
        // verify that the sampled values are within 50% of the original values
        fieldsBySize.forEach((field, size) -> {
            Long sampledSize = sampledFieldsBySize.get(field);
            assertNotNull("sampled size for " + field + " is missing in " + sampledFieldsBySize, sampledSize);
            double delta = (double) size * 0.5;
            assertEquals("sampled size of " + field + " is wildly off", (double) size, (double) sampledSize, delta);
        });
        // verify the reader is still usable - SOLR-13694
        IndexReader reader = searcher.getRawReader();
        for (LeafReaderContext context : reader.leaves()) {
            LeafReader leafReader = context.reader();
            assertTrue("unexpected LeafReader class: " + leafReader.getClass().getName(), leafReader instanceof CodecReader);
            Bits liveDocs = leafReader.getLiveDocs();
            CodecReader codecReader = (CodecReader) leafReader;
            StoredFieldsReader storedFieldsReader = codecReader.getFieldsReader();
            StoredFieldVisitor visitor = new DocumentStoredFieldVisitor();
            assertNotNull(storedFieldsReader);
            for (int docId = 0; docId < leafReader.maxDoc(); docId++) {
                if (liveDocs != null && !liveDocs.get(docId)) {
                    continue;
                }
                storedFieldsReader.visitDocument(docId, visitor);
            }
        }
    } finally {
        searcherRef.decref();
        core.close();
    }
}