下面列出了org.apache.lucene.index.DocValues#emptySorted ( ) 实例代码,或者点击链接到github查看源代码,也可以在右侧发表评论。
private FieldComparator<?> getStringComparator(int numHits) {
return new FieldComparator.TermOrdValComparator(numHits, getField(), missingValue == STRING_LAST) {
@Override
protected SortedDocValues getSortedDocValues(LeafReaderContext context, String field) throws IOException {
SortedSetDocValues sortedSet = DocValues.getSortedSet(context.reader(), field);
final BlockJoinSelector.Type type = order
? BlockJoinSelector.Type.MAX
: BlockJoinSelector.Type.MIN;
final BitSet parents = parentFilter.getBitSet(context);
final BitSet children = childFilter.getBitSet(context);
if (children == null) {
return DocValues.emptySorted();
}
return BlockJoinSelector.wrap(sortedSet, type, parents, toIter(children));
}
};
}
public SortedDocValues getTermsIndex(LeafReader reader, String field, float acceptableOverheadRatio) throws IOException {
SortedDocValues valuesIn = reader.getSortedDocValues(field);
if (valuesIn != null) {
// Not cached here by FieldCacheImpl (cached instead
// per-thread by SegmentReader):
return valuesIn;
} else {
final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
if (info == null) {
return DocValues.emptySorted();
} else if (info.getDocValuesType() != DocValuesType.NONE) {
// we don't try to build a sorted instance from numeric/binary doc
// values because dedup can be very costly
throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType());
} else if (info.getIndexOptions() == IndexOptions.NONE) {
return DocValues.emptySorted();
}
SortedDocValuesImpl impl = (SortedDocValuesImpl) caches.get(SortedDocValues.class).get(reader, new CacheKey(field, acceptableOverheadRatio));
return impl.iterator();
}
}
public static AtomicParentChildFieldData empty() {
return new AbstractAtomicParentChildFieldData() {
@Override
public long ramBytesUsed() {
return 0;
}
@Override
public Collection<Accountable> getChildResources() {
return Collections.emptyList();
}
@Override
public void close() {
}
@Override
public SortedDocValues getOrdinalsValues(String type) {
return DocValues.emptySorted();
}
@Override
public Set<String> types() {
return ImmutableSet.of();
}
};
}
@Override
public SortedDocValues getOrdinalsValues(String type) {
AtomicOrdinalsFieldData atomicFieldData = typeToIds.get(type);
if (atomicFieldData != null) {
return MultiValueMode.MIN.select(atomicFieldData.getOrdinalsValues());
} else {
return DocValues.emptySorted();
}
}
public static SortedDocValues getSortedDocValues(QueryContext context, SchemaField field, QParser qparser) throws IOException {
SortedDocValues si = context.searcher().getSlowAtomicReader().getSortedDocValues( field.getName() );
// if (!field.hasDocValues() && (field.getType() instanceof StrField || field.getType() instanceof TextField)) {
// }
return si == null ? DocValues.emptySorted() : si;
}
@Override
protected void collectDocs() throws IOException {
int domainSize = fcontext.base.size();
if (nTerms <= 0 || domainSize < effectiveMincount) { // TODO: what about allBuckets? missing bucket?
return;
}
// TODO: refactor some of this logic into a base class
boolean countOnly = collectAcc==null && allBucketsAcc==null;
boolean fullRange = startTermIndex == 0 && endTermIndex == si.getValueCount();
// Are we expecting many hits per bucket?
// FUTURE: pro-rate for nTerms?
// FUTURE: better take into account number of values in multi-valued fields. This info is available for indexed fields.
// FUTURE: take into account that bigger ord maps are more expensive than smaller ones
// One test: 5M doc index, faceting on a single-valued field with almost 1M unique values, crossover point where global counting was slower
// than per-segment counting was a domain of 658k docs. At that point, top 10 buckets had 6-7 matches each.
// this was for heap docvalues produced by UninvertingReader
// Since these values were randomly distributed, lets round our domain multiplier up to account for less random real world data.
long domainMultiplier = multiValuedField ? 4L : 2L;
boolean manyHitsPerBucket = domainSize * domainMultiplier > (si.getValueCount() + 3); // +3 to increase test coverage with small tests
// If we're only calculating counts, we're not prefixing, and we expect to collect many documents per unique value,
// then collect per-segment before mapping to global ords at the end. This will save redundant seg->global ord mappings.
// FUTURE: there are probably some other non "countOnly" cases where we can use this as well (i.e. those where
// the docid is not used)
boolean canDoPerSeg = countOnly && fullRange;
boolean accumSeg = manyHitsPerBucket && canDoPerSeg;
if (freq.perSeg != null) accumSeg = canDoPerSeg && freq.perSeg; // internal - override perSeg heuristic
final List<LeafReaderContext> leaves = fcontext.searcher.getIndexReader().leaves();
Filter filter = fcontext.base.getTopFilter();
for (int subIdx = 0; subIdx < leaves.size(); subIdx++) {
LeafReaderContext subCtx = leaves.get(subIdx);
setNextReaderFirstPhase(subCtx);
DocIdSet dis = filter.getDocIdSet(subCtx, null); // solr docsets already exclude any deleted docs
DocIdSetIterator disi = dis.iterator();
SortedDocValues singleDv = null;
SortedSetDocValues multiDv = null;
if (multiValuedField) {
// TODO: get sub from multi?
multiDv = subCtx.reader().getSortedSetDocValues(sf.getName());
if (multiDv == null) {
multiDv = DocValues.emptySortedSet();
}
// some codecs may optimize SortedSet storage for single-valued fields
// this will be null if this is not a wrapped single valued docvalues.
if (unwrap_singleValued_multiDv) {
singleDv = DocValues.unwrapSingleton(multiDv);
}
} else {
singleDv = subCtx.reader().getSortedDocValues(sf.getName());
if (singleDv == null) {
singleDv = DocValues.emptySorted();
}
}
LongValues toGlobal = ordinalMap == null ? null : ordinalMap.getGlobalOrds(subIdx);
if (singleDv != null) {
if (accumSeg) {
collectPerSeg(singleDv, disi, toGlobal);
} else {
if (canDoPerSeg && toGlobal != null) {
collectCounts(singleDv, disi, toGlobal);
} else {
collectDocs(singleDv, disi, toGlobal);
}
}
} else {
if (accumSeg) {
collectPerSeg(multiDv, disi, toGlobal);
} else {
if (canDoPerSeg && toGlobal != null) {
collectCounts(multiDv, disi, toGlobal);
} else {
collectDocs(multiDv, disi, toGlobal);
}
}
}
}
reuse = null; // better GC
}