下面列出了 org.apache.lucene.index.DocValues#unwrapSingleton() 的实例代码。您可以点击链接到 GitHub 查看源代码,也可以在右侧发表评论。
/**
 * Counts every value of {@code field} across all segments of {@code reader}.
 *
 * <p>Segments without doc values for the field are skipped. When the segment's values
 * unwrap to a single-valued view the work is handed to {@code countAllOneSegment};
 * otherwise each document's value count is added to {@code totCount} and every value
 * is passed to {@code increment}.
 *
 * @param reader the index reader whose leaves are visited
 * @param field the name of the sorted-numeric doc values field
 * @throws IOException if reading the doc values fails
 */
private void countAllMultiValued(IndexReader reader, String field) throws IOException {
  for (LeafReaderContext leaf : reader.leaves()) {
    SortedNumericDocValues multiValues = leaf.reader().getSortedNumericDocValues(field);
    if (multiValues == null) {
      // nothing was indexed for this field in this segment
      continue;
    }

    NumericDocValues single = DocValues.unwrapSingleton(multiValues);
    if (single != null) {
      countAllOneSegment(single);
      continue;
    }

    // genuinely multi-valued segment: walk every document that has values
    while (multiValues.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
      final int valueCount = multiValues.docValueCount();
      totCount += valueCount;
      for (int remaining = valueCount; remaining > 0; remaining--) {
        increment(multiValues.nextValue());
      }
    }
  }
}
/**
 * Wraps a multi-valued SortedSetDocValues as a single-valued view, using the specified selector.
 *
 * @param sortedSet the multi-valued doc values to expose as single-valued
 * @param selector which value of each document the view should expose
 * @return the underlying single-valued doc values when {@code sortedSet} unwraps to a
 *     singleton, otherwise a selector-specific view over {@code sortedSet}
 * @throws UnsupportedOperationException if {@code sortedSet} reports
 *     {@code Integer.MAX_VALUE} or more values
 */
public static SortedDocValues wrap(SortedSetDocValues sortedSet, Type selector) {
  if (sortedSet.getValueCount() >= Integer.MAX_VALUE) {
    throw new UnsupportedOperationException("fields containing more than " + (Integer.MAX_VALUE-1) + " unique terms are unsupported");
  }

  final SortedDocValues unwrapped = DocValues.unwrapSingleton(sortedSet);
  if (unwrapped != null) {
    // Indexed as multi-valued but single-valued in practice: use the underlying
    // single-valued doc values directly. This shortcut is safe for every selector.
    return unwrapped;
  }

  switch (selector) {
    case MIN:
      return new MinValue(sortedSet);
    case MAX:
      return new MaxValue(sortedSet);
    case MIDDLE_MIN:
      return new MiddleMinValue(sortedSet);
    case MIDDLE_MAX:
      return new MiddleMaxValue(sortedSet);
    default:
      throw new AssertionError();
  }
}
/**
 * Loads the sorted-numeric doc values of {@code field} and exposes them as double values.
 *
 * @return a singleton wrapper when the doc values unwrap to a single-valued view,
 *     otherwise a multi-valued wrapper over the raw doc values
 * @throws IllegalStateException if loading the doc values raises an {@link IOException}
 */
@Override
public SortedNumericDoubleValues getDoubleValues() {
  try {
    final SortedNumericDocValues multiValued = DocValues.getSortedNumeric(reader, field);
    final NumericDocValues singleValued = DocValues.unwrapSingleton(multiValued);
    if (singleValued == null) {
      return new MultiFloatValues(multiValued);
    }
    return FieldData.singleton(new SingleFloatValues(singleValued), DocValues.unwrapSingletonBits(multiValued));
  } catch (IOException e) {
    throw new IllegalStateException("Cannot load doc values", e);
  }
}
/**
 * Builds geo-point values on top of this field's ordinals.
 *
 * <p>When the ordinals unwrap to a single-valued view, a per-document lookup is wrapped
 * as a singleton; otherwise a multi-valued view backed directly by the ordinals is
 * returned. Both views reset and return the same {@code GeoPoint} instance on every call.
 *
 * @return the geo-point values for this field
 */
@Override
public MultiGeoPointValues getGeoPointValues() {
  final RandomAccessOrds multiOrds = ordinals.ordinals();
  final SortedDocValues singleOrds = DocValues.unwrapSingleton(multiOrds);
  // one GeoPoint instance is reset and handed back by every lookup below
  final GeoPoint scratch = new GeoPoint(Double.NaN, Double.NaN);

  if (singleOrds == null) {
    return new MultiGeoPointValues() {
      @Override
      public void setDocument(int docId) {
        multiOrds.setDocument(docId);
      }

      @Override
      public int count() {
        return multiOrds.cardinality();
      }

      @Override
      public GeoPoint valueAt(int index) {
        return scratch.resetFromIndexHash(indexedPoints.get(multiOrds.ordAt(index)));
      }
    };
  }

  final GeoPointValues singleValues = new GeoPointValues() {
    @Override
    public GeoPoint get(int docID) {
      final int ord = singleOrds.getOrd(docID);
      // a negative ordinal yields a NaN/NaN point
      return ord < 0
          ? scratch.reset(Double.NaN, Double.NaN)
          : scratch.resetFromIndexHash(indexedPoints.get(ord));
    }
  };
  return FieldData.singleton(singleValues, DocValues.docsWithValue(singleOrds, maxDoc));
}
/**
 * Counts directly from SortedNumericDocValues.
 *
 * <p>For each entry of {@code matchingDocs}, segments without doc values for the field
 * are skipped. When the values unwrap to a single-valued view the work is handed to
 * {@code countOneSegment}; otherwise the hit iterator is intersected with the doc values
 * and, for each resulting document, its value count is added to {@code totCount} and
 * every value is passed to {@code increment}.
 *
 * @param field the name of the sorted-numeric doc values field
 * @param matchingDocs the per-segment hits to count over
 * @throws IOException if reading the doc values fails
 */
private void countMultiValued(String field, List<MatchingDocs> matchingDocs) throws IOException {
  for (MatchingDocs segmentHits : matchingDocs) {
    SortedNumericDocValues multiValues = segmentHits.context.reader().getSortedNumericDocValues(field);
    if (multiValues == null) {
      // nothing was indexed for this field in this segment
      continue;
    }

    NumericDocValues single = DocValues.unwrapSingleton(multiValues);
    if (single != null) {
      countOneSegment(single, segmentHits);
      continue;
    }

    // visit only documents that are both hits and carry values
    DocIdSetIterator matches = ConjunctionDISI.intersectIterators(
        Arrays.asList(segmentHits.bits.iterator(), multiValues));
    while (matches.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
      final int valueCount = multiValues.docValueCount();
      totCount += valueCount;
      for (int remaining = valueCount; remaining > 0; remaining--) {
        increment(multiValues.nextValue());
      }
    }
  }
}
/**
 * Creates a constant-score weight that uses an index-sort-based iterator when possible
 * and otherwise delegates to the fallback query's weight.
 *
 * <p>The index-sort path is taken for a segment only when the field's sorted-numeric doc
 * values unwrap to a single-valued view and the segment's index sort has this field as
 * its first sort field.
 *
 * @param searcher the searcher the weight is created for
 * @param scoreMode the score mode passed to the fallback weight and the scorer
 * @param boost the boost applied to the constant score
 * @return the weight described above
 * @throws IOException if creating the fallback weight fails
 */
@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
  final Weight fallbackWeight = fallbackQuery.createWeight(searcher, scoreMode, boost);

  return new ConstantScoreWeight(this, boost) {
    @Override
    public boolean isCacheable(LeafReaderContext ctx) {
      // Both queries should always return the same values, so cacheability of the
      // fallback query decides.
      return fallbackWeight.isCacheable(ctx);
    }

    @Override
    public Scorer scorer(LeafReaderContext context) throws IOException {
      SortedNumericDocValues multiValues = DocValues.getSortedNumeric(context.reader(), field);
      NumericDocValues singleValues = DocValues.unwrapSingleton(multiValues);
      if (singleValues == null) {
        return fallbackWeight.scorer(context);
      }

      Sort indexSort = context.reader().getMetaData().getSort();
      if (indexSort == null) {
        return fallbackWeight.scorer(context);
      }

      SortField[] sortFields = indexSort.getSort();
      if (sortFields.length == 0 || !sortFields[0].getField().equals(field)) {
        return fallbackWeight.scorer(context);
      }

      // the segment is sorted primarily by this field: use the sort-aware iterator
      DocIdSetIterator disi = getDocIdSetIterator(sortFields[0], context, singleValues);
      return new ConstantScoreScorer(this, score(), scoreMode, disi);
    }
  };
}
/**
 * Wraps a multi-valued SortedNumericDocValues as a single-valued view, using the specified
 * selector and numericType.
 *
 * <p>For {@code FLOAT} and {@code DOUBLE} the returned view passes each value through
 * {@code NumericUtils.sortableFloatBits} / {@code NumericUtils.sortableDoubleBits};
 * for the other accepted types the view is returned as is.
 *
 * @param sortedNumeric the multi-valued doc values to expose as single-valued
 * @param selector which value of each document the view should expose ({@code MIN} or {@code MAX})
 * @param numericType the numeric type of the field; must be INT, LONG, FLOAT or DOUBLE
 * @return a single-valued view over {@code sortedNumeric}
 * @throws IllegalArgumentException if {@code numericType} is not one of the four numeric types
 */
public static NumericDocValues wrap(SortedNumericDocValues sortedNumeric, Type selector, SortField.Type numericType) {
  final boolean isNumeric =
      numericType == SortField.Type.INT
          || numericType == SortField.Type.LONG
          || numericType == SortField.Type.FLOAT
          || numericType == SortField.Type.DOUBLE;
  if (!isNumeric) {
    throw new IllegalArgumentException("numericType must be a numeric type");
  }

  final NumericDocValues unwrapped = DocValues.unwrapSingleton(sortedNumeric);
  final NumericDocValues view;
  if (unwrapped == null) {
    switch (selector) {
      case MIN:
        view = new MinValue(sortedNumeric);
        break;
      case MAX:
        view = new MaxValue(sortedNumeric);
        break;
      default:
        throw new AssertionError();
    }
  } else {
    // Indexed as multi-valued but single-valued in practice: use the underlying
    // single-valued doc values directly. This shortcut is safe for every selector.
    view = unwrapped;
  }

  // undo the NumericUtils sortable encoding for floating-point types
  if (numericType == SortField.Type.FLOAT) {
    return new FilterNumericDocValues(view) {
      @Override
      public long longValue() throws IOException {
        return NumericUtils.sortableFloatBits((int) in.longValue());
      }
    };
  }
  if (numericType == SortField.Type.DOUBLE) {
    return new FilterNumericDocValues(view) {
      @Override
      public long longValue() throws IOException {
        return NumericUtils.sortableDoubleBits(in.longValue());
      }
    };
  }
  return view;
}
/**
 * Collects facet data for the base domain, one index segment at a time.
 *
 * <p>Returns immediately when there are no terms or the domain is smaller than
 * {@code effectiveMincount}. Otherwise it decides whether to accumulate per segment
 * ({@code accumSeg}), then for every leaf obtains the filter's iterator and the segment's
 * single- or multi-valued doc values and dispatches to {@code collectPerSeg},
 * {@code collectCounts} or the doc-values overload of {@code collectDocs}.
 *
 * @throws IOException if reading doc values or iterating the filter fails
 */
@Override
protected void collectDocs() throws IOException {
int domainSize = fcontext.base.size();
// nothing to collect: no terms in range, or the domain cannot reach the minimum count
if (nTerms <= 0 || domainSize < effectiveMincount) { // TODO: what about allBuckets? missing bucket?
return;
}
// TODO: refactor some of this logic into a base class
// counts only: no per-bucket accumulator and no all-buckets accumulator is set
boolean countOnly = collectAcc==null && allBucketsAcc==null;
// the requested term window covers every value reported by si
boolean fullRange = startTermIndex == 0 && endTermIndex == si.getValueCount();
// Are we expecting many hits per bucket?
// FUTURE: pro-rate for nTerms?
// FUTURE: better take into account number of values in multi-valued fields. This info is available for indexed fields.
// FUTURE: take into account that bigger ord maps are more expensive than smaller ones
// One test: 5M doc index, faceting on a single-valued field with almost 1M unique values, crossover point where global counting was slower
// than per-segment counting was a domain of 658k docs. At that point, top 10 buckets had 6-7 matches each.
// this was for heap docvalues produced by UninvertingReader
// Since these values were randomly distributed, lets round our domain multiplier up to account for less random real world data.
long domainMultiplier = multiValuedField ? 4L : 2L;
boolean manyHitsPerBucket = domainSize * domainMultiplier > (si.getValueCount() + 3); // +3 to increase test coverage with small tests
// If we're only calculating counts, we're not prefixing, and we expect to collect many documents per unique value,
// then collect per-segment before mapping to global ords at the end. This will save redundant seg->global ord mappings.
// FUTURE: there are probably some other non "countOnly" cases where we can use this as well (i.e. those where
// the docid is not used)
boolean canDoPerSeg = countOnly && fullRange;
boolean accumSeg = manyHitsPerBucket && canDoPerSeg;
if (freq.perSeg != null) accumSeg = canDoPerSeg && freq.perSeg; // internal - override perSeg heuristic
final List<LeafReaderContext> leaves = fcontext.searcher.getIndexReader().leaves();
Filter filter = fcontext.base.getTopFilter();
for (int subIdx = 0; subIdx < leaves.size(); subIdx++) {
LeafReaderContext subCtx = leaves.get(subIdx);
setNextReaderFirstPhase(subCtx);
// NOTE(review): neither dis nor disi is null-checked here - confirm the filter always
// yields a non-null set and iterator for every segment, or that the callees tolerate null.
DocIdSet dis = filter.getDocIdSet(subCtx, null); // solr docsets already exclude any deleted docs
DocIdSetIterator disi = dis.iterator();
// exactly one of singleDv / multiDv drives the dispatch below; singleDv wins when non-null
SortedDocValues singleDv = null;
SortedSetDocValues multiDv = null;
if (multiValuedField) {
// TODO: get sub from multi?
multiDv = subCtx.reader().getSortedSetDocValues(sf.getName());
if (multiDv == null) {
// segment has no values for this field: substitute an empty instance
multiDv = DocValues.emptySortedSet();
}
// some codecs may optimize SortedSet storage for single-valued fields
// this will be null if this is not a wrapped single valued docvalues.
if (unwrap_singleValued_multiDv) {
singleDv = DocValues.unwrapSingleton(multiDv);
}
} else {
singleDv = subCtx.reader().getSortedDocValues(sf.getName());
if (singleDv == null) {
// segment has no values for this field: substitute an empty instance
singleDv = DocValues.emptySorted();
}
}
// segment-to-global ordinal mapping for this leaf, or null when there is no ordinal map
LongValues toGlobal = ordinalMap == null ? null : ordinalMap.getGlobalOrds(subIdx);
if (singleDv != null) {
// single-valued path (including multi-valued fields that unwrapped to a singleton)
if (accumSeg) {
collectPerSeg(singleDv, disi, toGlobal);
} else {
if (canDoPerSeg && toGlobal != null) {
collectCounts(singleDv, disi, toGlobal);
} else {
collectDocs(singleDv, disi, toGlobal);
}
}
} else {
// multi-valued path: same three-way dispatch on the SortedSet doc values
if (accumSeg) {
collectPerSeg(multiDv, disi, toGlobal);
} else {
if (canDoPerSeg && toGlobal != null) {
collectCounts(multiDv, disi, toGlobal);
} else {
collectDocs(multiDv, disi, toGlobal);
}
}
}
}
reuse = null; // better GC
}