下面列出了如何使用 org.apache.lucene.index.SortedSetDocValues 的 API 类示例代码及写法，您也可以点击链接到 GitHub 查看源代码。
/**
 * Reads all sorted-set doc values of one field for a single document.
 *
 * @param docid document whose values are read
 * @param field name of the doc values field
 * @param dvType doc values type handed through to {@code DocValues.of}
 * @return the collected values wrapped in a {@code DocValues}, or an empty {@code Optional}
 *     when {@code advanceExact} reports no value for the document
 * @throws IOException if the underlying doc values cannot be read
 */
private Optional<DocValues> createSortedSetDocValues(int docid, String field, DocValuesType dvType)
    throws IOException {
  SortedSetDocValues ssvalues = IndexUtils.getSortedSetDocvalues(reader, field);
  if (!ssvalues.advanceExact(docid)) {
    return Optional.empty();
  }

  List<BytesRef> terms = new ArrayList<>();
  for (long ord = ssvalues.nextOrd();
      ord != SortedSetDocValues.NO_MORE_ORDS;
      ord = ssvalues.nextOrd()) {
    // Store an independent copy of each looked-up term.
    terms.add(BytesRef.deepCopyOf(ssvalues.lookupOrd(ord)));
  }
  return Optional.of(DocValues.of(dvType, terms, Collections.emptyList()));
}
/**
 * Converts {@link SortedSetDocValues} into an {@code Iterable<BytesRef>} for all the values.
 *
 * <p>Each iterator walks the ordinals from 0 up to (but excluding)
 * {@code values.getValueCount()} and returns {@code values.lookupOrd(ord)} for each of them.
 * An {@link IOException} raised by the lookup is rethrown wrapped in a {@link RuntimeException}.
 *
 * @param values the doc values whose term dictionary is iterated
 * @return an iterable over every value, in ordinal order
 * @deprecated Consume {@link SortedSetDocValues} instead. */
@Deprecated
public static Iterable<BytesRef> valuesIterable(final SortedSetDocValues values) {
  return new Iterable<BytesRef>() {
    @Override
    public Iterator<BytesRef> iterator() {
      return new Iterator<BytesRef>() {
        // Next ordinal to hand out; starts at 0.
        private long nextOrd;

        @Override
        public boolean hasNext() {
          return nextOrd < values.getValueCount();
        }

        @Override
        public BytesRef next() {
          // Honor the Iterator contract instead of looking up an out-of-range ordinal.
          if (!hasNext()) {
            throw new java.util.NoSuchElementException();
          }
          try {
            return values.lookupOrd(nextOrd++);
          } catch (IOException e) {
            throw new RuntimeException(e);
          }
        }
      };
    }
  };
}
/**
 * Creates the per-segment function values for {@code field}.
 *
 * <p>The field's sorted-set doc values are reduced to a single-valued view using the
 * configured {@code selector}, and exposed through a {@code DocTermsIndexDocValues}
 * whose object value is the string value of the document.
 *
 * @param context the value-source context (not used by this method)
 * @param readerContext the segment to read from
 * @return function values backed by the selected view
 * @throws IOException if the doc values cannot be read
 */
@Override
public FunctionValues getValues(Map<Object, Object> context, LeafReaderContext readerContext) throws IOException {
  final SortedDocValues singleValued =
      SortedSetSelector.wrap(DocValues.getSortedSet(readerContext.reader(), field), selector);
  return new DocTermsIndexDocValues(this, singleValued) {
    @Override
    public Object objectVal(int doc) throws IOException {
      return strVal(doc);
    }

    @Override
    protected String toTerm(String readableValue) {
      // The readable value is returned unchanged.
      return readableValue;
    }
  };
}
/**
 * Adds the current score to the running sum and count of every term of {@code doc}.
 *
 * <p>Does nothing when the document has no value in {@code docValues}.
 *
 * @param doc the document being collected
 * @throws IOException if the doc values or the scorer cannot be read
 */
@Override
public void collect(int doc) throws IOException {
  if (!docValues.advanceExact(doc)) {
    return;
  }
  for (long ord = docValues.nextOrd();
      ord != SortedSetDocValues.NO_MORE_ORDS;
      ord = docValues.nextOrd()) {
    int slot = collectedTerms.add(docValues.lookupOrd(ord));
    if (slot < 0) {
      // Negative result is decoded back to a slot index
      // (presumably the term was already present -- confirm against collectedTerms.add).
      slot = -slot - 1;
    } else if (slot >= scoreSums.length) {
      // Non-negative slot beyond the current capacity: grow both parallel arrays.
      scoreSums = ArrayUtil.grow(scoreSums);
      scoreCounts = ArrayUtil.grow(scoreCounts);
    }
    scoreSums[slot] += scorer.score();
    scoreCounts[slot]++;
  }
}
/**
 * Builds a term-ordinal comparator whose per-segment values are obtained by selecting
 * one child value per parent block.
 *
 * @param numHits number of hits passed to the comparator
 * @return a comparator over the block-join-selected values of this field
 */
private FieldComparator<?> getStringComparator(int numHits) {
  return new FieldComparator.TermOrdValComparator(numHits, getField(), missingValue == STRING_LAST) {
    @Override
    protected SortedDocValues getSortedDocValues(LeafReaderContext context, String field) throws IOException {
      final SortedSetDocValues multiValued = DocValues.getSortedSet(context.reader(), field);
      final BitSet parentBits = parentFilter.getBitSet(context);
      final BitSet childBits = childFilter.getBitSet(context);
      if (childBits == null) {
        // No child bit set for this segment: expose empty values.
        return DocValues.emptySorted();
      }

      // 'order' set selects MAX, otherwise MIN.
      final BlockJoinSelector.Type selection;
      if (order) {
        selection = BlockJoinSelector.Type.MAX;
      } else {
        selection = BlockJoinSelector.Type.MIN;
      }
      return BlockJoinSelector.wrap(multiValued, selection, parentBits, toIter(childBits));
    }
  };
}
/**
 * Accumulates per-segment multi-valued facet counts, mapping each segment ordinal to the
 * global ordinal space on the fly when {@code map} is non-null.
 *
 * <p>For every ordinal, {@code counts[ord - startTermIndex]} is incremented if that index is
 * inside {@code counts}. Documents without a value increment {@code counts[0]} (the missing
 * count) only when {@code startTermIndex == -1}.
 *
 * @param counts array receiving the counts
 * @param startTermIndex ordinal that maps to index 0 of {@code counts}
 * @param si the segment's sorted-set doc values
 * @param disi iterator over the documents to count
 * @param subIndex index handed to {@code map.getGlobalOrds}
 * @param map ordinal map, or {@code null} to use segment ordinals directly
 * @throws IOException if iteration fails
 */
static void accumMultiGeneric(int counts[], int startTermIndex, SortedSetDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException {
  final LongValues ordMap = map == null ? null : map.getGlobalOrds(subIndex);
  for (int doc = disi.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = disi.nextDoc()) {
    if (!si.advanceExact(doc)) {
      if (startTermIndex == -1) {
        counts[0]++; // missing count
      }
      continue;
    }

    // do-while: the first ordinal is always processed; the loop ends once nextOrd()
    // yields a negative value.
    int segTerm = (int) si.nextOrd();
    do {
      final int term = map != null ? (int) ordMap.get(segTerm) : segTerm;
      final int arrIdx = term - startTermIndex;
      if (arrIdx >= 0 && arrIdx < counts.length) {
        counts[arrIdx]++;
      }
      segTerm = (int) si.nextOrd();
    } while (segTerm >= 0);
  }
}
/**
 * Binary-searches the term dictionary of {@code docValues} for {@code key}, considering only
 * ordinals from {@code startOrd} to {@code getValueCount() - 1}.
 *
 * @param docValues doc values whose terms are searched via {@code lookupOrd}
 * @param key the term to find
 * @param startOrd lowest ordinal to consider
 * @return the ordinal of {@code key} if found; otherwise {@code -(insertionPoint + 1)}
 * @throws IOException if a term lookup fails
 */
private long lookupTerm(SortedSetDocValues docValues, BytesRef key, long startOrd) throws IOException {
  long lo = startOrd;
  long hi = docValues.getValueCount() - 1;
  while (lo <= hi) {
    final long middle = (lo + hi) >>> 1;
    final int order = docValues.lookupOrd(middle).compareTo(key);
    if (order == 0) {
      return middle; // key found
    }
    if (order < 0) {
      lo = middle + 1;
    } else {
      hi = middle - 1;
    }
  }
  return -(lo + 1); // key not found.
}
/**
 * Checks that {@code fieldName} exists in the schema and has docValues, then returns its
 * values as {@link SortedSetDocValues}.
 *
 * <p>Multi-valued fields are read as sorted-set doc values; other fields are read as sorted
 * doc values and wrapped with {@code DocValues.singleton}.
 *
 * @param solrSearcher searcher providing the schema and the reader
 * @param fieldName field to fetch
 * @param querySide label for the field used in error messages
 * @return the field's doc values
 * @throws SolrException (BAD_REQUEST) if the field does not exist or has no docValues
 * @throws IOException if the doc values cannot be read
 */
private SortedSetDocValues validateAndFetchDocValues(SolrIndexSearcher solrSearcher, String fieldName, String querySide) throws IOException {
  final SchemaField field = solrSearcher.getSchema().getFieldOrNull(fieldName);
  if (field == null) {
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, querySide + " field '" + fieldName + "' does not exist");
  }
  if (!field.hasDocValues()) {
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
        "'top-level' join queries require both 'from' and 'to' fields to have docValues, but " + querySide +
            " field [" + fieldName + "] does not.");
  }

  final LeafReader leafReader = solrSearcher.getSlowAtomicReader();
  return field.multiValued()
      ? DocValues.getSortedSet(leafReader, fieldName)
      : DocValues.singleton(DocValues.getSorted(leafReader, fieldName));
}
/**
 * Translates every ordinal set in {@code fromOrdBitSet} into the matching ordinal of the
 * "to" field and sets it in {@code toOrdBitSet}.
 *
 * <p>Each "from" term is looked up in {@code toDocValues} with {@code lookupTerm}, starting
 * at the last matched "to" ordinal. Terms that are not found are skipped.
 *
 * @param fromOrdBitSet ordinals selected on the "from" side
 * @param fromDocValues doc values used to resolve "from" ordinals to terms
 * @param toOrdBitSet bit set that receives the matching "to" ordinals
 * @param toDocValues doc values searched for each "from" term
 * @return bounds holding the first and last matched "to" ordinal; when nothing matched these
 *     are {@code BitsetBounds.NO_MATCHES} and 0 respectively
 * @throws IOException if a term lookup fails
 */
private BitsetBounds convertFromOrdinalsIntoToField(LongBitSet fromOrdBitSet, SortedSetDocValues fromDocValues,
    LongBitSet toOrdBitSet, SortedSetDocValues toDocValues) throws IOException {
  long firstToOrd = BitsetBounds.NO_MATCHES;
  long lastToOrd = 0;

  long fromOrd = 0;
  while (fromOrd < fromOrdBitSet.length()) {
    fromOrd = fromOrdBitSet.nextSetBit(fromOrd);
    if (fromOrd < 0) {
      break; // no further set bits
    }

    final BytesRef fromTerm = fromDocValues.lookupOrd(fromOrd);
    final long toOrd = lookupTerm(toDocValues, fromTerm, lastToOrd);
    if (toOrd >= 0) {
      toOrdBitSet.set(toOrd);
      if (firstToOrd == BitsetBounds.NO_MATCHES) {
        firstToOrd = toOrd;
      }
      lastToOrd = toOrd;
    }
    fromOrd++;
  }
  return new BitsetBounds(firstToOrd, lastToOrd);
}
/**
 * Finds {@code key} among the terms of {@code docValues} by binary search over the ordinal
 * range {@code [startOrd, getValueCount() - 1]}.
 *
 * @param docValues doc values whose terms are searched via {@code lookupOrd}
 * @param key the term to find
 * @param startOrd lowest ordinal to consider
 * @return the ordinal of {@code key} if found; otherwise {@code -(insertionPoint + 1)}
 * @throws IOException if a term lookup fails
 */
private long lookupTerm(SortedSetDocValues docValues, BytesRef key, long startOrd) throws IOException {
  long lo = startOrd;
  long hi = docValues.getValueCount() - 1;
  while (lo <= hi) {
    final long middle = (lo + hi) >>> 1;
    final int order = docValues.lookupOrd(middle).compareTo(key);
    if (order == 0) {
      return middle; // key found
    }
    if (order < 0) {
      lo = middle + 1;
    } else {
      hi = middle - 1;
    }
  }
  return -(lo + 1); // key not found.
}
/**
 * Counts ordinal occurrences for one segment and then adds the non-zero counts to
 * {@code countAcc}.
 *
 * <p>Counting happens in segment-ordinal space first; each non-zero count is then reported
 * under the slot given by {@code toGlobal}, or under the segment ordinal itself when
 * {@code toGlobal} is {@code null}.
 *
 * @param multiDv the segment's sorted-set doc values
 * @param disi iterator over the documents to count
 * @param toGlobal mapping from segment ordinal to slot, or {@code null}
 * @throws IOException if iteration fails
 */
private void collectPerSeg(SortedSetDocValues multiDv, DocIdSetIterator disi, LongValues toGlobal) throws IOException {
  final int segMax = (int) multiDv.getValueCount();
  final int[] counts = getCountArr(segMax);

  for (int doc = disi.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = disi.nextDoc()) {
    if (!multiDv.advanceExact(doc)) {
      continue;
    }
    // A negative ordinal ends the document's values.
    for (int segOrd = (int) multiDv.nextOrd(); segOrd >= 0; segOrd = (int) multiDv.nextOrd()) {
      counts[segOrd]++;
    }
  }

  for (int segOrd = 0; segOrd < segMax; segOrd++) {
    final int segCount = counts[segOrd];
    if (segCount <= 0) {
      continue;
    }
    final int slot = toGlobal == null ? segOrd : (int) toGlobal.get(segOrd);
    countAcc.incrementCount(slot, segCount);
  }
}
/**
 * Returns the single value of the current document, formatted with
 * {@code DocValueFormat.IP}.
 *
 * @return the formatted value, or {@code null} if the document has no value
 * @throws GroupByOnArrayUnsupportedException if the document has more than one value
 * @throws UncheckedIOException if reading the doc values fails
 */
@Override
public String value() {
  try {
    if (!values.advanceExact(docId)) {
      return null;
    }
    final long firstOrd = values.nextOrd();
    // A second ordinal means the column holds an array for this document.
    final boolean hasSecondValue = values.nextOrd() != SortedSetDocValues.NO_MORE_ORDS;
    if (hasSecondValue) {
      throw new GroupByOnArrayUnsupportedException(columnName);
    }
    return (String) DocValueFormat.IP.format(values.lookupOrd(firstOrd));
  } catch (IOException e) {
    throw new UncheckedIOException(e);
  }
}
/**
 * Updates the ordinal stored for {@code slotNum} with the min or max ordinal read from
 * {@code subDv}.
 *
 * <p>With {@code minmax == 1} (min) the first ordinal returned by {@code nextOrd()} is used;
 * otherwise (max) the last ordinal before {@code NO_MORE_ORDS} is used. The ordinal is mapped
 * through {@code toGlobal} when that mapping is non-null, and replaces the slot's current
 * ordinal if the slot is {@code MISSING} or the new ordinal is better for the configured
 * direction.
 *
 * <p>NOTE(review): {@code doc} is not used here; {@code subDv} is presumably already
 * positioned on the document by the caller -- confirm.
 *
 * @param doc the document being collected (unused in this method)
 * @param slotNum index into {@code slotOrd}
 * @throws IOException if reading ordinals fails
 */
@Override
public void collectValues(int doc, int slotNum) throws IOException {
  long newOrd = MISSING;
  if (minmax == 1) { // min
    newOrd = subDv.nextOrd();
  } else { // max
    for (long ord = subDv.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = subDv.nextOrd()) {
      newOrd = ord;
    }
  }

  final long currOrd = slotOrd[slotNum];
  final long finalOrd;
  if (toGlobal == null) {
    finalOrd = newOrd;
  } else {
    finalOrd = toGlobal.get(newOrd);
  }

  final boolean improves = Long.compare(finalOrd, currOrd) * minmax < 0;
  if (currOrd == MISSING || improves) {
    slotOrd[slotNum] = finalOrd;
  }
}
/**
 * Returns the sorted-set doc values for {@code field}, consulting
 * {@code _sortedSetDocValuesCache} (keyed by field number) first.
 *
 * <p>On a miss the cache is re-checked while holding its monitor; if still absent the values
 * are created with {@code newSortedSetDocValues} and stored only when {@code _cache} is set
 * and the created value is non-null.
 *
 * @param field the field to load
 * @return the cached or newly created doc values (whatever {@code newSortedSetDocValues}
 *     returned on a miss)
 * @throws IOException if the doc values cannot be created
 */
@Override
public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException {
  SortedSetDocValues result = _sortedSetDocValuesCache.get(field.number);
  if (result == null) {
    synchronized (_sortedSetDocValuesCache) {
      result = _sortedSetDocValuesCache.get(field.number);
      if (result == null) {
        result = newSortedSetDocValues(field);
        if (_cache && result != null) {
          _sortedSetDocValuesCache.put(field.number, result);
        }
      }
    }
  }
  return result;
}
/**
 * Asserts that two {@link SortedSetDocValues} agree on value count, on the documents and
 * per-document ordinals they iterate, on the ordinal-to-term dictionary, and on their
 * terms enums.
 *
 * <p>Both instances are consumed by this check.
 *
 * @param maxDoc not used by this method
 * @param expected the reference values
 * @param actual the values under test; may be {@code null}, in which case
 *     {@code expected} must have a value count of 0
 * @throws Exception if reading either instance fails
 */
private void assertEquals(int maxDoc, SortedSetDocValues expected, SortedSetDocValues actual) throws Exception {
  // can be null for the segment if no docs actually had any SortedDocValues
  // in this case FC.getDocTermsOrds returns EMPTY
  if (actual == null) {
    assertEquals(expected.getValueCount(), 0);
    return;
  }
  assertEquals(expected.getValueCount(), actual.getValueCount());

  // compare documents and their ordinals in lock-step
  for (int docID = expected.nextDoc(); ; docID = expected.nextDoc()) {
    assertEquals(docID, actual.nextDoc());
    if (docID == NO_MORE_DOCS) {
      break;
    }
    for (long ord = expected.nextOrd(); ord != NO_MORE_ORDS; ord = expected.nextOrd()) {
      assertEquals(ord, actual.nextOrd());
    }
    // actual must be exhausted for this document as well
    assertEquals(NO_MORE_ORDS, actual.nextOrd());
  }

  // compare ord dictionary
  long ord = 0;
  while (ord < expected.getValueCount()) {
    final BytesRef expectedBytes = BytesRef.deepCopyOf(expected.lookupOrd(ord));
    final BytesRef actualBytes = actual.lookupOrd(ord);
    assertEquals(expectedBytes, actualBytes);
    ord++;
  }

  // compare termsenum
  assertEquals(expected.getValueCount(), expected.termsEnum(), actual.termsEnum());
}
/**
 * Verifies that a memory index's sorted-set and sorted-numeric doc values can be iterated
 * repeatedly: calling {@code advanceExact(0)} again yields the same sequence of values.
 */
public void testDocValues_resetIterator() throws Exception {
  Document doc = new Document();
  // "d" is added twice; the test expects only three distinct sorted-set values.
  for (String term : new String[] {"f", "d", "d", "c"}) {
    doc.add(new SortedSetDocValuesField("sorted_set", new BytesRef(term)));
  }
  for (long number : new long[] {33L, 32L, 32L, 31L, 30L}) {
    doc.add(new SortedNumericDocValuesField("sorted_numeric", number));
  }

  MemoryIndex mi = MemoryIndex.fromDocument(doc, analyzer);
  LeafReader leafReader = mi.createSearcher().getIndexReader().leaves().get(0).reader();

  SortedSetDocValues sortedSetDocValues = leafReader.getSortedSetDocValues("sorted_set");
  assertEquals(3, sortedSetDocValues.getValueCount());
  for (int times = 0; times < 3; times++) {
    assertTrue(sortedSetDocValues.advanceExact(0));
    for (long expectedOrd = 0L; expectedOrd < 3L; expectedOrd++) {
      assertEquals(expectedOrd, sortedSetDocValues.nextOrd());
    }
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSetDocValues.nextOrd());
  }

  SortedNumericDocValues sortedNumericDocValues = leafReader.getSortedNumericDocValues("sorted_numeric");
  final long[] expectedNumbers = {30L, 31L, 32L, 32L, 33L};
  for (int times = 0; times < 3; times++) {
    assertTrue(sortedNumericDocValues.advanceExact(0));
    assertEquals(5, sortedNumericDocValues.docValueCount());
    for (long expectedNumber : expectedNumbers) {
      assertEquals(expectedNumber, sortedNumericDocValues.nextValue());
    }
  }
}
/**
 * Folds every remaining value of {@code ssdv} for the current document into the running
 * {@code min} and {@code max}.
 *
 * @param count not used by this method
 * @throws IOException if reading the doc values fails
 */
@Override
protected void doAccumulate(int count) throws IOException {
  for (long ord = ssdv.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = ssdv.nextOrd()) {
    final BytesRef candidate = ssdv.lookupOrd(ord);
    final boolean newMax = max == null || candidate.compareTo(max) > 0;
    if (newMax) {
      max = copyFrom(candidate, max);
    }
    final boolean newMin = min == null || candidate.compareTo(min) < 0;
    if (newMin) {
      min = copyFrom(candidate, min);
    }
  }
}
/**
 * Builds {@code fieldMap} and {@code termMap} from the terms of every string field returned
 * by {@code getStringFields}.
 *
 * <p>Each term of each field is passed to {@code addTerm} together with the field name; fields
 * for which no doc term ords are available are skipped. After the local terms,
 * {@code addDistributedTerms} is invoked and both synonym maps are built.
 *
 * @param rb the response builder whose request supplies the searcher
 * @throws IOException if reading terms or building the maps fails
 */
private void buildFieldMap( ResponseBuilder rb ) throws IOException {
  Log.debug( "buildFieldMap" );
  final SolrIndexSearcher searcher = rb.req.getSearcher();

  // build a synonym map from the SortedDocValues -
  // for each field value: lower case, stemmed, lookup synonyms from synonyms.txt - map to fieldValue
  final SynonymMap.Builder fieldBuilder = new SynonymMap.Builder( true );
  final SynonymMap.Builder termBuilder = new SynonymMap.Builder( true );

  final ArrayList<String> searchFields = getStringFields( searcher );
  for (String searchField : searchFields) {
    Log.debug( "adding searchField " + searchField );
    final CharsRef fieldChars = new CharsRef( searchField );
    final SortedSetDocValues sdv = FieldCache.DEFAULT.getDocTermOrds( searcher.getAtomicReader( ), searchField );
    if (sdv != null) {
      Log.debug( "got SortedSetDocValues for " + searchField );
      final TermsEnum te = sdv.termsEnum();
      while (te.next() != null) {
        addTerm( fieldChars, te.term().utf8ToString( ), fieldBuilder, termBuilder );
      }
    }
  }

  addDistributedTerms( rb, fieldBuilder, termBuilder, searchFields );

  fieldMap = fieldBuilder.build( );
  termMap = termBuilder.build( );
}
/**
 * Adds every term of {@code doc} to {@code collectorTerms}.
 *
 * <p>The doc values iterator is advanced only when it is positioned before {@code doc};
 * terms are collected only if it then sits exactly on {@code doc}.
 *
 * @param doc the document being collected
 * @throws IOException if reading the doc values fails
 */
@Override
public void collect(int doc) throws IOException {
  if (docValues.docID() < doc) {
    docValues.advance(doc);
  }
  if (docValues.docID() != doc) {
    return; // no values for this document
  }
  for (long ord = docValues.nextOrd();
      ord != SortedSetDocValues.NO_MORE_ORDS;
      ord = docValues.nextOrd()) {
    collectorTerms.add(docValues.lookupOrd(ord));
  }
}
/**
 * Creates the multi-valued terms collector matching the requested score mode.
 *
 * @param mvFunction supplier of the per-segment {@link SortedSetDocValues}
 * @param mode score mode: {@code None} yields a wrapped {@code TermsCollector.MV},
 *     {@code Avg} yields an {@code MV.Avg}, any other mode yields an {@code MV} for that mode
 * @return the collector for {@code mode}
 */
static GenericTermsCollector createCollectorMV(Function<SortedSetDocValues> mvFunction,
    ScoreMode mode) {
  final GenericTermsCollector collector;
  switch (mode) {
    case None:
      collector = wrap(new TermsCollector.MV(mvFunction));
      break;
    case Avg:
      collector = new MV.Avg(mvFunction);
      break;
    default:
      collector = new MV(mvFunction, mode);
      break;
  }
  return collector;
}
/** Wraps the provided {@link SortedSetDocValues} so that only one value per parent is
 *  selected among its {@code children}, according to the configured {@code selection} type.
 *  The sorted-set values are first reduced to one value per document with the matching
 *  {@link SortedSetSelector} type, then handed to the single-valued {@code wrap} overload. */
public static SortedDocValues wrap(SortedSetDocValues sortedSet, Type selection, BitSet parents, DocIdSetIterator children) {
  final SortedSetSelector.Type perDocSelection;
  switch (selection) {
    case MIN:
      perDocSelection = SortedSetSelector.Type.MIN;
      break;
    case MAX:
      perDocSelection = SortedSetSelector.Type.MAX;
      break;
    default:
      throw new AssertionError();
  }
  final SortedDocValues singleValued = SortedSetSelector.wrap(sortedSet, perDocSelection);
  return wrap(singleValued, selection, parents, children);
}
/**
 * Fetches the sorted-set doc values for {@code field} from the wrapped producer and returns
 * them wrapped in an {@code AssertingSortedSetDocValues}.
 *
 * <p>When {@code merging} is set, the calling thread is checked against
 * {@code creationThread}. Assertions also verify that the field's doc values type is
 * {@code SORTED_SET} and that the wrapped producer returned a non-null value.
 *
 * @param field the field to load
 * @return the asserting wrapper around the delegate's values
 * @throws IOException if the delegate fails to load the values
 */
@Override
public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException {
  if (merging) {
    AssertingCodec.assertThread("DocValuesProducer", creationThread);
  }
  assert field.getDocValuesType() == DocValuesType.SORTED_SET;

  final SortedSetDocValues delegate = in.getSortedSet(field);
  assert delegate != null;

  return new AssertingLeafReader.AssertingSortedSetDocValues(delegate, maxDoc);
}
/**
 * Replaces the contents of {@code values} with the string form of every term of {@code doc}.
 *
 * <p>{@code values} is cleared first, so it ends up empty when the document has no value.
 *
 * @param doc the document being collected
 * @throws IOException if reading the doc values fails
 */
@Override
public void collect(int doc) throws IOException {
  values.clear();
  if (docValues.advanceExact(doc)) {
    // Ordinals are longs: keeping them as long avoids truncating large ordinals, which could
    // end iteration early or look up the wrong term.
    long ord;
    while ((ord = docValues.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
      values.add(docValues.lookupOrd(ord).utf8ToString());
    }
  }
}
/**
 * Loads the values of {@code doc} into {@code values}, decoding each term with
 * {@code LegacyNumericUtils.prefixCodedToInt}.
 *
 * <p>{@code count} is reset to 0 and then holds the number of values read; {@code values}
 * is enlarged via {@code resizeValues()} whenever it is full.
 *
 * @param doc the document being collected
 * @throws IOException if reading the doc values fails
 */
@Override
public void collect(int doc) throws IOException {
  count = 0;
  if (docValues.advanceExact(doc)) {
    // Ordinals are longs: keeping them as long avoids truncating large ordinals, which could
    // end iteration early or look up the wrong term.
    long ord;
    while ((ord = docValues.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
      if (count == values.length) {
        resizeValues();
      }
      values[count++] = LegacyNumericUtils.prefixCodedToInt(docValues.lookupOrd(ord));
    }
  }
}
/**
 * Loads the values of {@code doc} into {@code values}, decoding each term with
 * {@code LegacyNumericUtils.prefixCodedToInt} followed by
 * {@code NumericUtils.sortableIntToFloat}.
 *
 * <p>{@code count} is reset to 0 and then holds the number of values read; {@code values}
 * is enlarged via {@code resizeValues()} whenever it is full.
 *
 * @param doc the document being collected
 * @throws IOException if reading the doc values fails
 */
@Override
public void collect(int doc) throws IOException {
  count = 0;
  if (docValues.advanceExact(doc)) {
    // Ordinals are longs: keeping them as long avoids truncating large ordinals, which could
    // end iteration early or look up the wrong term.
    long ord;
    while ((ord = docValues.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
      if (count == values.length) {
        resizeValues();
      }
      values[count++] = NumericUtils.sortableIntToFloat(LegacyNumericUtils.prefixCodedToInt(docValues.lookupOrd(ord)));
    }
  }
}
/**
 * Loads the values of {@code doc} into {@code values} as booleans: each entry is
 * {@code true} exactly when the value's ordinal equals {@code trueOrd}.
 *
 * <p>{@code count} is reset to 0 and then holds the number of values read; {@code values}
 * is enlarged via {@code resizeValues()} whenever it is full.
 *
 * @param doc the document being collected
 * @throws IOException if reading the doc values fails
 */
@Override
public void collect(int doc) throws IOException {
  count = 0;
  if (docValues.advanceExact(doc)) {
    // Ordinals are longs: comparing the untruncated ordinal avoids both an early stop and a
    // false match against trueOrd for ordinals beyond the int range.
    long ord;
    while ((ord = docValues.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
      if (count == values.length) {
        resizeValues();
      }
      values[count++] = ord == trueOrd;
    }
  }
}
/**
 * Loads the values of {@code doc} into {@code values}, decoding each term with
 * {@code LegacyNumericUtils.prefixCodedToLong} followed by
 * {@code NumericUtils.sortableLongToDouble}.
 *
 * <p>{@code count} is reset to 0 and then holds the number of values read; {@code values}
 * is enlarged via {@code resizeValues()} whenever it is full.
 *
 * @param doc the document being collected
 * @throws IOException if reading the doc values fails
 */
@Override
public void collect(int doc) throws IOException {
  count = 0;
  if (docValues.advanceExact(doc)) {
    // Ordinals are longs: keeping them as long avoids truncating large ordinals, which could
    // end iteration early or look up the wrong term.
    long ord;
    while ((ord = docValues.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
      if (count == values.length) {
        resizeValues();
      }
      values[count++] = NumericUtils.sortableLongToDouble(LegacyNumericUtils.prefixCodedToLong(docValues.lookupOrd(ord)));
    }
  }
}
/**
 * Accumulates per-segment multi-valued facet counts, choosing between two strategies.
 *
 * <p>When there is no prefixing ({@code startTermIndex == -1}) and either no ordinal map is
 * given or the number of unique values is below ten times the iterator's cost, counts are
 * collected per segment and mapped afterwards ({@code accumMultiSeg}). Otherwise ordinals
 * are mapped while collecting ({@code accumMultiGeneric}).
 */
static void accumMulti(int counts[], int startTermIndex, SortedSetDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException {
  // no prefixing, not too many unique values wrt matching docs (lucene/facets heuristic)
  final boolean collectPerSegment =
      startTermIndex == -1 && (map == null || si.getValueCount() < disi.cost()*10);
  if (collectPerSegment) {
    // collect separately per-segment, then map to global ords
    accumMultiSeg(counts, si, disi, subIndex, map);
    return;
  }
  // otherwise: do collect+map on the fly
  accumMultiGeneric(counts, startTermIndex, si, disi, subIndex, map);
}
/**
 * "Typical" multi-valued faceting: not too many unique values, no prefixing. Counts are
 * gathered in segment-ordinal space and mapped to global ordinals as a separate step.
 *
 * <p>Index 0 of the count arrays is the missing count, so ordinal {@code n} is counted at
 * index {@code n + 1}. Without an ordinal map the counts go straight into {@code counts};
 * with one, they are gathered in a temporary array and then handed to
 * {@code migrateGlobal}. Missing documents are always counted in {@code counts[0]}.
 */
static void accumMultiSeg(int counts[], SortedSetDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException {
  // First count in seg-ord space:
  final int segCounts[] = map == null ? counts : new int[1+(int)si.getValueCount()];

  for (int doc = disi.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = disi.nextDoc()) {
    if (!si.advanceExact(doc)) {
      counts[0]++; // missing
      continue;
    }
    // do-while: the first ordinal is always counted; a negative ordinal ends the document.
    int term = (int) si.nextOrd();
    do {
      segCounts[1+term]++;
      term = (int) si.nextOrd();
    } while (term >= 0);
  }

  // migrate to global ords (if necessary)
  if (map != null) {
    migrateGlobal(counts, segCounts, subIndex, map);
  }
}
/** Returns a SortedSetDocValues view of this instance: the shared empty sorted set when
 *  {@code isEmpty()} holds, otherwise a new {@code Iterator} over {@code reader}. */
public SortedSetDocValues iterator(LeafReader reader) throws IOException {
  if (!isEmpty()) {
    return new Iterator(reader);
  }
  return DocValues.emptySortedSet();
}