下面列出了org.apache.lucene.index.LeafReader#getSortedDocValues ( ) 实例代码,或者点击链接到github查看源代码,也可以在右侧发表评论。
public SortedDocValues getTermsIndex(LeafReader reader, String field, float acceptableOverheadRatio) throws IOException {
SortedDocValues valuesIn = reader.getSortedDocValues(field);
if (valuesIn != null) {
// Not cached here by FieldCacheImpl (cached instead
// per-thread by SegmentReader):
return valuesIn;
} else {
final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
if (info == null) {
return DocValues.emptySorted();
} else if (info.getDocValuesType() != DocValuesType.NONE) {
// we don't try to build a sorted instance from numeric/binary doc
// values because dedup can be very costly
throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType());
} else if (info.getIndexOptions() == IndexOptions.NONE) {
return DocValues.emptySorted();
}
SortedDocValuesImpl impl = (SortedDocValuesImpl) caches.get(SortedDocValues.class).get(reader, new CacheKey(field, acceptableOverheadRatio));
return impl.iterator();
}
}
public BinaryDocValues getTerms(LeafReader reader, String field, float acceptableOverheadRatio) throws IOException {
BinaryDocValues valuesIn = reader.getBinaryDocValues(field);
if (valuesIn == null) {
valuesIn = reader.getSortedDocValues(field);
}
if (valuesIn != null) {
// Not cached here by FieldCacheImpl (cached instead
// per-thread by SegmentReader):
return valuesIn;
}
final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
if (info == null) {
return DocValues.emptyBinary();
} else if (info.getDocValuesType() != DocValuesType.NONE) {
throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType());
} else if (info.getIndexOptions() == IndexOptions.NONE) {
return DocValues.emptyBinary();
}
BinaryDocValuesImpl impl = (BinaryDocValuesImpl) caches.get(BinaryDocValues.class).get(reader, new CacheKey(field, acceptableOverheadRatio));
return impl.iterator();
}
@Override
public LeafCollector getLeafCollector(LeafReaderContext lrc) throws IOException {
LeafReader reader = lrc.reader( );
for (int i = 0; i < fields.length; i ++) {
if (groups[i][0] >= 1) {
if (groups[i][1] == 1) {
values[i] = reader.getSortedNumericDocValues("%"+fields[i]);
} else {
values[i] = reader. getNumericDocValues("#"+fields[i]);
}
} else {
if (groups[i][1] == 1) {
values[i] = reader.getSortedSetDocValues("%"+fields[i]);
} else {
values[i] = reader. getSortedDocValues("#"+fields[i]);
}
}
}
return this;
}
/**
* Returns a {@link DocIdSetIterator} from the given field or null if the field doesn't exist
* in the reader or if the reader has no doc values for the field.
*/
public static DocIdSetIterator getDocValuesDocIdSetIterator(String field, LeafReader reader) throws IOException {
FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
final DocIdSetIterator iterator;
if (fieldInfo != null) {
switch (fieldInfo.getDocValuesType()) {
case NONE:
iterator = null;
break;
case NUMERIC:
iterator = reader.getNumericDocValues(field);
break;
case BINARY:
iterator = reader.getBinaryDocValues(field);
break;
case SORTED:
iterator = reader.getSortedDocValues(field);
break;
case SORTED_NUMERIC:
iterator = reader.getSortedNumericDocValues(field);
break;
case SORTED_SET:
iterator = reader.getSortedSetDocValues(field);
break;
default:
throw new AssertionError();
}
return iterator;
}
return null;
}
private BitsEntry createValueDocValues(LeafReader reader, String field) throws IOException {
FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
DocValuesType dvType = fieldInfo.getDocValuesType();
DocIdSetIterator iterator;
switch(dvType) {
case NUMERIC:
iterator = reader.getNumericDocValues(field);
break;
case BINARY:
iterator = reader.getBinaryDocValues(field);
break;
case SORTED:
iterator = reader.getSortedDocValues(field);
break;
case SORTED_NUMERIC:
iterator = reader.getSortedNumericDocValues(field);
break;
case SORTED_SET:
iterator = reader.getSortedSetDocValues(field);
break;
default:
throw new AssertionError();
}
FixedBitSet bits = new FixedBitSet(reader.maxDoc());
while (true) {
int docID = iterator.nextDoc();
if (docID == DocIdSetIterator.NO_MORE_DOCS) {
break;
}
bits.set(docID);
}
return new BitsEntry(bits);
}
public void testDocValues() throws Exception {
Document doc = new Document();
doc.add(new NumericDocValuesField("numeric", 29L));
doc.add(new SortedNumericDocValuesField("sorted_numeric", 33L));
doc.add(new SortedNumericDocValuesField("sorted_numeric", 32L));
doc.add(new SortedNumericDocValuesField("sorted_numeric", 32L));
doc.add(new SortedNumericDocValuesField("sorted_numeric", 31L));
doc.add(new SortedNumericDocValuesField("sorted_numeric", 30L));
doc.add(new BinaryDocValuesField("binary", new BytesRef("a")));
doc.add(new SortedDocValuesField("sorted", new BytesRef("b")));
doc.add(new SortedSetDocValuesField("sorted_set", new BytesRef("f")));
doc.add(new SortedSetDocValuesField("sorted_set", new BytesRef("d")));
doc.add(new SortedSetDocValuesField("sorted_set", new BytesRef("d")));
doc.add(new SortedSetDocValuesField("sorted_set", new BytesRef("c")));
MemoryIndex mi = MemoryIndex.fromDocument(doc, analyzer);
LeafReader leafReader = mi.createSearcher().getIndexReader().leaves().get(0).reader();
NumericDocValues numericDocValues = leafReader.getNumericDocValues("numeric");
assertEquals(0, numericDocValues.nextDoc());
assertEquals(29L, numericDocValues.longValue());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, numericDocValues.nextDoc());
SortedNumericDocValues sortedNumericDocValues = leafReader.getSortedNumericDocValues("sorted_numeric");
assertEquals(0, sortedNumericDocValues.nextDoc());
assertEquals(5, sortedNumericDocValues.docValueCount());
assertEquals(30L, sortedNumericDocValues.nextValue());
assertEquals(31L, sortedNumericDocValues.nextValue());
assertEquals(32L, sortedNumericDocValues.nextValue());
assertEquals(32L, sortedNumericDocValues.nextValue());
assertEquals(33L, sortedNumericDocValues.nextValue());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, sortedNumericDocValues.nextDoc());
BinaryDocValues binaryDocValues = leafReader.getBinaryDocValues("binary");
assertEquals(0, binaryDocValues.nextDoc());
assertEquals("a", binaryDocValues.binaryValue().utf8ToString());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, binaryDocValues.nextDoc());
SortedDocValues sortedDocValues = leafReader.getSortedDocValues("sorted");
assertEquals(0, sortedDocValues.nextDoc());
assertEquals("b", sortedDocValues.binaryValue().utf8ToString());
assertEquals(0, sortedDocValues.ordValue());
assertEquals("b", sortedDocValues.lookupOrd(0).utf8ToString());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, sortedDocValues.nextDoc());
SortedSetDocValues sortedSetDocValues = leafReader.getSortedSetDocValues("sorted_set");
assertEquals(3, sortedSetDocValues.getValueCount());
assertEquals(0, sortedSetDocValues.nextDoc());
assertEquals(0L, sortedSetDocValues.nextOrd());
assertEquals(1L, sortedSetDocValues.nextOrd());
assertEquals(2L, sortedSetDocValues.nextOrd());
assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSetDocValues.nextOrd());
assertEquals("c", sortedSetDocValues.lookupOrd(0L).utf8ToString());
assertEquals("d", sortedSetDocValues.lookupOrd(1L).utf8ToString());
assertEquals("f", sortedSetDocValues.lookupOrd(2L).utf8ToString());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, sortedDocValues.nextDoc());
}
/**
* A query time join using global ordinals over a dedicated join field.
*
* This join has certain restrictions and requirements:
* 1) A document can only refer to one other document. (but can be referred by one or more documents)
* 2) Documents on each side of the join must be distinguishable. Typically this can be done by adding an extra field
* that identifies the "from" and "to" side and then the fromQuery and toQuery must take the this into account.
* 3) There must be a single sorted doc values join field used by both the "from" and "to" documents. This join field
* should store the join values as UTF-8 strings.
* 4) An ordinal map must be provided that is created on top of the join field.
*
* Note: min and max filtering and the avg score mode will require this join to keep track of the number of times
* a document matches per join value. This will increase the per join cost in terms of execution time and memory.
*
* @param joinField The {@link SortedDocValues} field containing the join values
* @param fromQuery The query containing the actual user query. Also the fromQuery can only match "from" documents.
* @param toQuery The query identifying all documents on the "to" side.
* @param searcher The index searcher used to execute the from query
* @param scoreMode Instructs how scores from the fromQuery are mapped to the returned query
* @param ordinalMap The ordinal map constructed over the joinField. In case of a single segment index, no ordinal map
* needs to be provided.
* @param min Optionally the minimum number of "from" documents that are required to match for a "to" document
* to be a match. The min is inclusive. Setting min to 0 and max to <code>Interger.MAX_VALUE</code>
* disables the min and max "from" documents filtering
* @param max Optionally the maximum number of "from" documents that are allowed to match for a "to" document
* to be a match. The max is inclusive. Setting min to 0 and max to <code>Interger.MAX_VALUE</code>
* disables the min and max "from" documents filtering
* @return a {@link Query} instance that can be used to join documents based on the join field
* @throws IOException If I/O related errors occur
*/
public static Query createJoinQuery(String joinField,
Query fromQuery,
Query toQuery,
IndexSearcher searcher,
ScoreMode scoreMode,
OrdinalMap ordinalMap,
int min,
int max) throws IOException {
int numSegments = searcher.getIndexReader().leaves().size();
final long valueCount;
if (numSegments == 0) {
return new MatchNoDocsQuery("JoinUtil.createJoinQuery with no segments");
} else if (numSegments == 1) {
// No need to use the ordinal map, because there is just one segment.
ordinalMap = null;
LeafReader leafReader = searcher.getIndexReader().leaves().get(0).reader();
SortedDocValues joinSortedDocValues = leafReader.getSortedDocValues(joinField);
if (joinSortedDocValues != null) {
valueCount = joinSortedDocValues.getValueCount();
} else {
return new MatchNoDocsQuery("JoinUtil.createJoinQuery: no join values");
}
} else {
if (ordinalMap == null) {
throw new IllegalArgumentException("OrdinalMap is required, because there is more than 1 segment");
}
valueCount = ordinalMap.getValueCount();
}
final Query rewrittenFromQuery = searcher.rewrite(fromQuery);
final Query rewrittenToQuery = searcher.rewrite(toQuery);
GlobalOrdinalsWithScoreCollector globalOrdinalsWithScoreCollector;
switch (scoreMode) {
case Total:
globalOrdinalsWithScoreCollector = new GlobalOrdinalsWithScoreCollector.Sum(joinField, ordinalMap, valueCount, min, max);
break;
case Min:
globalOrdinalsWithScoreCollector = new GlobalOrdinalsWithScoreCollector.Min(joinField, ordinalMap, valueCount, min, max);
break;
case Max:
globalOrdinalsWithScoreCollector = new GlobalOrdinalsWithScoreCollector.Max(joinField, ordinalMap, valueCount, min, max);
break;
case Avg:
globalOrdinalsWithScoreCollector = new GlobalOrdinalsWithScoreCollector.Avg(joinField, ordinalMap, valueCount, min, max);
break;
case None:
if (min <= 0 && max == Integer.MAX_VALUE) {
GlobalOrdinalsCollector globalOrdinalsCollector = new GlobalOrdinalsCollector(joinField, ordinalMap, valueCount);
searcher.search(rewrittenFromQuery, globalOrdinalsCollector);
return new GlobalOrdinalsQuery(globalOrdinalsCollector.getCollectorOrdinals(), joinField, ordinalMap, rewrittenToQuery,
rewrittenFromQuery, searcher.getTopReaderContext().id());
} else {
globalOrdinalsWithScoreCollector = new GlobalOrdinalsWithScoreCollector.NoScore(joinField, ordinalMap, valueCount, min, max);
break;
}
default:
throw new IllegalArgumentException(String.format(Locale.ROOT, "Score mode %s isn't supported.", scoreMode));
}
searcher.search(rewrittenFromQuery, globalOrdinalsWithScoreCollector);
return new GlobalOrdinalsWithScoreQuery(globalOrdinalsWithScoreCollector, scoreMode, joinField, ordinalMap, rewrittenToQuery,
rewrittenFromQuery, min, max, searcher.getTopReaderContext().id());
}
private static void checkReaderSanity(LeafReader reader) throws IOException {
for (FieldInfo info : reader.getFieldInfos()) {
// reader shouldn't return normValues if the field does not have them
if (!info.hasNorms()) {
if (reader.getNormValues(info.name) != null) {
throw new RuntimeException("field: " + info.name + " should omit norms but has them!");
}
}
// reader shouldn't return docValues if the field does not have them
// reader shouldn't return multiple docvalues types for the same field.
switch(info.getDocValuesType()) {
case NONE:
if (reader.getBinaryDocValues(info.name) != null ||
reader.getNumericDocValues(info.name) != null ||
reader.getSortedDocValues(info.name) != null ||
reader.getSortedSetDocValues(info.name) != null) {
throw new RuntimeException("field: " + info.name + " has docvalues but should omit them!");
}
break;
case SORTED:
if (reader.getBinaryDocValues(info.name) != null ||
reader.getNumericDocValues(info.name) != null ||
reader.getSortedNumericDocValues(info.name) != null ||
reader.getSortedSetDocValues(info.name) != null) {
throw new RuntimeException(info.name + " returns multiple docvalues types!");
}
break;
case SORTED_NUMERIC:
if (reader.getBinaryDocValues(info.name) != null ||
reader.getNumericDocValues(info.name) != null ||
reader.getSortedSetDocValues(info.name) != null ||
reader.getSortedDocValues(info.name) != null) {
throw new RuntimeException(info.name + " returns multiple docvalues types!");
}
break;
case SORTED_SET:
if (reader.getBinaryDocValues(info.name) != null ||
reader.getNumericDocValues(info.name) != null ||
reader.getSortedNumericDocValues(info.name) != null ||
reader.getSortedDocValues(info.name) != null) {
throw new RuntimeException(info.name + " returns multiple docvalues types!");
}
break;
case BINARY:
if (reader.getNumericDocValues(info.name) != null ||
reader.getSortedDocValues(info.name) != null ||
reader.getSortedNumericDocValues(info.name) != null ||
reader.getSortedSetDocValues(info.name) != null) {
throw new RuntimeException(info.name + " returns multiple docvalues types!");
}
break;
case NUMERIC:
if (reader.getBinaryDocValues(info.name) != null ||
reader.getSortedDocValues(info.name) != null ||
reader.getSortedNumericDocValues(info.name) != null ||
reader.getSortedSetDocValues(info.name) != null) {
throw new RuntimeException(info.name + " returns multiple docvalues types!");
}
break;
default:
throw new AssertionError();
}
}
}
private void doTestSortedVsFieldCache(int minLength, int maxLength) throws Exception {
Directory dir = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
Document doc = new Document();
Field idField = new StringField("id", "", Field.Store.NO);
Field indexedField = new StringField("indexed", "", Field.Store.NO);
Field dvField = new SortedDocValuesField("dv", new BytesRef());
doc.add(idField);
doc.add(indexedField);
doc.add(dvField);
// index some docs
int numDocs = atLeast(300);
for (int i = 0; i < numDocs; i++) {
idField.setStringValue(Integer.toString(i));
final int length;
if (minLength == maxLength) {
length = minLength; // fixed length
} else {
length = TestUtil.nextInt(random(), minLength, maxLength);
}
String value = TestUtil.randomSimpleString(random(), length);
indexedField.setStringValue(value);
dvField.setBytesValue(new BytesRef(value));
writer.addDocument(doc);
if (random().nextInt(31) == 0) {
writer.commit();
}
}
// delete some docs
int numDeletions = random().nextInt(numDocs/10);
for (int i = 0; i < numDeletions; i++) {
int id = random().nextInt(numDocs);
writer.deleteDocuments(new Term("id", Integer.toString(id)));
}
writer.close();
// compare
DirectoryReader ir = DirectoryReader.open(dir);
for (LeafReaderContext context : ir.leaves()) {
LeafReader r = context.reader();
SortedDocValues expected = FieldCache.DEFAULT.getTermsIndex(r, "indexed");
SortedDocValues actual = r.getSortedDocValues("dv");
assertEquals(r.maxDoc(), expected, actual);
}
ir.close();
dir.close();
}
@Test
public void testDocValues() throws IOException {
assertU(adoc("id", "1"));
assertU(commit());
try (SolrCore core = h.getCoreInc()) {
final RefCounted<SolrIndexSearcher> searcherRef = core.openNewSearcher(true, true);
final SolrIndexSearcher searcher = searcherRef.get();
try {
final LeafReader reader = searcher.getSlowAtomicReader();
assertEquals(1, reader.numDocs());
final FieldInfos infos = reader.getFieldInfos();
assertEquals(DocValuesType.NUMERIC, infos.fieldInfo("floatdv").getDocValuesType());
assertEquals(DocValuesType.NUMERIC, infos.fieldInfo("intdv").getDocValuesType());
assertEquals(DocValuesType.NUMERIC, infos.fieldInfo("doubledv").getDocValuesType());
assertEquals(DocValuesType.NUMERIC, infos.fieldInfo("longdv").getDocValuesType());
assertEquals(DocValuesType.SORTED, infos.fieldInfo("stringdv").getDocValuesType());
assertEquals(DocValuesType.SORTED, infos.fieldInfo("booldv").getDocValuesType());
NumericDocValues dvs = reader.getNumericDocValues("floatdv");
assertEquals(0, dvs.nextDoc());
assertEquals((long) Float.floatToIntBits(1), dvs.longValue());
dvs = reader.getNumericDocValues("intdv");
assertEquals(0, dvs.nextDoc());
assertEquals(2L, dvs.longValue());
dvs = reader.getNumericDocValues("doubledv");
assertEquals(0, dvs.nextDoc());
assertEquals(Double.doubleToLongBits(3), dvs.longValue());
dvs = reader.getNumericDocValues("longdv");
assertEquals(0, dvs.nextDoc());
assertEquals(4L, dvs.longValue());
SortedDocValues sdv = reader.getSortedDocValues("stringdv");
assertEquals(0, sdv.nextDoc());
assertEquals("solr", sdv.binaryValue().utf8ToString());
sdv = reader.getSortedDocValues("booldv");
assertEquals(0, sdv.nextDoc());
assertEquals("T", sdv.binaryValue().utf8ToString());
final IndexSchema schema = core.getLatestSchema();
final SchemaField floatDv = schema.getField("floatdv");
final SchemaField intDv = schema.getField("intdv");
final SchemaField doubleDv = schema.getField("doubledv");
final SchemaField longDv = schema.getField("longdv");
final SchemaField boolDv = schema.getField("booldv");
FunctionValues values = floatDv.getType().getValueSource(floatDv, null).getValues(null, searcher.getSlowAtomicReader().leaves().get(0));
assertEquals(1f, values.floatVal(0), 0f);
assertEquals(1f, values.objectVal(0));
values = intDv.getType().getValueSource(intDv, null).getValues(null, searcher.getSlowAtomicReader().leaves().get(0));
assertEquals(2, values.intVal(0));
assertEquals(2, values.objectVal(0));
values = doubleDv.getType().getValueSource(doubleDv, null).getValues(null, searcher.getSlowAtomicReader().leaves().get(0));
assertEquals(3d, values.doubleVal(0), 0d);
assertEquals(3d, values.objectVal(0));
values = longDv.getType().getValueSource(longDv, null).getValues(null, searcher.getSlowAtomicReader().leaves().get(0));
assertEquals(4L, values.longVal(0));
assertEquals(4L, values.objectVal(0));
values = boolDv.getType().getValueSource(boolDv, null).getValues(null, searcher.getSlowAtomicReader().leaves().get(0));
assertEquals("true", values.strVal(0));
assertEquals(true, values.objectVal(0));
// check reversibility of created fields
tstToObj(schema.getField("floatdv"), -1.5f);
tstToObj(schema.getField("floatdvs"), -1.5f);
tstToObj(schema.getField("doubledv"), -1.5d);
tstToObj(schema.getField("doubledvs"), -1.5d);
tstToObj(schema.getField("intdv"), -7);
tstToObj(schema.getField("intdvs"), -7);
tstToObj(schema.getField("longdv"), -11L);
tstToObj(schema.getField("longdvs"), -11L);
tstToObj(schema.getField("datedv"), new Date(1000));
tstToObj(schema.getField("datedvs"), new Date(1000));
tstToObj(schema.getField("stringdv"), "foo");
tstToObj(schema.getField("stringdvs"), "foo");
tstToObj(schema.getField("booldv"), true);
tstToObj(schema.getField("booldvs"), true);
} finally {
searcherRef.decref();
}
}
}