org.apache.lucene.index.LeafReader#getSortedDocValues() 源码实例 Demo

下面列出了 org.apache.lucene.index.LeafReader#getSortedDocValues() 的实例代码，您可以点击链接到 GitHub 查看源代码，也可以在右侧发表评论。

源代码1 项目: lucene-solr   文件: FieldCacheImpl.java
/**
 * Returns sorted doc values for {@code field}.
 *
 * <p>Values exposed directly by the reader are returned as-is. Otherwise the field's
 * {@link FieldInfo} decides the outcome: an unknown field or a field that is not indexed
 * yields an empty instance, a field carrying doc values of another type is rejected, and
 * anything else is resolved through this cache.
 *
 * @param reader the leaf reader to read from
 * @param field the field name
 * @param acceptableOverheadRatio passed through to the {@code CacheKey} used for the lookup
 * @return the sorted doc values, never {@code null} on the paths that consult the field info
 * @throws IllegalStateException if the field has doc values of a non-sorted type
 * @throws IOException if the reader fails
 */
public SortedDocValues getTermsIndex(LeafReader reader, String field, float acceptableOverheadRatio) throws IOException {
  final SortedDocValues fromReader = reader.getSortedDocValues(field);
  if (fromReader != null) {
    // Not cached here by FieldCacheImpl (cached instead
    // per-thread by SegmentReader):
    return fromReader;
  }

  final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
  if (fieldInfo == null) {
    // The reader does not know this field at all.
    return DocValues.emptySorted();
  }
  if (fieldInfo.getDocValuesType() != DocValuesType.NONE) {
    // No attempt is made to build a sorted instance from numeric/binary doc
    // values because dedup can be very costly.
    throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + fieldInfo.getDocValuesType());
  }
  if (fieldInfo.getIndexOptions() == IndexOptions.NONE) {
    // Not indexed either, so there is nothing to build values from.
    return DocValues.emptySorted();
  }

  final SortedDocValuesImpl cached =
      (SortedDocValuesImpl) caches.get(SortedDocValues.class).get(reader, new CacheKey(field, acceptableOverheadRatio));
  return cached.iterator();
}
 
源代码2 项目: lucene-solr   文件: FieldCacheImpl.java
/**
 * Returns binary doc values for {@code field}.
 *
 * <p>The reader is asked for binary doc values first and for sorted doc values second;
 * whichever is present is returned directly. When neither exists, the field's
 * {@link FieldInfo} decides the outcome: an unknown or non-indexed field yields an empty
 * instance, a field with doc values of another type is rejected, and anything else is
 * resolved through this cache.
 *
 * @param reader the leaf reader to read from
 * @param field the field name
 * @param acceptableOverheadRatio passed through to the {@code CacheKey} used for the lookup
 * @return the binary doc values for the field
 * @throws IllegalStateException if the field has doc values that are neither binary nor sorted
 * @throws IOException if the reader fails
 */
public BinaryDocValues getTerms(LeafReader reader, String field, float acceptableOverheadRatio) throws IOException {
  // Not cached here by FieldCacheImpl (cached instead
  // per-thread by SegmentReader):
  final BinaryDocValues binary = reader.getBinaryDocValues(field);
  if (binary != null) {
    return binary;
  }
  final BinaryDocValues sorted = reader.getSortedDocValues(field);
  if (sorted != null) {
    return sorted;
  }

  final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
  if (fieldInfo == null) {
    // The reader does not know this field at all.
    return DocValues.emptyBinary();
  }
  if (fieldInfo.getDocValuesType() != DocValuesType.NONE) {
    throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + fieldInfo.getDocValuesType());
  }
  if (fieldInfo.getIndexOptions() == IndexOptions.NONE) {
    // Not indexed either, so there is nothing to build values from.
    return DocValues.emptyBinary();
  }

  final BinaryDocValuesImpl cached =
      (BinaryDocValuesImpl) caches.get(BinaryDocValues.class).get(reader, new CacheKey(field, acceptableOverheadRatio));
  return cached.iterator();
}
 
源代码3 项目: HongsCORE   文件: StatisHelper.java
/**
 * Binds one doc-values source per configured field for the given segment and returns
 * this object as the collector for that segment.
 *
 * <p>For each index {@code i}, {@code groups[i][0] >= 1} selects the numeric getters and
 * {@code groups[i][1] == 1} selects the multi-valued variant. Multi-valued sources are
 * looked up under the field name prefixed with {@code "%"}, single-valued ones under the
 * name prefixed with {@code "#"}.
 *
 * @param lrc the segment context whose reader supplies the doc values
 * @return this instance
 * @throws IOException if the reader fails
 */
@Override
public LeafCollector getLeafCollector(LeafReaderContext lrc) throws IOException {
    final LeafReader leaf = lrc.reader();

    for (int idx = 0; idx < fields.length; idx++) {
        final boolean useNumeric = groups[idx][0] >= 1;
        final boolean useMulti = groups[idx][1] == 1;

        if (useNumeric && useMulti) {
            values[idx] = leaf.getSortedNumericDocValues("%" + fields[idx]);
        } else if (useNumeric) {
            values[idx] = leaf.getNumericDocValues("#" + fields[idx]);
        } else if (useMulti) {
            values[idx] = leaf.getSortedSetDocValues("%" + fields[idx]);
        } else {
            values[idx] = leaf.getSortedDocValues("#" + fields[idx]);
        }
    }

    return this;
}
 
源代码4 项目: lucene-solr   文件: DocValuesFieldExistsQuery.java
/**
 * Looks up the doc values of {@code field} in {@code reader} and exposes them as a plain
 * {@link DocIdSetIterator}.
 *
 * @param field the field name
 * @param reader the leaf reader to read from
 * @return the iterator matching the field's doc-values type, or {@code null} if the field
 *         doesn't exist in the reader or if the reader has no doc values for the field
 * @throws IOException if the reader fails
 */
public static DocIdSetIterator getDocValuesDocIdSetIterator(String field, LeafReader reader) throws IOException {
  final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
  if (info == null) {
    // Unknown field.
    return null;
  }

  // Dispatch to the getter that matches the declared doc-values type.
  switch (info.getDocValuesType()) {
    case NONE:
      return null;
    case NUMERIC:
      return reader.getNumericDocValues(field);
    case BINARY:
      return reader.getBinaryDocValues(field);
    case SORTED:
      return reader.getSortedDocValues(field);
    case SORTED_NUMERIC:
      return reader.getSortedNumericDocValues(field);
    case SORTED_SET:
      return reader.getSortedSetDocValues(field);
    default:
      throw new AssertionError();
  }
}
 
源代码5 项目: lucene-solr   文件: FieldCacheImpl.java
/**
 * Builds a {@code BitsEntry} whose bit set marks every document visited by the field's
 * doc-values iterator.
 *
 * <p>The field info is dereferenced without a null check and the {@code NONE} type ends in
 * an {@link AssertionError}, so this method presumably expects callers to have verified
 * that the field exists and carries doc values (verify against the call site).
 *
 * @param reader the leaf reader to read from
 * @param field the field name
 * @return an entry wrapping a bit set sized to {@code reader.maxDoc()}
 * @throws IOException if the reader fails
 */
private BitsEntry createValueDocValues(LeafReader reader, String field) throws IOException {
  final FieldInfo info = reader.getFieldInfos().fieldInfo(field);

  // Pick the iterator that matches the declared doc-values type.
  final DocIdSetIterator docsWithValue;
  switch (info.getDocValuesType()) {
    case NUMERIC:
      docsWithValue = reader.getNumericDocValues(field);
      break;
    case BINARY:
      docsWithValue = reader.getBinaryDocValues(field);
      break;
    case SORTED:
      docsWithValue = reader.getSortedDocValues(field);
      break;
    case SORTED_NUMERIC:
      docsWithValue = reader.getSortedNumericDocValues(field);
      break;
    case SORTED_SET:
      docsWithValue = reader.getSortedSetDocValues(field);
      break;
    default:
      throw new AssertionError();
  }

  // Walk the iterator to exhaustion, flagging each document it stops on.
  final FixedBitSet docsWithField = new FixedBitSet(reader.maxDoc());
  for (int doc = docsWithValue.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = docsWithValue.nextDoc()) {
    docsWithField.set(doc);
  }

  return new BitsEntry(docsWithField);
}
 
源代码6 项目: lucene-solr   文件: TestMemoryIndex.java
/**
 * Indexes a single document carrying one field of every doc-values type into a
 * {@code MemoryIndex} and verifies that each type can be read back through the leaf reader:
 * the document is found at doc ID 0, the stored values come back as expected (sorted and,
 * for the sorted-set field, de-duplicated), and every iterator is exhausted afterwards.
 */
public void testDocValues() throws Exception {
  Document doc = new Document();
  doc.add(new NumericDocValuesField("numeric", 29L));
  doc.add(new SortedNumericDocValuesField("sorted_numeric", 33L));
  doc.add(new SortedNumericDocValuesField("sorted_numeric", 32L));
  doc.add(new SortedNumericDocValuesField("sorted_numeric", 32L));
  doc.add(new SortedNumericDocValuesField("sorted_numeric", 31L));
  doc.add(new SortedNumericDocValuesField("sorted_numeric", 30L));
  doc.add(new BinaryDocValuesField("binary", new BytesRef("a")));
  doc.add(new SortedDocValuesField("sorted", new BytesRef("b")));
  doc.add(new SortedSetDocValuesField("sorted_set", new BytesRef("f")));
  doc.add(new SortedSetDocValuesField("sorted_set", new BytesRef("d")));
  doc.add(new SortedSetDocValuesField("sorted_set", new BytesRef("d")));
  doc.add(new SortedSetDocValuesField("sorted_set", new BytesRef("c")));

  MemoryIndex mi = MemoryIndex.fromDocument(doc, analyzer);
  LeafReader leafReader = mi.createSearcher().getIndexReader().leaves().get(0).reader();

  // NUMERIC: a single value on the only document.
  NumericDocValues numericDocValues = leafReader.getNumericDocValues("numeric");
  assertEquals(0, numericDocValues.nextDoc());
  assertEquals(29L, numericDocValues.longValue());
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, numericDocValues.nextDoc());

  // SORTED_NUMERIC: all five values are kept (duplicates included) in ascending order.
  SortedNumericDocValues sortedNumericDocValues = leafReader.getSortedNumericDocValues("sorted_numeric");
  assertEquals(0, sortedNumericDocValues.nextDoc());
  assertEquals(5, sortedNumericDocValues.docValueCount());
  assertEquals(30L, sortedNumericDocValues.nextValue());
  assertEquals(31L, sortedNumericDocValues.nextValue());
  assertEquals(32L, sortedNumericDocValues.nextValue());
  assertEquals(32L, sortedNumericDocValues.nextValue());
  assertEquals(33L, sortedNumericDocValues.nextValue());
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, sortedNumericDocValues.nextDoc());

  // BINARY
  BinaryDocValues binaryDocValues = leafReader.getBinaryDocValues("binary");
  assertEquals(0, binaryDocValues.nextDoc());
  assertEquals("a", binaryDocValues.binaryValue().utf8ToString());
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, binaryDocValues.nextDoc());

  // SORTED: one term, reachable both as the document's value and through its ordinal.
  SortedDocValues sortedDocValues = leafReader.getSortedDocValues("sorted");
  assertEquals(0, sortedDocValues.nextDoc());
  assertEquals("b", sortedDocValues.binaryValue().utf8ToString());
  assertEquals(0, sortedDocValues.ordValue());
  assertEquals("b", sortedDocValues.lookupOrd(0).utf8ToString());
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, sortedDocValues.nextDoc());

  // SORTED_SET: the duplicate "d" collapses, leaving three distinct terms in sorted order.
  SortedSetDocValues sortedSetDocValues = leafReader.getSortedSetDocValues("sorted_set");
  assertEquals(3, sortedSetDocValues.getValueCount());
  assertEquals(0, sortedSetDocValues.nextDoc());
  assertEquals(0L, sortedSetDocValues.nextOrd());
  assertEquals(1L, sortedSetDocValues.nextOrd());
  assertEquals(2L, sortedSetDocValues.nextOrd());
  assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSetDocValues.nextOrd());
  assertEquals("c", sortedSetDocValues.lookupOrd(0L).utf8ToString());
  assertEquals("d", sortedSetDocValues.lookupOrd(1L).utf8ToString());
  assertEquals("f", sortedSetDocValues.lookupOrd(2L).utf8ToString());
  // Check exhaustion of the sorted-set iterator itself. The previous code re-advanced the
  // already exhausted sortedDocValues here, which left this iterator unchecked.
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, sortedSetDocValues.nextDoc());
}
 
源代码7 项目: lucene-solr   文件: JoinUtil.java
/**
 * Creates a query-time join that uses global ordinals over a dedicated join field.
 *
 * <p>Restrictions and requirements of this join:
 * <ol>
 *   <li>A document can only refer to one other document (but can be referred to by one or
 *       more documents).</li>
 *   <li>Documents on each side of the join must be distinguishable. Typically this is done
 *       by adding an extra field that identifies the "from" and "to" side; the fromQuery and
 *       toQuery must then take this into account.</li>
 *   <li>There must be a single sorted doc values join field used by both the "from" and "to"
 *       documents. This join field should store the join values as UTF-8 strings.</li>
 *   <li>An ordinal map must be provided that is created on top of the join field.</li>
 * </ol>
 *
 * <p>Note: min and max filtering and the avg score mode require this join to keep track of
 * the number of times a document matches per join value. This increases the per-join cost in
 * terms of execution time and memory.
 *
 * @param joinField   The {@link SortedDocValues} field containing the join values
 * @param fromQuery   The query containing the actual user query. The fromQuery can only match
 *                    "from" documents.
 * @param toQuery     The query identifying all documents on the "to" side.
 * @param searcher    The index searcher used to execute the from query
 * @param scoreMode   Instructs how scores from the fromQuery are mapped to the returned query
 * @param ordinalMap  The ordinal map constructed over the joinField. For a single-segment
 *                    index no ordinal map needs to be provided (any supplied map is ignored).
 * @param min         Optionally the minimum number of "from" documents that are required to
 *                    match for a "to" document to be a match. The min is inclusive. Setting
 *                    min to 0 and max to <code>Integer.MAX_VALUE</code> disables the min and
 *                    max "from" documents filtering
 * @param max         Optionally the maximum number of "from" documents that are allowed to
 *                    match for a "to" document to be a match. The max is inclusive. Setting
 *                    min to 0 and max to <code>Integer.MAX_VALUE</code> disables the min and
 *                    max "from" documents filtering
 * @return a {@link Query} instance that can be used to join documents based on the join field
 * @throws IllegalArgumentException if the index has more than one segment and no ordinal map
 *                                  is given, or if the score mode is not supported
 * @throws IOException If I/O related errors occur
 */
public static Query createJoinQuery(String joinField,
                                    Query fromQuery,
                                    Query toQuery,
                                    IndexSearcher searcher,
                                    ScoreMode scoreMode,
                                    OrdinalMap ordinalMap,
                                    int min,
                                    int max) throws IOException {
  final int segmentCount = searcher.getIndexReader().leaves().size();
  if (segmentCount == 0) {
    return new MatchNoDocsQuery("JoinUtil.createJoinQuery with no segments");
  }

  final long valueCount;
  if (segmentCount == 1) {
    // No need to use the ordinal map, because there is just one segment.
    ordinalMap = null;
    LeafReader onlyLeaf = searcher.getIndexReader().leaves().get(0).reader();
    SortedDocValues joinValues = onlyLeaf.getSortedDocValues(joinField);
    if (joinValues == null) {
      return new MatchNoDocsQuery("JoinUtil.createJoinQuery: no join values");
    }
    valueCount = joinValues.getValueCount();
  } else {
    if (ordinalMap == null) {
      throw new IllegalArgumentException("OrdinalMap is required, because there is more than 1 segment");
    }
    valueCount = ordinalMap.getValueCount();
  }

  final Query rewrittenFromQuery = searcher.rewrite(fromQuery);
  final Query rewrittenToQuery = searcher.rewrite(toQuery);

  // Choose the collector that implements the requested score aggregation.
  final GlobalOrdinalsWithScoreCollector scoringCollector;
  switch (scoreMode) {
    case Total:
      scoringCollector = new GlobalOrdinalsWithScoreCollector.Sum(joinField, ordinalMap, valueCount, min, max);
      break;
    case Min:
      scoringCollector = new GlobalOrdinalsWithScoreCollector.Min(joinField, ordinalMap, valueCount, min, max);
      break;
    case Max:
      scoringCollector = new GlobalOrdinalsWithScoreCollector.Max(joinField, ordinalMap, valueCount, min, max);
      break;
    case Avg:
      scoringCollector = new GlobalOrdinalsWithScoreCollector.Avg(joinField, ordinalMap, valueCount, min, max);
      break;
    case None:
      if (min <= 0 && max == Integer.MAX_VALUE) {
        // Neither scores nor min/max filtering are needed: the plain ordinals collector
        // and query are used instead of the score-tracking variants.
        GlobalOrdinalsCollector ordinalsCollector = new GlobalOrdinalsCollector(joinField, ordinalMap, valueCount);
        searcher.search(rewrittenFromQuery, ordinalsCollector);
        return new GlobalOrdinalsQuery(ordinalsCollector.getCollectorOrdinals(), joinField, ordinalMap, rewrittenToQuery,
            rewrittenFromQuery, searcher.getTopReaderContext().id());
      }
      scoringCollector = new GlobalOrdinalsWithScoreCollector.NoScore(joinField, ordinalMap, valueCount, min, max);
      break;
    default:
      throw new IllegalArgumentException(String.format(Locale.ROOT, "Score mode %s isn't supported.", scoreMode));
  }

  searcher.search(rewrittenFromQuery, scoringCollector);
  return new GlobalOrdinalsWithScoreQuery(scoringCollector, scoreMode, joinField, ordinalMap, rewrittenToQuery,
      rewrittenFromQuery, min, max, searcher.getTopReaderContext().id());
}
 
源代码8 项目: lucene-solr   文件: TestUtil.java
/**
 * Checks that the reader's per-field accessors agree with its {@link FieldInfo} metadata.
 *
 * <p>For every field the reader reports:
 * <ul>
 *   <li>a field without norms must not return norm values;</li>
 *   <li>a field without doc values must not return doc values of any type;</li>
 *   <li>a field with doc values must not return doc values of any type other than the
 *       declared one.</li>
 * </ul>
 *
 * @param reader the leaf reader to inspect
 * @throws RuntimeException if the reader returns norms or doc values that contradict the
 *                          field's metadata
 * @throws IOException if the reader fails
 */
private static void checkReaderSanity(LeafReader reader) throws IOException {
  for (FieldInfo info : reader.getFieldInfos()) {
    
    // reader shouldn't return normValues if the field does not have them
    if (!info.hasNorms()) {
      if (reader.getNormValues(info.name) != null) {
        throw new RuntimeException("field: " + info.name + " should omit norms but has them!");
      }
    }
    
    // reader shouldn't return docValues if the field does not have them
    // reader shouldn't return multiple docvalues types for the same field.
    switch(info.getDocValuesType()) {
      case NONE:
        // Every doc-values type must be absent, including SORTED_NUMERIC, which the
        // earlier version of this check did not cover.
        if (reader.getBinaryDocValues(info.name) != null ||
            reader.getNumericDocValues(info.name) != null ||
            reader.getSortedDocValues(info.name) != null || 
            reader.getSortedNumericDocValues(info.name) != null ||
            reader.getSortedSetDocValues(info.name) != null) {
          throw new RuntimeException("field: " + info.name + " has docvalues but should omit them!");
        }
        break;
      case SORTED:
        if (reader.getBinaryDocValues(info.name) != null ||
            reader.getNumericDocValues(info.name) != null ||
            reader.getSortedNumericDocValues(info.name) != null ||
            reader.getSortedSetDocValues(info.name) != null) {
          throw new RuntimeException(info.name + " returns multiple docvalues types!");
        }
        break;
      case SORTED_NUMERIC:
        if (reader.getBinaryDocValues(info.name) != null ||
            reader.getNumericDocValues(info.name) != null ||
            reader.getSortedSetDocValues(info.name) != null ||
            reader.getSortedDocValues(info.name) != null) {
          throw new RuntimeException(info.name + " returns multiple docvalues types!");
        }
        break;
      case SORTED_SET:
        if (reader.getBinaryDocValues(info.name) != null ||
            reader.getNumericDocValues(info.name) != null ||
            reader.getSortedNumericDocValues(info.name) != null ||
            reader.getSortedDocValues(info.name) != null) {
          throw new RuntimeException(info.name + " returns multiple docvalues types!");
        }
        break;
      case BINARY:
        if (reader.getNumericDocValues(info.name) != null ||
            reader.getSortedDocValues(info.name) != null ||
            reader.getSortedNumericDocValues(info.name) != null ||
            reader.getSortedSetDocValues(info.name) != null) {
          throw new RuntimeException(info.name + " returns multiple docvalues types!");
        }
        break;
      case NUMERIC:
        if (reader.getBinaryDocValues(info.name) != null ||
            reader.getSortedDocValues(info.name) != null ||
            reader.getSortedNumericDocValues(info.name) != null ||
            reader.getSortedSetDocValues(info.name) != null) {
          throw new RuntimeException(info.name + " returns multiple docvalues types!");
        }
        break;
      default:
        throw new AssertionError();
    }
  }
}
 
源代码9 项目: lucene-solr   文件: TestFieldCacheVsDocValues.java
/**
 * Indexes random strings both as an indexed string field and as a sorted doc-values field,
 * deletes a random subset of documents, and then asserts per segment that the terms index
 * obtained from {@code FieldCache} over the indexed field equals the sorted doc values
 * read directly from the reader.
 *
 * @param minLength minimum length of each random value
 * @param maxLength maximum length of each random value; equal to {@code minLength} for
 *                  fixed-length values
 */
private void doTestSortedVsFieldCache(int minLength, int maxLength) throws Exception {
  Directory directory = newDirectory();
  IndexWriterConfig config = newIndexWriterConfig(new MockAnalyzer(random()));
  RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory, config);

  // A single document instance is reused; only the field values change per iteration.
  Document document = new Document();
  Field id = new StringField("id", "", Field.Store.NO);
  Field indexed = new StringField("indexed", "", Field.Store.NO);
  Field docValues = new SortedDocValuesField("dv", new BytesRef());
  document.add(id);
  document.add(indexed);
  document.add(docValues);

  // index some docs
  final int docCount = atLeast(300);
  for (int docNum = 0; docNum < docCount; docNum++) {
    id.setStringValue(Integer.toString(docNum));
    // Only draw a random length when the range is not degenerate (fixed length otherwise).
    final int length = (minLength == maxLength)
        ? minLength
        : TestUtil.nextInt(random(), minLength, maxLength);
    final String term = TestUtil.randomSimpleString(random(), length);
    indexed.setStringValue(term);
    docValues.setBytesValue(new BytesRef(term));
    indexWriter.addDocument(document);
    if (random().nextInt(31) == 0) {
      // Occasional commits between documents.
      indexWriter.commit();
    }
  }

  // delete some docs
  for (int remaining = random().nextInt(docCount / 10); remaining > 0; remaining--) {
    final String victim = Integer.toString(random().nextInt(docCount));
    indexWriter.deleteDocuments(new Term("id", victim));
  }
  indexWriter.close();

  // compare
  DirectoryReader indexReader = DirectoryReader.open(directory);
  for (LeafReaderContext leaf : indexReader.leaves()) {
    LeafReader segment = leaf.reader();
    SortedDocValues fromFieldCache = FieldCache.DEFAULT.getTermsIndex(segment, "indexed");
    SortedDocValues fromDocValues = segment.getSortedDocValues("dv");
    assertEquals(segment.maxDoc(), fromFieldCache, fromDocValues);
  }
  indexReader.close();
  directory.close();
}
 
源代码10 项目: lucene-solr   文件: DocValuesTest.java
/**
 * Adds a single document and verifies three things about the doc-values fields:
 * the doc-values type recorded for each field, the raw values read through the leaf
 * reader, and the decoded values produced by each field type's value source. Finally
 * checks the reversibility of created fields via {@code tstToObj}.
 */
@Test
public void testDocValues() throws IOException {
  assertU(adoc("id", "1"));
  assertU(commit());
  try (SolrCore core = h.getCoreInc()) {
    final RefCounted<SolrIndexSearcher> holder = core.openNewSearcher(true, true);
    final SolrIndexSearcher searcher = holder.get();
    try {
      final LeafReader reader = searcher.getSlowAtomicReader();
      assertEquals(1, reader.numDocs());

      // Doc-values type recorded for each field.
      final FieldInfos infos = reader.getFieldInfos();
      assertEquals(DocValuesType.NUMERIC, infos.fieldInfo("floatdv").getDocValuesType());
      assertEquals(DocValuesType.NUMERIC, infos.fieldInfo("intdv").getDocValuesType());
      assertEquals(DocValuesType.NUMERIC, infos.fieldInfo("doubledv").getDocValuesType());
      assertEquals(DocValuesType.NUMERIC, infos.fieldInfo("longdv").getDocValuesType());
      assertEquals(DocValuesType.SORTED, infos.fieldInfo("stringdv").getDocValuesType());
      assertEquals(DocValuesType.SORTED, infos.fieldInfo("booldv").getDocValuesType());

      // Raw numeric values: float/double are compared by their bit patterns.
      final NumericDocValues floatRaw = reader.getNumericDocValues("floatdv");
      assertEquals(0, floatRaw.nextDoc());
      assertEquals((long) Float.floatToIntBits(1), floatRaw.longValue());

      final NumericDocValues intRaw = reader.getNumericDocValues("intdv");
      assertEquals(0, intRaw.nextDoc());
      assertEquals(2L, intRaw.longValue());

      final NumericDocValues doubleRaw = reader.getNumericDocValues("doubledv");
      assertEquals(0, doubleRaw.nextDoc());
      assertEquals(Double.doubleToLongBits(3), doubleRaw.longValue());

      final NumericDocValues longRaw = reader.getNumericDocValues("longdv");
      assertEquals(0, longRaw.nextDoc());
      assertEquals(4L, longRaw.longValue());

      // Raw sorted values: the boolean field is read back as the term "T".
      final SortedDocValues stringRaw = reader.getSortedDocValues("stringdv");
      assertEquals(0, stringRaw.nextDoc());
      assertEquals("solr", stringRaw.binaryValue().utf8ToString());

      final SortedDocValues boolRaw = reader.getSortedDocValues("booldv");
      assertEquals(0, boolRaw.nextDoc());
      assertEquals("T", boolRaw.binaryValue().utf8ToString());

      final IndexSchema schema = core.getLatestSchema();
      final SchemaField floatDv = schema.getField("floatdv");
      final SchemaField intDv = schema.getField("intdv");
      final SchemaField doubleDv = schema.getField("doubledv");
      final SchemaField longDv = schema.getField("longdv");
      final SchemaField boolDv = schema.getField("booldv");

      // Decoded values as seen through each field type's value source.
      final FunctionValues floatValues =
          floatDv.getType().getValueSource(floatDv, null).getValues(null, searcher.getSlowAtomicReader().leaves().get(0));
      assertEquals(1f, floatValues.floatVal(0), 0f);
      assertEquals(1f, floatValues.objectVal(0));

      final FunctionValues intValues =
          intDv.getType().getValueSource(intDv, null).getValues(null, searcher.getSlowAtomicReader().leaves().get(0));
      assertEquals(2, intValues.intVal(0));
      assertEquals(2, intValues.objectVal(0));

      final FunctionValues doubleValues =
          doubleDv.getType().getValueSource(doubleDv, null).getValues(null, searcher.getSlowAtomicReader().leaves().get(0));
      assertEquals(3d, doubleValues.doubleVal(0), 0d);
      assertEquals(3d, doubleValues.objectVal(0));

      final FunctionValues longValues =
          longDv.getType().getValueSource(longDv, null).getValues(null, searcher.getSlowAtomicReader().leaves().get(0));
      assertEquals(4L, longValues.longVal(0));
      assertEquals(4L, longValues.objectVal(0));

      final FunctionValues boolValues =
          boolDv.getType().getValueSource(boolDv, null).getValues(null, searcher.getSlowAtomicReader().leaves().get(0));
      assertEquals("true", boolValues.strVal(0));
      assertEquals(true, boolValues.objectVal(0));

      // check reversibility of created fields
      tstToObj(schema.getField("floatdv"), -1.5f);
      tstToObj(schema.getField("floatdvs"), -1.5f);
      tstToObj(schema.getField("doubledv"), -1.5d);
      tstToObj(schema.getField("doubledvs"), -1.5d);
      tstToObj(schema.getField("intdv"), -7);
      tstToObj(schema.getField("intdvs"), -7);
      tstToObj(schema.getField("longdv"), -11L);
      tstToObj(schema.getField("longdvs"), -11L);
      tstToObj(schema.getField("datedv"), new Date(1000));
      tstToObj(schema.getField("datedvs"), new Date(1000));
      tstToObj(schema.getField("stringdv"), "foo");
      tstToObj(schema.getField("stringdvs"), "foo");
      tstToObj(schema.getField("booldv"), true);
      tstToObj(schema.getField("booldvs"), true);

    } finally {
      // Release the reference obtained from openNewSearcher.
      holder.decref();
    }
  }
}