The following examples show how to use the org.apache.lucene.index.LeafReader API class and its typical usage patterns; you can also follow the links to view the full source code on GitHub.
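Most of the snippets below assume a LeafReader is already in hand. As a minimal, hedged sketch (the Directory argument is a placeholder, not part of the examples), a LeafReader is usually obtained per segment from the leaves of a composite reader:
// Hedged sketch, not taken from any project below: obtaining per-segment LeafReaders.
static void visitLeaves(Directory dir) throws IOException {
    try (DirectoryReader directoryReader = DirectoryReader.open(dir)) {
        for (LeafReaderContext context : directoryReader.leaves()) {
            LeafReader leafReader = context.reader(); // one LeafReader per index segment
            // call leafReader.terms(...), leafReader.getNumericDocValues(...), etc.
        }
    }
}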
@Override
public AtomicGeoPointFieldData loadDirect(LeafReaderContext context) throws Exception {
LeafReader reader = context.reader();
Terms terms = reader.terms(getFieldNames().indexName());
AtomicGeoPointFieldData data = null;
// TODO: Use an actual estimator to estimate before loading.
NonEstimatingEstimator estimator = new NonEstimatingEstimator(breakerService.getBreaker(CircuitBreaker.FIELDDATA));
if (terms == null) {
data = AbstractAtomicGeoPointFieldData.empty(reader.maxDoc());
estimator.afterLoad(null, data.ramBytesUsed());
return data;
}
return (Version.indexCreated(indexSettings).before(Version.V_2_2_0)) ?
loadLegacyFieldData(reader, estimator, terms, data) : loadFieldData22(reader, estimator, terms, data);
}
private boolean innerMoveNext() throws IOException {
while (tryAdvanceDocIdSetIterator()) {
LeafReader reader = currentLeaf.reader();
Bits liveDocs = reader.getLiveDocs();
int doc;
while ((doc = currentDocIdSetIt.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
if (docDeleted(liveDocs, doc) || belowMinScore(currentScorer)) {
continue;
}
onDoc(doc);
return true;
}
currentDocIdSetIt = null;
}
clearState();
return false;
}
@Test
public void testGetConfusionMatrixWithSNB() throws Exception {
LeafReader reader = null;
try {
MockAnalyzer analyzer = new MockAnalyzer(random());
reader = getSampleIndex(analyzer);
Classifier<BytesRef> classifier = new SimpleNaiveBayesClassifier(reader, analyzer, null, categoryFieldName, textFieldName);
ConfusionMatrixGenerator.ConfusionMatrix confusionMatrix = ConfusionMatrixGenerator.getConfusionMatrix(reader,
classifier, categoryFieldName, textFieldName, -1);
checkCM(confusionMatrix);
} finally {
if (reader != null) {
reader.close();
}
}
}
/**
* Initialize lookup for the provided segment
*/
public PerThreadIDAndVersionLookup(LeafReader reader) throws IOException {
TermsEnum termsEnum = null;
NumericDocValues versions = null;
boolean hasPayloads = false;
Fields fields = reader.fields();
if (fields != null) {
Terms terms = fields.terms(UidFieldMapper.NAME);
if (terms != null) {
hasPayloads = terms.hasPayloads();
termsEnum = terms.iterator();
assert termsEnum != null;
versions = reader.getNumericDocValues(VersionFieldMapper.NAME);
}
}
this.versions = versions;
this.termsEnum = termsEnum;
this.hasPayloads = hasPayloads;
}
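As a rough usage sketch for the lookup initialized above (illustrative only, assuming the legacy random-access NumericDocValues API that matches the reader.fields() call; the real Elasticsearch call sites differ), the termsEnum is seeked to a uid and the version doc value is read for the matching document:
// Illustrative sketch: resolve the version for a uid with the fields set up above.
// Live-docs handling is omitted for brevity.
long lookupVersion(BytesRef uid) throws IOException {
    if (termsEnum != null && termsEnum.seekExact(uid)) {
        PostingsEnum docs = termsEnum.postings(null, PostingsEnum.NONE);
        int docId = docs.nextDoc();
        if (docId != DocIdSetIterator.NO_MORE_DOCS && versions != null) {
            return versions.get(docId); // legacy API: random access by docId
        }
    }
    return -1; // uid not found in this segment
}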
@Test
public void testGetConfusionMatrixWithBM25NB() throws Exception {
LeafReader reader = null;
try {
MockAnalyzer analyzer = new MockAnalyzer(random());
reader = getSampleIndex(analyzer);
Classifier<BytesRef> classifier = new BM25NBClassifier(reader, analyzer, null, categoryFieldName, textFieldName);
ConfusionMatrixGenerator.ConfusionMatrix confusionMatrix = ConfusionMatrixGenerator.getConfusionMatrix(reader,
classifier, categoryFieldName, textFieldName, -1);
checkCM(confusionMatrix);
} finally {
if (reader != null) {
reader.close();
}
}
}
private SortedSetDocValues validateAndFetchDocValues(SolrIndexSearcher solrSearcher, String fieldName, String querySide) throws IOException {
final IndexSchema schema = solrSearcher.getSchema();
final SchemaField field = schema.getFieldOrNull(fieldName);
if (field == null) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, querySide + " field '" + fieldName + "' does not exist");
}
if (!field.hasDocValues()) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
"'top-level' join queries require both 'from' and 'to' fields to have docValues, but " + querySide +
" field [" + fieldName + "] does not.");
}
final LeafReader leafReader = solrSearcher.getSlowAtomicReader();
if (field.multiValued()) {
return DocValues.getSortedSet(leafReader, fieldName);
}
return DocValues.singleton(DocValues.getSorted(leafReader, fieldName));
}
static IndexReader wrap(IndexReader reader) throws IOException {
LeafReader[] leafReaders = reader.leaves().stream()
.map(LeafReaderContext::reader)
.map(TermVectorReusingLeafReader::new)
.toArray(LeafReader[]::new);
return new BaseCompositeReader<IndexReader>(leafReaders) {
@Override
protected void doClose() throws IOException {
reader.close();
}
@Override
public CacheHelper getReaderCacheHelper() {
return null;
}
};
}
@Test
public void testBasicUsage() throws Exception {
LeafReader leafReader = null;
try {
MockAnalyzer analyzer = new MockAnalyzer(random());
leafReader = getSampleIndex(analyzer);
checkCorrectClassification(new KNearestNeighborClassifier(leafReader, null, analyzer, null, 1, 0, 0, categoryFieldName, textFieldName), TECHNOLOGY_INPUT, TECHNOLOGY_RESULT);
checkCorrectClassification(new KNearestNeighborClassifier(leafReader, new LMDirichletSimilarity(), analyzer, null, 1, 0, 0, categoryFieldName, textFieldName), TECHNOLOGY_INPUT, TECHNOLOGY_RESULT);
ClassificationResult<BytesRef> resultDS = checkCorrectClassification(new KNearestNeighborClassifier(leafReader, new BM25Similarity(), analyzer, null, 3, 2, 1, categoryFieldName, textFieldName), TECHNOLOGY_INPUT, TECHNOLOGY_RESULT);
ClassificationResult<BytesRef> resultLMS = checkCorrectClassification(new KNearestNeighborClassifier(leafReader, new LMDirichletSimilarity(), analyzer, null, 3, 2, 1, categoryFieldName, textFieldName), TECHNOLOGY_INPUT, TECHNOLOGY_RESULT);
assertTrue(resultDS.getScore() != resultLMS.getScore());
} finally {
if (leafReader != null) {
leafReader.close();
}
}
}
public static String explain(String fieldName, GeoShape shape, GeoPoint targetDocPoint, GeoPoint scaledDocPoint, IndexReader reader, int docID) throws Exception {
final XYZBounds bounds = new XYZBounds();
shape.getBounds(bounds);
// First find the leaf reader that owns this doc:
int subIndex = ReaderUtil.subIndex(docID, reader.leaves());
LeafReader leafReader = reader.leaves().get(subIndex).reader();
StringBuilder b = new StringBuilder();
b.append("target is in leaf " + leafReader + " of full reader " + reader + "\n");
DocIdSetBuilder hits = new DocIdSetBuilder(leafReader.maxDoc());
ExplainingVisitor visitor = new ExplainingVisitor(shape, targetDocPoint, scaledDocPoint,
new PointInShapeIntersectVisitor(hits, shape, bounds),
docID - reader.leaves().get(subIndex).docBase, 3, Integer.BYTES, b);
// Do first phase, where we just figure out the "path" that leads to the target docID:
leafReader.getPointValues(fieldName).intersect(visitor);
// Do second phase, where we see how the wrapped visitor responded along that path:
visitor.startSecondPhase();
leafReader.getPointValues(fieldName).intersect(visitor);
return b.toString();
}
public void testNoOrds() throws Exception {
Directory dir = newDirectory();
RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
Document doc = new Document();
FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
ft.setStoreTermVectors(true);
doc.add(new Field("foo", "this is a test", ft));
iw.addDocument(doc);
LeafReader ir = getOnlyLeafReader(iw.getReader());
Terms terms = ir.getTermVector(0, "foo");
assertNotNull(terms);
TermsEnum termsEnum = terms.iterator();
assertEquals(SeekStatus.FOUND, termsEnum.seekCeil(new BytesRef("this")));
expectThrows(UnsupportedOperationException.class, termsEnum::ord);
expectThrows(UnsupportedOperationException.class, () -> termsEnum.seekExact(0));
ir.close();
iw.close();
dir.close();
}
/**
 * Collects term values for the specified collection component from the given set of documents.
 *
 * @param reader the index reader
 * @param docSet the ids of the documents to inspect
 * @param collectionInfo the collection component to fill
 * @throws IOException if an I/O exception occurs
 */
public static void collectCollection(IndexReader reader, List<Integer> docSet,
ComponentCollection collectionInfo) throws IOException {
if (collectionInfo.action().equals(ComponentCollection.ACTION_CHECK)) {
// can't do anything in lucene for check
} else if (collectionInfo.action()
.equals(ComponentCollection.ACTION_LIST)) {
// can't do anything in lucene for list
} else if (collectionInfo.action()
.equals(ComponentCollection.ACTION_CREATE)) {
BytesRef term = null;
PostingsEnum postingsEnum = null;
Integer docId;
Integer termDocId = -1;
Terms terms;
LeafReaderContext lrc;
LeafReader r;
ListIterator<LeafReaderContext> iterator = reader.leaves().listIterator();
while (iterator.hasNext()) {
lrc = iterator.next();
r = lrc.reader();
for (String field : collectionInfo.fields()) {
if ((terms = r.terms(field)) != null) {
TermsEnum termsEnum = terms.iterator();
while ((term = termsEnum.next()) != null) {
Iterator<Integer> docIterator = docSet.iterator();
postingsEnum = termsEnum.postings(postingsEnum,
PostingsEnum.NONE);
termDocId = -1;
while (docIterator.hasNext()) {
docId = docIterator.next() - lrc.docBase;
if ((docId >= termDocId) && ((docId.equals(termDocId))
|| ((termDocId = postingsEnum.advance(docId))
.equals(docId)))) {
collectionInfo.addValue(term.utf8ToString());
break;
}
if (termDocId.equals(PostingsEnum.NO_MORE_DOCS)) {
break;
}
}
}
}
}
}
}
}
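The heart of the ACTION_CREATE branch above is the advance-based membership test: for ascending segment-local doc ids, postingsEnum.advance(docId) == docId tells whether the current term occurs in one of those documents. The same idiom in isolation, as a hedged sketch:
// Hedged sketch: does the term currently positioned on `termsEnum` occur in any of
// the ascending segment-local ids in `sortedDocIds`? (live-docs handling omitted)
static boolean termOccursInAny(TermsEnum termsEnum, List<Integer> sortedDocIds) throws IOException {
    PostingsEnum postings = termsEnum.postings(null, PostingsEnum.NONE);
    int termDoc = -1;
    for (int docId : sortedDocIds) {
        if (termDoc < docId) {
            termDoc = postings.advance(docId); // first posting >= docId
        }
        if (termDoc == docId) {
            return true;
        }
        if (termDoc == DocIdSetIterator.NO_MORE_DOCS) {
            return false;
        }
    }
    return false;
}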
public void testIndexingPointsAndDocValues() throws Exception {
FieldType type = new FieldType();
type.setDimensions(1, 4);
type.setDocValuesType(DocValuesType.BINARY);
type.freeze();
Document doc = new Document();
byte[] packedPoint = "term".getBytes(StandardCharsets.UTF_8);
doc.add(new BinaryPoint("field", packedPoint, type));
MemoryIndex mi = MemoryIndex.fromDocument(doc, analyzer);
LeafReader leafReader = mi.createSearcher().getIndexReader().leaves().get(0).reader();
assertEquals(1, leafReader.getPointValues("field").size());
assertArrayEquals(packedPoint, leafReader.getPointValues("field").getMinPackedValue());
assertArrayEquals(packedPoint, leafReader.getPointValues("field").getMaxPackedValue());
BinaryDocValues dvs = leafReader.getBinaryDocValues("field");
assertEquals(0, dvs.nextDoc());
assertEquals("term", dvs.binaryValue().utf8ToString());
}
/**
* This test covers the scenario where, among the first topK results from the MLT query, there are fewer results
* for the expected class than for the wrong class, but the results for the expected class score better than
* those of the other class. So we expect a greater score for the best ranked class.
*
* @throws Exception if any error happens
*/
@Test
public void testUnbalancedClasses() throws Exception {
LeafReader leafReader = null;
try {
Analyzer analyzer = new EnglishAnalyzer();
leafReader = getSampleIndex(analyzer);
KNearestNeighborClassifier knnClassifier = new KNearestNeighborClassifier(leafReader, null, analyzer, null, 3, 1, 1, categoryFieldName, textFieldName);
List<ClassificationResult<BytesRef>> classes = knnClassifier.getClasses(SUPER_STRONG_TECHNOLOGY_INPUT);
assertTrue(classes.get(0).getScore() > classes.get(1).getScore());
checkCorrectClassification(knnClassifier, SUPER_STRONG_TECHNOLOGY_INPUT, TECHNOLOGY_RESULT);
} finally {
if (leafReader != null) {
leafReader.close();
}
}
}
public DummyDirectoryReader(DirectoryReader in) throws IOException {
super(in, new SubReaderWrapper() {
@Override
public LeafReader wrap(LeafReader reader) {
return new FilterLeafReader(reader) {
@Override
public CacheHelper getCoreCacheHelper() {
return null;
}
@Override
public CacheHelper getReaderCacheHelper() {
return null;
}};
}
});
}
@Override
public LeafCollector getLeafCollector(LeafReaderContext lrc) throws IOException {
LeafReader reader = lrc.reader();
for (int i = 0; i < fields.length; i++) {
if (groups[i][0] >= 1) {
if (groups[i][1] == 1) {
values[i] = reader.getSortedNumericDocValues("%"+fields[i]);
} else {
values[i] = reader.getNumericDocValues("#"+fields[i]);
}
} else {
if (groups[i][1] == 1) {
values[i] = reader.getSortedSetDocValues("%"+fields[i]);
} else {
values[i] = reader.getSortedDocValues("#"+fields[i]);
}
}
}
return this;
}
private static Document getFirstLiveDoc(Terms terms, LeafReader reader) throws IOException {
PostingsEnum postingsEnum = null;
TermsEnum termsEnum = terms.iterator();
BytesRef text;
// Deal with the chance that the first bunch of terms are in deleted documents. Is there a better way?
for (int idx = 0; idx < 1000 && postingsEnum == null; ++idx) {
text = termsEnum.next();
if (text == null) { // Ran off the end of the terms enum without finding any live docs with that field in them.
return null;
}
postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE);
final Bits liveDocs = reader.getLiveDocs();
if (postingsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
if (liveDocs != null && !liveDocs.get(postingsEnum.docID())) { // skip deleted docs
postingsEnum = null; // reset so the loop keeps scanning further terms
continue;
}
return reader.document(postingsEnum.docID());
}
}
return null;
}
private NonClosingReaderWrapper(DirectoryReader in) throws IOException {
super(in, new SubReaderWrapper() {
@Override
public LeafReader wrap(LeafReader reader) {
return reader;
}
});
}
private boolean assertDocSoftDeleted(LeafReader leafReader, int segmentDocId) throws IOException {
final NumericDocValues ndv = leafReader.getNumericDocValues(Lucene.SOFT_DELETES_FIELD);
if (ndv == null || ndv.advanceExact(segmentDocId) == false) {
throw new IllegalStateException("DocValues for field [" + Lucene.SOFT_DELETES_FIELD + "] is not found");
}
return ndv.longValue() == 1;
}
/**
 * Sole constructor: score documents of {@code reader} with {@code scorer}.
 */
MultiNormsLeafSimScorer(SimScorer scorer, LeafReader reader, Collection<FieldAndWeight> normFields, boolean needsScores) throws IOException {
this.scorer = Objects.requireNonNull(scorer);
if (needsScores) {
final List<NumericDocValues> normsList = new ArrayList<>();
final List<Float> weightList = new ArrayList<>();
for (FieldAndWeight field : normFields) {
NumericDocValues norms = reader.getNormValues(field.field);
if (norms != null) {
normsList.add(norms);
weightList.add(field.weight);
}
}
if (normsList.isEmpty()) {
norms = null;
} else if (normsList.size() == 1) {
norms = normsList.get(0);
} else {
final NumericDocValues[] normsArr = normsList.toArray(new NumericDocValues[0]);
final float[] weightArr = new float[normsList.size()];
for (int i = 0; i < weightList.size(); i++) {
weightArr[i] = weightList.get(i);
}
norms = new MultiFieldNormValues(normsArr, weightArr);
}
} else {
norms = null;
}
}
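For reference, the per-field LeafReader call underneath the constructor above is getNormValues, which returns a NumericDocValues iterator, or null when the field indexes no norms. A hedged sketch of reading a single norm:
// Hedged sketch: read the encoded length norm of one field for one document.
static long readNorm(LeafReader reader, String field, int docId) throws IOException {
    NumericDocValues norms = reader.getNormValues(field); // null if the field has no norms
    if (norms != null && norms.advanceExact(docId)) {
        return norms.longValue(); // encoded length-normalization factor
    }
    return 0; // illustrative default when no norm is present
}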
public static void assertSplit(LeafReader originalIndex, double testRatio, double crossValidationRatio, String... fieldNames) throws Exception {
BaseDirectoryWrapper trainingIndex = newDirectory();
BaseDirectoryWrapper testIndex = newDirectory();
BaseDirectoryWrapper crossValidationIndex = newDirectory();
try {
DatasetSplitter datasetSplitter = new DatasetSplitter(testRatio, crossValidationRatio);
datasetSplitter.split(originalIndex, trainingIndex, testIndex, crossValidationIndex, new MockAnalyzer(random()), true, classFieldName, fieldNames);
assertNotNull(trainingIndex);
assertNotNull(testIndex);
assertNotNull(crossValidationIndex);
DirectoryReader trainingReader = DirectoryReader.open(trainingIndex);
assertEquals((int) (originalIndex.maxDoc() * (1d - testRatio - crossValidationRatio)), trainingReader.maxDoc(), 20);
DirectoryReader testReader = DirectoryReader.open(testIndex);
assertEquals((int) (originalIndex.maxDoc() * testRatio), testReader.maxDoc(), 20);
DirectoryReader cvReader = DirectoryReader.open(crossValidationIndex);
assertEquals((int) (originalIndex.maxDoc() * crossValidationRatio), cvReader.maxDoc(), 20);
trainingReader.close();
testReader.close();
cvReader.close();
} finally {
if (trainingIndex != null) {
trainingIndex.close();
}
if (testIndex != null) {
testIndex.close();
}
if (crossValidationIndex != null) {
crossValidationIndex.close();
}
}
}
/**
* Tries to extract the shard id from a reader if possible; when that is not
* possible, returns null.
*/
@Nullable
public static ShardId extractShardId(LeafReader reader) {
final ElasticsearchLeafReader esReader = ElasticsearchLeafReader.getElasticsearchLeafReader(reader);
if (esReader != null) {
assert reader.getRefCount() > 0 : "ElasticsearchLeafReader is already closed";
return esReader.shardId();
}
return null;
}
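A hedged usage sketch (the IndexSearcher is a placeholder): the helper is typically applied per leaf of a searcher's reader:
// Illustrative only: "searcher" is an assumed IndexSearcher.
for (LeafReaderContext context : searcher.getIndexReader().leaves()) {
    ShardId shardId = extractShardId(context.reader());
    if (shardId != null) {
        // the segment can be attributed to this shard
    }
}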
@Test
public void testBasicUsage() throws Exception {
LeafReader leafReader = null;
try {
MockAnalyzer analyzer = new MockAnalyzer(random());
leafReader = getSampleIndex(analyzer);
SimpleNaiveBayesClassifier classifier = new SimpleNaiveBayesClassifier(leafReader, analyzer, null, categoryFieldName, textFieldName);
checkCorrectClassification(classifier, TECHNOLOGY_INPUT, TECHNOLOGY_RESULT);
checkCorrectClassification(classifier, POLITICS_INPUT, POLITICS_RESULT);
} finally {
if (leafReader != null) {
leafReader.close();
}
}
}
private NonClosingReaderWrapper(DirectoryReader in) throws IOException {
super(in, new SubReaderWrapper() {
@Override
public LeafReader wrap(LeafReader reader) {
return reader;
}
});
}
/**
* Tries to extract a segment reader from the given index reader.
* If no SegmentReader can be extracted, an {@link IllegalStateException} is thrown.
*/
protected static SegmentReader segmentReader(LeafReader reader) {
if (reader instanceof SegmentReader) {
return (SegmentReader) reader;
} else if (reader instanceof FilterLeafReader) {
final FilterLeafReader fReader = (FilterLeafReader) reader;
return segmentReader(FilterLeafReader.unwrap(fReader));
}
// hard fail - we can't get a SegmentReader
throw new IllegalStateException("Can not extract segment reader from given index reader [" + reader + "]");
}
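A hedged usage sketch: once unwrapped, segment-level details become available (the LeafReaderContext is a placeholder):
// Illustrative only: obtain segment metadata from an arbitrary (possibly wrapped) leaf.
SegmentReader segment = segmentReader(leafReaderContext.reader());
String segmentName = segment.getSegmentName(); // e.g. "_0"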
private Scorer getContainsDenseScorer(LeafReader reader, Weight weight, final float boost, ScoreMode scoreMode) throws IOException {
final FixedBitSet result = new FixedBitSet(reader.maxDoc());
final long[] cost = new long[]{0};
// Get potential documents.
final FixedBitSet excluded = new FixedBitSet(reader.maxDoc());
values.intersect(getContainsDenseVisitor(query, result, excluded, cost));
result.andNot(excluded);
final DocIdSetIterator iterator = new BitSetIterator(result, cost[0]);
return new ConstantScoreScorer(weight, boost, scoreMode, iterator);
}
/**
* Match a DocumentBatch against the queries stored in the Monitor, also returning information
* about which queries were selected by the presearcher, and why.
*
* @param docs a DocumentBatch to match against the index
* @param factory a {@link MatcherFactory} to use to create a {@link CandidateMatcher} for the match run
* @param <T> the type of QueryMatch produced by the CandidateMatcher
* @return a {@link PresearcherMatches} object containing debug information
* @throws IOException on IO errors
*/
public <T extends QueryMatch> PresearcherMatches<T> debug(Document[] docs, MatcherFactory<T> factory)
throws IOException {
try (DocumentBatch batch = DocumentBatch.of(analyzer, docs)) {
LeafReader reader = batch.get();
IndexSearcher searcher = new IndexSearcher(reader);
searcher.setQueryCache(null);
PresearcherQueryCollector<T> collector = new PresearcherQueryCollector<>(factory.createMatcher(searcher));
long buildTime = queryIndex.search(t -> new ForceNoBulkScoringQuery(presearcher.buildQuery(reader, t)), collector);
return collector.getMatches(buildTime);
}
}
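The DocumentBatch/LeafReader relationship used above, as a standalone hedged sketch (analyzer and docs are placeholders; only calls that already appear in the method are used):
// Hedged sketch: a DocumentBatch exposes the whole batch as one in-memory LeafReader.
try (DocumentBatch batch = DocumentBatch.of(analyzer, docs)) {
    LeafReader batchReader = batch.get();
    int batchSize = batchReader.maxDoc(); // one lucene docID per batch document
}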
private void assertDVAdvance(Directory dir, int jumpStep) throws IOException {
DirectoryReader ir = DirectoryReader.open(dir);
TestUtil.checkReader(ir);
for (LeafReaderContext context : ir.leaves()) {
LeafReader r = context.reader();
for (int jump = jumpStep; jump < r.maxDoc(); jump += jumpStep) {
// Create a new instance each time to ensure jumps from the beginning
NumericDocValues docValues = DocValues.getNumeric(r, "dv");
for (int docID = 0; docID < r.maxDoc(); docID += jump) {
String base = "document #" + docID + "/" + r.maxDoc() + ", jumping " + jump + " from #" + (docID-jump);
String storedValue = r.document(docID).get("stored");
if (storedValue == null) {
assertFalse("There should be no DocValue for " + base,
docValues.advanceExact(docID));
} else {
assertTrue("There should be a DocValue for " + base,
docValues.advanceExact(docID));
assertEquals("The doc value should be correct for " + base,
Long.parseLong(storedValue), docValues.longValue());
}
}
}
}
ir.close();
}
/** Returns the sum of RAM bytes used by each segment */
private static long getIndexHeapUsed(DirectoryReader reader) {
long indexHeapRamBytesUsed = 0;
for (LeafReaderContext leafReaderContext : reader.leaves()) {
LeafReader leafReader = leafReaderContext.reader();
if (leafReader instanceof SegmentReader) {
indexHeapRamBytesUsed += ((SegmentReader) leafReader)
.ramBytesUsed();
} else {
// Not supported for any reader that is not a SegmentReader
return -1;
}
}
return indexHeapRamBytesUsed;
}
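A hedged usage sketch ("reader" is a placeholder DirectoryReader); the -1 sentinel signals that a non-SegmentReader leaf made the estimate unavailable:
// Illustrative only: report per-segment heap usage, or note that it is unavailable.
long heapBytes = getIndexHeapUsed(reader);
if (heapBytes >= 0) {
    System.out.println("index heap used: " + heapBytes + " bytes");
} else {
    System.out.println("heap usage unavailable: a leaf was not a SegmentReader");
}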
/**
* Returns a list of Documents representing the specified Resource (empty when no such Document exists yet). Each
* document represents a set of statements with the specified Resource as a subject, which are stored in a specific
* context.
*/
private List<Document> getDocuments(Term uriTerm) throws IOException {
List<Document> result = new ArrayList<>();
IndexReader reader = getIndexReader();
List<LeafReaderContext> leaves = reader.leaves();
int size = leaves.size();
for (int i = 0; i < size; i++) {
LeafReader lreader = leaves.get(i).reader();
addDocuments(lreader, uriTerm, result);
}
return result;
}
private static void addDocuments(LeafReader reader, Term term, Collection<Document> documents) throws IOException {
PostingsEnum docs = reader.postings(term);
if (docs != null) {
int docId;
while ((docId = docs.nextDoc()) != PostingsEnum.NO_MORE_DOCS) {
Bits liveDocs = reader.getLiveDocs();
// Maybe some of the docs have been deleted! Check that too.
if (liveDocs != null && !liveDocs.get(docId)) {
continue;
}
Document document = readDocument(reader, docId, null);
documents.add(document);
}
}
}
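A hedged usage sketch tying the two methods together (the field name and value are placeholders for however the subject URI is indexed):
// Illustrative only: fetch all per-context Documents for one subject resource.
Term uriTerm = new Term("uri", "http://example.org/subject"); // field name is assumed
List<Document> contextDocs = getDocuments(uriTerm);
for (Document d : contextDocs) {
    // each Document holds the statements for one context
}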