org.apache.lucene.index.LeafReader#getLiveDocs() code examples

Listed below are example usages of org.apache.lucene.index.LeafReader#getLiveDocs(), collected from open-source projects; follow each project reference to view the full source on GitHub. getLiveDocs() returns a Bits instance whose set bits mark the live (not deleted) documents of a segment, and returns null when the segment contains no deletions.
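All of the snippets share one pattern: fetch the Bits once per segment, treat a null result as "no deletions", and skip any document whose bit is unset. Here is a minimal sketch of that pattern against a Lucene 8.x index (the visitLiveDocs method and the dir variable are hypothetical, for illustration only):

import java.io.IOException;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;

static void visitLiveDocs(Directory dir) throws IOException {
  try (DirectoryReader directoryReader = DirectoryReader.open(dir)) {
    for (LeafReaderContext context : directoryReader.leaves()) {
      LeafReader leafReader = context.reader();
      Bits liveDocs = leafReader.getLiveDocs(); // null means the segment has no deletions
      for (int docId = 0; docId < leafReader.maxDoc(); docId++) {
        if (liveDocs != null && !liveDocs.get(docId)) {
          continue; // unset bit: the document was deleted
        }
        Document document = leafReader.document(docId); // process the live document
      }
    }
  }
}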

Example 1  Project: rdf4j   File: LuceneIndex.java
private static Document getDocument(LeafReader reader, Term term) throws IOException {
	PostingsEnum docs = reader.postings(term);
	if (docs != null) {
		int docId = docs.nextDoc();
	// PostingsEnum may contain deleted documents, we have to cope with it
		while (docId != PostingsEnum.NO_MORE_DOCS) {

			// if document is deleted, skip and continue
			Bits liveDocs = reader.getLiveDocs();
			if (liveDocs != null && !liveDocs.get(docId)) {
				docId = docs.nextDoc();
				continue;
			}
			if (docs.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
				throw new IllegalStateException("Multiple Documents for term " + term.text());
			}
			return readDocument(reader, docId, null);
		}
	}
	return null;
}
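A hypothetical call site for the helper above, assuming each resource document was indexed with a unique "uri" term (the field name and reader setup are illustrative, not part of rdf4j's API):

// given an open DirectoryReader (see the sketch above)
LeafReader leafReader = directoryReader.leaves().get(0).reader();
Document document = getDocument(leafReader, new Term("uri", "http://example.org/resource/1"));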
 
Example 2  Project: lucene-solr   File: IndexSizeEstimator.java
private void estimateTermVectors(Map<String, Object> result) throws IOException {
  log.info("- estimating term vectors...");
  Map<String, Map<String, Object>> stats = new HashMap<>();
  for (LeafReaderContext leafReaderContext : reader.leaves()) {
    LeafReader leafReader = leafReaderContext.reader();
    Bits liveDocs = leafReader.getLiveDocs();
    for (int docId = 0; docId < leafReader.maxDoc(); docId += samplingStep) {
      if (liveDocs != null && !liveDocs.get(docId)) {
        continue;
      }
      Fields termVectors = leafReader.getTermVectors(docId);
      if (termVectors == null) {
        continue;
      }
      for (String field : termVectors) {
        Terms terms = termVectors.terms(field);
        if (terms == null) {
          continue;
        }
        estimateTermStats(field, terms, stats, true);
      }
    }
  }
  result.put(TERM_VECTORS, stats);
}
 
Example 3  Project: lucene-solr   File: LukeRequestHandler.java
private static Document getFirstLiveDoc(Terms terms, LeafReader reader) throws IOException {
  PostingsEnum postingsEnum = null;
  TermsEnum termsEnum = terms.iterator();
  BytesRef text;
  // Deal with the chance that the first bunch of terms are in deleted documents. Is there a better way?
  for (int idx = 0; idx < 1000; ++idx) { // try up to 1000 terms before giving up
    text = termsEnum.next();
    if (text == null) { // Ran off the end of the terms enum without finding any live docs with that field in them.
      return null;
    }
    postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE);
    final Bits liveDocs = reader.getLiveDocs();
    if (postingsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
      if (liveDocs != null && !liveDocs.get(postingsEnum.docID())) {
        continue; // unset bit means the document is deleted; try the next term
      }
      return reader.document(postingsEnum.docID());
    }
  }
  return null;
}
 
Example 4  Project: crate   File: LuceneBatchIterator.java
private boolean innerMoveNext() throws IOException {
    while (tryAdvanceDocIdSetIterator()) {
        LeafReader reader = currentLeaf.reader();
        Bits liveDocs = reader.getLiveDocs();
        int doc;
        while ((doc = currentDocIdSetIt.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            if (docDeleted(liveDocs, doc) || belowMinScore(currentScorer)) {
                continue;
            }
            onDoc(doc);
            return true;
        }
        currentDocIdSetIt = null;
    }
    clearState();
    return false;
}
 
Example 5
protected static Document getFirstLiveDoc(Terms terms, LeafReader reader)
		throws IOException {
	TermsEnum termsEnum = terms.iterator();
	if (termsEnum.next() == null) { // Ran off the end of the terms enum without finding any live docs with that field in them.
		return null;
	}
	PostingsEnum postingsEnum = termsEnum.postings(null, PostingsEnum.NONE);
	final Bits liveDocs = reader.getLiveDocs();
	if (postingsEnum.nextDoc() == DocIdSetIterator.NO_MORE_DOCS
			|| (liveDocs != null && !liveDocs.get(postingsEnum.docID()))) {
		return null; // no postings, or the first posting points at a deleted document
	}
	return reader.document(postingsEnum.docID());
}
 
Example 6  Project: rdf4j   File: LuceneIndex.java
private static void addDocuments(LeafReader reader, Term term, Collection<Document> documents) throws IOException {
	PostingsEnum docs = reader.postings(term);
	if (docs != null) {
		int docId;
		while ((docId = docs.nextDoc()) != PostingsEnum.NO_MORE_DOCS) {
			Bits liveDocs = reader.getLiveDocs();
			// Maybe some of the docs have been deleted! Check that too.
			if (liveDocs != null && !liveDocs.get(docId)) {
				continue;
			}
			Document document = readDocument(reader, docId, null);
			documents.add(document);
		}
	}
}
 
Example 7  Project: lucene-solr   File: IndexSizeEstimator.java
private void estimateStoredFields(Map<String, Object> result) throws IOException {
  log.info("- estimating stored fields...");
  Map<String, Map<String, Object>> stats = new HashMap<>();
  for (LeafReaderContext context : reader.leaves()) {
    LeafReader leafReader = context.reader();
    EstimatingVisitor visitor = new EstimatingVisitor(stats, topN, maxLength, samplingStep);
    Bits liveDocs = leafReader.getLiveDocs();
    if (leafReader instanceof CodecReader) {
      CodecReader codecReader = (CodecReader)leafReader;
      StoredFieldsReader storedFieldsReader = codecReader.getFieldsReader();
      // this instance may be faster for a full sequential pass
      StoredFieldsReader mergeInstance = storedFieldsReader.getMergeInstance();
      for (int docId = 0; docId < leafReader.maxDoc(); docId += samplingStep) {
        if (liveDocs != null && !liveDocs.get(docId)) {
          continue;
        }
        mergeInstance.visitDocument(docId, visitor);
      }
      if (mergeInstance != storedFieldsReader) {
        mergeInstance.close();
      }
    } else {
      for (int docId = 0; docId < leafReader.maxDoc(); docId += samplingStep) {
        if (liveDocs != null && !liveDocs.get(docId)) {
          continue;
        }
        leafReader.document(docId, visitor);
      }
    }
  }
  result.put(STORED_FIELDS, stats);
}
 
Example 8  Project: mtas   File: CodecCollector.java
/**
 * Computes basic termvector numbers (document frequency and summed term
 * frequency) directly from the terms enum. This shortcut is only valid when
 * the segment has no deleted documents.
 *
 * @param termsEnum
 *          the terms enum positioned on the current term
 * @param r
 *          the leaf reader for the current segment
 * @return the termvector number basic
 * @throws IOException
 *           if the segment has deletions (or term statistics are unavailable),
 *           in which case the docSet-based variant must be used instead
 */
private static TermvectorNumberBasic computeTermvectorNumberBasic(
    TermsEnum termsEnum, LeafReader r) throws IOException {
  TermvectorNumberBasic result = new TermvectorNumberBasic();
  boolean hasDeletedDocuments = (r.getLiveDocs() != null);
  if (!hasDeletedDocuments) {
    result.valueSum[0] = termsEnum.totalTermFreq();
    result.docNumber = termsEnum.docFreq();
    if (result.valueSum[0] > -1) {
      return result;
    }
  }
  throw new IOException("should not call this");
}
 
Example 9  Project: mtas   File: CodecCollector.java
/**
 * Computes basic termvector numbers for an explicit document set, delegating
 * to the whole-segment variant when the set covers every document and the
 * segment has no deletions.
 *
 * @param docSet
 *          the global (index-wide) document ids to count
 * @param termDocId
 *          the last segment-local document id seen for this term
 * @param termsEnum
 *          the terms enum positioned on the current term
 * @param r
 *          the leaf reader for the current segment
 * @param lrc
 *          the leaf reader context, providing the docBase offset
 * @param postingsEnum
 *          a reusable postings enum
 * @return the termvector number basic
 * @throws IOException
 *           Signals that an I/O exception has occurred.
 */
private static TermvectorNumberBasic computeTermvectorNumberBasic(
    List<Integer> docSet, int termDocId, TermsEnum termsEnum, LeafReader r,
    LeafReaderContext lrc, PostingsEnum postingsEnum) throws IOException {
  TermvectorNumberBasic result = new TermvectorNumberBasic();
  boolean hasDeletedDocuments = (r.getLiveDocs() != null);
  if ((docSet.size() == r.numDocs()) && !hasDeletedDocuments) {
    try {
      return computeTermvectorNumberBasic(termsEnum, r);
    } catch (IOException e) {
      log.debug("problem", e);
      // problem
    }
  }
  result.docNumber = 0;
  result.valueSum[0] = 0;
  int localTermDocId = termDocId;
  Iterator<Integer> docIterator = docSet.iterator();
  postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.FREQS);
  int docId;
  while (docIterator.hasNext()) {
    docId = docIterator.next() - lrc.docBase;
    if (docId >= localTermDocId && ((docId == localTermDocId)
        || ((localTermDocId = postingsEnum.advance(docId)) == docId))) {
      result.docNumber++;
      result.valueSum[0] += postingsEnum.freq();
    }
    if (localTermDocId == DocIdSetIterator.NO_MORE_DOCS) {
      break;
    }
  }
  return result;
}
 
Example 10  Project: lucene-solr   File: TestDocTermOrdsUninvertLimit.java
@SuppressWarnings({"ConstantConditions", "PointlessBooleanExpression"})
@Nightly
public void testTriggerUnInvertLimit() throws IOException {
  final boolean SHOULD_TRIGGER = false; // Set this to true to use the test with the old implementation

  // Ensure enough terms inside of a single UnInvert-pass-structure to trigger the limit
  final int REF_LIMIT = (int) Math.pow(2, 24); // Maximum number of references within a single pass-structure
  final int DOCS = (1<<16)-1;                  // The number of documents within a single pass (simplified)
  final int TERMS = REF_LIMIT/DOCS;            // Each document must have this many references aka terms hit limit

  // disk based Directory and IWC settings to reduce risk of OOM
  Directory dir = newFSDirectory(createTempDir("TestDocTermOrdsUninvertLimit"));
  final IndexWriter w = new IndexWriter(dir,
                                        new IndexWriterConfig(new MockAnalyzer(random()))
                                        .setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
                                        .setRAMBufferSizeMB(256.0)
                                        .setMergeScheduler(new ConcurrentMergeScheduler())
                                        .setMergePolicy(newLogMergePolicy(false, 10))
                                        .setOpenMode(IndexWriterConfig.OpenMode.CREATE)
                                        .setCodec(TestUtil.getDefaultCodec()));
  
  Document doc = new Document();
  Field field = newTextField("field", "", Field.Store.NO);
  doc.add(field);

  StringBuilder sb = new StringBuilder(TERMS*(Integer.toString(TERMS).length()+1));
  for (int i = 0 ; i < TERMS ; i++) {
    sb.append(" ").append(Integer.toString(i));
  }
  field.setStringValue(sb.toString());

  for (int i = 0 ; i < DOCS ; i++) {
    w.addDocument(doc);
  }
  //System.out.println("\n Finished adding " + DOCS + " documents of " + TERMS + " unique terms");
  w.close();
  
  final IndexReader r = DirectoryReader.open(dir);
  try {
    final LeafReader ar = SlowCompositeReaderWrapper.wrap(r);
    TestUtil.checkReader(ar);
    final DocTermOrds dto = new DocTermOrds(ar, ar.getLiveDocs(), "field"); // bigTerms turned off
    if (SHOULD_TRIGGER) {
      fail("DocTermOrds should have failed with a \"Too many values for UnInvertedField\" message");
    }
  } catch (IllegalStateException e) {
    if (!SHOULD_TRIGGER) {
      fail("DocTermsOrd should not have failed with this implementation, but got exception " +
          e.getClass().getSimpleName() + " with message " + e.getMessage());
    }
    // This is (hopefully) "Too many values for UnInvertedField faceting on field field", so all is as expected
  } finally {
    r.close();
    dir.close();
  }
}
 
Example 11  Project: lucene-solr   File: TestDocTermOrds.java
public void testSimple() throws Exception {
  Directory dir = newDirectory();
  final RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
  Document doc = new Document();
  Field field = newTextField("field", "", Field.Store.NO);
  doc.add(field);
  field.setStringValue("a b c");
  w.addDocument(doc);

  field.setStringValue("d e f");
  w.addDocument(doc);

  field.setStringValue("a f");
  w.addDocument(doc);
  
  final IndexReader r = w.getReader();
  w.close();

  final LeafReader ar = SlowCompositeReaderWrapper.wrap(r);
  TestUtil.checkReader(ar);
  final DocTermOrds dto = new DocTermOrds(ar, ar.getLiveDocs(), "field");
  SortedSetDocValues iter = dto.iterator(ar);
  
  assertEquals(0, iter.nextDoc());
  assertEquals(0, iter.nextOrd());
  assertEquals(1, iter.nextOrd());
  assertEquals(2, iter.nextOrd());
  assertEquals(SortedSetDocValues.NO_MORE_ORDS, iter.nextOrd());
  
  assertEquals(1, iter.nextDoc());
  assertEquals(3, iter.nextOrd());
  assertEquals(4, iter.nextOrd());
  assertEquals(5, iter.nextOrd());
  assertEquals(SortedSetDocValues.NO_MORE_ORDS, iter.nextOrd());

  assertEquals(2, iter.nextDoc());
  assertEquals(0, iter.nextOrd());
  assertEquals(5, iter.nextOrd());
  assertEquals(SortedSetDocValues.NO_MORE_ORDS, iter.nextOrd());

  r.close();
  dir.close();
}
 
Example 12  Project: lucene-solr   File: IndexSizeEstimatorTest.java
@Test
public void testEstimator() throws Exception {
  JettySolrRunner jetty = cluster.getRandomJetty(random());
  String randomCoreName = jetty.getCoreContainer().getAllCoreNames().iterator().next();
  SolrCore core = jetty.getCoreContainer().getCore(randomCoreName);
  RefCounted<SolrIndexSearcher> searcherRef = core.getSearcher();
  try {
    SolrIndexSearcher searcher = searcherRef.get();
    // limit the max length
    IndexSizeEstimator estimator = new IndexSizeEstimator(searcher.getRawReader(), 20, 50, true, true);
    IndexSizeEstimator.Estimate estimate = estimator.estimate();
    Map<String, Long> fieldsBySize = estimate.getFieldsBySize();
    assertFalse("empty fieldsBySize", fieldsBySize.isEmpty());
    assertEquals(fieldsBySize.toString(), fields.size(), fieldsBySize.size());
    fieldsBySize.forEach((k, v) -> assertTrue("unexpected size of " + k + ": " + v, v > 0));
    Map<String, Long> typesBySize = estimate.getTypesBySize();
    assertFalse("empty typesBySize", typesBySize.isEmpty());
    assertTrue("expected at least 8 types: " + typesBySize.toString(), typesBySize.size() >= 8);
    typesBySize.forEach((k, v) -> assertTrue("unexpected size of " + k + ": " + v, v > 0));
    Map<String, Object> summary = estimate.getSummary();
    assertNotNull("summary", summary);
    assertFalse("empty summary", summary.isEmpty());
    assertEquals(summary.keySet().toString(), fields.size(), summary.keySet().size());
    Map<String, Object> details = estimate.getDetails();
    assertNotNull("details", details);
    assertFalse("empty details", details.isEmpty());
    // by type
    assertEquals(details.keySet().toString(), 6, details.keySet().size());

    // check sampling
    estimator.setSamplingThreshold(searcher.getRawReader().maxDoc() / 2);
    IndexSizeEstimator.Estimate sampledEstimate = estimator.estimate();
    Map<String, Long> sampledFieldsBySize = sampledEstimate.getFieldsBySize();
    assertFalse("empty fieldsBySize", sampledFieldsBySize.isEmpty());
    // verify that the sampled values are within 50% of the original values
    fieldsBySize.forEach((field, size) -> {
      Long sampledSize = sampledFieldsBySize.get(field);
      assertNotNull("sampled size for " + field + " is missing in " + sampledFieldsBySize, sampledSize);
      double delta = (double) size * 0.5;
      assertEquals("sampled size of " + field + " is wildly off", (double)size, (double)sampledSize, delta);
    });
    // verify the reader is still usable - SOLR-13694
    IndexReader reader = searcher.getRawReader();
    for (LeafReaderContext context : reader.leaves()) {
      LeafReader leafReader = context.reader();
      assertTrue("unexpected LeafReader class: " + leafReader.getClass().getName(), leafReader instanceof CodecReader);
      Bits liveDocs = leafReader.getLiveDocs();
      CodecReader codecReader = (CodecReader) leafReader;
      StoredFieldsReader storedFieldsReader = codecReader.getFieldsReader();
      StoredFieldVisitor visitor = new DocumentStoredFieldVisitor();
      assertNotNull(storedFieldsReader);
      for (int docId = 0; docId < leafReader.maxDoc(); docId++) {
        if (liveDocs != null && !liveDocs.get(docId)) {
          continue;
        }
        storedFieldsReader.visitDocument(docId, visitor);
      }
    }
  } finally {
    searcherRef.decref();
    core.close();
  }
}