The following are example usages of org.apache.lucene.index.IndexReader#numDocs(), drawn from open-source projects on GitHub.
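Note that numDocs() counts only live documents: documents that have been deleted but not yet merged away are excluded, unlike maxDoc(), which also counts deleted slots. A minimal illustrative sketch (the index path is a placeholder, not taken from the examples below):

import java.io.IOException;
import java.nio.file.Paths;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public static void printDocCounts() throws IOException {
    // Compare live-document and total-slot counts on an existing index.
    try (Directory dir = FSDirectory.open(Paths.get("/tmp/index"));
         IndexReader reader = DirectoryReader.open(dir)) {
        System.out.println("live docs:    " + reader.numDocs());        // excludes deletions
        System.out.println("max doc id:   " + reader.maxDoc());         // includes deleted slots
        System.out.println("deleted docs: " + reader.numDeletedDocs()); // the difference
    }
}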
@Override
public Query rewrite(IndexReader reader) throws IOException {
if (getBoost() != 1.0F) {
return super.rewrite(reader);
}
if (reader instanceof DirectoryReader) {
String joinField = ParentFieldMapper.joinField(parentType);
IndexSearcher indexSearcher = new IndexSearcher(reader);
indexSearcher.setQueryCache(null);
indexSearcher.setSimilarity(similarity);
IndexParentChildFieldData indexParentChildFieldData = parentChildIndexFieldData.loadGlobal((DirectoryReader) reader);
MultiDocValues.OrdinalMap ordinalMap = ParentChildIndexFieldData.getOrdinalMap(indexParentChildFieldData, parentType);
return JoinUtil.createJoinQuery(joinField, innerQuery, toQuery, indexSearcher, scoreMode, ordinalMap, minChildren, maxChildren);
} else {
if (reader.leaves().isEmpty() && reader.numDocs() == 0) {
// The asserting reader passes down a MultiReader during rewrite, which would blow
// up here: this query needs a DirectoryReader, because global ordinals cannot be
// loaded from anything else. When the reader has no leaves (and hence no documents),
// it is safe to rewrite to a query that matches nothing.
return new MatchNoDocsQuery();
}
throw new IllegalStateException("can't load global ordinals for reader of type: " + reader.getClass() + "; must be a DirectoryReader");
}
}
@Override
public void dump(long memory, Consumer<Entry> consumer) throws IOException {
IndexSearcher searcher = getIndexSearcher();
IndexReader reader = getIndexReader();
int size = reader.numDocs();
if (size == 0)
return;
Query memoryQuery = new TermQuery(documentBuilder.makeMemoryTerm(memory));
TopDocs docs = searcher.search(memoryQuery, size);
for (ScoreDoc scoreDoc : docs.scoreDocs) {
Document document = reader.document(scoreDoc.doc);
if (documentBuilder.getMemory(document) > 0) {
TranslationMemory.Entry entry = documentBuilder.asEntry(document);
consumer.accept(entry);
}
}
}
@Override
public void dumpAll(Consumer<Entry> consumer) throws IOException {
IndexSearcher searcher = getIndexSearcher();
IndexReader reader = getIndexReader();
int size = reader.numDocs();
if (size == 0)
return;
TopDocs docs = searcher.search(new MatchAllDocsQuery(), size);
for (ScoreDoc scoreDoc : docs.scoreDocs) {
Document document = reader.document(scoreDoc.doc);
if (documentBuilder.getMemory(document) > 0) {
TranslationMemory.Entry entry = documentBuilder.asEntry(document);
consumer.accept(entry);
}
}
}
public int getNumberOfDocuments() throws IOException
{
IndexReader reader = getMainIndexReferenceCountingReadOnlyIndexReader();
try
{
return reader.numDocs();
}
finally
{
reader.close();
}
}
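Since IndexReader implements Closeable, the try/finally pattern above can equivalently be written with try-with-resources; a sketch of the same method:

public int getNumberOfDocuments() throws IOException {
    // The reader is closed automatically when the block exits.
    try (IndexReader reader = getMainIndexReferenceCountingReadOnlyIndexReader()) {
        return reader.numDocs();
    }
}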
@Override
public SignificantStringTerms buildEmptyAggregation() {
// We need to account for the significance of a miss in our global stats - provide corpus size as context
ContextIndexSearcher searcher = context.searchContext().searcher();
IndexReader topReader = searcher.getIndexReader();
int supersetSize = topReader.numDocs();
return new SignificantStringTerms(0, supersetSize, name, bucketCountThresholds.getRequiredSize(),
bucketCountThresholds.getMinDocCount(), termsAggFactory.getSignificanceHeuristic(),
Collections.<InternalSignificantTerms.Bucket> emptyList(), pipelineAggregators(), metaData());
}
@Override
public SignificantLongTerms buildEmptyAggregation() {
// We need to account for the significance of a miss in our global stats - provide corpus size as context
ContextIndexSearcher searcher = context.searchContext().searcher();
IndexReader topReader = searcher.getIndexReader();
int supersetSize = topReader.numDocs();
return new SignificantLongTerms(0, supersetSize, name, formatter, bucketCountThresholds.getRequiredSize(),
bucketCountThresholds.getMinDocCount(), termsAggFactory.getSignificanceHeuristic(),
Collections.<InternalSignificantTerms.Bucket> emptyList(), pipelineAggregators(), metaData());
}
private int numdoc() throws IOException {
IndexReader rs = DirectoryReader.open(spellindex);
int num = rs.numDocs();
assertTrue(num != 0);
//System.out.println("num docs: " + num);
rs.close();
return num;
}
public void testMissingTerms() throws Exception {
String fieldName = "field1";
Directory rd = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), rd);
for (int i = 0; i < 100; i++) {
Document doc = new Document();
int term = i * 10; // terms are multiples of 10
doc.add(newStringField(fieldName, "" + term, Field.Store.YES));
doc.add(new SortedDocValuesField(fieldName, new BytesRef("" + term)));
w.addDocument(doc);
}
IndexReader reader = w.getReader();
w.close();
IndexSearcher searcher = newSearcher(reader);
int numDocs = reader.numDocs();
ScoreDoc[] results;
List<String> terms = new ArrayList<>();
terms.add("5");
results = searcher.search(new DocValuesTermsQuery(fieldName, terms.toArray(new String[0])), numDocs).scoreDocs;
assertEquals("Must match nothing", 0, results.length);
terms = new ArrayList<>();
terms.add("10");
results = searcher.search(new DocValuesTermsQuery(fieldName, terms.toArray(new String[0])), numDocs).scoreDocs;
assertEquals("Must match 1", 1, results.length);
terms = new ArrayList<>();
terms.add("10");
terms.add("20");
results = searcher.search(new DocValuesTermsQuery(fieldName, terms.toArray(new String[0])), numDocs).scoreDocs;
assertEquals("Must match 2", 2, results.length);
reader.close();
rd.close();
}
public long size(final ValuesTransformer<V> transformer) {
  IndexReader reader = null;
  try {
    reader = getSearcher().getIndexReader();
  } catch (IOException e) {
    OLogManager.instance().error(this, "Error on getting size of Lucene index", e);
  }
  // Guard against a null reader: if getSearcher() failed above, report an empty
  // index instead of throwing a NullPointerException.
  return reader != null ? reader.numDocs() : 0;
}
public DocFreq(IndexReader indexReader, String field) {
this.indexReader = indexReader;
this.field = field;
this.docFreqMap = new HashMap<>();
this.similarity = new ClassicSimilarity();
this.numDocs = indexReader.numDocs();
}
public Explanation explain(IndexReader reader, int doc)
throws IOException {
ComplexExplanation result = new ComplexExplanation();
result.setDescription("weight("+getQuery()+" in "+doc+"), product of:");
Explanation idfExpl =
new Explanation(idf, "idf(docFreq=" + reader.docFreq(term) +
", numDocs=" + reader.numDocs() + ")");
// explain query weight
Explanation queryExpl = new Explanation();
queryExpl.setDescription("queryWeight(" + getQuery() + "), product of:");
Explanation boostExpl = new Explanation(getBoost(), "boost");
if (getBoost() != 1.0f)
queryExpl.addDetail(boostExpl);
queryExpl.addDetail(idfExpl);
Explanation queryNormExpl = new Explanation(queryNorm,"queryNorm");
queryExpl.addDetail(queryNormExpl);
queryExpl.setValue(boostExpl.getValue() *
idfExpl.getValue() *
queryNormExpl.getValue());
result.addDetail(queryExpl);
// explain field weight
String field = term.field();
ComplexExplanation fieldExpl = new ComplexExplanation();
fieldExpl.setDescription("fieldWeight("+term+" in "+doc+
"), product of:");
Explanation tfExpl = scorer(reader).explain(doc);
fieldExpl.addDetail(tfExpl);
fieldExpl.addDetail(idfExpl);
Explanation fieldNormExpl = new Explanation();
byte[] fieldNorms = reader.norms(field);
float fieldNorm =
fieldNorms!=null ? Similarity.decodeNorm(fieldNorms[doc]) : 0.0f;
fieldNormExpl.setValue(fieldNorm);
fieldNormExpl.setDescription("fieldNorm(field="+field+", doc="+doc+")");
fieldExpl.addDetail(fieldNormExpl);
fieldExpl.setMatch(Boolean.valueOf(tfExpl.isMatch()));
fieldExpl.setValue(tfExpl.getValue() *
idfExpl.getValue() *
fieldNormExpl.getValue());
result.addDetail(fieldExpl);
result.setMatch(fieldExpl.getMatch());
// combine them
result.setValue(queryExpl.getValue() * fieldExpl.getValue());
if (queryExpl.getValue() == 1.0f)
return fieldExpl;
return result;
}
public void testDuel() throws IOException {
final int iters = atLeast(2);
final String field = "f";
for (int iter = 0; iter < iters; ++iter) {
final List<BytesRef> allTerms = new ArrayList<>();
final int numTerms = TestUtil.nextInt(random(), 1, 1 << TestUtil.nextInt(random(), 1, 10));
for (int i = 0; i < numTerms; ++i) {
final String value = TestUtil.randomAnalysisString(random(), 10, true);
allTerms.add(new BytesRef(value));
}
Directory dir = newDirectory();
RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
final int numDocs = atLeast(100);
for (int i = 0; i < numDocs; ++i) {
Document doc = new Document();
final BytesRef term = allTerms.get(random().nextInt(allTerms.size()));
doc.add(new StringField(field, term, Store.NO));
iw.addDocument(doc);
}
if (numTerms > 1 && random().nextBoolean()) {
iw.deleteDocuments(new TermQuery(new Term(field, allTerms.get(0))));
}
iw.commit();
final IndexReader reader = iw.getReader();
final IndexSearcher searcher = newSearcher(reader);
iw.close();
if (reader.numDocs() == 0) {
// may occasionally happen if all documents got the same term
IOUtils.close(reader, dir);
continue;
}
for (int i = 0; i < 100; ++i) {
final float boost = random().nextFloat() * 10;
final int numQueryTerms = TestUtil.nextInt(random(), 1, 1 << TestUtil.nextInt(random(), 1, 8));
List<BytesRef> queryTerms = new ArrayList<>();
for (int j = 0; j < numQueryTerms; ++j) {
queryTerms.add(allTerms.get(random().nextInt(allTerms.size())));
}
final BooleanQuery.Builder bq = new BooleanQuery.Builder();
for (BytesRef t : queryTerms) {
bq.add(new TermQuery(new Term(field, t)), Occur.SHOULD);
}
final Query q1 = new ConstantScoreQuery(bq.build());
final Query q2 = new TermInSetQuery(field, queryTerms);
assertSameMatches(searcher, new BoostQuery(q1, boost), new BoostQuery(q2, boost), true);
}
reader.close();
dir.close();
}
}
@Test
public void testRangeQueryRand() throws IOException {
// NOTE: uses index build in *super* setUp
IndexReader reader = signedIndexReader;
IndexSearcher search = newSearcher(reader);
String minRP = pad(signedIndexDir.minR);
String maxRP = pad(signedIndexDir.maxR);
int numDocs = reader.numDocs();
assertEquals("num of docs", numDocs, 1 + maxId - minId);
ScoreDoc[] result;
// test extremes, bounded on both ends
result = search.search(csrq("rand", minRP, maxRP, T, T), numDocs).scoreDocs;
assertEquals("find all", numDocs, result.length);
result = search.search(csrq("rand", minRP, maxRP, T, F), numDocs).scoreDocs;
assertEquals("all but biggest", numDocs - 1, result.length);
result = search.search(csrq("rand", minRP, maxRP, F, T), numDocs).scoreDocs;
assertEquals("all but smallest", numDocs - 1, result.length);
result = search.search(csrq("rand", minRP, maxRP, F, F), numDocs).scoreDocs;
assertEquals("all but extremes", numDocs - 2, result.length);
// unbounded
result = search.search(csrq("rand", minRP, null, T, F), numDocs).scoreDocs;
assertEquals("smallest and up", numDocs, result.length);
result = search.search(csrq("rand", null, maxRP, F, T), numDocs).scoreDocs;
assertEquals("biggest and down", numDocs, result.length);
result = search.search(csrq("rand", minRP, null, F, F), numDocs).scoreDocs;
assertEquals("not smallest, but up", numDocs - 1, result.length);
result = search.search(csrq("rand", null, maxRP, F, F), numDocs).scoreDocs;
assertEquals("not biggest, but down", numDocs - 1, result.length);
// very small sets
result = search.search(csrq("rand", minRP, minRP, F, F), numDocs).scoreDocs;
assertEquals("min,min,F,F", 0, result.length);
result = search.search(csrq("rand", maxRP, maxRP, F, F), numDocs).scoreDocs;
assertEquals("max,max,F,F", 0, result.length);
result = search.search(csrq("rand", minRP, minRP, T, T), numDocs).scoreDocs;
assertEquals("min,min,T,T", 1, result.length);
result = search.search(csrq("rand", null, minRP, F, T), numDocs).scoreDocs;
assertEquals("nul,min,F,T", 1, result.length);
result = search.search(csrq("rand", maxRP, maxRP, T, T), numDocs).scoreDocs;
assertEquals("max,max,T,T", 1, result.length);
result = search.search(csrq("rand", maxRP, null, T, F), numDocs).scoreDocs;
assertEquals("max,nul,T,T", 1, result.length);
}
/**
* Creates a new QueryAutoStopWordAnalyzer with stopwords calculated for the
* given selection of fields from terms with a document frequency percentage
* greater than the given maxPercentDocs
*
* @param delegate Analyzer whose TokenStream will be filtered
* @param indexReader IndexReader to identify the stopwords from
* @param fields Selection of fields to calculate stopwords for
* @param maxPercentDocs The maximum percentage (between 0.0 and 1.0) of index documents which
* contain a term, after which the word is considered to be a stop word
* @throws IOException Can be thrown while reading from the IndexReader
*/
public QueryAutoStopWordAnalyzer(
Analyzer delegate,
IndexReader indexReader,
Collection<String> fields,
float maxPercentDocs) throws IOException {
this(delegate, indexReader, fields, (int) (indexReader.numDocs() * maxPercentDocs));
}
/**
* @return the number of documents indexed
*/
public int numDocs() {
IndexReader reader = getReader();
return reader.numDocs();
}