Source code examples for org.apache.lucene.search.TermStatistics

The examples below show how the org.apache.lucene.search.TermStatistics API is used in practice; each snippet is taken from the named project and file, and the original source can be viewed on GitHub.
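
Before the project examples, here is a minimal self-contained sketch of the API itself: a TermStatistics object simply carries a term's bytes, its document frequency, and its total term frequency, and is normally obtained from an IndexSearcher. The snippet is illustrative only; the field name "body", the term "lucene", and the class name TermStatisticsDemo are made up, and it assumes the Lucene 8+ signature IndexSearcher.termStatistics(Term, int, long) that also appears in Example 4 below.

import java.io.IOException;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermStates;
import org.apache.lucene.search.CollectionStatistics;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermStatistics;
import org.apache.lucene.store.Directory;

public class TermStatisticsDemo {

  /** Prints the statistics for one term; dir is assumed to already contain an index. */
  static void printTermStatistics(Directory dir) throws IOException {
    try (DirectoryReader reader = DirectoryReader.open(dir)) {
      IndexSearcher searcher = new IndexSearcher(reader);
      Term term = new Term("body", "lucene");

      // Resolve per-segment term states, then ask the searcher for the aggregate statistics.
      TermStates states = TermStates.build(reader.getContext(), term, true);
      if (states.docFreq() > 0) {
        TermStatistics termStats =
            searcher.termStatistics(term, states.docFreq(), states.totalTermFreq());
        CollectionStatistics fieldStats = searcher.collectionStatistics("body");
        System.out.println(termStats.term().utf8ToString()
            + ": docFreq=" + termStats.docFreq()
            + ", totalTermFreq=" + termStats.totalTermFreq()
            + ", field docCount=" + fieldStats.docCount());
      }
    }
  }
}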

Example 1  Project: Elasticsearch   File: AggregatedDfs.java
@Override
public void writeTo(final StreamOutput out) throws IOException {
    out.writeVInt(termStatistics.size());
    
    for (ObjectObjectCursor<Term, TermStatistics> c : termStatistics()) {
        Term term = c.key;
        out.writeString(term.field());
        out.writeBytesRef(term.bytes());
        TermStatistics stats = c.value;
        out.writeBytesRef(stats.term());
        out.writeVLong(stats.docFreq());
        out.writeVLong(DfsSearchResult.addOne(stats.totalTermFreq()));
    }

    DfsSearchResult.writeFieldStats(out, fieldStatistics);
    out.writeVLong(maxDoc);
}
 
Example 2  Project: Elasticsearch   File: DfsSearchResult.java
public static TermStatistics[] readTermStats(StreamInput in, Term[] terms) throws IOException {
    int termsStatsSize = in.readVInt();
    final TermStatistics[] termStatistics;
    if (termsStatsSize == 0) {
        termStatistics = EMPTY_TERM_STATS;
    } else {
        termStatistics = new TermStatistics[termsStatsSize];
        assert terms.length == termsStatsSize;
        for (int i = 0; i < termStatistics.length; i++) {
            BytesRef term = terms[i].bytes();
            final long docFreq = in.readVLong();
            assert docFreq >= 0;
            final long totalTermFreq = subOne(in.readVLong());
            termStatistics[i] = new TermStatistics(term, docFreq, totalTermFreq);
        }
    }
    return termStatistics;
}
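
Examples 1 and 2 round-trip totalTermFreq through DfsSearchResult.addOne and subOne so that the sentinel -1 (statistic not available) can still be written as a non-negative VLong. A plausible reconstruction of that helper pair, assuming the simple shift-by-one implied by the calls above (the real Elasticsearch source may differ in details):

// Hypothetical reconstruction of the DfsSearchResult helpers used above.
public static long addOne(long value) {
    assert value + 1 >= 0;  // -1 ("not available") becomes 0; real frequencies stay positive
    return value + 1;
}

public static long subOne(long value) {
    assert value >= 0;      // values read from the wire are always non-negative
    return value - 1;
}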
 
Example 3  Project: lucene-solr   File: NormValueSource.java
@Override
public FunctionValues getValues(Map<Object, Object> context, LeafReaderContext readerContext) throws IOException {
  IndexSearcher searcher = (IndexSearcher)context.get("searcher");
  final TFIDFSimilarity similarity = IDFValueSource.asTFIDF(searcher.getSimilarity(), field);
  if (similarity == null) {
    throw new UnsupportedOperationException("requires a TFIDFSimilarity (such as ClassicSimilarity)");
  }
  // Only works if the contribution of the tf is 1 when the freq is 1 and contribution of the idf
  // is 1 when docCount == docFreq == 1
  final SimScorer simScorer = similarity.scorer(1f,
      new CollectionStatistics(field, 1, 1, 1, 1),
      new TermStatistics(new BytesRef("bogus"), 1, 1));
  final LeafSimScorer leafSimScorer = new LeafSimScorer(simScorer, readerContext.reader(), field, true);
  
  return new FloatDocValues(this) {
    int lastDocID = -1;
    @Override
    public float floatVal(int docID) throws IOException {
      if (docID < lastDocID) {
        throw new AssertionError("docs out of order: lastDocID=" + lastDocID + " docID=" + docID);
      }
      lastDocID = docID;
      return leafSimScorer.score(docID, 1f);
    }
  };
}
 
Example 4  Project: lucene-solr   File: SpanWeight.java
private Similarity.SimScorer buildSimWeight(SpanQuery query, IndexSearcher searcher, Map<Term, TermStates> termStates, float boost) throws IOException {
  if (termStates == null || termStates.size() == 0 || query.getField() == null)
    return null;
  TermStatistics[] termStats = new TermStatistics[termStates.size()];
  int termUpTo = 0;
  for (Map.Entry<Term, TermStates> entry : termStates.entrySet()) {
    TermStates ts = entry.getValue();
    if (ts.docFreq() > 0) {
      termStats[termUpTo++] = searcher.termStatistics(entry.getKey(), ts.docFreq(), ts.totalTermFreq());
    }
  }
  CollectionStatistics collectionStats = searcher.collectionStatistics(query.getField());
  if (termUpTo > 0) {
    return similarity.scorer(boost, collectionStats, ArrayUtil.copyOfSubArray(termStats, 0, termUpTo));
  } else {
    return null; // no terms at all exist, we won't use similarity
  }
}
 
Example 5  Project: lucene4ir   File: SMARTBNNBNNSimilarity.java
@Override
public final SimWeight computeWeight(CollectionStatistics collectionStats,
                                     TermStatistics... termStats) {
  float N, n, idf, adl;

  idf = 1.0f;
  N   = collectionStats.maxDoc();
  adl = collectionStats.sumTotalTermFreq() / N;

  if (termStats.length == 1) {
    n = termStats[0].docFreq();
    idf = log(N / n);
  } else {
    for (final TermStatistics stat : termStats) {
      n = stat.docFreq();
      idf += log(N / n);
    }
  }

  return new TFIDFWeight(collectionStats.field(), idf, adl);
}
 
Example 6  Project: Elasticsearch   File: AggregatedDfs.java
@Override
public void readFrom(StreamInput in) throws IOException {
    int size = in.readVInt();
    termStatistics = HppcMaps.newMap(size);
    for (int i = 0; i < size; i++) {
        Term term = new Term(in.readString(), in.readBytesRef());
        TermStatistics stats = new TermStatistics(in.readBytesRef(), 
                in.readVLong(), 
                DfsSearchResult.subOne(in.readVLong()));
        termStatistics.put(term, stats);
    }
    fieldStatistics = DfsSearchResult.readFieldStats(in);
    maxDoc = in.readVLong();
}
 
Example 7  Project: Elasticsearch   File: TermVectorsWriter.java
private void writeTermStatistics(TermStatistics termStatistics) throws IOException {
    int docFreq = (int) termStatistics.docFreq();
    assert (docFreq >= -1);
    writePotentiallyNegativeVInt(docFreq);
    long ttf = termStatistics.totalTermFreq();
    assert (ttf >= -1);
    writePotentiallyNegativeVLong(ttf);
}
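
The writePotentiallyNegative* helpers above presumably apply the same shift-by-one encoding, since docFreq and totalTermFreq use -1 to mean "not available" while VInt/VLong can only carry non-negative values. A hedged sketch follows; the method bodies and the output field are assumptions, not the verified Elasticsearch source:

// Assumed behavior of the helpers called in Example 7: shift by one so that -1 encodes as 0.
// "output" stands for the writer's StreamOutput field.
private void writePotentiallyNegativeVInt(int value) throws IOException {
    output.writeVInt(Math.max(0, value + 1));
}

private void writePotentiallyNegativeVLong(long value) throws IOException {
    output.writeVLong(Math.max(0, value + 1));
}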
 
Example 8  Project: linden   File: LindenSimilarity.java
@Override
public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats) {
  final long df = termStats.docFreq();
  final long max = collectionStats.maxDoc();
  final float idf = idfManager.getIDF(termStats.term().utf8ToString());
  return new Explanation(idf, "idf(docFreq=" + df + ", maxDocs=" + max + ")");
}
 
Example 9  Project: lucene-solr   File: TestMemoryIndex.java
@Test
public void testSimilarities() throws IOException {

  MemoryIndex mi = new MemoryIndex();
  mi.addField("f1", "a long text field that contains many many terms", analyzer);

  IndexSearcher searcher = mi.createSearcher();
  LeafReader reader = (LeafReader) searcher.getIndexReader();
  NumericDocValues norms = reader.getNormValues("f1");
  assertEquals(0, norms.nextDoc());
  float n1 = norms.longValue();

  // Norms are re-computed when we change the Similarity
  mi.setSimilarity(new Similarity() {

    @Override
    public long computeNorm(FieldInvertState state) {
      return 74;
    }

    @Override
    public SimScorer scorer(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
      throw new UnsupportedOperationException();
    }

  });
  norms = reader.getNormValues("f1");
  assertEquals(0, norms.nextDoc());
  float n2 = norms.longValue();

  assertTrue(n1 != n2);
  TestUtil.checkReader(reader);
}
 
Example 10  Project: lucene-solr   File: AssertingSimilarity.java
@Override
public SimScorer scorer(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
  assert boost >= 0;
  assert collectionStats != null;
  assert termStats.length > 0;
  for (TermStatistics term : termStats) {
    assert term != null;
  }
  // TODO: check that TermStats is in bounds with respect to collection? e.g. docFreq <= maxDoc
  SimScorer scorer = delegate.scorer(boost, collectionStats, termStats);
  assert scorer != null;
  return new AssertingSimScorer(scorer, boost);
}
 
Example 11  Project: lucene-solr   File: SimilarityBase.java
@Override
public final SimScorer scorer(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
  SimScorer weights[] = new SimScorer[termStats.length];
  for (int i = 0; i < termStats.length; i++) {
    BasicStats stats = newStats(collectionStats.field(), boost);
    fillBasicStats(stats, collectionStats, termStats[i]);
    weights[i] = new BasicSimScorer(stats);
  }
  if (weights.length == 1) {
    return weights[0];
  } else {
    return new MultiSimilarity.MultiSimScorer(weights);
  }
}
 
Example 12  Project: lucene-solr   File: SimilarityBase.java
/** Fills all member fields defined in {@code BasicStats} in {@code stats}. 
 *  Subclasses can override this method to fill additional stats. */
protected void fillBasicStats(BasicStats stats, CollectionStatistics collectionStats, TermStatistics termStats) {
  // TODO: validate this for real, somewhere else
  assert termStats.totalTermFreq() <= collectionStats.sumTotalTermFreq();
  assert termStats.docFreq() <= collectionStats.sumDocFreq();
 
  // TODO: add sumDocFreq for field (numberOfFieldPostings)
  stats.setNumberOfDocuments(collectionStats.docCount());
  stats.setNumberOfFieldTokens(collectionStats.sumTotalTermFreq());
  stats.setAvgFieldLength(collectionStats.sumTotalTermFreq() / (double) collectionStats.docCount());
  stats.setDocFreq(termStats.docFreq());
  stats.setTotalTermFreq(termStats.totalTermFreq());
}
 
Example 13  Project: lucene-solr   File: BM25Similarity.java
@Override
public final SimScorer scorer(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
  Explanation idf = termStats.length == 1 ? idfExplain(collectionStats, termStats[0]) : idfExplain(collectionStats, termStats);
  float avgdl = avgFieldLength(collectionStats);

  float[] cache = new float[256];
  for (int i = 0; i < cache.length; i++) {
    cache[i] = 1f / (k1 * ((1 - b) + b * LENGTH_TABLE[i] / avgdl));
  }
  return new BM25Scorer(boost, k1, b, idf, avgdl, cache);
}
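
The cache built in Example 13 stores, for each of the 256 possible encoded norms, the inverse of the BM25 length-normalization factor k1 * (1 - b + b * dl / avgdl). A simplified sketch of how a scorer can then combine it with the term frequency; this mirrors the shape of Lucene's BM25 scoring but is written here only as an illustration, with weight standing in for boost * idf * (k1 + 1):

// Illustrative only: algebraically equal to weight * freq / (freq + k1 * (1 - b + b * dl / avgdl)).
float score(float freq, long encodedNorm, float weight, float[] cache) {
    float normInverse = cache[(int) (encodedNorm & 0xFFL)];
    return weight - weight / (1f + freq * normInverse);
}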
 
Example 14  Project: lucene-solr   File: MultiSimilarity.java
@Override
public SimScorer scorer(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
  SimScorer subScorers[] = new SimScorer[sims.length];
  for (int i = 0; i < subScorers.length; i++) {
    subScorers[i] = sims[i].scorer(boost, collectionStats, termStats);
  }
  return new MultiSimScorer(subScorers);
}
 
Example 15  Project: lucene-solr   File: LMSimilarity.java
/**
 * Computes the collection probability of the current term in addition to the
 * usual statistics.
 */
@Override
protected void fillBasicStats(BasicStats stats, CollectionStatistics collectionStats, TermStatistics termStats) {
  super.fillBasicStats(stats, collectionStats, termStats);
  LMStats lmStats = (LMStats) stats;
  lmStats.setCollectionProbability(collectionModel.computeProbability(stats));
}
 
Example 16  Project: lucene-solr   File: ClassicSimilarity.java
@Override
public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats) {
  final long df = termStats.docFreq();
  final long docCount = collectionStats.docCount();
  final float idf = idf(df, docCount);
  return Explanation.match(idf, "idf, computed as log((docCount+1)/(docFreq+1)) + 1 from:",
      Explanation.match(df, "docFreq, number of documents containing term"),
      Explanation.match(docCount, "docCount, total number of documents with field"));
}
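
The Explanation text in Example 16 already spells out the classic idf formula; written as a standalone method it is simply the following (a minimal version for reference, equivalent to what the explanation quotes):

// idf = log((docCount + 1) / (docFreq + 1)) + 1, as quoted in the Explanation above
static float idf(long docFreq, long docCount) {
    return (float) (Math.log((docCount + 1) / (double) (docFreq + 1)) + 1.0);
}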
 
Example 17  Project: lucene-solr   File: TestMaxTermFrequency.java
@Override
public SimScorer scorer(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
  return new SimScorer() {

    @Override
    public float score(float freq, long norm) {
      return 0;
    }

  };
}
 
Example 18  Project: lucene-solr   File: LRUStatsCache.java
@Override
public TermStatistics termStatistics(SolrIndexSearcher localSearcher, Term term, int docFreq, long totalTermFreq)
    throws IOException {
  TermStats termStats = currentGlobalTermStats.get(term.toString());
  if (termStats == null) {
    log.debug("## Missing global termStats info: {}, using local", term);
    missingTermStats.add(term);
    metrics.missingGlobalTermStats.increment();
    return localSearcher != null ? localSearcher.localTermStatistics(term, docFreq, totalTermFreq) : null;
  } else {
    return termStats.toTermStatistics();
  }
}
 
Example 19  Project: lucene-solr   File: ExactStatsCache.java
public TermStatistics termStatistics(SolrIndexSearcher localSearcher, Term term, int docFreq, long totalTermFreq)
    throws IOException {
  TermStats termStats = termStatsCache.get(term.toString());
  // TermStats == null is also true if term has no docFreq anyway,
  // see returnLocalStats, if docFreq == 0, they are not added anyway
  // Not sure we need a warning here
  if (termStats == null) {
    log.debug("Missing global termStats info for term={}, using local stats", term);
    metrics.missingGlobalTermStats.increment();
    return localSearcher != null ? localSearcher.localTermStatistics(term, docFreq, totalTermFreq) : null;
  } else {
    return termStats.toTermStatistics();
  }
}
 
Example 20  Project: lucene-solr   File: StatsCache.java
@Override
public TermStatistics termStatistics(Term term, int docFreq, long totalTermFreq) throws IOException {
  if (statsSource.termStatistics(null, term, docFreq, totalTermFreq) == null) {
    missingTermStats.accept(term);
    missingTermsCount++;
  }
  return super.termStatistics(term, docFreq, totalTermFreq);
}
 
Example 21  Project: lucene4ir   File: BM25Similarity.java
@Override
public final SimWeight computeWeight(CollectionStatistics collectionStats, TermStatistics... termStats) {
  Explanation idf = termStats.length == 1 ? idfExplain(collectionStats, termStats[0]) : idfExplain(collectionStats, termStats);

  float avgdl = avgFieldLength(collectionStats);

  // compute freq-independent part of bm25 equation across all norm values
  float cache[] = new float[256];
  for (int i = 0; i < cache.length; i++) {
    cache[i] = k1 * ((1 - b) + b * decodeNormValue((byte)i) / avgdl);
  }
  return new BM25Stats(collectionStats.field(), idf, avgdl, cache);
}
 
Example 22  Project: lucene4ir   File: OKAPIBM25Similarity.java
@Override
public final SimWeight computeWeight(CollectionStatistics collectionStats,
                                     TermStatistics... termStats) {
  long  N, n;
  float idf_, avdl;

  idf_ = 1.0f;

  N = collectionStats.docCount();
  if (N == -1) {
    N = collectionStats.maxDoc();
  }

  // cast so the average document length is not truncated by long division
  avdl = collectionStats.sumTotalTermFreq() / (float) N;

  if (termStats.length == 1) {
    n    = termStats[0].docFreq();
    idf_ = idf(n, N);
  } else { /* computation for a phrase */
    for (final TermStatistics stat : termStats) {
      n     = stat.docFreq();
      idf_ += idf(n, N);
    }
  }

  return new TFIDFWeight(collectionStats.field(), idf_, avdl);
}
 
Example 23  Project: Elasticsearch   File: AggregatedDfs.java
public AggregatedDfs(ObjectObjectHashMap<Term, TermStatistics> termStatistics, ObjectObjectHashMap<String, CollectionStatistics> fieldStatistics, long maxDoc) {
    this.termStatistics = termStatistics;
    this.fieldStatistics = fieldStatistics;
    this.maxDoc = maxDoc;
}
 
Example 24  Project: Elasticsearch   File: AggregatedDfs.java
public ObjectObjectHashMap<Term, TermStatistics> termStatistics() {
    return termStatistics;
}
 
Example 25  Project: Elasticsearch   File: DfsSearchResult.java
public DfsSearchResult termsStatistics(Term[] terms, TermStatistics[] termStatistics) {
    this.terms = terms;
    this.termStatistics = termStatistics;
    return this;
}
 
Example 26  Project: Elasticsearch   File: DfsSearchResult.java
public TermStatistics[] termStatistics() {
    return termStatistics;
}
 
Example 27  Project: Elasticsearch   File: DfsSearchResult.java
public static void writeTermStats(StreamOutput out, TermStatistics[] termStatistics) throws IOException {
    out.writeVInt(termStatistics.length);
    for (TermStatistics termStatistic : termStatistics) {
        writeSingleTermStats(out, termStatistic);
    }
}
 
Example 28  Project: Elasticsearch   File: DfsSearchResult.java
public  static void writeSingleTermStats(StreamOutput out, TermStatistics termStatistic) throws IOException {
    assert termStatistic.docFreq() >= 0;
    out.writeVLong(termStatistic.docFreq());
    out.writeVLong(addOne(termStatistic.totalTermFreq()));        
}
 
Example 29  Project: Elasticsearch   File: TermVectorsFilter.java
private TermStatistics getTermStatistics(TermsEnum termsEnum, Term term) throws IOException {
    if (dfs != null) {
        return dfs.termStatistics().get(term);
    }
    return new TermStatistics(termsEnum.term(), termsEnum.docFreq(), termsEnum.totalTermFreq());
}
 
Example 30  Project: lucene-solr   File: TestPayloadScoreQuery.java
@Override
public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics[] termStats) {
  return Explanation.match(1.0f, "Inexplicable");
}
 