类org.apache.lucene.search.similarities.TFIDFSimilarity源码实例Demo

下面列出了怎么用 org.apache.lucene.search.similarities.TFIDFSimilarity 的 API 类实例代码及写法，或者点击链接到 GitHub 查看源代码。

源代码1 项目: lucene-solr   文件: NormValueSource.java
/**
 * Builds per-document float values by scoring every document with a frequency of 1
 * against "unit" collection and term statistics (all counts set to 1).
 *
 * <p>The searcher is read from {@code context} under the key {@code "searcher"}.
 *
 * @param context       function-query context; must hold an {@link IndexSearcher} under {@code "searcher"}
 * @param readerContext the segment whose documents are scored
 * @return values whose {@code floatVal} yields the leaf scorer's score for the requested document
 * @throws UnsupportedOperationException if the similarity resolved for {@code field} is not a
 *                                       {@link TFIDFSimilarity}
 * @throws IOException                   if reading from the segment fails
 */
@Override
public FunctionValues getValues(Map<Object, Object> context, LeafReaderContext readerContext) throws IOException {
  final IndexSearcher indexSearcher = (IndexSearcher) context.get("searcher");
  final TFIDFSimilarity tfidf = IDFValueSource.asTFIDF(indexSearcher.getSimilarity(), field);
  if (tfidf == null) {
    throw new UnsupportedOperationException("requires a TFIDFSimilarity (such as ClassicSimilarity)");
  }

  // This trick is only valid when the tf contribution is 1 for freq == 1 and the idf
  // contribution is 1 for docCount == docFreq == 1; hence the all-ones statistics below.
  final CollectionStatistics unitCollectionStats = new CollectionStatistics(field, 1, 1, 1, 1);
  final TermStatistics unitTermStats = new TermStatistics(new BytesRef("bogus"), 1, 1);
  final SimScorer unitScorer = tfidf.scorer(1f, unitCollectionStats, unitTermStats);
  final LeafSimScorer perLeafScorer = new LeafSimScorer(unitScorer, readerContext.reader(), field, true);

  return new FloatDocValues(this) {
    // Highest document id requested so far; lookups must never go backwards.
    int previousDocID = -1;

    @Override
    public float floatVal(int docID) throws IOException {
      if (docID < previousDocID) {
        throw new AssertionError("docs out of order: lastDocID=" + previousDocID + " docID=" + docID);
      }
      previousDocID = docID;
      return perLeafScorer.score(docID, 1f);
    }
  };
}
 
源代码2 项目: lucene-solr   文件: SweetSpotSimilarityTest.java
/**
 * Checks a {@link SweetSpotSimilarity} whose {@code tf} is routed to {@code hyperbolicTf}:
 * for frequencies 1..1000 the value must stay within [3.3, 7.7], frequency 5 must land on
 * the midpoint of that range, and frequency 0 must yield 0.
 */
public void testHyperbolicSweetSpot() {
  final float lowerBound = 3.3f;
  final float upperBound = 7.7f;

  // Subclass so that the generic tf() entry point delegates to the hyperbolic variant.
  SweetSpotSimilarity hyperbolic = new SweetSpotSimilarity() {
    @Override
    public float tf(float freq) {
      return hyperbolicTf(freq);
    }
  };
  hyperbolic.setHyperbolicTfFactors(lowerBound, upperBound, Math.E, 5.0f);

  // Exercise the similarity through its TFIDFSimilarity view.
  TFIDFSimilarity sim = hyperbolic;

  for (int freq = 1; freq <= 1000; freq++) {
    assertTrue("MIN tf: i=" + freq + " : s=" + sim.tf(freq),
        lowerBound <= sim.tf(freq));
    assertTrue("MAX tf: i=" + freq + " : s=" + sim.tf(freq),
        sim.tf(freq) <= upperBound);
  }

  final float midpoint = lowerBound + (upperBound - lowerBound) / 2.0f;
  assertEquals("MID tf", midpoint, sim.tf(5), 0.00001f);

  // degenerate input: a frequency of zero
  assertEquals("tf zero", 0.0f, sim.tf(0), 0.0f);
}
 
源代码3 项目: lucene-solr   文件: TestFieldMaskingSpanQuery.java
/**
 * Builds a span-near query over two field-masked sub-queries (both masked to {@code "id"}),
 * checks that it matches documents 0-3, and then walks the spans of the first leaf to
 * verify each (doc, start, end) triple in order.
 *
 * <p>Skipped unless the searcher's similarity is a {@link TFIDFSimilarity} (LUCENE-3723).
 */
public void testSpans2() throws Exception {
  assumeTrue("Broken scoring: LUCENE-3723",
      searcher.getSimilarity() instanceof TFIDFSimilarity);

  SpanQuery genderFemale = new SpanTermQuery(new Term("gender", "female"));
  SpanQuery firstJames = new SpanTermQuery(new Term("first", "james"));
  SpanQuery femaleOrJames = new SpanOrQuery(genderFemale, new FieldMaskingSpanQuery(firstJames, "gender"));
  SpanQuery lastJones = new SpanTermQuery(new Term("last", "jones"));

  SpanQuery[] maskedClauses = {
      new FieldMaskingSpanQuery(femaleOrJames, "id"),
      new FieldMaskingSpanQuery(lastJones, "id")
  };
  SpanQuery nearQuery = new SpanNearQuery(maskedClauses, -1, false);
  check(nearQuery, new int[] { 0, 1, 2, 3 });

  SpanWeight weight = nearQuery.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f);
  Spans spans = weight.getSpans(
      searcher.getIndexReader().leaves().get(0),
      SpanWeight.Postings.POSITIONS);

  // Expected (doc, start, end) triples, in iteration order.
  assertNext(spans, 0, 0, 1);
  assertNext(spans, 1, 1, 2);
  assertNext(spans, 2, 0, 1);
  assertNext(spans, 2, 2, 3);
  assertNext(spans, 3, 0, 1);
  assertFinished(spans);
}
 
/**
 * Asserts that the score a query produced for one hit matches the score the model
 * computed for the same document, and (for non-TFIDF similarities) that the explanation
 * reports the same value.
 *
 * @param features features whose names are expected in the explanation
 * @param scores   model scores, indexed by the integer stored in each document's "id" field
 * @param ltrQuery the query that produced {@code scoreDoc}
 * @param scoreDoc the hit to verify; {@code scoreDoc.doc} is the searcher's internal document number
 * @throws IOException if the stored document or the explanation cannot be read
 */
private void assertScoresMatch(List<PrebuiltFeature> features, float[] scores,
                               RankerQuery ltrQuery, ScoreDoc scoreDoc) throws IOException {
    Document d = searcherUnderTest.doc(scoreDoc.doc);
    String idVal = d.get("id");
    // Value of the stored "id" field: used only to look up the expected model score.
    int docId = Integer.decode(idVal);
    float modelScore = scores[docId];
    float queryScore = scoreDoc.score;

    assertEquals("Scores match with similarity " + similarity.getClass(), modelScore,
            queryScore, SCORE_NB_ULP_PREC *Math.ulp(modelScore));

    if (!(similarity instanceof TFIDFSimilarity)) {
        // There are precision issues with these similarities when using explain
        // It produces 0.56103003 for feat:0 in doc1 using score() but 0.5610301 using explain
        // explain() takes the searcher's internal document number (scoreDoc.doc), not the
        // value of the stored "id" field; the two only coincide by accident of index order.
        Explanation expl = searcherUnderTest.explain(ltrQuery, scoreDoc.doc);

        assertEquals("Explain scores match with similarity " + similarity.getClass(), expl.getValue().floatValue(),
                queryScore, 5 * Math.ulp(modelScore));
        checkFeatureNames(expl, features);
    }
}
 
源代码5 项目: lucene-solr   文件: IDFValueSource.java
/**
 * Computes the idf of the term ({@code indexedField}, {@code indexedBytes}) once, from the
 * top-level reader's document frequency and {@code maxDoc}, and returns it as a constant
 * for every document.
 *
 * @param context       function-query context; must hold an {@link IndexSearcher} under {@code "searcher"}
 * @param readerContext the current segment (not consulted; statistics come from the searcher's reader)
 * @return constant doc values carrying the computed idf
 * @throws UnsupportedOperationException if the similarity resolved for {@code field} is not a
 *                                       {@link TFIDFSimilarity}
 * @throws IOException                   if the document frequency cannot be read
 */
@Override
public FunctionValues getValues(Map<Object, Object> context, LeafReaderContext readerContext) throws IOException {
  final IndexSearcher indexSearcher = (IndexSearcher) context.get("searcher");
  final TFIDFSimilarity tfidf = asTFIDF(indexSearcher.getSimilarity(), field);
  if (tfidf == null) {
    throw new UnsupportedOperationException("requires a TFIDFSimilarity (such as ClassicSimilarity)");
  }

  final Term term = new Term(indexedField, indexedBytes);
  final int termDocFreq = indexSearcher.getIndexReader().docFreq(term);
  final int totalDocs = indexSearcher.getIndexReader().maxDoc();
  final float idfValue = tfidf.idf(termDocFreq, totalDocs);

  return new DocFreqValueSource.ConstDoubleDocValues(idfValue, this);
}
 
源代码6 项目: lucene-solr   文件: IDFValueSource.java
/**
 * Resolves the similarity actually used for {@code field} and returns it as a
 * {@link TFIDFSimilarity} when possible.
 *
 * <p>Any chain of {@link PerFieldSimilarityWrapper}s is unwrapped by repeatedly asking the
 * wrapper for the similarity of {@code field}.
 *
 * @param sim   the similarity to inspect; may be a per-field wrapper
 * @param field the field used to unwrap per-field wrappers
 * @return the unwrapped similarity if it is a {@link TFIDFSimilarity}, otherwise {@code null}
 */
static TFIDFSimilarity asTFIDF(Similarity sim, String field) {
  Similarity resolved = sim;
  while (resolved instanceof PerFieldSimilarityWrapper) {
    resolved = ((PerFieldSimilarityWrapper) resolved).get(field);
  }
  return (resolved instanceof TFIDFSimilarity) ? (TFIDFSimilarity) resolved : null;
}
 
源代码7 项目: lucene-solr   文件: TestFieldMaskingSpanQuery.java
/**
 * Checks that a span-near query over "gender:female" and "last:smith" matches documents
 * 2 and 4, both when only the second clause is masked to {@code "gender"} and when both
 * clauses are masked to {@code "id"}.
 *
 * <p>Skipped unless the searcher's similarity is a {@link TFIDFSimilarity} (LUCENE-3723).
 */
public void testSimple2() throws Exception {
  assumeTrue("Broken scoring: LUCENE-3723", 
      searcher.getSimilarity() instanceof TFIDFSimilarity);

  SpanQuery genderFemale = new SpanTermQuery(new Term("gender", "female"));
  SpanQuery lastSmith = new SpanTermQuery(new Term("last", "smith"));

  // Variant 1: mask only the "last" clause so that it appears to be on "gender".
  SpanQuery[] maskedToGender = {
      genderFemale,
      new FieldMaskingSpanQuery(lastSmith, "gender")
  };
  SpanQuery nearQuery = new SpanNearQuery(maskedToGender, -1, false);
  check(nearQuery, new int[] { 2, 4 });

  // Variant 2: mask both clauses to the "id" field.
  SpanQuery[] maskedToId = {
      new FieldMaskingSpanQuery(genderFemale, "id"),
      new FieldMaskingSpanQuery(lastSmith, "id")
  };
  nearQuery = new SpanNearQuery(maskedToId, -1, false);
  check(nearQuery, new int[] { 2, 4 });
}
 
源代码8 项目: Elasticsearch   文件: XMoreLikeThis.java
/**
 * Creates an instance bound to the given reader and similarity.
 *
 * @param ir  the index reader stored on this instance
 * @param sim the TF-IDF similarity stored on this instance
 */
public XMoreLikeThis(IndexReader ir, TFIDFSimilarity sim) {
    this.similarity = sim;
    this.ir = ir;
}
 
源代码9 项目: Elasticsearch   文件: XMoreLikeThis.java
/**
 * Returns the similarity currently held by this instance.
 *
 * @return the stored {@link TFIDFSimilarity}
 */
public TFIDFSimilarity getSimilarity() {
    return similarity;
}
 
源代码10 项目: Elasticsearch   文件: XMoreLikeThis.java
/**
 * Replaces the similarity held by this instance.
 *
 * @param similarity the {@link TFIDFSimilarity} to store
 */
public void setSimilarity(TFIDFSimilarity similarity) {
    this.similarity = similarity;
}
 
源代码11 项目: lucene-solr   文件: MoreLikeThis.java
/**
 * Creates an instance bound to the given reader and similarity.
 *
 * @param ir  the index reader stored on this instance
 * @param sim the TF-IDF similarity stored on this instance
 */
public MoreLikeThis(IndexReader ir, TFIDFSimilarity sim) {
  this.similarity = sim;
  this.ir = ir;
}
 
源代码12 项目: lucene-solr   文件: MoreLikeThis.java
/**
 * Returns the similarity currently held by this instance.
 *
 * @return the stored {@link TFIDFSimilarity}
 */
public TFIDFSimilarity getSimilarity() {
  return similarity;
}
 
源代码13 项目: lucene-solr   文件: MoreLikeThis.java
/**
 * Replaces the similarity held by this instance.
 *
 * @param similarity the {@link TFIDFSimilarity} to store
 */
public void setSimilarity(TFIDFSimilarity similarity) {
  this.similarity = similarity;
}
 
源代码14 项目: lucene-solr   文件: SweetSpotSimilarityTest.java
/**
 * Compares the baseline tf of {@link SweetSpotSimilarity} with the tf of
 * {@link ClassicSimilarity} under several baseline factor settings: identical values for
 * factors (0, 0), strictly higher for (1, 0), a flat plateau up to the configured
 * frequency for (1, 6) and (2, 6), strictly lower than classic from frequency 6 onwards
 * for (2, 6), and 0 for a frequency of 0.
 */
public void testSweetSpotTf() {
  final SweetSpotSimilarity sweetSpot = new SweetSpotSimilarity();

  final TFIDFSimilarity classic = new ClassicSimilarity();
  // Exercise the sweet-spot instance through its TFIDFSimilarity view.
  final TFIDFSimilarity tuned = sweetSpot;

  // Factors (0, 0): tf must equal the classic tf exactly.
  sweetSpot.setBaselineTfFactors(0.0f, 0.0f);
  for (int freq = 1; freq < 1000; freq++) {
    assertEquals("tf: i=" + freq,
        classic.tf(freq), tuned.tf(freq), 0.0f);
  }

  // Factors (1, 0): tf must be strictly above the classic tf.
  sweetSpot.setBaselineTfFactors(1.0f, 0.0f);
  for (int freq = 1; freq < 1000; freq++) {
    assertTrue("tf: i=" + freq + " : d=" + classic.tf(freq)
            + " < s=" + tuned.tf(freq),
        classic.tf(freq) < tuned.tf(freq));
  }

  // Factors (1, 6): tf is flat at 1 for frequencies 1..6.
  sweetSpot.setBaselineTfFactors(1.0f, 6.0f);
  for (int freq = 1; freq <= 6; freq++) {
    assertEquals("tf flat1: i=" + freq, 1.0f, tuned.tf(freq), 0.0f);
  }

  // Factors (2, 6): tf is flat at 2 for frequencies 1..6 ...
  sweetSpot.setBaselineTfFactors(2.0f, 6.0f);
  for (int freq = 1; freq <= 6; freq++) {
    assertEquals("tf flat2: i=" + freq, 2.0f, tuned.tf(freq), 0.0f);
  }
  // ... and stays strictly below the classic tf from frequency 6 onwards.
  for (int freq = 6; freq <= 1000; freq++) {
    assertTrue("tf: i=" + freq + " : s=" + tuned.tf(freq)
            + " < d=" + classic.tf(freq),
        tuned.tf(freq) < classic.tf(freq));
  }

  // degenerate input: a frequency of zero
  assertEquals("tf zero", 0.0f, tuned.tf(0), 0.0f);
}
 
源代码15 项目: Elasticsearch   文件: MoreLikeThisQuery.java
/**
 * Stores the given similarity, but only when it is {@code null} or a
 * {@link TFIDFSimilarity}; any other similarity is ignored and the current value is kept.
 *
 * @param similarity the similarity to store, or {@code null} to clear it
 */
public void setSimilarity(Similarity similarity) {
    // LUCENE 4 UPGRADE: a TFIDF similarity is required here, so anything else is skipped.
    final boolean acceptable = similarity == null || similarity instanceof TFIDFSimilarity;
    if (!acceptable) {
        return;
    }
    this.similarity = (TFIDFSimilarity) similarity;
}
 
 类所在包
 同包方法