下面列出了 org.apache.lucene.search.similarities.TFIDFSimilarity 类的 API 使用示例代码及写法,也可以点击链接到 GitHub 查看源代码。
/**
 * Builds the per-document values of this source for one index segment.
 *
 * <p>The searcher is read from {@code context} under the key {@code "searcher"}; its
 * similarity (unwrapped for {@code field}) must be a {@link TFIDFSimilarity}.
 *
 * @param context       map expected to hold the {@link IndexSearcher} under {@code "searcher"}
 * @param readerContext the segment whose reader supplies the data for scoring
 * @return values whose {@code floatVal(docID)} is the score of that document at frequency 1
 * @throws UnsupportedOperationException if the similarity is not a {@link TFIDFSimilarity}
 * @throws IOException                   declared by the interface; may come from scoring
 */
@Override
public FunctionValues getValues(Map<Object, Object> context, LeafReaderContext readerContext) throws IOException {
  final IndexSearcher indexSearcher = (IndexSearcher) context.get("searcher");
  final TFIDFSimilarity tfidf = IDFValueSource.asTFIDF(indexSearcher.getSimilarity(), field);
  if (tfidf == null) {
    throw new UnsupportedOperationException("requires a TFIDFSimilarity (such as ClassicSimilarity)");
  }

  // The scorer is built from "unit" statistics (one doc, one term occurrence). This is only
  // valid if tf contributes 1 when freq is 1, and idf contributes 1 when
  // docCount == docFreq == 1.
  final CollectionStatistics unitCollectionStats = new CollectionStatistics(field, 1, 1, 1, 1);
  final TermStatistics unitTermStats = new TermStatistics(new BytesRef("bogus"), 1, 1);
  final SimScorer unitScorer = tfidf.scorer(1f, unitCollectionStats, unitTermStats);
  final LeafSimScorer segmentScorer = new LeafSimScorer(unitScorer, readerContext.reader(), field, true);

  return new FloatDocValues(this) {
    /** Highest doc ID requested so far; lookups must not go backwards. */
    private int previousDocID = -1;

    @Override
    public float floatVal(int docID) throws IOException {
      if (previousDocID > docID) {
        throw new AssertionError("docs out of order: lastDocID=" + previousDocID + " docID=" + docID);
      }
      previousDocID = docID;
      return segmentScorer.score(docID, 1f);
    }
  };
}
/**
 * Verifies a {@link SweetSpotSimilarity} whose {@code tf} delegates to {@code hyperbolicTf}:
 * for frequencies 1..1000 the value must stay within [3.3, 7.7], the value at frequency 5
 * must be the midpoint of that range, and frequency 0 must yield 0.
 */
public void testHyperbolicSweetSpot() {
  final SweetSpotSimilarity hyperbolic = new SweetSpotSimilarity() {
    @Override
    public float tf(float freq) {
      return hyperbolicTf(freq);
    }
  };
  hyperbolic.setHyperbolicTfFactors(3.3f, 7.7f, Math.E, 5.0f);
  final TFIDFSimilarity sim = hyperbolic;

  for (int freq = 1; freq <= 1000; freq++) {
    final float tf = sim.tf(freq);
    assertTrue("MIN tf: i=" + freq + " : s=" + tf, 3.3f <= tf);
    assertTrue("MAX tf: i=" + freq + " : s=" + tf, tf <= 7.7f);
  }

  final float expectedMid = 3.3f + (7.7f - 3.3f) / 2.0f;
  assertEquals("MID tf", expectedMid, sim.tf(5), 0.00001f);

  // Degenerate input: a zero frequency must produce a zero tf.
  assertEquals("tf zero", 0.0f, sim.tf(0), 0.0f);
}
/**
 * Builds a near query over two field-masked sub-queries (both masked to "id"), checks the
 * matching documents, then walks the spans of the first leaf and checks each one in order.
 *
 * <p>Skipped unless the searcher's similarity is a {@link TFIDFSimilarity} (see LUCENE-3723).
 */
public void testSpans2() throws Exception {
  assumeTrue("Broken scoring: LUCENE-3723",
      searcher.getSimilarity() instanceof TFIDFSimilarity);

  final SpanQuery genderFemale = new SpanTermQuery(new Term("gender", "female"));
  final SpanQuery firstJames = new SpanTermQuery(new Term("first", "james"));
  final SpanQuery femaleOrJames =
      new SpanOrQuery(genderFemale, new FieldMaskingSpanQuery(firstJames, "gender"));
  final SpanQuery lastJones = new SpanTermQuery(new Term("last", "jones"));

  final SpanQuery[] maskedClauses = {
      new FieldMaskingSpanQuery(femaleOrJames, "id"),
      new FieldMaskingSpanQuery(lastJones, "id")
  };
  final SpanQuery nearQuery = new SpanNearQuery(maskedClauses, -1, false);
  check(nearQuery, new int[] { 0, 1, 2, 3 });

  final SpanWeight weight = nearQuery.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f);
  final Spans spans =
      weight.getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS);

  // Arguments after the spans are presumably (doc, start, end) - confirm against assertNext.
  assertNext(spans, 0, 0, 1);
  assertNext(spans, 1, 1, 2);
  assertNext(spans, 2, 0, 1);
  assertNext(spans, 2, 2, 3);
  assertNext(spans, 3, 0, 1);
  assertFinished(spans);
}
/**
 * Asserts that the score of one search hit matches the expected model score, and (for
 * non-TFIDF similarities) that the explanation of that hit agrees with the hit's score.
 *
 * @param features features whose names are checked against the explanation
 * @param scores   expected model scores, indexed by the integer value of the stored "id" field
 * @param ltrQuery the query that produced {@code scoreDoc}; used for the explanation
 * @param scoreDoc the hit to verify
 * @throws IOException if the stored document or the explanation cannot be read
 */
private void assertScoresMatch(List<PrebuiltFeature> features, float[] scores,
                               RankerQuery ltrQuery, ScoreDoc scoreDoc) throws IOException {
  Document d = searcherUnderTest.doc(scoreDoc.doc);
  String idVal = d.get("id");
  // Value of the stored "id" field: an index into scores[], NOT a Lucene doc ID.
  int storedId = Integer.decode(idVal);
  float modelScore = scores[storedId];
  float queryScore = scoreDoc.score;

  assertEquals("Scores match with similarity " + similarity.getClass(), modelScore,
      queryScore, SCORE_NB_ULP_PREC * Math.ulp(modelScore));

  // Explain is only checked for non-TFIDF similarities. According to the original note,
  // TFIDF similarities have precision issues with explain: it produced 0.56103003 for
  // feat:0 in doc1 using score() but 0.5610301 using explain.
  if (!(similarity instanceof TFIDFSimilarity)) {
    // explain() takes the Lucene internal doc ID, so use the hit's own ID rather than the
    // stored "id" field value, which only coincides with it for some index layouts.
    Explanation expl = searcherUnderTest.explain(ltrQuery, scoreDoc.doc);
    assertEquals("Explain scores match with similarity " + similarity.getClass(), expl.getValue().floatValue(),
        queryScore, 5 * Math.ulp(modelScore));
    checkFeatureNames(expl, features);
  }
}
/**
 * Computes a single idf value for the configured term and returns it as a constant for
 * every document.
 *
 * <p>The searcher is read from {@code context} under the key {@code "searcher"}. The idf is
 * derived from the term's document frequency and the reader's {@code maxDoc()}.
 *
 * @param context       map expected to hold the {@link IndexSearcher} under {@code "searcher"}
 * @param readerContext not used by this implementation
 * @return constant doc values carrying the idf
 * @throws UnsupportedOperationException if the similarity is not a {@link TFIDFSimilarity}
 * @throws IOException                   if the document frequency lookup fails
 */
@Override
public FunctionValues getValues(Map<Object, Object> context, LeafReaderContext readerContext) throws IOException {
  final IndexSearcher indexSearcher = (IndexSearcher) context.get("searcher");
  final TFIDFSimilarity tfidf = asTFIDF(indexSearcher.getSimilarity(), field);
  if (tfidf == null) {
    throw new UnsupportedOperationException("requires a TFIDFSimilarity (such as ClassicSimilarity)");
  }

  final Term term = new Term(indexedField, indexedBytes);
  final int termDocFreq = indexSearcher.getIndexReader().docFreq(term);
  final int docCount = indexSearcher.getIndexReader().maxDoc();
  final float idfValue = tfidf.idf(termDocFreq, docCount);

  return new DocFreqValueSource.ConstDoubleDocValues(idfValue, this);
}
/**
 * Resolves {@code sim} to a {@link TFIDFSimilarity} for the given field.
 *
 * <p>Any {@link PerFieldSimilarityWrapper} layers are peeled off by asking each wrapper for
 * the similarity of {@code field}, repeatedly, until a non-wrapper is reached.
 *
 * @param sim   the similarity to inspect (may be a per-field wrapper)
 * @param field the field used to look through per-field wrappers
 * @return the unwrapped similarity if it is a {@link TFIDFSimilarity}, otherwise {@code null}
 */
static TFIDFSimilarity asTFIDF(Similarity sim, String field) {
  Similarity unwrapped = sim;
  while (unwrapped instanceof PerFieldSimilarityWrapper) {
    unwrapped = ((PerFieldSimilarityWrapper) unwrapped).get(field);
  }
  return (unwrapped instanceof TFIDFSimilarity) ? (TFIDFSimilarity) unwrapped : null;
}
/**
 * Checks that two near queries over the same pair of term queries match documents 2 and 4:
 * first with only the "last" term masked to "gender", then with both terms masked to "id".
 *
 * <p>Skipped unless the searcher's similarity is a {@link TFIDFSimilarity} (see LUCENE-3723).
 */
public void testSimple2() throws Exception {
  assumeTrue("Broken scoring: LUCENE-3723",
      searcher.getSimilarity() instanceof TFIDFSimilarity);

  final SpanQuery genderFemale = new SpanTermQuery(new Term("gender", "female"));
  final SpanQuery lastSmith = new SpanTermQuery(new Term("last", "smith"));

  // Variant 1: mask only the second clause so both clauses report the "gender" field.
  final SpanQuery[] maskedToGender = {
      genderFemale,
      new FieldMaskingSpanQuery(lastSmith, "gender")
  };
  final SpanQuery nearOnGender = new SpanNearQuery(maskedToGender, -1, false);
  check(nearOnGender, new int[] { 2, 4 });

  // Variant 2: mask both clauses to a third field, "id".
  final SpanQuery[] maskedToId = {
      new FieldMaskingSpanQuery(genderFemale, "id"),
      new FieldMaskingSpanQuery(lastSmith, "id")
  };
  final SpanQuery nearOnId = new SpanNearQuery(maskedToId, -1, false);
  check(nearOnId, new int[] { 2, 4 });
}
/**
 * Creates an instance bound to the given reader and similarity.
 *
 * @param ir  the index reader stored on this instance
 * @param sim the TF-IDF similarity stored on this instance
 */
public XMoreLikeThis(IndexReader ir, TFIDFSimilarity sim) {
  this.similarity = sim;
  this.ir = ir;
}
/**
 * Returns the similarity currently held by this instance.
 *
 * @return the stored {@link TFIDFSimilarity}
 */
public TFIDFSimilarity getSimilarity() {
  return this.similarity;
}
/**
 * Replaces the similarity held by this instance.
 *
 * @param similarity the new {@link TFIDFSimilarity}; stored as given, without validation
 */
public void setSimilarity(TFIDFSimilarity similarity) {
  final TFIDFSimilarity replacement = similarity;
  this.similarity = replacement;
}
/**
 * Creates an instance bound to the given reader and similarity.
 *
 * @param ir  the index reader stored on this instance
 * @param sim the TF-IDF similarity stored on this instance
 */
public MoreLikeThis(IndexReader ir, TFIDFSimilarity sim) {
  this.similarity = sim;
  this.ir = ir;
}
/**
 * Returns the similarity currently held by this instance.
 *
 * @return the stored {@link TFIDFSimilarity}
 */
public TFIDFSimilarity getSimilarity() {
  return this.similarity;
}
/**
 * Replaces the similarity held by this instance.
 *
 * @param similarity the new {@link TFIDFSimilarity}; stored as given, without validation
 */
public void setSimilarity(TFIDFSimilarity similarity) {
  final TFIDFSimilarity replacement = similarity;
  this.similarity = replacement;
}
/**
 * Compares the baseline tf of {@link SweetSpotSimilarity} with {@link ClassicSimilarity}
 * under several baseline factor settings: equal at (0, 0), strictly higher at (1, 0), flat
 * at the base value for frequencies 1..6 when the second factor is 6, lower than classic
 * from frequency 6 upward at (2, 6), and zero at frequency 0.
 */
public void testSweetSpotTf() {
  final SweetSpotSimilarity sweetSpot = new SweetSpotSimilarity();
  final TFIDFSimilarity classic = new ClassicSimilarity();
  final TFIDFSimilarity sweet = sweetSpot;

  // With both factors at zero, the sweet-spot tf must equal the classic tf.
  sweetSpot.setBaselineTfFactors(0.0f, 0.0f);
  for (int freq = 1; freq < 1000; freq++) {
    assertEquals("tf: i=" + freq, classic.tf(freq), sweet.tf(freq), 0.0f);
  }

  // Raising the base to 1 must push the sweet-spot tf strictly above classic.
  sweetSpot.setBaselineTfFactors(1.0f, 0.0f);
  for (int freq = 1; freq < 1000; freq++) {
    final float classicTf = classic.tf(freq);
    final float sweetTf = sweet.tf(freq);
    assertTrue("tf: i=" + freq + " : d=" + classicTf + " < s=" + sweetTf,
        classicTf < sweetTf);
  }

  // With the second factor at 6, frequencies 1..6 must all map to the base value.
  sweetSpot.setBaselineTfFactors(1.0f, 6.0f);
  for (int freq = 1; freq <= 6; freq++) {
    assertEquals("tf flat1: i=" + freq, 1.0f, sweet.tf(freq), 0.0f);
  }
  sweetSpot.setBaselineTfFactors(2.0f, 6.0f);
  for (int freq = 1; freq <= 6; freq++) {
    assertEquals("tf flat2: i=" + freq, 2.0f, sweet.tf(freq), 0.0f);
  }

  // Still at (2, 6): from frequency 6 upward the sweet-spot tf must stay below classic.
  for (int freq = 6; freq <= 1000; freq++) {
    final float sweetTf = sweet.tf(freq);
    final float classicTf = classic.tf(freq);
    assertTrue("tf: i=" + freq + " : s=" + sweetTf + " < d=" + classicTf,
        sweetTf < classicTf);
  }

  // Degenerate input: a zero frequency must produce a zero tf.
  assertEquals("tf zero", 0.0f, sweet.tf(0), 0.0f);
}
/**
 * Sets the similarity, but only when it can be stored as a {@link TFIDFSimilarity}.
 *
 * <p>A {@code null} argument clears the stored similarity. A non-null argument that is not a
 * {@link TFIDFSimilarity} is silently ignored and the current value is kept. The original
 * note attributes this to the Lucene 4 upgrade, after which a TF-IDF similarity is required.
 *
 * @param similarity the candidate similarity, or {@code null}
 */
public void setSimilarity(Similarity similarity) {
  final boolean storable = similarity == null || similarity instanceof TFIDFSimilarity;
  if (!storable) {
    // Not a TF-IDF similarity: leave the current value untouched.
    return;
  }
  this.similarity = (TFIDFSimilarity) similarity;
}