The following examples show how to use the org.apache.lucene.search.similarities.ClassicSimilarity API class; you can also follow the link to view the full source code on GitHub.
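Before the collected snippets, here is a minimal, self-contained sketch of the basic usage pattern (not taken from any of the projects below; the index path, field name, and document text are placeholder assumptions): construct a ClassicSimilarity, Lucene's TF-IDF implementation, set it on the IndexWriterConfig so norms are encoded consistently at index time, and set the same instance on the IndexSearcher so queries are scored with TF-IDF instead of the default BM25.

import java.nio.file.Paths;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.similarities.ClassicSimilarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class ClassicSimilarityExample {
    public static void main(String[] args) throws Exception {
        // Use the same similarity at index time (norm encoding) and search time (scoring).
        ClassicSimilarity similarity = new ClassicSimilarity();

        // Placeholder index path for illustration only.
        try (Directory dir = FSDirectory.open(Paths.get("/tmp/classic-similarity-index"))) {
            IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
            iwc.setSimilarity(similarity);
            try (IndexWriter writer = new IndexWriter(dir, iwc)) {
                Document doc = new Document();
                doc.add(new TextField("body", "lucene is a search engine library", Field.Store.YES));
                writer.addDocument(doc);
            }

            try (DirectoryReader reader = DirectoryReader.open(dir)) {
                IndexSearcher searcher = new IndexSearcher(reader);
                searcher.setSimilarity(similarity); // score with TF-IDF instead of the default BM25
                TopDocs hits = searcher.search(new TermQuery(new Term("body", "lucene")), 10);
                System.out.println("hits: " + hits.totalHits);
            }
        }
    }
}

The snippets that follow are excerpts from test and application code in several projects; most of them apply this same setSimilarity pattern on a writer config or searcher.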
public TopDocs executeQuery(org.apache.lucene.search.Query query) throws IOException, ParseException {
Directory indexDir = FSDirectory.open(Paths.get(INDEX_DIRECTORY));
try {
IndexReader reader = DirectoryReader.open(indexDir);
IndexSearcher searcher = new IndexSearcher(reader);
if (!isBm25) {
ClassicSimilarity classicSimilarity = new ClassicSimilarity();
searcher.setSimilarity(classicSimilarity);
}
TopDocs docs = searcher.search(query, hitsPerPage);
return docs;
} catch (Exception e) {
logger.error(e.getMessage());
return null;
}
}
public void testNorm() throws Exception {
Similarity saved = searcher.getSimilarity();
try {
// no norm field (so agnostic to indexed similarity)
searcher.setSimilarity(new ClassicSimilarity());
ValueSource vs = new NormValueSource("byte");
assertHits(new FunctionQuery(vs), new float[] { 1f, 1f });
// regardless of whether norms exist, value source exists == 0
assertAllExist(vs);
vs = new NormValueSource("text");
assertAllExist(vs);
} finally {
searcher.setSimilarity(saved);
}
}
public void testTF() throws Exception {
Similarity saved = searcher.getSimilarity();
try {
// no norm field (so agnostic to indexed similarity)
searcher.setSimilarity(new ClassicSimilarity());
ValueSource vs = new TFValueSource("bogus", "bogus", "text", new BytesRef("test"));
assertHits(new FunctionQuery(vs),
new float[] { (float)Math.sqrt(3d), (float)Math.sqrt(1d) });
assertAllExist(vs);
vs = new TFValueSource("bogus", "bogus", "string", new BytesRef("bar"));
assertHits(new FunctionQuery(vs), new float[] { 0f, 1f });
assertAllExist(vs);
// regardless of whether norms exist, value source exists == 0
vs = new TFValueSource("bogus", "bogus", "bogus", new BytesRef("bogus"));
assertHits(new FunctionQuery(vs), new float[] { 0F, 0F });
assertAllExist(vs);
} finally {
searcher.setSimilarity(saved);
}
}
public void testReallyNoNormsForDrillDown() throws Exception {
Directory dir = newDirectory();
Directory taxoDir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
iwc.setSimilarity(new PerFieldSimilarityWrapper() {
final Similarity sim = new ClassicSimilarity();
@Override
public Similarity get(String name) {
assertEquals("field", name);
return sim;
}
});
TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
FacetsConfig config = new FacetsConfig();
Document doc = new Document();
doc.add(newTextField("field", "text", Field.Store.NO));
doc.add(new FacetField("a", "path"));
writer.addDocument(config.build(taxoWriter, doc));
writer.close();
IOUtils.close(taxoWriter, dir, taxoDir);
}
public void testPayloadSpanUtil() throws Exception {
Directory directory = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), directory,
newIndexWriterConfig(new PayloadAnalyzer()).setSimilarity(new ClassicSimilarity()));
Document doc = new Document();
doc.add(newTextField(FIELD, "xx rr yy mm pp", Field.Store.YES));
writer.addDocument(doc);
IndexReader reader = writer.getReader();
writer.close();
IndexSearcher searcher = newSearcher(reader);
PayloadSpanUtil psu = new PayloadSpanUtil(searcher.getTopReaderContext());
Collection<byte[]> payloads = psu.getPayloadsForQuery(new TermQuery(new Term(FIELD, "rr")));
if(VERBOSE) {
System.out.println("Num payloads:" + payloads.size());
for (final byte [] bytes : payloads) {
System.out.println(new String(bytes, StandardCharsets.UTF_8));
}
}
reader.close();
directory.close();
}
public void testNonStandardSimilarity() throws Exception {
try (Monitor monitor = newMonitor()) {
monitor.register(new MonitorQuery("1", MonitorTestBase.parse("test")));
Similarity similarity = new ClassicSimilarity() {
@Override
public float tf(float freq) {
return 1000f;
}
};
Document doc = new Document();
doc.add(newTextField("field", "this is a test", Field.Store.NO));
MatchingQueries<ScoringMatch> standard = monitor.match(doc, ScoringMatch.matchWithSimilarity(new ClassicSimilarity()));
MatchingQueries<ScoringMatch> withSim = monitor.match(doc, ScoringMatch.matchWithSimilarity(similarity));
float standScore = standard.getMatches().iterator().next().getScore();
float simScore = withSim.getMatches().iterator().next().getScore();
assertEquals(standScore, simScore / 1000, 0.1f);
}
}
@Override
public void setUp() throws Exception {
super.setUp();
analyzer = new MockAnalyzer(random());
dir = newDirectory();
IndexWriterConfig config = newIndexWriterConfig(analyzer);
config.setMergePolicy(newLogMergePolicy()); // we will use docids to validate
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config);
writer.addDocument(doc("lucene", "lucene is a very popular search engine library"));
writer.addDocument(doc("solr", "solr is a very popular search server and is using lucene"));
writer.addDocument(doc("nutch", "nutch is an internet search engine with web crawler and is using lucene and hadoop"));
reader = writer.getReader();
writer.close();
// we do not use newSearcher because the assertingXXX layers break
// the toString representations we are relying on
// TODO: clean that up
searcher = new IndexSearcher(reader);
searcher.setSimilarity(new ClassicSimilarity());
scorerSearcher = new ScorerIndexSearcher(reader);
scorerSearcher.setSimilarity(new CountingSimilarity());
}
@Override
public void setUp() throws Exception {
super.setUp();
directory = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), directory,
newIndexWriterConfig(new MockAnalyzer(random()))
.setMergePolicy(newLogMergePolicy())
.setSimilarity(new ClassicSimilarity()));
for (int i = 0; i < values.length; i++) {
Document doc = new Document();
doc.add(newTextField(FIELD, values[i], Field.Store.YES));
writer.addDocument(doc);
}
writer.forceMerge(1);
indexReader = getOnlyLeafReader(writer.getReader());
writer.close();
indexSearcher = newSearcher(indexReader, false);
indexSearcher.setSimilarity(new ClassicSimilarity());
}
@Test
public void testFreezeAPI() {
MemoryIndex mi = new MemoryIndex();
mi.addField("f1", "some text", analyzer);
assertThat(mi.search(new MatchAllDocsQuery()), not(is(0.0f)));
assertThat(mi.search(new TermQuery(new Term("f1", "some"))), not(is(0.0f)));
// check we can add a new field after searching
mi.addField("f2", "some more text", analyzer);
assertThat(mi.search(new TermQuery(new Term("f2", "some"))), not(is(0.0f)));
// freeze!
mi.freeze();
RuntimeException expected = expectThrows(RuntimeException.class, () -> {
mi.addField("f3", "and yet more", analyzer);
});
assertThat(expected.getMessage(), containsString("frozen"));
expected = expectThrows(RuntimeException.class, () -> {
mi.setSimilarity(new BM25Similarity(1, 1));
});
assertThat(expected.getMessage(), containsString("frozen"));
assertThat(mi.search(new TermQuery(new Term("f1", "some"))), not(is(0.0f)));
mi.reset();
mi.addField("f1", "wibble", analyzer);
assertThat(mi.search(new TermQuery(new Term("f1", "some"))), is(0.0f));
assertThat(mi.search(new TermQuery(new Term("f1", "wibble"))), not(is(0.0f)));
// check we can set the Similarity again
mi.setSimilarity(new ClassicSimilarity());
}
private Similarity createSimilarity(SimilarityConfig config) {
Similarity similarity;
if (config.isUseClassicSimilarity()) {
ClassicSimilarity tfidf = new ClassicSimilarity();
tfidf.setDiscountOverlaps(config.isDiscountOverlaps());
similarity = tfidf;
} else {
BM25Similarity bm25 = new BM25Similarity(config.getK1(), config.getB());
bm25.setDiscountOverlaps(config.isDiscountOverlaps());
similarity = bm25;
}
return similarity;
}
public void testIDF() throws Exception {
Similarity saved = searcher.getSimilarity();
try {
searcher.setSimilarity(new ClassicSimilarity());
ValueSource vs = new IDFValueSource("bogus", "bogus", "text", new BytesRef("test"));
assertHits(new FunctionQuery(vs), new float[] { 1.0f, 1.0f });
assertAllExist(vs);
} finally {
searcher.setSimilarity(saved);
}
}
@Test
public void testNestedNearQuery() throws Exception {
// (one OR hundred) NEAR (twenty two) ~ 1
// 2 4 4 4
// one hundred twenty two
// two hundred twenty two
SpanNearQuery q = new SpanNearQuery(new SpanQuery[]{
new SpanOrQuery(new SpanTermQuery(new Term("field", "one")), new SpanTermQuery(new Term("field", "hundred"))),
new SpanNearQuery(new SpanQuery[]{
new SpanTermQuery(new Term("field", "twenty")),
new SpanTermQuery(new Term("field", "two"))
}, 0, true)
}, 1, true);
// check includeSpanScore makes a difference here
searcher.setSimilarity(new ClassicSimilarity());
try {
checkQuery(q, new MaxPayloadFunction(), new int[]{ 122, 222 }, new float[]{ 20.901256561279297f, 17.06580352783203f });
checkQuery(q, new MinPayloadFunction(), new int[]{ 222, 122 }, new float[]{ 17.06580352783203f, 10.450628280639648f });
checkQuery(q, new AveragePayloadFunction(), new int[] { 122, 222 }, new float[]{ 19.15948486328125f, 17.06580352783203f });
checkQuery(q, new MaxPayloadFunction(), false, new int[]{122, 222}, new float[]{4.0f, 4.0f});
checkQuery(q, new MinPayloadFunction(), false, new int[]{222, 122}, new float[]{4.0f, 2.0f});
checkQuery(q, new AveragePayloadFunction(), false, new int[]{222, 122}, new float[]{4.0f, 3.666666f});
}
finally {
searcher.setSimilarity(similarity);
}
}
@BeforeClass
public static void beforeClass() throws Exception {
dir = newDirectory();
RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
final int numDocs = atLeast(300);
for (int i = 0; i < numDocs; i++) {
Document doc = new Document();
addSome(doc, alwaysTerms);
if (random().nextInt(100) < 90) {
addSome(doc, commonTerms);
}
if (random().nextInt(100) < 50) {
addSome(doc, mediumTerms);
}
if (random().nextInt(100) < 10) {
addSome(doc, rareTerms);
}
iw.addDocument(doc);
}
iw.forceMerge(1);
iw.close();
r = DirectoryReader.open(dir);
reader = getOnlyLeafReader(r);
searcher = new IndexSearcher(reader);
searcher.setSimilarity(new ClassicSimilarity());
}
public void testSorting() throws Throwable {
Directory directory = newDirectory();
IndexWriter writer = new IndexWriter(
directory,
newIndexWriterConfig(new MockAnalyzer(random())).
setMaxBufferedDocs(2).
setMergePolicy(newLogMergePolicy(1000)).
setSimilarity(new ClassicSimilarity())
);
writer.addDocument(adoc(new String[] {"id", "a", "title", "ipod", "str_s", "a"}));
writer.addDocument(adoc(new String[] {"id", "b", "title", "ipod ipod", "str_s", "b"}));
writer.addDocument(adoc(new String[] {"id", "c", "title", "ipod ipod ipod", "str_s","c"}));
writer.addDocument(adoc(new String[] {"id", "x", "title", "boosted", "str_s", "x"}));
writer.addDocument(adoc(new String[] {"id", "y", "title", "boosted boosted", "str_s","y"}));
writer.addDocument(adoc(new String[] {"id", "z", "title", "boosted boosted boosted","str_s", "z"}));
IndexReader r = DirectoryReader.open(writer);
writer.close();
IndexSearcher searcher = newSearcher(r);
searcher.setSimilarity(new BM25Similarity());
runTest(searcher, true);
runTest(searcher, false);
r.close();
directory.close();
}
public void testSingleQueryExactMatchScoresHighest() throws Exception {
//See issue LUCENE-329 - IDF shouldn't wreck similarity ranking
Directory directory = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
addDoc("smith", writer);
addDoc("smith", writer);
addDoc("smith", writer);
addDoc("smith", writer);
addDoc("smith", writer);
addDoc("smith", writer);
addDoc("smythe", writer);
addDoc("smdssasd", writer);
IndexReader reader = writer.getReader();
IndexSearcher searcher = newSearcher(reader);
searcher.setSimilarity(new ClassicSimilarity()); //avoid randomisation of similarity algo by test framework
writer.close();
String[] searchTerms = { "smith", "smythe", "smdssasd" };
for (String searchTerm : searchTerms) {
FuzzyQuery query = new FuzzyQuery(new Term("field", searchTerm), 2, 1);
ScoreDoc[] hits = searcher.search(query, 1000).scoreDocs;
assertTrue(hits.length > 0);
Document bestDoc = searcher.doc(hits[0].doc);
String topMatch = bestDoc.get("field");
assertEquals(searchTerm, topMatch);
if (hits.length > 1) {
Document worstDoc = searcher.doc(hits[hits.length - 1].doc);
String worstMatch = worstDoc.get("field");
assertNotSame(searchTerm, worstMatch);
}
}
reader.close();
directory.close();
}
public void testSlopScoring() throws IOException {
Directory directory = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), directory,
newIndexWriterConfig(new MockAnalyzer(random()))
.setMergePolicy(newLogMergePolicy())
.setSimilarity(new BM25Similarity()));
Document doc = new Document();
doc.add(newTextField("field", "foo firstname lastname foo", Field.Store.YES));
writer.addDocument(doc);
Document doc2 = new Document();
doc2.add(newTextField("field", "foo firstname zzz lastname foo", Field.Store.YES));
writer.addDocument(doc2);
Document doc3 = new Document();
doc3.add(newTextField("field", "foo firstname zzz yyy lastname foo", Field.Store.YES));
writer.addDocument(doc3);
IndexReader reader = writer.getReader();
writer.close();
IndexSearcher searcher = newSearcher(reader);
searcher.setSimilarity(new ClassicSimilarity());
PhraseQuery query = new PhraseQuery(Integer.MAX_VALUE, "field", "firstname", "lastname");
ScoreDoc[] hits = searcher.search(query, 1000).scoreDocs;
assertEquals(3, hits.length);
// Make sure that those matches where the terms appear closer to
// each other get a higher score:
assertEquals(1.0, hits[0].score, 0.01);
assertEquals(0, hits[0].doc);
assertEquals(0.63, hits[1].score, 0.01);
assertEquals(1, hits[1].doc);
assertEquals(0.47, hits[2].score, 0.01);
assertEquals(2, hits[2].doc);
QueryUtils.check(random(), query,searcher);
reader.close();
directory.close();
}
private IndexSearcher getSearcher(IndexReader r) {
IndexSearcher searcher = newSearcher(r);
// We rely on more tokens = lower score:
searcher.setSimilarity(new ClassicSimilarity());
return searcher;
}
public void testNullOrSubScorer() throws Throwable {
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
Document doc = new Document();
doc.add(newTextField("field", "a b c d", Field.Store.NO));
w.addDocument(doc);
IndexReader r = w.getReader();
IndexSearcher s = newSearcher(r);
// this test relies upon coord being the default implementation,
// otherwise scores are different!
s.setSimilarity(new ClassicSimilarity());
BooleanQuery.Builder q = new BooleanQuery.Builder();
q.add(new TermQuery(new Term("field", "a")), BooleanClause.Occur.SHOULD);
// PhraseQuery w/ no terms added returns a null scorer
PhraseQuery pq = new PhraseQuery("field", new String[0]);
q.add(pq, BooleanClause.Occur.SHOULD);
assertEquals(1, s.search(q.build(), 10).totalHits.value);
// A required clause which returns null scorer should return null scorer to
// IndexSearcher.
q = new BooleanQuery.Builder();
pq = new PhraseQuery("field", new String[0]);
q.add(new TermQuery(new Term("field", "a")), BooleanClause.Occur.SHOULD);
q.add(pq, BooleanClause.Occur.MUST);
assertEquals(0, s.search(q.build(), 10).totalHits.value);
DisjunctionMaxQuery dmq = new DisjunctionMaxQuery(
Arrays.asList(new TermQuery(new Term("field", "a")), pq),
1.0f);
assertEquals(1, s.search(dmq, 10).totalHits.value);
r.close();
w.close();
dir.close();
}
/** default parameters */
public void testDefaults() throws Exception {
SweetSpotSimilarity sim = getSimilarity("text", SweetSpotSimilarity.class);
// SweetSpotSimilarity tf with defaults should behave just like ClassicSimilarity
ClassicSimilarity d = new ClassicSimilarity();
for (int i = 0; i <=1000; i++) {
assertEquals("tf: i="+i, d.tf(i), sim.tf(i), 0.0F);
}
// default norm sanity check
assertEquals("norm 1", 1.00F, computeNorm(sim, 1), 0.0F);
assertEquals("norm 4", 0.50F, computeNorm(sim, 4), 0.0F);
assertEquals("norm 16", 0.25F, computeNorm(sim, 16), 0.0F);
}
/** baseline with parameters */
public void testBaselineParameters() throws Exception {
SweetSpotSimilarity sim = getSimilarity("text_baseline",
SweetSpotSimilarity.class);
ClassicSimilarity d = new ClassicSimilarity();
// constant up to 6
for (int i = 1; i <=6; i++) {
assertEquals("tf i="+i, 1.5F, sim.tf(i), 0.0F);
}
// less than the default similarity above 6
for (int i = 6; i <=1000; i++) {
assertTrue("tf: i="+i+" : s="+sim.tf(i)+
" < d="+d.tf(i),
sim.tf(i) < d.tf(i));
}
// norms: plateau from 3-5
assertEquals("norm 1 == 7",
computeNorm(sim, 1), computeNorm(sim, 7), 0.0F);
assertEquals("norm 2 == 6",
computeNorm(sim, 2), computeNorm(sim, 6), 0.0F);
assertEquals("norm 3", 1.00F, computeNorm(sim, 3), 0.0F);
assertEquals("norm 4", 1.00F, computeNorm(sim, 4), 0.0F);
assertEquals("norm 5", 1.00F, computeNorm(sim, 5), 0.0F);
assertTrue("norm 6 too high: " + computeNorm(sim, 6),
computeNorm(sim, 6) < 1.0F);
assertTrue("norm 7 higher than norm 6",
computeNorm(sim, 7) < computeNorm(sim, 6));
assertEquals("norm 20", 0.25F, computeNorm(sim, 20), 0.0F);
}
@Before
public void setupIndex() throws IOException {
dirUnderTest = newDirectory();
List<Similarity> sims = Arrays.asList(
new ClassicSimilarity(),
new SweetSpotSimilarity(), // extends Classic
new BM25Similarity(),
new LMDirichletSimilarity(),
new BooleanSimilarity(),
new LMJelinekMercerSimilarity(0.2F),
new AxiomaticF3LOG(0.5F, 10),
new DFISimilarity(new IndependenceChiSquared()),
new DFRSimilarity(new BasicModelG(), new AfterEffectB(), new NormalizationH1()),
new IBSimilarity(new DistributionLL(), new LambdaDF(), new NormalizationH3())
);
similarity = sims.get(random().nextInt(sims.size()));
indexWriterUnderTest = new RandomIndexWriter(random(), dirUnderTest, newIndexWriterConfig().setSimilarity(similarity));
for (int i = 0; i < docs.length; i++) {
Document doc = new Document();
doc.add(newStringField("id", "" + i, Field.Store.YES));
doc.add(newField("field", docs[i], Store.YES));
indexWriterUnderTest.addDocument(doc);
}
indexWriterUnderTest.commit();
indexWriterUnderTest.forceMerge(1);
indexWriterUnderTest.flush();
indexReaderUnderTest = indexWriterUnderTest.getReader();
searcherUnderTest = newSearcher(indexReaderUnderTest);
searcherUnderTest.setSimilarity(similarity);
}
@Test
public void testThatQueryUsesTermButNoFieldBoost() throws Exception {
Analyzer analyzer = new StandardAnalyzer();
Directory directory = new ByteBuffersDirectory();
IndexWriterConfig config = new IndexWriterConfig(analyzer);
config.setSimilarity(new ClassicSimilarity());
IndexWriter indexWriter = new IndexWriter(directory, config);
TestUtil.addNumDocsWithTextField("f1", "v1 v1", indexWriter, 4);
TestUtil.addNumDocsWithTextField("f1", "v2", indexWriter, 1);
indexWriter.close();
IndexReader indexReader = DirectoryReader.open(directory);
IndexSearcher indexSearcher = new IndexSearcher(indexReader);
indexSearcher.setSimilarity(new ClassicSimilarity());
final TermQuery termQuery = new LuceneTermQueryBuilder()
.createTermQuery(new Term("f1", "v1"), new ConstantFieldBoost(3f));
final Term term = termQuery.getTerm();
assertEquals("f1", term.field());
assertEquals("v1", term.text());
TopDocs topDocs = indexSearcher.search(termQuery, 10);
final Weight weight = termQuery.createWeight(indexSearcher, ScoreMode.COMPLETE, 4.5f);
final Explanation explain = weight.explain(indexReader.getContext().leaves().get(0), topDocs.scoreDocs[0].doc);
String explainText = explain.toString();
assertTrue(explainText.contains("4.5 = boost")); // 4.5 (query) but ignore field boost
assertTrue(explainText.contains("4 = docFreq")); // 4 * v1
assertTrue(explainText.contains("2.0 = freq")); // 2 * v1 in field
}
public DocFreq(IndexReader indexReader, String field) {
this.indexReader = indexReader;
this.field = field;
this.docFreqMap = new HashMap<>();
this.similarity = new ClassicSimilarity();
this.numDocs = indexReader.numDocs();
}
private PerFieldSimilarityWrapper getSimilarity(final QueryWithFilters queryWithFilters) {
return new PerFieldSimilarityWrapper() {
@Override
public Similarity get(String name) {
AnalyzerSettings analyzerSettings = indexConfig.getAnalyzerSettingsForIndexField(name);
AnalyzerSettings.Similarity similarity = AnalyzerSettings.Similarity.BM25;
if (analyzerSettings != null) {
similarity = analyzerSettings.getSimilarity();
}
AnalyzerSettings.Similarity fieldSimilarityOverride = queryWithFilters.getFieldSimilarityOverride(name);
if (fieldSimilarityOverride != null) {
similarity = fieldSimilarityOverride;
}
if (AnalyzerSettings.Similarity.TFIDF.equals(similarity)) {
return new ClassicSimilarity();
}
else if (AnalyzerSettings.Similarity.BM25.equals(similarity)) {
return new BM25Similarity();
}
else if (AnalyzerSettings.Similarity.CONSTANT.equals(similarity)) {
return new ConstantSimilarity();
}
else if (AnalyzerSettings.Similarity.TF.equals(similarity)) {
return new TFSimilarity();
}
else {
throw new RuntimeException("Unknown similarity type <" + similarity + ">");
}
}
};
}
/**
* Constructor requiring an IndexReader.
*/
public MoreLikeThis(IndexReader ir) {
this(ir, new ClassicSimilarity());
}
public void testSweetSpotTf() {
SweetSpotSimilarity ss = new SweetSpotSimilarity();
TFIDFSimilarity d = new ClassicSimilarity();
TFIDFSimilarity s = ss;
// tf equal
ss.setBaselineTfFactors(0.0f, 0.0f);
for (int i = 1; i < 1000; i++) {
assertEquals("tf: i="+i,
d.tf(i), s.tf(i), 0.0f);
}
// tf higher
ss.setBaselineTfFactors(1.0f, 0.0f);
for (int i = 1; i < 1000; i++) {
assertTrue("tf: i="+i+" : d="+d.tf(i)+
" < s="+s.tf(i),
d.tf(i) < s.tf(i));
}
// tf flat
ss.setBaselineTfFactors(1.0f, 6.0f);
for (int i = 1; i <=6; i++) {
assertEquals("tf flat1: i="+i, 1.0f, s.tf(i), 0.0f);
}
ss.setBaselineTfFactors(2.0f, 6.0f);
for (int i = 1; i <=6; i++) {
assertEquals("tf flat2: i="+i, 2.0f, s.tf(i), 0.0f);
}
for (int i = 6; i <=1000; i++) {
assertTrue("tf: i="+i+" : s="+s.tf(i)+
" < d="+d.tf(i),
s.tf(i) < d.tf(i));
}
// stupidity
assertEquals("tf zero", 0.0f, s.tf(0), 0.0f);
}
@Override
public void setUp() throws Exception {
super.setUp();
// TODO: switch to BM25?
searcher.setSimilarity(new ClassicSimilarity());
}
public void testMultipleQueriesIdfWorks() throws Exception {
// With issue LUCENE-329 - it could be argued a MultiTermQuery.TopTermsBoostOnlyBooleanQueryRewrite
// is the solution as it disables IDF.
// However - IDF is still useful as in this case where there are multiple FuzzyQueries.
Directory directory = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
addDoc("michael smith", writer);
addDoc("michael lucero", writer);
addDoc("doug cutting", writer);
addDoc("doug cuttin", writer);
addDoc("michael wardle", writer);
addDoc("micheal vegas", writer);
addDoc("michael lydon", writer);
IndexReader reader = writer.getReader();
IndexSearcher searcher = newSearcher(reader);
searcher.setSimilarity(new ClassicSimilarity()); //avoid randomisation of similarity algo by test framework
writer.close();
BooleanQuery.Builder query = new BooleanQuery.Builder();
String commonSearchTerm = "michael";
FuzzyQuery commonQuery = new FuzzyQuery(new Term("field", commonSearchTerm), 2, 1);
query.add(commonQuery, Occur.SHOULD);
String rareSearchTerm = "cutting";
FuzzyQuery rareQuery = new FuzzyQuery(new Term("field", rareSearchTerm), 2, 1);
query.add(rareQuery, Occur.SHOULD);
ScoreDoc[] hits = searcher.search(query.build(), 1000).scoreDocs;
// Matches on the rare surname should be worth more than matches on the common forename
assertEquals(7, hits.length);
Document bestDoc = searcher.doc(hits[0].doc);
String topMatch = bestDoc.get("field");
assertTrue(topMatch.contains(rareSearchTerm));
Document runnerUpDoc = searcher.doc(hits[1].doc);
String runnerUpMatch = runnerUpDoc.get("field");
assertTrue(runnerUpMatch.contains("cuttin"));
Document worstDoc = searcher.doc(hits[hits.length - 1].doc);
String worstMatch = worstDoc.get("field");
assertTrue(worstMatch.contains("micheal")); //misspelling of common name
reader.close();
directory.close();
}
@Test
public void testRandomQueries() throws Exception {
String[] vals = {"w1","w2","w3","w4","w5","xx","yy","zzz"};
int tot=0;
BooleanQuery q1 = null;
try {
// increase number of iterations for more complete testing
int num = atLeast(3);
for (int i=0; i<num; i++) {
int level = random().nextInt(3);
q1 = randBoolQuery(new Random(random().nextLong()), random().nextBoolean(), level, field, vals, null).build();
// Can't sort by relevance since floating point numbers may not quite
// match up.
Sort sort = Sort.INDEXORDER;
QueryUtils.check(random(), q1,searcher); // baseline sim
try {
// a little hackish, QueryUtils.check is too costly to do on bigSearcher in this loop.
searcher.setSimilarity(bigSearcher.getSimilarity()); // random sim
QueryUtils.check(random(), q1, searcher);
} finally {
searcher.setSimilarity(new ClassicSimilarity()); // restore
}
// check diff (randomized) scorers (from AssertingSearcher) produce the same results
TopFieldCollector collector = TopFieldCollector.create(sort, 1000, 1);
searcher.search(q1, collector);
ScoreDoc[] hits1 = collector.topDocs().scoreDocs;
collector = TopFieldCollector.create(sort, 1000, 1);
searcher.search(q1, collector);
ScoreDoc[] hits2 = collector.topDocs().scoreDocs;
tot+=hits2.length;
CheckHits.checkEqual(q1, hits1, hits2);
BooleanQuery.Builder q3 = new BooleanQuery.Builder();
q3.add(q1, BooleanClause.Occur.SHOULD);
q3.add(new PrefixQuery(new Term("field2", "b")), BooleanClause.Occur.SHOULD);
assertEquals(mulFactor*collector.totalHits + NUM_EXTRA_DOCS/2, bigSearcher.count(q3.build()));
// test diff (randomized) scorers produce the same results on bigSearcher as well
collector = TopFieldCollector.create(sort, 1000 * mulFactor, 1);
bigSearcher.search(q1, collector);
hits1 = collector.topDocs().scoreDocs;
collector = TopFieldCollector.create(sort, 1000 * mulFactor, 1);
bigSearcher.search(q1, collector);
hits2 = collector.topDocs().scoreDocs;
CheckHits.checkEqual(q1, hits1, hits2);
}
} catch (Exception e) {
// For easier debugging
System.out.println("failed query: " + q1);
throw e;
}
// System.out.println("Total hits:"+tot);
}
public static IndexWriterConfig newIndexWriterConfig() {
// We rely on more tokens = lower score:
return LuceneTestCase.newIndexWriterConfig().setSimilarity(new ClassicSimilarity());
}