

源代码1 项目: scava   文件: SORecommender.java
public TopDocs executeQuery(org.apache.lucene.search.Query query) throws IOException, ParseException {
	Directory indexDir = FSDirectory.open(Paths.get(INDEX_DIRECTORY));
	try {
		IndexReader reader = DirectoryReader.open(indexDir);
		IndexSearcher searcher = new IndexSearcher(reader);
		if (isBm25 == false) {
			ClassicSimilarity CS = new ClassicSimilarity();
		TopDocs docs = searcher.search(query, hitsPerPage);
		return docs;
	} catch (Exception e) {
		return null;
源代码2 项目: lucene-solr   文件: TestValueSources.java
/**
 * Exercises {@code NormValueSource} through a {@code FunctionQuery} while the
 * shared searcher is switched to {@code ClassicSimilarity}.
 *
 * NOTE(review): this excerpt stops at the opening of the {@code finally} block;
 * the code that presumably restores {@code saved} is not visible here.
 */
public void testNorm() throws Exception {
  // remember the searcher's current similarity before overriding it
  Similarity saved = searcher.getSimilarity();
  try {
    // no norm field (so agnostic to indexed similarity)
    searcher.setSimilarity(new ClassicSimilarity());
    ValueSource vs = new NormValueSource("byte");
    assertHits(new FunctionQuery(vs), new float[] { 1f, 1f });

    // regardless of whether norms exist, value source exists == 0

    // NOTE(review): no assertion on the "text" value source is visible in this excerpt
    vs = new NormValueSource("text");
  } finally {
源代码3 项目: lucene-solr   文件: TestValueSources.java
/**
 * Exercises {@code TFValueSource} through a {@code FunctionQuery} with the shared
 * searcher switched to {@code ClassicSimilarity}, for three field/term pairs:
 * "text"/"test", "string"/"bar" and a non-existent "bogus"/"bogus".
 *
 * NOTE(review): this excerpt stops at the opening of the {@code finally} block;
 * the code that presumably restores {@code saved} is not visible here.
 */
public void testTF() throws Exception {
  // remember the searcher's current similarity before overriding it
  Similarity saved = searcher.getSimilarity();
  try {
    // no norm field (so agnostic to indexed similarity)
    searcher.setSimilarity(new ClassicSimilarity());

    ValueSource vs = new TFValueSource("bogus", "bogus", "text", new BytesRef("test"));
    // expected values are sqrt(3) and sqrt(1) for the two hits
    assertHits(new FunctionQuery(vs), 
               new float[] { (float)Math.sqrt(3d), (float)Math.sqrt(1d) });
    vs = new TFValueSource("bogus", "bogus", "string", new BytesRef("bar"));
    assertHits(new FunctionQuery(vs), new float[] { 0f, 1f });
    // regardless of whether norms exist, value source exists == 0
    vs = new TFValueSource("bogus", "bogus", "bogus", new BytesRef("bogus"));
    assertHits(new FunctionQuery(vs), new float[] { 0F, 0F });

  } finally {
源代码4 项目: lucene-solr   文件: TestTaxonomyFacetCounts.java
/**
 * Indexes one document with a text field and a facet field using a per-field
 * similarity wrapper that asserts a similarity is only ever requested for the
 * field named "field".
 *
 * NOTE(review): this excerpt is truncated - the anonymous wrapper class and its
 * {@code get} method are never closed, and closing of {@code writer} is not
 * visible before {@code IOUtils.close}.
 */
public void testReallyNoNormsForDrillDown() throws Exception {
  Directory dir = newDirectory();
  Directory taxoDir = newDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
  iwc.setSimilarity(new PerFieldSimilarityWrapper() {
      // single ClassicSimilarity instance handed out for every lookup
      final Similarity sim = new ClassicSimilarity();

      public Similarity get(String name) {
        // fails the test if a similarity is requested for any other field
        assertEquals("field", name);
        return sim;
  TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
  FacetsConfig config = new FacetsConfig();

  Document doc = new Document();
  doc.add(newTextField("field", "text", Field.Store.NO));
  doc.add(new FacetField("a", "path"));
  // config.build(...) produces the document that is actually indexed
  writer.addDocument(config.build(taxoWriter, doc));
  IOUtils.close(taxoWriter, dir, taxoDir);
源代码5 项目: lucene-solr   文件: TestPayloadSpanUtil.java
/**
 * Builds an index with {@code PayloadAnalyzer} and {@code ClassicSimilarity},
 * then asks {@code PayloadSpanUtil} for the payloads matching a term query on "rr".
 *
 * NOTE(review): this excerpt is truncated - no call adding {@code doc} to the
 * writer, no assertion on the payloads, and no closing braces are visible.
 */
public void testPayloadSpanUtil() throws Exception {
  Directory directory = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory,
      newIndexWriterConfig(new PayloadAnalyzer()).setSimilarity(new ClassicSimilarity()));

  Document doc = new Document();
  doc.add(newTextField(FIELD, "xx rr yy mm  pp", Field.Store.YES));

  IndexReader reader = writer.getReader();
  IndexSearcher searcher = newSearcher(reader);

  PayloadSpanUtil psu = new PayloadSpanUtil(searcher.getTopReaderContext());

  Collection<byte[]> payloads = psu.getPayloadsForQuery(new TermQuery(new Term(FIELD, "rr")));
  // optional debug dump of every payload, decoded as UTF-8
  if(VERBOSE) {
    System.out.println("Num payloads:" + payloads.size());
    for (final byte [] bytes : payloads) {
      System.out.println(new String(bytes, StandardCharsets.UTF_8));
源代码6 项目: lucene-solr   文件: TestSimilarities.java
/**
 * Matches one document against a registered monitor query twice - once scored with
 * a plain {@code ClassicSimilarity} and once with a subclass whose {@code tf}
 * always returns 1000 - and compares the two scores.
 *
 * NOTE(review): this excerpt is truncated - the anonymous similarity class, the
 * try-with-resources block and the method are never closed.
 */
public void testNonStandardSimilarity() throws Exception {

    try (Monitor monitor = newMonitor()) {
      monitor.register(new MonitorQuery("1", MonitorTestBase.parse("test")));

      // similarity whose term-frequency factor is a constant 1000
      Similarity similarity = new ClassicSimilarity() {
        public float tf(float freq) {
          return 1000f;

      Document doc = new Document();
      doc.add(newTextField("field", "this is a test", Field.Store.NO));

      MatchingQueries<ScoringMatch> standard = monitor.match(doc, ScoringMatch.matchWithSimilarity(new ClassicSimilarity()));
      MatchingQueries<ScoringMatch> withSim = monitor.match(doc, ScoringMatch.matchWithSimilarity(similarity));

      float standScore = standard.getMatches().iterator().next().getScore();
      float simScore = withSim.getMatches().iterator().next().getScore();
      // the custom score divided by 1000 must equal the standard score within 0.1
      assertEquals(standScore, simScore / 1000, 0.1f);
/**
 * Builds a three-document index ("lucene", "solr", "nutch") and creates two
 * searchers over it: {@code searcher} with {@code ClassicSimilarity} and
 * {@code scorerSearcher} with {@code CountingSimilarity}.
 *
 * NOTE(review): the closing brace (and any call closing {@code writer}) is not
 * visible in this excerpt.
 */
public void setUp() throws Exception {
  analyzer = new MockAnalyzer(random());
  dir = newDirectory();
  IndexWriterConfig config = newIndexWriterConfig(analyzer);
  config.setMergePolicy(newLogMergePolicy()); // we will use docids to validate
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config);
  writer.addDocument(doc("lucene", "lucene is a very popular search engine library"));
  writer.addDocument(doc("solr", "solr is a very popular search server and is using lucene"));
  writer.addDocument(doc("nutch", "nutch is an internet search engine with web crawler and is using lucene and hadoop"));
  reader = writer.getReader();
  // we do not use newSearcher because the assertingXXX layers break
  // the toString representations we are relying on
  // TODO: clean that up
  searcher = new IndexSearcher(reader);
  searcher.setSimilarity(new ClassicSimilarity());
  scorerSearcher = new ScorerIndexSearcher(reader);
  scorerSearcher.setSimilarity(new CountingSimilarity());
源代码8 项目: lucene-solr   文件: TestTermScorer.java
/**
 * Creates an index configured with {@code ClassicSimilarity}, builds one document
 * per entry of {@code values}, and opens a single-leaf reader plus a searcher that
 * also uses {@code ClassicSimilarity}.
 *
 * NOTE(review): this excerpt is truncated - the loop body is not closed and no call
 * adding {@code doc} to the writer is visible.
 */
public void setUp() throws Exception {
  directory = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory, 
      newIndexWriterConfig(new MockAnalyzer(random()))
      .setSimilarity(new ClassicSimilarity()));
  for (int i = 0; i < values.length; i++) {
    Document doc = new Document();
    doc.add(newTextField(FIELD, values[i], Field.Store.YES));
  indexReader = getOnlyLeafReader(writer.getReader());
  indexSearcher = newSearcher(indexReader, false);
  // index-time and search-time similarity are both ClassicSimilarity
  indexSearcher.setSimilarity(new ClassicSimilarity());
源代码9 项目: lucene-solr   文件: TestMemoryIndex.java
/**
 * Checks which {@code MemoryIndex} mutations are allowed before and after the index
 * is frozen: adding fields and searching work beforehand, while adding a field or
 * setting a similarity on a frozen index must raise a {@code RuntimeException}
 * whose message contains "frozen".
 *
 * NOTE(review): this excerpt is truncated - the call that freezes the index (after
 * "freeze!"), whatever makes the index writable again before "wibble" is added, and
 * the closing of both lambdas are not visible.
 */
public void testFreezeAPI() {

  MemoryIndex mi = new MemoryIndex();
  mi.addField("f1", "some text", analyzer);

  // a non-zero score means the query matched
  assertThat(mi.search(new MatchAllDocsQuery()), not(is(0.0f)));
  assertThat(mi.search(new TermQuery(new Term("f1", "some"))), not(is(0.0f)));

  // check we can add a new field after searching
  mi.addField("f2", "some more text", analyzer);
  assertThat(mi.search(new TermQuery(new Term("f2", "some"))), not(is(0.0f)));

  // freeze!

  RuntimeException expected = expectThrows(RuntimeException.class, () -> {
    mi.addField("f3", "and yet more", analyzer);
  assertThat(expected.getMessage(), containsString("frozen"));

  expected = expectThrows(RuntimeException.class, () -> {
    mi.setSimilarity(new BM25Similarity(1, 1));
  assertThat(expected.getMessage(), containsString("frozen"));

  // searching is still expected to work at this point
  assertThat(mi.search(new TermQuery(new Term("f1", "some"))), not(is(0.0f)));

  // after re-adding f1 with new content, the old term must no longer match
  mi.addField("f1", "wibble", analyzer);
  assertThat(mi.search(new TermQuery(new Term("f1", "some"))), is(0.0f));
  assertThat(mi.search(new TermQuery(new Term("f1", "wibble"))), not(is(0.0f)));

  // check we can set the Similarity again
  mi.setSimilarity(new ClassicSimilarity());

源代码10 项目: lucene-solr   文件: SearchImpl.java
private Similarity createSimilarity(SimilarityConfig config) {
  Similarity similarity;

  if (config.isUseClassicSimilarity()) {
    ClassicSimilarity tfidf = new ClassicSimilarity();
    similarity = tfidf;
  } else {
    BM25Similarity bm25 = new BM25Similarity(config.getK1(), config.getB());
    similarity = bm25;

  return similarity;
源代码11 项目: lucene-solr   文件: TestValueSources.java
/**
 * Exercises {@code IDFValueSource} for term "test" in field "text" through a
 * {@code FunctionQuery}, with the shared searcher switched to
 * {@code ClassicSimilarity}; both hits are expected to score 1.0.
 *
 * NOTE(review): this excerpt stops at the opening of the {@code finally} block;
 * the code that presumably restores {@code saved} is not visible here.
 */
public void testIDF() throws Exception {
  // remember the searcher's current similarity before overriding it
  Similarity saved = searcher.getSimilarity();
  try {
    searcher.setSimilarity(new ClassicSimilarity());
    ValueSource vs = new IDFValueSource("bogus", "bogus", "text", new BytesRef("test"));
    assertHits(new FunctionQuery(vs), new float[] { 1.0f, 1.0f });
  } finally {
源代码12 项目: lucene-solr   文件: TestPayloadScoreQuery.java
/**
 * Runs a nested span-near query through {@code checkQuery} with the max, min and
 * average payload functions, first with the default arguments and then with an
 * extra {@code false} argument, under {@code ClassicSimilarity}.
 *
 * NOTE(review): this excerpt is truncated - the {@code try} block is not closed
 * before {@code finally}, and the {@code finally} body is not visible.
 */
public void testNestedNearQuery() throws Exception {

  // (one OR hundred) NEAR (twenty two) ~ 1
  //  2    4        4    4
  // one hundred twenty two
  // two hundred twenty two

  SpanNearQuery q = new SpanNearQuery(new SpanQuery[]{
      new SpanOrQuery(new SpanTermQuery(new Term("field", "one")), new SpanTermQuery(new Term("field", "hundred"))),
      new SpanNearQuery(new SpanQuery[]{
          new SpanTermQuery(new Term("field", "twenty")),
          new SpanTermQuery(new Term("field", "two"))
      }, 0, true)
  }, 1, true);

  // check includeSpanScore makes a difference here
  searcher.setSimilarity(new ClassicSimilarity());
  try {
    // each call lists the expected doc order (122 / 222) and the expected scores
    checkQuery(q, new MaxPayloadFunction(), new int[]{ 122, 222 }, new float[]{ 20.901256561279297f, 17.06580352783203f });
    checkQuery(q, new MinPayloadFunction(), new int[]{ 222, 122 }, new float[]{ 17.06580352783203f, 10.450628280639648f });
    checkQuery(q, new AveragePayloadFunction(), new int[] { 122, 222 }, new float[]{ 19.15948486328125f, 17.06580352783203f });
    // NOTE(review): the extra 'false' argument presumably disables includeSpanScore - confirm against checkQuery
    checkQuery(q, new MaxPayloadFunction(), false, new int[]{122, 222}, new float[]{4.0f, 4.0f});
    checkQuery(q, new MinPayloadFunction(), false, new int[]{222, 122}, new float[]{4.0f, 2.0f});
    checkQuery(q, new AveragePayloadFunction(), false, new int[]{222, 122}, new float[]{4.0f, 3.666666f});
  finally {

源代码13 项目: lucene-solr   文件: TestMinShouldMatch2.java
/**
 * Builds the shared test index: at least 300 documents, each always receiving
 * {@code alwaysTerms} and receiving common, medium and rare terms with random
 * chances of 90%, 50% and 10%, then opens a single-leaf reader and a searcher
 * using {@code ClassicSimilarity}.
 *
 * NOTE(review): this excerpt is truncated - the {@code if} blocks and the loop are
 * not closed, and no calls adding documents to or closing {@code iw} are visible.
 */
public static void beforeClass() throws Exception {
  dir = newDirectory();
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
  final int numDocs = atLeast(300);
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();
    addSome(doc, alwaysTerms);
    // 90% chance
    if (random().nextInt(100) < 90) {
      addSome(doc, commonTerms);
    // 50% chance
    if (random().nextInt(100) < 50) {
      addSome(doc, mediumTerms);
    // 10% chance
    if (random().nextInt(100) < 10) {
      addSome(doc, rareTerms);
  r = DirectoryReader.open(dir);
  reader = getOnlyLeafReader(r);
  searcher = new IndexSearcher(reader);
  searcher.setSimilarity(new ClassicSimilarity());
源代码14 项目: lucene-solr   文件: TestElevationComparator.java
/**
 * Indexes six documents (ids a, b, c with 1-3 occurrences of "ipod"; x, y, z with
 * 1-3 occurrences of "boosted") using {@code ClassicSimilarity} at index time, then
 * runs {@code runTest} twice, with {@code true} and {@code false}, on a searcher
 * that uses {@code BM25Similarity}.
 *
 * NOTE(review): this excerpt is truncated - the {@code IndexWriter} constructor call
 * is not closed (its {@code directory} argument is not visible), and closing of the
 * writer, reader and directory is not visible.
 */
public void testSorting() throws Throwable {
  Directory directory = newDirectory();
  IndexWriter writer = new IndexWriter(
      newIndexWriterConfig(new MockAnalyzer(random())).
          setSimilarity(new ClassicSimilarity())
  writer.addDocument(adoc(new String[] {"id", "a", "title", "ipod", "str_s", "a"}));
  writer.addDocument(adoc(new String[] {"id", "b", "title", "ipod ipod", "str_s", "b"}));
  writer.addDocument(adoc(new String[] {"id", "c", "title", "ipod ipod ipod", "str_s","c"}));
  writer.addDocument(adoc(new String[] {"id", "x", "title", "boosted", "str_s", "x"}));
  writer.addDocument(adoc(new String[] {"id", "y", "title", "boosted boosted", "str_s","y"}));
  writer.addDocument(adoc(new String[] {"id", "z", "title", "boosted boosted boosted","str_s", "z"}));

  IndexReader r = DirectoryReader.open(writer);

  IndexSearcher searcher = newSearcher(r);
  // search-time similarity differs from the index-time ClassicSimilarity above
  searcher.setSimilarity(new BM25Similarity());

  runTest(searcher, true);
  runTest(searcher, false);

源代码15 项目: lucene-solr   文件: TestFuzzyQuery.java
/**
 * Indexes six "smith" documents plus "smythe" and "smdssasd", then checks for each
 * of the three search terms that a {@code FuzzyQuery} (max edits 2, prefix length 1)
 * ranks the exact match first.
 *
 * NOTE(review): this excerpt is truncated - the {@code if} block, the loop and the
 * method are not closed, and closing of reader/writer/directory is not visible.
 */
public void testSingleQueryExactMatchScoresHighest() throws Exception {
  //See issue LUCENE-329 - IDF shouldn't wreck similarity ranking
  Directory directory = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
  addDoc("smith", writer);
  addDoc("smith", writer);
  addDoc("smith", writer);
  addDoc("smith", writer);
  addDoc("smith", writer);
  addDoc("smith", writer);
  addDoc("smythe", writer);
  addDoc("smdssasd", writer);

  IndexReader reader = writer.getReader();
  IndexSearcher searcher = newSearcher(reader);
  searcher.setSimilarity(new ClassicSimilarity()); //avoid randomisation of similarity algo by test framework
  String searchTerms[] = { "smith", "smythe", "smdssasd" };
  for (String searchTerm : searchTerms) {
    FuzzyQuery query = new FuzzyQuery(new Term("field", searchTerm), 2, 1);
    ScoreDoc[] hits = searcher.search(query, 1000).scoreDocs;
    // NOTE(review): hits[0] is dereferenced before the length assertion below, so an
    // empty result fails with ArrayIndexOutOfBoundsException instead of the assertion.
    Document bestDoc = searcher.doc(hits[0].doc);
    assertTrue(hits.length > 0);
    String topMatch = bestDoc.get("field");
    assertEquals(searchTerm, topMatch);
    if (hits.length > 1) {
      Document worstDoc = searcher.doc(hits[hits.length - 1].doc);
      String worstMatch = worstDoc.get("field");
      // NOTE(review): assertNotSame compares object identity, not string content
      assertNotSame(searchTerm, worstMatch);
源代码16 项目: lucene-solr   文件: TestPhraseQuery.java
/**
 * Checks that a sloppy {@code PhraseQuery} for "firstname lastname" (slop
 * {@code Integer.MAX_VALUE}) scores documents higher the closer the two terms are:
 * three hits are expected, docs 0, 1 and 2 with scores of about 1.0, 0.63 and 0.47.
 * The index is written with {@code BM25Similarity} but searched with
 * {@code ClassicSimilarity}.
 *
 * NOTE(review): this excerpt is truncated - no calls adding {@code doc},
 * {@code doc2} and {@code doc3} to the writer are visible, and the method is not
 * closed.
 */
public void testSlopScoring() throws IOException {
  Directory directory = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory, 
      newIndexWriterConfig(new MockAnalyzer(random()))
        .setSimilarity(new BM25Similarity()));

  // the three documents differ only in how many tokens separate the two terms
  Document doc = new Document();
  doc.add(newTextField("field", "foo firstname lastname foo", Field.Store.YES));
  Document doc2 = new Document();
  doc2.add(newTextField("field", "foo firstname zzz lastname foo", Field.Store.YES));
  Document doc3 = new Document();
  doc3.add(newTextField("field", "foo firstname zzz yyy lastname foo", Field.Store.YES));
  IndexReader reader = writer.getReader();

  IndexSearcher searcher = newSearcher(reader);
  searcher.setSimilarity(new ClassicSimilarity());
  PhraseQuery query = new PhraseQuery(Integer.MAX_VALUE, "field", "firstname", "lastname");
  ScoreDoc[] hits = searcher.search(query, 1000).scoreDocs;
  assertEquals(3, hits.length);
  // Make sure that those matches where the terms appear closer to
  // each other get a higher score:
  assertEquals(1.0, hits[0].score, 0.01);
  assertEquals(0, hits[0].doc);
  assertEquals(0.63, hits[1].score, 0.01);
  assertEquals(1, hits[1].doc);
  assertEquals(0.47, hits[2].score, 0.01);
  assertEquals(2, hits[2].doc);
  QueryUtils.check(random(), query,searcher);
源代码17 项目: lucene-solr   文件: TestQueryRescorer.java
private IndexSearcher getSearcher(IndexReader r) {
  IndexSearcher searcher = newSearcher(r);

  // We rely on more tokens = lower score:
  searcher.setSimilarity(new ClassicSimilarity());

  return searcher;
源代码18 项目: lucene-solr   文件: TestBooleanQuery.java
/**
 * Checks how queries behave when one clause is an empty {@code PhraseQuery}:
 * as a SHOULD clause next to a matching term query the total hit count is 1,
 * as a MUST clause it is 0, and inside a {@code DisjunctionMaxQuery} it is 1.
 *
 * NOTE(review): this excerpt is truncated - no call adding {@code doc} to the
 * writer is visible, the {@code DisjunctionMaxQuery} constructor call is not closed,
 * and the method is not closed.
 */
public void testNullOrSubScorer() throws Throwable {
  Directory dir = newDirectory();
  RandomIndexWriter w = new RandomIndexWriter(random(), dir);
  Document doc = new Document();
  doc.add(newTextField("field", "a b c d", Field.Store.NO));

  IndexReader r = w.getReader();
  IndexSearcher s = newSearcher(r);
  // this test relies upon coord being the default implementation,
  // otherwise scores are different!
  s.setSimilarity(new ClassicSimilarity());

  BooleanQuery.Builder q = new BooleanQuery.Builder();
  q.add(new TermQuery(new Term("field", "a")), BooleanClause.Occur.SHOULD);

  // PhraseQuery w/ no terms added returns a null scorer
  PhraseQuery pq = new PhraseQuery("field", new String[0]);
  q.add(pq, BooleanClause.Occur.SHOULD);
  assertEquals(1, s.search(q.build(), 10).totalHits.value);

  // A required clause which returns null scorer should return null scorer to
  // IndexSearcher.
  q = new BooleanQuery.Builder();
  pq = new PhraseQuery("field", new String[0]);
  q.add(new TermQuery(new Term("field", "a")), BooleanClause.Occur.SHOULD);
  q.add(pq, BooleanClause.Occur.MUST);
  assertEquals(0, s.search(q.build(), 10).totalHits.value);

  // same two sub-queries combined as a disjunction-max query
  DisjunctionMaxQuery dmq = new DisjunctionMaxQuery(
      Arrays.asList(new TermQuery(new Term("field", "a")), pq),
  assertEquals(1, s.search(dmq, 10).totalHits.value);

/**
 * Default parameters: the {@code SweetSpotSimilarity} configured for field "text"
 * must produce the same {@code tf} as {@code ClassicSimilarity} for 0..1000, and
 * norms of 1.00, 0.50 and 0.25 for lengths 1, 4 and 16.
 *
 * NOTE(review): this excerpt is truncated - the loop and the method are not closed.
 */
public void testDefaults() throws Exception {
  SweetSpotSimilarity sim = getSimilarity("text", SweetSpotSimilarity.class);

  // SSS tf w/defaults should behave just like DS
  ClassicSimilarity d = new ClassicSimilarity();
  for (int i = 0; i <=1000; i++) {
    assertEquals("tf: i="+i, d.tf(i), sim.tf(i), 0.0F);

  // default norm sanity check
  assertEquals("norm 1",  1.00F, computeNorm(sim, 1),  0.0F);
  assertEquals("norm 4",  0.50F, computeNorm(sim, 4),  0.0F);
  assertEquals("norm 16", 0.25F, computeNorm(sim, 16), 0.0F);
/**
 * Baseline with parameters: for the similarity configured on "text_baseline",
 * {@code tf} must be a constant 1.5 for 1..6 and lower than
 * {@code ClassicSimilarity} from 6 to 1000; norms must plateau at 1.0 for lengths
 * 3-5, drop for 6 and 7, and equal 0.25 for length 20.
 *
 * NOTE(review): this excerpt is truncated - the {@code getSimilarity(...)} call is
 * missing its remaining argument(s), and the loops and the method are not closed.
 */
public void testBaselineParameters() throws Exception {
  SweetSpotSimilarity sim = getSimilarity("text_baseline", 
  ClassicSimilarity d = new ClassicSimilarity();

  // constant up to 6
  for (int i = 1; i <=6; i++) {
    assertEquals("tf i="+i, 1.5F, sim.tf(i), 0.0F);
  // less than default sim above 6
  for (int i = 6; i <=1000; i++) {
    assertTrue("tf: i="+i+" : s="+sim.tf(i)+
               " < d="+d.tf(i),
               sim.tf(i) < d.tf(i));

  // norms: plateau from 3-5
  assertEquals("norm 1 == 7", 
               computeNorm(sim, 1), computeNorm(sim, 7),  0.0F);
  // NOTE(review): the message says "norm 2 == 6" but the arguments compare lengths
  // 1 and 7 again - looks like a copy-paste slip; lengths 2 and 6 are never compared.
  assertEquals("norm 2 == 6",  
               computeNorm(sim, 1), computeNorm(sim, 7),  0.0F);
  assertEquals("norm 3",  1.00F, computeNorm(sim, 3),  0.0F);
  assertEquals("norm 4",  1.00F, computeNorm(sim, 4),  0.0F);
  assertEquals("norm 5",  1.00F, computeNorm(sim, 5),  0.0F);
  assertTrue("norm 6 too high: " + computeNorm(sim, 6),
             computeNorm(sim, 6) < 1.0F);
  assertTrue("norm 7 higher then norm 6", 
             computeNorm(sim, 7) < computeNorm(sim, 6));
  assertEquals("norm 20", 0.25F, computeNorm(sim, 20), 0.0F);
/**
 * Creates the index under test with a similarity picked at random from a fixed list
 * of ten implementations, builds one document per entry of {@code docs} (an "id"
 * field holding the index and a "field" holding the text), and opens a reader and a
 * searcher over it.
 *
 * NOTE(review): this excerpt is truncated - the {@code Arrays.asList(...)} call and
 * the loop are not closed, and no call adding {@code doc} to the writer is visible.
 */
public void setupIndex() throws IOException {
    dirUnderTest = newDirectory();
    List<Similarity> sims = Arrays.asList(
            new ClassicSimilarity(),
            new SweetSpotSimilarity(), // extends Classic
            new BM25Similarity(),
            new LMDirichletSimilarity(),
            new BooleanSimilarity(),
            new LMJelinekMercerSimilarity(0.2F),
            new AxiomaticF3LOG(0.5F, 10),
            new DFISimilarity(new IndependenceChiSquared()),
            new DFRSimilarity(new BasicModelG(), new AfterEffectB(), new NormalizationH1()),
            new IBSimilarity(new DistributionLL(), new LambdaDF(), new NormalizationH3())
    // pick one similarity at random for this run
    similarity = sims.get(random().nextInt(sims.size()));

    indexWriterUnderTest = new RandomIndexWriter(random(), dirUnderTest, newIndexWriterConfig().setSimilarity(similarity));
    for (int i = 0; i < docs.length; i++) {
        Document doc = new Document();
        doc.add(newStringField("id", "" + i, Field.Store.YES));
        doc.add(newField("field", docs[i], Store.YES));

    indexReaderUnderTest = indexWriterUnderTest.getReader();
    searcherUnderTest = newSearcher(indexReaderUnderTest);
源代码22 项目: querqy   文件: LuceneTermQueryBuilderTest.java
/**
 * Checks that the {@code TermQuery} created by {@code LuceneTermQueryBuilder} keeps
 * the requested term ("f1":"v1") and that its score explanation contains the query
 * boost of 4.5 passed to {@code createWeight}, a docFreq of 4 and a freq of 2.0.
 *
 * NOTE(review): this excerpt is truncated - closing of {@code indexWriter} before
 * the reader is opened, and the method's closing brace, are not visible.
 */
public void testThatQueryUsesTermButNoFieldBoost() throws Exception {

    Analyzer analyzer = new StandardAnalyzer();

    Directory directory = new ByteBuffersDirectory();
    IndexWriterConfig config = new IndexWriterConfig(analyzer);
    config.setSimilarity(new ClassicSimilarity());
    IndexWriter indexWriter = new IndexWriter(directory, config);

    // four docs containing "v1" twice, one doc containing "v2"
    TestUtil.addNumDocsWithTextField("f1", "v1 v1", indexWriter, 4);
    TestUtil.addNumDocsWithTextField("f1", "v2", indexWriter, 1);


    IndexReader indexReader = DirectoryReader.open(directory);
    IndexSearcher indexSearcher = new IndexSearcher(indexReader);
    indexSearcher.setSimilarity(new ClassicSimilarity());

    // the builder is given a constant field boost of 3
    final TermQuery termQuery = new LuceneTermQueryBuilder()
            .createTermQuery(new Term("f1", "v1"), new ConstantFieldBoost(3f));
    final Term term = termQuery.getTerm();
    assertEquals("f1", term.field());
    assertEquals("v1", term.text());

    TopDocs topDocs = indexSearcher.search(termQuery, 10);

    // explain the top hit using a weight created with boost 4.5
    final Weight weight = termQuery.createWeight(indexSearcher, ScoreMode.COMPLETE, 4.5f);
    final Explanation explain = weight.explain(indexReader.getContext().leaves().get(0), topDocs.scoreDocs[0].doc);

    String explainText = explain.toString();

    assertTrue(explainText.contains("4.5 = boost")); // 4.5 (query) but ignore field boost
    assertTrue(explainText.contains("4 = docFreq")); // 4 * v1
    assertTrue(explainText.contains("2.0 = freq")); // 2 * v1 in field
源代码23 项目: lumongo   文件: DocFreq.java
/**
 * Creates a document-frequency helper for one field of an index.
 *
 * Stores the reader and field name, starts with an empty {@code docFreqMap},
 * uses {@code ClassicSimilarity}, and captures {@code indexReader.numDocs()} once
 * at construction time.
 *
 * NOTE(review): the constructor's closing brace is not visible in this excerpt.
 *
 * @param indexReader reader the frequencies are taken from
 * @param field name of the field this instance is bound to
 */
public DocFreq(IndexReader indexReader, String field) {
	this.indexReader = indexReader;
	this.field = field;
	this.docFreqMap = new HashMap<>();
	this.similarity = new ClassicSimilarity();
	// snapshot of the document count; not refreshed afterwards in the visible code
	this.numDocs = indexReader.numDocs();
源代码24 项目: lumongo   文件: LumongoSegment.java
private PerFieldSimilarityWrapper getSimilarity(final QueryWithFilters queryWithFilters) {
	return new PerFieldSimilarityWrapper() {
		public Similarity get(String name) {

			AnalyzerSettings analyzerSettings = indexConfig.getAnalyzerSettingsForIndexField(name);
			AnalyzerSettings.Similarity similarity = AnalyzerSettings.Similarity.BM25;
			if (analyzerSettings != null) {
				similarity = analyzerSettings.getSimilarity();

			AnalyzerSettings.Similarity fieldSimilarityOverride = queryWithFilters.getFieldSimilarityOverride(name);
			if (fieldSimilarityOverride != null) {
				similarity = fieldSimilarityOverride;

			if (AnalyzerSettings.Similarity.TFIDF.equals(similarity)) {
				return new ClassicSimilarity();
			else if (AnalyzerSettings.Similarity.BM25.equals(similarity)) {
				return new BM25Similarity();
			else if (AnalyzerSettings.Similarity.CONSTANT.equals(similarity)) {
				return new ConstantSimilarity();
			else if (AnalyzerSettings.Similarity.TF.equals(similarity)) {
				return new TFSimilarity();
			else {
				throw new RuntimeException("Unknown similarity type <" + similarity + ">");
源代码25 项目: lucene-solr   文件: MoreLikeThis.java
/** Constructor requiring an IndexReader. */
public MoreLikeThis(IndexReader ir) {
  // delegate to the two-argument constructor with ClassicSimilarity as the similarity
  // NOTE(review): the constructor's closing brace is not visible in this excerpt
  this(ir, new ClassicSimilarity());
源代码26 项目: lucene-solr   文件: SweetSpotSimilarityTest.java
/**
 * Compares {@code SweetSpotSimilarity.tf} against {@code ClassicSimilarity.tf} under
 * different baseline tf factors: equal with (0, 0), higher with (1, 0), flat at the
 * base value up to the minimum with (1, 6) and (2, 6), lower above 6, and zero for
 * a frequency of 0.
 *
 * NOTE(review): this excerpt is truncated - none of the loops, nor the method, are
 * closed.
 */
public void testSweetSpotTf() {

  SweetSpotSimilarity ss = new SweetSpotSimilarity();

  // d is the reference implementation; s is the same object as ss, viewed as TFIDFSimilarity
  TFIDFSimilarity d = new ClassicSimilarity();
  TFIDFSimilarity s = ss;
  // tf equal

  ss.setBaselineTfFactors(0.0f, 0.0f);

  for (int i = 1; i < 1000; i++) {
    assertEquals("tf: i="+i,
                 d.tf(i), s.tf(i), 0.0f);

  // tf higher

  ss.setBaselineTfFactors(1.0f, 0.0f);

  for (int i = 1; i < 1000; i++) {
    assertTrue("tf: i="+i+" : d="+d.tf(i)+
               " < s="+s.tf(i),
               d.tf(i) < s.tf(i));

  // tf flat

  ss.setBaselineTfFactors(1.0f, 6.0f);
  for (int i = 1; i <=6; i++) {
    assertEquals("tf flat1: i="+i, 1.0f, s.tf(i), 0.0f);
  ss.setBaselineTfFactors(2.0f, 6.0f);
  for (int i = 1; i <=6; i++) {
    assertEquals("tf flat2: i="+i, 2.0f, s.tf(i), 0.0f);
  // from 6 upwards the sweet-spot tf must stay below the classic tf
  for (int i = 6; i <=1000; i++) {
    assertTrue("tf: i="+i+" : s="+s.tf(i)+
               " < d="+d.tf(i),
               s.tf(i) < d.tf(i));

  // stupidity
  assertEquals("tf zero", 0.0f, s.tf(0), 0.0f);
源代码27 项目: lucene-solr   文件: TestComplexExplanations.java
/**
 * Switches the shared {@code searcher} to {@code ClassicSimilarity} before each test.
 *
 * NOTE(review): this excerpt shows no {@code super.setUp()} call and no closing
 * brace; confirm against the full source where {@code searcher} is initialised.
 */
public void setUp() throws Exception {
  // TODO: switch to BM25?
  searcher.setSimilarity(new ClassicSimilarity());
源代码28 项目: lucene-solr   文件: TestFuzzyQuery.java
/**
 * Indexes seven names and runs a boolean query made of two SHOULD fuzzy queries
 * ("michael" and "cutting"); all seven documents must match, and the lowest-ranked
 * hit must be the one containing the misspelling "micheal".
 *
 * NOTE(review): this excerpt is truncated - {@code topMatch} and
 * {@code runnerUpMatch} are read but no assertions on them are visible, and the
 * method is not closed.
 */
public void testMultipleQueriesIdfWorks() throws Exception {
  // With issue LUCENE-329 - it could be argued a MultiTermQuery.TopTermsBoostOnlyBooleanQueryRewrite
  // is the solution as it disables IDF.
  // However - IDF is still useful as in this case where there are multiple FuzzyQueries.
  Directory directory = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory);

  addDoc("michael smith", writer);
  addDoc("michael lucero", writer);
  addDoc("doug cutting", writer);
  addDoc("doug cuttin", writer);
  addDoc("michael wardle", writer);
  addDoc("micheal vegas", writer);
  addDoc("michael lydon", writer);

  IndexReader reader = writer.getReader();
  IndexSearcher searcher = newSearcher(reader);
  searcher.setSimilarity(new ClassicSimilarity()); //avoid randomisation of similarity algo by test framework


  BooleanQuery.Builder query = new BooleanQuery.Builder();
  String commonSearchTerm = "michael";
  FuzzyQuery commonQuery = new FuzzyQuery(new Term("field", commonSearchTerm), 2, 1);
  query.add(commonQuery, Occur.SHOULD);

  String rareSearchTerm = "cutting";
  FuzzyQuery rareQuery = new FuzzyQuery(new Term("field", rareSearchTerm), 2, 1);
  query.add(rareQuery, Occur.SHOULD);
  ScoreDoc[] hits = searcher.search(query.build(), 1000).scoreDocs;

  // Matches on the rare surname should be worth more than matches on the common forename
  assertEquals(7, hits.length);
  Document bestDoc = searcher.doc(hits[0].doc);
  String topMatch = bestDoc.get("field");

  Document runnerUpDoc = searcher.doc(hits[1].doc);
  String runnerUpMatch = runnerUpDoc.get("field");

  Document worstDoc = searcher.doc(hits[hits.length - 1].doc);
  String worstMatch = worstDoc.get("field");
  assertTrue(worstMatch.contains("micheal")); //misspelling of common name

源代码29 项目: lucene-solr   文件: TestBoolean2.java
/**
 * Generates at least three random boolean queries and, for each one, verifies it
 * with {@code QueryUtils.check} under both the baseline and the big searcher's
 * similarity, checks that two index-order collections return equal hits on
 * {@code searcher} and on {@code bigSearcher}, and checks the expected count of a
 * combined query on {@code bigSearcher}. On failure the offending query is printed
 * before the exception is rethrown.
 *
 * NOTE(review): this excerpt is truncated - the inner {@code finally}, the loop,
 * the {@code catch} block and the method are not closed. {@code tot} is declared
 * but never updated in the visible code.
 */
public void testRandomQueries() throws Exception {
  String[] vals = {"w1","w2","w3","w4","w5","xx","yy","zzz"};

  int tot=0;

  // kept outside the try so the catch block can report the failing query
  BooleanQuery q1 = null;
  try {

    // increase number of iterations for more complete testing
    int num = atLeast(3);
    for (int i=0; i<num; i++) {
      int level = random().nextInt(3);
      q1 = randBoolQuery(new Random(random().nextLong()), random().nextBoolean(), level, field, vals, null).build();
      // Can't sort by relevance since floating point numbers may not quite
      // match up.
      Sort sort = Sort.INDEXORDER;

      QueryUtils.check(random(), q1,searcher); // baseline sim
      try {
        // a little hackish, QueryUtils.check is too costly to do on bigSearcher in this loop.
        searcher.setSimilarity(bigSearcher.getSimilarity()); // random sim
        QueryUtils.check(random(), q1, searcher);
      } finally {
        searcher.setSimilarity(new ClassicSimilarity()); // restore

      // check diff (randomized) scorers (from AssertingSearcher) produce the same results
      TopFieldCollector collector = TopFieldCollector.create(sort, 1000, 1);
      searcher.search(q1, collector);
      ScoreDoc[] hits1 = collector.topDocs().scoreDocs;
      collector = TopFieldCollector.create(sort, 1000, 1);
      searcher.search(q1, collector);
      ScoreDoc[] hits2 = collector.topDocs().scoreDocs;
      CheckHits.checkEqual(q1, hits1, hits2);

      // q1 OR a prefix query on field2; the count is checked against bigSearcher
      BooleanQuery.Builder q3 = new BooleanQuery.Builder();
      q3.add(q1, BooleanClause.Occur.SHOULD);
      q3.add(new PrefixQuery(new Term("field2", "b")), BooleanClause.Occur.SHOULD);
      assertEquals(mulFactor*collector.totalHits + NUM_EXTRA_DOCS/2, bigSearcher.count(q3.build()));

      // test diff (randomized) scorers produce the same results on bigSearcher as well
      collector = TopFieldCollector.create(sort, 1000 * mulFactor, 1);
      bigSearcher.search(q1, collector);
      hits1 = collector.topDocs().scoreDocs;
      collector = TopFieldCollector.create(sort, 1000 * mulFactor, 1);
      bigSearcher.search(q1, collector);
      hits2 = collector.topDocs().scoreDocs;
      CheckHits.checkEqual(q1, hits1, hits2);

  } catch (Exception e) {
    // For easier debugging
    System.out.println("failed query: " + q1);
    throw e;

  // System.out.println("Total hits:"+tot);
源代码30 项目: lucene-solr   文件: TestQueryRescorer.java
/**
 * Returns the {@code LuceneTestCase} default writer config with its similarity set
 * to {@code ClassicSimilarity}.
 *
 * NOTE(review): the method's closing brace is not visible in this excerpt.
 */
public static IndexWriterConfig newIndexWriterConfig() {
  // We rely on more tokens = lower score:
  return LuceneTestCase.newIndexWriterConfig().setSimilarity(new ClassicSimilarity());