org.apache.lucene.index.IndexWriterConfig#setSimilarity() Code Examples

Listed below are code examples of org.apache.lucene.index.IndexWriterConfig#setSimilarity(); you can also follow the links to view the full source code on GitHub.
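Before the project examples, here is a minimal, self-contained sketch of the typical usage pattern: the Similarity configured on the IndexWriterConfig is used when writing norms at index time, and the same Similarity is usually set on the IndexSearcher at query time, as the examples below also do. This sketch assumes a recent Lucene 8.x-style API (ByteBuffersDirectory, BM25Similarity, TotalHits); the class name SetSimilarityDemo and the field name "field" are illustrative only.

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.similarities.BM25Similarity;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;

public class SetSimilarityDemo {
    public static void main(String[] args) throws Exception {
        Directory directory = new ByteBuffersDirectory();
        StandardAnalyzer analyzer = new StandardAnalyzer();

        // Configure the writer with an explicit Similarity; it is used when
        // writing per-field norms during indexing.
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        iwc.setSimilarity(new BM25Similarity());

        try (IndexWriter writer = new IndexWriter(directory, iwc)) {
            Document doc = new Document();
            doc.add(new TextField("field", "some example text", Field.Store.NO));
            writer.addDocument(doc);
        }

        // Use the same Similarity at search time so that scoring is consistent
        // with the norms that were written.
        try (DirectoryReader reader = DirectoryReader.open(directory)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            searcher.setSimilarity(new BM25Similarity());
            long hits = searcher.search(new TermQuery(new Term("field", "example")), 10).totalHits.value;
            System.out.println("hits: " + hits);
        }

        analyzer.close();
        directory.close();
    }
}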

Example 1  Project: lucene-solr  File: TestTaxonomyFacetCounts.java
public void testReallyNoNormsForDrillDown() throws Exception {
  Directory dir = newDirectory();
  Directory taxoDir = newDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
  iwc.setSimilarity(new PerFieldSimilarityWrapper() {
      final Similarity sim = new ClassicSimilarity();

      @Override
      public Similarity get(String name) {
        assertEquals("field", name);
        return sim;
      }
    });
  TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
  FacetsConfig config = new FacetsConfig();

  Document doc = new Document();
  doc.add(newTextField("field", "text", Field.Store.NO));
  doc.add(new FacetField("a", "path"));
  writer.addDocument(config.build(taxoWriter, doc));
  writer.close();
  IOUtils.close(taxoWriter, dir, taxoDir);
}
 
Example 2  Project: vscode-extension  File: test.java
private IndexWriterConfig getIndexWriterConfig() {
    final IndexWriterConfig iwc = new IndexWriterConfig(engineConfig.getAnalyzer());
    iwc.setCommitOnClose(false); // we by default don't commit on close
    iwc.setOpenMode(IndexWriterConfig.OpenMode.APPEND);
    iwc.setIndexDeletionPolicy(combinedDeletionPolicy);
    // with tests.verbose, lucene sets this up: plumb to align with filesystem stream
    boolean verbose = false;
    try {
        verbose = Boolean.parseBoolean(System.getProperty("tests.verbose"));
    } catch (Exception ignore) {
    }
    iwc.setInfoStream(verbose ? InfoStream.getDefault() : new LoggerInfoStream(logger));
    iwc.setMergeScheduler(mergeScheduler);
    // Give us the opportunity to upgrade old segments while performing
    // background merges
    MergePolicy mergePolicy = config().getMergePolicy();
    // always configure soft-deletes field so an engine with soft-deletes disabled can open a Lucene index with soft-deletes.
    iwc.setSoftDeletesField(Lucene.SOFT_DELETES_FIELD);
    if (softDeleteEnabled) {
        mergePolicy = new RecoverySourcePruneMergePolicy(SourceFieldMapper.RECOVERY_SOURCE_NAME, softDeletesPolicy::getRetentionQuery,
            new SoftDeletesRetentionMergePolicy(Lucene.SOFT_DELETES_FIELD, softDeletesPolicy::getRetentionQuery, mergePolicy));
    }
    iwc.setMergePolicy(new ElasticsearchMergePolicy(mergePolicy));
    iwc.setSimilarity(engineConfig.getSimilarity());
    iwc.setRAMBufferSizeMB(engineConfig.getIndexingBufferSize().getMbFrac());
    iwc.setCodec(engineConfig.getCodec());
    iwc.setUseCompoundFile(true); // always use compound on flush - reduces # of file-handles on refresh
    if (config().getIndexSort() != null) {
        iwc.setIndexSort(config().getIndexSort());
    }
    return iwc;
}
 
Example 3  Project: lucene-solr  File: SolrIndexConfig.java
public IndexWriterConfig toIndexWriterConfig(SolrCore core) throws IOException {
  IndexSchema schema = core.getLatestSchema();
  IndexWriterConfig iwc = new IndexWriterConfig(new DelayedSchemaAnalyzer(core));
  if (maxBufferedDocs != -1)
    iwc.setMaxBufferedDocs(maxBufferedDocs);

  if (ramBufferSizeMB != -1)
    iwc.setRAMBufferSizeMB(ramBufferSizeMB);

  if (ramPerThreadHardLimitMB != -1) {
    iwc.setRAMPerThreadHardLimitMB(ramPerThreadHardLimitMB);
  }

  iwc.setSimilarity(schema.getSimilarity());
  MergePolicy mergePolicy = buildMergePolicy(core.getResourceLoader(), schema);
  iwc.setMergePolicy(mergePolicy);
  MergeScheduler mergeScheduler = buildMergeScheduler(core.getResourceLoader());
  iwc.setMergeScheduler(mergeScheduler);
  iwc.setInfoStream(infoStream);

  if (mergePolicy instanceof SortingMergePolicy) {
    Sort indexSort = ((SortingMergePolicy) mergePolicy).getSort();
    iwc.setIndexSort(indexSort);
  }

  iwc.setUseCompoundFile(useCompoundFile);

  if (mergedSegmentWarmerInfo != null) {
    // TODO: add infostream -> normal logging system (there is an issue somewhere)
    @SuppressWarnings({"rawtypes"})
    IndexReaderWarmer warmer = core.getResourceLoader().newInstance(mergedSegmentWarmerInfo.className,
                                                                      IndexReaderWarmer.class,
                                                                      null,
                                                                      new Class[] { InfoStream.class },
                                                                      new Object[] { iwc.getInfoStream() });
    iwc.setMergedSegmentWarmer(warmer);
  }

  return iwc;
}
 
Example 4  Project: uncc2014watsonsim  File: Lucene.java
public Lucene(Path path) throws IOException {
    /* Setup Lucene */
    Directory dir = FSDirectory.open(path);
    // Here we use a StandardAnalyzer; many other analyzers are available.
    Analyzer analyzer = new StandardAnalyzer();
    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    // CREATE_OR_APPEND creates a new index if none exists, otherwise appends to the existing one.
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    iwc.setSimilarity(new BM25Similarity());
    index = new IndexWriter(dir, iwc);
}
 
Example 5  Project: querqy  File: LuceneTermQueryBuilderTest.java
@Test
public void testThatQueryUsesTermButNoFieldBoost() throws Exception {

    Analyzer analyzer = new StandardAnalyzer();

    Directory directory = new ByteBuffersDirectory();
    IndexWriterConfig config = new IndexWriterConfig(analyzer);
    config.setSimilarity(new ClassicSimilarity());
    IndexWriter indexWriter = new IndexWriter(directory, config);


    TestUtil.addNumDocsWithTextField("f1", "v1 v1", indexWriter, 4);
    TestUtil.addNumDocsWithTextField("f1", "v2", indexWriter, 1);

    indexWriter.close();

    IndexReader indexReader = DirectoryReader.open(directory);
    IndexSearcher indexSearcher = new IndexSearcher(indexReader);
    indexSearcher.setSimilarity(new ClassicSimilarity());


    final TermQuery termQuery = new LuceneTermQueryBuilder()
            .createTermQuery(new Term("f1", "v1"), new ConstantFieldBoost(3f));
    final Term term = termQuery.getTerm();
    assertEquals("f1", term.field());
    assertEquals("v1", term.text());

    TopDocs topDocs = indexSearcher.search(termQuery, 10);

    final Weight weight = termQuery.createWeight(indexSearcher, ScoreMode.COMPLETE, 4.5f);
    final Explanation explain = weight.explain(indexReader.getContext().leaves().get(0), topDocs.scoreDocs[0].doc);

    String explainText = explain.toString();

    assertTrue(explainText.contains("4.5 = boost")); // 4.5 (query) but ignore field boost
    assertTrue(explainText.contains("4 = docFreq")); // 4 * v1
    assertTrue(explainText.contains("2.0 = freq")); // 2 * v1 in field
}
 
Example 6  Project: rdf4j  File: LuceneIndex.java
/**
 * Produces an {@link IndexWriterConfig} from the current settings.
 *
 * @return the index writer configuration, with the configured analyzer and similarity applied
 */
private IndexWriterConfig getIndexWriterConfig() {
	IndexWriterConfig cnf = new IndexWriterConfig(analyzer);
	cnf.setSimilarity(similarity);
	return cnf;
}
 
Example 7  Project: modernmt  File: LuceneTranslationMemory.java
public LuceneTranslationMemory(Directory directory, DocumentBuilder documentBuilder, QueryBuilder queryBuilder, Rescorer rescorer, AnalyzerFactory analyzerFactory, int minQuerySize) throws IOException {
    this.indexDirectory = directory;
    this.queryBuilder = queryBuilder;
    this.rescorer = rescorer;
    this.documentBuilder = documentBuilder;
    this.analyzerFactory = analyzerFactory;
    this.shortQueryAnalyzer = analyzerFactory.createShortQueryAnalyzer();
    this.longQueryAnalyzer = analyzerFactory.createLongQueryAnalyzer();
    this.minQuerySize = minQuerySize;

    // Index writer setup
    IndexWriterConfig indexConfig = new IndexWriterConfig(Version.LUCENE_4_10_4, new DelegatingAnalyzerWrapper(PER_FIELD_REUSE_STRATEGY) {
        @Override
        protected Analyzer getWrappedAnalyzer(String fieldName) {
            if (documentBuilder.isHashField(fieldName))
                return analyzerFactory.createHashAnalyzer();
            else
                return analyzerFactory.createContentAnalyzer();
        }
    });

    indexConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    indexConfig.setSimilarity(analyzerFactory.createSimilarity());

    this.indexWriter = new IndexWriter(this.indexDirectory, indexConfig);

    // Ensure index exists
    if (!DirectoryReader.indexExists(directory))
        this.indexWriter.commit();

    // Read channels status
    IndexSearcher searcher = this.getIndexSearcher();

    Query query = this.queryBuilder.getChannels(this.documentBuilder);
    TopDocs docs = searcher.search(query, 1);

    if (docs.scoreDocs.length > 0) {
        Document channelsDocument = searcher.doc(docs.scoreDocs[0].doc);
        this.channels = this.documentBuilder.asChannels(channelsDocument);
    } else {
        this.channels = new HashMap<>();
    }
}
 
Example 8  Project: querqy  File: DependentTermQueryBuilderTest.java
@Test
public void testCreateWeight() throws Exception {

    Analyzer analyzer = new StandardAnalyzer();

    Directory directory = new ByteBuffersDirectory();
    IndexWriterConfig config = new IndexWriterConfig(analyzer);
    config.setSimilarity(new ClassicSimilarity());
    IndexWriter indexWriter = new IndexWriter(directory, config);

    TestUtil.addNumDocsWithTextField("f1", "v1", indexWriter, 4);
    TestUtil.addNumDocsWithTextField("f2", "v1 v1", indexWriter, 1);

    indexWriter.close();

    IndexReader indexReader = DirectoryReader.open(directory);
    IndexSearcher indexSearcher = new IndexSearcher(indexReader);
    indexSearcher.setSimilarity(new ClassicSimilarity());


    DocumentFrequencyCorrection dfc = new DocumentFrequencyCorrection();

    Term qTerm1 = new Term("f1", "v1");
    Term qTerm2 = new Term("f2", "v1");
    dfc.newClause();
    dfc.prepareTerm(qTerm1);
    dfc.prepareTerm(qTerm2);
    dfc.finishedUserQuery();

    DependentTermQueryBuilder.DependentTermQuery query1 = new DependentTermQueryBuilder(dfc)
            .createTermQuery(qTerm1, fieldBoost1);
    DependentTermQueryBuilder.DependentTermQuery query2 = new DependentTermQueryBuilder(dfc)
            .createTermQuery(qTerm2, fieldBoost2);


    TopDocs topDocs = indexSearcher.search(query2, 10);

    final Weight weight2 = query2.createWeight(indexSearcher, ScoreMode.COMPLETE, 4.5f);
    final Explanation explain = weight2.explain(indexReader.leaves().get(0), topDocs.scoreDocs[0].doc);

    String explainText = explain.toString();
    assertTrue(explainText.contains("9.0 = boost")); // 4.5 (query) * 2.0 (field)
    assertTrue(explainText.contains("4 = docFreq")); // 4 * df of f1:v1
    assertTrue(explainText.contains("2.0 = freq")); // don't use tf

    indexReader.close();
    directory.close();
    analyzer.close();

}
 
Example 9  Project: querqy  File: SimilarityTermQueryBuilderTest.java
@Test
public void testCreateWeight() throws Exception {

    Analyzer analyzer = new StandardAnalyzer();

    Directory directory = new ByteBuffersDirectory();
    IndexWriterConfig config = new IndexWriterConfig(analyzer);
    config.setSimilarity(new ClassicSimilarity());
    IndexWriter indexWriter = new IndexWriter(directory, config);


    TestUtil.addNumDocsWithTextField("f1", "v1 v1", indexWriter, 4);
    TestUtil.addNumDocsWithTextField("f1", "v2", indexWriter, 1);

    indexWriter.close();

    IndexReader indexReader = DirectoryReader.open(directory);
    IndexSearcher indexSearcher = new IndexSearcher(indexReader);
    indexSearcher.setSimilarity(new ClassicSimilarity());


    Term term = new Term("f1", "v1");

    SimilarityTermQuery query = new SimilarityTermQueryBuilder().createTermQuery(term, fieldBoost2);

    TopDocs topDocs = indexSearcher.search(query, 10);

    final Weight weight = query.createWeight(indexSearcher, ScoreMode.COMPLETE, 4.5f);
    final Explanation explain = weight.explain(indexReader.getContext().leaves().get(0), topDocs.scoreDocs[0].doc);

    String explainText = explain.toString();

    assertTrue(explainText.contains("9.0 = boost")); // 4.5 (query) * 2.0 (field)
    assertTrue(explainText.contains("4 = docFreq")); // 4 * v1
    assertTrue(explainText.contains("2.0 = freq")); // 2 * v1 in field

    indexReader.close();
    directory.close();
    analyzer.close();

}
 
Example 10  Project: querqy  File: DependentTermQueryBuilderTest.java
@Test
public void testPostingsVsMaxScore() throws Exception {

    Analyzer analyzer = new StandardAnalyzer();

    Directory directory = new ByteBuffersDirectory();
    IndexWriterConfig config = new IndexWriterConfig(analyzer);
    config.setSimilarity(new ClassicSimilarity());
    IndexWriter indexWriter = new IndexWriter(directory, config);

    TestUtil.addNumDocsWithTextField("f1", "v1", indexWriter, 1);
    TestUtil.addNumDocsWithTextField("f2", "v1 v2", indexWriter, 1);


    indexWriter.close();

    IndexReader indexReader = DirectoryReader.open(directory);
    IndexSearcher indexSearcher = new IndexSearcher(indexReader);
    indexSearcher.setSimilarity(new ClassicSimilarity());


    DocumentFrequencyCorrection dfc = new DocumentFrequencyCorrection();

    Term qTerm1 = new Term("f2", "v1");
    Term qTerm2 = new Term("f2", "v2");
    dfc.newClause();
    dfc.prepareTerm(qTerm1);
    dfc.newClause();
    dfc.prepareTerm(qTerm2);
    dfc.finishedUserQuery();


    DependentTermQueryBuilder.DependentTermQuery query1 = new DependentTermQueryBuilder(dfc)
            .createTermQuery(qTerm1, fieldBoost1);
    DependentTermQueryBuilder.DependentTermQuery query2 = new DependentTermQueryBuilder(dfc)
            .createTermQuery(qTerm2, fieldBoost2);


    BooleanQuery.Builder builder = new BooleanQuery.Builder();
    builder.add(query1, BooleanClause.Occur.SHOULD);
    builder.add(query2, BooleanClause.Occur.SHOULD);
    builder.setMinimumNumberShouldMatch(2);

    BooleanQuery bq = builder.build();

    // Query execution will call org.apache.lucene.search.Scorer.getMaxScore which might consume
    // the postingsEnum so that we don't get any hit
    TopDocs topDocs = indexSearcher.search(bq, 10);
    assertEquals(1, topDocs.scoreDocs.length);
}