Below are example usages of org.apache.lucene.index.IndexWriterConfig#setSimilarity(). You can click the link to view the source code on GitHub, or leave a comment in the panel on the right.
/**
 * Verifies that when faceted documents are indexed, the per-field similarity
 * wrapper is only ever consulted for the plain text field ("field"), i.e. the
 * drill-down facet field is not scored through the similarity.
 */
public void testReallyNoNormsForDrillDown() throws Exception {
    Directory indexDir = newDirectory();
    Directory taxonomyDir = newDirectory();

    IndexWriterConfig writerConfig = newIndexWriterConfig(new MockAnalyzer(random()));
    writerConfig.setSimilarity(new PerFieldSimilarityWrapper() {
        final Similarity classicSim = new ClassicSimilarity();

        @Override
        public Similarity get(String fieldName) {
            // Only the text field should reach the similarity; facet fields must not.
            assertEquals("field", fieldName);
            return classicSim;
        }
    });

    TaxonomyWriter taxonomyWriter =
        new DirectoryTaxonomyWriter(taxonomyDir, IndexWriterConfig.OpenMode.CREATE);
    RandomIndexWriter indexWriter = new RandomIndexWriter(random(), indexDir, writerConfig);
    FacetsConfig facetsConfig = new FacetsConfig();

    Document document = new Document();
    document.add(newTextField("field", "text", Field.Store.NO));
    document.add(new FacetField("a", "path"));
    indexWriter.addDocument(facetsConfig.build(taxonomyWriter, document));
    indexWriter.close();

    IOUtils.close(taxonomyWriter, indexDir, taxonomyDir);
}
/**
 * Builds the {@link IndexWriterConfig} for this engine's internal IndexWriter:
 * analyzer, deletion policy, info stream, merge machinery, soft-deletes,
 * similarity, RAM buffer, codec and compound-file settings.
 *
 * @return a fully configured IndexWriterConfig for appending to the existing index
 */
private IndexWriterConfig getIndexWriterConfig() {
final IndexWriterConfig iwc = new IndexWriterConfig(engineConfig.getAnalyzer());
iwc.setCommitOnClose(false); // we by default don't commit on close
// APPEND: the engine always opens an existing Lucene index, never creates one here.
iwc.setOpenMode(IndexWriterConfig.OpenMode.APPEND);
iwc.setIndexDeletionPolicy(combinedDeletionPolicy);
// with tests.verbose, lucene sets this up: plumb to align with filesystem stream
boolean verbose = false;
try {
verbose = Boolean.parseBoolean(System.getProperty("tests.verbose"));
} catch (Exception ignore) {
// Best-effort: reading the system property may fail (e.g. under a SecurityManager);
// fall back to routing Lucene's info stream into the engine logger below.
}
iwc.setInfoStream(verbose ? InfoStream.getDefault() : new LoggerInfoStream(logger));
iwc.setMergeScheduler(mergeScheduler);
// Give us the opportunity to upgrade old segments while performing
// background merges
MergePolicy mergePolicy = config().getMergePolicy();
// always configure soft-deletes field so an engine with soft-deletes disabled can open a Lucene index with soft-deletes.
iwc.setSoftDeletesField(Lucene.SOFT_DELETES_FIELD);
if (softDeleteEnabled) {
// Wrap the configured policy so merges prune recovery-source fields and retain
// soft-deleted docs matching the retention query.
mergePolicy = new RecoverySourcePruneMergePolicy(SourceFieldMapper.RECOVERY_SOURCE_NAME, softDeletesPolicy::getRetentionQuery,
new SoftDeletesRetentionMergePolicy(Lucene.SOFT_DELETES_FIELD, softDeletesPolicy::getRetentionQuery, mergePolicy));
}
iwc.setMergePolicy(new ElasticsearchMergePolicy(mergePolicy));
iwc.setSimilarity(engineConfig.getSimilarity());
iwc.setRAMBufferSizeMB(engineConfig.getIndexingBufferSize().getMbFrac());
iwc.setCodec(engineConfig.getCodec());
iwc.setUseCompoundFile(true); // always use compound on flush - reduces # of file-handles on refresh
if (config().getIndexSort() != null) {
iwc.setIndexSort(config().getIndexSort());
}
return iwc;
}
/**
 * Translates this Solr configuration into a Lucene {@link IndexWriterConfig}
 * for the given core: buffering limits, schema similarity, merge
 * policy/scheduler, optional index sort and an optional merged-segment warmer.
 *
 * @param core the SolrCore providing the schema and resource loader
 * @return the configured IndexWriterConfig
 * @throws IOException if building a merge component from the resource loader fails
 */
public IndexWriterConfig toIndexWriterConfig(SolrCore core) throws IOException {
IndexSchema schema = core.getLatestSchema();
IndexWriterConfig iwc = new IndexWriterConfig(new DelayedSchemaAnalyzer(core));
// -1 means "not configured"; keep Lucene's defaults in that case.
if (maxBufferedDocs != -1)
iwc.setMaxBufferedDocs(maxBufferedDocs);
if (ramBufferSizeMB != -1)
iwc.setRAMBufferSizeMB(ramBufferSizeMB);
if (ramPerThreadHardLimitMB != -1) {
iwc.setRAMPerThreadHardLimitMB(ramPerThreadHardLimitMB);
}
iwc.setSimilarity(schema.getSimilarity());
MergePolicy mergePolicy = buildMergePolicy(core.getResourceLoader(), schema);
iwc.setMergePolicy(mergePolicy);
MergeScheduler mergeScheduler = buildMergeScheduler(core.getResourceLoader());
iwc.setMergeScheduler(mergeScheduler);
iwc.setInfoStream(infoStream);
// A sorting merge policy implies segments are kept sorted; propagate its sort.
if (mergePolicy instanceof SortingMergePolicy) {
Sort indexSort = ((SortingMergePolicy) mergePolicy).getSort();
iwc.setIndexSort(indexSort);
}
iwc.setUseCompoundFile(useCompoundFile);
if (mergedSegmentWarmerInfo != null) {
// TODO: add infostream -> normal logging system (there is an issue somewhere)
@SuppressWarnings({"rawtypes"})
IndexReaderWarmer warmer = core.getResourceLoader().newInstance(mergedSegmentWarmerInfo.className,
IndexReaderWarmer.class,
null,
new Class[] { InfoStream.class },
new Object[] { iwc.getInfoStream() });
iwc.setMergedSegmentWarmer(warmer);
}
return iwc;
}
/**
 * Opens (or creates) a Lucene index at the given filesystem path, using a
 * StandardAnalyzer for tokenization and BM25 for scoring.
 *
 * @param path directory on disk where the index lives (or will be created)
 * @throws IOException if the index directory cannot be opened
 */
public Lucene(Path path) throws IOException {
/* Setup Lucene */
Directory dir = FSDirectory.open(path);
// here we are using a standard analyzer; there are many other analyzers available.
Analyzer analyzer = new StandardAnalyzer();
IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
// CREATE_OR_APPEND keeps an existing index and appends to it; a brand-new index
// is created only if none exists. (NOTE(review): the previous comment claimed
// this mode overwrites the index — that describes OpenMode.CREATE, not this one.)
iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
// BM25 is Lucene's modern default ranking model; set explicitly for clarity.
iwc.setSimilarity(new BM25Similarity());
index = new IndexWriter(dir, iwc);
}
/**
 * Verifies that the term query built by LuceneTermQueryBuilder targets the
 * requested term and that its explanation reflects the query-level boost
 * (4.5) while ignoring the field boost (3.0).
 *
 * Fix: the reader, directory and analyzer were previously never closed,
 * leaking resources; they are now released like in the sibling tests.
 */
@Test
public void testThatQueryUsesTermButNoFieldBoost() throws Exception {
    Analyzer analyzer = new StandardAnalyzer();
    Directory directory = new ByteBuffersDirectory();
    IndexWriterConfig config = new IndexWriterConfig(analyzer);
    config.setSimilarity(new ClassicSimilarity());
    IndexWriter indexWriter = new IndexWriter(directory, config);
    // 4 docs containing "v1" twice, 1 doc containing "v2" — fixes docFreq/freq below.
    TestUtil.addNumDocsWithTextField("f1", "v1 v1", indexWriter, 4);
    TestUtil.addNumDocsWithTextField("f1", "v2", indexWriter, 1);
    indexWriter.close();

    IndexReader indexReader = DirectoryReader.open(directory);
    IndexSearcher indexSearcher = new IndexSearcher(indexReader);
    indexSearcher.setSimilarity(new ClassicSimilarity());

    final TermQuery termQuery = new LuceneTermQueryBuilder()
            .createTermQuery(new Term("f1", "v1"), new ConstantFieldBoost(3f));
    final Term term = termQuery.getTerm();
    assertEquals("f1", term.field());
    assertEquals("v1", term.text());

    TopDocs topDocs = indexSearcher.search(termQuery, 10);
    final Weight weight = termQuery.createWeight(indexSearcher, ScoreMode.COMPLETE, 4.5f);
    final Explanation explain = weight.explain(indexReader.getContext().leaves().get(0), topDocs.scoreDocs[0].doc);
    String explainText = explain.toString();
    assertTrue(explainText.contains("4.5 = boost")); // 4.5 (query) but ignore field boost
    assertTrue(explainText.contains("4 = docFreq")); // 4 * v1
    assertTrue(explainText.contains("2.0 = freq")); // 2 * v1 in field

    // Release Lucene resources (was missing; see sibling tests).
    indexReader.close();
    directory.close();
    analyzer.close();
}
/**
 * Creates an {@link IndexWriterConfig} from this instance's analyzer and
 * similarity settings.
 *
 * @return a new IndexWriterConfig wired with the configured similarity
 */
private IndexWriterConfig getIndexWriterConfig() {
    final IndexWriterConfig writerConfig = new IndexWriterConfig(analyzer);
    writerConfig.setSimilarity(similarity);
    return writerConfig;
}
/**
 * Builds a translation memory backed by a Lucene index in the given directory.
 * Wires per-field analysis (hash vs. content fields), opens the writer,
 * makes sure an index physically exists, then loads the persisted channels
 * state from the special channels document (empty map if absent).
 *
 * @param directory       Lucene directory holding (or to hold) the index
 * @param documentBuilder maps domain objects to/from Lucene documents
 * @param queryBuilder    builds the queries used against the index
 * @param rescorer        rescoring strategy for search results
 * @param analyzerFactory source of the various analyzers and the similarity
 * @param minQuerySize    threshold distinguishing short vs. long queries
 * @throws IOException if the index cannot be opened, committed or searched
 */
public LuceneTranslationMemory(Directory directory, DocumentBuilder documentBuilder, QueryBuilder queryBuilder, Rescorer rescorer, AnalyzerFactory analyzerFactory, int minQuerySize) throws IOException {
this.indexDirectory = directory;
this.queryBuilder = queryBuilder;
this.rescorer = rescorer;
this.documentBuilder = documentBuilder;
this.analyzerFactory = analyzerFactory;
this.shortQueryAnalyzer = analyzerFactory.createShortQueryAnalyzer();
this.longQueryAnalyzer = analyzerFactory.createLongQueryAnalyzer();
this.minQuerySize = minQuerySize;
// Index writer setup
// Per-field analysis: hash fields get the hash analyzer, all others the content analyzer.
IndexWriterConfig indexConfig = new IndexWriterConfig(Version.LUCENE_4_10_4, new DelegatingAnalyzerWrapper(PER_FIELD_REUSE_STRATEGY) {
@Override
protected Analyzer getWrappedAnalyzer(String fieldName) {
if (documentBuilder.isHashField(fieldName))
return analyzerFactory.createHashAnalyzer();
else
return analyzerFactory.createContentAnalyzer();
}
});
indexConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
indexConfig.setSimilarity(analyzerFactory.createSimilarity());
this.indexWriter = new IndexWriter(this.indexDirectory, indexConfig);
// Ensure index exists
// An empty commit materializes the index so the searcher below can open it.
if (!DirectoryReader.indexExists(directory))
this.indexWriter.commit();
// Read channels status
IndexSearcher searcher = this.getIndexSearcher();
Query query = this.queryBuilder.getChannels(this.documentBuilder);
TopDocs docs = searcher.search(query, 1);
if (docs.scoreDocs.length > 0) {
Document channelsDocument = searcher.doc(docs.scoreDocs[0].doc);
this.channels = this.documentBuilder.asChannels(channelsDocument);
} else {
// First run: no channels document persisted yet.
this.channels = new HashMap<>();
}
}
/**
 * Checks that a DependentTermQuery's weight explains scoring with the
 * boost as query-boost times field-boost, the corrected document frequency
 * from f1:v1, and a raw term frequency (tf is not folded into the boost).
 */
@Test
public void testCreateWeight() throws Exception {
    Analyzer analyzer = new StandardAnalyzer();
    Directory dir = new ByteBuffersDirectory();

    IndexWriterConfig writerConfig = new IndexWriterConfig(analyzer);
    writerConfig.setSimilarity(new ClassicSimilarity());
    IndexWriter writer = new IndexWriter(dir, writerConfig);
    TestUtil.addNumDocsWithTextField("f1", "v1", writer, 4);
    TestUtil.addNumDocsWithTextField("f2", "v1 v1", writer, 1);
    writer.close();

    IndexReader reader = DirectoryReader.open(dir);
    IndexSearcher searcher = new IndexSearcher(reader);
    searcher.setSimilarity(new ClassicSimilarity());

    DocumentFrequencyCorrection correction = new DocumentFrequencyCorrection();
    Term termOnF1 = new Term("f1", "v1");
    Term termOnF2 = new Term("f2", "v1");
    correction.newClause();
    correction.prepareTerm(termOnF1);
    correction.prepareTerm(termOnF2);
    correction.finishedUserQuery();

    // Both queries must be created: registering query1 contributes the df of f1:v1.
    DependentTermQueryBuilder.DependentTermQuery firstQuery =
            new DependentTermQueryBuilder(correction).createTermQuery(termOnF1, fieldBoost1);
    DependentTermQueryBuilder.DependentTermQuery secondQuery =
            new DependentTermQueryBuilder(correction).createTermQuery(termOnF2, fieldBoost2);

    TopDocs hits = searcher.search(secondQuery, 10);
    final Weight secondWeight = secondQuery.createWeight(searcher, ScoreMode.COMPLETE, 4.5f);
    final Explanation explanation = secondWeight.explain(reader.leaves().get(0), hits.scoreDocs[0].doc);
    String explainText = explanation.toString();
    assertTrue(explainText.contains("9.0 = boost")); // 4.5 (query) * 2.0 (field)
    assertTrue(explainText.contains("4 = docFreq")); // 4 * df of f1:v1
    assertTrue(explainText.contains("2.0 = freq")); // don't use tf

    reader.close();
    dir.close();
    analyzer.close();
}
/**
 * Checks that a SimilarityTermQuery's weight explanation combines the query
 * boost (4.5) with the field boost (2.0) and reports the expected document
 * and term frequencies for f1:v1.
 */
@Test
public void testCreateWeight() throws Exception {
    Analyzer analyzer = new StandardAnalyzer();
    Directory dir = new ByteBuffersDirectory();

    IndexWriterConfig writerConfig = new IndexWriterConfig(analyzer);
    writerConfig.setSimilarity(new ClassicSimilarity());
    IndexWriter writer = new IndexWriter(dir, writerConfig);
    // 4 docs with "v1" twice and 1 doc with "v2": fixes docFreq=4, freq=2 below.
    TestUtil.addNumDocsWithTextField("f1", "v1 v1", writer, 4);
    TestUtil.addNumDocsWithTextField("f1", "v2", writer, 1);
    writer.close();

    IndexReader reader = DirectoryReader.open(dir);
    IndexSearcher searcher = new IndexSearcher(reader);
    searcher.setSimilarity(new ClassicSimilarity());

    Term queryTerm = new Term("f1", "v1");
    SimilarityTermQuery similarityQuery =
            new SimilarityTermQueryBuilder().createTermQuery(queryTerm, fieldBoost2);

    TopDocs hits = searcher.search(similarityQuery, 10);
    final Weight queryWeight = similarityQuery.createWeight(searcher, ScoreMode.COMPLETE, 4.5f);
    final Explanation explanation =
            queryWeight.explain(reader.getContext().leaves().get(0), hits.scoreDocs[0].doc);
    String explainText = explanation.toString();
    assertTrue(explainText.contains("9.0 = boost")); // 4.5 (query) * 2.0 (field)
    assertTrue(explainText.contains("4 = docFreq")); // 4 * v1
    assertTrue(explainText.contains("2.0 = freq")); // 2 * v1 in field

    reader.close();
    dir.close();
    analyzer.close();
}
/**
 * Regression test: executing a BooleanQuery of two DependentTermQueries with
 * minimumNumberShouldMatch=2 must still produce the matching document —
 * Scorer.getMaxScore must not consume the postings enum and lose the hit.
 *
 * Fix: the reader, directory and analyzer were previously never closed,
 * leaking resources; they are now released like in the sibling tests.
 */
@Test
public void testPostingsVsMaxScore() throws Exception {
    Analyzer analyzer = new StandardAnalyzer();
    Directory directory = new ByteBuffersDirectory();
    IndexWriterConfig config = new IndexWriterConfig(analyzer);
    config.setSimilarity(new ClassicSimilarity());
    IndexWriter indexWriter = new IndexWriter(directory, config);
    TestUtil.addNumDocsWithTextField("f1", "v1", indexWriter, 1);
    // The single doc matching BOTH f2:v1 and f2:v2 is the one hit we expect.
    TestUtil.addNumDocsWithTextField("f2", "v1 v2", indexWriter, 1);
    indexWriter.close();

    IndexReader indexReader = DirectoryReader.open(directory);
    IndexSearcher indexSearcher = new IndexSearcher(indexReader);
    indexSearcher.setSimilarity(new ClassicSimilarity());

    DocumentFrequencyCorrection dfc = new DocumentFrequencyCorrection();
    Term qTerm1 = new Term("f2", "v1");
    Term qTerm2 = new Term("f2", "v2");
    dfc.newClause();
    dfc.prepareTerm(qTerm1);
    dfc.newClause();
    dfc.prepareTerm(qTerm2);
    dfc.finishedUserQuery();

    DependentTermQueryBuilder.DependentTermQuery query1 = new DependentTermQueryBuilder(dfc)
            .createTermQuery(qTerm1, fieldBoost1);
    DependentTermQueryBuilder.DependentTermQuery query2 = new DependentTermQueryBuilder(dfc)
            .createTermQuery(qTerm2, fieldBoost2);

    BooleanQuery.Builder builder = new BooleanQuery.Builder();
    builder.add(query1, BooleanClause.Occur.SHOULD);
    builder.add(query2, BooleanClause.Occur.SHOULD);
    builder.setMinimumNumberShouldMatch(2);
    BooleanQuery bq = builder.build();

    // Query execution will call org.apache.lucene.search.Scorer.getMaxScore which might consume
    // the postingsEnum so that we don't get any hit
    TopDocs topDocs = indexSearcher.search(bq, 10);
    assertEquals(1, topDocs.scoreDocs.length);

    // Release Lucene resources (was missing; see sibling tests).
    indexReader.close();
    directory.close();
    analyzer.close();
}