下面列出了怎么用org.apache.lucene.search.similarities.Similarity的API类实例代码及写法,或者点击链接到github查看源代码。
/**
 * Captures every collaborator and tuning value needed by this call.
 *
 * Each argument is stored unchanged in the field of the same name (with a leading underscore);
 * no validation or copying is performed here.
 */
public SimpleQueryParallelCall(AtomicBoolean running, String table, QueryStatus status, Query query,
    Selector selector, Meter queriesInternalMeter, ShardServerContext shardServerContext, boolean runSlow,
    int fetchCount, int maxHeapPerRowFetch, Similarity similarity, TableContext context, Sort sort,
    DeepPagingCache deepPagingCache, MemoryAllocationWatcher memoryAllocationWatcher) {
  // Table and server-side context.
  _table = table;
  _context = context;
  _shardServerContext = shardServerContext;

  // The query itself and how its results are scored, ordered and selected.
  _query = query;
  _similarity = similarity;
  _sort = sort;
  _selector = selector;

  // Execution state and tracking.
  _running = running;
  _status = status;
  _queriesInternalMeter = queriesInternalMeter;
  _memoryAllocationWatcher = memoryAllocationWatcher;

  // Fetch and paging settings.
  _runSlow = runSlow;
  _fetchCount = fetchCount;
  _maxHeapPerRowFetch = maxHeapPerRowFetch;
  _deepPagingCache = deepPagingCache;
}
/**
 * White-box check of the Similarity in effect for the given field, according to
 * {@link SolrCore#getLatestSchema}.
 *
 * Once the type checks have passed, each of the supplied Similarity-accepting validators is
 * invoked, in order, with the field's Similarity.
 *
 * @param fieldname  field whose Similarity is looked up in the per-field wrapper
 * @param expected   exact class the field's Similarity must have
 * @param validators extra checks to run against the (already type-checked) Similarity
 */
@SafeVarargs
@SuppressWarnings({"unchecked", "varargs"})
private static <T extends Similarity> void assertFieldSimilarity(String fieldname, Class<T> expected, Consumer<T>... validators) {
  CoreContainer container = jetty.getCoreContainer();
  try (SolrCore core = container.getCore("collection1")) {
    SimilarityFactory factory = core.getLatestSchema().getSimilarityFactory();
    assertNotNull(factory);
    boolean isSchemaFactory = factory instanceof SchemaSimilarityFactory;
    assertTrue("test only works with SchemaSimilarityFactory", isSchemaFactory);

    Similarity schemaSim = core.getLatestSchema().getSimilarity();
    assertNotNull(schemaSim);
    // sanity check factory vs similarity in use - also verifies inform was called on the factory,
    // otherwise an exception is expected here
    assertEquals(schemaSim, factory.getSimilarity());
    boolean isPerField = schemaSim instanceof PerFieldSimilarityWrapper;
    assertTrue("test only works with PerFieldSimilarityWrapper, SchemaSimilarityFactory redefined?", isPerField);

    Similarity fieldSim = ((PerFieldSimilarityWrapper) schemaSim).get(fieldname);
    assertEquals("wrong sim for field=" + fieldname, expected, fieldSim.getClass());

    // The class equality asserted above is what makes this unchecked cast safe.
    for (Consumer<T> validator : validators) {
      validator.accept((T) fieldSim);
    }
  }
}
/**
 * Exercises {@code NormValueSource} with a {@code ClassicSimilarity} installed on the shared
 * searcher; the searcher's previous similarity is always restored afterwards.
 */
public void testNorm() throws Exception {
  final Similarity previous = searcher.getSimilarity();
  try {
    // no norm field (so agnostic to indexed similarity)
    searcher.setSimilarity(new ClassicSimilarity());

    final ValueSource byteNorm = new NormValueSource("byte");
    final float[] expected = { 1f, 1f };
    assertHits(new FunctionQuery(byteNorm), expected);
    // whether or not norms exist, the value source is expected to report a value for every doc
    assertAllExist(byteNorm);

    final ValueSource textNorm = new NormValueSource("text");
    assertAllExist(textNorm);
  } finally {
    searcher.setSimilarity(previous);
  }
}
/**
 * Exercises {@code TFValueSource} for a text field, a string field and a non-existent field,
 * with a {@code ClassicSimilarity} installed on the shared searcher for the duration of the test.
 */
public void testTF() throws Exception {
  final Similarity previous = searcher.getSimilarity();
  try {
    // no norm field (so agnostic to indexed similarity)
    searcher.setSimilarity(new ClassicSimilarity());

    // term "test" in the "text" field
    final ValueSource textTf = new TFValueSource("bogus", "bogus", "text", new BytesRef("test"));
    final float[] expectedText = { (float)Math.sqrt(3d), (float)Math.sqrt(1d) };
    assertHits(new FunctionQuery(textTf), expectedText);
    assertAllExist(textTf);

    // term "bar" in the "string" field
    final ValueSource stringTf = new TFValueSource("bogus", "bogus", "string", new BytesRef("bar"));
    final float[] expectedString = { 0f, 1f };
    assertHits(new FunctionQuery(stringTf), expectedString);
    assertAllExist(stringTf);

    // a field/term that does not exist still yields a value (0) for every doc
    final ValueSource missingTf = new TFValueSource("bogus", "bogus", "bogus", new BytesRef("bogus"));
    final float[] expectedMissing = { 0F, 0F };
    assertHits(new FunctionQuery(missingTf), expectedMissing);
    assertAllExist(missingTf);
  } finally {
    searcher.setSimilarity(previous);
  }
}
/**
 * Builds a {@link SimilarityFactory} from a schema DOM node.
 *
 * The class named by the node's {@code SimilarityFactory.CLASS_NAME} attribute is instantiated
 * through the resource loader. If the instance is itself a factory it is initialised with the
 * node's child parameters (plus the class name); otherwise it is wrapped in an anonymous
 * factory that hands it back as a {@link Similarity}.
 *
 * @param loader resource loader used to instantiate the configured class
 * @param node   the similarity configuration element, or {@code null}
 * @return the configured factory, or {@code null} when {@code node} is {@code null}
 */
static SimilarityFactory readSimilarity(SolrResourceLoader loader, Node node) {
  if (node == null) {
    return null;
  }

  final String classArg = ((Element) node).getAttribute(SimilarityFactory.CLASS_NAME);
  final Object obj = loader.newInstance(classArg, Object.class, "search.similarities.");

  if (!(obj instanceof SimilarityFactory)) {
    // Not a factory: assume it is a Similarity. If it is not, the cast below raises a
    // ClassCastException when getSimilarity() is called, which serves as the error handling.
    return new SimilarityFactory() {
      @Override
      public Similarity getSimilarity() {
        return (Similarity) obj;
      }
    };
  }

  // configure a factory, get a similarity back
  final NamedList<Object> factoryArgs = DOMUtil.childNodesToNamedList(node);
  factoryArgs.add(SimilarityFactory.CLASS_NAME, classArg);
  final SimilarityFactory factory = (SimilarityFactory) obj;
  factory.init(factoryArgs.toSolrParams());
  return factory;
}
/**
 * Indexes one document holding a text field and a facet field, using a per-field similarity
 * wrapper that asserts it is only ever asked about the field named {@code "field"}.
 */
public void testReallyNoNormsForDrillDown() throws Exception {
  Directory indexDir = newDirectory();
  Directory taxonomyDir = newDirectory();
  IndexWriterConfig writerConfig = newIndexWriterConfig(new MockAnalyzer(random()));

  // Any similarity lookup for a field other than "field" fails the test.
  PerFieldSimilarityWrapper fieldOnlySimilarity = new PerFieldSimilarityWrapper() {
    final Similarity sim = new ClassicSimilarity();

    @Override
    public Similarity get(String name) {
      assertEquals("field", name);
      return sim;
    }
  };
  writerConfig.setSimilarity(fieldOnlySimilarity);

  TaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxonomyDir, IndexWriterConfig.OpenMode.CREATE);
  RandomIndexWriter indexWriter = new RandomIndexWriter(random(), indexDir, writerConfig);
  FacetsConfig facetsConfig = new FacetsConfig();

  Document document = new Document();
  document.add(newTextField("field", "text", Field.Store.NO));
  document.add(new FacetField("a", "path"));
  indexWriter.addDocument(facetsConfig.build(taxonomyWriter, document));

  indexWriter.close();
  IOUtils.close(taxonomyWriter, indexDir, taxonomyDir);
}
/**
 * Creates a {@link KNearestNeighborClassifier}.
 *
 * @param indexReader    the reader on the index to be used for classification
 * @param similarity     the {@link Similarity} to be used by the underlying {@link IndexSearcher} or {@code null}
 *                       (defaults to {@link org.apache.lucene.search.similarities.BM25Similarity})
 * @param analyzer       an {@link Analyzer} used to analyze unseen text
 * @param query          a {@link Query} to eventually filter the docs used for training the classifier, or {@code null}
 *                       if all the indexed docs should be used
 * @param k              the no. of docs to select in the MLT results to find the nearest neighbor
 * @param minDocsFreq    {@link MoreLikeThis#minDocFreq} parameter; only applied when greater than zero
 * @param minTermFreq    {@link MoreLikeThis#minTermFreq} parameter; only applied when greater than zero
 * @param classFieldName the name of the field used as the output for the classifier
 * @param textFieldNames the name of the fields used as the inputs for the classifier, they can contain boosting indication e.g. title^10
 */
public KNearestNeighborClassifier(IndexReader indexReader, Similarity similarity, Analyzer analyzer, Query query, int k, int minDocsFreq,
                                  int minTermFreq, String classFieldName, String... textFieldNames) {
  this.textFieldNames = textFieldNames;
  this.classFieldName = classFieldName;

  // MoreLikeThis instance over the same reader, analysing the configured text fields.
  this.mlt = new MoreLikeThis(indexReader);
  this.mlt.setAnalyzer(analyzer);
  this.mlt.setFieldNames(textFieldNames);

  // Searcher falls back to BM25 when the caller supplies no similarity.
  this.indexSearcher = new IndexSearcher(indexReader);
  this.indexSearcher.setSimilarity(similarity != null ? similarity : new BM25Similarity());

  // Non-positive frequency thresholds leave the MoreLikeThis settings untouched.
  if (minDocsFreq > 0) {
    this.mlt.setMinDocFreq(minDocsFreq);
  }
  if (minTermFreq > 0) {
    this.mlt.setMinTermFreq(minTermFreq);
  }

  this.query = query;
  this.k = k;
}
/**
 * Returns a matcher factory whose matchers score documents with the given similarity.
 *
 * The factory installs {@code similarity} on the searcher it is handed and then creates a
 * collecting matcher that reports a {@link ScoringMatch} only for strictly positive scores.
 * Two matches for the same query are merged by summing their scores.
 *
 * @param similarity similarity to install on each searcher passed to the factory
 */
public static final MatcherFactory<ScoringMatch> matchWithSimilarity(Similarity similarity) {
  return searcher -> {
    searcher.setSimilarity(similarity);
    return new CollectingMatcher<ScoringMatch>(searcher, ScoreMode.COMPLETE) {

      @Override
      protected ScoringMatch doMatch(String queryId, int doc, Scorable scorer) throws IOException {
        final float docScore = scorer.score();
        // zero or negative scores count as "no match"
        return (docScore > 0) ? new ScoringMatch(queryId, docScore) : null;
      }

      @Override
      public ScoringMatch resolve(ScoringMatch match1, ScoringMatch match2) {
        // keep the first match's id, add the two scores together
        return new ScoringMatch(
            match1.getQueryId(),
            match1.getScore() + match2.getScore());
      }
    };
  };
}
/**
 * Creates the span scorer for one index segment.
 *
 * @return {@code null} when this weight has no field or the segment yields no spans
 * @throws IllegalStateException if the field has terms in this segment but no position data
 */
@Override
public SpanScorer scorer(LeafReaderContext context) throws IOException {
  if (field == null) {
    return null;
  }

  final Terms fieldTerms = context.reader().terms(field);
  final boolean lacksPositions = fieldTerms != null && !fieldTerms.hasPositions();
  if (lacksPositions) {
    throw new IllegalStateException("field \"" + field +
        "\" was indexed without position data; cannot run SpanQuery (query=" + parentQuery + ")");
  }

  final Spans segmentSpans = getSpans(context, Postings.PAYLOADS);
  if (segmentSpans == null) {
    return null;
  }

  return new SpanScorer(this, segmentSpans, getSimScorer(context));
}
/**
 * Wires up the service and decides which similarity is exposed as the per-field similarity.
 *
 * The default similarity is resolved from the lookup service. The "base" similarity is the
 * one registered under the name {@code "base"} when present, otherwise the default. When no
 * mapper service is available the default similarity is used directly instead of a
 * {@code PerFieldSimilarity}.
 */
@Inject
public SimilarityService(Index index, IndexSettingsService indexSettingsService,
                         final SimilarityLookupService similarityLookupService, final MapperService mapperService) {
  super(index, indexSettingsService.getSettings());
  this.similarityLookupService = similarityLookupService;
  this.mapperService = mapperService;

  final Similarity defaultSimilarity =
      similarityLookupService.similarity(SimilarityLookupService.DEFAULT_SIMILARITY).get();

  // Expert users can configure the base type as being different to default, but out-of-box we use default.
  final Similarity baseSimilarity;
  if (similarityLookupService.similarity("base") == null) {
    baseSimilarity = defaultSimilarity;
  } else {
    baseSimilarity = similarityLookupService.similarity("base").get();
  }

  if (mapperService == null) {
    this.perFieldSimilarity = defaultSimilarity;
  } else {
    this.perFieldSimilarity = new PerFieldSimilarity(defaultSimilarity, baseSimilarity, mapperService);
  }
}
/**
 * Translates a legacy {@code ScoreType} into a join {@code ScoreMode} and wraps the arguments
 * in a {@code LateParsingQuery}.
 *
 * @param maxChildren upper bound on matching children; {@code 0} is treated as unbounded
 * @throws UnsupportedOperationException if {@code scoreType} has no corresponding score mode
 */
public static Query joinUtilHelper(String parentType, ParentChildIndexFieldData parentChildIndexFieldData, Similarity similarity, Query toQuery, ScoreType scoreType, Query innerQuery, int minChildren, int maxChildren) throws IOException {
  // 0 in pre 2.x p/c impl means unbounded
  final int childLimit = (maxChildren == 0) ? Integer.MAX_VALUE : maxChildren;

  // TODO: move entirely over from ScoreType to org.apache.lucene.join.ScoreMode, when we drop the 1.x parent child code.
  final ScoreMode joinScoreMode;
  switch (scoreType) {
    case NONE:
      joinScoreMode = ScoreMode.None;
      break;
    case MIN:
      joinScoreMode = ScoreMode.Min;
      break;
    case MAX:
      joinScoreMode = ScoreMode.Max;
      break;
    case SUM:
      joinScoreMode = ScoreMode.Total;
      break;
    case AVG:
      joinScoreMode = ScoreMode.Avg;
      break;
    default:
      throw new UnsupportedOperationException("score type [" + scoreType + "] not supported");
  }

  return new LateParsingQuery(toQuery, innerQuery, minChildren, childLimit, parentType, joinScoreMode, parentChildIndexFieldData, similarity);
}
/**
 * Stores the supplied arguments unchanged in the fields of the same name; nothing is parsed
 * or validated at construction time.
 */
LateParsingQuery(Query toQuery, Query innerQuery, int minChildren, int maxChildren, String parentType, ScoreMode scoreMode, ParentChildIndexFieldData parentChildIndexFieldData, Similarity similarity) {
  // The two queries.
  this.innerQuery = innerQuery;
  this.toQuery = toQuery;

  // Parent type and its field data.
  this.parentType = parentType;
  this.parentChildIndexFieldData = parentChildIndexFieldData;

  // Scoring configuration.
  this.scoreMode = scoreMode;
  this.similarity = similarity;

  // Bounds on the number of children.
  this.minChildren = minChildren;
  this.maxChildren = maxChildren;
}
/**
* Creates a new {@link org.elasticsearch.index.engine.EngineConfig}.
*
* Collaborators passed in are stored as-is; the remaining configuration values are read
* from {@code indexSettings}, each with an explicit fallback.
*/
public EngineConfig(ShardId shardId, ThreadPool threadPool, ShardIndexingService indexingService,
Settings indexSettings, IndicesWarmer warmer, Store store, SnapshotDeletionPolicy deletionPolicy,
MergePolicy mergePolicy, MergeSchedulerConfig mergeSchedulerConfig, Analyzer analyzer,
Similarity similarity, CodecService codecService, Engine.FailedEngineListener failedEngineListener,
TranslogRecoveryPerformer translogRecoveryPerformer, QueryCache queryCache, QueryCachingPolicy queryCachingPolicy, IndexSearcherWrappingService wrappingService, TranslogConfig translogConfig) {
// Collaborators handed in by the caller.
this.shardId = shardId;
this.indexSettings = indexSettings;
this.threadPool = threadPool;
this.indexingService = indexingService;
this.warmer = warmer;
this.store = store;
this.deletionPolicy = deletionPolicy;
this.mergePolicy = mergePolicy;
this.mergeSchedulerConfig = mergeSchedulerConfig;
this.analyzer = analyzer;
this.similarity = similarity;
this.codecService = codecService;
this.failedEngineListener = failedEngineListener;
this.wrappingService = wrappingService;
// Values read from the index settings; the second argument is the fallback when the key is absent.
this.optimizeAutoGenerateId = indexSettings.getAsBoolean(EngineConfig.INDEX_OPTIMIZE_AUTOGENERATED_ID_SETTING, false);
// The fallback here is the field's own value at this point, i.e. whatever it was initialised to.
this.compoundOnFlush = indexSettings.getAsBoolean(EngineConfig.INDEX_COMPOUND_ON_FLUSH, compoundOnFlush);
codecName = indexSettings.get(EngineConfig.INDEX_CODEC_SETTING, EngineConfig.DEFAULT_CODEC_NAME);
// We start up inactive and rely on IndexingMemoryController to give us our fair share once we start indexing:
indexingBufferSize = IndexingMemoryController.INACTIVE_SHARD_INDEXING_BUFFER;
gcDeletesInMillis = indexSettings.getAsTime(INDEX_GC_DELETES_SETTING, EngineConfig.DEFAULT_GC_DELETES).millis();
versionMapSizeSetting = indexSettings.get(INDEX_VERSION_MAP_SIZE, DEFAULT_VERSION_MAP_SIZE);
// NOTE(review): presumably derives the effective version map size from the fields assigned
// just above (versionMapSizeSetting, indexingBufferSize) - confirm before reordering.
updateVersionMapSize();
this.translogRecoveryPerformer = translogRecoveryPerformer;
this.forceNewTranslog = indexSettings.getAsBoolean(INDEX_FORCE_NEW_TRANSLOG, false);
this.queryCache = queryCache;
this.queryCachingPolicy = queryCachingPolicy;
this.translogConfig = translogConfig;
}
/**
 * Sole constructor.
 *
 * @param weight        the weight that created this scorer; passed to the superclass
 * @param spans         the spans to score; must not be {@code null}
 * @param docScorer     similarity scorer; stored as given, without a null check
 * @param weightedCount flag stored as given
 * @throws NullPointerException if {@code spans} is {@code null}
 */
public PhraseCountScorer(CustomSpanWeight weight, Spans spans, Similarity.SimScorer docScorer, boolean weightedCount) {
  super(weight);
  this.weightedCount = weightedCount;
  this.docScorer = docScorer;
  this.spans = Objects.requireNonNull(spans);
}
/**
 * Describes a similarity as an ordered map with its class name and its {@code toString()}.
 *
 * @param similarity the similarity to describe; may be {@code null}
 * @return a map with {@code "className"} and {@code "details"} entries, or an empty map when
 *         {@code similarity} is {@code null}
 */
private static SimpleOrderedMap<Object> getSimilarityInfo(
    Similarity similarity) {
  final SimpleOrderedMap<Object> info = new SimpleOrderedMap<>();
  if (similarity == null) {
    return info;
  }
  info.add("className", similarity.getClass().getName());
  info.add("details", similarity.toString());
  return info;
}
/**
 * Creates a {@code LindenSimilarity} from plugin parameters.
 *
 * Reads {@code "dict"} (handed to {@code IDFManager.createInstance}) and the optional
 * {@code "norm"} value. When {@code "norm"} is missing or empty the single-argument
 * constructor is used; otherwise the value is parsed as a float and passed as the second
 * constructor argument.
 */
@Override
public Similarity getInstance(Map<String, String> params) throws IOException {
  final String dict = params.get("dict");
  final String normLowerBound = params.get("norm");

  if (Strings.isNullOrEmpty(normLowerBound)) {
    return new LindenSimilarity(IDFManager.createInstance(dict));
  }
  return new LindenSimilarity(IDFManager.createInstance(dict), Float.parseFloat(normLowerBound));
}
/** bm25 with parameters: the "text_params" field must use BM25 with k1=1.2 and b=0.76 */
public void testParameters() throws Exception {
  final float tolerance = 0.01f;
  final Similarity configured = getSimilarity("text_params");
  assertEquals(BM25Similarity.class, configured.getClass());

  final BM25Similarity bm25 = (BM25Similarity) configured;
  assertEquals(1.2f, bm25.getK1(), tolerance);
  assertEquals(0.76f, bm25.getB(), tolerance);
}
/**
 * Builds the score explanation for a matched document.
 *
 * @param similarity similarity whose simple class name is shown in the description
 * @param query      query shown in the description
 * @param doc        document id to explain
 * @return {@code null} when {@code isMatched(doc)} is false; otherwise a matching explanation
 *         whose value and single detail come from {@code docScorer.explain}
 */
public Explanation explain(Similarity similarity, Query query, int doc) {
  if (!isMatched(doc)) {
    return null;
  }

  final ComplexExplanation explanation = new ComplexExplanation();
  final String description =
      "weight(" + query + " in " + doc + ") [" + similarity.getClass().getSimpleName() + "], result of:";
  explanation.setDescription(description);

  // The matched frequency is passed to the doc scorer as the term-frequency explanation.
  final Explanation freqExplanation = new Explanation(matchedFreq, "termFreq=" + matchedFreq);
  final Explanation scoreDetail = docScorer.explain(doc, freqExplanation);

  explanation.addDetail(scoreDetail);
  explanation.setValue(scoreDetail.getValue());
  explanation.setMatch(true);
  return explanation;
}
/** jelinek-mercer with parameters: the "text_params" field must use lambda=0.4 */
public void testParameters() throws Exception {
  final Similarity configured = getSimilarity("text_params");
  assertEquals(LMJelinekMercerSimilarity.class, configured.getClass());

  final LMJelinekMercerSimilarity jelinekMercer = (LMJelinekMercerSimilarity) configured;
  final float tolerance = 0.01f;
  assertEquals(0.4f, jelinekMercer.getLambda(), tolerance);
}
/**
 * Set the Similarity to be used for calculating field norms.
 *
 * Passing the instance that is already set is a no-op. Otherwise the cached norm of every
 * field is cleared.
 *
 * @throws IllegalArgumentException if this index is frozen
 */
public void setSimilarity(Similarity similarity) {
  if (frozen) {
    throw new IllegalArgumentException("Cannot set Similarity when MemoryIndex is frozen");
  }
  if (this.normSimilarity != similarity) {
    this.normSimilarity = similarity;
    // invalidate any cached norms that may exist
    for (Info fieldInfo : fields.values()) {
      fieldInfo.norm = null;
    }
  }
}
/**
 * Verifies that the norm reported for a field changes after a different Similarity is set on
 * the {@code MemoryIndex}, using the same reader before and after.
 */
@Test
public void testSimilarities() throws IOException {
  MemoryIndex index = new MemoryIndex();
  index.addField("f1", "a long text field that contains many many terms", analyzer);

  IndexSearcher indexSearcher = index.createSearcher();
  LeafReader leafReader = (LeafReader) indexSearcher.getIndexReader();

  NumericDocValues normValues = leafReader.getNormValues("f1");
  assertEquals(0, normValues.nextDoc());
  float normBefore = normValues.longValue();

  // Norms are re-computed when we change the Similarity
  Similarity constantNorm = new Similarity() {
    @Override
    public long computeNorm(FieldInvertState state) {
      return 74;
    }

    @Override
    public SimScorer scorer(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
      throw new UnsupportedOperationException();
    }
  };
  index.setSimilarity(constantNorm);

  normValues = leafReader.getNormValues("f1");
  assertEquals(0, normValues.nextDoc());
  float normAfter = normValues.longValue();

  assertTrue(normBefore != normAfter);
  TestUtil.checkReader(leafReader);
}
/**
 * Builds the similarity described by the given configuration.
 *
 * A {@code ClassicSimilarity} is created when {@code config.isUseClassicSimilarity()} is true,
 * otherwise a {@code BM25Similarity} with the configured k1 and b. In both cases the
 * discount-overlaps flag is copied from the configuration.
 */
private Similarity createSimilarity(SimilarityConfig config) {
  if (config.isUseClassicSimilarity()) {
    final ClassicSimilarity classic = new ClassicSimilarity();
    classic.setDiscountOverlaps(config.isDiscountOverlaps());
    return classic;
  }

  final BM25Similarity bm25 = new BM25Similarity(config.getK1(), config.getB());
  bm25.setDiscountOverlaps(config.isDiscountOverlaps());
  return bm25;
}
/**
 * Resolves the similarity used for {@code field} and returns it as a {@link TFIDFSimilarity}.
 *
 * Per-field wrappers are unwrapped repeatedly until a non-wrapper similarity is reached.
 *
 * @return the unwrapped similarity when it is a {@code TFIDFSimilarity}, otherwise {@code null}
 */
static TFIDFSimilarity asTFIDF(Similarity sim, String field) {
  Similarity resolved = sim;
  while (resolved instanceof PerFieldSimilarityWrapper) {
    resolved = ((PerFieldSimilarityWrapper) resolved).get(field);
  }
  return (resolved instanceof TFIDFSimilarity) ? (TFIDFSimilarity) resolved : null;
}
/**
 * Returns a scorer computing {@code weight * (1 - pivot / (pivot + freq))}; the norm passed to
 * {@code score} is not used.
 */
@Override
public Similarity.SimScorer scorer(float weight) {
  final Similarity.SimScorer saturationScorer = new Similarity.SimScorer() {
    @Override
    public float score(float freq, long norm) {
      // Mathematically this is freq / (freq + pivot). It is written as
      // 1 - pivot / (freq + pivot) so that rounding can never make the
      // score decrease as freq grows.
      return weight * (1.0f - pivot / (pivot + freq));
    }
  };
  return saturationScorer;
}
/**
 * Returns a scorer computing {@code weight * (1 - pivotPa / (freq^a + pivotPa))}; the norm
 * passed to {@code score} is not used.
 */
@Override
public Similarity.SimScorer scorer(float weight) {
  final Similarity.SimScorer sigmoidScorer = new Similarity.SimScorer() {
    @Override
    public float score(float freq, long norm) {
      // Mathematically this is f^a / (f^a + k^a). It is written as
      // 1 - k^a / (f^a + k^a) so that rounding can never make the
      // score decrease as f grows.
      return (float) (weight * (1.0f - pivotPa / (Math.pow(freq, a) + pivotPa)));
    }
  };
  return sigmoidScorer;
}
/**
 * Exercises {@code IDFValueSource} for the term "test" in the "text" field with a
 * {@code ClassicSimilarity} installed on the shared searcher; the previous similarity is
 * restored afterwards.
 */
public void testIDF() throws Exception {
  final Similarity previous = searcher.getSimilarity();
  try {
    searcher.setSimilarity(new ClassicSimilarity());

    final ValueSource idf = new IDFValueSource("bogus", "bogus", "text", new BytesRef("test"));
    final float[] expected = { 1.0f, 1.0f };
    assertHits(new FunctionQuery(idf), expected);
    assertAllExist(idf);
  } finally {
    searcher.setSimilarity(previous);
  }
}
/**
 * Runs {@code NormValueSource} on the "text" field with the test's own {@code sim} installed
 * on the shared searcher and expects 0 for both hits; the previous similarity is restored
 * afterwards.
 */
public void testNorm() throws Exception {
  final Similarity previous = searcher.getSimilarity();
  try {
    // no norm field (so agnostic to indexed similarity)
    searcher.setSimilarity(sim);

    final FunctionQuery normQuery = new FunctionQuery(new NormValueSource("text"));
    final float[] expected = { 0f, 0f };
    assertHits(normQuery, expected);
  } finally {
    searcher.setSimilarity(previous);
  }
}
/**
 * Creates a {@link KNearestFuzzyClassifier}.
 *
 * @param indexReader    the reader on the index to be used for classification
 * @param similarity     the {@link Similarity} to be used by the underlying {@link IndexSearcher} or {@code null}
 *                       (defaults to {@link BM25Similarity})
 * @param analyzer       an {@link Analyzer} used to analyze unseen text
 * @param query          a {@link Query} to eventually filter the docs used for training the classifier, or {@code null}
 *                       if all the indexed docs should be used
 * @param k              the no. of docs to select in the MLT results to find the nearest neighbor
 * @param classFieldName the name of the field used as the output for the classifier
 * @param textFieldNames the name of the fields used as the inputs for the classifier, they can contain boosting indication e.g. title^10
 */
public KNearestFuzzyClassifier(IndexReader indexReader, Similarity similarity, Analyzer analyzer, Query query, int k,
                               String classFieldName, String... textFieldNames) {
  this.textFieldNames = textFieldNames;
  this.classFieldName = classFieldName;
  this.analyzer = analyzer;

  // Searcher falls back to BM25 when the caller supplies no similarity.
  this.indexSearcher = new IndexSearcher(indexReader);
  this.indexSearcher.setSimilarity(similarity != null ? similarity : new BM25Similarity());

  this.query = query;
  this.k = k;
}
/** legacy bm25 with parameters: the "legacy_text_params" field must use k1=1.2 and b=0.76 */
public void testParameters() throws Exception {
  final float tolerance = 0.01f;
  final Similarity configured = getSimilarity("legacy_text_params");
  assertEquals(LegacyBM25Similarity.class, configured.getClass());

  final LegacyBM25Similarity legacyBm25 = (LegacyBM25Similarity) configured;
  assertEquals(1.2f, legacyBm25.getK1(), tolerance);
  assertEquals(0.76f, legacyBm25.getB(), tolerance);
}
/**
 * dfr with parametrized normalization: the "text_params" field must use basic model IF,
 * after-effect B and normalization H3 with mu=900
 */
public void testParameters() throws Exception {
  final Similarity configured = getSimilarity("text_params");
  assertEquals(DFRSimilarity.class, configured.getClass());

  final DFRSimilarity dfrSim = (DFRSimilarity) configured;
  assertEquals(BasicModelIF.class, dfrSim.getBasicModel().getClass());
  assertEquals(AfterEffectB.class, dfrSim.getAfterEffect().getClass());
  assertEquals(NormalizationH3.class, dfrSim.getNormalization().getClass());

  // The class was checked above, so the normalization can be cast to read its mu parameter.
  final NormalizationH3 h3 = (NormalizationH3) dfrSim.getNormalization();
  assertEquals(900f, h3.getMu(), 0.01f);
}