下面列出了 org.apache.lucene.search.suggest.Lookup API 类的实例代码及用法示例;也可以点击链接到 GitHub 查看完整源代码。
/**
 * Looks up suggestions for {@code keyword}, restricted to the given region
 * context, and prints each result plus the Product deserialized from its payload.
 *
 * @param suggester suggester built with region contexts and serialized Product payloads
 * @param keyword   term to complete
 * @param region    context value used to filter candidate suggestions
 * @throws IOException if the underlying suggester index cannot be read
 */
private static void lookup(AnalyzingInfixSuggester suggester, String keyword,
                           String region) throws IOException {
  HashSet<BytesRef> contexts = new HashSet<BytesRef>();
  // BytesRef(CharSequence) encodes as UTF-8 — same bytes as region.getBytes("UTF8")
  // without the checked UnsupportedEncodingException.
  contexts.add(new BytesRef(region));
  // Filter by contexts first, then match on keyword; return top 2 ordered by weight.
  // 3rd boolean: whether every term must match; 4th: whether to highlight the keyword.
  // BUG FIX: the original built `contexts` but never passed it to lookup(),
  // so the region filter was silently ignored.
  List<Lookup.LookupResult> results = suggester.lookup(keyword, contexts, 2, true, false);
  System.out.println("-- \"" + keyword + "\" (" + region + "):");
  for (Lookup.LookupResult result : results) {
    System.out.println(result.key);
    // Deserialize the Product object carried in the payload.
    BytesRef bytesRef = result.payload;
    InputStream is = Tools.bytes2InputStream(bytesRef.bytes);
    Product product = (Product) Tools.deSerialize(is);
    System.out.println("product-Name:" + product.getName());
    System.out.println("product-regions:" + product.getRegions());
    System.out.println("product-image:" + product.getImage());
    System.out.println("product-numberSold:" + product.getNumberSold());
  }
  System.out.println();
}
/**
 * Orders LookupResults by ascending weight, then key, then payload.
 * Null payloads sort before non-null payloads so the ordering stays
 * antisymmetric regardless of which side carries a payload.
 */
@Override
public int compare(Lookup.LookupResult o1, Lookup.LookupResult o2) {
  // Primary: weight (Long.compare avoids hand-rolled branches).
  int weightCompare = Long.compare(o1.value, o2.value);
  if (weightCompare != 0) {
    return weightCompare;
  }
  // Secondary: alphabetic order of the suggestion keys.
  int keyCompare = CHARSEQUENCE_COMPARATOR.compare(o1.key, o2.key);
  if (keyCompare != 0) {
    return keyCompare;
  }
  // Tertiary: payloads, but only when both are present.
  // BUG FIX: the original called o1.payload.compareTo(o2.payload) whenever
  // o1.payload was non-null, which NPEs (and breaks comparator symmetry)
  // when only one of the two results carries a payload.
  if (o1.payload != null && o2.payload != null) {
    return o1.payload.compareTo(o2.payload);
  }
  if (o1.payload != null) {
    return 1;
  }
  if (o2.payload != null) {
    return -1;
  }
  return 0;
}
/**
 * Builds a BlendedInfixSuggester over {@code inputs}, looks up "ear" and
 * asserts the number of suggestions returned after de-duplication.
 *
 * @param inputs                  suggestion inputs, possibly containing duplicates
 * @param expectedSuggestionCount expected result count after de-duplication
 * @return the lookup results, for further assertions by callers
 * @throws IOException if building or querying the suggester fails
 */
private List<Lookup.LookupResult> duplicateCheck(Input[] inputs, int expectedSuggestionCount) throws IOException {
  // RESOURCE FIX: try-with-resources closes the suggester and then the
  // analyzer even when the assertion fails (the original leaked both).
  try (Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
       BlendedInfixSuggester suggester = new BlendedInfixSuggester(newDirectory(), a, a,
           AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS,
           BlendedInfixSuggester.BlenderType.POSITION_RECIPROCAL, 10, false)) {
    suggester.build(new InputArrayIterator(inputs));
    List<Lookup.LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), 10, true, true);
    assertEquals(expectedSuggestionCount, results.size());
    return results;
  }
}
/**
 * Builds a FreeTextSuggester from factory config: resolves the mandatory
 * query-analyzer field type from the schema, then reads the optional n-gram
 * order and separator byte.
 */
@Override
public Lookup create(@SuppressWarnings({"rawtypes"})NamedList params, SolrCore core) {
  // The query analyzer field type is mandatory; fail fast when missing.
  Object fieldTypeName = params.get(QUERY_ANALYZER);
  if (fieldTypeName == null) {
    throw new IllegalArgumentException("Error in configuration: " + QUERY_ANALYZER + " parameter is mandatory");
  }
  FieldType ft = core.getLatestSchema().getFieldTypeByName(fieldTypeName.toString());
  if (ft == null) {
    throw new IllegalArgumentException("Error in configuration: " + fieldTypeName.toString() + " is not defined in the schema");
  }
  Analyzer indexAnalyzer = ft.getIndexAnalyzer();
  Analyzer queryAnalyzer = ft.getQueryAnalyzer();
  // Optional n-gram order; fall back to the suggester default.
  int grams = FreeTextSuggester.DEFAULT_GRAMS;
  Object gramsParam = params.get(NGRAMS);
  if (gramsParam != null) {
    grams = Integer.parseInt(gramsParam.toString());
  }
  // Optional separator; only the first UTF-8 byte of the configured value is used.
  byte separator = FreeTextSuggester.DEFAULT_SEPARATOR;
  Object separatorParam = params.get(SEPARATOR);
  if (separatorParam != null) {
    separator = separatorParam.toString().getBytes(StandardCharsets.UTF_8)[0];
  }
  return new FreeTextSuggester(indexAnalyzer, queryAnalyzer, grams, separator);
}
/**
 * Adds {@code result} to {@code results} while keeping at most {@code num}
 * entries, evicting the lowest-weight entry when the tree is full.
 *
 * @param results the tree to add in (ordered so {@code first()} is the
 *                lowest-weight retained element)
 * @param result  the result we try to add
 * @param num     size limit (non-positive means "retain nothing")
 */
private static void boundedTreeAdd(TreeSet<Lookup.LookupResult> results, Lookup.LookupResult result, int num) {
  // ROBUSTNESS FIX: with num <= 0 the original reached results.first() on a
  // possibly-empty tree and threw NoSuchElementException; a non-positive
  // limit now simply drops the candidate.
  if (num <= 0) {
    return;
  }
  if (results.size() >= num) {
    // Tree is full: evict the current minimum only when the new result
    // outweighs it; otherwise the candidate is discarded.
    if (results.first().value < result.value) {
      results.pollFirst();
    } else {
      return;
    }
  }
  results.add(result);
}
/**
 * Priority-queue ordering: a lower score ranks as "less than"; on equal
 * scores the lexicographically larger key loses, and on equal keys the
 * larger doc id loses (so smaller doc ids are preferred).
 */
@Override
protected boolean lessThan(SuggestScoreDoc a, SuggestScoreDoc b) {
  if (a.score == b.score) {
    // tie break by completion key
    int cmp = Lookup.CHARSEQUENCE_COMPARATOR.compare(a.key, b.key);
    // prefer smaller doc id, in case of a tie
    return cmp != 0 ? cmp > 0 : a.doc > b.doc;
  }
  return a.score < b.score;
}
/**
 * Exercises BlendedInfixSuggester with POSITION_RECIPROCAL blending and
 * prints the results for the prefix "the".
 *
 * NOTE(review): part of the method name is commented out ("testT"), which
 * keeps test runners from picking this up; left as-is since re-enabling it
 * is a behavioral decision for the author.
 *
 * @throws IOException if the suggester index cannot be built or read
 */
public void /*testT*/rying() throws IOException {
  BytesRef lake = new BytesRef("lake");
  BytesRef star = new BytesRef("star");
  BytesRef ret = new BytesRef("ret");
  Input keys[] = new Input[]{
      new Input("top of the lake", 15, lake),
      new Input("star wars: episode v - the empire strikes back", 12, star),
      new Input("the returned", 10, ret),
  };
  Path tempDir = createTempDir("BlendedInfixSuggesterTest");
  // if factor is small, we don't get the expected element
  // RESOURCE FIX: close both the suggester and the analyzer (the original
  // never closed the analyzer, and leaked the suggester on exception).
  try (Analyzer a = new StandardAnalyzer(CharArraySet.EMPTY_SET);
       BlendedInfixSuggester suggester = new BlendedInfixSuggester(newFSDirectory(tempDir), a, a,
           AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS,
           BlendedInfixSuggester.BlenderType.POSITION_RECIPROCAL,
           BlendedInfixSuggester.DEFAULT_NUM_FACTOR, false)) {
    suggester.build(new InputArrayIterator(keys));
    List<Lookup.LookupResult> responses = suggester.lookup("the", 4, true, false);
    for (Lookup.LookupResult response : responses) {
      System.out.println(response);
    }
  }
}
/**
 * Runs a lookup for {@code prefix} and returns the weight of the first
 * result whose payload equals {@code payload}, or -1 when none matches.
 *
 * @param suggester suggester to query
 * @param prefix    lookup prefix
 * @param payload   expected payload (must be non-null)
 * @param num       maximum number of results to fetch
 * @return the matching result's weight, or -1 if not found
 * @throws IOException if the suggester index cannot be read
 */
private static long getInResults(BlendedInfixSuggester suggester, String prefix, BytesRef payload, int num) throws IOException {
  List<Lookup.LookupResult> responses = suggester.lookup(prefix, num, true, false);
  for (Lookup.LookupResult response : responses) {
    // BUG FIX: compare with the known-non-null expected payload on the left;
    // the original dereferenced response.payload and NPE'd on results that
    // were indexed without a payload.
    if (payload.equals(response.payload)) {
      return response.value;
    }
  }
  return -1;
}
/**
 * Builds a WFSTCompletionLookup; the exact-match-first option defaults to
 * true when not configured.
 */
@Override
public Lookup create(@SuppressWarnings({"rawtypes"})NamedList params, SolrCore core) {
  Object exactFirstParam = params.get(EXACT_MATCH_FIRST);
  boolean exactMatchFirst = exactFirstParam == null || Boolean.parseBoolean(exactFirstParam.toString());
  return new WFSTCompletionLookup(getTempDir(), "suggester", exactMatchFirst);
}
/**
 * Builds an FSTCompletionLookup; weight buckets default to 10 and
 * exact-match-first defaults to true when not configured.
 */
@Override
public Lookup create(@SuppressWarnings({"rawtypes"})NamedList params, SolrCore core) {
  Object bucketsParam = params.get(WEIGHT_BUCKETS);
  int buckets = bucketsParam == null ? 10 : Integer.parseInt(bucketsParam.toString());
  Object exactFirstParam = params.get(EXACT_MATCH_FIRST);
  boolean exactMatchFirst = exactFirstParam == null || Boolean.parseBoolean(exactFirstParam.toString());
  return new FSTCompletionLookup(getTempDir(), "suggester", buckets, exactMatchFirst);
}
/** Delegates lookup creation to the wrapped {@code lookup} provider. */
public Lookup getLookup(CompletionFieldMapper.CompletionFieldType mapper, CompletionSuggestionContext suggestionContext) {
  final Lookup delegate = lookup.getLookup(mapper, suggestionContext);
  return delegate;
}
/**
 * Delegating override: intentionally does NOT multiply {@code num} by
 * {@code numFactor} — the over-fetch is applied exactly once, in the
 * terminal overload of the lookup chain.
 */
@Override
public List<Lookup.LookupResult> lookup(CharSequence key, Set<BytesRef> contexts, boolean onlyMorePopular, int num) throws IOException {
  // Don't * numFactor here since we do it down below, once, in the call chain:
  return super.lookup(key, contexts, onlyMorePopular, num);
}
/**
 * Delegating override: intentionally does NOT multiply {@code num} by
 * {@code numFactor} — the over-fetch is applied exactly once, in the
 * terminal overload of the lookup chain.
 */
@Override
public List<Lookup.LookupResult> lookup(CharSequence key, Set<BytesRef> contexts, int num, boolean allTermsRequired, boolean doHighlight) throws IOException {
  // Don't * numFactor here since we do it down below, once, in the call chain:
  return super.lookup(key, contexts, num, allTermsRequired, doHighlight);
}
/**
 * Delegating override: intentionally does NOT multiply {@code num} by
 * {@code numFactor} — the over-fetch is applied exactly once, in the
 * terminal overload of the lookup chain.
 */
@Override
public List<Lookup.LookupResult> lookup(CharSequence key, Map<BytesRef, BooleanClause.Occur> contextInfo, int num, boolean allTermsRequired, boolean doHighlight) throws IOException {
  // Don't * numFactor here since we do it down below, once, in the call chain:
  return super.lookup(key, contextInfo, num, allTermsRequired, doHighlight);
}
/**
 * Terminal overload of the lookup chain: applies the {@code num * numFactor}
 * over-fetch exactly once before delegating, so the sibling overloads above
 * must not multiply again.
 */
@Override
public List<Lookup.LookupResult> lookup(CharSequence key, BooleanQuery contextQuery, int num, boolean allTermsRequired, boolean doHighlight) throws IOException {
  // We need to do num * numFactor here only because it is the last call in the lookup chain.
  return super.lookup(key, contextQuery, num * numFactor, allTermsRequired, doHighlight);
}
/**
 * Converts raw search hits into LookupResults, blending each stored weight
 * with a position-based coefficient, and retains only the best
 * {@code num / numFactor} entries (the query over-fetched by numFactor).
 */
@Override
protected List<Lookup.LookupResult> createResults(IndexSearcher searcher, TopFieldDocs hits, int num, CharSequence key,
                                                  boolean doHighlight, Set<String> matchedTokens, String prefixToken)
    throws IOException {
  // Bounded tree ordered by LOOKUP_COMP; first() is the weakest retained result.
  TreeSet<Lookup.LookupResult> results = new TreeSet<>(LOOKUP_COMP);
  // we reduce the num to the one initially requested
  int actualNum = num / numFactor;
  for (int i = 0; i < hits.scoreDocs.length; i++) {
    FieldDoc fd = (FieldDoc) hits.scoreDocs[i];
    // Suggestion text is read from binary doc values under TEXT_FIELD_NAME.
    BinaryDocValues textDV = MultiDocValues.getBinaryValues(searcher.getIndexReader(), TEXT_FIELD_NAME);
    assert textDV != null;
    textDV.advance(fd.doc);
    final String text = textDV.binaryValue().utf8ToString();
    // The sort field carries the indexed weight.
    long weight = (Long) fd.fields[0];
    // This will just be null if app didn't pass payloads to build():
    // TODO: maybe just stored fields? they compress...
    BinaryDocValues payloadsDV = MultiDocValues.getBinaryValues(searcher.getIndexReader(), "payloads");
    BytesRef payload;
    if (payloadsDV != null) {
      if (payloadsDV.advance(fd.doc) == fd.doc) {
        payload = BytesRef.deepCopyOf(payloadsDV.binaryValue());
      } else {
        // This doc has no payload entry; use an empty (non-null) payload.
        payload = new BytesRef(BytesRef.EMPTY_BYTES);
      }
    } else {
      payload = null;
    }
    double coefficient;
    if (text.startsWith(key.toString())) {
      // if hit starts with the key, we don't change the score
      coefficient = 1;
    } else {
      // Otherwise derive a coefficient from where the matched tokens occur.
      coefficient = createCoefficient(searcher, fd.doc, matchedTokens, prefixToken);
    }
    // Avoid a zero weight wiping out the coefficient entirely.
    if (weight == 0) {
      weight = 1;
    }
    // NOTE(review): appears to rescale near-zero weights by 1/LINEAR_COEF so
    // the coefficient survives the (long) truncation below — confirm intent.
    if (weight < 1 / LINEAR_COEF && weight > -1 / LINEAR_COEF) {
      weight *= 1 / LINEAR_COEF;
    }
    long score = (long) (weight * coefficient);
    LookupResult result;
    if (doHighlight) {
      result = new LookupResult(text, highlight(text, matchedTokens, prefixToken), score, payload);
    } else {
      result = new LookupResult(text, score, payload);
    }
    boundedTreeAdd(results, result, actualNum);
  }
  // Highest-ranked first.
  return new ArrayList<>(results.descendingSet());
}
/** Orders suggestions lexicographically by their completion key. */
@Override
public int compareTo(SuggestScoreDoc o) {
  final CharSequence mine = this.key;
  final CharSequence theirs = o.key;
  return Lookup.CHARSEQUENCE_COMPARATOR.compare(mine, theirs);
}
/**
 * Checks BlendedInfixSuggester de-duplication: entries collapse only when
 * text, weight, and payload are all identical.
 *
 * @throws Exception if building or querying a suggester fails
 */
public void testBlendedInfixSuggesterDedupsOnWeightTitleAndPayload() throws Exception {
  // exactly same inputs -> collapse to one suggestion
  Input[] inputDocuments = new Input[] {
      new Input("lend me your ear", 7, new BytesRef("uid1")),
      new Input("lend me your ear", 7, new BytesRef("uid1")),
  };
  duplicateCheck(inputDocuments, 1);
  // inputs differ on payload -> both kept
  inputDocuments = new Input[] {
      new Input("lend me your ear", 7, new BytesRef("uid1")),
      new Input("lend me your ear", 7, new BytesRef("uid2")),
  };
  duplicateCheck(inputDocuments, 2);
  // exactly same input without payloads -> collapse to one
  inputDocuments = new Input[] {
      new Input("lend me your ear", 7),
      new Input("lend me your ear", 7),
  };
  duplicateCheck(inputDocuments, 1);
  // Same input with first has payloads, second does not -> both kept
  inputDocuments = new Input[] {
      new Input("lend me your ear", 7, new BytesRef("uid1")),
      new Input("lend me your ear", 7),
  };
  duplicateCheck(inputDocuments, 2);
  // same input, first not having a payload, the second having payload:
  // we would expect 2 entries out but we are getting only 1 because
  // the InputArrayIterator#hasPayloads() returns false because the first
  // item has no payload, therefore, when ingested, none of the 2 input has payload and become 1
  inputDocuments = new Input[] {
      new Input("lend me your ear", 7),
      new Input("lend me your ear", 7, new BytesRef("uid2")),
  };
  List<Lookup.LookupResult> results = duplicateCheck(inputDocuments, 1);
  assertNull(results.get(0).payload);
  // exactly same inputs but different weight -> both kept
  inputDocuments = new Input[] {
      new Input("lend me your ear", 1, new BytesRef("uid1")),
      new Input("lend me your ear", 7, new BytesRef("uid1")),
  };
  duplicateCheck(inputDocuments, 2);
  // exactly same inputs but different text -> both kept
  inputDocuments = new Input[] {
      new Input("lend me your earings", 7, new BytesRef("uid1")),
      new Input("lend me your ear", 7, new BytesRef("uid1")),
  };
  duplicateCheck(inputDocuments, 2);
}
/** Builds a JaspellLookup, which needs no configuration; logs the params it was given. */
@Override
public Lookup create(@SuppressWarnings({"rawtypes"})NamedList params, SolrCore core) {
  log.info("init: {}", params);
  final Lookup jaspell = new JaspellLookup();
  return jaspell;
}
/**
 * Builds a FuzzySuggester: resolves the mandatory query-analyzer field type
 * from the schema, then reads each optional tuning parameter with its
 * documented default.
 */
@Override
public Lookup create(@SuppressWarnings({"rawtypes"})NamedList params, SolrCore core) {
  // mandatory parameter
  Object fieldTypeName = params.get(AnalyzingLookupFactory.QUERY_ANALYZER);
  if (fieldTypeName == null) {
    throw new IllegalArgumentException("Error in configuration: " + AnalyzingLookupFactory.QUERY_ANALYZER + " parameter is mandatory");
  }
  // retrieve index and query analyzers for the field
  FieldType ft = core.getLatestSchema().getFieldTypeByName(fieldTypeName.toString());
  if (ft == null) {
    throw new IllegalArgumentException("Error in configuration: " + fieldTypeName.toString() + " is not defined in the schema");
  }
  Analyzer indexAnalyzer = ft.getIndexAnalyzer();
  Analyzer queryAnalyzer = ft.getQueryAnalyzer();
  // Optional parameters: each key is read once, falling back to its default.
  Object v;
  v = params.get(AnalyzingLookupFactory.EXACT_MATCH_FIRST);
  boolean exactMatchFirst = v == null || Boolean.parseBoolean(v.toString());
  v = params.get(AnalyzingLookupFactory.PRESERVE_SEP);
  boolean preserveSep = v == null || Boolean.parseBoolean(v.toString());
  int options = 0;
  if (exactMatchFirst) {
    options |= FuzzySuggester.EXACT_FIRST;
  }
  if (preserveSep) {
    options |= FuzzySuggester.PRESERVE_SEP;
  }
  v = params.get(AnalyzingLookupFactory.MAX_SURFACE_FORMS);
  int maxSurfaceFormsPerAnalyzedForm = v == null ? 256 : Integer.parseInt(v.toString());
  v = params.get(AnalyzingLookupFactory.MAX_EXPANSIONS);
  int maxGraphExpansions = v == null ? -1 : Integer.parseInt(v.toString());
  v = params.get(AnalyzingLookupFactory.PRESERVE_POSITION_INCREMENTS);
  boolean preservePositionIncrements = v != null && Boolean.parseBoolean(v.toString());
  v = params.get(MAX_EDITS);
  int maxEdits = v == null ? FuzzySuggester.DEFAULT_MAX_EDITS : Integer.parseInt(v.toString());
  v = params.get(TRANSPOSITIONS);
  boolean transpositions = v == null ? FuzzySuggester.DEFAULT_TRANSPOSITIONS : Boolean.parseBoolean(v.toString());
  v = params.get(NON_FUZZY_PREFIX);
  int nonFuzzyPrefix = v == null ? FuzzySuggester.DEFAULT_NON_FUZZY_PREFIX : Integer.parseInt(v.toString());
  v = params.get(MIN_FUZZY_LENGTH);
  int minFuzzyLength = v == null ? FuzzySuggester.DEFAULT_MIN_FUZZY_LENGTH : Integer.parseInt(v.toString());
  v = params.get(UNICODE_AWARE);
  boolean unicodeAware = v == null ? FuzzySuggester.DEFAULT_UNICODE_AWARE : Boolean.parseBoolean(v.toString());
  return new FuzzySuggester(getTempDir(), "suggester", indexAnalyzer, queryAnalyzer, options,
      maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, preservePositionIncrements,
      maxEdits, transpositions, nonFuzzyPrefix, minFuzzyLength, unicodeAware);
}
/**
 * Builds an AnalyzingSuggester: resolves the mandatory query-analyzer field
 * type from the schema, then reads the optional flags and limits with their
 * defaults.
 */
@Override
public Lookup create(@SuppressWarnings({"rawtypes"})NamedList params, SolrCore core) {
  // mandatory parameter
  Object fieldTypeName = params.get(QUERY_ANALYZER);
  if (fieldTypeName == null) {
    throw new IllegalArgumentException("Error in configuration: " + QUERY_ANALYZER + " parameter is mandatory");
  }
  FieldType ft = core.getLatestSchema().getFieldTypeByName(fieldTypeName.toString());
  if (ft == null) {
    throw new IllegalArgumentException("Error in configuration: " + fieldTypeName.toString() + " is not defined in the schema");
  }
  Analyzer indexAnalyzer = ft.getIndexAnalyzer();
  Analyzer queryAnalyzer = ft.getQueryAnalyzer();
  // Optional parameters: each key is read once, falling back to its default.
  Object v;
  v = params.get(EXACT_MATCH_FIRST);
  boolean exactMatchFirst = v == null || Boolean.parseBoolean(v.toString());
  v = params.get(PRESERVE_SEP);
  boolean preserveSep = v == null || Boolean.parseBoolean(v.toString());
  int flags = 0;
  if (exactMatchFirst) {
    flags |= AnalyzingSuggester.EXACT_FIRST;
  }
  if (preserveSep) {
    flags |= AnalyzingSuggester.PRESERVE_SEP;
  }
  v = params.get(MAX_SURFACE_FORMS);
  int maxSurfaceFormsPerAnalyzedForm = v == null ? 256 : Integer.parseInt(v.toString());
  v = params.get(MAX_EXPANSIONS);
  int maxGraphExpansions = v == null ? -1 : Integer.parseInt(v.toString());
  v = params.get(PRESERVE_POSITION_INCREMENTS);
  boolean preservePositionIncrements = v != null && Boolean.parseBoolean(v.toString());
  return new AnalyzingSuggester(getTempDir(), "suggester", indexAnalyzer, queryAnalyzer, flags,
      maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, preservePositionIncrements);
}
/** Builds a TSTLookup, which needs no configuration beyond a scratch directory. */
@Override
public Lookup create(@SuppressWarnings({"rawtypes"})NamedList params, SolrCore core) {
  final Lookup tstLookup = new TSTLookup(getTempDir(), "suggester");
  return tstLookup;
}
/**
 * Verifies that every lookup(...) overload honors the requested result
 * count (asking for 1 and then 2 results through each overload).
 *
 * @throws IOException if the suggester index cannot be built or read
 */
public void testSuggesterCountForAllLookups() throws IOException {
  Input keys[] = new Input[]{
      new Input("lend me your ears", 1),
      new Input("as you sow so shall you reap", 1),
  };
  Path tempDir = createTempDir("BlendedInfixSuggesterTest");
  // BlenderType.LINEAR is used by default (remove position*10%)
  // RESOURCE FIX: try-with-resources closes both the suggester and the
  // analyzer; the original never closed the analyzer and leaked the
  // suggester when an assertion failed.
  try (Analyzer a = new StandardAnalyzer(CharArraySet.EMPTY_SET);
       BlendedInfixSuggester suggester = new BlendedInfixSuggester(newFSDirectory(tempDir), a)) {
    suggester.build(new InputArrayIterator(keys));
    String term = "you";
    // lookup(key, onlyMorePopular, num)
    List<Lookup.LookupResult> responses = suggester.lookup(term, false, 1);
    assertEquals(1, responses.size());
    responses = suggester.lookup(term, false, 2);
    assertEquals(2, responses.size());
    // lookup(key, num, allTermsRequired, doHighlight)
    responses = suggester.lookup(term, 1, false, false);
    assertEquals(1, responses.size());
    responses = suggester.lookup(term, 2, false, false);
    assertEquals(2, responses.size());
    // lookup(key, contextInfo map, num, allTermsRequired, doHighlight)
    responses = suggester.lookup(term, (Map<BytesRef, BooleanClause.Occur>) null, 1, false, false);
    assertEquals(1, responses.size());
    responses = suggester.lookup(term, (Map<BytesRef, BooleanClause.Occur>) null, 2, false, false);
    assertEquals(2, responses.size());
    // lookup(key, contexts set, num, allTermsRequired, doHighlight)
    responses = suggester.lookup(term, (Set<BytesRef>) null, 1, false, false);
    assertEquals(1, responses.size());
    responses = suggester.lookup(term, (Set<BytesRef>) null, 2, false, false);
    assertEquals(2, responses.size());
    // lookup(key, contexts, onlyMorePopular, num)
    responses = suggester.lookup(term, null, false, 1);
    assertEquals(1, responses.size());
    responses = suggester.lookup(term, null, false, 2);
    assertEquals(2, responses.size());
    // lookup(key, contextQuery, num, allTermsRequired, doHighlight)
    responses = suggester.lookup(term, (BooleanQuery) null, 1, false, false);
    assertEquals(1, responses.size());
    responses = suggester.lookup(term, (BooleanQuery) null, 2, false, false);
    assertEquals(2, responses.size());
  }
}
/**
 * Creates a {@link Lookup} using the config options in <code>params</code>
 * and the current <code>core</code>.
 *
 * @param params factory configuration options
 * @param core   the SolrCore this suggester belongs to
 * @return the constructed Lookup
 */
public abstract Lookup create(@SuppressWarnings({"rawtypes"})NamedList params, SolrCore core);
/**
 * Returns the {@link Lookup} used to serve completion suggestions for the
 * given completion field type and suggestion context.
 *
 * @param fieldType         the completion field type being queried
 * @param suggestionContext the current suggestion request context
 * @return the Lookup to execute the suggestion against
 */
public abstract Lookup getLookup(CompletionFieldMapper.CompletionFieldType fieldType, CompletionSuggestionContext suggestionContext);