下面列出了 org.apache.lucene.search.spell.SuggestMode 类的 API 用法示例代码及写法；也可以点击链接到 GitHub 查看完整源代码。
/**
 * Creates a candidate generator that draws suggestions for {@code field} from the given spell checker.
 *
 * <p>The dictionary size is taken from {@code terms.getSumTotalTermFreq()}; when that call reports
 * {@code -1} the constructor falls back to {@code reader.maxDoc()} and records that total term
 * frequencies are not in use.
 *
 * @param spellchecker       spell checker used to look up similar terms
 * @param field              name of the field the suggestions are generated for
 * @param suggestMode        suggest mode passed on to the spell checker
 * @param reader             index reader the spell checker runs against
 * @param nonErrorLikelihood stored as-is on this generator
 * @param numCandidates      number of candidates requested from the spell checker
 * @param preFilter          analyzer stored as the pre-filter (the delegating constructor passes {@code null})
 * @param postFilter         analyzer stored as the post-filter (the delegating constructor passes {@code null})
 * @param terms              terms of {@code field}; must not be {@code null}
 * @throws IllegalArgumentException if {@code terms} is {@code null}
 * @throws IOException              if reading term statistics or opening the terms iterator fails
 */
public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader,
        double nonErrorLikelihood, int numCandidates, Analyzer preFilter, Analyzer postFilter, Terms terms) throws IOException {
    if (terms == null) {
        throw new IllegalArgumentException("generator field [" + field + "] doesn't exist");
    }
    this.spellchecker = spellchecker;
    this.field = field;
    this.numCandidates = numCandidates;
    this.suggestMode = suggestMode;
    this.reader = reader;

    final long sumTotalTermFreq = terms.getSumTotalTermFreq();
    this.useTotalTermFrequency = sumTotalTermFreq != -1;
    // -1 is treated as "statistic unavailable": fall back to the document count.
    final long effectiveDictSize = sumTotalTermFreq == -1 ? reader.maxDoc() : sumTotalTermFreq;
    this.dictSize = effectiveDictSize;

    this.preFilter = preFilter;
    this.postFilter = postFilter;
    this.nonErrorLikelihood = nonErrorLikelihood;

    float thresholdFrequency = spellchecker.getThresholdFrequency();
    // A threshold >= 1 is used as an absolute count; otherwise it is a fraction of the dictionary size.
    // Use the effective size (never the raw -1 sentinel) so the plateau cannot become negative
    // merely because the total term frequency statistic is missing.
    this.frequencyPlateau = thresholdFrequency >= 1.0f
            ? (int) thresholdFrequency
            : (int) (effectiveDictSize * thresholdFrequency);
    termsEnum = terms.iterator();
}
/**
 * Asks the spell checker for terms similar to the set's original term and adds them to the set.
 *
 * <p>The original term is pre-filtered first. Before the lookup, the spell checker's threshold
 * frequency is set to {@code 0} for {@link SuggestMode#SUGGEST_ALWAYS}, otherwise to the value
 * computed by {@code thresholdFrequency} from the original term's frequency and the dictionary size.
 * Each suggestion is wrapped in a {@code Candidate} and passed through the post-filter, which
 * collects into the list that is finally added to the set.
 *
 * @param set candidate set holding the original term; it is modified and returned
 * @return the same {@code set} instance, with the drawn candidates added
 * @throws IOException if the spell checker lookup or a filter fails
 */
@Override
public CandidateSet drawCandidates(CandidateSet set) throws IOException {
    final Candidate source = set.originalTerm;
    final BytesRef filteredTerm = preFilter(source.term, spare, byteSpare);
    final long sourceFrequency = source.frequency;

    if (this.suggestMode == SuggestMode.SUGGEST_ALWAYS) {
        spellchecker.setThresholdFrequency(0);
    } else {
        spellchecker.setThresholdFrequency(thresholdFrequency(sourceFrequency, dictSize));
    }

    final SuggestWord[] similarWords =
            spellchecker.suggestSimilar(new Term(field, filteredTerm), numCandidates, reader, this.suggestMode);

    final List<Candidate> drawn = new ArrayList<>(similarWords.length);
    for (SuggestWord word : similarWords) {
        final BytesRef wordBytes = new BytesRef(word.string);
        postFilter(
                new Candidate(
                        wordBytes,
                        internalFrequency(wordBytes),
                        word.score,
                        score(word.freq, word.score, dictSize),
                        false),
                spare,
                byteSpare,
                drawn);
    }

    set.addCandidates(drawn);
    return set;
}
/**
 * Produces word-break suggestions for a term.
 *
 * <p>Only suggestions that are non-null and consist of more than one part are kept. When
 * {@code verifyDecompoundCollation} is off, the first {@code maxDecompoundExpansions} of them are
 * returned in the order delivered by the word-break spell checker. When it is on, each suggestion is
 * paired with the result of {@code countCollatedMatches}, suggestions with a count of zero or less
 * are dropped, the rest are sorted by the natural order of {@code MaxSortable}, and the list is
 * truncated to {@code maxDecompoundExpansions}.
 *
 * @param term the term to break up
 * @return the retained suggestions; an empty list if the spell checker returned none
 * @throws IOException if the spell checker fails
 */
protected List<SuggestWord[]> suggestWordbreaks(final Term term) throws IOException {
    final SuggestWord[][] breaks = wordBreakSpellChecker.suggestWordBreaks(
            toLuceneTerm(term),
            decompoundsToQuery,
            indexReader,
            SuggestMode.SUGGEST_ALWAYS,
            WordBreakSpellChecker.BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);

    if (breaks.length == 0) {
        return Collections.emptyList();
    }

    if (verifyDecompoundCollation) {
        final IndexSearcher searcher = new IndexSearcher(indexReader);
        return Arrays.stream(breaks)
                .filter(parts -> !(parts == null || parts.length <= 1))
                .map(parts -> new MaxSortable<>(parts, countCollatedMatches(parts, searcher)))
                .filter(ranked -> ranked.count > 0)
                .sorted()
                .limit(maxDecompoundExpansions) // TODO: use PriorityQueue
                .map(ranked -> ranked.obj)
                .collect(Collectors.toList());
    }

    // No verification requested: keep the spell checker's own ordering.
    return Arrays.stream(breaks)
            .filter(parts -> !(parts == null || parts.length <= 1))
            .limit(maxDecompoundExpansions)
            .collect(Collectors.toList());
}
/**
 * Looks up spelling suggestions for a query string.
 *
 * <p>Requests at most two suggestions for {@code searchQuery} in {@code WORD_FIELD} using
 * {@link SuggestMode#SUGGEST_ALWAYS} and returns their string forms in the order delivered.
 *
 * @param searchQuery the text to find similar words for
 * @return a new mutable list of suggested strings, possibly empty
 * @throws IOException if the spell checker fails
 */
private List<String> getUsingSpellcheck(String searchQuery) throws IOException {
    final Term queryTerm = new Term(WORD_FIELD, searchQuery);
    final SuggestWord[] similar = spellChecker.suggestSimilar(queryTerm, 2, reader, SuggestMode.SUGGEST_ALWAYS);

    final List<String> words = new ArrayList<>();
    for (int i = 0; i < similar.length; i++) {
        words.add(similar[i].string);
    }
    return words;
}
/**
 * Creates spelling options without an explicit alternative term count.
 *
 * <p>This constructor does not assign {@code alternativeTermCount}; every argument is stored
 * unchanged in the field of the same name.
 *
 * @param tokens          tokens to check
 * @param reader          index reader to run the check against
 * @param count           stored as {@code count}
 * @param suggestMode     suggest mode to apply
 * @param extendedResults stored as {@code extendedResults}
 * @param accuracy        stored as {@code accuracy}
 * @param customParams    additional parameters; callers in this file pass {@code null} here
 */
public SpellingOptions(Collection<Token> tokens, IndexReader reader,
        int count, SuggestMode suggestMode, boolean extendedResults,
        float accuracy, SolrParams customParams) {
    // What to check and where.
    this.tokens = tokens;
    this.reader = reader;

    // How to check it.
    this.suggestMode = suggestMode;
    this.count = count;
    this.accuracy = accuracy;
    this.extendedResults = extendedResults;

    this.customParams = customParams;
}
/**
 * Creates spelling options including an alternative term count.
 *
 * <p>Every argument is stored unchanged in the field of the same name.
 *
 * @param tokens               tokens to check
 * @param reader               index reader to run the check against
 * @param count                stored as {@code count}
 * @param alternativeTermCount stored as {@code alternativeTermCount}
 * @param suggestMode          suggest mode to apply
 * @param extendedResults      stored as {@code extendedResults}
 * @param accuracy             stored as {@code accuracy}
 * @param customParams         additional parameters
 */
public SpellingOptions(Collection<Token> tokens, IndexReader reader,
        int count, int alternativeTermCount, SuggestMode suggestMode,
        boolean extendedResults, float accuracy, SolrParams customParams) {
    // What to check and where.
    this.tokens = tokens;
    this.reader = reader;

    // How to check it.
    this.suggestMode = suggestMode;
    this.count = count;
    this.alternativeTermCount = alternativeTermCount;
    this.accuracy = accuracy;
    this.extendedResults = extendedResults;

    this.customParams = customParams;
}
/**
 * Looks up suggestions for every token in {@code options} using the configured {@code lookup}.
 *
 * <p>If no lookup has been built yet, this logs a message and returns {@code EMPTY_RESULT}. For each
 * token, the "only more popular" flag passed to the lookup is set when the suggest mode is
 * {@link SuggestMode#SUGGEST_MORE_POPULAR} and the lookup is neither a {@code WFSTCompletionLookup}
 * nor an {@code AnalyzingSuggester}. Results are sorted with {@code Collections.sort} unless the
 * suggest mode is {@code SUGGEST_MORE_POPULAR}. Tokens for which the lookup returns {@code null}
 * contribute nothing to the result.
 *
 * @param options tokens, suggest mode and count to use
 * @return a new result holding the suggestions, or {@code EMPTY_RESULT} when no lookup is available
 * @throws IOException if the lookup fails
 */
@Override
public SpellingResult getSuggestions(SpellingOptions options) throws IOException {
    log.debug("getSuggestions: {}", options.tokens);
    if (lookup == null) {
        log.info("Lookup is null - invoke spellchecker.build first");
        return EMPTY_RESULT;
    }

    final SpellingResult result = new SpellingResult();
    // One CharsRef is reused for all tokens and re-pointed at each token's buffer.
    final CharsRef key = new CharsRef();
    for (Token token : options.tokens) {
        key.chars = token.buffer();
        key.offset = 0;
        key.length = token.length();

        final boolean wfstOrAnalyzing =
                lookup instanceof WFSTCompletionLookup || lookup instanceof AnalyzingSuggester;
        final boolean onlyMorePopular =
                options.suggestMode == SuggestMode.SUGGEST_MORE_POPULAR && !wfstOrAnalyzing;

        final List<LookupResult> hits = lookup.lookup(key, onlyMorePopular, options.count);
        if (hits != null) {
            if (options.suggestMode != SuggestMode.SUGGEST_MORE_POPULAR) {
                Collections.sort(hits);
            }
            for (LookupResult hit : hits) {
                result.add(token, hit.key.toString(), (int) hit.value);
            }
        }
    }
    return result;
}
/**
 * Returns the suggest mode stored on this instance.
 *
 * @return the current value of the {@code suggestMode} field
 */
public SuggestMode suggestMode() {
    return this.suggestMode;
}
/**
 * Creates a candidate generator without pre- or post-filter analyzers.
 *
 * <p>Delegates to the full constructor, passing {@code null} for both analyzers and the terms
 * obtained from {@code MultiFields.getTerms(reader, field)}.
 *
 * @param spellchecker       spell checker used to look up similar terms
 * @param field              name of the field the suggestions are generated for
 * @param suggestMode        suggest mode passed on to the spell checker
 * @param reader             index reader the spell checker runs against
 * @param nonErrorLikelihood passed through to the full constructor
 * @param numCandidates      number of candidates requested from the spell checker
 * @throws IOException if resolving the field's terms fails
 */
public DirectCandidateGenerator(
        DirectSpellChecker spellchecker,
        String field,
        SuggestMode suggestMode,
        IndexReader reader,
        double nonErrorLikelihood,
        int numCandidates) throws IOException {
    this(
            spellchecker,
            field,
            suggestMode,
            reader,
            nonErrorLikelihood,
            numCandidates,
            null,
            null,
            MultiFields.getTerms(reader, field));
}
/**
 * Runs spell checking for the current request and adds a {@code "spellcheck"} section to the response.
 *
 * <p>Does nothing when the component is not enabled through {@code COMPONENT_NAME} or when no spell
 * checkers are configured. The text to check is the {@code SPELLCHECK_Q} parameter if present
 * (tokenized with the selected spell checker's query analyzer), otherwise the request's query string
 * (falling back to {@code CommonParams.Q}) run through the query converter. When no non-empty token
 * collection results, nothing is added to the response.
 *
 * <p>Suggestions are only requested when no "max results for suggest" limit applies or the hit count
 * does not exceed it; otherwise an empty {@code SpellingResult} is reported.
 *
 * @param rb response builder giving access to the request, its parameters and the response
 * @throws IOException   if the spell checker or tokenization fails
 * @throws SolrException with {@code NOT_FOUND} if there is text to check but no spell checker could
 *                       be resolved for the requested dictionaries
 */
@Override
@SuppressWarnings("unchecked")
public void process(ResponseBuilder rb) throws IOException {
    SolrParams params = rb.req.getParams();
    if (!params.getBool(COMPONENT_NAME, false) || spellCheckers.isEmpty()) {
        return;
    }
    boolean shardRequest = "true".equals(params.get(ShardParams.IS_SHARD));
    String q = params.get(SPELLCHECK_Q);
    SolrSpellChecker spellChecker = getSpellChecker(params);
    Collection<Token> tokens = null;
    if (q != null) {
        // The checker's query analyzer is needed below; without this guard a request naming an
        // unknown dictionary would fail with a NullPointerException instead of NOT_FOUND.
        if (spellChecker == null) {
            throw new SolrException(SolrException.ErrorCode.NOT_FOUND,
                    "Specified dictionaries do not exist: " + getDictionaryNameAsSingleString(getDictionaryNames(params)));
        }
        //we have a spell check param, tokenize it with the query analyzer applicable for this spellchecker
        tokens = getTokens(q, spellChecker.getQueryAnalyzer());
    } else {
        q = rb.getQueryString();
        if (q == null) {
            q = params.get(CommonParams.Q);
        }
        tokens = queryConverter.convert(q);
    }
    if (tokens != null && tokens.isEmpty() == false) {
        if (spellChecker != null) {
            int count = params.getInt(SPELLCHECK_COUNT, 1);
            boolean onlyMorePopular = params.getBool(SPELLCHECK_ONLY_MORE_POPULAR, DEFAULT_ONLY_MORE_POPULAR);
            boolean extendedResults = params.getBool(SPELLCHECK_EXTENDED_RESULTS, false);
            boolean collate = params.getBool(SPELLCHECK_COLLATE, false);
            float accuracy = params.getFloat(SPELLCHECK_ACCURACY, Float.MIN_VALUE);
            int alternativeTermCount = params.getInt(SpellingParams.SPELLCHECK_ALTERNATIVE_TERM_COUNT, 0);
            //If specified, this can be a discrete # of results, or a percentage of fq results.
            Integer maxResultsForSuggest = maxResultsForSuggest(rb);
            ModifiableSolrParams customParams = new ModifiableSolrParams();
            for (String checkerName : getDictionaryNames(params)) {
                customParams.add(getCustomParams(checkerName, params));
            }

            // Prefer the hit count recorded in the response log; otherwise ask the response builder.
            Number hitsLong = (Number) rb.rsp.getToLog().get("hits");
            long hits = 0;
            if (hitsLong == null) {
                hits = rb.getNumberDocumentsFound();
            } else {
                hits = hitsLong.longValue();
            }

            SpellingResult spellingResult = null;
            if (maxResultsForSuggest == null || hits <= maxResultsForSuggest) {
                // "Only more popular" takes precedence over a positive alternative term count.
                SuggestMode suggestMode = SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX;
                if (onlyMorePopular) {
                    suggestMode = SuggestMode.SUGGEST_MORE_POPULAR;
                } else if (alternativeTermCount > 0) {
                    suggestMode = SuggestMode.SUGGEST_ALWAYS;
                }
                IndexReader reader = rb.req.getSearcher().getIndexReader();
                SpellingOptions options = new SpellingOptions(tokens, reader, count,
                        alternativeTermCount, suggestMode, extendedResults, accuracy,
                        customParams);
                spellingResult = spellChecker.getSuggestions(options);
            } else {
                spellingResult = new SpellingResult();
            }

            // Reported as correctly spelled when the hit count exceeds the limit (or zero if no limit).
            boolean isCorrectlySpelled = hits > (maxResultsForSuggest==null ? 0 : maxResultsForSuggest);

            @SuppressWarnings({"rawtypes"})
            NamedList response = new SimpleOrderedMap();
            @SuppressWarnings({"rawtypes"})
            NamedList suggestions = toNamedList(shardRequest, spellingResult, q, extendedResults);
            response.add("suggestions", suggestions);

            if (extendedResults) {
                response.add("correctlySpelled", isCorrectlySpelled);
            }
            if (collate) {
                addCollationsToResponse(params, spellingResult, rb, q, response, spellChecker.isSuggestionsMayOverlap());
            }
            if (shardRequest) {
                addOriginalTermsToResponse(response, tokens);
            }

            rb.rsp.add("spellcheck", response);
        } else {
            throw new SolrException(SolrException.ErrorCode.NOT_FOUND,
                    "Specified dictionaries do not exist: " + getDictionaryNameAsSingleString(getDictionaryNames(params)));
        }
    }
}
/**
 * Checks suggestions (with their reported counts) from an {@code IndexBasedSpellChecker} built on
 * the core's "title" field: a misspelled word yields exactly one suggestion, a word with no
 * neighbours yields an empty suggestion map, and a correctly spelled word yields no entry at all.
 */
@Test
@SuppressWarnings({"unchecked"})
public void testExtendedResults() throws Exception {
    final IndexBasedSpellChecker checker = new IndexBasedSpellChecker();

    @SuppressWarnings({"rawtypes"})
    final NamedList config = new NamedList();
    config.add("classname", IndexBasedSpellChecker.class.getName());

    final File spellIndexDir = createTempDir().toFile();
    spellIndexDir.mkdirs();
    config.add(AbstractLuceneSpellChecker.INDEX_DIR, spellIndexDir.getAbsolutePath());
    config.add(AbstractLuceneSpellChecker.FIELD, "title");
    config.add(AbstractLuceneSpellChecker.SPELLCHECKER_ARG_NAME, config);

    final SolrCore core = h.getCore();
    final String dictName = checker.init(config, core);
    assertTrue(dictName + " is not equal to " + SolrSpellChecker.DEFAULT_DICTIONARY_NAME,
            dictName.equals(SolrSpellChecker.DEFAULT_DICTIONARY_NAME));

    h.getCore().withSearcher(searcher -> {
        checker.build(core, searcher);

        IndexReader reader = searcher.getIndexReader();
        Collection<Token> misspelled = queryConverter.convert("documemt");
        SpellingOptions spellOpts =
                new SpellingOptions(misspelled, reader, 1, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, true, 0.5f, null);

        SpellingResult result = checker.getSuggestions(spellOpts);
        assertTrue("result is null and it shouldn't be", result != null);
        //should be lowercased, b/c we are using a lowercasing analyzer
        Map<String, Integer> suggestions = result.get(spellOpts.tokens.iterator().next());
        assertTrue("documemt is null and it shouldn't be", suggestions != null);
        assertTrue("documemt Size: " + suggestions.size() + " is not: " + 1, suggestions.size() == 1);
        Map.Entry<String, Integer> first = suggestions.entrySet().iterator().next();
        assertTrue(first.getKey() + " is not equal to " + "document", first.getKey().equals("document"));
        assertTrue(first.getValue() + " does not equal: " + 2, first.getValue() == 2);

        //test something not in the spell checker
        spellOpts.tokens = queryConverter.convert("super");
        result = checker.getSuggestions(spellOpts);
        assertTrue("result is null and it shouldn't be", result != null);
        suggestions = result.get(spellOpts.tokens.iterator().next());
        assertTrue("suggestions size should be 0", suggestions.size() == 0);

        // A correctly spelled word: no entry is expected for the token.
        spellOpts.tokens = queryConverter.convert("document");
        result = checker.getSuggestions(spellOpts);
        assertTrue("result is null and it shouldn't be", result != null);
        suggestions = result.get(spellOpts.tokens.iterator().next());
        assertTrue("suggestions is not null and it should be", suggestions == null);
        return null;
    });
}
/**
 * Checks that an {@code IndexBasedSpellChecker} can be built from a standalone index given through
 * {@code AbstractLuceneSpellChecker.LOCATION}: a small index of titles is written to a temporary
 * directory, the checker is pointed at it, and suggestions are verified against those titles.
 */
@Test
@SuppressWarnings({"unchecked"})
public void testAlternateLocation() throws Exception {
    final String[] altDocs = {
        "jumpin jack flash",
        "Sargent Peppers Lonely Hearts Club Band",
        "Born to Run",
        "Thunder Road",
        "Londons Burning",
        "A Horse with No Name",
        "Sweet Caroline"
    };

    final IndexBasedSpellChecker checker = new IndexBasedSpellChecker();
    @SuppressWarnings({"rawtypes"})
    final NamedList config = new NamedList();
    config.add("classname", IndexBasedSpellChecker.class.getName());

    final File tmpDir = createTempDir().toFile();
    final File spellIndexDir = new File(tmpDir, "spellingIdx");

    //create a standalone index
    final File altIndexDir = new File(tmpDir, "alternateIdx" + new Date().getTime());
    Directory dir = newFSDirectory(altIndexDir.toPath());
    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new WhitespaceAnalyzer()));
    for (String title : altDocs) {
        Document doc = new Document();
        doc.add(new TextField("title", title, Field.Store.YES));
        writer.addDocument(doc);
    }
    writer.forceMerge(1);
    writer.close();
    dir.close();

    spellIndexDir.mkdirs();
    config.add(AbstractLuceneSpellChecker.INDEX_DIR, spellIndexDir.getAbsolutePath());
    config.add(AbstractLuceneSpellChecker.LOCATION, altIndexDir.getAbsolutePath());
    config.add(AbstractLuceneSpellChecker.FIELD, "title");
    config.add(AbstractLuceneSpellChecker.SPELLCHECKER_ARG_NAME, config);

    final SolrCore core = h.getCore();
    final String dictName = checker.init(config, core);
    assertTrue(dictName + " is not equal to " + SolrSpellChecker.DEFAULT_DICTIONARY_NAME,
            dictName.equals(SolrSpellChecker.DEFAULT_DICTIONARY_NAME));

    h.getCore().withSearcher(searcher -> {
        checker.build(core, searcher);

        IndexReader reader = searcher.getIndexReader();
        Collection<Token> misspelled = queryConverter.convert("flesh");
        SpellingOptions spellOpts =
                new SpellingOptions(misspelled, reader, 1, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, true, 0.5f, null);

        SpellingResult result = checker.getSuggestions(spellOpts);
        assertTrue("result is null and it shouldn't be", result != null);
        //should be lowercased, b/c we are using a lowercasing analyzer
        Map<String, Integer> suggestions = result.get(spellOpts.tokens.iterator().next());
        assertTrue("flesh is null and it shouldn't be", suggestions != null);
        assertTrue("flesh Size: " + suggestions.size() + " is not: " + 1, suggestions.size() == 1);
        Map.Entry<String, Integer> first = suggestions.entrySet().iterator().next();
        assertTrue(first.getKey() + " is not equal to " + "flash", first.getKey().equals("flash"));
        assertTrue(first.getValue() + " does not equal: " + 1, first.getValue() == 1);

        //test something not in the spell checker
        spellOpts.tokens = queryConverter.convert("super");
        result = checker.getSuggestions(spellOpts);
        assertTrue("result is null and it shouldn't be", result != null);
        suggestions = result.get(spellOpts.tokens.iterator().next());
        assertTrue("suggestions size should be 0", suggestions.size() == 0);

        // A word that is present in the alternate index: no entry is expected for the token.
        spellOpts.tokens = queryConverter.convert("Caroline");
        result = checker.getSuggestions(spellOpts);
        assertTrue("result is null and it shouldn't be", result != null);
        suggestions = result.get(spellOpts.tokens.iterator().next());
        assertTrue("suggestions is not null and it should be", suggestions == null);
        return null;
    });
}
/**
 * Asks the word-break spell checker for combinations of the given terms.
 *
 * <p>All remaining terms of the iterator are converted with {@code toLuceneTerm} and passed, in
 * iteration order, to {@code suggestWordCombinations} with a maximum of 10 suggestions and
 * {@link SuggestMode#SUGGEST_ALWAYS}.
 *
 * @param terms iterator over the terms to combine; it is consumed by this call
 * @return the combine suggestions delivered by the spell checker
 * @throws IOException if the spell checker fails
 */
protected CombineSuggestion[] suggestCombination(final Iterator<Term> terms) throws IOException {
    final List<org.apache.lucene.index.Term> converted = new ArrayList<>(COMPOUND_WINDOW);
    while (terms.hasNext()) {
        converted.add(toLuceneTerm(terms.next()));
    }

    final org.apache.lucene.index.Term[] termArray = converted.toArray(new org.apache.lucene.index.Term[0]);
    return wordBreakSpellChecker.suggestWordCombinations(termArray, 10, indexReader, SuggestMode.SUGGEST_ALWAYS);
}