下面列出了怎么用org.apache.lucene.search.spell.DirectSpellChecker的API类实例代码及写法,或者点击链接到github查看源代码。
@Override
public TermSuggestion innerExecute(String name, TermSuggestionContext suggestion, IndexSearcher searcher, CharsRefBuilder spare) throws IOException {
DirectSpellChecker directSpellChecker = SuggestUtils.getDirectSpellChecker(suggestion.getDirectSpellCheckerSettings());
final IndexReader indexReader = searcher.getIndexReader();
TermSuggestion response = new TermSuggestion(
name, suggestion.getSize(), suggestion.getDirectSpellCheckerSettings().sort()
);
List<Token> tokens = queryTerms(suggestion, spare);
for (Token token : tokens) {
// TODO: Extend DirectSpellChecker in 4.1, to get the raw suggested words as BytesRef
SuggestWord[] suggestedWords = directSpellChecker.suggestSimilar(
token.term, suggestion.getShardSize(), indexReader, suggestion.getDirectSpellCheckerSettings().suggestMode()
);
Text key = new Text(new BytesArray(token.term.bytes()));
TermSuggestion.Entry resultEntry = new TermSuggestion.Entry(key, token.startOffset, token.endOffset - token.startOffset);
for (SuggestWord suggestWord : suggestedWords) {
Text word = new Text(suggestWord.string);
resultEntry.addOption(new TermSuggestion.Entry.Option(word, suggestWord.freq, suggestWord.score));
}
response.addTerm(resultEntry);
}
return response;
}
public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader, double nonErrorLikelihood, int numCandidates, Analyzer preFilter, Analyzer postFilter, Terms terms) throws IOException {
if (terms == null) {
throw new IllegalArgumentException("generator field [" + field + "] doesn't exist");
}
this.spellchecker = spellchecker;
this.field = field;
this.numCandidates = numCandidates;
this.suggestMode = suggestMode;
this.reader = reader;
final long dictSize = terms.getSumTotalTermFreq();
this.useTotalTermFrequency = dictSize != -1;
this.dictSize = dictSize == -1 ? reader.maxDoc() : dictSize;
this.preFilter = preFilter;
this.postFilter = postFilter;
this.nonErrorLikelihood = nonErrorLikelihood;
float thresholdFrequency = spellchecker.getThresholdFrequency();
this.frequencyPlateau = thresholdFrequency >= 1.0f ? (int) thresholdFrequency: (int)(dictSize * thresholdFrequency);
termsEnum = terms.iterator();
}
@Override
public void indexingDone() {
try {
spellChecker = new DirectSpellChecker();
spellChecker.setMaxEdits(2);
spellChecker.setAccuracy(0.1f);
spellChecker.setMinPrefix(0);
reader = DirectoryReader.open(writer);
fuzzySuggester = new FuzzySuggester(directory, "", writer.getAnalyzer());
Dictionary dict = new DocumentValueSourceDictionary(reader, WORD_FIELD, new LongValuesSource() {
@Override
public boolean needsScores() {
return false;
}
@Override
public LongValues getValues(LeafReaderContext ctx, DoubleValues scores) throws IOException {
return null;
}
});
fuzzySuggester.build(dict);
writer.close();
searcher = new IndexSearcher(DirectoryReader.open(directory));
} catch (IOException e) {
throw new RuntimeException(e);
}
}
public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader, double nonErrorLikelihood, int numCandidates) throws IOException {
this(spellchecker, field, suggestMode, reader, nonErrorLikelihood, numCandidates, null, null, MultiFields.getTerms(reader, field));
}
@Override
@SuppressWarnings({"unchecked"})
public String init(@SuppressWarnings({"rawtypes"})NamedList config, SolrCore core) {
SolrParams params = config.toSolrParams();
log.info("init: {}", config);
String name = super.init(config, core);
Comparator<SuggestWord> comp = SuggestWordQueue.DEFAULT_COMPARATOR;
String compClass = (String) config.get(COMPARATOR_CLASS);
if (compClass != null) {
if (compClass.equalsIgnoreCase(SCORE_COMP))
comp = SuggestWordQueue.DEFAULT_COMPARATOR;
else if (compClass.equalsIgnoreCase(FREQ_COMP))
comp = new SuggestWordFrequencyComparator();
else //must be a FQCN
comp = (Comparator<SuggestWord>) core.getResourceLoader().newInstance(compClass, Comparator.class);
}
StringDistance sd = DirectSpellChecker.INTERNAL_LEVENSHTEIN;
String distClass = (String) config.get(STRING_DISTANCE);
if (distClass != null && !distClass.equalsIgnoreCase(INTERNAL_DISTANCE))
sd = core.getResourceLoader().newInstance(distClass, StringDistance.class);
float minAccuracy = DEFAULT_ACCURACY;
Float accuracy = params.getFloat(ACCURACY);
if (accuracy != null)
minAccuracy = accuracy;
int maxEdits = DEFAULT_MAXEDITS;
Integer edits = params.getInt(MAXEDITS);
if (edits != null)
maxEdits = edits;
int minPrefix = DEFAULT_MINPREFIX;
Integer prefix = params.getInt(MINPREFIX);
if (prefix != null)
minPrefix = prefix;
int maxInspections = DEFAULT_MAXINSPECTIONS;
Integer inspections = params.getInt(MAXINSPECTIONS);
if (inspections != null)
maxInspections = inspections;
float minThreshold = DEFAULT_THRESHOLD_TOKEN_FREQUENCY;
Float threshold = params.getFloat(THRESHOLD_TOKEN_FREQUENCY);
if (threshold != null)
minThreshold = threshold;
int minQueryLength = DEFAULT_MINQUERYLENGTH;
Integer queryLength = params.getInt(MINQUERYLENGTH);
if (queryLength != null)
minQueryLength = queryLength;
int maxQueryLength = DEFAULT_MAXQUERYLENGTH;
Integer overriddenMaxQueryLength = params.getInt(MAXQUERYLENGTH);
if (overriddenMaxQueryLength != null)
maxQueryLength = overriddenMaxQueryLength;
float maxQueryFrequency = DEFAULT_MAXQUERYFREQUENCY;
Float queryFreq = params.getFloat(MAXQUERYFREQUENCY);
if (queryFreq != null)
maxQueryFrequency = queryFreq;
checker.setComparator(comp);
checker.setDistance(sd);
checker.setMaxEdits(maxEdits);
checker.setMinPrefix(minPrefix);
checker.setAccuracy(minAccuracy);
checker.setThresholdFrequency(minThreshold);
checker.setMaxInspections(maxInspections);
checker.setMinQueryLength(minQueryLength);
checker.setMaxQueryLength(maxQueryLength);
checker.setMaxQueryFrequency(maxQueryFrequency);
checker.setLowerCaseTerms(false);
return name;
}