下面列出了 org.apache.lucene.search.spell.SuggestWord 类的 API 使用示例代码及写法，也可以点击链接到 GitHub 查看源代码。
/**
 * Looks up similar terms for every query term of the suggestion context and collects them
 * into a {@link TermSuggestion}.
 *
 * @param name       name passed to the returned suggestion
 * @param suggestion context supplying the size, shard size and spell checker settings
 * @param searcher   searcher whose index reader is handed to the spell checker
 * @param spare      scratch builder forwarded to {@code queryTerms}
 * @return a suggestion holding one entry per query term, each with its suggested words as options
 * @throws IOException declared for the term extraction and spell checker lookups
 */
@Override
public TermSuggestion innerExecute(String name, TermSuggestionContext suggestion, IndexSearcher searcher, CharsRefBuilder spare) throws IOException {
    final DirectSpellChecker checker = SuggestUtils.getDirectSpellChecker(suggestion.getDirectSpellCheckerSettings());
    final IndexReader reader = searcher.getIndexReader();
    final TermSuggestion result = new TermSuggestion(
            name,
            suggestion.getSize(),
            suggestion.getDirectSpellCheckerSettings().sort());

    for (final Token queryToken : queryTerms(suggestion, spare)) {
        // TODO: Extend DirectSpellChecker in 4.1, to get the raw suggested words as BytesRef
        final SuggestWord[] similar = checker.suggestSimilar(
                queryToken.term,
                suggestion.getShardSize(),
                reader,
                suggestion.getDirectSpellCheckerSettings().suggestMode());

        // The entry is keyed by the term bytes and records the token's offset and length.
        final TermSuggestion.Entry entry = new TermSuggestion.Entry(
                new Text(new BytesArray(queryToken.term.bytes())),
                queryToken.startOffset,
                queryToken.endOffset - queryToken.startOffset);

        for (int i = 0; i < similar.length; i++) {
            final SuggestWord hit = similar[i];
            entry.addOption(new TermSuggestion.Entry.Option(new Text(hit.string), hit.freq, hit.score));
        }
        result.addTerm(entry);
    }
    return result;
}
/**
 * Asks the spell checker for terms similar to the set's original term, turns each hit into
 * a {@link Candidate}, passes it through {@code postFilter} and adds the collected
 * candidates to the set.
 *
 * @param set candidate set whose original term drives the lookup
 * @return the same set instance, after the candidates were added
 * @throws IOException declared for the filter and spell checker calls
 */
@Override
public CandidateSet drawCandidates(CandidateSet set) throws IOException {
    final Candidate source = set.originalTerm;
    final BytesRef filteredTerm = preFilter(source.term, spare, byteSpare);
    final long sourceFrequency = source.frequency;

    // With SUGGEST_ALWAYS the threshold is 0; otherwise it is derived from the term frequency.
    spellchecker.setThresholdFrequency(
            this.suggestMode == SuggestMode.SUGGEST_ALWAYS ? 0 : thresholdFrequency(sourceFrequency, dictSize));

    final SuggestWord[] similarWords =
            spellchecker.suggestSimilar(new Term(field, filteredTerm), numCandidates, reader, this.suggestMode);

    final List<Candidate> collected = new ArrayList<>(similarWords.length);
    for (final SuggestWord similar : similarWords) {
        final BytesRef candidateTerm = new BytesRef(similar.string);
        final Candidate unfiltered = new Candidate(
                candidateTerm,
                internalFrequency(candidateTerm),
                similar.score,
                score(similar.freq, similar.score, dictSize),
                false);
        postFilter(unfiltered, spare, byteSpare, collected);
    }

    set.addCandidates(collected);
    return set;
}
/**
 * Checks that the spell checkers registered as "freq" and "fqcn" on the "spellcheck"
 * component expose a {@link SuggestWordFrequencyComparator} and a {@code SampleComparator}
 * respectively.
 */
@Test
public void testComparator() throws Exception {
    final SpellCheckComponent spellcheck = (SpellCheckComponent) h.getCore().getSearchComponent("spellcheck");
    assertNotNull(spellcheck);

    // Spell checker registered under "freq".
    final AbstractLuceneSpellChecker byFrequency = (AbstractLuceneSpellChecker) spellcheck.getSpellChecker("freq");
    assertNotNull(byFrequency);
    final Comparator<SuggestWord> frequencyComparator = byFrequency.getSpellChecker().getComparator();
    assertNotNull(frequencyComparator);
    assertTrue(frequencyComparator instanceof SuggestWordFrequencyComparator);

    // Spell checker registered under "fqcn".
    final AbstractLuceneSpellChecker byClassName = (AbstractLuceneSpellChecker) spellcheck.getSpellChecker("fqcn");
    assertNotNull(byClassName);
    final Comparator<SuggestWord> customComparator = byClassName.getSpellChecker().getComparator();
    assertNotNull(customComparator);
    assertTrue(customComparator instanceof SampleComparator);
}
/**
 * Adds one boolean query to {@code nodesToAdd} for every non-empty word-break suggestion of
 * the term. Each suggested word becomes a MUST disjunction holding a single term in the
 * original term's field.
 *
 * @param term the term to decompound
 * @throws RuntimeException wrapping an {@link IOException} raised while fetching suggestions
 */
protected void decompound(final Term term) {
    // determine the nodesToAdd based on the term
    try {
        for (final SuggestWord[] parts : suggestWordbreaks(term)) {
            if (parts == null || parts.length == 0) {
                continue;
            }

            final BooleanQuery alternative = new BooleanQuery(term.getParent(), Clause.Occur.SHOULD, true);
            for (final SuggestWord part : parts) {
                final DisjunctionMaxQuery partQuery = new DisjunctionMaxQuery(alternative, Clause.Occur.MUST, true);
                alternative.addClause(partQuery);
                partQuery.addClause(new Term(partQuery, term.getField(), part.string, true));
            }
            nodesToAdd.add(alternative);
        }
    } catch (final IOException e) {
        // IO is broken, this looks serious -> throw as RTE
        throw new RuntimeException("Error decompounding " + term, e);
    }
}
/**
 * Fetches word-break suggestions for the term and keeps at most
 * {@code maxDecompoundExpansions} of those that consist of more than one word.
 *
 * <p>When {@code verifyDecompoundCollation} is set, each suggestion is additionally counted
 * via {@code countCollatedMatches}; suggestions with a count of zero are dropped and the
 * rest are sorted before the limit is applied.
 *
 * @param term the term to break up
 * @return the retained suggestions; an empty list when the spell checker returns none
 * @throws IOException declared for the spell checker lookup
 */
protected List<SuggestWord[]> suggestWordbreaks(final Term term) throws IOException {
    final SuggestWord[][] candidates = wordBreakSpellChecker.suggestWordBreaks(
            toLuceneTerm(term),
            decompoundsToQuery,
            indexReader,
            SuggestMode.SUGGEST_ALWAYS,
            WordBreakSpellChecker.BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);

    if (candidates.length == 0) {
        return Collections.emptyList();
    }

    if (verifyDecompoundCollation) {
        final IndexSearcher searcher = new IndexSearcher(indexReader);
        return Arrays.stream(candidates)
                .filter(parts -> parts != null && parts.length > 1)
                .map(parts -> new MaxSortable<>(parts, countCollatedMatches(parts, searcher)))
                .filter(ranked -> ranked.count > 0)
                // ordering comes from MaxSortable itself (presumably highest count first - confirm)
                .sorted()
                .limit(maxDecompoundExpansions) // TODO: use PriorityQueue
                .map(ranked -> ranked.obj)
                .collect(Collectors.toList());
    }

    // No verification requested: keep the spell checker's order and just cap the result.
    return Arrays.stream(candidates)
            .filter(parts -> parts != null && parts.length > 1)
            .limit(maxDecompoundExpansions)
            .collect(Collectors.toList());
}
/**
 * With the spell checker returning a single empty word-break suggestion, the rewritten
 * query must contain only the original term.
 */
@Test
public void testNoDecompoundForSingleToken() throws IOException {
    final SuggestWord[][] noBreaks = {new SuggestWord[] {}};
    when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any()))
            .thenReturn(noBreaks);

    final WordBreakCompoundRewriter compoundRewriter = new WordBreakCompoundRewriter(
            wordBreakSpellChecker, indexReader, "field1", false, false, new TrieMap<>(), 5, false);

    final Query userQuery = new Query();
    addTerm(userQuery, "w1w2", false);

    final ExpandedQuery result = compoundRewriter.rewrite(new ExpandedQuery(userQuery));

    assertThat((Query) result.getUserQuery(),
            bq(
                    dmq(term("w1w2", false))
            )
    );
}
/**
 * With {@code false} as the fourth constructor argument, the word-break lookup must be
 * issued with the term text in its original casing ("W1w2").
 */
@Test
public void testThatDecompoundRespectsLowerCaseInputFalse() throws IOException {
    final SuggestWord[][] nothing = {};
    when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any()))
            .thenReturn(nothing);

    final WordBreakCompoundRewriter compoundRewriter = new WordBreakCompoundRewriter(
            wordBreakSpellChecker, indexReader, "field1", false, false, new TrieMap<>(), 5, false);

    final Query userQuery = new Query();
    addTerm(userQuery, "W1w2", false);
    compoundRewriter.rewrite(new ExpandedQuery(userQuery));

    verify(wordBreakSpellChecker)
            .suggestWordBreaks(eq(new Term("field1", "W1w2")), anyInt(), any(), any(), any());
}
/**
 * With {@code true} as the fourth constructor argument, the word-break lookup for the
 * input "W1w2" must be issued with the lower-cased text "w1w2".
 */
@Test
public void testThatDecompoundRespectsLowerCaseInputTrue() throws IOException {
    final SuggestWord[][] nothing = {};
    when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any()))
            .thenReturn(nothing);

    final WordBreakCompoundRewriter compoundRewriter = new WordBreakCompoundRewriter(
            wordBreakSpellChecker, indexReader, "field1", true, false, new TrieMap<>(), 5, false);

    final Query userQuery = new Query();
    addTerm(userQuery, "W1w2", false);
    compoundRewriter.rewrite(new ExpandedQuery(userQuery));

    verify(wordBreakSpellChecker)
            .suggestWordBreaks(eq(new Term("field1", "w1w2")), anyInt(), any(), any(), any());
}
/**
 * With {@code true} as the fourth constructor argument, the word-combination lookup for the
 * inputs "W1" and "W2" must be issued with the lower-cased terms "w1" and "w2".
 */
@Test
public void testThatCompoundRespectsLowerCaseInputTrue() throws IOException {
    final SuggestWord[][] nothing = {};
    when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any()))
            .thenReturn(nothing);

    final WordBreakCompoundRewriter compoundRewriter = new WordBreakCompoundRewriter(
            wordBreakSpellChecker, indexReader, "field1", true, false, new TrieMap<>(), 5, false);

    final Query userQuery = new Query();
    addTerm(userQuery, "W1", false);
    addTerm(userQuery, "W2", false);
    compoundRewriter.rewrite(new ExpandedQuery(userQuery));

    final Term[] expectedTerms = {new Term("field1", "w1"), new Term("field1", "w2")};
    verify(wordBreakSpellChecker)
            .suggestWordCombinations(eq(expectedTerms), anyInt(), any(), any());
}
/**
 * With {@code false} as the fourth constructor argument, the word-combination lookup must
 * be issued with the terms in their original casing ("W1", "W2").
 */
@Test
public void testThatCompoundRespectsLowerCaseInputFalse() throws IOException {
    final SuggestWord[][] nothing = {};
    when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any()))
            .thenReturn(nothing);

    final WordBreakCompoundRewriter compoundRewriter = new WordBreakCompoundRewriter(
            wordBreakSpellChecker, indexReader, "field1", false, false, new TrieMap<>(), 5, false);

    final Query userQuery = new Query();
    addTerm(userQuery, "W1", false);
    addTerm(userQuery, "W2", false);
    compoundRewriter.rewrite(new ExpandedQuery(userQuery));

    final Term[] expectedTerms = {new Term("field1", "W1"), new Term("field1", "W2")};
    verify(wordBreakSpellChecker)
            .suggestWordCombinations(eq(expectedTerms), anyInt(), any(), any());
}
/**
 * Requests up to two spell checker suggestions for the query text in {@code WORD_FIELD}
 * (suggest mode {@code SUGGEST_ALWAYS}) and returns their strings.
 *
 * @param searchQuery text to look up
 * @return the suggested strings, in the order the spell checker returned them
 * @throws IOException declared for the spell checker lookup
 */
private List<String> getUsingSpellcheck(String searchQuery) throws IOException {
    final Term lookup = new Term(WORD_FIELD, searchQuery);
    final SuggestWord[] hits = spellChecker.suggestSimilar(lookup, 2, reader, SuggestMode.SUGGEST_ALWAYS);

    final List<String> words = new ArrayList<>();
    for (int i = 0; i < hits.length; i++) {
        words.add(hits[i].string);
    }
    return words;
}
/**
 * Counts the documents that match every word of the suggestion in {@code dictionaryField}.
 * Each word is added as a FILTER clause of one boolean query.
 *
 * @param suggestion words that must all match
 * @param searcher   searcher used to run the count
 * @return the number of matching documents
 * @throws RuntimeException wrapping an {@link IOException} raised by the count
 */
protected int countCollatedMatches(final SuggestWord[] suggestion, final IndexSearcher searcher) {
    final org.apache.lucene.search.BooleanQuery.Builder allWords =
            new org.apache.lucene.search.BooleanQuery.Builder();

    for (int i = 0; i < suggestion.length; i++) {
        final TermQuery wordQuery =
                new TermQuery(new org.apache.lucene.index.Term(dictionaryField, suggestion[i].string));
        allWords.add(new org.apache.lucene.search.BooleanClause(
                wordQuery, org.apache.lucene.search.BooleanClause.Occur.FILTER));
    }

    try {
        return searcher.count(allWords.build());
    } catch (final IOException e) {
        throw new RuntimeException(e);
    }
}
/**
 * With one word-break suggestion ("w1", "w2") for "w1w2", the rewritten query must hold the
 * original term plus one boolean alternative requiring both parts.
 */
@Test
public void testDecompoundSingleTokenIntoOneTwoTokenAlternative() throws IOException {
    when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any()))
            .thenReturn(new SuggestWord[][] {decompoundSuggestion("w1", "w2")});

    final WordBreakCompoundRewriter compoundRewriter = new WordBreakCompoundRewriter(
            wordBreakSpellChecker, indexReader, "field1", false, false, new TrieMap<>(), 5, false);

    final Query userQuery = new Query();
    addTerm(userQuery, "w1w2", false);

    final ExpandedQuery result = compoundRewriter.rewrite(new ExpandedQuery(userQuery));

    assertThat((Query) result.getUserQuery(),
            bq(
                    dmq(
                            term("w1w2", false),
                            bq(
                                    dmq(must(), term("w1", true)),
                                    dmq(must(), term("w2", true))
                            )
                    )
            )
    );
}
/**
 * For the input "w1" followed by a generated "w2", the rewritten query must keep both terms
 * unchanged, with no compound added.
 */
@Test
public void testThatGeneratedSecondTermIsNotCompounded() throws IOException {
    // don't de-compound
    final SuggestWord[][] noBreaks = {new SuggestWord[] {}};
    when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any()))
            .thenReturn(noBreaks);

    // compound of terms at idx 0+1
    // when(wordBreakSpellChecker.suggestWordCombinations(any(), anyInt(), any(), any()))
    // .thenReturn(new CombineSuggestion[] { combineSuggestion("w1w2", 0, 1) });

    final WordBreakCompoundRewriter compoundRewriter = new WordBreakCompoundRewriter(
            wordBreakSpellChecker, indexReader, "field1", false, false, new TrieMap<>(), 5, false);

    final Query userQuery = new Query();
    addTerm(userQuery, "w1", false);
    addTerm(userQuery, "w2", true);

    final ExpandedQuery result = compoundRewriter.rewrite(new ExpandedQuery(userQuery));

    assertThat((Query) result.getUserQuery(),
            bq(
                    dmq(term("w1", false)),
                    dmq(term("w2", true))
            )
    );
}
/**
 * For a generated "w1" followed by the input "w2", the rewritten query must keep both terms
 * unchanged, with no compound added.
 */
@Test
public void testThatGeneratedFirstTermIsNotCompounded() throws IOException {
    // don't de-compound
    final SuggestWord[][] noBreaks = {new SuggestWord[] {}};
    when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any()))
            .thenReturn(noBreaks);

    // compound of terms at idx 0+1
    // when(wordBreakSpellChecker.suggestWordCombinations(any(), anyInt(), any(), any()))
    // .thenReturn(new CombineSuggestion[] { combineSuggestion("w1w2", 0, 1) });

    final WordBreakCompoundRewriter compoundRewriter = new WordBreakCompoundRewriter(
            wordBreakSpellChecker, indexReader, "field1", false, false, new TrieMap<>(), 5, false);

    final Query userQuery = new Query();
    addTerm(userQuery, "w1", true);
    addTerm(userQuery, "w2", false);

    final ExpandedQuery result = compoundRewriter.rewrite(new ExpandedQuery(userQuery));

    assertThat((Query) result.getUserQuery(),
            bq(
                    dmq(term("w1", true)),
                    dmq(term("w2", false))
            )
    );
}
/**
 * With two word-break suggestions for "w1w2", the rewritten query must hold the original
 * term plus one boolean alternative per suggestion, in the order they were returned.
 */
@Test
public void testDecompoundSingleTokenIntoTwoTwoTokenAlternatives() throws IOException {
    when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any()))
            .thenReturn(new SuggestWord[][] {
                    decompoundSuggestion("w1", "w2"),
                    decompoundSuggestion("w", "1w2")});

    final WordBreakCompoundRewriter compoundRewriter = new WordBreakCompoundRewriter(
            wordBreakSpellChecker, indexReader, "field1", false, false, new TrieMap<>(), 5, false);

    final Query userQuery = new Query();
    addTerm(userQuery, "w1w2", false);

    final ExpandedQuery result = compoundRewriter.rewrite(new ExpandedQuery(userQuery));

    assertThat((Query) result.getUserQuery(),
            bq(
                    dmq(
                            term("w1w2", false),
                            bq(
                                    dmq(must(), term("w1", true)),
                                    dmq(must(), term("w2", true))
                            ),
                            bq(
                                    dmq(must(), term("w", true)),
                                    dmq(must(), term("1w2", true))
                            )
                    )
            )
    );
}
/**
 * With three word-break suggestions and 2 as the seventh constructor argument, only the
 * first two suggestions may appear as alternatives in the rewritten query.
 */
@Test
public void testThatOnlyMaxExpansionsAreApplied() throws IOException {
    when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any()))
            .thenReturn(new SuggestWord[][] {
                    decompoundSuggestion("w3", "w4"),
                    decompoundSuggestion("w", "3w4"),
                    decompoundSuggestion("w3w", "4")});

    final WordBreakCompoundRewriter compoundRewriter = new WordBreakCompoundRewriter(
            wordBreakSpellChecker, indexReader, "field1", false, false, new TrieMap<>(), 2, false);

    final Query userQuery = new Query();
    addTerm(userQuery, "w3w4", false);

    final ExpandedQuery result = compoundRewriter.rewrite(new ExpandedQuery(userQuery));

    assertThat((Query) result.getUserQuery(),
            bq(
                    dmq(
                            term("w3w4", false),
                            bq(
                                    dmq(must(), term("w3", true)),
                                    dmq(must(), term("w4", true))
                            ),
                            bq(
                                    dmq(must(), term("w", true)),
                                    dmq(must(), term("3w4", true))
                            )
                    )
            )
    );
}
/**
 * With a combine suggestion "w1w2" for the inputs "w1" and "w2", the compound must be added
 * as a generated term next to each of the two original terms.
 */
@Test
public void testCompoundTwoInputTokensOnly() throws IOException {
    // don't de-compound
    final SuggestWord[][] noBreaks = {new SuggestWord[] {}};
    when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any()))
            .thenReturn(noBreaks);

    // compound of terms at idx 0+1
    when(wordBreakSpellChecker.suggestWordCombinations(any(), anyInt(), any(), any()))
            .thenReturn(new CombineSuggestion[] {combineSuggestion("w1w2", 0, 1)});

    final WordBreakCompoundRewriter compoundRewriter = new WordBreakCompoundRewriter(
            wordBreakSpellChecker, indexReader, "field1", false, false, new TrieMap<>(), 5, false);

    final Query userQuery = new Query();
    addTerm(userQuery, "w1", false);
    addTerm(userQuery, "w2", false);

    final ExpandedQuery result = compoundRewriter.rewrite(new ExpandedQuery(userQuery));

    assertThat((Query) result.getUserQuery(),
            bq(
                    dmq(term("w1", false), term("w1w2", true)),
                    dmq(term("w2", false), term("w1w2", true))
            )
    );
}
/**
 * With no combine suggestions for the inputs "w1" and "w2", the rewritten query must
 * contain just the two original terms.
 */
@Test
public void testNoCompoundForTwoInputTokensOnly() throws IOException {
    // don't de-compound
    final SuggestWord[][] noBreaks = {new SuggestWord[] {}};
    when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any()))
            .thenReturn(noBreaks);

    final CombineSuggestion[] noCombinations = {};
    when(wordBreakSpellChecker.suggestWordCombinations(any(), anyInt(), any(), any()))
            .thenReturn(noCombinations);

    final WordBreakCompoundRewriter compoundRewriter = new WordBreakCompoundRewriter(
            wordBreakSpellChecker, indexReader, "field1", false, false, new TrieMap<>(), 5, false);

    final Query userQuery = new Query();
    addTerm(userQuery, "w1", false);
    addTerm(userQuery, "w2", false);

    final ExpandedQuery result = compoundRewriter.rewrite(new ExpandedQuery(userQuery));

    assertThat((Query) result.getUserQuery(),
            bq(
                    dmq(term("w1", false)),
                    dmq(term("w2", false))
            )
    );
}
/**
 * With {@code true} as the fifth constructor argument and combinations available for both
 * word orders, each original term must receive both "w1w2" and "w2w1" as generated terms.
 */
@Test
public void testAlwaysAddReverseCompoundsForTwoWordInput() throws IOException {
    // don't de-compound
    final SuggestWord[][] noBreaks = {new SuggestWord[] {}};
    when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any()))
            .thenReturn(noBreaks);

    final Map<List<String>, CombineSuggestion[]> combinations = new HashMap<>();
    combinations.put(Arrays.asList("w1", "w2"), new CombineSuggestion[] {combineSuggestion("w1w2", 0, 1)});
    combinations.put(Arrays.asList("w2", "w1"), new CombineSuggestion[] {combineSuggestion("w2w1", 0, 1)});
    setupWordBreakMockWithCombinations(combinations);

    final WordBreakCompoundRewriter compoundRewriter = new WordBreakCompoundRewriter(
            wordBreakSpellChecker, indexReader, "field1", false, true, new TrieMap<>(), 5, false);

    final Query userQuery = new Query();
    addTerm(userQuery, "w1", false);
    addTerm(userQuery, "w2", false);

    final ExpandedQuery result = compoundRewriter.rewrite(new ExpandedQuery(userQuery));

    assertThat((Query) result.getUserQuery(),
            bq(
                    dmq(term("w1", false), term("w1w2", true), term("w2w1", true)),
                    dmq(term("w2", false), term("w1w2", true), term("w2w1", true))
            )
    );
}
/**
 * With "trigger" registered as a trigger word between "w1" and "w3", the rewritten query
 * must no longer contain the trigger term and must add the reversed compound "w3w1" to
 * both neighbouring terms.
 */
@Test
public void testSingleReverseCompoundTriggerWord() throws IOException {
    final TrieMap<Boolean> reverseTriggers = new TrieMap<>();
    reverseTriggers.put("trigger", true);

    // don't de-compound
    final SuggestWord[][] noBreaks = {new SuggestWord[] {}};
    when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any()))
            .thenReturn(noBreaks);

    final Map<List<String>, CombineSuggestion[]> combinations = new HashMap<>();
    combinations.put(Arrays.asList("w3", "w1"), new CombineSuggestion[] {combineSuggestion("w3w1", 0, 1)});
    setupWordBreakMockWithCombinations(combinations);

    final WordBreakCompoundRewriter compoundRewriter = new WordBreakCompoundRewriter(
            wordBreakSpellChecker, indexReader, "field1", false, false, reverseTriggers, 5, false);

    final Query userQuery = new Query();
    addTerm(userQuery, "w1", false);
    addTerm(userQuery, "trigger", false);
    addTerm(userQuery, "w3", false);

    final ExpandedQuery result = compoundRewriter.rewrite(new ExpandedQuery(userQuery));

    assertThat((Query) result.getUserQuery(),
            bq(
                    dmq(term("w1", false), term("w3w1", true)),
                    dmq(term("w3", false), term("w3w1", true))
            )
    );
}
/**
 * Configures the underlying checker from the given settings. The comparator, string
 * distance and each numeric option fall back to their defaults when not configured.
 *
 * <p>The comparator setting is matched case-insensitively against {@code SCORE_COMP} and
 * {@code FREQ_COMP}; any other value is treated as a class name and instantiated through
 * the core's resource loader. A string distance other than {@code INTERNAL_DISTANCE} is
 * instantiated the same way.
 *
 * @param config settings for this spell checker
 * @param core   core whose resource loader creates custom comparator/distance instances
 * @return the name returned by {@code super.init(config, core)}
 */
@Override
@SuppressWarnings({"unchecked"})
public String init(@SuppressWarnings({"rawtypes"})NamedList config, SolrCore core) {
    SolrParams params = config.toSolrParams();

    log.info("init: {}", config);
    final String name = super.init(config, core);

    // --- comparator ---
    final String compClass = (String) config.get(COMPARATOR_CLASS);
    final Comparator<SuggestWord> comp;
    if (compClass == null || compClass.equalsIgnoreCase(SCORE_COMP)) {
        comp = SuggestWordQueue.DEFAULT_COMPARATOR;
    } else if (compClass.equalsIgnoreCase(FREQ_COMP)) {
        comp = new SuggestWordFrequencyComparator();
    } else {
        //must be a FQCN
        comp = (Comparator<SuggestWord>) core.getResourceLoader().newInstance(compClass, Comparator.class);
    }

    // --- string distance ---
    final String distClass = (String) config.get(STRING_DISTANCE);
    final StringDistance sd;
    if (distClass == null || distClass.equalsIgnoreCase(INTERNAL_DISTANCE)) {
        sd = DirectSpellChecker.INTERNAL_LEVENSHTEIN;
    } else {
        sd = core.getResourceLoader().newInstance(distClass, StringDistance.class);
    }

    // --- numeric options: configured value if present, default otherwise ---
    final Float accuracy = params.getFloat(ACCURACY);
    final float minAccuracy = (accuracy != null) ? accuracy : DEFAULT_ACCURACY;

    final Integer edits = params.getInt(MAXEDITS);
    final int maxEdits = (edits != null) ? edits : DEFAULT_MAXEDITS;

    final Integer prefix = params.getInt(MINPREFIX);
    final int minPrefix = (prefix != null) ? prefix : DEFAULT_MINPREFIX;

    final Integer inspections = params.getInt(MAXINSPECTIONS);
    final int maxInspections = (inspections != null) ? inspections : DEFAULT_MAXINSPECTIONS;

    final Float threshold = params.getFloat(THRESHOLD_TOKEN_FREQUENCY);
    final float minThreshold = (threshold != null) ? threshold : DEFAULT_THRESHOLD_TOKEN_FREQUENCY;

    final Integer queryLength = params.getInt(MINQUERYLENGTH);
    final int minQueryLength = (queryLength != null) ? queryLength : DEFAULT_MINQUERYLENGTH;

    final Integer overriddenMaxQueryLength = params.getInt(MAXQUERYLENGTH);
    final int maxQueryLength = (overriddenMaxQueryLength != null) ? overriddenMaxQueryLength : DEFAULT_MAXQUERYLENGTH;

    final Float queryFreq = params.getFloat(MAXQUERYFREQUENCY);
    final float maxQueryFrequency = (queryFreq != null) ? queryFreq : DEFAULT_MAXQUERYFREQUENCY;

    // --- apply everything to the checker ---
    checker.setComparator(comp);
    checker.setDistance(sd);
    checker.setMaxEdits(maxEdits);
    checker.setMinPrefix(minPrefix);
    checker.setAccuracy(minAccuracy);
    checker.setThresholdFrequency(minThreshold);
    checker.setMaxInspections(maxInspections);
    checker.setMinQueryLength(minQueryLength);
    checker.setMaxQueryLength(maxQueryLength);
    checker.setMaxQueryFrequency(maxQueryFrequency);
    checker.setLowerCaseTerms(false);

    return name;
}
/**
 * Produces spelling suggestions for every token in the options.
 *
 * <p>For each token the document frequency is recorded in the result. When
 * {@code alternativeTermCount} is positive and the token occurs in the index, the token
 * itself is put in front of the suggestions unless the checker already returned it.
 * A token with neither suggestions nor occurrences is added with an empty list.
 *
 * @param options tokens, reader, counts, suggest mode and accuracy for the lookup
 * @return the collected suggestions and frequencies
 * @throws IOException declared for the index and spell checker lookups
 */
@Override
public SpellingResult getSuggestions(SpellingOptions options)
        throws IOException {
    log.debug("getSuggestions: {}", options.tokens);

    final SpellingResult result = new SpellingResult();

    // An accuracy of Float.MIN_VALUE in the options selects the checker's own accuracy.
    final float accuracy;
    if (options.accuracy == Float.MIN_VALUE) {
        accuracy = checker.getAccuracy();
    } else {
        accuracy = options.accuracy;
    }

    for (final Token token : options.tokens) {
        final String text = token.toString();
        final Term term = new Term(field, text);
        final int docFreq = options.reader.docFreq(term);

        final boolean considerOriginal = options.alternativeTermCount > 0 && docFreq > 0;
        final int wanted = considerOriginal ? options.alternativeTermCount : options.count;

        SuggestWord[] suggestions =
                checker.suggestSimilar(term, wanted, options.reader, options.suggestMode, accuracy);
        result.addFrequency(token, docFreq);

        // If considering alternatives to "correctly-spelled" terms, then add the
        // original as a viable suggestion.
        if (considerOriginal) {
            boolean originalPresent = false;
            for (final SuggestWord candidate : suggestions) {
                if (candidate.string.equals(text)) {
                    originalPresent = true;
                    break;
                }
            }

            if (!originalPresent) {
                final SuggestWord original = new SuggestWord();
                original.freq = docFreq;
                original.string = text;

                // Original goes first, followed by the checker's suggestions in their order.
                final SuggestWord[] withOriginal = new SuggestWord[suggestions.length + 1];
                withOriginal[0] = original;
                System.arraycopy(suggestions, 0, withOriginal, 1, suggestions.length);
                suggestions = withOriginal;
            }
        }

        if (suggestions.length > 0 || docFreq != 0) {
            for (final SuggestWord suggestion : suggestions) {
                result.add(token, suggestion.string, suggestion.freq);
            }
        } else {
            result.add(token, Collections.<String>emptyList());
        }
    }
    return result;
}
/**
 * Orders two suggestions by the natural ordering of their {@code string} fields.
 *
 * @return the result of {@link String#compareTo(String)} on the two strings
 */
@Override
public int compare(SuggestWord suggestWord, SuggestWord suggestWord1) {
    final String left = suggestWord.string;
    final String right = suggestWord1.string;
    return left.compareTo(right);
}
/**
 * With a generated term "w2g" between the inputs "w1" and "w2", the compound "w1w2" must
 * still be added to both input terms while the generated term stays untouched.
 */
@Test
public void testThatCompoundingIfGeneratedIsMixedIn() throws IOException {
    // don't de-compound
    final SuggestWord[][] noBreaks = {new SuggestWord[] {}};
    when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any()))
            .thenReturn(noBreaks);

    // compound of terms at idx 0+1
    when(wordBreakSpellChecker.suggestWordCombinations(any(), anyInt(), any(), any()))
            .thenReturn(new CombineSuggestion[] {combineSuggestion("w1w2", 0, 1)});

    final WordBreakCompoundRewriter compoundRewriter = new WordBreakCompoundRewriter(
            wordBreakSpellChecker, indexReader, "field1", false, false, new TrieMap<>(), 5, false);

    final Query userQuery = new Query();
    addTerm(userQuery, "w1", false);
    addTerm(userQuery, "w2g", true);
    addTerm(userQuery, "w2", false);

    final ExpandedQuery result = compoundRewriter.rewrite(new ExpandedQuery(userQuery));

    assertThat((Query) result.getUserQuery(),
            bq(
                    dmq(term("w1", false), term("w1w2", true)),
                    dmq(term("w2g", true)),
                    dmq(term("w2", false), term("w1w2", true))
            )
    );
}
/**
 * With {@code false} as the fourth constructor argument, trigger words must match
 * case-sensitively: "Trigger_Upper" triggers the reverse compound, while "trigger_upper"
 * and "Trigger_Lower" (registered as "trigger_lower") stay in the query as plain terms.
 */
@Test
public void testCompoundTriggerWordWithLowerCaseInputSetToFalse() throws IOException {
    final TrieMap<Boolean> reverseTriggers = new TrieMap<>();
    reverseTriggers.put("Trigger_Upper", true);
    reverseTriggers.put("trigger_lower", true);

    // don't de-compound
    final SuggestWord[][] noBreaks = {new SuggestWord[] {}};
    when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any()))
            .thenReturn(noBreaks);

    final Map<List<String>, CombineSuggestion[]> combinations = new HashMap<>();
    combinations.put(Arrays.asList("w3", "w1"), new CombineSuggestion[] {combineSuggestion("w3w1", 0, 1)});
    setupWordBreakMockWithCombinations(combinations);

    final WordBreakCompoundRewriter compoundRewriter = new WordBreakCompoundRewriter(
            wordBreakSpellChecker, indexReader, "field1", false, false, reverseTriggers, 5, false);

    // Exact-case trigger word: reverse compound is applied.
    final Query exactCaseQuery = new Query();
    addTerm(exactCaseQuery, "w1", false);
    addTerm(exactCaseQuery, "Trigger_Upper", false);
    addTerm(exactCaseQuery, "w3", false);
    final ExpandedQuery exactCaseResult = compoundRewriter.rewrite(new ExpandedQuery(exactCaseQuery));
    assertThat((Query) exactCaseResult.getUserQuery(),
            bq(
                    dmq(term("w1", false), term("w3w1", true)),
                    dmq(term("w3", false), term("w3w1", true))
            )
    );

    // Lower-cased variant of the upper-case trigger: query stays as entered.
    final Query loweredQuery = new Query();
    addTerm(loweredQuery, "w1", false);
    addTerm(loweredQuery, "trigger_upper", false);
    addTerm(loweredQuery, "w3", false);
    final ExpandedQuery loweredResult = compoundRewriter.rewrite(new ExpandedQuery(loweredQuery));
    assertThat((Query) loweredResult.getUserQuery(),
            bq(
                    dmq(term("w1", false)),
                    dmq(term("trigger_upper", false)),
                    dmq(term("w3", false))
            )
    );

    // Capitalised variant of the lower-case trigger: query stays as entered.
    final Query capitalisedQuery = new Query();
    addTerm(capitalisedQuery, "w1", false);
    addTerm(capitalisedQuery, "Trigger_Lower", false);
    addTerm(capitalisedQuery, "w3", false);
    final ExpandedQuery capitalisedResult = compoundRewriter.rewrite(new ExpandedQuery(capitalisedQuery));
    assertThat((Query) capitalisedResult.getUserQuery(),
            bq(
                    dmq(term("w1", false)),
                    dmq(term("Trigger_Lower", false)),
                    dmq(term("w3", false))
            )
    );
}
/**
 * With {@code true} as the fourth constructor argument, both "trigger_lower" and
 * "Trigger_Lower" must act as the registered trigger word "trigger_lower" and produce the
 * reverse compound.
 */
@Test
public void testCompoundTriggerWordWithLowerCaseInputSetToTrue() throws IOException {
    final TrieMap<Boolean> reverseTriggers = new TrieMap<>();
    reverseTriggers.put("trigger_lower", true);

    // don't de-compound
    final SuggestWord[][] noBreaks = {new SuggestWord[] {}};
    when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any()))
            .thenReturn(noBreaks);

    final Map<List<String>, CombineSuggestion[]> combinations = new HashMap<>();
    combinations.put(Arrays.asList("w3", "w1"), new CombineSuggestion[] {combineSuggestion("w3w1", 0, 1)});
    setupWordBreakMockWithCombinations(combinations);

    final WordBreakCompoundRewriter compoundRewriter = new WordBreakCompoundRewriter(
            wordBreakSpellChecker, indexReader, "field1", true, false, reverseTriggers, 5, false);

    // Trigger word entered exactly as registered.
    final Query lowerCaseQuery = new Query();
    addTerm(lowerCaseQuery, "w1", false);
    addTerm(lowerCaseQuery, "trigger_lower", false);
    addTerm(lowerCaseQuery, "w3", false);
    final ExpandedQuery lowerCaseResult = compoundRewriter.rewrite(new ExpandedQuery(lowerCaseQuery));
    assertThat((Query) lowerCaseResult.getUserQuery(),
            bq(
                    dmq(term("w1", false), term("w3w1", true)),
                    dmq(term("w3", false), term("w3w1", true))
            )
    );

    // Trigger word entered with different casing.
    final Query mixedCaseQuery = new Query();
    addTerm(mixedCaseQuery, "w1", false);
    addTerm(mixedCaseQuery, "Trigger_Lower", false);
    addTerm(mixedCaseQuery, "w3", false);
    final ExpandedQuery mixedCaseResult = compoundRewriter.rewrite(new ExpandedQuery(mixedCaseQuery));
    assertThat((Query) mixedCaseResult.getUserQuery(),
            bq(
                    dmq(term("w1", false), term("w3w1", true)),
                    dmq(term("w3", false), term("w3w1", true))
            )
    );
}
/**
 * In "w0 w1 trigger w3 w4" the reverse compound "w3w1" must be added only to the two terms
 * around the trigger word, while the regular compounds "w0w1" and "w3w4" are added to their
 * own term pairs.
 */
@Test
public void testCompoundTriggerAffectsOnlySurroundingCompound() throws IOException {
    final TrieMap<Boolean> reverseTriggers = new TrieMap<>();
    reverseTriggers.put("trigger", true);

    // don't de-compound
    final SuggestWord[][] noBreaks = {new SuggestWord[] {}};
    when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any()))
            .thenReturn(noBreaks);

    final Map<List<String>, CombineSuggestion[]> combinations = new HashMap<>();
    combinations.put(Arrays.asList("w0", "w1"), new CombineSuggestion[] {combineSuggestion("w0w1", 0, 1)});
    combinations.put(Arrays.asList("w3", "w1"), new CombineSuggestion[] {combineSuggestion("w3w1", 0, 1)});
    combinations.put(Arrays.asList("w3", "w4"), new CombineSuggestion[] {combineSuggestion("w3w4", 0, 1)});
    setupWordBreakMockWithCombinations(combinations);

    final WordBreakCompoundRewriter compoundRewriter = new WordBreakCompoundRewriter(
            wordBreakSpellChecker, indexReader, "field1", false, false, reverseTriggers, 5, false);

    final Query userQuery = new Query();
    addTerm(userQuery, "w0", false);
    addTerm(userQuery, "w1", false);
    addTerm(userQuery, "trigger", false);
    addTerm(userQuery, "w3", false);
    addTerm(userQuery, "w4", false);

    final ExpandedQuery result = compoundRewriter.rewrite(new ExpandedQuery(userQuery));

    assertThat((Query) result.getUserQuery(),
            bq(
                    dmq(term("w0", false), term("w0w1", true)),
                    dmq(term("w1", false), term("w0w1", true), term("w3w1", true)),
                    dmq(term("w3", false), term("w3w1", true), term("w3w4", true)),
                    dmq(term("w4", false), term("w3w4", true))
            )
    );
}
/**
 * Builds a word-break suggestion by converting each part with
 * {@code WordBreakCompoundRewriterTest.suggestWord}, keeping the given order.
 *
 * @param parts the words the suggestion consists of
 * @return one {@link SuggestWord} per part
 */
private static SuggestWord[] decompoundSuggestion(String... parts) {
    final SuggestWord[] words = new SuggestWord[parts.length];
    for (int i = 0; i < parts.length; i++) {
        words[i] = WordBreakCompoundRewriterTest.suggestWord(parts[i]);
    }
    return words;
}