下面列出了怎么用org.apache.lucene.search.spans.SpanMultiTermQueryWrapper的API类实例代码及写法,或者点击链接到github查看源代码。
/**
 * Reads a span-multi-term clause from the parser of the given context and wraps the
 * contained multi term query in a {@link SpanMultiTermQueryWrapper}.
 *
 * <p>The next token must be a field name equal to {@code MATCH_NAME}, followed by the start
 * of an object holding the inner query. One more token is consumed after the inner query
 * has been parsed.
 *
 * @param parseContext context supplying the parser and the inner-query parsing
 * @return the inner multi term query wrapped as a span query
 * @throws IOException if reading from the parser fails
 * @throws QueryParsingException if the clause is missing, malformed, or the inner query is
 *         not a {@link MultiTermQuery}
 */
@Override
public Query parse(QueryParseContext parseContext) throws IOException, QueryParsingException {
    final XContentParser parser = parseContext.parser();

    // First token: must be the field name that introduces the wrapped query.
    final Token fieldToken = parser.nextToken();
    final boolean nameMatches = MATCH_NAME.equals(parser.currentName());
    if (!(nameMatches && fieldToken == XContentParser.Token.FIELD_NAME)) {
        throw new QueryParsingException(parseContext, "spanMultiTerm must have [" + MATCH_NAME + "] multi term query clause");
    }

    // Second token: the inner query has to be given as an object.
    if (parser.nextToken() != XContentParser.Token.START_OBJECT) {
        throw new QueryParsingException(parseContext, "spanMultiTerm must have [" + MATCH_NAME + "] multi term query clause");
    }

    final Query inner = parseContext.parseInnerQuery();
    if (inner instanceof MultiTermQuery) {
        // Step past the inner query before handing back the wrapper.
        parser.nextToken();
        return new SpanMultiTermQueryWrapper<>((MultiTermQuery) inner);
    }
    throw new QueryParsingException(parseContext, "spanMultiTerm [" + MATCH_NAME + "] must be of type multi term query");
}
/**
 * Highlights an unordered span-near (slop 3) of the wildcard {@code free*} and the term
 * {@code speech}; only "freedom" and "speech" are expected to be marked.
 */
public void testFilteredOutSpan() throws IOException {
    indexWriter.addDocument(newDoc("freezing cold stuff like stuff freedom of speech"));
    initReaderSearcherHighlighter();

    final SpanQuery[] nearClauses = {
        new SpanMultiTermQueryWrapper<>(new WildcardQuery(new Term("body", "free*"))),
        new SpanTermQuery(new Term("body", "speech"))
    };
    final SpanQuery near = new SpanNearQuery(nearClauses, 3, false);

    final BooleanQuery.Builder builder = new BooleanQuery.Builder();
    builder.add(near, BooleanClause.Occur.MUST);
    final BooleanQuery query = builder.build();

    final TopDocs hits = searcher.search(query, 10, Sort.INDEXORDER);
    final String[] actual = highlighter.highlight("body", query, hits);

    // The spans' MatchesIterator exposes each underlying term, so the "of" in between stays unmarked.
    final String[] expected = {"freezing cold stuff like stuff <b>freedom</b> of <b>speech</b>"};
    assertArrayEquals(expected, actual);
}
/**
 * Runs a payload check over an ordered, zero-slop span-near built from four wildcard
 * clauses and expects document 529 as the only hit.
 */
public void testRewrite() throws IOException {
    final String[] wildcards = {"fiv*", "hund*", "twent*", "nin*"};

    final SpanQuery[] clauses = new SpanQuery[wildcards.length];
    final List<BytesRef> payloads = new ArrayList<>();
    for (int pos = 0; pos < wildcards.length; pos++) {
        clauses[pos] = new SpanMultiTermQueryWrapper<>(new WildcardQuery(new Term("field", wildcards[pos])));
        // One expected payload per clause: "pos: 0" .. "pos: 3".
        payloads.add(new BytesRef("pos: " + pos));
    }

    final SpanNearQuery near = new SpanNearQuery(clauses, 0, true);
    final SpanPayloadCheckQuery query = new SpanPayloadCheckQuery(near, payloads);

    // Without a proper rewrite of the wildcard clauses this would fail with "Rewrite first!".
    checkHits(query, new int[]{529});
}
/**
 * Registers an ordered span-near of the wildcard {@code term*} followed by {@code foo} with
 * a monitor and expects two highlight hits for the document "term1 foo".
 */
public void testWildcardProximityRewrites() throws Exception {
    final SpanNearQuery.Builder nearBuilder = SpanNearQuery.newOrderedNearQuery(FIELD);
    nearBuilder.addClause(new SpanMultiTermQueryWrapper<>(new WildcardQuery(new Term(FIELD, "term*"))));
    nearBuilder.addClause(new SpanTermQuery(new Term(FIELD, "foo")));
    final SpanNearQuery snq = nearBuilder.build();

    try (Monitor monitor = newMonitor()) {
        monitor.register(new MonitorQuery("1", snq));

        final MatchingQueries<HighlightsMatch> result =
            monitor.match(buildDoc("term1 foo"), HighlightsMatch.MATCHER);
        final HighlightsMatch match = result.matches("1");

        assertNotNull(match);
        assertEquals(2, match.getHitCount());
    }
}
/**
 * Creates a span query backed by a {@link PrefixQuery} on the given term.
 *
 * <p>The term text is split at the first occurrence of {@code MtasToken.DELIMITER}: the part
 * before it becomes the prefix and the part after it becomes the value. The value is
 * {@code null} when there is no delimiter or nothing follows it.
 *
 * @param term the term handed to the prefix query
 * @param singlePosition when {@code true}, {@code 1} is passed for both super-constructor
 *        arguments; otherwise {@code null} is passed for both
 */
public MtasSpanPrefixQuery(Term term, boolean singlePosition) {
    super(singlePosition ? 1 : null, singlePosition ? 1 : null);
    query = new SpanMultiTermQueryWrapper<>(new PrefixQuery(term));
    this.term = term;
    this.singlePosition = singlePosition;

    final String text = term.text();
    final int delimiterAt = text.indexOf(MtasToken.DELIMITER);
    if (delimiterAt < 0) {
        // No delimiter: the whole text is the prefix.
        prefix = text;
        value = null;
    } else {
        prefix = text.substring(0, delimiterAt);
        final String remainder = text.substring(delimiterAt + MtasToken.DELIMITER.length());
        value = remainder.isEmpty() ? null : remainder;
    }
}
/**
 * Creates a span query backed by a {@link WildcardQuery} on the given term.
 *
 * <p>The term text is split at the first occurrence of {@code MtasToken.DELIMITER}: the part
 * before it becomes the prefix and the part after it becomes the value. The value is
 * {@code null} when there is no delimiter or nothing follows it.
 *
 * @param term the term handed to the wildcard query
 * @param singlePosition when {@code true}, {@code 1} is passed for both super-constructor
 *        arguments; otherwise {@code null} is passed for both
 */
public MtasSpanWildcardQuery(Term term, boolean singlePosition) {
    super(singlePosition ? 1 : null, singlePosition ? 1 : null);
    query = new SpanMultiTermQueryWrapper<>(new WildcardQuery(term));
    this.term = term;
    this.singlePosition = singlePosition;

    final String text = term.text();
    final int delimiterAt = text.indexOf(MtasToken.DELIMITER);
    if (delimiterAt < 0) {
        // No delimiter: the whole text is the prefix.
        prefix = text;
        value = null;
    } else {
        prefix = text.substring(0, delimiterAt);
        final String remainder = text.substring(delimiterAt + MtasToken.DELIMITER.length());
        value = remainder.isEmpty() ? null : remainder;
    }
}
/**
 * Creates a span query backed by a {@link RegexpQuery} on the given term.
 *
 * <p>The term text is split at the first occurrence of {@code MtasToken.DELIMITER}: the part
 * before it becomes the prefix and the part after it becomes the value. The value is
 * {@code null} when there is no delimiter or nothing follows it.
 *
 * @param term the term handed to the regexp query
 * @param singlePosition when {@code true}, {@code 1} is passed for both super-constructor
 *        arguments; otherwise {@code null} is passed for both
 */
public MtasSpanRegexpQuery(Term term, boolean singlePosition) {
    super(singlePosition ? 1 : null, singlePosition ? 1 : null);
    query = new SpanMultiTermQueryWrapper<>(new RegexpQuery(term));
    this.term = term;
    this.singlePosition = singlePosition;

    final String text = term.text();
    final int delimiterAt = text.indexOf(MtasToken.DELIMITER);
    if (delimiterAt < 0) {
        // No delimiter: the whole text is the prefix.
        prefix = text;
        value = null;
    } else {
        prefix = text.substring(0, delimiterAt);
        final String remainder = text.substring(delimiterAt + MtasToken.DELIMITER.length());
        value = remainder.isEmpty() ? null : remainder;
    }
}
/**
 * Builds a boolean query with one optional (SHOULD) clause per field of the definition.
 *
 * <p>For each field the query string is split on single spaces; every piece becomes a regex
 * span clause, and the clauses are combined into an ordered span-near with slop 0.
 *
 * @param qd definition providing the field names and the query string
 * @return the combined boolean query
 */
private Query createRegexQuery ( QueryDef qd ) {
    final BooleanQuery combined = new BooleanQuery();

    for ( String field : qd.fields ) {
        final String[] pieces = qd.query.split(" ");
        final SpanQuery[] clauses = new SpanQuery[pieces.length];

        for ( int i = 0; i < pieces.length; i++ ) {
            final String piece = pieces[i];

            final RegexQuery regex = new RegexQuery( new Term( field, piece ) );
            regex.setRegexImplementation( new JavaUtilRegexCapabilities() );

            // A pattern that also matches the empty string (e.g. '(optional)?' or 'bla|a*')
            // gets its span wrapped in a SpanOrQuery.
            final boolean matchesEmpty = Pattern.matches(piece, "");
            final SpanMultiTermQueryWrapper<RegexQuery> span = new SpanMultiTermQueryWrapper<RegexQuery>( regex );
            if ( matchesEmpty ) {
                clauses[i] = new SpanOrQuery( span );
            } else {
                clauses[i] = span;
            }
        }

        combined.add( new SpanNearQuery( clauses, 0, true ), Occur.SHOULD );
    }

    return combined;
}
/**
 * Checks that a term matched by the span-near ("charlie" here) is not highlighted where it
 * does not sit next to the other span clause. The query combines a clause that needs a
 * rewrite with a free-standing multi-term query.
 */
public void testRewriteAndMtq() throws IOException {
    indexWriter.addDocument(newDoc("alpha bravo charlie - charlie bravo alpha"));
    initReaderSearcherHighlighter();

    final SpanQuery bravo = new SpanTermQuery(new Term("body", "bravo"));
    final SpanQuery chPrefix = new SpanMultiTermQueryWrapper<>(new PrefixQuery(new Term("body", "ch"))); // REWRITES
    final SpanNearQuery snq = new SpanNearQuery(new SpanQuery[]{bravo, chPrefix}, 0, true);

    final BooleanQuery.Builder builder = new BooleanQuery.Builder();
    builder.add(snq, BooleanClause.Occur.MUST);
    builder.add(new PrefixQuery(new Term("body", "al")), BooleanClause.Occur.MUST); // MTQ
    builder.add(newPhraseQuery("body", "alpha bravo"), BooleanClause.Occur.MUST);
    // A clause on another field; it must not influence highlighting of "body".
    builder.add(newPhraseQuery("title", "bravo alpha"), BooleanClause.Occur.SHOULD);
    final BooleanQuery query = builder.build();

    TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
    String[] snippets = highlighter.highlight("body", query, topDocs);

    final String expected = highlighter.getFlags("body").contains(HighlightFlag.WEIGHT_MATCHES)
        ? "<b>alpha bravo</b> <b>charlie</b> - charlie bravo <b>alpha</b>"
        : "<b>alpha</b> <b>bravo</b> <b>charlie</b> - charlie bravo <b>alpha</b>";
    assertArrayEquals(new String[]{expected}, snippets);

    // Second pass with multi-term handling switched off (phrase processing stays on):
    // only "alpha bravo" should be found.
    highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
    highlighter.setHandleMultiTermQuery(false);
    topDocs = searcher.search(query, 10, Sort.INDEXORDER);
    snippets = highlighter.highlight("body", query, topDocs);

    final String[] expectedWithoutMtq = {"<b>alpha</b> <b>bravo</b> charlie - charlie bravo alpha"};
    assertArrayEquals(expectedWithoutMtq, snippets);
}
/**
 * Same scenario as {@link #testRewriteAndMtq}, but without the free-standing multi-term
 * query clause.
 */
public void testRewrite() throws IOException {
    indexWriter.addDocument(newDoc("alpha bravo charlie - charlie bravo alpha"));
    initReaderSearcherHighlighter();

    final SpanQuery bravo = new SpanTermQuery(new Term("body", "bravo"));
    final SpanQuery chPrefix = new SpanMultiTermQueryWrapper<>(new PrefixQuery(new Term("body", "ch"))); // REWRITES
    final SpanNearQuery snq = new SpanNearQuery(new SpanQuery[]{bravo, chPrefix}, 0, true);

    final BooleanQuery.Builder builder = new BooleanQuery.Builder();
    builder.add(snq, BooleanClause.Occur.MUST);
    builder.add(newPhraseQuery("body", "alpha bravo"), BooleanClause.Occur.MUST);
    // A clause on another field; it must not influence highlighting of "body".
    builder.add(newPhraseQuery("title", "bravo alpha"), BooleanClause.Occur.SHOULD);
    final BooleanQuery query = builder.build();

    TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
    String[] snippets = highlighter.highlight("body", query, topDocs);

    final String expected = highlighter.getFlags("body").contains(HighlightFlag.WEIGHT_MATCHES)
        ? "<b>alpha bravo</b> <b>charlie</b> - charlie bravo alpha"
        : "<b>alpha</b> <b>bravo</b> <b>charlie</b> - charlie bravo alpha";
    assertArrayEquals(new String[]{expected}, snippets);

    // Second pass with multi-term handling switched off (phrase processing stays on):
    // only "alpha bravo" should be found.
    highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
    highlighter.setHandleMultiTermQuery(false);
    topDocs = searcher.search(query, 10, Sort.INDEXORDER);
    snippets = highlighter.highlight("body", query, topDocs);

    final String[] expectedWithoutMtq = {"<b>alpha</b> <b>bravo</b> charlie - charlie bravo alpha"};
    assertArrayEquals(expectedWithoutMtq, snippets);
}
/**
 * Highlights a wildcard span query ({@code te*}) that is wrapped in a {@link SpanBoostQuery}
 * and expects the matching word to be marked in both indexed documents.
 */
public void testSpanWildcard() throws Exception {
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);

    Field body = new Field("body", "", fieldType);
    Document doc = new Document();
    doc.add(body);

    body.setStringValue("This is a test.");
    iw.addDocument(doc);
    body.setStringValue("Test a one sentence document.");
    iw.addDocument(doc);

    // try-with-resources: the reader is closed even if a search or an assertion below throws.
    try (IndexReader ir = iw.getReader()) {
        iw.close();

        IndexSearcher searcher = newSearcher(ir);
        UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
        // wrap in a SpanBoostQuery to also show we see inside it
        Query query = new SpanBoostQuery(
            new SpanMultiTermQueryWrapper<>(new WildcardQuery(new Term("body", "te*"))), 2.0f);
        TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
        assertEquals(2, topDocs.totalHits.value);
        String[] snippets = highlighter.highlight("body", query, topDocs);
        assertEquals(2, snippets.length);
        assertEquals("This is a <b>test</b>.", snippets[0]);
        assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
    }
}
/**
 * Highlights a wildcard span query ({@code te*}) nested in a single-clause
 * {@link SpanOrQuery} and expects the matching word to be marked in both indexed documents.
 */
public void testSpanOr() throws Exception {
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);

    Field body = new Field("body", "", fieldType);
    Document doc = new Document();
    doc.add(body);

    body.setStringValue("This is a test.");
    iw.addDocument(doc);
    body.setStringValue("Test a one sentence document.");
    iw.addDocument(doc);

    // try-with-resources: the reader is closed even if a search or an assertion below throws.
    try (IndexReader ir = iw.getReader()) {
        iw.close();

        IndexSearcher searcher = newSearcher(ir);
        UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
        SpanQuery childQuery = new SpanMultiTermQueryWrapper<>(new WildcardQuery(new Term("body", "te*")));
        Query query = new SpanOrQuery(new SpanQuery[]{childQuery});
        TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
        assertEquals(2, topDocs.totalHits.value);
        String[] snippets = highlighter.highlight("body", query, topDocs);
        assertEquals(2, snippets.length);
        assertEquals("This is a <b>test</b>.", snippets[0]);
        assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
    }
}
/**
 * Highlights an unordered, zero-slop {@link SpanNearQuery} whose two clauses are the same
 * wildcard span query ({@code te*}) and expects the matching word to be marked in both
 * indexed documents.
 */
public void testSpanNear() throws Exception {
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);

    Field body = new Field("body", "", fieldType);
    Document doc = new Document();
    doc.add(body);

    body.setStringValue("This is a test.");
    iw.addDocument(doc);
    body.setStringValue("Test a one sentence document.");
    iw.addDocument(doc);

    // try-with-resources: the reader is closed even if a search or an assertion below throws.
    try (IndexReader ir = iw.getReader()) {
        iw.close();

        IndexSearcher searcher = newSearcher(ir);
        UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
        SpanQuery childQuery = new SpanMultiTermQueryWrapper<>(new WildcardQuery(new Term("body", "te*")));
        Query query = new SpanNearQuery(new SpanQuery[]{childQuery, childQuery}, 0, false);
        TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
        assertEquals(2, topDocs.totalHits.value);
        String[] snippets = highlighter.highlight("body", query, topDocs);
        assertEquals(2, snippets.length);
        assertEquals("This is a <b>test</b>.", snippets[0]);
        assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
    }
}
/**
 * Highlights a {@link SpanNotQuery} that includes the wildcard {@code te*} and excludes the
 * term {@code bogus}; the matching word is expected to be marked in both indexed documents.
 */
public void testSpanNot() throws Exception {
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);

    Field body = new Field("body", "", fieldType);
    Document doc = new Document();
    doc.add(body);

    body.setStringValue("This is a test.");
    iw.addDocument(doc);
    body.setStringValue("Test a one sentence document.");
    iw.addDocument(doc);

    // try-with-resources: the reader is closed even if a search or an assertion below throws.
    try (IndexReader ir = iw.getReader()) {
        iw.close();

        IndexSearcher searcher = newSearcher(ir);
        UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
        SpanQuery include = new SpanMultiTermQueryWrapper<>(new WildcardQuery(new Term("body", "te*")));
        SpanQuery exclude = new SpanTermQuery(new Term("body", "bogus"));
        Query query = new SpanNotQuery(include, exclude);
        TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
        assertEquals(2, topDocs.totalHits.value);
        String[] snippets = highlighter.highlight("body", query, topDocs);
        assertEquals(2, snippets.length);
        assertEquals("This is a <b>test</b>.", snippets[0]);
        assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
    }
}
/**
 * Highlights a {@link SpanFirstQuery} (end position 1000000) around the wildcard
 * {@code te*} and expects the matching word to be marked in both indexed documents.
 */
public void testSpanPositionCheck() throws Exception {
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);

    Field body = new Field("body", "", fieldType);
    Document doc = new Document();
    doc.add(body);

    body.setStringValue("This is a test.");
    iw.addDocument(doc);
    body.setStringValue("Test a one sentence document.");
    iw.addDocument(doc);

    // try-with-resources: the reader is closed even if a search or an assertion below throws.
    try (IndexReader ir = iw.getReader()) {
        iw.close();

        IndexSearcher searcher = newSearcher(ir);
        UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
        SpanQuery childQuery = new SpanMultiTermQueryWrapper<>(new WildcardQuery(new Term("body", "te*")));
        Query query = new SpanFirstQuery(childQuery, 1000000);
        TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
        assertEquals(2, topDocs.totalHits.value);
        String[] snippets = highlighter.highlight("body", query, topDocs);
        assertEquals(2, snippets.length);
        assertEquals("This is a <b>test</b>.", snippets[0]);
        assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
    }
}
/**
 * Highlights a wildcard span query ({@code foxtr*}) that is hidden inside a
 * {@code MyWrapperSpanQuery} and expects "foxtrot" to be marked in the single document.
 */
public void testCustomSpanQueryHighlighting() throws Exception {
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);

    Document doc = new Document();
    doc.add(new Field("body", "alpha bravo charlie delta echo foxtrot golf hotel india juliet", fieldType));
    doc.add(newTextField("id", "id", Field.Store.YES));
    iw.addDocument(doc);

    // try-with-resources: the reader is closed even if a search or an assertion below throws.
    try (IndexReader ir = iw.getReader()) {
        iw.close();

        IndexSearcher searcher = newSearcher(ir);
        UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);

        // Look the document up through its "id" field to learn its internal doc id.
        int docId = searcher.search(new TermQuery(new Term("id", "id")), 1).scoreDocs[0].doc;

        WildcardQuery wildcardQuery = new WildcardQuery(new Term("body", "foxtr*"));
        SpanMultiTermQueryWrapper<WildcardQuery> wildcardQueryWrapper = new SpanMultiTermQueryWrapper<>(wildcardQuery);
        SpanQuery wrappedQuery = new MyWrapperSpanQuery(wildcardQueryWrapper);

        BooleanQuery query = new BooleanQuery.Builder()
            .add(wrappedQuery, BooleanClause.Occur.SHOULD)
            .build();

        int[] docIds = new int[]{docId};

        String[] snippets = highlighter.highlightFields(new String[]{"body"}, query, docIds, new int[]{2}).get("body");
        assertEquals(1, snippets.length);
        assertEquals("alpha bravo charlie delta echo <b>foxtrot</b> golf hotel india juliet", snippets[0]);
    }
}
/**
 * Searches a memory index containing "hello there" with a bare span-wrapped regexp query
 * ({@code worl.}) and expects a score of 0; the test name records that this search used to
 * raise a NullPointerException.
 */
public void testNullPointerException() throws IOException {
    final SpanQuery spanRegex = new SpanMultiTermQueryWrapper<>(new RegexpQuery(new Term("field", "worl.")));

    final MemoryIndex index = randomMemoryIndex();
    index.addField("field", new MockAnalyzer(random()).tokenStream("field", "hello there"));

    // This is the call that throws an NPE.
    final float score = index.search(spanRegex);
    assertEquals(0, score, 0.00001f);
    TestUtil.checkReader(index.createSearcher().getIndexReader());
}
/**
 * Same search as the bare span-wrapped regexp case, but with the wrapper nested in a
 * {@link SpanOrQuery}; a score of 0 is expected.
 */
public void testPassesIfWrapped() throws IOException {
    final SpanQuery spanRegexInOr =
        new SpanOrQuery(new SpanMultiTermQueryWrapper<>(new RegexpQuery(new Term("field", "worl."))));

    final MemoryIndex index = randomMemoryIndex();
    index.addField("field", new MockAnalyzer(random()).tokenStream("field", "hello there"));

    // With the extra SpanOrQuery around it the search goes through.
    final float score = index.search(spanRegexInOr);
    assertEquals(0, score, 0.00001f);
    TestUtil.checkReader(index.createSearcher().getIndexReader());
}
/**
 * Runs a {@link PayloadScoreQuery} over a span-wrapped wildcard query ({@code xyz*}); the
 * search itself is the check.
 */
public void testRewrite() throws IOException {
    final WildcardQuery wildcard = new WildcardQuery(new Term("field", "xyz*"));
    final SpanMultiTermQueryWrapper<WildcardQuery> span = new SpanMultiTermQueryWrapper<>(wildcard);
    final PayloadScoreQuery scored =
        new PayloadScoreQuery(span, new AveragePayloadFunction(), PayloadDecoder.FLOAT_DECODER, false);

    // Had the wrapped query not been rewritten properly, this would fail with "Rewrite first!".
    searcher.search(scored, 1);
}
/**
 * Builds an ordered span-near query over the given terms.
 *
 * <p>A term containing {@code *} or {@code ?} becomes a span-wrapped {@link WildcardQuery};
 * every other term becomes a plain {@link SpanTermQuery}.
 *
 * @param field field all terms are searched in
 * @param slop slop passed to the span-near query
 * @param terms terms in the order they must occur
 * @return the ordered span-near query
 */
protected SpanNearQuery spanNearQuery(String field, int slop, String... terms) {
    final SpanQuery[] clauses = new SpanQuery[terms.length];
    int slot = 0;
    for (String text : terms) {
        final boolean hasWildcard = text.contains("*") || text.contains("?");
        if (hasWildcard) {
            clauses[slot] = new SpanMultiTermQueryWrapper<>(new WildcardQuery(new Term(field, text)));
        } else {
            clauses[slot] = new SpanTermQuery(new Term(field, text));
        }
        slot++;
    }
    return new SpanNearQuery(clauses, slop, true);
}
/**
 * Collects terms from an ordered span-near of a span-wrapped regexp and the term "is";
 * only the term "is" is expected.
 */
public void testOrderedNearWithWildcardExtractor() {
    final SpanQuery regexClause =
        new SpanMultiTermQueryWrapper<>(new RegexpQuery(new Term("field", "super.*cali.*")));
    final SpanQuery termClause = new SpanTermQuery(new Term("field", "is"));
    final SpanNearQuery near = new SpanNearQuery(new SpanQuery[]{regexClause, termClause}, 0, true);

    final Set<Term> collected = collectTerms(near);
    assertEquals(Collections.singleton(new Term("field", "is")), collected);
}
/**
 * Collects terms from a span-wrapped regexp query and expects exactly one term whose field
 * is {@code TermFilteredPresearcher.ANYTOKEN_FIELD}.
 */
public void testSpanMultiTerms() {
    final RegexpQuery regex = new RegexpQuery(new Term("field", "term.*"));
    final SpanQuery spanRegex = new SpanMultiTermQueryWrapper<>(regex);

    final Set<Term> collected = collectTerms(spanRegex);

    assertEquals(1, collected.size());
    final Term only = collected.iterator().next();
    assertEquals(TermFilteredPresearcher.ANYTOKEN_FIELD, only.field());
}
/**
 * Turns a user query string into a boolean query on a single field.
 *
 * <p>When required terms are present, each becomes a span clause: a wildcard or fuzzy
 * wrapper, or a plain term span when the analysed token is non-empty. With more than one
 * span, an ordered zero-slop near query (boost 61) and unordered near queries for slops
 * 0..9 (boost 60 down to 51) are added as optional clauses. The required terms are then
 * added as MUST clauses. Terms reported by {@code getNotRequiredTerms()} are always added
 * as MUST_NOT clauses.
 *
 * @param aQuery the raw query string
 * @param aSearchField the field to search in
 * @return the assembled boolean query
 * @throws IOException declared by the signature; presumably raised by the token analysis
 */
public Query parse(final String aQuery, final String aSearchField) throws IOException {
    final var theTokenizer = new QueryTokenizer(aQuery);
    final var theBuilder = new BooleanQuery.Builder();

    if (!theTokenizer.getRequiredTerms().isEmpty()) {
        // One span clause per required term.
        final List<SpanQuery> theClauses = new ArrayList<>();
        for (final var theRequired : theTokenizer.getRequiredTerms()) {
            if (QueryUtils.isWildCard(theRequired)) {
                theClauses.add(new SpanMultiTermQueryWrapper<>(new WildcardQuery(new Term(aSearchField, theRequired))));
                continue;
            }
            if (QueryUtils.isFuzzy(theRequired)) {
                theClauses.add(new SpanMultiTermQueryWrapper<>(new FuzzyQuery(new Term(aSearchField, theRequired))));
                continue;
            }
            // Analysis may drop the token entirely (stop words and the like); skip it then.
            final var theAnalyzed = toToken(theRequired, aSearchField);
            if (StringUtils.isEmpty(theAnalyzed)) {
                continue;
            }
            theClauses.add(new SpanTermQuery(new Term(aSearchField, theAnalyzed)));
        }

        if (theClauses.size() > 1) {
            // The terms exactly as typed, in order: this gets the highest boost.
            final SpanQuery theOrdered = new SpanNearQuery(theClauses.toArray(new SpanQuery[theClauses.size()]), 0, true);
            theBuilder.add(new BoostQuery(theOrdered, 61), BooleanClause.Occur.SHOULD);

            // Unordered matches, boosted less the more slop they need.
            final var theSlopLimit = 10;
            var theCurrentSlop = 0;
            while (theCurrentSlop < theSlopLimit) {
                final SpanQuery theUnordered =
                    new SpanNearQuery(theClauses.toArray(new SpanQuery[theClauses.size()]), theCurrentSlop, false);
                theBuilder.add(new BoostQuery(theUnordered, 50 + theSlopLimit - theCurrentSlop), BooleanClause.Occur.SHOULD);
                theCurrentSlop++;
            }
        }

        // Unboosted clauses that make sure the searched terms occur in the document at all.
        addToBooleanQuery(theTokenizer.getRequiredTerms(), aSearchField, theBuilder, BooleanClause.Occur.MUST);
    }

    // Terms that must not show up in any result.
    addToBooleanQuery(theTokenizer.getNotRequiredTerms(), aSearchField, theBuilder, BooleanClause.Occur.MUST_NOT);

    return theBuilder.build();
}