下面列出了怎么用org.apache.lucene.search.MultiPhraseQuery的API类实例代码及写法,或者点击链接到github查看源代码。
@Override
void flatten(Query sourceQuery, IndexReader reader, Collection<Query> flatQueries, float boost) throws IOException {
if (sourceQuery instanceof SpanTermQuery) {
super.flatten(new TermQuery(((SpanTermQuery) sourceQuery).getTerm()), reader, flatQueries, boost);
} else if (sourceQuery instanceof ConstantScoreQuery) {
flatten(((ConstantScoreQuery) sourceQuery).getQuery(), reader, flatQueries, boost);
} else if (sourceQuery instanceof FunctionScoreQuery) {
flatten(((FunctionScoreQuery) sourceQuery).getSubQuery(), reader, flatQueries, boost);
} else if (sourceQuery instanceof MultiPhrasePrefixQuery) {
flatten(sourceQuery.rewrite(reader), reader, flatQueries, boost);
} else if (sourceQuery instanceof FiltersFunctionScoreQuery) {
flatten(((FiltersFunctionScoreQuery) sourceQuery).getSubQuery(), reader, flatQueries, boost);
} else if (sourceQuery instanceof MultiPhraseQuery) {
MultiPhraseQuery q = ((MultiPhraseQuery) sourceQuery);
convertMultiPhraseQuery(0, new int[q.getTermArrays().size()], q, q.getTermArrays(), q.getPositions(), reader, flatQueries);
} else if (sourceQuery instanceof BlendedTermQuery) {
final BlendedTermQuery blendedTermQuery = (BlendedTermQuery) sourceQuery;
flatten(blendedTermQuery.rewrite(reader), reader, flatQueries, boost);
} else {
super.flatten(sourceQuery, reader, flatQueries, boost);
}
}
private Query applySlop(Query q, int slop) {
if (q instanceof PhraseQuery) {
PhraseQuery pq = (PhraseQuery) q;
PhraseQuery.Builder builder = new PhraseQuery.Builder();
builder.setSlop(slop);
final Term[] terms = pq.getTerms();
final int[] positions = pq.getPositions();
for (int i = 0; i < terms.length; ++i) {
builder.add(terms[i], positions[i]);
}
pq = builder.build();
pq.setBoost(q.getBoost());
return pq;
} else if (q instanceof MultiPhraseQuery) {
((MultiPhraseQuery) q).setSlop(slop);
return q;
} else {
return q;
}
}
public void testQueryScorerMultiPhraseQueryHighlightingWithGap() throws Exception {
MultiPhraseQuery.Builder mpqb = new MultiPhraseQuery.Builder();
/*
* The toString of MultiPhraseQuery doesn't work so well with these
* out-of-order additions, but the Query itself seems to match accurately.
*/
mpqb.add(new Term[] { new Term(FIELD_NAME, "wordz") }, 2);
mpqb.add(new Term[] { new Term(FIELD_NAME, "wordx") }, 0);
doSearching(mpqb.build());
final int maxNumFragmentsRequired = 1;
final int expectedHighlights = 2;
assertExpectedHighlightCount(maxNumFragmentsRequired, expectedHighlights);
}
public void testSynonyms() throws IOException {
indexWriter.addDocument(newDoc("mother father w mom father w dad"));
initReaderSearcherHighlighter();
MultiPhraseQuery query = new MultiPhraseQuery.Builder()
.add(new Term[]{new Term("body", "mom"), new Term("body", "mother")})
.add(new Term[]{new Term("body", "dad"), new Term("body", "father")})
.build();
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
String[] snippets = highlighter.highlight("body", query, topDocs);
if (highlighter.getFlags("body").contains(HighlightFlag.WEIGHT_MATCHES)) {
assertArrayEquals(new String[]{"<b>mother father</b> w <b>mom father</b> w dad"}, snippets);
} else {
assertArrayEquals(new String[]{"<b>mother</b> <b>father</b> w <b>mom</b> <b>father</b> w dad"}, snippets);
}
}
private Query applySlop(Query q, int slop) {
if (q instanceof PhraseQuery) {
PhraseQuery.Builder builder = new PhraseQuery.Builder();
builder.setSlop(slop);
PhraseQuery pq = (PhraseQuery) q;
org.apache.lucene.index.Term[] terms = pq.getTerms();
int[] positions = pq.getPositions();
for (int i = 0; i < terms.length; ++i) {
builder.add(terms[i], positions[i]);
}
q = builder.build();
} else if (q instanceof MultiPhraseQuery) {
MultiPhraseQuery mpq = (MultiPhraseQuery)q;
if (slop != mpq.getSlop()) {
q = new MultiPhraseQuery.Builder(mpq).setSlop(slop).build();
}
}
return q;
}
public void testMultiPhraseQuery() throws Exception {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new CannedAnalyzer()));
Document doc = new Document();
doc.add(newTextField("field", "", Field.Store.NO));
w.addDocument(doc);
IndexReader r = DirectoryReader.open(w);
IndexSearcher s = newSearcher(r);
Query q = new StandardQueryParser(new CannedAnalyzer()).parse("\"a\"", "field");
assertTrue(q instanceof MultiPhraseQuery);
assertEquals(1, s.search(q, 10).totalHits.value);
r.close();
w.close();
dir.close();
}
public void testMultiPhraseQueryParsing() throws Exception {
TokenAndPos[] INCR_0_QUERY_TOKENS_AND = new TokenAndPos[]{
new TokenAndPos("a", 0),
new TokenAndPos("1", 0),
new TokenAndPos("b", 1),
new TokenAndPos("1", 1),
new TokenAndPos("c", 2)
};
QueryParser qp = new QueryParser("field", new CannedAnalyzer(INCR_0_QUERY_TOKENS_AND));
Query q = qp.parse("\"this text is acually ignored\"");
assertTrue("wrong query type!", q instanceof MultiPhraseQuery);
MultiPhraseQuery.Builder multiPhraseQueryBuilder = new MultiPhraseQuery.Builder();
multiPhraseQueryBuilder.add(new Term[]{ new Term("field", "a"), new Term("field", "1") }, -1);
multiPhraseQueryBuilder.add(new Term[]{ new Term("field", "b"), new Term("field", "1") }, 0);
multiPhraseQueryBuilder.add(new Term[]{ new Term("field", "c") }, 1);
assertEquals(multiPhraseQueryBuilder.build(), q);
}
private Query applySlop(Query q, int slop) {
if (q instanceof PhraseQuery) {
PhraseQuery.Builder builder = new PhraseQuery.Builder();
builder.setSlop(slop);
PhraseQuery pq = (PhraseQuery) q;
org.apache.lucene.index.Term[] terms = pq.getTerms();
int[] positions = pq.getPositions();
for (int i = 0; i < terms.length; ++i) {
builder.add(terms[i], positions[i]);
}
q = builder.build();
}
else if (q instanceof MultiPhraseQuery) {
MultiPhraseQuery mpq = (MultiPhraseQuery) q;
if (slop != mpq.getSlop()) {
q = new MultiPhraseQuery.Builder(mpq).setSlop(slop).build();
}
}
return q;
}
@Test
public void testMultiPhraseQuery() throws Exception {
// 多短语查询
Term[] terms = new Term[] { new Term("title", "NeverEnding"), new Term("title", "Xinghua,") };
Term term = new Term("title", "The");
// add之间认为是OR操作,即"NeverEnding", "Xinghua,"和"The"之间的slop不大于3
MultiPhraseQuery multiPhraseQuery = new MultiPhraseQuery.Builder().add(terms).add(term).setSlop(3).build();
TopDocs search = searcher.search(multiPhraseQuery, 1000);
Assert.assertEquals(2, search.totalHits.value);
}
@Override
public Query rewrite(IndexReader reader) throws IOException {
if (getBoost() != 1.0F) {
return super.rewrite(reader);
}
if (termArrays.isEmpty()) {
return new MatchNoDocsQuery();
}
MultiPhraseQuery query = new MultiPhraseQuery();
query.setSlop(slop);
int sizeMinus1 = termArrays.size() - 1;
for (int i = 0; i < sizeMinus1; i++) {
query.add(termArrays.get(i), positions.get(i));
}
Term[] suffixTerms = termArrays.get(sizeMinus1);
int position = positions.get(sizeMinus1);
ObjectHashSet<Term> terms = new ObjectHashSet<>();
for (Term term : suffixTerms) {
getPrefixTerms(terms, term, reader);
if (terms.size() > maxExpansions) {
break;
}
}
if (terms.isEmpty()) {
return Queries.newMatchNoDocsQuery();
}
query.add(terms.toArray(Term.class), position);
query.setBoost(getBoost());
return query.rewrite(reader);
}
public void testQueryScorerMultiPhraseQueryHighlighting() throws Exception {
MultiPhraseQuery.Builder mpqb = new MultiPhraseQuery.Builder();
mpqb.add(new Term[] { new Term(FIELD_NAME, "wordx"), new Term(FIELD_NAME, "wordb") });
mpqb.add(new Term(FIELD_NAME, "wordy"));
doSearching(mpqb.build());
final int maxNumFragmentsRequired = 2;
assertExpectedHighlightCount(maxNumFragmentsRequired, 6);
}
public void testHighlighterWithMultiPhraseQuery() throws IOException, InvalidTokenOffsetsException {
final String fieldName = "substring";
final MultiPhraseQuery mpq = new MultiPhraseQuery.Builder()
.add(new Term(fieldName, "uchu")).build();
assertHighlighting(mpq, new SimpleHTMLFormatter("<b>", "</b>"), "Buchung", "B<b>uchu</b>ng", fieldName);
}
@Override
public Query build(QueryNode queryNode) throws QueryNodeException {
SlopQueryNode phraseSlopNode = (SlopQueryNode) queryNode;
Query query = (Query) phraseSlopNode.getChild().getTag(
QueryTreeBuilder.QUERY_TREE_BUILDER_TAGID);
if (query instanceof PhraseQuery) {
PhraseQuery.Builder builder = new PhraseQuery.Builder();
builder.setSlop(phraseSlopNode.getValue());
PhraseQuery pq = (PhraseQuery) query;
org.apache.lucene.index.Term[] terms = pq.getTerms();
int[] positions = pq.getPositions();
for (int i = 0; i < terms.length; ++i) {
builder.add(terms[i], positions[i]);
}
query = builder.build();
} else {
MultiPhraseQuery mpq = (MultiPhraseQuery)query;
int slop = phraseSlopNode.getValue();
if (slop != mpq.getSlop()) {
query = new MultiPhraseQuery.Builder(mpq).setSlop(slop).build();
}
}
return query;
}
/**
* Creates complex phrase query from the cached tokenstream contents
*/
protected Query analyzeMultiPhrase(String field, TokenStream stream, int slop) throws IOException {
MultiPhraseQuery.Builder mpqb = newMultiPhraseQueryBuilder();
mpqb.setSlop(slop);
TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class);
int position = -1;
List<Term> multiTerms = new ArrayList<>();
stream.reset();
while (stream.incrementToken()) {
int positionIncrement = posIncrAtt.getPositionIncrement();
if (positionIncrement > 0 && multiTerms.size() > 0) {
if (enablePositionIncrements) {
mpqb.add(multiTerms.toArray(new Term[0]), position);
} else {
mpqb.add(multiTerms.toArray(new Term[0]));
}
multiTerms.clear();
}
position += positionIncrement;
multiTerms.add(new Term(field, termAtt.getBytesRef()));
}
if (enablePositionIncrements) {
mpqb.add(multiTerms.toArray(new Term[0]), position);
} else {
mpqb.add(multiTerms.toArray(new Term[0]));
}
return mpqb.build();
}
/** forms multiphrase query */
public void testSynonymsPhrase() throws Exception {
MultiPhraseQuery.Builder expectedBuilder = new MultiPhraseQuery.Builder();
expectedBuilder.add(new Term("field", "old"));
expectedBuilder.add(new Term[] { new Term("field", "dogs"), new Term("field", "dog") });
QueryBuilder builder = new QueryBuilder(new MockSynonymAnalyzer());
assertEquals(expectedBuilder.build(), builder.createPhraseQuery("field", "old dogs"));
}
/** forms multiphrase query */
public void testCJKSynonymsPhrase() throws Exception {
MultiPhraseQuery.Builder expectedBuilder = new MultiPhraseQuery.Builder();
expectedBuilder.add(new Term("field", "中"));
expectedBuilder.add(new Term[] { new Term("field", "国"), new Term("field", "國")});
QueryBuilder builder = new QueryBuilder(new MockCJKSynonymAnalyzer());
assertEquals(expectedBuilder.build(), builder.createPhraseQuery("field", "中国"));
expectedBuilder.setSlop(3);
assertEquals(expectedBuilder.build(), builder.createPhraseQuery("field", "中国", 3));
}
/**
* Base implementation delegates to {@link #getFieldQuery(String,String,boolean,boolean)}.
* This method may be overridden, for example, to return
* a SpanNearQuery instead of a PhraseQuery.
*
*/
protected Query getFieldQuery(String field, String queryText, int slop)
throws SyntaxError {
Query query = getFieldQuery(field, queryText, true, false);
// only set slop of the phrase query was a result of this parser
// and not a sub-parser.
if (subQParser == null) {
if (query instanceof PhraseQuery) {
PhraseQuery pq = (PhraseQuery) query;
Term[] terms = pq.getTerms();
int[] positions = pq.getPositions();
PhraseQuery.Builder builder = new PhraseQuery.Builder();
for (int i = 0; i < terms.length; ++i) {
builder.add(terms[i], positions[i]);
}
builder.setSlop(slop);
query = builder.build();
} else if (query instanceof MultiPhraseQuery) {
MultiPhraseQuery mpq = (MultiPhraseQuery)query;
if (slop != mpq.getSlop()) {
query = new MultiPhraseQuery.Builder(mpq).setSlop(slop).build();
}
}
}
return query;
}
@Override
protected MultiPhraseQuery newMultiPhraseQuery() {
return new MultiPhraseQuery() {
@Override
public void add(Term[] terms, int position) {
super.add(terms, position);
for (Term term : terms) {
String resolvedField = _fieldManager.resolveField(term.field());
customQueryCheck(resolvedField);
addField(this, resolvedField);
}
}
};
}
public static void assertEqualsQuery(Query expected, Query actual) {
assertEquals(expected.getClass(), actual.getClass());
if (expected instanceof BooleanQuery) {
assertEqualsBooleanQuery((BooleanQuery) expected, (BooleanQuery) actual);
} else if (expected instanceof SuperQuery) {
assertEqualsSuperQuery((SuperQuery) expected, (SuperQuery) actual);
} else if (expected instanceof TermQuery) {
assertEqualsTermQuery((TermQuery) expected, (TermQuery) actual);
} else if (expected instanceof PrefixQuery) {
assertEqualsPrefixQuery((PrefixQuery) expected, (PrefixQuery) actual);
} else if (expected instanceof WildcardQuery) {
assertEqualsWildcardQuery((WildcardQuery) expected, (WildcardQuery) actual);
} else if (expected instanceof FuzzyQuery) {
assertEqualsFuzzyQuery((FuzzyQuery) expected, (FuzzyQuery) actual);
} else if (expected instanceof RegexpQuery) {
assertEqualsRegexpQuery((RegexpQuery) expected, (RegexpQuery) actual);
} else if (expected instanceof TermRangeQuery) {
assertEqualsTermRangeQuery((TermRangeQuery) expected, (TermRangeQuery) actual);
} else if (expected instanceof MatchAllDocsQuery) {
assertEqualsMatchAllDocsQuery((MatchAllDocsQuery) expected, (MatchAllDocsQuery) actual);
} else if (expected instanceof MultiPhraseQuery) {
assertEqualsMultiPhraseQuery((MultiPhraseQuery) expected, (MultiPhraseQuery) actual);
} else if (expected instanceof PhraseQuery) {
assertEqualsPhraseQuery((PhraseQuery) expected, (PhraseQuery) actual);
} else if (expected instanceof NumericRangeQuery<?>) {
assertEqualsNumericRangeQuery((NumericRangeQuery<?>) expected, (NumericRangeQuery<?>) actual);
} else {
fail("Type [" + expected.getClass() + "] not supported");
}
}
@Override
public Query multiPhraseQuery(String field, TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException {
MultiPhraseQuery.Builder mpqb = new MultiPhraseQuery.Builder();
mpqb.setSlop(slop);
TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class);
int position = -1;
List<Term> multiTerms = new ArrayList<>();
stream.reset();
while (stream.incrementToken()) {
int positionIncrement = posIncrAtt.getPositionIncrement();
if (positionIncrement > 0 && multiTerms.size() > 0) {
if (enablePositionIncrements) {
mpqb.add(multiTerms.toArray(new Term[0]), position);
} else {
mpqb.add(multiTerms.toArray(new Term[0]));
}
multiTerms.clear();
}
position += positionIncrement;
multiTerms.add(new Term(field, termAtt.getBytesRef()));
}
if (enablePositionIncrements) {
mpqb.add(multiTerms.toArray(new Term[0]), position);
} else {
mpqb.add(multiTerms.toArray(new Term[0]));
}
return mpqb.build();
}
@Override
public Query rewrite(IndexReader reader) throws IOException {
Query rewritten = super.rewrite(reader);
if (rewritten != this) {
return rewritten;
}
if (termArrays.isEmpty()) {
return new MatchNoDocsQuery();
}
MultiPhraseQuery.Builder query = new MultiPhraseQuery.Builder();
query.setSlop(slop);
int sizeMinus1 = termArrays.size() - 1;
for (int i = 0; i < sizeMinus1; i++) {
query.add(termArrays.get(i), positions.get(i));
}
Term[] suffixTerms = termArrays.get(sizeMinus1);
int position = positions.get(sizeMinus1);
ObjectHashSet<Term> terms = new ObjectHashSet<>();
for (Term term : suffixTerms) {
getPrefixTerms(terms, term, reader);
if (terms.size() > maxExpansions) {
break;
}
}
if (terms.isEmpty()) {
if (sizeMinus1 == 0) {
// no prefix and the phrase query is empty
return Queries.newMatchNoDocsQuery("No terms supplied for " + MultiPhrasePrefixQuery.class.getName());
}
// if the terms does not exist we could return a MatchNoDocsQuery but this would break the unified highlighter
// which rewrites query with an empty reader.
return new BooleanQuery.Builder()
.add(query.build(), BooleanClause.Occur.MUST)
.add(Queries.newMatchNoDocsQuery("No terms supplied for " + MultiPhrasePrefixQuery.class.getName()),
BooleanClause.Occur.MUST).build();
}
query.add(terms.toArray(Term.class), position);
return query.build();
}
private Query getQuery() {
try {
switch (type) {
case FIELD: // fallthrough
case PHRASE:
Query query;
if (val == null) {
query = super.getFieldQuery(field, vals, false);
} else {
query = super.getFieldQuery(field, val, type == QType.PHRASE, false);
}
// Boolean query on a whitespace-separated string
// If these were synonyms we would have a SynonymQuery
if (query instanceof BooleanQuery) {
if (type == QType.FIELD) { // Don't set mm for boolean query containing phrase queries
BooleanQuery bq = (BooleanQuery) query;
query = SolrPluginUtils.setMinShouldMatch(bq, minShouldMatch, false);
}
} else if (query instanceof PhraseQuery) {
PhraseQuery pq = (PhraseQuery)query;
if (minClauseSize > 1 && pq.getTerms().length < minClauseSize) return null;
PhraseQuery.Builder builder = new PhraseQuery.Builder();
Term[] terms = pq.getTerms();
int[] positions = pq.getPositions();
for (int i = 0; i < terms.length; ++i) {
builder.add(terms[i], positions[i]);
}
builder.setSlop(slop);
query = builder.build();
} else if (query instanceof MultiPhraseQuery) {
MultiPhraseQuery mpq = (MultiPhraseQuery)query;
if (minClauseSize > 1 && mpq.getTermArrays().length < minClauseSize) return null;
if (slop != mpq.getSlop()) {
query = new MultiPhraseQuery.Builder(mpq).setSlop(slop).build();
}
} else if (query instanceof SpanQuery) {
return query;
} else if (minClauseSize > 1) {
// if it's not a type of phrase query, it doesn't meet the minClauseSize requirements
return null;
}
return query;
case PREFIX: return super.getPrefixQuery(field, val);
case WILDCARD: return super.getWildcardQuery(field, val);
case FUZZY: return super.getFuzzyQuery(field, val, flt);
case RANGE: return super.getRangeQuery(field, val, val2, bool, bool2);
}
return null;
} catch (Exception e) {
// an exception here is due to the field query not being compatible with the input text
// for example, passing a string to a numeric field.
return null;
}
}
private static void assertEqualsMultiPhraseQuery(MultiPhraseQuery expected, MultiPhraseQuery actual) {
throw new RuntimeException("Not Implemented");
}
/**
* Builds a new MultiPhraseQuery instance.
* <p>
* This is intended for subclasses that wish to customize the generated queries.
* @return new MultiPhraseQuery instance
*/
protected MultiPhraseQuery.Builder newMultiPhraseQueryBuilder() {
return new MultiPhraseQuery.Builder();
}