下面列出了 org.apache.lucene.search.DisjunctionMaxQuery 类的 API 用法示例代码及写法，也可以点击链接到 GitHub 查看源代码。
/**
 * Merges a group of queries into one query.
 *
 * @param queries the queries to merge; may be {@code null} or empty
 * @return {@code null} if there is nothing to merge, the only element of a
 *         one-element list, and otherwise either a {@link DisjunctionMaxQuery}
 *         (when {@code groupDismax} is set) or a {@link BooleanQuery} in which
 *         every input is a SHOULD clause
 */
public Query combineGrouped(List<Query> queries) {
  final int count = (queries == null) ? 0 : queries.size();
  if (count == 0) {
    return null;
  }
  if (count == 1) {
    return queries.get(0);
  }
  if (!groupDismax) {
    // Plain disjunction: every sub-query is optional.
    BooleanQuery.Builder builder = new BooleanQuery.Builder();
    for (Query sub : queries) {
      builder.add(sub, BooleanClause.Occur.SHOULD);
    }
    return builder.build();
  }
  return new DisjunctionMaxQuery(queries, tieBreaker);
}
/**
 * Builds a {@link DisjunctionMaxQuery} from the dis-max part of a Linden query.
 *
 * @param lindenQuery the query description to convert
 * @param config      configuration handed on to the sub-query constructor
 * @return the dis-max query, or {@code null} when {@code lindenQuery} carries no dis-max part
 * @throws Exception propagated from constructing the sub-queries
 */
@Override
protected Query construct(LindenQuery lindenQuery, LindenConfig config) throws Exception {
  if (lindenQuery.isSetDisMaxQuery()) {
    LindenDisMaxQuery spec = lindenQuery.getDisMaxQuery();
    DisjunctionMaxQuery result = new DisjunctionMaxQuery((float) spec.getTie());
    for (LindenQuery child : spec.getQueries()) {
      Query built = QueryConstructor.constructQuery(child, config);
      // Sub-queries that could not be constructed are silently skipped.
      if (built == null) {
        continue;
      }
      result.add(built);
    }
    return result;
  }
  return null;
}
/**
 * Breaks a query into the set of parts that can be indexed and run on their own.
 *
 * <p>Boolean and boost queries are handed to their dedicated helpers, the
 * disjuncts of a {@link DisjunctionMaxQuery} are decomposed recursively and
 * merged, and any other query is returned unchanged as a singleton set.
 *
 * @param q the query
 * @return a collection of subqueries
 */
public Set<Query> decompose(Query q) {
  if (q instanceof BooleanQuery) {
    return decomposeBoolean((BooleanQuery) q);
  }
  if (q instanceof BoostQuery) {
    return decomposeBoostQuery((BoostQuery) q);
  }
  if (!(q instanceof DisjunctionMaxQuery)) {
    return Collections.singleton(q);
  }
  Set<Query> parts = new HashSet<>();
  for (Query disjunct : ((DisjunctionMaxQuery) q).getDisjuncts()) {
    parts.addAll(decompose(disjunct));
  }
  return parts;
}
/** Extracting from a two-disjunct dis-max query yields both disjuncts in insertion order. */
@Test
public void testExtractTwoSubqueries() {
  DisjunctionQueryExtractor extractor = new DisjunctionQueryExtractor();
  Query first = mock(Query.class);
  Query second = mock(Query.class);

  List<Query> disjuncts = new ArrayList<>();
  disjuncts.add(first);
  disjuncts.add(second);
  DisjunctionMaxQuery dismax = new DisjunctionMaxQuery(disjuncts, 0.0f);

  List<Query> collected = new ArrayList<>();
  extractor.extract(dismax, DEFAULT_EXTRACTORS, collected);

  assertEquals(2, collected.size());
  assertEquals(first, collected.get(0));
  assertEquals(second, collected.get(1));
}
/** Field extraction over a dis-max query reports the field of each term disjunct. */
@Test
public void testExtractSubqueryField() {
  DisjunctionQueryExtractor extractor = new DisjunctionQueryExtractor();
  Query onField1 = new TermQuery(new Term("field1", "value1"));
  Query onField2 = new TermQuery(new Term("field2", "value2"));

  List<Query> disjuncts = new ArrayList<>();
  disjuncts.add(onField1);
  disjuncts.add(onField2);
  DisjunctionMaxQuery dismax = new DisjunctionMaxQuery(disjuncts, 0.0f);

  Set<String> fieldNames = new HashSet<>();
  extractor.extractSubQueriesFields(dismax, DEFAULT_EXTRACTORS, fieldNames);

  assertEquals(2, fieldNames.size());
  assertTrue(fieldNames.contains("field1"));
  assertTrue(fieldNames.contains("field2"));
}
/**
 * Applies the transformer to every disjunct of a {@link DisjunctionMaxQuery}.
 *
 * @param query            the query to process; cast to {@link DisjunctionMaxQuery}
 * @param queryTransformer transformer applied to each disjunct
 * @return {@code query} itself when the transformer returned every disjunct as
 *         the same instance; otherwise a new dis-max query over the transformed
 *         disjuncts with the original tie-breaker multiplier
 */
@Override
public Query handle(Query query, QueryTransformer queryTransformer) {
  DisjunctionMaxQuery source = (DisjunctionMaxQuery) query;
  List<Query> transformed = new ArrayList<>();
  boolean anyReplaced = false;
  for (Query disjunct : source.getDisjuncts()) {
    Query candidate = queryTransformer.transform(disjunct);
    // Identity comparison: a transformer signals "no change" by returning its input.
    anyReplaced |= (candidate != disjunct);
    transformed.add(candidate);
  }
  return anyReplaced
      ? new DisjunctionMaxQuery(transformed, source.getTieBreakerMultiplier())
      : query;
}
/**
 * Creates a multifield query from the per-field queries.
 *
 * @param queries the per-field queries
 * @return {@code null} when {@code queries} is empty, a {@link DisjunctionMaxQuery}
 *         when {@code dismax} is set, otherwise a {@link BooleanQuery} of SHOULD clauses
 * @throws ParseException declared for overriders; not thrown by this implementation
 */
// TODO: investigate more general approach by default, e.g. DisjunctionMaxQuery?
protected Query getMultiFieldQuery(List<Query> queries) throws ParseException {
  if (queries.isEmpty()) {
    // All clause words were filtered away by the analyzer.
    return null;
  }
  if (!dismax) {
    // mdavis - don't use super method because of min match
    BooleanQuery.Builder builder = new BooleanQuery.Builder();
    for (Query perField : queries) {
      builder.add(perField, BooleanClause.Occur.SHOULD);
    }
    return builder.build();
  }
  return new DisjunctionMaxQuery(queries, dismaxTie);
}
/** A wildcard query wrapped in a dis-max query must still highlight matching terms. */
public void testWildcardInDisjunctionMax() throws Exception {
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, indexAnalyzer);

  // One Field/Document pair is reused; only the field value changes between adds.
  Field bodyField = new Field("body", "", fieldType);
  Document document = new Document();
  document.add(bodyField);

  bodyField.setStringValue("This is a test.");
  writer.addDocument(document);
  bodyField.setStringValue("Test a one sentence document.");
  writer.addDocument(document);

  IndexReader reader = writer.getReader();
  writer.close();

  IndexSearcher searcher = newSearcher(reader);
  UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
  DisjunctionMaxQuery query =
      new DisjunctionMaxQuery(Collections.singleton(new WildcardQuery(new Term("body", "te*"))), 0);

  TopDocs hits = searcher.search(query, 10, Sort.INDEXORDER);
  assertEquals(2, hits.totalHits.value);

  String[] snippets = highlighter.highlight("body", query, hits);
  assertEquals(2, snippets.length);
  assertEquals("This is a <b>test</b>.", snippets[0]);
  assertEquals("<b>Test</b> a one sentence document.", snippets[1]);

  reader.close();
}
/** A registered dis-max query over a term and a prefix query reports two highlight hits. */
public void testDisjunctionMaxQuery() throws IOException {
  final DisjunctionMaxQuery dismax =
      new DisjunctionMaxQuery(
          Arrays.asList(
              new TermQuery(new Term(FIELD, "term1")),
              new PrefixQuery(new Term(FIELD, "term2"))),
          1.0f);

  try (Monitor monitor = newMonitor()) {
    monitor.register(new MonitorQuery("1", dismax));

    MatchingQueries<HighlightsMatch> results =
        monitor.match(buildDoc("term1 term2 term3"), HighlightsMatch.MATCHER);

    HighlightsMatch match = results.matches("1");
    assertNotNull(match);
    assertEquals(2, match.getHitCount());
  }
}
/** Decomposing a dis-max query over two term queries yields the two term queries. */
public void testDisjunctionMaxDecomposition() {
  Query first = new TermQuery(new Term("f", "t1"));
  Query second = new TermQuery(new Term("f", "t2"));
  Query dismax = new DisjunctionMaxQuery(Arrays.asList(first, second), 0.1f);

  Set<Query> expected = new HashSet<>(Arrays.asList(parse("f:t1"), parse("f:t2")));

  assertEquals(expected, decomposer.decompose(dismax));
}
/** Decomposition descends into the disjuncts of a dis-max query. */
public void testNestedDisjunctionMaxDecomposition() {
  Query twoWords = parse("hello goodbye");
  Query oneWord = parse("world");
  Query dismax = new DisjunctionMaxQuery(Arrays.asList(twoWords, oneWord), 0.1f);

  Set<Query> expected =
      new HashSet<>(Arrays.asList(parse("hello"), parse("goodbye"), parse("world")));

  assertEquals(expected, decomposer.decompose(dismax));
}
/** Term collection over a dis-max query returns the terms of all its disjuncts. */
public void testDisjunctionMaxExtractor() {
  Term t1 = new Term("f", "t1");
  Term t2 = new Term("f", "t2");
  Query first = new TermQuery(t1);
  Query second = new TermQuery(t2);
  Query dismax = new DisjunctionMaxQuery(Arrays.asList(first, second), 0.1f);

  Set<Term> expected = new HashSet<>(Arrays.asList(new Term("f", "t1"), new Term("f", "t2")));

  assertEquals(expected, collectTerms(dismax));
}
/**
 * Parses {@code DisjunctionMaxQuery.xml} and checks the tie-breakers and disjunct
 * counts of the outer query and of the nested dis-max query at index 1.
 */
public void testDisjunctionMaxQueryXML() throws ParserException, IOException {
  Query parsed = parse("DisjunctionMaxQuery.xml");
  assertTrue(parsed instanceof DisjunctionMaxQuery);

  DisjunctionMaxQuery outer = (DisjunctionMaxQuery) parsed;
  assertEquals(0.0f, outer.getTieBreakerMultiplier(), 0.0001f);
  assertEquals(2, outer.getDisjuncts().size());

  DisjunctionMaxQuery nested = (DisjunctionMaxQuery) outer.getDisjuncts().get(1);
  assertEquals(0.3f, nested.getTieBreakerMultiplier(), 0.0001f);
  assertEquals(1, nested.getDisjuncts().size());
}
/** A dis-max query with a single matching term finds one document with a non-zero score. */
public void testDMQHit() throws IOException {
  Query hit = new TermQuery(new Term("test", "hit"));
  Query dismax = new DisjunctionMaxQuery(Arrays.asList(hit), 0);

  TopDocs results = indexSearcher.search(dismax, 1);

  assertEquals(1, results.totalHits.value);
  assertEquals(1, results.scoreDocs.length);
  assertTrue(results.scoreDocs[0].score != 0);
}
/** A second disjunct on the same field with the term "miss" does not change the single hit. */
public void testDMQHitOrMiss() throws IOException {
  Query hit = new TermQuery(new Term("test", "hit"));
  Query miss = new TermQuery(new Term("test", "miss"));
  Query dismax = new DisjunctionMaxQuery(Arrays.asList(hit, miss), 0);

  TopDocs results = indexSearcher.search(dismax, 1);

  assertEquals(1, results.totalHits.value);
  assertEquals(1, results.scoreDocs.length);
  assertTrue(results.scoreDocs[0].score != 0);
}
/** A second disjunct on the field named "empty" does not change the single hit. */
public void testDMQHitOrEmpty() throws IOException {
  Query hit = new TermQuery(new Term("test", "hit"));
  Query onEmptyField = new TermQuery(new Term("empty", "miss"));
  Query dismax = new DisjunctionMaxQuery(Arrays.asList(hit, onEmptyField), 0);

  TopDocs results = indexSearcher.search(dismax, 1);

  assertEquals(1, results.totalHits.value);
  assertEquals(1, results.scoreDocs.length);
  assertTrue(results.scoreDocs[0].score != 0);
}
/**
 * Compares the measured size of a nested boolean/dis-max query with the
 * {@code RamUsageEstimator} estimate, allowing a tolerance of 50% of the measured size.
 */
public void testQuery() {
  DisjunctionMaxQuery dismax =
      new DisjunctionMaxQuery(
          Arrays.asList(
              new TermQuery(new Term("foo1", "bar1")),
              new TermQuery(new Term("baz1", "bam1"))),
          1.0f);

  BooleanQuery.Builder builder = new BooleanQuery.Builder();
  builder.add(new TermQuery(new Term("foo2", "bar2")), BooleanClause.Occur.SHOULD);
  builder.add(
      new PhraseQuery.Builder().add(new Term("foo3", "baz3")).build(),
      BooleanClause.Occur.MUST_NOT);
  builder.add(dismax, BooleanClause.Occur.MUST);
  BooleanQuery bq = builder.build();

  long measured = sizeOf(bq);
  long estimated = RamUsageEstimator.sizeOfObject(bq);

  // sizeOfObject uses much lower default size estimate than we normally use
  // but the query-specific default is so large that the comparison becomes meaningless.
  double tolerance = (double) measured * 0.5;
  assertEquals((double) measured, (double) estimated, tolerance);
}
/**
 * Builds the query for graph synonym side paths according to {@code synonymQueryStyle}.
 *
 * <p>{@code PICK_BEST} drains the iterator into a {@link DisjunctionMaxQuery} with a
 * tie-breaker of 0; {@code AS_SAME_TERM} and {@code AS_DISTINCT_TERMS} defer to the
 * superclass.
 *
 * @param sidePathQueriesIterator iterator over the side-path queries
 * @return the combined synonym query
 * @throws AssertionError if the style is none of the handled constants
 */
@Override
protected Query newGraphSynonymQuery(Iterator<Query> sidePathQueriesIterator) {
  switch (synonymQueryStyle) {
    case AS_SAME_TERM:
    case AS_DISTINCT_TERMS: {
      return super.newGraphSynonymQuery(sidePathQueriesIterator);
    }
    case PICK_BEST: {
      List<Query> alternatives = new LinkedList<>();
      while (sidePathQueriesIterator.hasNext()) {
        alternatives.add(sidePathQueriesIterator.next());
      }
      return new DisjunctionMaxQuery(alternatives, 0.0f);
    }
    default:
      throw new AssertionError("unrecognized synonymQueryStyle passed when creating newSynonymQuery");
  }
}
/**
 * Checks whether {@code query} contains a term or fuzzy clause for the given
 * field/value with the given boost.
 *
 * <p>A {@link BoostQuery} is unwrapped first and its boost remembered. Boolean and
 * dis-max queries are searched through their own overloads, without comparing the
 * boost at this level. For any other query the boost must equal {@code boost}, and
 * the query must be a {@link FuzzyQuery} when {@code fuzzy} is set or a
 * {@link TermQuery} when it is not.
 */
private boolean containsClause(Query query, String field, String value,
                               int boost, boolean fuzzy) {
  final Query inner;
  final float effectiveBoost;
  if (query instanceof BoostQuery) {
    BoostQuery wrapper = (BoostQuery) query;
    inner = wrapper.getQuery();
    effectiveBoost = wrapper.getBoost();
  } else {
    inner = query;
    effectiveBoost = 1f;
  }

  if (inner instanceof BooleanQuery) {
    return containsClause((BooleanQuery) inner, field, value, boost, fuzzy);
  }
  if (inner instanceof DisjunctionMaxQuery) {
    return containsClause((DisjunctionMaxQuery) inner, field, value, boost, fuzzy);
  }
  if (boost != effectiveBoost) {
    return false;
  }
  if (fuzzy) {
    return inner instanceof FuzzyQuery && containsClause((FuzzyQuery) inner, field, value);
  }
  return inner instanceof TermQuery && containsClause((TermQuery) inner, field, value);
}
/**
 * Returns {@code true} as soon as one disjunct of {@code query} contains the
 * requested clause, {@code false} if none does.
 */
private boolean containsClause(DisjunctionMaxQuery query, String field, String value, int boost, boolean fuzzy) {
  boolean found = false;
  for (Query candidate : query.getDisjuncts()) {
    if (containsClause(candidate, field, value, boost, fuzzy)) {
      found = true;
      break;
    }
  }
  return found;
}
/**
 * Passes every disjunct of {@code q} to {@code extractQuery}, in iteration order.
 *
 * @param q                the dis-max query whose disjuncts are visited
 * @param extractors       extractors forwarded to {@code extractQuery}
 * @param extractedQueries list forwarded to {@code extractQuery}
 */
@Override
public void extract(
    final DisjunctionMaxQuery q,
    final Iterable<QueryExtractor<? extends Query>> extractors,
    final List<Query> extractedQueries)
    throws UnsupportedOperationException {
  for (Query disjunct : q) {
    extractQuery(disjunct, extractors, extractedQueries);
  }
}
/**
 * Passes every disjunct of {@code q} to {@code extractFields}, in iteration order.
 *
 * @param q               the dis-max query whose disjuncts are visited
 * @param extractors      extractors forwarded to {@code extractFields}
 * @param extractedFields set forwarded to {@code extractFields}
 */
@Override
public void extractSubQueriesFields(
    final DisjunctionMaxQuery q,
    final Iterable<QueryExtractor<? extends Query>> extractors,
    final Set<String> extractedFields)
    throws UnsupportedOperationException {
  for (Query disjunct : q) {
    extractFields(disjunct, extractors, extractedFields);
  }
}
/**
 * Merges a group of queries into one.
 *
 * @param groupQuery the queries to merge; may be {@code null} or empty
 * @return the result of {@code zeroTermsQuery()} when there is nothing to merge,
 *         the only element of a one-element list, otherwise a
 *         {@link DisjunctionMaxQuery} over a copy of the list using {@code tieBreaker}
 */
private Query combineGrouped(List<? extends Query> groupQuery) {
  final int size = (groupQuery == null) ? 0 : groupQuery.size();
  if (size == 0) {
    return zeroTermsQuery();
  }
  if (size == 1) {
    return groupQuery.get(0);
  }
  // Copy into a List<Query>; the wildcard-typed input cannot be passed directly.
  List<Query> disjuncts = new ArrayList<Query>(groupQuery);
  return new DisjunctionMaxQuery(disjuncts, tieBreaker);
}
/**
 * Asserts that {@code query} is a {@link DisjunctionMaxQuery} with more than
 * {@code i} disjuncts and that the disjunct at index {@code i} has the given type.
 *
 * @param query        the query expected to be a dis-max query
 * @param subqueryType the expected type of the disjunct
 * @param i            index of the disjunct to check
 * @return the disjunct at index {@code i}, cast to {@code subqueryType}
 */
public static <T extends Query> T assertDisjunctionSubQuery(Query query, Class<T> subqueryType, int i) {
  assertThat(query, instanceOf(DisjunctionMaxQuery.class));
  DisjunctionMaxQuery dismax = (DisjunctionMaxQuery) query;
  assertThat(dismax.getDisjuncts().size(), greaterThan(i));
  Query disjunct = dismax.getDisjuncts().get(i);
  assertThat(disjunct, instanceOf(subqueryType));
  return subqueryType.cast(disjunct);
}
/**
 * Shorthand for building a {@link DisjunctionMaxQuery}.
 *
 * @param tieBreakerMultiplier tie-breaker passed to the query
 * @param queries              the disjuncts
 * @return a new dis-max query over {@code queries}
 */
protected Query dmq(float tieBreakerMultiplier, Query... queries) {
  return new DisjunctionMaxQuery(
      Arrays.asList(queries),
      tieBreakerMultiplier);
}
/**
 * Indexes one document from an {@code OverlappingTokenStream} with term vectors
 * that store offsets but no positions, then checks that highlighting a dis-max
 * query over the terms "{fox}" and "fox" produces {@code <B>the fox</B> did not jump}.
 *
 * <p>Every use of the reader, including the document-count assertion and the
 * searcher creation, happens inside the {@code try} so that the reader and the
 * directory are closed even when an early assertion fails.
 */
public void testOverlapWithOffset() throws IOException, InvalidTokenOffsetsException {
  final String TEXT = "the fox did not jump";
  final Directory directory = newDirectory();
  final IndexWriter indexWriter = new IndexWriter(directory,
      newIndexWriterConfig(null));
  try {
    final Document document = new Document();
    FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
    customType.setStoreTermVectors(true);
    // no positions!
    customType.setStoreTermVectorOffsets(true);
    document.add(new Field(FIELD, new OverlappingTokenStream(), customType));
    indexWriter.addDocument(document);
  } finally {
    indexWriter.close();
  }
  final IndexReader indexReader = DirectoryReader.open(directory);
  try {
    assertEquals(1, indexReader.numDocs());
    final IndexSearcher indexSearcher = newSearcher(indexReader);
    final DisjunctionMaxQuery query = new DisjunctionMaxQuery(
        Arrays.asList(
            new SpanTermQuery(new Term(FIELD, "{fox}")),
            new SpanTermQuery(new Term(FIELD, "fox"))),
        1);
    // final Query phraseQuery = new SpanNearQuery(new SpanQuery[] {
    // new SpanTermQuery(new Term(FIELD, "{fox}")),
    // new SpanTermQuery(new Term(FIELD, "fox")) }, 0, true);
    TopDocs hits = indexSearcher.search(query, 1);
    assertEquals(1, hits.totalHits.value);
    final Highlighter highlighter = new Highlighter(
        new SimpleHTMLFormatter(), new SimpleHTMLEncoder(),
        new QueryScorer(query));
    final TokenStream tokenStream =
        TokenSources.getTermVectorTokenStreamOrNull(FIELD, indexReader.getTermVectors(0), -1);
    assertEquals("<B>the fox</B> did not jump",
        highlighter.getBestFragment(tokenStream, TEXT));
  } finally {
    indexReader.close();
    directory.close();
  }
}
/**
 * Indexes one document from an {@code OverlappingTokenStream} with term vectors
 * that store both offsets and positions, then checks that highlighting a dis-max
 * query over the terms "{fox}" and "fox" produces {@code <B>the fox</B> did not jump}.
 */
public void testOverlapWithPositionsAndOffset()
    throws IOException, InvalidTokenOffsetsException {
  final String text = "the fox did not jump";
  final Directory dirUnderTest = newDirectory();
  final IndexWriter writer = new IndexWriter(dirUnderTest, newIndexWriterConfig(null));
  try {
    FieldType vectorType = new FieldType(TextField.TYPE_NOT_STORED);
    vectorType.setStoreTermVectors(true);
    vectorType.setStoreTermVectorOffsets(true);
    vectorType.setStoreTermVectorPositions(true);

    final Document doc = new Document();
    doc.add(new Field(FIELD, new OverlappingTokenStream(), vectorType));
    writer.addDocument(doc);
  } finally {
    writer.close();
  }

  final IndexReader reader = DirectoryReader.open(dirUnderTest);
  try {
    assertEquals(1, reader.numDocs());
    final IndexSearcher searcher = newSearcher(reader);

    final DisjunctionMaxQuery dismax = new DisjunctionMaxQuery(
        Arrays.asList(
            new SpanTermQuery(new Term(FIELD, "{fox}")),
            new SpanTermQuery(new Term(FIELD, "fox"))),
        1);
    // final Query phraseQuery = new SpanNearQuery(new SpanQuery[] {
    // new SpanTermQuery(new Term(FIELD, "{fox}")),
    // new SpanTermQuery(new Term(FIELD, "fox")) }, 0, true);

    TopDocs results = searcher.search(dismax, 1);
    assertEquals(1, results.totalHits.value);

    final Highlighter highlighter =
        new Highlighter(new SimpleHTMLFormatter(), new SimpleHTMLEncoder(), new QueryScorer(dismax));
    final TokenStream vectorTokens =
        TokenSources.getTermVectorTokenStreamOrNull(FIELD, reader.getTermVectors(0), -1);

    assertEquals("<B>the fox</B> did not jump", highlighter.getBestFragment(vectorTokens, text));
  } finally {
    reader.close();
    dirUnderTest.close();
  }
}
/**
 * Default constructor. Its only action is to call the superclass constructor,
 * passing {@link DisjunctionMaxQuery} as the query class.
 */
public DisjunctionQueryExtractor() {
super(DisjunctionMaxQuery.class);
}
/**
 * Tells whether this handler applies to the given query.
 *
 * @param query the query to test; {@code null} yields {@code false}
 * @return {@code true} if {@code query} is a {@link DisjunctionMaxQuery}
 */
@Override
public boolean accept(Query query) {
  return DisjunctionMaxQuery.class.isInstance(query);
}
/**
 * Counts the optional (SHOULD) clauses of the query and compares that count
 * with the specification string to determine the minimum-should-match value.
 * <p>
 * If {@code mmAutoRelax} is true, mm is relaxed automatically when tokens are
 * removed from some but not all DisMax clauses, as can happen when stopwords or
 * punctuation tokens are removed in analysis. In that mode only the optional
 * DisMax clauses that have the largest number of disjuncts are counted;
 * optional DisMax clauses with fewer disjuncts are left out of the count.
 * </p>
 * <p>
 * Details about the specification format can be found
 * <a href="doc-files/min-should-match.html">here</a>
 * </p>
 *
 * <p>A few important notes...</p>
 * <ul>
 * <li>
 * If the calculation determines that no optional clauses are needed,
 * {@code BooleanQuery.Builder.setMinimumNumberShouldMatch} is never called,
 * but the usual rules about BooleanQueries still apply at search time
 * (a BooleanQuery containing no required clauses must still match at
 * least one optional clause)
 * </li>
 * <li>
 * {@code setMinimumNumberShouldMatch} is never called with a value less
 * than 1. The original documentation also states it is never called with
 * a value greater than the number of optional clauses; that bound is not
 * enforced in this method and presumably comes from
 * {@code calculateMinShouldMatch} (to be confirmed).
 * </li>
 * </ul>
 *
 * <p>:TODO: should optimize the case where number is same
 * as clauses to just make them all "required"
 * </p>
 *
 * @param q The query as a BooleanQuery.Builder
 * @param spec The mm spec
 * @param mmAutoRelax whether to perform auto relaxation of mm if tokens are removed from some but not all DisMax clauses
 */
public static void setMinShouldMatch(BooleanQuery.Builder q, String spec, boolean mmAutoRelax) {
  int plainOptional = 0;      // optional clauses counted one-for-one
  int largestDismaxSize = 0;  // most disjuncts seen in an optional DisMax clause
  int dismaxAtLargest = 0;    // optional DisMax clauses having that many disjuncts

  for (BooleanClause clause : q.build().clauses()) {
    if (clause.getOccur() != Occur.SHOULD) {
      continue;
    }
    if (!mmAutoRelax || !(clause.getQuery() instanceof DisjunctionMaxQuery)) {
      plainOptional++;
      continue;
    }
    int size = ((DisjunctionMaxQuery) clause.getQuery()).getDisjuncts().size();
    if (size > largestDismaxSize) {
      // New maximum: restart the tally of DisMax clauses at this size.
      largestDismaxSize = size;
      dismaxAtLargest = 1;
    } else if (size == largestDismaxSize) {
      dismaxAtLargest++;
    }
  }

  int msm = calculateMinShouldMatch(plainOptional + dismaxAtLargest, spec);
  if (msm > 0) {
    q.setMinimumNumberShouldMatch(msm);
  }
}