下面列出了 org.apache.lucene.search.spans.SpanOrQuery 类的 API 用法示例代码及写法；也可以点击链接到 GitHub 查看源代码。
@Test
public void testSpanContainingQuery() throws Exception {
// twenty WITHIN ((one OR hundred) NEAR two)~2
SpanContainingQuery q = new SpanContainingQuery(
new SpanNearQuery(new SpanQuery[]{
new SpanOrQuery(new SpanTermQuery(new Term("field", "one")), new SpanTermQuery(new Term("field", "hundred"))),
new SpanTermQuery(new Term("field", "two"))
}, 2, true),
new SpanTermQuery(new Term("field", "twenty"))
);
checkQuery(q, new AveragePayloadFunction(), new int[] { 222, 122 }, new float[]{ 4.0f, 3.666666f });
checkQuery(q, new MaxPayloadFunction(), new int[]{ 122, 222 }, new float[]{ 4.0f, 4.0f });
checkQuery(q, new MinPayloadFunction(), new int[]{ 222, 122 }, new float[]{ 4.0f, 2.0f });
}
/**
 * Builds a span query matching any one of the terms obtained by analyzing the element's text.
 *
 * <p>The text of {@code e} is run through {@code analyzer} for the field named by the
 * (possibly inherited) {@code fieldName} attribute. Every emitted token becomes a
 * {@link SpanTermQuery}; the terms are combined in a {@link SpanOrQuery}, which is wrapped
 * in a {@link SpanBoostQuery} using the optional {@code boost} attribute (default 1.0).
 *
 * @param e the XML element describing the query
 * @return the boosted span-or query over the analyzed terms
 * @throws ParserException if a required attribute/text is missing, or if analysis fails
 *         with an {@link IOException} (attached as the cause)
 */
@Override
public SpanQuery getSpanQuery(Element e) throws ParserException {
  String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName");
  String value = DOMUtils.getNonBlankTextOrFail(e);

  List<SpanQuery> clausesList = new ArrayList<>();

  try (TokenStream ts = analyzer.tokenStream(fieldName, value)) {
    TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
      // Deep-copy the bytes: the attribute's BytesRef is copied before the next token is read.
      SpanTermQuery stq = new SpanTermQuery(new Term(fieldName, BytesRef.deepCopyOf(termAtt.getBytesRef())));
      clausesList.add(stq);
    }
    ts.end();
    SpanOrQuery soq = new SpanOrQuery(clausesList.toArray(new SpanQuery[clausesList.size()]));
    float boost = DOMUtils.getAttribute(e, "boost", 1.0f);
    return new SpanBoostQuery(soq, boost);
  } catch (IOException ioe) {
    // Keep the underlying I/O failure attached so the root cause is not lost.
    throw new ParserException("IOException parsing value:" + value, ioe);
  }
}
/**
 * Turns the collected span queries into a single clause.
 *
 * <p>Each key of {@code weightBySpanQuery} is wrapped in a {@link SpanBoostQuery} when its
 * stored weight differs from 1. If exactly one query results it is returned directly;
 * otherwise all of them are combined in a {@link SpanOrQuery}.
 *
 * @return the single (possibly boosted) query, or a span-or over all of them
 */
public SpanQuery makeSpanClause() {
  SpanQuery[] weighted = new SpanQuery[size()];
  int next = 0;

  for (SpanQuery clause : weightBySpanQuery.keySet()) {
    float weight = weightBySpanQuery.get(clause);
    weighted[next++] = (weight != 1f) ? new SpanBoostQuery(clause, weight) : clause;
  }

  return (weighted.length == 1) ? weighted[0] : new SpanOrQuery(weighted);
}
/**
 * Builds a {@link SpanOrQuery} from the children of a boolean query node.
 *
 * <p>Each child is expected to already carry its built query under
 * {@code QueryTreeBuilder.QUERY_TREE_BUILDER_TAGID}; that tag is cast to {@link SpanQuery}.
 *
 * @param node the node to build from; cast to {@link BooleanQueryNode}
 * @return a span-or query over the children's tagged span queries
 */
@Override
public SpanOrQuery build(QueryNode node) throws QueryNodeException {

  // The cast is the only check performed on the incoming node.
  List<QueryNode> childNodes = ((BooleanQueryNode) node).getChildren();

  SpanQuery[] clauses = new SpanQuery[childNodes.size()];
  int filled = 0;

  for (QueryNode childNode : childNodes) {
    Object tagged = childNode.getTag(QueryTreeBuilder.QUERY_TREE_BUILDER_TAGID);
    clauses[filled] = (SpanQuery) tagged;
    filled++;
  }

  return new SpanOrQuery(clauses);

}
/**
 * Rewrites the wrapped {@code query} and converts the result into Mtas span queries.
 *
 * <p>The rewritten query must be a {@link SpanOrQuery} whose clauses are all
 * {@link SpanTermQuery}; each clause becomes a rewritten {@code MtasSpanTermQuery} and the
 * set is returned as a rewritten {@code MtasSpanOrQuery}.
 *
 * @param reader the index reader used for rewriting
 * @return the rewritten Mtas span-or query
 * @throws IOException if the rewritten query or one of its clauses has an unexpected type
 */
@Override
public MtasSpanQuery rewrite(IndexReader reader) throws IOException {
  Query rewritten = query.rewrite(reader);
  if (!(rewritten instanceof SpanOrQuery)) {
    throw new IOException("no SpanOrQuery after rewrite");
  }

  SpanQuery[] expanded = ((SpanOrQuery) rewritten).getClauses();
  MtasSpanQuery[] converted = new MtasSpanQuery[expanded.length];
  int slot = 0;
  for (SpanQuery clause : expanded) {
    if (!(clause instanceof SpanTermQuery)) {
      throw new IOException("no SpanTermQuery after rewrite");
    }
    converted[slot++] = new MtasSpanTermQuery((SpanTermQuery) clause,
        singlePosition).rewrite(reader);
  }
  return new MtasSpanOrQuery(converted).rewrite(reader);
}
/**
 * Rewrites the wrapped wildcard {@code query} and converts the result into Mtas span queries.
 *
 * <p>The rewritten query must be a {@link SpanOrQuery} of {@link SpanTermQuery} clauses and
 * may not contain more than {@code MTAS_WILDCARD_EXPAND_BOUNDARY} of them. Each clause is
 * wrapped in an {@code MtasSpanTermQuery}; the combined {@code MtasSpanOrQuery} is rewritten
 * before being returned.
 *
 * @param reader the index reader used for rewriting
 * @return the rewritten Mtas span-or query
 * @throws IOException if the expansion is too large or a query has an unexpected type
 */
@Override
public MtasSpanQuery rewrite(IndexReader reader) throws IOException {
  Query rewritten = query.rewrite(reader);
  if (!(rewritten instanceof SpanOrQuery)) {
    throw new IOException("no SpanOrQuery after rewrite");
  }

  SpanQuery[] expanded = ((SpanOrQuery) rewritten).getClauses();
  if (expanded.length > MTAS_WILDCARD_EXPAND_BOUNDARY) {
    // forward index solution ?
    throw new IOException("Wildcard expression \""
        + CodecUtil.termValue(term.text()) + "\" expands to "
        + expanded.length + " terms, too many (boundary "
        + MTAS_WILDCARD_EXPAND_BOUNDARY + ")!");
  }

  MtasSpanQuery[] converted = new MtasSpanQuery[expanded.length];
  int slot = 0;
  for (SpanQuery clause : expanded) {
    if (!(clause instanceof SpanTermQuery)) {
      throw new IOException("no SpanTermQuery after rewrite");
    }
    // Unlike the combined query below, the individual term query is not rewritten here.
    converted[slot++] = new MtasSpanTermQuery((SpanTermQuery) clause, singlePosition);
  }
  return new MtasSpanOrQuery(converted).rewrite(reader);
}
/**
 * Rewrites the wrapped regexp {@code query} and converts the result into Mtas span queries.
 *
 * <p>The rewritten query must be a {@link SpanOrQuery} of {@link SpanTermQuery} clauses and
 * may not contain more than {@code MTAS_REGEXP_EXPAND_BOUNDARY} of them. Each clause becomes
 * a rewritten {@code MtasSpanTermQuery}; the set is returned as a rewritten
 * {@code MtasSpanOrQuery}.
 *
 * @param reader the index reader used for rewriting
 * @return the rewritten Mtas span-or query
 * @throws IOException if the expansion is too large or a query has an unexpected type
 */
@Override
public MtasSpanQuery rewrite(IndexReader reader) throws IOException {
  Query rewritten = query.rewrite(reader);
  if (!(rewritten instanceof SpanOrQuery)) {
    throw new IOException("no SpanOrQuery after rewrite");
  }

  SpanQuery[] expanded = ((SpanOrQuery) rewritten).getClauses();
  if (expanded.length > MTAS_REGEXP_EXPAND_BOUNDARY) {
    // forward index solution ?
    throw new IOException("Regexp \"" + CodecUtil.termValue(term.text())
        + "\" expands to " + expanded.length + " terms, too many (boundary "
        + MTAS_REGEXP_EXPAND_BOUNDARY + ")!");
  }

  MtasSpanQuery[] converted = new MtasSpanQuery[expanded.length];
  int slot = 0;
  for (SpanQuery clause : expanded) {
    if (!(clause instanceof SpanTermQuery)) {
      throw new IOException("no SpanTermQuery after rewrite");
    }
    converted[slot++] = new MtasSpanTermQuery((SpanTermQuery) clause,
        singlePosition).rewrite(reader);
  }
  return new MtasSpanOrQuery(converted).rewrite(reader);
}
/**
 * Builds a boolean query with one optional (SHOULD) clause per field of {@code qd}.
 *
 * <p>For every field, {@code qd.query} is split on single spaces and each piece becomes a
 * regex-backed span query. The pieces are then joined in a {@link SpanNearQuery}
 * (slop 0, in order).
 *
 * @param qd the query definition supplying {@code fields} and {@code query}
 * @return the combined boolean query
 */
private Query createRegexQuery(QueryDef qd) {
  BooleanQuery combined = new BooleanQuery();

  for (String field : qd.fields) {
    String[] pieces = qd.query.split(" ");
    SpanQuery[] spans = new SpanQuery[pieces.length];

    for (int i = 0; i < pieces.length; i++) {
      String piece = pieces[i];
      RegexQuery regex = new RegexQuery(new Term(field, piece));
      regex.setRegexImplementation(new JavaUtilRegexCapabilities());

      // if emptyable, like a query '(optional)?' or 'bla|a*', make span optional by wrapping it SpanOrQuery
      if (Pattern.matches(piece, "")) {
        spans[i] = new SpanOrQuery(new SpanMultiTermQueryWrapper<RegexQuery>(regex));
      } else {
        spans[i] = new SpanMultiTermQueryWrapper<RegexQuery>(regex);
      }
    }

    combined.add(new SpanNearQuery(spans, 0, true), Occur.SHOULD);
  }

  return combined;
}
/**
 * Indexes two documents and checks that a {@link SpanOrQuery} wrapping a wildcard span
 * query on {@code te*} finds both and that the highlighter marks the matching word in each.
 */
public void testSpanOr() throws Exception {
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, indexAnalyzer);

  // A single document/field pair is reused; only the field value changes between adds.
  Field bodyField = new Field("body", "", fieldType);
  Document document = new Document();
  document.add(bodyField);

  bodyField.setStringValue("This is a test.");
  writer.addDocument(document);
  bodyField.setStringValue("Test a one sentence document.");
  writer.addDocument(document);

  IndexReader reader = writer.getReader();
  writer.close();

  IndexSearcher searcher = newSearcher(reader);
  UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);

  SpanQuery wildcardSpan = new SpanMultiTermQueryWrapper<>(new WildcardQuery(new Term("body", "te*")));
  Query query = new SpanOrQuery(new SpanQuery[]{wildcardSpan});

  TopDocs hits = searcher.search(query, 10, Sort.INDEXORDER);
  assertEquals(2, hits.totalHits.value);

  String[] snippets = highlighter.highlight("body", query, hits);
  assertEquals(2, snippets.length);
  assertEquals("This is a <b>test</b>.", snippets[0]);
  assertEquals("<b>Test</b> a one sentence document.", snippets[1]);

  reader.close();
}
/**
 * Searches a memory index holding "hello there" with a regexp span query for
 * {@code worl.} wrapped in a {@link SpanOrQuery}, expecting a score of 0, and then
 * runs the reader consistency check.
 */
public void testPassesIfWrapped() throws IOException {
  RegexpQuery pattern = new RegexpQuery(new Term("field", "worl."));
  SpanQuery wrapped = new SpanOrQuery(new SpanMultiTermQueryWrapper<>(pattern));

  MemoryIndex index = randomMemoryIndex();
  index.addField("field", new MockAnalyzer(random()).tokenStream("field", "hello there"));

  // This passes though
  assertEquals(0, index.search(wrapped), 0.00001f);
  TestUtil.checkReader(index.createSearcher().getIndexReader());
}
/**
 * Runs a span-or over the terms {@code eighteen} and {@code nineteen} through
 * {@code checkQuery} with the average, max and min payload functions, using the same
 * expectation arrays for each function.
 */
@Test
public void testOrQuery() throws IOException {

  SpanTermQuery eighteen = new SpanTermQuery(new Term("field", "eighteen"));
  SpanTermQuery nineteen = new SpanTermQuery(new Term("field", "nineteen"));
  SpanOrQuery q = new SpanOrQuery(eighteen, nineteen);

  PayloadFunction[] functions = new PayloadFunction[]{
      new AveragePayloadFunction(), new MaxPayloadFunction(), new MinPayloadFunction()
  };

  for (PayloadFunction fn : functions) {
    // Fresh arrays per call, exactly as many as there are functions.
    int[] docs = new int[]{ 118, 119, 218, 219, 18, 19 };
    float[] scores = new float[] { 4.0f, 4.0f, 4.0f, 4.0f, 2.0f, 2.0f };
    checkQuery(q, fn, docs, scores);
  }

}
/**
 * Runs a nested near query through {@code checkQuery} under {@link ClassicSimilarity},
 * first with the default variant and then with the extra {@code false} flag, restoring the
 * searcher's original similarity afterwards.
 */
@Test
public void testNestedNearQuery() throws Exception {

  // (one OR hundred) NEAR (twenty two) ~ 1
  //  2    4        4    4
  // one hundred twenty two
  // two hundred twenty two

  SpanQuery oneOrHundred = new SpanOrQuery(
      new SpanTermQuery(new Term("field", "one")),
      new SpanTermQuery(new Term("field", "hundred")));
  SpanQuery twentyTwo = new SpanNearQuery(new SpanQuery[]{
      new SpanTermQuery(new Term("field", "twenty")),
      new SpanTermQuery(new Term("field", "two"))
  }, 0, true);

  SpanNearQuery q = new SpanNearQuery(new SpanQuery[]{ oneOrHundred, twentyTwo }, 1, true);

  // check includeSpanScore makes a difference here
  searcher.setSimilarity(new ClassicSimilarity());
  try {
    checkQuery(q, new MaxPayloadFunction(), new int[]{ 122, 222 }, new float[]{ 20.901256561279297f, 17.06580352783203f });
    checkQuery(q, new MinPayloadFunction(), new int[]{ 222, 122 }, new float[]{ 17.06580352783203f, 10.450628280639648f });
    checkQuery(q, new AveragePayloadFunction(), new int[] { 122, 222 }, new float[]{ 19.15948486328125f, 17.06580352783203f });

    checkQuery(q, new MaxPayloadFunction(), false, new int[]{122, 222}, new float[]{4.0f, 4.0f});
    checkQuery(q, new MinPayloadFunction(), false, new int[]{222, 122}, new float[]{4.0f, 2.0f});
    checkQuery(q, new AveragePayloadFunction(), false, new int[]{222, 122}, new float[]{4.0f, 3.666666f});
  } finally {
    // Always put the shared searcher back the way it was.
    searcher.setSimilarity(similarity);
  }

}
/**
 * Checks that {@code collectTerms} returns both terms of a two-clause {@link SpanOrQuery}.
 */
public void testSpanOrExtractor() {
  Term first = new Term("field", "term1");
  Term second = new Term("field", "term2");

  SpanOrQuery or = new SpanOrQuery(new SpanTermQuery(new Term("field", "term1")),
      new SpanTermQuery(new Term("field", "term2")));

  Set<Term> expected = new HashSet<>(Arrays.asList(first, second));
  assertEquals(expected, collectTerms(or));
}
/**
 * Builds a boosted span-or query from the child elements of {@code e}.
 *
 * <p>Every child node that is an element is handed to {@code factory} to obtain a span
 * query. The results are combined in a {@link SpanOrQuery} and wrapped in a
 * {@link SpanBoostQuery} using the optional {@code boost} attribute (default 1.0).
 *
 * @param e the XML element whose element children describe the clauses
 * @return the boosted span-or query
 * @throws ParserException if a child element cannot be turned into a span query
 */
@Override
public SpanQuery getSpanQuery(Element e) throws ParserException {
  List<SpanQuery> collected = new ArrayList<>();

  Node child = e.getFirstChild();
  while (child != null) {
    if (child.getNodeType() == Node.ELEMENT_NODE) {
      collected.add(factory.getSpanQuery((Element) child));
    }
    child = child.getNextSibling();
  }

  SpanOrQuery anyOf = new SpanOrQuery(collected.toArray(new SpanQuery[collected.size()]));
  return new SpanBoostQuery(anyOf, DOMUtils.getAttribute(e, "boost", 1.0f));
}
/**
 * Checks that both the implicit form ({@code "term1 term2"}) and the explicit form
 * ({@code "term1 OR term2"}) yield a {@link SpanOrQuery} that prints as
 * {@code spanOr([term1, term2])}.
 */
public void testOrSpans() throws Exception {
  // Expected value goes first so a failure message reports expected/actual correctly.
  assertEquals("spanOr([term1, term2])",
      getSpanQuery("term1 term2").toString());
  assertEquals("spanOr([term1, term2])",
      getSpanQuery("term1 OR term2").toString());

  // Check the type for each of the two input forms (the second check used to repeat the first).
  assertTrue(getSpanQuery("term1 term2") instanceof SpanOrQuery);
  assertTrue(getSpanQuery("term1 OR term2") instanceof SpanOrQuery);
}
/**
 * Makes sure every similarity in {@code sims} works with spanOR(termX, termY) where termY
 * does not exist: the single indexed document must be found with a score that is not
 * negative, infinite or NaN.
 */
public void testCrazySpans() throws Exception {
  // historically this was a problem, but sim's no longer have to score terms that dont exist
  Directory directory = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory);

  Document document = new Document();
  FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
  document.add(newField("foo", "bar", fieldType));
  writer.addDocument(document);

  IndexReader reader = writer.getReader();
  writer.close();
  IndexSearcher searcher = newSearcher(reader);

  for (Similarity sim : sims) {
    searcher.setSimilarity(sim);

    SpanTermQuery existing = new SpanTermQuery(new Term("foo", "bar"));
    SpanTermQuery missing = new SpanTermQuery(new Term("foo", "baz"));
    Query either = new SpanOrQuery(existing, missing);

    TopDocs hits = searcher.search(either, 10);
    assertEquals(1, hits.totalHits.value);

    float score = hits.scoreDocs[0].score;
    assertFalse("negative score for " + sim, score < 0.0f);
    assertFalse("inf score for " + sim, Float.isInfinite(score));
    assertFalse("nan score for " + sim, Float.isNaN(score));
  }

  reader.close();
  directory.close();
}
/**
 * Builds {@code sentence NEAR ((with NEAR many) OR iterations)} on
 * {@code FIELD_WITH_OFFSETS} using ordered near queries and verifies it with
 * {@code checkMatches}, {@code checkLabelCount} and {@code checkTermMatches}.
 */
public void testSpanQuery() throws IOException {
  SpanTermQuery with = new SpanTermQuery(new Term(FIELD_WITH_OFFSETS, "with"));
  SpanTermQuery many = new SpanTermQuery(new Term(FIELD_WITH_OFFSETS, "many"));
  SpanQuery subq = SpanNearQuery.newOrderedNearQuery(FIELD_WITH_OFFSETS)
      .addClause(with)
      .addClause(many)
      .build();

  SpanTermQuery sentence = new SpanTermQuery(new Term(FIELD_WITH_OFFSETS, "sentence"));
  SpanOrQuery withManyOrIterations =
      new SpanOrQuery(subq, new SpanTermQuery(new Term(FIELD_WITH_OFFSETS, "iterations")));
  Query q = SpanNearQuery.newOrderedNearQuery(FIELD_WITH_OFFSETS)
      .addClause(sentence)
      .addClause(withManyOrIterations)
      .build();

  checkMatches(q, FIELD_WITH_OFFSETS, new int[][]{
      { 0 }, { 1 }, { 2 }, { 3 },
      { 4, 2, 4, 9, 27, 6, 7, 35, 54 }
  });

  checkLabelCount(q, FIELD_WITH_OFFSETS, new int[]{ 0, 0, 0, 0, 1 });

  checkTermMatches(q, FIELD_WITH_OFFSETS, new TermMatch[][][]{
      {}, {}, {}, {},
      {
          {
              new TermMatch(2, 9, 17),
              new TermMatch(3, 18, 22),
              new TermMatch(4, 23, 27)
          }, {
              new TermMatch(6, 35, 43), new TermMatch(7, 44, 54)
          }
      }
  });
}
/**
 * Parses a "handy" query made of a {@code SpanOr} left side and a {@code SpanNear} right
 * side and checks that the result is a two-clause {@link BooleanQuery} whose clauses
 * unwrap to a {@link SpanOrQuery} and a {@link SpanNearQuery}, in that order.
 */
public void testHandySpanQuery() throws IOException, ParserException {
  final String lhsXml = "<SpanOr fieldName='contents'>"
      + "<SpanTerm>rain</SpanTerm>"
      + "<SpanTerm>spain</SpanTerm>"
      + "<SpanTerm>plain</SpanTerm>"
      + "</SpanOr>";
  final String rhsXml = "<SpanNear fieldName='contents' slop='2' inOrder='true'>"
      + "<SpanTerm>sunny</SpanTerm>"
      + "<SpanTerm>sky</SpanTerm>"
      + "</SpanNear>";

  final Query query = parseHandyQuery(lhsXml, rhsXml);
  final BooleanQuery bq = (BooleanQuery)query;
  assertEquals(2, bq.clauses().size());

  for (int ii=0; ii<bq.clauses().size(); ++ii) {
    final Query clauseQuery = bq.clauses().get(ii).getQuery();
    if (ii == 0) {
      assertTrue(unwrapSpanBoostQuery(clauseQuery) instanceof SpanOrQuery);
    } else if (ii == 1) {
      assertTrue(unwrapSpanBoostQuery(clauseQuery) instanceof SpanNearQuery);
    } else {
      fail("unexpected clause index "+ii);
    }
  }
}
/**
 * Wraps two custom queries either in a {@code SpanOr} or in a {@code BooleanQuery}
 * (chosen at random), parses the XML, and checks the parsed parent type, its clause count
 * and each of the two sub-queries.
 */
public void testCustomQueryWrapping() throws IOException, ParserException {
  final boolean span = random().nextBoolean();

  // the custom queries
  final String fieldName = "contents";
  final String[] randomTerms = new String[] {"bumble", "honey", "solitary"};
  final String randomQuery = composeChooseOneWordQueryXml(fieldName, randomTerms);
  final String apacheLuceneSolr = "<ApacheLuceneSolr fieldName='"+fieldName+"'/>";

  // the wrapping query
  final String parentQuery;
  final String subQueryPrefix;
  final String subQuerySuffix;
  if (span) {
    parentQuery = "SpanOr";
    subQueryPrefix = "";
    subQuerySuffix = "";
  } else {
    parentQuery = "BooleanQuery";
    subQueryPrefix = "<Clause occurs='must'>";
    subQuerySuffix = "</Clause>";
  }
  final String xml = "<"+parentQuery+">"
      + subQueryPrefix+randomQuery+subQuerySuffix
      + subQueryPrefix+apacheLuceneSolr+subQuerySuffix
      + "</"+parentQuery+">";

  // the test
  final Query query = parseXmlString(xml);
  if (!span) {
    assertTrue(query instanceof BooleanQuery);
    final BooleanQuery bq = (BooleanQuery)query;
    assertEquals(2, bq.clauses().size());
    checkChooseOneWordQuery(span, bq.clauses().get(0).getQuery(), fieldName, randomTerms);
    checkApacheLuceneSolr(bq.clauses().get(1).getQuery(), fieldName);
  } else {
    assertTrue(unwrapSpanBoostQuery(query) instanceof SpanOrQuery);
    final SpanOrQuery soq = (SpanOrQuery)unwrapSpanBoostQuery(query);
    assertEquals(2, soq.getClauses().length);
    checkChooseOneWordQuery(span, soq.getClauses()[0], fieldName, randomTerms);
    checkApacheLuceneSolr(soq.getClauses()[1], fieldName);
  }
}
/**
 * Builds a span-or query from the {@code "Left"} and {@code "Right"} sub-queries of
 * {@code e}, as obtained from {@code getSubSpanQuery}.
 *
 * @param e the element to read the two sub-queries from
 * @return a {@link SpanOrQuery} over the left and right sub-queries, in that order
 * @throws ParserException if a sub-query cannot be obtained
 */
public SpanQuery getSpanQuery(Element e) throws ParserException {
  SpanQuery left = getSubSpanQuery(e, "Left");
  SpanQuery right = getSubSpanQuery(e, "Right");
  return new SpanOrQuery(new SpanQuery[] { left, right });
}
/**
 * Instantiates a new mtas span or query.
 *
 * <p>Duplicate clauses are dropped. The query's width is set to the smallest minimum width
 * and the largest maximum width over the distinct clauses; a bound becomes {@code null} as
 * soon as any distinct clause reports {@code null} for it. The distinct clauses are also
 * used to build the underlying {@link SpanOrQuery}.
 *
 * @param initialClauses the initial clauses
 */
public MtasSpanOrQuery(MtasSpanQuery... initialClauses) {
  super(null, null);
  clauses = new HashSet<>();
  Integer lowest = null;
  Integer highest = null;

  for (MtasSpanQuery candidate : initialClauses) {
    if (clauses.contains(candidate)) {
      // Already seen; it must not influence the width a second time.
      continue;
    }

    if (clauses.isEmpty()) {
      // First distinct clause seeds both bounds.
      lowest = candidate.getMinimumWidth();
      highest = candidate.getMaximumWidth();
    } else {
      if (lowest == null || candidate.getMinimumWidth() == null) {
        lowest = null;
      } else {
        lowest = Math.min(lowest, candidate.getMinimumWidth());
      }
      if (highest == null || candidate.getMaximumWidth() == null) {
        highest = null;
      } else {
        highest = Math.max(highest, candidate.getMaximumWidth());
      }
    }

    clauses.add(candidate);
  }

  setWidth(lowest, highest);
  baseQuery = new SpanOrQuery(
      clauses.toArray(new MtasSpanQuery[clauses.size()]));
}
/**
 * Passes a span-or over the terms {@code xx} and {@code yy} of {@code FIELD}, together
 * with the int array {2, 3}, to {@code testAllFunctions}.
 */
public void testOrTerm() throws Exception {
  SpanTermQuery xx = new SpanTermQuery(new Term(FIELD, "xx"));
  SpanTermQuery yy = new SpanTermQuery(new Term(FIELD, "yy"));
  SpanOrQuery q = new SpanOrQuery(xx, yy);

  testAllFunctions(q, new int[]{ 2, 3 });
}
/**
 * Runs a Lucene search for the given criteria, restricted to the given music folders.
 *
 * <p>The parsed user query and a span-or over the folder terms are both required (MUST).
 * Hits in the window {@code [offset, offset + count)} are resolved, depending on the index
 * type, into media files, artists or albums and added to the result. Any failure is logged
 * and whatever was collected so far is returned.
 *
 * @param criteria     supplies the query string, offset and count
 * @param musicFolders the folders a hit must belong to
 * @param indexType    the index to search; also decides how hits are resolved
 * @return the search result (never thrown away on error, possibly partially filled)
 */
public SearchResult search(SearchCriteria criteria, List<MusicFolder> musicFolders, IndexType indexType) {
    SearchResult result = new SearchResult();
    final int offset = criteria.getOffset();
    final int count = criteria.getCount();
    result.setOffset(offset);

    IndexReader reader = null;
    try {
        reader = createIndexReader(indexType);
        Searcher searcher = new IndexSearcher(reader);
        Analyzer analyzer = new SubsonicAnalyzer();

        MultiFieldQueryParser queryParser = new MultiFieldQueryParser(LUCENE_VERSION, indexType.getFields(), analyzer, indexType.getBoosts());

        BooleanQuery query = new BooleanQuery();
        query.add(queryParser.parse(analyzeQuery(criteria.getQuery())), BooleanClause.Occur.MUST);

        // ID3 indexes are filtered by folder id, the others by folder path.
        final boolean byFolderId = indexType == ALBUM_ID3 || indexType == ARTIST_ID3;
        List<SpanTermQuery> folderQueries = new ArrayList<SpanTermQuery>();
        for (MusicFolder folder : musicFolders) {
            Term folderTerm = byFolderId
                    ? new Term(FIELD_FOLDER_ID, NumericUtils.intToPrefixCoded(folder.getId()))
                    : new Term(FIELD_FOLDER, folder.getPath().getPath());
            folderQueries.add(new SpanTermQuery(folderTerm));
        }
        SpanQuery[] folderClauses = folderQueries.toArray(new SpanQuery[folderQueries.size()]);
        query.add(new SpanOrQuery(folderClauses), BooleanClause.Occur.MUST);

        TopDocs topDocs = searcher.search(query, null, offset + count);
        result.setTotalHits(topDocs.totalHits);

        // Clamp the requested window to the number of hits.
        final int start = Math.min(offset, topDocs.totalHits);
        final int end = Math.min(start + count, topDocs.totalHits);

        int hit = start;
        while (hit < end) {
            Document doc = searcher.doc(topDocs.scoreDocs[hit].doc);
            switch (indexType) {
                case SONG:
                case ARTIST:
                case ALBUM:
                    MediaFile mediaFile = mediaFileService.getMediaFile(Integer.valueOf(doc.get(FIELD_ID)));
                    addIfNotNull(mediaFile, result.getMediaFiles());
                    break;
                case ARTIST_ID3:
                    Artist artist = artistDao.getArtist(Integer.valueOf(doc.get(FIELD_ID)));
                    addIfNotNull(artist, result.getArtists());
                    break;
                case ALBUM_ID3:
                    Album album = albumDao.getAlbum(Integer.valueOf(doc.get(FIELD_ID)));
                    addIfNotNull(album, result.getAlbums());
                    break;
                default:
                    break;
            }
            hit++;
        }

    } catch (Throwable x) {
        // Best effort: log and fall through to return what has been gathered.
        LOG.error("Failed to execute Lucene search.", x);
    } finally {
        FileUtil.closeQuietly(reader);
    }
    return result;
}