下面列出了 org.apache.lucene.search.RegexpQuery 类的 API 使用示例代码及写法，也可以点击链接到 GitHub 查看源代码。
/**
 * Checks the terms that {@code collectTerms} produces for three {@link RegexpQuery} patterns.
 * In every case the expected set holds one term ending in "XX" plus the "WILDCARD" term,
 * both on the field "field".
 */
public void testRegexpExtractor() {
  // Pattern with ".*" in the middle.
  Set<Term> dotStarTerms = new HashSet<>(Arrays.asList(
      new Term("field", "califragilisticXX"),
      new Term("field", "WILDCARD")));
  RegexpQuery dotStarQuery = new RegexpQuery(new Term("field", "super.*califragilistic"));
  assertEquals(dotStarTerms, collectTerms(dotStarQuery));

  // Pattern ending in a single ".".
  Set<Term> trailingDotTerms = new HashSet<>(Arrays.asList(
      new Term("field", "hellXX"),
      new Term("field", "WILDCARD")));
  RegexpQuery trailingDotQuery = new RegexpQuery(new Term("field", "hell."));
  assertEquals(trailingDotTerms, collectTerms(trailingDotQuery));

  // Pattern containing "?".
  Set<Term> questionMarkTerms = new HashSet<>(Arrays.asList(
      new Term("field", "heXX"),
      new Term("field", "WILDCARD")));
  RegexpQuery questionMarkQuery = new RegexpQuery(new Term("field", "hel?o"));
  assertEquals(questionMarkTerms, collectTerms(questionMarkQuery));
}
/**
 * Builds a {@link RegexpQuery} from a {@link RegexpQueryNode}.
 *
 * <p>The query term is made from the node's field name and its text as bytes. When the node
 * carries a tag under {@code MultiTermRewriteMethodProcessor.TAG_ID}, that tag is applied as
 * the query's rewrite method.
 *
 * @param queryNode the node to convert; it is cast to {@link RegexpQueryNode}
 * @return the regexp query built from the node
 * @throws QueryNodeException declared by the signature; this body does not throw it directly
 */
@Override
public RegexpQuery build(QueryNode queryNode) throws QueryNodeException {
  RegexpQueryNode regexpNode = (RegexpQueryNode) queryNode;

  // TODO: make the maxStates configurable w/ a reasonable default (QueryParserBase uses 10000)
  Term pattern = new Term(regexpNode.getFieldAsString(), regexpNode.textToBytesRef());
  RegexpQuery regexpQuery = new RegexpQuery(pattern);

  MultiTermQuery.RewriteMethod rewriteMethod =
      (MultiTermQuery.RewriteMethod) queryNode.getTag(MultiTermRewriteMethodProcessor.TAG_ID);
  if (rewriteMethod == null) {
    // No rewrite method tagged on the node: leave the query's own setting untouched.
    return regexpQuery;
  }

  regexpQuery.setRewriteMethod(rewriteMethod);
  return regexpQuery;
}
/**
 * Wraps a fuzzy, wildcard, regexp and prefix query for the non-matching text "noSuch" on
 * "field" in a {@link SpanFirstQuery} (end position 10) and asserts that each one counts
 * zero documents.
 */
public void testNoSuchMultiTermsInSpanFirst() throws Exception {
  //this hasn't been a problem

  // Fuzzy
  SpanQuery fuzzySpan = new SpanMultiTermQueryWrapper<>(
      new FuzzyQuery(new Term("field", "noSuch"), 1, 0, 1, false));
  SpanQuery firstFuzzy = new SpanFirstQuery(fuzzySpan, 10);
  assertEquals(0, searcher.count(firstFuzzy));

  // Wildcard
  SpanQuery wildcardSpan = new SpanMultiTermQueryWrapper<>(
      new WildcardQuery(new Term("field", "noSuch*")));
  SpanQuery firstWildcard = new SpanFirstQuery(wildcardSpan, 10);
  assertEquals(0, searcher.count(firstWildcard));

  // Regexp
  SpanQuery regexpSpan = new SpanMultiTermQueryWrapper<>(
      new RegexpQuery(new Term("field", "noSuch")));
  SpanQuery firstRegexp = new SpanFirstQuery(regexpSpan, 10);
  assertEquals(0, searcher.count(firstRegexp));

  // Prefix
  SpanQuery prefixSpan = new SpanMultiTermQueryWrapper<>(
      new PrefixQuery(new Term("field", "noSuch")));
  SpanQuery firstPrefix = new SpanFirstQuery(prefixSpan, 10);
  assertEquals(0, searcher.count(firstPrefix));
}
/**
 * Creates a span regexp query for the given term.
 *
 * <p>The term is wrapped in a {@link RegexpQuery} inside a {@link SpanMultiTermQueryWrapper}.
 * The term text is then split at the first occurrence of {@code MtasToken.DELIMITER}: the
 * part before the delimiter becomes the prefix and the part after it becomes the value. The
 * value is {@code null} when the text has no delimiter or nothing follows the delimiter; in
 * the no-delimiter case the whole text is the prefix.
 *
 * @param term the term holding the regular expression
 * @param singlePosition when {@code true}, 1 is passed for both superclass constructor
 *     arguments; otherwise {@code null} is passed for both
 */
public MtasSpanRegexpQuery(Term term, boolean singlePosition) {
  super(singlePosition ? 1 : null, singlePosition ? 1 : null);
  this.term = term;
  this.singlePosition = singlePosition;
  query = new SpanMultiTermQueryWrapper<>(new RegexpQuery(term));

  String text = term.text();
  int delimiterAt = text.indexOf(MtasToken.DELIMITER);
  if (delimiterAt < 0) {
    // No delimiter: the entire text is the prefix and there is no value.
    prefix = text;
    value = null;
  } else {
    prefix = text.substring(0, delimiterAt);
    String remainder = text.substring(delimiterAt + MtasToken.DELIMITER.length());
    // An empty remainder is stored as null rather than as "".
    value = remainder.isEmpty() ? null : remainder;
  }
}
/**
 * Runs the regular expression "To[a-z]" against the "title" field and expects seven hits.
 */
@Test
public void testRegexpQuery() throws Exception {
  // Regular-expression query
  Term pattern = new Term("title", "To[a-z]");
  TopDocs hits = searcher.search(new RegexpQuery(pattern), 1000);
  Assert.assertEquals(7, hits.totalHits.value);
}
/**
 * Indexes two documents into "body", highlights the regexp "te.*" and checks the snippets.
 *
 * <p>First the regexp is run against "body" and both snippets are expected to wrap the
 * matching word in {@code <b>} tags. Then a boolean query combining match-all with the same
 * regexp on the field "bogus" is highlighted against "body", and the snippets are expected
 * to come back without any tags.
 */
public void testOneRegexp() throws Exception {
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, indexAnalyzer);

  // One Field/Document pair is reused; only the string value changes between adds.
  Field bodyField = new Field("body", "", fieldType);
  Document document = new Document();
  document.add(bodyField);

  bodyField.setStringValue("This is a test.");
  writer.addDocument(document);
  bodyField.setStringValue("Test a one sentence document.");
  writer.addDocument(document);

  IndexReader reader = writer.getReader();
  writer.close();

  IndexSearcher searcher = newSearcher(reader);
  UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);

  // Regexp on the highlighted field.
  Query query = new RegexpQuery(new Term("body", "te.*"));
  TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
  assertEquals(2, topDocs.totalHits.value);
  String[] snippets = highlighter.highlight("body", query, topDocs);
  assertEquals(2, snippets.length);
  assertEquals("This is a <b>test</b>.", snippets[0]);
  assertEquals("<b>Test</b> a one sentence document.", snippets[1]);

  // wrong field
  highlighter.setFieldMatcher(null);//default
  BooleanQuery.Builder builder = new BooleanQuery.Builder();
  builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
  builder.add(new RegexpQuery(new Term("bogus", "te.*")), BooleanClause.Occur.SHOULD);
  BooleanQuery bq = builder.build();

  topDocs = searcher.search(bq, 10, Sort.INDEXORDER);
  assertEquals(2, topDocs.totalHits.value);
  snippets = highlighter.highlight("body", bq, topDocs);
  assertEquals(2, snippets.length);
  assertEquals("This is a test.", snippets[0]);
  assertEquals("Test a one sentence document.", snippets[1]);

  reader.close();
}
/**
 * Searches a {@link MemoryIndex} holding "hello there" with a span-wrapped regexp "worl."
 * and expects a score of 0, then runs the reader through {@code TestUtil.checkReader}.
 */
public void testNullPointerException() throws IOException {
  SpanQuery wrappedquery =
      new SpanMultiTermQueryWrapper<>(new RegexpQuery(new Term("field", "worl.")));

  MemoryIndex mindex = randomMemoryIndex();
  mindex.addField("field", new MockAnalyzer(random()).tokenStream("field", "hello there"));

  // This throws an NPE (note carried over from the original test)
  float score = mindex.search(wrappedquery);
  assertEquals(0, score, 0.00001f);
  TestUtil.checkReader(mindex.createSearcher().getIndexReader());
}
/**
 * Same scenario as searching a {@link MemoryIndex} holding "hello there" with the regexp
 * "worl.", except that the span-wrapped regexp is additionally placed inside a
 * {@link SpanOrQuery}. Expects a score of 0 and a reader that passes
 * {@code TestUtil.checkReader}.
 */
public void testPassesIfWrapped() throws IOException {
  SpanQuery regexSpan =
      new SpanMultiTermQueryWrapper<>(new RegexpQuery(new Term("field", "worl.")));
  SpanQuery wrappedquery = new SpanOrQuery(regexSpan);

  MemoryIndex mindex = randomMemoryIndex();
  mindex.addField("field", new MockAnalyzer(random()).tokenStream("field", "hello there"));

  // This passes though
  float score = mindex.search(wrappedquery);
  assertEquals(0, score, 0.00001f);
  TestUtil.checkReader(mindex.createSearcher().getIndexReader());
}
/**
 * Produces a {@link QueryTree} for a {@link RegexpQuery}, or {@code null} for any other query.
 *
 * <p>The regexp text is taken from {@code parseOutRegexp(query.toString(""))}, the result of
 * {@code selectLongestSubstring} on it is suffixed with {@code ngramSuffix}, and that string
 * becomes a term on the query's field. The returned tree reports the weight computed by
 * {@code termWeightor} for that term and collects both the term and
 * {@code wildcardTokenBytes} on the same field.
 *
 * @param q the query to handle
 * @param termWeightor supplies the weight of the generated term
 * @return a tree for the regexp query, or {@code null} when {@code q} is not a regexp query
 */
@Override
public QueryTree handleQuery(Query q, TermWeightor termWeightor) {
  if (!(q instanceof RegexpQuery)) {
    return null;
  }

  RegexpQuery regexpQuery = (RegexpQuery) q;
  String pattern = parseOutRegexp(regexpQuery.toString(""));
  String longest = selectLongestSubstring(pattern);
  Term term = new Term(regexpQuery.getField(), longest + ngramSuffix);
  double weight = termWeightor.applyAsDouble(term);

  return new QueryTree() {
    @Override
    public double weight() {
      return weight;
    }

    @Override
    public void collectTerms(BiConsumer<String, BytesRef> termCollector) {
      String field = term.field();
      termCollector.accept(field, term.bytes());
      termCollector.accept(field, wildcardTokenBytes);
    }

    @Override
    public boolean advancePhase(double minWeight) {
      // This tree never advances.
      return false;
    }

    @Override
    public String toString(int depth) {
      return space(depth) + "WILDCARD_NGRAM[" + term.toString() + "]^" + weight;
    }
  };
}
/**
 * Builds an ordered, zero-slop {@link SpanNearQuery} from a span-wrapped regexp
 * ("super.*cali.*") and the plain term "is", and expects {@code collectTerms} to return only
 * the term "is".
 */
public void testOrderedNearWithWildcardExtractor() {
  SpanQuery regexpClause =
      new SpanMultiTermQueryWrapper<>(new RegexpQuery(new Term("field", "super.*cali.*")));
  SpanQuery termClause = new SpanTermQuery(new Term("field", "is"));
  SpanNearQuery q = new SpanNearQuery(new SpanQuery[]{regexpClause, termClause}, 0, true);

  Set<Term> expected = Collections.singleton(new Term("field", "is"));
  assertEquals(expected, collectTerms(q));
}
/**
 * Registers the regexp "he.*" as monitor query "1", matches it against a document built from
 * "hello world", and expects one query run, one match, and one hit for query "1".
 */
public void testWildcards() throws IOException {
  try (Monitor monitor = newMonitor()) {
    RegexpQuery regexp = new RegexpQuery(new Term(FIELD, "he.*"));
    monitor.register(new MonitorQuery("1", regexp));

    MatchingQueries<HighlightsMatch> matches =
        monitor.match(buildDoc("hello world"), HighlightsMatch.MATCHER);

    assertEquals(1, matches.getQueriesRun());
    assertEquals(1, matches.getMatchCount());
    assertEquals(1, matches.matches("1").getHitCount());
  }
}
/**
 * Parses "/[a-z][123]/" with a {@link MultiFieldQueryParser} over the fields "a" and "b" and
 * expects a boolean query with one SHOULD {@link RegexpQuery} clause per field.
 */
public void testSimpleRegex() throws ParseException {
  String[] fields = new String[] {"a", "b"};
  MultiFieldQueryParser mfqp = new MultiFieldQueryParser(fields, new MockAnalyzer(random()));

  // Expected query: the same regexp on every field, in field order.
  BooleanQuery.Builder expected = new BooleanQuery.Builder();
  for (String field : fields) {
    expected.add(new RegexpQuery(new Term(field, "[a-z][123]")), Occur.SHOULD);
  }

  assertEquals(expected.build(), mfqp.parse("/[a-z][123]/"));
}
/**
 * Combines the term "brown" with multi-term span queries for the non-matching text "noSuch"
 * (fuzzy, wildcard, regexp, prefix) in ordered {@link SpanNearQuery} instances with slop 1,
 * and asserts that every combination counts zero documents.
 */
public void testNoSuchMultiTermsInNear() throws Exception {
  //test to make sure non existent multiterms aren't throwing null pointer exceptions
  SpanQuery brown = new SpanTermQuery(new Term("field", "brown"));

  // Fuzzy, in both clause orders.
  SpanQuery fuzzySpan = new SpanMultiTermQueryWrapper<>(
      new FuzzyQuery(new Term("field", "noSuch"), 1, 0, 1, false));
  SpanQuery brownThenFuzzy = new SpanNearQuery(new SpanQuery[]{brown, fuzzySpan}, 1, true);
  assertEquals(0, searcher.count(brownThenFuzzy));
  //flip order
  SpanQuery fuzzyThenBrown = new SpanNearQuery(new SpanQuery[]{fuzzySpan, brown}, 1, true);
  assertEquals(0, searcher.count(fuzzyThenBrown));

  // Wildcard
  SpanQuery wildcardSpan = new SpanMultiTermQueryWrapper<>(
      new WildcardQuery(new Term("field", "noSuch*")));
  SpanQuery brownThenWildcard = new SpanNearQuery(new SpanQuery[]{brown, wildcardSpan}, 1, true);
  assertEquals(0, searcher.count(brownThenWildcard));

  // Regexp
  SpanQuery regexpSpan = new SpanMultiTermQueryWrapper<>(
      new RegexpQuery(new Term("field", "noSuch")));
  SpanQuery brownThenRegexp = new SpanNearQuery(new SpanQuery[]{brown, regexpSpan}, 1, true);
  assertEquals(0, searcher.count(brownThenRegexp));

  // Prefix
  SpanQuery prefixSpan = new SpanMultiTermQueryWrapper<>(
      new PrefixQuery(new Term("field", "noSuch")));
  SpanQuery brownThenPrefix = new SpanNearQuery(new SpanQuery[]{brown, prefixSpan}, 1, true);
  assertEquals(0, searcher.count(brownThenPrefix));

  //test single noSuch
  SpanQuery singleNoSuch = new SpanNearQuery(new SpanQuery[]{prefixSpan}, 1, true);
  assertEquals(0, searcher.count(singleNoSuch));

  //test double noSuch
  SpanQuery doubleNoSuch = new SpanNearQuery(new SpanQuery[]{prefixSpan, prefixSpan}, 1, true);
  assertEquals(0, searcher.count(doubleNoSuch));
}
/**
 * Uses multi-term span queries for the non-matching text "noSuch" (fuzzy, wildcard, regexp,
 * prefix) inside {@link SpanNotQuery} instances together with the term "brown".
 *
 * <p>When "brown" is the included clause and a "noSuch" query is the excluded clause, one
 * document is expected; when a "noSuch" query is the included clause, zero are expected.
 */
public void testNoSuchMultiTermsInNotNear() throws Exception {
  //test to make sure non existent multiterms aren't throwing non-matching field exceptions
  SpanQuery brown = new SpanTermQuery(new Term("field", "brown"));

  // Fuzzy
  SpanQuery fuzzySpan = new SpanMultiTermQueryWrapper<>(
      new FuzzyQuery(new Term("field", "noSuch"), 1, 0, 1, false));
  SpanNotQuery brownNotFuzzy = new SpanNotQuery(brown, fuzzySpan, 0, 0);
  assertEquals(1, searcher.count(brownNotFuzzy));
  //flip
  SpanNotQuery fuzzyNotBrown = new SpanNotQuery(fuzzySpan, brown, 0, 0);
  assertEquals(0, searcher.count(fuzzyNotBrown));
  //both noSuch
  SpanNotQuery fuzzyNotFuzzy = new SpanNotQuery(fuzzySpan, fuzzySpan, 0, 0);
  assertEquals(0, searcher.count(fuzzyNotFuzzy));

  // Wildcard
  SpanQuery wildcardSpan = new SpanMultiTermQueryWrapper<>(
      new WildcardQuery(new Term("field", "noSuch*")));
  SpanNotQuery brownNotWildcard = new SpanNotQuery(brown, wildcardSpan, 0, 0);
  assertEquals(1, searcher.count(brownNotWildcard));

  // Regexp (pre and post both set to 1)
  SpanQuery regexpSpan = new SpanMultiTermQueryWrapper<>(
      new RegexpQuery(new Term("field", "noSuch")));
  SpanNotQuery brownNotRegexp = new SpanNotQuery(brown, regexpSpan, 1, 1);
  assertEquals(1, searcher.count(brownNotRegexp));

  // Prefix (pre and post both set to 1)
  SpanQuery prefixSpan = new SpanMultiTermQueryWrapper<>(
      new PrefixQuery(new Term("field", "noSuch")));
  SpanNotQuery brownNotPrefix = new SpanNotQuery(brown, prefixSpan, 1, 1);
  assertEquals(1, searcher.count(brownNotPrefix));
}
/**
 * Places multi-term span queries for the non-matching text "noSuch" (fuzzy, wildcard, regexp,
 * prefix) inside {@link SpanOrQuery} instances.
 *
 * <p>Every disjunction that also contains the term "brown" is expected to count one
 * document; disjunctions made only of "noSuch" clauses are expected to count zero.
 */
public void testNoSuchMultiTermsInOr() throws Exception {
  //test to make sure non existent multiterms aren't throwing null pointer exceptions
  SpanQuery brown = new SpanTermQuery(new Term("field", "brown"));

  // Fuzzy, in both clause orders.
  SpanQuery fuzzySpan = new SpanMultiTermQueryWrapper<>(
      new FuzzyQuery(new Term("field", "noSuch"), 1, 0, 1, false));
  SpanOrQuery brownOrFuzzy = new SpanOrQuery(new SpanQuery[]{brown, fuzzySpan});
  assertEquals(1, searcher.count(brownOrFuzzy));
  //flip
  SpanOrQuery fuzzyOrBrown = new SpanOrQuery(new SpanQuery[]{fuzzySpan, brown});
  assertEquals(1, searcher.count(fuzzyOrBrown));

  // Wildcard
  SpanQuery wildcardSpan = new SpanMultiTermQueryWrapper<>(
      new WildcardQuery(new Term("field", "noSuch*")));
  SpanOrQuery brownOrWildcard = new SpanOrQuery(new SpanQuery[]{brown, wildcardSpan});
  assertEquals(1, searcher.count(brownOrWildcard));

  // Regexp
  SpanQuery regexpSpan = new SpanMultiTermQueryWrapper<>(
      new RegexpQuery(new Term("field", "noSuch")));
  SpanOrQuery brownOrRegexp = new SpanOrQuery(new SpanQuery[]{brown, regexpSpan});
  assertEquals(1, searcher.count(brownOrRegexp));

  // Prefix
  SpanQuery prefixSpan = new SpanMultiTermQueryWrapper<>(
      new PrefixQuery(new Term("field", "noSuch")));
  SpanOrQuery brownOrPrefix = new SpanOrQuery(new SpanQuery[]{brown, prefixSpan});
  assertEquals(1, searcher.count(brownOrPrefix));

  // Only non-matching clauses: one, then two.
  SpanOrQuery singleNoSuch = new SpanOrQuery(new SpanQuery[]{prefixSpan});
  assertEquals(0, searcher.count(singleNoSuch));
  SpanOrQuery doubleNoSuch = new SpanOrQuery(new SpanQuery[]{prefixSpan, prefixSpan});
  assertEquals(0, searcher.count(doubleNoSuch));
}
/**
 * Creates a {@link RegexpQuery} for the given term and assigns it the rewrite method
 * reported by the schema field type of the term's field.
 *
 * @param regexp term whose text is the regular expression and whose field is looked up in
 *     the schema
 * @return the configured regexp query
 */
protected Query newRegexpQuery(Term regexp) {
  // Build the query before the schema lookup so failures surface in the same order as before.
  RegexpQuery regexpQuery = new RegexpQuery(regexp);

  String fieldName = regexp.field();
  SchemaField schemaField = schema.getField(fieldName);
  regexpQuery.setRewriteMethod(schemaField.getType().getRewriteMethod(parser, schemaField));

  return regexpQuery;
}
/**
 * Asserts that two queries have the same class and then delegates to the type-specific
 * comparison for that class. Fails when the expected query's type has no comparison here.
 *
 * <p>The type checks run in a fixed order and the first match wins.
 *
 * @param expected the reference query; its runtime type selects the comparison
 * @param actual the query under test
 */
public static void assertEqualsQuery(Query expected, Query actual) {
  assertEquals(expected.getClass(), actual.getClass());

  if (expected instanceof BooleanQuery) {
    assertEqualsBooleanQuery((BooleanQuery) expected, (BooleanQuery) actual);
    return;
  }
  if (expected instanceof SuperQuery) {
    assertEqualsSuperQuery((SuperQuery) expected, (SuperQuery) actual);
    return;
  }
  if (expected instanceof TermQuery) {
    assertEqualsTermQuery((TermQuery) expected, (TermQuery) actual);
    return;
  }
  if (expected instanceof PrefixQuery) {
    assertEqualsPrefixQuery((PrefixQuery) expected, (PrefixQuery) actual);
    return;
  }
  if (expected instanceof WildcardQuery) {
    assertEqualsWildcardQuery((WildcardQuery) expected, (WildcardQuery) actual);
    return;
  }
  if (expected instanceof FuzzyQuery) {
    assertEqualsFuzzyQuery((FuzzyQuery) expected, (FuzzyQuery) actual);
    return;
  }
  if (expected instanceof RegexpQuery) {
    assertEqualsRegexpQuery((RegexpQuery) expected, (RegexpQuery) actual);
    return;
  }
  if (expected instanceof TermRangeQuery) {
    assertEqualsTermRangeQuery((TermRangeQuery) expected, (TermRangeQuery) actual);
    return;
  }
  if (expected instanceof MatchAllDocsQuery) {
    assertEqualsMatchAllDocsQuery((MatchAllDocsQuery) expected, (MatchAllDocsQuery) actual);
    return;
  }
  if (expected instanceof MultiPhraseQuery) {
    assertEqualsMultiPhraseQuery((MultiPhraseQuery) expected, (MultiPhraseQuery) actual);
    return;
  }
  if (expected instanceof PhraseQuery) {
    assertEqualsPhraseQuery((PhraseQuery) expected, (PhraseQuery) actual);
    return;
  }
  if (expected instanceof NumericRangeQuery<?>) {
    assertEqualsNumericRangeQuery((NumericRangeQuery<?>) expected, (NumericRangeQuery<?>) actual);
    return;
  }

  fail("Type [" + expected.getClass() + "] not supported");
}
/**
 * Reads the field named "term" declared on {@link AutomatonQuery} from the given query via
 * reflection.
 *
 * @param regexpQuery the query to read the field from
 * @return the value of the "term" field, cast to {@link Term}
 * @throws RuntimeException wrapping any exception raised by the lookup, access or cast
 */
private static Term getTerm(RegexpQuery regexpQuery) {
  try {
    Field termField = AutomatonQuery.class.getDeclaredField("term");
    termField.setAccessible(true);
    Object raw = termField.get(regexpQuery);
    return (Term) raw;
  } catch (Exception failure) {
    throw new RuntimeException(failure);
  }
}
/**
 * Builds a {@link RegexpQuery} on the array column for a pattern literal.
 *
 * <p>The literal's string value is converted with {@code LikeOperators.patternToRegex}, the
 * first and last characters of the result are dropped, and the remainder is passed through
 * {@code negateWildcard}. The outcome becomes the regexp term on the column's fully qualified
 * name, and the query is created with the {@code RegexpFlag.COMPLEMENT} flag value.
 *
 * @param candidate literal whose value is cast to {@link String} and used as the pattern
 * @param array reference whose column name is the query field
 * @param context query builder context; not read by this method
 * @return the regexp query
 * @throws IOException declared by the signature; this body does not throw it directly
 */
@Override
protected Query literalMatchesAnyArrayRef(Literal candidate, Reference array, LuceneQueryBuilder.Context context) throws IOException {
  String pattern = (String) candidate.value();
  String regexString = LikeOperators.patternToRegex(pattern, LikeOperators.DEFAULT_ESCAPE, false);

  // Drop the first and last character of the generated regex.
  String trimmed = regexString.substring(1, regexString.length() - 1);
  String notLike = negateWildcard(trimmed);

  Term term = new Term(array.column().fqn(), notLike);
  return new RegexpQuery(term, RegexpFlag.COMPLEMENT.value());
}
/**
 * Checks that the expression {@code name ~ '[a-z]'} is converted into a
 * {@link ConstantScoreQuery} wrapping a Lucene {@link RegexpQuery}, i.e. that the Lucene
 * regular expression engine is still used when no PCRE features are involved.
 */
@Test
public void testRegexQueryFast() throws Exception {
  Query converted = convert("name ~ '[a-z]'");
  assertThat(converted, instanceOf(ConstantScoreQuery.class));

  Query wrapped = ((ConstantScoreQuery) converted).getQuery();
  assertThat(wrapped, instanceOf(RegexpQuery.class));
}
/**
 * Builds the index via {@code makeIndexStrMV()} and runs {@code defgMultiTermQueryTest} with
 * a {@link RegexpQuery} for the pattern "d[a-z].g" on field {@code F}.
 */
public void testRegexpQuery() throws Exception {
  makeIndexStrMV();
  RegexpQuery regexp = new RegexpQuery(new Term(F, "d[a-z].g"));
  defgMultiTermQueryTest(regexp);
}
/**
 * Collects terms for a span-wrapped regexp ("term.*") and expects exactly one term, whose
 * field is {@code TermFilteredPresearcher.ANYTOKEN_FIELD}.
 */
public void testSpanMultiTerms() {
  RegexpQuery regexp = new RegexpQuery(new Term("field", "term.*"));
  SpanQuery spanQuery = new SpanMultiTermQueryWrapper<>(regexp);

  Set<Term> collected = collectTerms(spanQuery);

  assertEquals(1, collected.size());
  Term only = collected.iterator().next();
  assertEquals(TermFilteredPresearcher.ANYTOKEN_FIELD, only.field());
}
/**
 * Parses regexp syntax with a multi-field {@link StandardQueryParser} (fields "b" and "t",
 * default operator AND).
 *
 * <p>An unqualified "/ab.+/" is expected to become a boolean query with one SHOULD
 * {@link RegexpQuery} clause per field; a field-qualified "test:/[abc]?[0-9]/" is expected
 * to become a single {@link RegexpQuery} on "test".
 */
public void testRegexQueryParsing() throws Exception {
  final String[] fields = {"b", "t"};

  final StandardQueryParser parser = new StandardQueryParser();
  parser.setMultiFields(fields);
  parser.setDefaultOperator(StandardQueryConfigHandler.Operator.AND);
  parser.setAnalyzer(new MockAnalyzer(random()));

  // Unqualified regexp: one clause per configured field, in field order.
  BooleanQuery.Builder exp = new BooleanQuery.Builder();
  for (String field : fields) {
    RegexpQuery perField = new RegexpQuery(new Term(field, "ab.+"));
    // TODO specification? was "MUST"
    exp.add(new BooleanClause(perField, BooleanClause.Occur.SHOULD));
  }
  assertEquals(exp.build(), parser.parse("/ab.+/", null));

  // Field-qualified regexp: a single query on the named field.
  RegexpQuery qualified = new RegexpQuery(new Term("test", "[abc]?[0-9]"));
  assertEquals(qualified, parser.parse("test:/[abc]?[0-9]/", null));
}
/**
 * Shorthand for creating a {@link RegexpQuery} from a field name and a pattern.
 *
 * @param field the field of the query term
 * @param text the text of the query term
 * @return a new regexp query on {@code field} with {@code text} as its term text
 */
private RegexpQuery rxq(String field, String text) {
  Term pattern = new Term(field, text);
  return new RegexpQuery(pattern);
}
/**
 * Asserts that two regexp queries agree on their field and on the term returned by
 * {@code getTerm}.
 *
 * @param expected the reference query
 * @param actual the query under test
 */
private static void assertEqualsRegexpQuery(RegexpQuery expected, RegexpQuery actual) {
  // Field first, then the term.
  String expectedField = expected.getField();
  String actualField = actual.getField();
  assertEquals(expectedField, actualField);

  Term expectedTerm = getTerm(expected);
  Term actualTerm = getTerm(actual);
  assertEquals(expectedTerm, actualTerm);
}
/**
 * Runs a {@link RegexpQuery} built from the given field and pattern through {@code search}.
 *
 * @param string the regular expression used as the term text
 * @param fieldName the field the regular expression is applied to
 * @param applyAllDeletes forwarded unchanged to {@code search}
 * @param visitor forwarded unchanged to {@code search}
 * @param fieldsToLoad forwarded unchanged to {@code search}
 * @return whatever {@code search} returns for the regexp query
 * @throws IOException declared by the signature; this body does not throw it directly
 */
public SearchResult searchRegexp(String string, String fieldName,
    boolean applyAllDeletes, IDocumentsVisitor visitor, String... fieldsToLoad) throws IOException {
  Term pattern = new Term(fieldName, string);
  Query regexpQuery = new RegexpQuery(pattern);
  return search(regexpQuery, applyAllDeletes, visitor, fieldsToLoad);
}
/**
 * Wraps a {@link RegexpQuery} for the given field and pattern in a
 * {@link ConstantScoreQuery}. The regexp query is created with {@code RegExp.ALL} as its
 * flags argument.
 *
 * @param fieldName the field of the regexp term
 * @param value the regular expression used as the term text
 * @return the constant-score query wrapping the regexp query
 */
private static Query toLuceneRegexpQuery(String fieldName, String value) {
  Term pattern = new Term(fieldName, value);
  RegexpQuery regexpQuery = new RegexpQuery(pattern, RegExp.ALL);
  return new ConstantScoreQuery(regexpQuery);
}