下面列出了怎么用org.apache.lucene.search.TopDocs的API类实例代码及写法,或者点击链接到github查看源代码。
/**
 * Loads the Lucene documents referenced by {@code topDocs} and passes them to the
 * document-list {@code read} overload.
 *
 * <p>Returns an empty set without touching the index when {@code retrievalCount} has
 * already reached {@code maxResults}. Otherwise at most {@code maxResults} hits are
 * loaded, starting from the last considered hit and walking back to the first.
 *
 * @param topDocs search result whose {@code scoreDocs} identify the documents to load
 * @param authorizer forwarded unchanged to the document-list overload
 * @param indexReader reader used to load each document by its id
 * @param allProvenanceLogFiles forwarded unchanged to the document-list overload
 * @param retrievalCount shared counter compared against {@code maxResults}
 * @param maxResults upper bound on the number of hits that are loaded
 * @param maxAttributeChars forwarded unchanged to the document-list overload
 * @return the result of the document-list overload, or an empty set when the cap was already reached
 * @throws IOException if a document cannot be loaded from the index
 */
public Set<ProvenanceEventRecord> read(final TopDocs topDocs, final EventAuthorizer authorizer, final IndexReader indexReader, final Collection<Path> allProvenanceLogFiles,
        final AtomicInteger retrievalCount, final int maxResults, final int maxAttributeChars) throws IOException {
    // Nothing to do once the counter has already hit the cap.
    if (retrievalCount.get() >= maxResults) {
        return Collections.emptySet();
    }

    final long startNanos = System.nanoTime();
    final ScoreDoc[] hits = topDocs.scoreDocs;
    final int limit = Math.min(hits.length, maxResults);
    final List<Document> documents = new ArrayList<>(limit);

    // Hits are loaded back-to-front: index limit-1 first, index 0 last.
    int index = limit - 1;
    while (index >= 0) {
        documents.add(indexReader.document(hits[index].doc));
        index--;
    }

    final long elapsedNanos = System.nanoTime() - startNanos;
    logger.debug("Reading {} Lucene Documents took {} millis", documents.size(), TimeUnit.NANOSECONDS.toMillis(elapsedNanos));

    return read(documents, authorizer, allProvenanceLogFiles, retrievalCount, maxResults, maxAttributeChars);
}
/**
 * Parses {@code query} with "uuid" as the default field, runs it against the Lucene
 * index stored in {@code indexDirectory} and returns the documents of the top hits
 * (at most 1000).
 *
 * <p>The directory, the reader and the analyzer are all closed before this method
 * returns, whether it completes normally or throws.
 *
 * @param indexDirectory directory containing the Lucene index
 * @param storageDirs not used by this method
 * @param query query string to be parsed by the Lucene query parser
 * @return the documents for the hits, in hit order
 * @throws IOException if the index cannot be opened or read
 * @throws ParseException if {@code query} cannot be parsed
 */
private List<Document> runQuery(final File indexDirectory, final List<File> storageDirs, final String query) throws IOException, ParseException {
    final int maxHits = 1000;

    try (final FSDirectory directory = FSDirectory.open(indexDirectory);
         final DirectoryReader directoryReader = DirectoryReader.open(directory);
         final Analyzer analyzer = new SimpleAnalyzer()) {

        final IndexSearcher searcher = new IndexSearcher(directoryReader);
        final org.apache.lucene.search.Query luceneQuery = new QueryParser("uuid", analyzer).parse(query);
        final TopDocs topDocs = searcher.search(luceneQuery, maxHits);

        final List<Document> docs = new ArrayList<>(topDocs.scoreDocs.length);
        for (final ScoreDoc scoreDoc : topDocs.scoreDocs) {
            docs.add(directoryReader.document(scoreDoc.doc));
        }
        return docs;
    }
}
/**
 * Highlights a document with a query in which the term "yin" occurs both as a plain
 * term clause and inside a phrase clause, and checks the resulting snippet for both
 * highlighter modes (with and without {@code WEIGHT_MATCHES}).
 */
public void testWithSameTermQuery() throws IOException {
    indexWriter.addDocument(newDoc("Yin yang, yin gap yang"));
    initReaderSearcherHighlighter();

    BooleanQuery.Builder clauses = new BooleanQuery.Builder();
    clauses.add(new TermQuery(new Term("body", "yin")), BooleanClause.Occur.MUST);
    clauses.add(newPhraseQuery("body", "yin yang"), BooleanClause.Occur.MUST);
    // add queries for other fields; we shouldn't highlight these because of that.
    clauses.add(new TermQuery(new Term("title", "yang")), BooleanClause.Occur.SHOULD);
    BooleanQuery query = clauses.build();

    TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);

    dupMatchAllowed.set(false); // We don't want duplicates from "Yin" being in TermQuery & PhraseQuery.

    String[] snippets = highlighter.highlight("body", query, topDocs);

    // The expected markup depends on whether whole-phrase (weight) matching is enabled.
    final String expectedSnippet = highlighter.getFlags("body").contains(HighlightFlag.WEIGHT_MATCHES)
            ? "<b>Yin yang</b>, <b>yin</b> gap yang"
            : "<b>Yin</b> <b>yang</b>, <b>yin</b> gap yang";
    assertArrayEquals(new String[]{expectedSnippet}, snippets);
}
/**
 * Wraps {@code query} in a {@code PayloadScoreQuery} and verifies that the search
 * returns exactly the expected documents, in order, with the expected scores.
 *
 * @param query span query to wrap
 * @param function payload function used for scoring
 * @param includeSpanScore passed through to the {@code PayloadScoreQuery} constructor
 * @param expectedDocs expected document ids, in hit order
 * @param expectedScores expected score for each entry of {@code expectedDocs}
 * @throws IOException if the search fails
 */
private static void checkQuery(SpanQuery query, PayloadFunction function, boolean includeSpanScore, int[] expectedDocs, float[] expectedScores) throws IOException {
    assertTrue("Expected docs and scores arrays must be the same length!", expectedDocs.length == expectedScores.length);

    PayloadScoreQuery psq = new PayloadScoreQuery(query, function, PayloadDecoder.FLOAT_DECODER, includeSpanScore);

    // Request one hit more than expected: a search capped at expectedDocs.length can
    // never return a surplus hit, which would make the "unexpected hit" check below
    // unreachable. It also keeps the requested hit count positive when no hits are expected.
    TopDocs hits = searcher.search(psq, expectedDocs.length + 1);

    if (hits.scoreDocs.length > expectedDocs.length) {
        fail("Unexpected hit in document " + hits.scoreDocs[expectedDocs.length].doc);
    }
    // Fewer hits than expected must fail as well instead of passing silently.
    assertEquals("Unexpected number of hits", expectedDocs.length, hits.scoreDocs.length);

    for (int i = 0; i < expectedDocs.length; i++) {
        if (hits.scoreDocs[i].doc != expectedDocs[i]) {
            fail("Unexpected hit in document " + hits.scoreDocs[i].doc);
        }
        assertEquals("Bad score in document " + expectedDocs[i], expectedScores[i], hits.scoreDocs[i].score, 0.000001);
    }

    QueryUtils.check(random(), psq, searcher);
}
/**
 * Indexes a single document with {@code BooleanSimilarity} and checks that a sloppy
 * phrase query scores 1 on its own and 7 when wrapped in a {@code BoostQuery} of 7.
 */
public void testPhraseScoreIsEqualToBoost() throws IOException {
    Directory directory = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), directory,
            newIndexWriterConfig().setSimilarity(new BooleanSimilarity()));

    Document document = new Document();
    document.add(new TextField("foo", "bar baz quux", Store.NO));
    writer.addDocument(document);

    DirectoryReader reader = writer.getReader();
    writer.close();

    IndexSearcher searcher = newSearcher(reader);
    searcher.setSimilarity(new BooleanSimilarity());

    PhraseQuery phrase = new PhraseQuery(2, "foo", "bar", "quux");

    // Unboosted: the single matching document scores exactly 1.
    TopDocs plainHits = searcher.search(phrase, 2);
    assertEquals(1, plainHits.totalHits.value);
    assertEquals(1f, plainHits.scoreDocs[0].score, 0f);

    // Boosted by 7: the score equals the boost.
    TopDocs boostedHits = searcher.search(new BoostQuery(phrase, 7), 2);
    assertEquals(1, boostedHits.totalHits.value);
    assertEquals(7f, boostedHits.scoreDocs[0].score, 0f);

    reader.close();
    directory.close();
}
/**
 * Runs {@code query} and asserts that the hits carry exactly the given values in
 * {@code field}, regardless of order.
 *
 * @param query query to execute
 * @param field stored field whose value is read from every hit
 * @param values expected field values; may be empty to assert that nothing matches
 * @throws IOException if searching or loading a document fails
 */
private void assertSearch(Query query, String field, String... values) throws IOException {
    SolrIndexSearcher searcher = req.getSearcher();
    /* The limit of search queue is doubled to catch the error in case when for some reason there are more docs than expected */
    // Keep the limit at 1 or more: with no expected values the doubled limit would be 0,
    // and Lucene's top-docs collectors reject a non-positive hit count.
    TopDocs result = searcher.search(query, Math.max(1, values.length * 2));
    assertEquals(values.length, result.totalHits.value);

    // Iterate over the hits actually returned rather than trusting values.length as an index bound.
    List<String> actualValues = new ArrayList<>(result.scoreDocs.length);
    for (int index = 0; index < result.scoreDocs.length; ++index) {
        Document doc = searcher.doc(result.scoreDocs[index].doc);
        actualValues.add(doc.get(field));
    }

    // Each expected value consumes one matching actual value, so duplicates are counted.
    for (String expectedValue : values) {
        boolean removed = actualValues.remove(expectedValue);
        if (!removed) {
            fail("Search result does not contain expected values");
        }
    }
}
/**
 * Builds a disjunction of three term queries on the "text" field, wraps it in an
 * {@code ExplorerQuery} using the "sum_raw_tf" statistic and checks the top score.
 */
public void testBooleanQuery() throws Exception {
    BooleanQuery.Builder disjunction = new BooleanQuery.Builder();
    for (String word : new String[] {"cow", "brown", "how"}) {
        disjunction.add(new TermQuery(new Term("text", word)), BooleanClause.Occur.SHOULD);
    }

    ExplorerQuery explorerQuery = new ExplorerQuery(disjunction.build(), "sum_raw_tf");

    // Verify tf score
    TopDocs hits = searcher.search(explorerQuery, 4);
    assertThat(hits.scoreDocs[0].score, equalTo(3.0f));
}
/**
 * Searches {@code dir1} for the term "random" in field "t_text1" and asserts that the
 * best highlighted fragment of every returned hit is the expected one.
 *
 * @throws IOException if the index cannot be read
 * @throws InvalidTokenOffsetsException if highlighting fails on invalid token offsets
 */
private void searchIndex() throws IOException, InvalidTokenOffsetsException {
    Query query = new TermQuery(new Term("t_text1", "random"));
    // try-with-resources so the reader is released even when an assertion fails.
    try (IndexReader reader = DirectoryReader.open(dir1)) {
        IndexSearcher searcher = newSearcher(reader);
        // This scorer can return negative idf -> null fragment
        Scorer scorer = new QueryTermScorer( query, searcher.getIndexReader(), "t_text1" );
        // This scorer doesn't use idf (patch version)
        //Scorer scorer = new QueryTermScorer( query, "t_text1" );
        Highlighter h = new Highlighter( scorer );

        TopDocs hits = searcher.search(query, 10);
        // Bound the loop by the hits actually returned: totalHits counts all matches and
        // can exceed the 10 entries available in scoreDocs.
        for (int i = 0; i < hits.scoreDocs.length; i++) {
            Document doc = searcher.doc( hits.scoreDocs[i].doc );
            String result = h.getBestFragment( a, "t_text1", doc.get( "t_text1" ));
            if (VERBOSE) System.out.println("result:" + result);
            assertEquals("more <B>random</B> words for second field", result);
        }
    }
}
/**
 * Runs between 10 and 20 random long-range queries against the field for the given
 * precision step, sorted in reverse on that field, and asserts that the returned
 * values are strictly descending.
 *
 * @param precisionStep precision step; also selects the field name ("field" + step)
 */
private void testSorting(int precisionStep) throws Exception {
    final String field = "field" + precisionStep;
    // 10 random tests, the index order is ascending,
    // so using a reverse sort field should return descending documents
    final int rounds = TestUtil.nextInt(random(), 10, 20);
    for (int round = 0; round < rounds; round++) {
        // Draw two random bounds and order them so that lower <= upper.
        final long first = (long) (random().nextDouble() * noDocs * distance) + startOffset;
        final long second = (long) (random().nextDouble() * noDocs * distance) + startOffset;
        final long lower = Math.min(first, second);
        final long upper = Math.max(first, second);

        Query rangeQuery = LegacyNumericRangeQuery.newLongRange(field, precisionStep, lower, upper, true, true);
        TopDocs topDocs = searcher.search(rangeQuery, noDocs, new Sort(new SortField(field, SortField.Type.LONG, true)));
        if (topDocs.totalHits.value == 0) {
            continue;
        }

        ScoreDoc[] hits = topDocs.scoreDocs;
        assertNotNull(hits);

        long previous = searcher.doc(hits[0].doc).getField(field).numericValue().longValue();
        for (int j = 1; j < hits.length; j++) {
            final long current = searcher.doc(hits[j].doc).getField(field).numericValue().longValue();
            assertTrue("Docs should be sorted backwards", previous > current);
            previous = current;
        }
    }
}
/**
 * Looks up the given terms in the index and maps each found term to its relevant
 * translations.
 *
 * <p>An {@link IOException} during the lookup is logged and not propagated; the
 * entries collected up to that point (possibly none) are returned.
 *
 * @param terms terms to look up via {@code LuceneUtils.getTopDocs}
 * @return map from the term stored in each hit to its relevant translations
 */
private Map<String, List<String>> doTranslate(Set<String> terms) {
    Map<String, List<String>> res = new HashMap<>();
    try {
        TopDocs topDocs = LuceneUtils.getTopDocs(searcher, terms, TERM_FIELD);
        if (topDocs != null) {
            for (ScoreDoc sd : topDocs.scoreDocs) {
                Document doc = searcher.doc(sd.doc);
                Map<String, Double> content = convert(doc.getBinaryValue(TRANSLATION_FIELD).bytes);
                res.put(doc.get(TERM_FIELD), getRelevantTranslations(content));
            }
        }
    } catch (IOException e) {
        // Pass the exception itself to the logger so the stack trace lands in the
        // configured log instead of being printed to stderr.
        //TODO throw new exception here.
        logger.error(e.getMessage(), e);
    }
    return res;
}
/**
 * Indexes two documents, excludes the one containing "filter" via a MUST_NOT clause
 * and checks how the phrase "yin yang" is highlighted in the remaining document.
 */
public void testBasics() throws IOException {
    indexWriter.addDocument(newDoc("Yin yang, filter")); // filter out. test getTermToSpanLists reader 1-doc filter
    indexWriter.addDocument(newDoc("yin alone, Yin yang, yin gap yang"));
    initReaderSearcherHighlighter();

    //query: -filter +"yin yang"
    BooleanQuery.Builder clauses = new BooleanQuery.Builder();
    clauses.add(new TermQuery(new Term("body", "filter")), BooleanClause.Occur.MUST_NOT);
    clauses.add(newPhraseQuery("body", "yin yang"), BooleanClause.Occur.MUST);
    BooleanQuery query = clauses.build();

    TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
    String[] snippets = highlighter.highlight("body", query, topDocs);

    // With weight matches the phrase is highlighted as one unit, otherwise term by term.
    final String expectedSnippet = highlighter.getFlags("body").contains(HighlightFlag.WEIGHT_MATCHES)
            ? "yin alone, <b>Yin yang</b>, yin gap yang"
            : "yin alone, <b>Yin</b> <b>yang</b>, yin gap yang";
    assertArrayEquals(new String[]{expectedSnippet}, snippets);
}
/**
 * User drills down on the specified range.
 *
 * @param range distance range to drill down on; its {@code max} bounds the bounding-box query
 * @return the top 10 hits of the drill-down search
 * @throws IOException if the search fails
 */
public TopDocs drillDown(DoubleRange range) throws IOException {
    // Passing no baseQuery means we drill down on all
    // documents ("browse only"):
    DrillDownQuery drillDownQuery = new DrillDownQuery(null);
    final DoubleValuesSource distanceSource = getDistanceValueSource();
    drillDownQuery.add("field",
            range.getQuery(getBoundingBoxQuery(ORIGIN_LATITUDE, ORIGIN_LONGITUDE, range.max), distanceSource));

    // No taxonomy reader is supplied, so the facets result is built here from the
    // single drill-sideways collector.
    DrillSideways sideways = new DrillSideways(searcher, config, (TaxonomyReader) null) {
        @Override
        protected Facets buildFacetsResult(FacetsCollector drillDowns, FacetsCollector[] drillSideways, String[] drillSidewaysDims) throws IOException {
            assert drillSideways.length == 1;
            return new DoubleRangeFacetCounts("field", distanceSource, drillSideways[0], ONE_KM, TWO_KM, FIVE_KM, TEN_KM);
        }
    };

    DrillSideways.DrillSidewaysResult result = sideways.search(drillDownQuery, 10);
    return result.hits;
}
/**
 * Runs {@code query} against the index stored at {@code INDEX_DIRECTORY} and returns
 * up to {@code hitsPerPage} hits.
 *
 * <p>{@code ClassicSimilarity} is installed on the searcher when {@code isBm25} is
 * false; otherwise the searcher's default similarity is left in place. The directory
 * and the reader opened here are closed before the method returns.
 *
 * @param query Lucene query to execute
 * @return the hits, or {@code null} when opening the reader or searching fails (the failure is logged)
 * @throws IOException if the index directory cannot be opened or closed
 * @throws ParseException declared for callers; not raised by this method itself
 */
public TopDocs executeQuery(org.apache.lucene.search.Query query) throws IOException, ParseException {
    try (Directory indexDir = FSDirectory.open(Paths.get(INDEX_DIRECTORY))) {
        try (IndexReader reader = DirectoryReader.open(indexDir)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            if (!isBm25) {
                searcher.setSimilarity(new ClassicSimilarity());
            }
            return searcher.search(query, hitsPerPage);
        } catch (Exception e) {
            // Callers already handle a null result; keep that contract but record the
            // full stack trace rather than only the message.
            logger.error(e.getMessage(), e);
            return null;
        }
    }
}
/**
 * Returns the top {@code k} results of a More Like This query built from {@code text}.
 *
 * <p>One SHOULD clause is added per configured text field; a field name of the form
 * {@code name^boost} applies {@code boost} as the MLT boost factor for that field only.
 * Hits must also have some value in the class field and, when set, match {@code query}.
 *
 * @param text text to find similar documents for
 * @return the top results of the MLT query
 * @throws IOException if building the MLT query or searching fails
 */
private TopDocs knnSearch(String text) throws IOException {
    BooleanQuery.Builder mltQuery = new BooleanQuery.Builder();
    for (String fieldSpec : textFieldNames) {
        mlt.setBoost(true); //terms boost actually helps in MLT queries

        final String targetField;
        if (fieldSpec.contains("^")) {
            // "name^boost": split off the boost and apply it for this field
            String[] nameAndBoost = fieldSpec.split("\\^");
            targetField = nameAndBoost[0];
            mlt.setBoostFactor(Float.parseFloat(nameAndBoost[1]));//if we have a field boost, we add it
        } else {
            targetField = fieldSpec;
        }

        mltQuery.add(new BooleanClause(mlt.like(targetField, new StringReader(text)), BooleanClause.Occur.SHOULD));
        mlt.setBoostFactor(1);// restore neutral boost for next field
    }

    // Only documents that have a value in the class field are eligible.
    mltQuery.add(new BooleanClause(new WildcardQuery(new Term(classFieldName, "*")), BooleanClause.Occur.MUST));
    if (query != null) {
        mltQuery.add(query, BooleanClause.Occur.MUST);
    }
    return indexSearcher.search(mltQuery.build(), k);
}
/**
 * Runs the function query for {@code valueSource} and checks that every document is
 * matched and that each hit's score equals the score expected for its id.
 *
 * @param valueSource value source the function query is built from
 */
private void doTestExactScore (ValueSource valueSource) throws Exception {
    Query functionQuery = getFunctionQuery(valueSource);
    // try-with-resources so the reader is closed even when one of the assertions fails.
    try (IndexReader r = DirectoryReader.open(dir)) {
        IndexSearcher s = newSearcher(r);
        TopDocs td = s.search(functionQuery,1000);
        assertEquals("All docs should be matched!",N_DOCS,td.totalHits.value);
        for (ScoreDoc aSd : td.scoreDocs) {
            float score = aSd.score;
            log(s.explain(functionQuery, aSd.doc));
            String id = s.getIndexReader().document(aSd.doc).get(ID_FIELD);
            float expectedScore = expectedFieldScore(id); // "ID7" --> 7.0
            assertEquals("score of " + id + " should be " + expectedScore + " != " + score, expectedScore, score, TEST_SCORE_TOLERANCE_DELTA);
        }
    }
}
/**
 * Checks an int range query with no lower bound: the hit count and the first and last
 * document values are verified, once with the min-inclusive flag set and once without.
 *
 * @param precisionStep precision step; also selects the field name ("field" + step)
 */
private void testLeftOpenRange(int precisionStep) throws Exception {
    final String field = "field" + precisionStep;
    final int count = 3000;
    final int upper = (count - 1) * distance + (distance / 3) + startOffset;

    // The lower bound is null in both runs; the same expectations are asserted for
    // min-inclusive true and false.
    for (boolean minInclusive : new boolean[] {true, false}) {
        LegacyNumericRangeQuery<Integer> rangeQuery =
                LegacyNumericRangeQuery.newIntRange(field, precisionStep, null, upper, minInclusive, true);
        TopDocs topDocs = searcher.search(rangeQuery, noDocs, Sort.INDEXORDER);
        ScoreDoc[] hits = topDocs.scoreDocs;
        assertNotNull(hits);
        assertEquals("Score doc count", count, hits.length );

        Document firstDoc = searcher.doc(hits[0].doc);
        assertEquals("First doc", startOffset, firstDoc.getField(field).numericValue().intValue());

        Document lastDoc = searcher.doc(hits[hits.length - 1].doc);
        assertEquals("Last doc", (count - 1) * distance + startOffset, lastDoc.getField(field).numericValue().intValue());
    }
}
/**
 * Timing test for {@code RAMDirectory}: indexes ten million documents, then runs one
 * term query per document and logs how long each phase took.
 *
 * <p>The writer, the reader and the directory are closed via try-with-resources, so
 * they are released even if indexing or searching throws.
 *
 * @throws IOException if indexing or searching fails
 */
public void testRamDirectory() throws IOException {
    // Number of documents indexed and number of lookups performed afterwards.
    final int docCount = 10000000;

    long start = System.currentTimeMillis();
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(IndexWriterConfig
            .OpenMode.CREATE);
    try (RAMDirectory ramDirectory = new RAMDirectory()) {
        try (IndexWriter indexWriter = new IndexWriter(ramDirectory, indexWriterConfig)) {
            for (int i = 0; i < docCount; i++) {
                indexWriter.addDocument(addDocument(i));
            }
            indexWriter.commit();
        }
        long end = System.currentTimeMillis();
        log.error("RamDirectory consumes {}s!", (end - start) / 1000);

        start = System.currentTimeMillis();
        int total = 0;
        try (DirectoryReader reader = DirectoryReader.open(ramDirectory)) {
            IndexSearcher indexSearcher = new IndexSearcher(reader);
            for (int i = 0; i < docCount; i++) {
                TermQuery key1 = new TermQuery(new Term("key1", "key" + i));
                TopDocs search = indexSearcher.search(key1, 10);
                total += search.totalHits;
            }
        }
        System.out.println(total);
        end = System.currentTimeMillis();
        log.error("RamDirectory search consumes {}ms!", (end - start));
    }
}
/**
 * Highlights the "title" field for an ordered interval query with a random max-gaps
 * value of 1 or 2 and checks the snippet for both highlighter modes.
 */
public void testMatchesSlopBug() throws IOException {
    IndexReader ir = indexSomeFields();
    IndexSearcher searcher = newSearcher(ir);
    UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);

    final int maxGaps = random().nextBoolean() ? 1 : 2;
    Query query = new IntervalQuery("title", Intervals.maxgaps(maxGaps,
            Intervals.ordered(
                    Intervals.term("this"), Intervals.term("is"), Intervals.term("the"), Intervals.term("field"))));

    TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
    assertEquals(1, topDocs.totalHits.value);

    String[] snippets = highlighter.highlight("title", query, topDocs, 10);
    assertEquals(1, snippets.length);

    // Weight matches highlight the whole interval; otherwise each term is marked separately.
    final String expectedSnippet = highlighter.getFlags("title").contains(HighlightFlag.WEIGHT_MATCHES)
            ? "<b>This is the title field</b>."
            : "<b>This</b> <b>is</b> <b>the</b> title <b>field</b>.";
    assertEquals("" + highlighter.getFlags("title"), expectedSnippet, snippets[0]);

    ir.close();
}
/**
 * Creates the weight for {@code model}, scores the first hit of {@code hits} with it
 * and returns the weight as a {@code ModelWeight}.
 *
 * @param hits search result; only its first hit is used
 * @param searcher searcher used to resolve the leaf and create the weight
 * @param docid not used by this method
 * @param model LTR scoring query to rewrite and weigh
 * @return the weight created for {@code model}
 * @throws IOException if weight creation or scoring fails
 * @throws ModelException declared by the signature; not raised explicitly here
 */
private LTRScoringQuery.ModelWeight performQuery(TopDocs hits,
        IndexSearcher searcher, int docid, LTRScoringQuery model) throws IOException,
        ModelException {
    final List<LeafReaderContext> leaves = searcher.getTopReaderContext().leaves();

    // Locate the leaf that holds the first hit and translate its id into that leaf's id space.
    final int topDocId = hits.scoreDocs[0].doc;
    final LeafReaderContext leaf = leaves.get(ReaderUtil.subIndex(topDocId, leaves));
    final int leafDocId = topDocId - leaf.docBase;

    final Weight weight = searcher.createWeight(searcher.rewrite(model), ScoreMode.COMPLETE, 1);
    final Scorer scorer = weight.scorer(leaf);

    // rerank using the field final-score
    scorer.iterator().advance(leafDocId);
    scorer.score();

    assertTrue(weight instanceof LTRScoringQuery.ModelWeight);
    return (LTRScoringQuery.ModelWeight) weight;
}
/**
 * Limits the highlighter to the first 21 characters of the document and checks which
 * phrase matches are still highlighted, depending on the highlighter mode and field type.
 */
public void testMaxLen() throws IOException {
    indexWriter.addDocument(newDoc("alpha bravo charlie - gap alpha bravo")); // hyphen is at char 21
    initReaderSearcherHighlighter();
    highlighter.setMaxLength(21);

    BooleanQuery.Builder clauses = new BooleanQuery.Builder();
    clauses.add(newPhraseQuery("body", "alpha bravo"), BooleanClause.Occur.SHOULD);
    clauses.add(newPhraseQuery("body", "gap alpha"), BooleanClause.Occur.SHOULD);
    clauses.add(newPhraseQuery("body", "charlie gap"), BooleanClause.Occur.SHOULD);
    BooleanQuery query = clauses.build();

    TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
    String[] snippets = highlighter.highlight("body", query, topDocs);

    final boolean weightMatches = highlighter.getFlags("body").contains(HighlightFlag.WEIGHT_MATCHES);
    final String expectedSnippet;
    if (weightMatches) {
        // whole phrase highlighted as one unit
        expectedSnippet = "<b>alpha bravo</b> charlie -";
    } else if (fieldType == UHTestHelper.reanalysisType) {
        expectedSnippet = "<b>alpha</b> <b>bravo</b> charlie -";
    } else {
        expectedSnippet = "<b>alpha</b> <b>bravo</b> <b>charlie</b> -";
    }
    assertArrayEquals(new String[]{expectedSnippet}, snippets);
}
/**
 * Searches the "tcontent" field of the index at {@code indexDir} for {@code queryStr},
 * then prints the search time, the hit count and, for each of the top 10 hits, the
 * title, the content and a highlighted best fragment of the content.
 *
 * <p>The directory, reader and analyzer are closed before the method returns, also
 * when parsing, searching or highlighting throws.
 *
 * @param indexDir path of the Lucene index directory
 * @param queryStr query string parsed with the query parser
 * @throws IOException if the index cannot be opened or read
 * @throws ParseException if {@code queryStr} cannot be parsed
 * @throws InvalidTokenOffsetsException if highlighting fails on invalid token offsets
 */
public static void doSearch(String indexDir , String queryStr) throws IOException, ParseException, InvalidTokenOffsetsException {
    try (Directory directory = FSDirectory.open(Paths.get(indexDir));
         DirectoryReader reader = DirectoryReader.open(directory);
         Analyzer analyzer = new SmartChineseAnalyzer()) {
        IndexSearcher searcher = new IndexSearcher(reader);
        QueryParser parser = new QueryParser("tcontent",analyzer);
        Query query = parser.parse(queryStr);

        long startTime = System.currentTimeMillis();
        TopDocs docs = searcher.search(query,10);

        System.out.println("查找"+queryStr+"所用时间:"+(System.currentTimeMillis()-startTime));
        System.out.println("查询到"+docs.totalHits+"条记录");

        // Set up highlighting.
        SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<b><font color=red>","</font></b>");
        QueryScorer scorer = new QueryScorer(query); // scores text against the query
        Fragmenter fragmenter = new SimpleSpanFragmenter(scorer); // derives fragments based on the scorer
        Highlighter highlighter = new Highlighter(simpleHTMLFormatter,scorer);
        highlighter.setTextFragmenter(fragmenter); // fragmenter used for the highlighted output

        // Iterate over the search results.
        for (ScoreDoc scoreDoc : docs.scoreDocs) {
            Document doc = searcher.doc(scoreDoc.doc);
            String tcontent = doc.get("tcontent");
            System.out.println(doc.get("title"));
            System.out.println(tcontent);
            if (tcontent != null) {
                TokenStream tokenStream = analyzer.tokenStream("tcontent", new StringReader(tcontent));
                String summary = highlighter.getBestFragment(tokenStream, tcontent);
                System.out.println(summary);
            }
        }
    }
}
/**
 * Indexes two documents and checks the snippets produced for an interval query that
 * matches any of the terms "highlighting", "just" or "first".
 */
public void testMultipleTerms() throws Exception {
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, indexAnalyzer);

    // The same Document/Field pair is reused; only the field value changes per add.
    Field body = new Field("body", "", fieldType);
    Document doc = new Document();
    doc.add(body);
    final String[] texts = {
        "This is a test. Just a test highlighting from postings. Feel free to ignore.",
        "Highlighting the first term. Hope it works."
    };
    for (String text : texts) {
        body.setStringValue(text);
        writer.addDocument(doc);
    }

    IndexReader reader = writer.getReader();
    writer.close();

    IndexSearcher searcher = newSearcher(reader);
    UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
    Query query = new IntervalQuery("body", Intervals.or(
            Intervals.term("highlighting"),
            Intervals.term("just"),
            Intervals.term("first")));

    TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
    assertEquals(2, topDocs.totalHits.value);

    String[] snippets = highlighter.highlight("body", query, topDocs);
    assertEquals(2, snippets.length);
    assertEquals("<b>Just</b> a test <b>highlighting</b> from postings. ", snippets[0]);
    assertEquals("<b>Highlighting</b> the <b>first</b> term. ", snippets[1]);

    reader.close();
}
/**
 * Computes the lineage for the configured FlowFile UUIDs and stores it in the
 * submission's result, unless the submission has been canceled.
 *
 * <p>Any {@link Throwable} raised while computing is logged and recorded as the
 * result's error instead of being propagated.
 */
@Override
public void run() {
    if (submission.isCanceled()) {
        return;
    }

    try {
        final DocumentToEventConverter converter = new DocumentToEventConverter() {
            @Override
            public Set<ProvenanceEventRecord> convert(TopDocs topDocs, IndexReader indexReader) throws IOException {
                // Always authorized. We do this because we need to pull back the event, regardless of whether or not
                // the user is truly authorized, because instead of ignoring unauthorized events, we want to replace them.
                return new DocsReader().read(topDocs, EventAuthorizer.GRANT_ALL, indexReader, getAllLogFiles(),
                        new AtomicInteger(0), Integer.MAX_VALUE, maxAttributeChars);
            }
        };

        final Set<ProvenanceEventRecord> matchingRecords =
                LineageQuery.computeLineageForFlowFiles(getIndexManager(), indexDir, null, flowFileUuids, converter);

        // Unauthorized events are swapped for placeholders before the result is published.
        final StandardLineageResult result = submission.getResult();
        result.update(replaceUnauthorizedWithPlaceholders(matchingRecords, user), matchingRecords.size());

        logger.info("Successfully created Lineage for FlowFiles with UUIDs {} in {} milliseconds; Lineage contains {} nodes and {} edges",
                flowFileUuids, result.getComputationTime(TimeUnit.MILLISECONDS), result.getNodes().size(), result.getEdges().size());
    } catch (final Throwable t) {
        logger.error("Failed to query provenance repository due to {}", t.toString());
        if (logger.isDebugEnabled()) {
            logger.error("", t);
        }

        // Prefer the exception message; fall back to toString() when there is none.
        final String message = t.getMessage();
        submission.getResult().setError(message == null ? t.toString() : message);
    }
}
/**
 * Finds the single document matching {@code doc} (soft-deleted documents included) and
 * applies a doc-values update to it, repeating the whole lookup until the update
 * attempt returns something other than -1.
 *
 * @param doc term identifying exactly one document
 * @param writer writer used both to obtain the reader and to apply the update
 * @param fields doc-values fields to update
 * @throws IOException if reading or updating fails
 */
static void doUpdate(Term doc, IndexWriter writer, Field... fields) throws IOException {
    long sequenceId = -1;
    do { // retry while the attempt reports -1 (original note: we may be just committing a merge)
        try (DirectoryReader reader = writer.getReader()) {
            IndexSearcher searcher = new IndexSearcher(new IncludeSoftDeletesWrapper(reader));
            TopDocs hits = searcher.search(new TermQuery(doc), 10);
            assertEquals(1, hits.totalHits.value);
            sequenceId = writer.tryUpdateDocValue(reader, hits.scoreDocs[0].doc, fields);
        }
    } while (sequenceId == -1);
}
/** Runs a match-all query and checks the total number of hits. */
@Test
public void testMatchAllDocsQuery() throws Exception {
    // Match-all query
    TopDocs allHits = searcher.search(new MatchAllDocsQuery(), 1000000);
    Assert.assertEquals(1681, allHits.totalHits.value);
}
/**
 * Sorts three documents by a feature value where one document has no feature at all,
 * and checks that the document without the feature comes last.
 */
public void testFeatureMissing() throws IOException {
    Directory directory = newDirectory();
    IndexWriterConfig config = newIndexWriterConfig().setMergePolicy(newLogMergePolicy(random().nextBoolean()));
    RandomIndexWriter writer = new RandomIndexWriter(random(), directory, config);

    // Document without any feature field.
    Document featureless = new Document();
    writer.addDocument(featureless);

    Document lowFeature = new Document();
    lowFeature.add(new FeatureField("field", "name", 1.3F));
    lowFeature.add(newStringField("value", "1.3", Field.Store.YES));
    writer.addDocument(lowFeature);

    Document highFeature = new Document();
    highFeature.add(new FeatureField("field", "name", 4.2F));
    highFeature.add(newStringField("value", "4.2", Field.Store.YES));
    writer.addDocument(highFeature);

    IndexReader reader = writer.getReader();
    writer.close();

    IndexSearcher searcher = newSearcher(reader);
    Sort byFeature = new Sort(FeatureField.newFeatureSort("field", "name"));
    TopDocs hits = searcher.search(new MatchAllDocsQuery(), 10, byFeature);
    assertEquals(3, hits.totalHits.value);

    // null is treated as 0
    assertEquals("4.2", searcher.doc(hits.scoreDocs[0].doc).get("value"));
    assertEquals("1.3", searcher.doc(hits.scoreDocs[1].doc).get("value"));
    assertNull(searcher.doc(hits.scoreDocs[2].doc).get("value"));

    reader.close();
    directory.close();
}
/**
 * Highlights a document containing HTML markup using a {@code DefaultPassageFormatter}
 * created with its last constructor argument set to {@code true}, and checks the
 * resulting snippet.
 */
public void testEncode() throws Exception {
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, indexAnalyzer);

    Field body = new Field("body", "", fieldType);
    Document doc = new Document();
    doc.add(body);
    body.setStringValue("This is a test. Just a test highlighting from <i>postings</i>. Feel free to ignore.");
    writer.addDocument(doc);

    IndexReader reader = writer.getReader();
    writer.close();

    IndexSearcher searcher = newSearcher(reader);
    // Override the formatter for every field with the one under test.
    UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer) {
        @Override
        protected PassageFormatter getFormatter(String field) {
            return new DefaultPassageFormatter("<b>", "</b>", "... ", true);
        }
    };

    Query query = new IntervalQuery("body", Intervals.term("highlighting"));
    TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
    assertEquals(1, topDocs.totalHits.value);

    String[] snippets = highlighter.highlight("body", query, topDocs);
    assertEquals(1, snippets.length);
    assertEquals("Just a test <b>highlighting</b> from <i>postings</i>. ", snippets[0]);

    reader.close();
}
/**
 * Returns the top k results from a More Like This query based on the input document.
 *
 * <p>For every configured text field, one SHOULD clause is added per value the document
 * holds for that field. A field name of the form {@code name^boost} applies
 * {@code boost} as an extra MLT boost factor for that field only.
 *
 * @param document the document to use for More Like This search
 * @return the top results for the MLT query
 * @throws IOException If there is a low-level I/O error
 */
private TopDocs knnSearch(Document document) throws IOException {
    BooleanQuery.Builder mltQuery = new BooleanQuery.Builder();
    for (String fieldSpec : textFieldNames) {
        // Split an optional "^boost" suffix off the configured field name.
        String targetField = fieldSpec;
        String boostText = null;
        if (fieldSpec.contains("^")) {
            String[] nameAndBoost = fieldSpec.split("\\^");
            targetField = nameAndBoost[0];
            boostText = nameAndBoost[1];
        }

        String[] contents = document.getValues(targetField);

        mlt.setBoost(true); // we want always to use the boost coming from TF * IDF of the term
        if (boostText != null) {
            mlt.setBoostFactor(Float.parseFloat(boostText)); // this is an additional multiplicative boost coming from the field boost
        }
        mlt.setAnalyzer(field2analyzer.get(targetField));

        for (String content : contents) {
            mltQuery.add(new BooleanClause(mlt.like(targetField, new StringReader(content)), BooleanClause.Occur.SHOULD));
        }
        mlt.setBoostFactor(1);// restore neutral boost for next field
    }

    // Only documents that have a value in the class field are eligible.
    mltQuery.add(new BooleanClause(new WildcardQuery(new Term(classFieldName, "*")), BooleanClause.Occur.MUST));
    if (query != null) {
        mltQuery.add(query, BooleanClause.Occur.MUST);
    }
    return indexSearcher.search(mltQuery.build(), k);
}
/** Runs a multi-phrase query on the "title" field and checks the total number of hits. */
@Test
public void testMultiPhraseQuery() throws Exception {
    // Multi-phrase query
    MultiPhraseQuery.Builder phrase = new MultiPhraseQuery.Builder();
    // Terms passed together in one add() are treated as alternatives (OR), i.e. the slop
    // between "NeverEnding"/"Xinghua," and "The" must not exceed 3.
    phrase.add(new Term[] { new Term("title", "NeverEnding"), new Term("title", "Xinghua,") });
    phrase.add(new Term("title", "The"));
    phrase.setSlop(3);

    TopDocs hits = searcher.search(phrase.build(), 1000);
    Assert.assertEquals(2, hits.totalHits.value);
}
/** Runs an exact-value point query for id 1 and checks that exactly one document is hit. */
@Test
public void testPointExactQuery() throws Exception {
    // Exact-match query
    TopDocs hits = searcher.search(IntPoint.newExactQuery("id", 1), 1000);
    Assert.assertEquals(1, hits.totalHits.value);
}