org.apache.lucene.search.TopDocs Code Examples

The following examples show how to use the org.apache.lucene.search.TopDocs API. They are taken from open source projects; you can look up each project on GitHub to see the full source.
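
Before the project samples, here is a minimal self-contained sketch of the basic pattern they all share (this sketch is not taken from any project below; the Directory variable named directory is assumed to point at an existing index). A TopDocs carries the total hit count in totalHits and one ScoreDoc per returned hit in scoreDocs; each ScoreDoc pairs a document id with its score:

import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;

// 'directory' is an assumed, already-built org.apache.lucene.store.Directory
try (DirectoryReader reader = DirectoryReader.open(directory)) {
    IndexSearcher searcher = new IndexSearcher(reader);
    Query query = new TermQuery(new Term("title", "lucene"));

    TopDocs topDocs = searcher.search(query, 10); // at most 10 hits
    System.out.println("total hits: " + topDocs.totalHits);

    for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
        Document doc = searcher.doc(scoreDoc.doc); // resolve doc id to stored fields
        System.out.println(doc.get("title") + " (score=" + scoreDoc.score + ")");
    }
}

Every example below is a variation of this pattern; only the query type, the requested hit count or sort, and the post-processing of the scoreDocs array change.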

Example 1, project: localization_nifi, file: DocsReader.java
public Set<ProvenanceEventRecord> read(final TopDocs topDocs, final EventAuthorizer authorizer, final IndexReader indexReader, final Collection<Path> allProvenanceLogFiles,
        final AtomicInteger retrievalCount, final int maxResults, final int maxAttributeChars) throws IOException {
    if (retrievalCount.get() >= maxResults) {
        return Collections.emptySet();
    }

    final long start = System.nanoTime();
    final ScoreDoc[] scoreDocs = topDocs.scoreDocs;
    final int numDocs = Math.min(scoreDocs.length, maxResults);
    final List<Document> docs = new ArrayList<>(numDocs);

    for (int i = numDocs - 1; i >= 0; i--) {
        final int docId = scoreDocs[i].doc;
        final Document d = indexReader.document(docId);
        docs.add(d);
    }

    final long readDocuments = System.nanoTime() - start;
    logger.debug("Reading {} Lucene Documents took {} millis", docs.size(), TimeUnit.NANOSECONDS.toMillis(readDocuments));
    return read(docs, authorizer, allProvenanceLogFiles, retrievalCount, maxResults, maxAttributeChars);
}
 
private List<Document> runQuery(final File indexDirectory, final List<File> storageDirs, final String query) throws IOException, ParseException {
    try (final DirectoryReader directoryReader = DirectoryReader.open(FSDirectory.open(indexDirectory))) {
        final IndexSearcher searcher = new IndexSearcher(directoryReader);

        final Analyzer analyzer = new SimpleAnalyzer();
        final org.apache.lucene.search.Query luceneQuery = new QueryParser("uuid", analyzer).parse(query);

        final TopDocs topDocs = searcher.search(luceneQuery, 1000);

        final List<Document> docs = new ArrayList<>();
        for (final ScoreDoc scoreDoc : topDocs.scoreDocs) {
            final int docId = scoreDoc.doc;
            final Document d = directoryReader.document(docId);
            docs.add(d);
        }

        return docs;
    }
}
 
public void testWithSameTermQuery() throws IOException {
  indexWriter.addDocument(newDoc("Yin yang, yin gap yang"));
  initReaderSearcherHighlighter();

  BooleanQuery query = new BooleanQuery.Builder()
      .add(new TermQuery(new Term("body", "yin")), BooleanClause.Occur.MUST)
      .add(newPhraseQuery("body", "yin yang"), BooleanClause.Occur.MUST)
      // add queries for other fields; we shouldn't highlight these because of that.
      .add(new TermQuery(new Term("title", "yang")), BooleanClause.Occur.SHOULD)
      .build();

  TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
  dupMatchAllowed.set(false); // We don't want duplicates from "Yin" being in TermQuery & PhraseQuery.
  String[] snippets = highlighter.highlight("body", query, topDocs);
  if (highlighter.getFlags("body").contains(HighlightFlag.WEIGHT_MATCHES)) {
    assertArrayEquals(new String[]{"<b>Yin yang</b>, <b>yin</b> gap yang"}, snippets);
  } else {
    assertArrayEquals(new String[]{"<b>Yin</b> <b>yang</b>, <b>yin</b> gap yang"}, snippets);
  }
}
 
Example 4, project: lucene-solr, file: TestPayloadScoreQuery.java
private static void checkQuery(SpanQuery query, PayloadFunction function, boolean includeSpanScore, int[] expectedDocs, float[] expectedScores) throws IOException {

    assertTrue("Expected docs and scores arrays must be the same length!", expectedDocs.length == expectedScores.length);

    PayloadScoreQuery psq = new PayloadScoreQuery(query, function, PayloadDecoder.FLOAT_DECODER, includeSpanScore);
    TopDocs hits = searcher.search(psq, expectedDocs.length);

    for (int i = 0; i < hits.scoreDocs.length; i++) {
      if (i > expectedDocs.length - 1)
        fail("Unexpected hit in document " + hits.scoreDocs[i].doc);
      if (hits.scoreDocs[i].doc != expectedDocs[i])
        fail("Unexpected hit in document " + hits.scoreDocs[i].doc);
      assertEquals("Bad score in document " + expectedDocs[i], expectedScores[i], hits.scoreDocs[i].score, 0.000001);
    }

    if (hits.scoreDocs.length > expectedDocs.length)
      fail("Unexpected hit in document " + hits.scoreDocs[expectedDocs.length]);

    QueryUtils.check(random(), psq, searcher);
  }
 
Example 5, project: lucene-solr, file: TestBooleanSimilarity.java
public void testPhraseScoreIsEqualToBoost() throws IOException {
  Directory dir = newDirectory();
  RandomIndexWriter w = new RandomIndexWriter(random(), dir,
      newIndexWriterConfig().setSimilarity(new BooleanSimilarity()));
  Document doc = new Document();
  doc.add(new TextField("foo", "bar baz quux", Store.NO));
  w.addDocument(doc);

  DirectoryReader reader = w.getReader();
  w.close();
  IndexSearcher searcher = newSearcher(reader);
  searcher.setSimilarity(new BooleanSimilarity());

  PhraseQuery query = new PhraseQuery(2, "foo", "bar", "quux");

  TopDocs topDocs = searcher.search(query, 2);
  assertEquals(1, topDocs.totalHits.value);
  assertEquals(1f, topDocs.scoreDocs[0].score, 0f);

  topDocs = searcher.search(new BoostQuery(query, 7), 2);
  assertEquals(1, topDocs.totalHits.value);
  assertEquals(7f, topDocs.scoreDocs[0].score, 0f);

  reader.close();
  dir.close();
}
 
Example 6, project: lucene-solr, file: TestHierarchicalDocBuilder.java
private void assertSearch(Query query, String field, String... values) throws IOException {
  /* The search limit is doubled so the assertion below catches the case where, for some reason, more docs match than expected */
  SolrIndexSearcher searcher = req.getSearcher();
  TopDocs result = searcher.search(query, values.length * 2);
  assertEquals(values.length, result.totalHits.value);
  List<String> actualValues = new ArrayList<String>();
  for (int index = 0; index < values.length; ++index) {
    Document doc = searcher.doc(result.scoreDocs[index].doc);
    actualValues.add(doc.get(field));
  }
  
  for (String expectedValue: values) {
    boolean removed = actualValues.remove(expectedValue);
    if (!removed) {
      fail("Search result does not contain expected values");
    }
  }
}
 
public void testBooleanQuery() throws Exception {
    TermQuery tq1 = new TermQuery(new Term("text", "cow"));
    TermQuery tq2 = new TermQuery(new Term("text", "brown"));
    TermQuery tq3 = new TermQuery(new Term("text", "how"));

    BooleanQuery.Builder builder = new BooleanQuery.Builder();
    builder.add(tq1, BooleanClause.Occur.SHOULD);
    builder.add(tq2, BooleanClause.Occur.SHOULD);
    builder.add(tq3, BooleanClause.Occur.SHOULD);

    Query q = builder.build();
    String statsType = "sum_raw_tf";

    ExplorerQuery eq = new ExplorerQuery(q, statsType);

    // Verify tf score
    TopDocs docs = searcher.search(eq, 4);
    assertThat(docs.scoreDocs[0].score, equalTo(3.0f));
}
 
Example 8, project: lucene-solr, file: HighlighterTest.java
private void searchIndex() throws IOException, InvalidTokenOffsetsException {
  Query query = new TermQuery(new Term("t_text1", "random"));
  IndexReader reader = DirectoryReader.open(dir1);
  IndexSearcher searcher = newSearcher(reader);
  // This scorer can return negative idf -> null fragment
  Scorer scorer = new QueryTermScorer( query, searcher.getIndexReader(), "t_text1" );
  // This scorer doesn't use idf (patch version)
  //Scorer scorer = new QueryTermScorer( query, "t_text1" );
  Highlighter h = new Highlighter( scorer );

  TopDocs hits = searcher.search(query, 10);
  for( int i = 0; i < hits.totalHits.value; i++ ){
    Document doc = searcher.doc( hits.scoreDocs[i].doc );
    String result = h.getBestFragment( a, "t_text1", doc.get( "t_text1" ));
    if (VERBOSE) System.out.println("result:" +  result);
    assertEquals("more <B>random</B> words for second field", result);
  }
  reader.close();
}
 
Example 9, project: lucene-solr, file: TestNumericTerms64.java
private void testSorting(int precisionStep) throws Exception {
  String field="field"+precisionStep;
  // 10 random tests; the index order is ascending,
  // so using a reverse sort field should return descending documents
  int num = TestUtil.nextInt(random(), 10, 20);
  for (int i = 0; i < num; i++) {
    long lower=(long)(random().nextDouble()*noDocs*distance)+startOffset;
    long upper=(long)(random().nextDouble()*noDocs*distance)+startOffset;
    if (lower>upper) {
      long a=lower; lower=upper; upper=a;
    }
    Query tq= LegacyNumericRangeQuery.newLongRange(field, precisionStep, lower, upper, true, true);
    TopDocs topDocs = searcher.search(tq, noDocs, new Sort(new SortField(field, SortField.Type.LONG, true)));
    if (topDocs.totalHits.value==0) continue;
    ScoreDoc[] sd = topDocs.scoreDocs;
    assertNotNull(sd);
    long last=searcher.doc(sd[0].doc).getField(field).numericValue().longValue();
    for (int j=1; j<sd.length; j++) {
      long act=searcher.doc(sd[j].doc).getField(field).numericValue().longValue();
      assertTrue("Docs should be sorted backwards", last>act );
      last=act;
    }
  }
}
 
Example 10, project: Indra, file: LuceneTranslator.java
private Map<String, List<String>> doTranslate(Set<String> terms) {
    Map<String, List<String>> res = new HashMap<>();

    try {
        TopDocs topDocs = LuceneUtils.getTopDocs(searcher, terms, TERM_FIELD);

        if (topDocs != null) {
            for (ScoreDoc sd : topDocs.scoreDocs) {
                Document doc = searcher.doc(sd.doc);
                Map<String, Double> content = convert(doc.getBinaryValue(TRANSLATION_FIELD).bytes);
                res.put(doc.get(TERM_FIELD), getRelevantTranslations(content));
            }
        }
    } catch (IOException e) {
        // TODO: throw a proper exception here instead of only logging it
        logger.error(e.getMessage(), e);
    }

    return res;
}
 
public void testBasics() throws IOException {
  indexWriter.addDocument(newDoc("Yin yang, filter")); // filter out. test getTermToSpanLists reader 1-doc filter
  indexWriter.addDocument(newDoc("yin alone, Yin yang, yin gap yang"));
  initReaderSearcherHighlighter();

  //query:  -filter +"yin yang"
  BooleanQuery query = new BooleanQuery.Builder()
      .add(new TermQuery(new Term("body", "filter")), BooleanClause.Occur.MUST_NOT)
      .add(newPhraseQuery("body", "yin yang"), BooleanClause.Occur.MUST)
      .build();


  TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
  String[] snippets = highlighter.highlight("body", query, topDocs);
  if (highlighter.getFlags("body").contains(HighlightFlag.WEIGHT_MATCHES)) {
    assertArrayEquals(new String[]{"yin alone, <b>Yin yang</b>, yin gap yang"}, snippets);
  } else {
    assertArrayEquals(new String[]{"yin alone, <b>Yin</b> <b>yang</b>, yin gap yang"}, snippets);
  }
}
 
Example 12, project: lucene-solr, file: DistanceFacetsExample.java
/** User drills down on the specified range. */
public TopDocs drillDown(DoubleRange range) throws IOException {

  // Passing no baseQuery means we drill down on all
  // documents ("browse only"):
  DrillDownQuery q = new DrillDownQuery(null);
  final DoubleValuesSource vs = getDistanceValueSource();
  q.add("field", range.getQuery(getBoundingBoxQuery(ORIGIN_LATITUDE, ORIGIN_LONGITUDE, range.max), vs));
  DrillSideways ds = new DrillSideways(searcher, config, (TaxonomyReader) null) {
      @Override
      protected Facets buildFacetsResult(FacetsCollector drillDowns, FacetsCollector[] drillSideways, String[] drillSidewaysDims) throws IOException {        
        assert drillSideways.length == 1;
        return new DoubleRangeFacetCounts("field", vs, drillSideways[0], ONE_KM, TWO_KM, FIVE_KM, TEN_KM);
      }
    };
  return ds.search(q, 10).hits;
}
 
Example 13, project: scava, file: SORecommender.java
public TopDocs executeQuery(org.apache.lucene.search.Query query) throws IOException, ParseException {
	Directory indexDir = FSDirectory.open(Paths.get(INDEX_DIRECTORY));
	try {
		IndexReader reader = DirectoryReader.open(indexDir);
		IndexSearcher searcher = new IndexSearcher(reader);
		if (!isBm25) {
			// fall back to classic TF-IDF scoring when BM25 is disabled
			searcher.setSimilarity(new ClassicSimilarity());
		}
		return searcher.search(query, hitsPerPage);
	} catch (Exception e) {
		logger.error(e.getMessage());
		return null;
	}
}
 
Example 14, project: lucene-solr, file: KNearestNeighborClassifier.java
private TopDocs knnSearch(String text) throws IOException {
  BooleanQuery.Builder mltQuery = new BooleanQuery.Builder();
  for (String fieldName : textFieldNames) {
    String boost = null;
    mlt.setBoost(true); //terms boost actually helps in MLT queries
    if (fieldName.contains("^")) {
      String[] field2boost = fieldName.split("\\^");
      fieldName = field2boost[0];
      boost = field2boost[1];
    }
    if (boost != null) {
      mlt.setBoostFactor(Float.parseFloat(boost));//if we have a field boost, we add it
    }
    mltQuery.add(new BooleanClause(mlt.like(fieldName, new StringReader(text)), BooleanClause.Occur.SHOULD));
    mlt.setBoostFactor(1);// restore neutral boost for next field
  }
  Query classFieldQuery = new WildcardQuery(new Term(classFieldName, "*"));
  mltQuery.add(new BooleanClause(classFieldQuery, BooleanClause.Occur.MUST));
  if (query != null) {
    mltQuery.add(query, BooleanClause.Occur.MUST);
  }
  return indexSearcher.search(mltQuery.build(), k);
}
 
Example 15, project: lucene-solr, file: TestFieldScoreQuery.java
private void doTestExactScore (ValueSource valueSource) throws Exception {
  Query functionQuery = getFunctionQuery(valueSource);
  IndexReader r = DirectoryReader.open(dir);
  IndexSearcher s = newSearcher(r);
  TopDocs td = s.search(functionQuery,1000);
  assertEquals("All docs should be matched!",N_DOCS,td.totalHits.value);
  ScoreDoc sd[] = td.scoreDocs;
  for (ScoreDoc aSd : sd) {
    float score = aSd.score;
    log(s.explain(functionQuery, aSd.doc));
    String id = s.getIndexReader().document(aSd.doc).get(ID_FIELD);
    float expectedScore = expectedFieldScore(id); // "ID7" --> 7.0
    assertEquals("score of " + id + " shuould be " + expectedScore + " != " + score, expectedScore, score, TEST_SCORE_TOLERANCE_DELTA);
  }
  r.close();
}
 
Example 16, project: lucene-solr, file: TestNumericRangeQuery32.java
private void testLeftOpenRange(int precisionStep) throws Exception {
  String field="field"+precisionStep;
  int count=3000;
  int upper=(count-1)*distance + (distance/3) + startOffset;
  LegacyNumericRangeQuery<Integer> q= LegacyNumericRangeQuery.newIntRange(field, precisionStep, null, upper, true, true);
  TopDocs topDocs = searcher.search(q, noDocs, Sort.INDEXORDER);
  ScoreDoc[] sd = topDocs.scoreDocs;
  assertNotNull(sd);
  assertEquals("Score doc count", count, sd.length );
  Document doc=searcher.doc(sd[0].doc);
  assertEquals("First doc", startOffset, doc.getField(field).numericValue().intValue());
  doc=searcher.doc(sd[sd.length-1].doc);
  assertEquals("Last doc", (count-1)*distance+startOffset, doc.getField(field).numericValue().intValue());
  
  q= LegacyNumericRangeQuery.newIntRange(field, precisionStep, null, upper, false, true);
  topDocs = searcher.search(q, noDocs, Sort.INDEXORDER);
  sd = topDocs.scoreDocs;
  assertNotNull(sd);
  assertEquals("Score doc count", count, sd.length );
  doc=searcher.doc(sd[0].doc);
  assertEquals("First doc", startOffset, doc.getField(field).numericValue().intValue());
  doc=searcher.doc(sd[sd.length-1].doc);
  assertEquals("Last doc", (count-1)*distance+startOffset, doc.getField(field).numericValue().intValue());
}
 
Example 17, project: RedisDirectory, file: TestLucene.java
public void testRamDirectory() throws IOException {
    long start = System.currentTimeMillis();
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(IndexWriterConfig
            .OpenMode.CREATE);
    RAMDirectory ramDirectory = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(ramDirectory, indexWriterConfig);
    for (int i = 0; i < 10000000; i++) {
        indexWriter.addDocument(addDocument(i));
    }
    indexWriter.commit();
    indexWriter.close();
    long end = System.currentTimeMillis();
    log.error("RamDirectory consumes {}s!", (end - start) / 1000);
    start = System.currentTimeMillis();
    IndexSearcher indexSearcher = new IndexSearcher(DirectoryReader.open(ramDirectory));
    int total = 0;
    for (int i = 0; i < 10000000; i++) {
        TermQuery key1 = new TermQuery(new Term("key1", "key" + i));
        TopDocs search = indexSearcher.search(key1, 10);
        total += search.totalHits;
    }
    System.out.println(total);
    end = System.currentTimeMillis();
    log.error("RamDirectory search consumes {}ms!", (end - start));
}
 
public void testMatchesSlopBug() throws IOException {
  IndexReader ir = indexSomeFields();
  IndexSearcher searcher = newSearcher(ir);
  UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
  Query query = new IntervalQuery("title", Intervals.maxgaps(random().nextBoolean() ? 1 : 2,
      Intervals.ordered(
          Intervals.term("this"), Intervals.term("is"), Intervals.term("the"), Intervals.term("field"))));
  TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
  assertEquals(1, topDocs.totalHits.value);
  String[] snippets = highlighter.highlight("title", query, topDocs, 10);
  assertEquals(1, snippets.length);
  if (highlighter.getFlags("title").contains(HighlightFlag.WEIGHT_MATCHES)) {
    assertEquals("" + highlighter.getFlags("title"),
        "<b>This is the title field</b>.", snippets[0]);
  } else {
    assertEquals("" + highlighter.getFlags("title"),
        "<b>This</b> <b>is</b> <b>the</b> title <b>field</b>.", snippets[0]);
  }
  ir.close();
}
 
Example 19, project: lucene-solr, file: TestSelectiveWeightCreation.java
private LTRScoringQuery.ModelWeight performQuery(TopDocs hits,
    IndexSearcher searcher, int docid, LTRScoringQuery model) throws IOException,
    ModelException {
  final List<LeafReaderContext> leafContexts = searcher.getTopReaderContext()
      .leaves();
  final int n = ReaderUtil.subIndex(hits.scoreDocs[0].doc, leafContexts);
  final LeafReaderContext context = leafContexts.get(n);
  final int deBasedDoc = hits.scoreDocs[0].doc - context.docBase;

  final Weight weight = searcher.createWeight(searcher.rewrite(model), ScoreMode.COMPLETE, 1);
  final Scorer scorer = weight.scorer(context);

  // rerank using the field final-score
  scorer.iterator().advance(deBasedDoc);
  scorer.score();
  assertTrue(weight instanceof LTRScoringQuery.ModelWeight);
  final LTRScoringQuery.ModelWeight modelWeight = (LTRScoringQuery.ModelWeight) weight;
  return modelWeight;

}
 
public void testMaxLen() throws IOException {
  indexWriter.addDocument(newDoc("alpha bravo charlie - gap alpha bravo")); // hyphen is at char 21
  initReaderSearcherHighlighter();
  highlighter.setMaxLength(21);

  BooleanQuery query = new BooleanQuery.Builder()
      .add(newPhraseQuery("body", "alpha bravo"), BooleanClause.Occur.SHOULD)
      .add(newPhraseQuery("body", "gap alpha"), BooleanClause.Occur.SHOULD)
      .add(newPhraseQuery("body", "charlie gap"), BooleanClause.Occur.SHOULD)
      .build();

  TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
  String[] snippets = highlighter.highlight("body", query, topDocs);

  final boolean weightMatches = highlighter.getFlags("body").contains(HighlightFlag.WEIGHT_MATCHES);
  if (fieldType == UHTestHelper.reanalysisType || weightMatches) {
    if (weightMatches) {
      assertArrayEquals(new String[]{"<b>alpha bravo</b> charlie -"}, snippets);
    } else {
      assertArrayEquals(new String[]{"<b>alpha</b> <b>bravo</b> charlie -"}, snippets);
    }
  } else {
    assertArrayEquals(new String[]{"<b>alpha</b> <b>bravo</b> <b>charlie</b> -"}, snippets);
  }
}
 
Example 21, project: taoshop, file: SearchBuilder.java
public static void doSearch(String indexDir , String queryStr) throws IOException, ParseException, InvalidTokenOffsetsException {
    Directory directory = FSDirectory.open(Paths.get(indexDir));
    DirectoryReader reader = DirectoryReader.open(directory);
    IndexSearcher searcher = new IndexSearcher(reader);
    Analyzer analyzer = new SmartChineseAnalyzer();
    QueryParser parser = new QueryParser("tcontent",analyzer);
    Query query = parser.parse(queryStr);

    long startTime = System.currentTimeMillis();
    TopDocs docs = searcher.search(query,10);

    System.out.println("查找"+queryStr+"所用时间:"+(System.currentTimeMillis()-startTime));
    System.out.println("查询到"+docs.totalHits+"条记录");

    //加入高亮显示的
    SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<b><font color=red>","</font></b>");
    QueryScorer scorer = new QueryScorer(query);//计算查询结果最高的得分
    Fragmenter fragmenter = new SimpleSpanFragmenter(scorer);//根据得分算出一个片段
    Highlighter highlighter = new Highlighter(simpleHTMLFormatter,scorer);
    highlighter.setTextFragmenter(fragmenter);//设置显示高亮的片段

    //遍历查询结果
    for(ScoreDoc scoreDoc : docs.scoreDocs){
        Document doc = searcher.doc(scoreDoc.doc);
        System.out.println(doc.get("title"));
        System.out.println(doc.get("tcontent"));
        String tcontent = doc.get("tcontent");
        if(tcontent != null){
            TokenStream tokenStream =  analyzer.tokenStream("tcontent", new StringReader(tcontent));
            String summary = highlighter.getBestFragment(tokenStream, tcontent);
            System.out.println(summary);
        }
    }
    reader.close();
}
 
public void testMultipleTerms() throws Exception {
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);

  Field body = new Field("body", "", fieldType);
  Document doc = new Document();
  doc.add(body);

  body.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
  iw.addDocument(doc);
  body.setStringValue("Highlighting the first term. Hope it works.");
  iw.addDocument(doc);

  IndexReader ir = iw.getReader();
  iw.close();

  IndexSearcher searcher = newSearcher(ir);
  UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
  Query query = new IntervalQuery("body", Intervals.or(
      Intervals.term("highlighting"),
      Intervals.term("just"),
      Intervals.term("first")));
  TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
  assertEquals(2, topDocs.totalHits.value);
  String snippets[] = highlighter.highlight("body", query, topDocs);
  assertEquals(2, snippets.length);
  assertEquals("<b>Just</b> a test <b>highlighting</b> from postings. ", snippets[0]);
  assertEquals("<b>Highlighting</b> the <b>first</b> term. ", snippets[1]);
  ir.close();
}
 
@Override
public void run() {
    if (submission.isCanceled()) {
        return;
    }

    try {
        final DocumentToEventConverter converter = new DocumentToEventConverter() {
            @Override
            public Set<ProvenanceEventRecord> convert(TopDocs topDocs, IndexReader indexReader) throws IOException {
                // Always authorized. We do this because we need to pull back the event, regardless of whether or not
                // the user is truly authorized, because instead of ignoring unauthorized events, we want to replace them.
                final EventAuthorizer authorizer = EventAuthorizer.GRANT_ALL;
                final DocsReader docsReader = new DocsReader();
                return docsReader.read(topDocs, authorizer, indexReader, getAllLogFiles(), new AtomicInteger(0), Integer.MAX_VALUE, maxAttributeChars);
            }
        };

        final Set<ProvenanceEventRecord> matchingRecords = LineageQuery.computeLineageForFlowFiles(getIndexManager(), indexDir, null, flowFileUuids, converter);

        final StandardLineageResult result = submission.getResult();
        result.update(replaceUnauthorizedWithPlaceholders(matchingRecords, user), matchingRecords.size());

        logger.info("Successfully created Lineage for FlowFiles with UUIDs {} in {} milliseconds; Lineage contains {} nodes and {} edges",
                flowFileUuids, result.getComputationTime(TimeUnit.MILLISECONDS), result.getNodes().size(), result.getEdges().size());
    } catch (final Throwable t) {
        logger.error("Failed to query provenance repository due to {}", t.toString());
        if (logger.isDebugEnabled()) {
            logger.error("", t);
        }

        if (t.getMessage() == null) {
            submission.getResult().setError(t.toString());
        } else {
            submission.getResult().setError(t.getMessage());
        }
    }
}
 
static void doUpdate(Term doc, IndexWriter writer, Field... fields) throws IOException {
  long seqId = -1;
  do { // retry if we were just committing a merge
    try (DirectoryReader reader = writer.getReader()) {
      TopDocs topDocs = new IndexSearcher(new IncludeSoftDeletesWrapper(reader)).search(new TermQuery(doc), 10);
      assertEquals(1, topDocs.totalHits.value);
      int theDoc = topDocs.scoreDocs[0].doc;
      seqId = writer.tryUpdateDocValue(reader, theDoc, fields);
    }
  } while (seqId == -1);
}
 
Example 25, project: jstarcraft-core, file: LuceneQueryTestCase.java
@Test
public void testMatchAllDocsQuery() throws Exception {
    // Match all documents
    Query query = new MatchAllDocsQuery();
    TopDocs search = searcher.search(query, 1000000);
    Assert.assertEquals(1681, search.totalHits.value);
}
 
Example 26, project: lucene-solr, file: TestFeatureSort.java
public void testFeatureMissing() throws IOException {
  Directory dir = newDirectory();
  IndexWriterConfig config = newIndexWriterConfig().setMergePolicy(newLogMergePolicy(random().nextBoolean()));
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config);
  Document doc = new Document();
  writer.addDocument(doc);
  doc = new Document();
  doc.add(new FeatureField("field", "name", 1.3F));
  doc.add(newStringField("value", "1.3", Field.Store.YES));
  writer.addDocument(doc);
  doc = new Document();
  doc.add(new FeatureField("field", "name", 4.2F));
  doc.add(newStringField("value", "4.2", Field.Store.YES));
  writer.addDocument(doc);
  IndexReader ir = writer.getReader();
  writer.close();

  IndexSearcher searcher = newSearcher(ir);
  Sort sort = new Sort(FeatureField.newFeatureSort("field", "name"));

  TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort);
  assertEquals(3, td.totalHits.value);
  // null is treated as 0
  assertEquals("4.2", searcher.doc(td.scoreDocs[0].doc).get("value"));
  assertEquals("1.3", searcher.doc(td.scoreDocs[1].doc).get("value"));
  assertNull(searcher.doc(td.scoreDocs[2].doc).get("value"));

  ir.close();
  dir.close();
}
 
public void testEncode() throws Exception {
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);

  Field body = new Field("body", "", fieldType);
  Document doc = new Document();
  doc.add(body);

  body.setStringValue("This is a test. Just a test highlighting from <i>postings</i>. Feel free to ignore.");
  iw.addDocument(doc);

  IndexReader ir = iw.getReader();
  iw.close();

  IndexSearcher searcher = newSearcher(ir);
  UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer) {
    @Override
    protected PassageFormatter getFormatter(String field) {
      return new DefaultPassageFormatter("<b>", "</b>", "... ", true);
    }
  };
  
  Query query = new IntervalQuery("body", Intervals.term("highlighting"));
  TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
  assertEquals(1, topDocs.totalHits.value);
  String snippets[] = highlighter.highlight("body", query, topDocs);
  assertEquals(1, snippets.length);
  assertEquals("Just a test <b>highlighting</b> from &lt;i&gt;postings&lt;&#x2F;i&gt;. ", snippets[0]);
  ir.close();
}
 
/**
 * Returns the top k results from a More Like This query based on the input document
 *
 * @param document the document to use for More Like This search
 * @return the top results for the MLT query
 * @throws IOException If there is a low-level I/O error
 */
private TopDocs knnSearch(Document document) throws IOException {
  BooleanQuery.Builder mltQuery = new BooleanQuery.Builder();

  for (String fieldName : textFieldNames) {
    String boost = null;
    if (fieldName.contains("^")) {
      String[] field2boost = fieldName.split("\\^");
      fieldName = field2boost[0];
      boost = field2boost[1];
    }
    String[] fieldValues = document.getValues(fieldName);
    mlt.setBoost(true); // we want always to use the boost coming from TF * IDF of the term
    if (boost != null) {
      mlt.setBoostFactor(Float.parseFloat(boost)); // this is an additional multiplicative boost coming from the field boost
    }
    mlt.setAnalyzer(field2analyzer.get(fieldName));
    for (String fieldContent : fieldValues) {
      mltQuery.add(new BooleanClause(mlt.like(fieldName, new StringReader(fieldContent)), BooleanClause.Occur.SHOULD));
    }
    mlt.setBoostFactor(1);// restore neutral boost for next field
  }
  Query classFieldQuery = new WildcardQuery(new Term(classFieldName, "*"));
  mltQuery.add(new BooleanClause(classFieldQuery, BooleanClause.Occur.MUST));
  if (query != null) {
    mltQuery.add(query, BooleanClause.Occur.MUST);
  }
  return indexSearcher.search(mltQuery.build(), k);
}
 
Example 29, project: jstarcraft-core, file: LuceneQueryTestCase.java
@Test
public void testMultiPhraseQuery() throws Exception {
    // Multi-phrase query
    Term[] terms = new Term[] { new Term("title", "NeverEnding"), new Term("title", "Xinghua,") };
    Term term = new Term("title", "The");
    // Terms passed in a single add() are alternatives (OR) at the same position; successive add() calls form the phrase, with a slop of at most 3 between "NeverEnding"/"Xinghua," and "The"
    MultiPhraseQuery multiPhraseQuery = new MultiPhraseQuery.Builder().add(terms).add(term).setSlop(3).build();
    TopDocs search = searcher.search(multiPhraseQuery, 1000);
    Assert.assertEquals(2, search.totalHits.value);
}
 
Example 30, project: jstarcraft-core, file: LuceneQueryTestCase.java
@Test
public void testPointExactQuery() throws Exception {
    // Exact-match query on an int point field
    Query exactQuery = IntPoint.newExactQuery("id", 1);
    TopDocs search = searcher.search(exactQuery, 1000);
    Assert.assertEquals(1, search.totalHits.value);
}
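
None of the samples above shows paging, the other common TopDocs idiom: IndexSearcher.searchAfter takes the last ScoreDoc of the previous page as an anchor and returns the next page of hits. A minimal sketch, assuming a searcher and query as in the examples above:

ScoreDoc after = null; // anchor: last hit of the previous page
while (true) {
    TopDocs page = (after == null)
            ? searcher.search(query, 20)
            : searcher.searchAfter(after, query, 20);
    if (page.scoreDocs.length == 0) {
        break; // no more hits
    }
    for (ScoreDoc sd : page.scoreDocs) {
        Document doc = searcher.doc(sd.doc); // process each hit
    }
    after = page.scoreDocs[page.scoreDocs.length - 1];
}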
 