The following examples show how to use the org.apache.lucene.search.TopScoreDocCollector API class; follow the links to view the original source on GitHub.
/**
 * Collector used for re-ranking: first collects the top
 * {@code max(reRankDocs, length)} hits by the command's sort (or by score
 * when no sort is set) so the re-rank query can rescore them afterwards.
 *
 * @param reRankDocs      how many of the top documents will be re-ranked
 * @param length          number of documents ultimately requested
 * @param reRankQuery     query whose scores drive the re-ranking
 * @param reRankWeight    weight applied to the re-rank query's score
 * @param cmd             original query command (supplies the sort)
 * @param searcher        searcher used to rewrite the sort
 * @param boostedPriority priority map for boosted docs — presumably query
 *                        elevation; semantics not visible here, TODO confirm
 * @param scale           score-scaling flag — semantics not visible here
 * @throws IOException if sort rewriting or collector creation fails
 */
public ReRankCollector(int reRankDocs,
    int length,
    Query reRankQuery,
    double reRankWeight,
    QueryCommand cmd,
    IndexSearcher searcher,
    Map<BytesRef, Integer> boostedPriority,
    boolean scale) throws IOException {
  super(null);
  this.reRankQuery = reRankQuery;
  this.reRankDocs = reRankDocs;
  this.length = length;
  this.boostedPriority = boostedPriority;
  this.scale = scale;
  Sort sort = cmd.getSort();
  if(sort == null) {
    // no sort requested: plain score-ordered collection
    this.mainCollector = TopScoreDocCollector.create(Math.max(this.reRankDocs, length), null);
  } else {
    // sorts must be rewritten against the searcher before use
    sort = sort.rewrite(searcher);
    this.mainCollector = TopFieldCollector.create(sort, Math.max(this.reRankDocs, length), null, false, true, true);
  }
  this.searcher = searcher;
  this.reRankWeight = reRankWeight;
}
/**
 * Looks up subject URIs whose object field fuzzily matches {@code object}.
 * Stopwords short-circuit to an empty immutable list; index failures are
 * logged and whatever was collected so far is returned.
 */
@Override
public List<String> search(final String object) {
    if (stopwords.contains(object.toLowerCase())) {
        log.debug("\t Stopword detected: |" + object + "|");
        return ImmutableList.of();
    }
    ArrayList<String> subjectUris = Lists.newArrayList();
    try {
        log.debug("\t start asking index...");
        Query fuzzy = new FuzzyQuery(new Term(FIELD_NAME_OBJECT, object), 0);
        TopScoreDocCollector topCollector = TopScoreDocCollector.create(numberOfDocsRetrievedFromIndex);
        isearcher.search(fuzzy, topCollector);
        ScoreDoc[] scoreDocs = topCollector.topDocs().scoreDocs;
        for (int i = 0; i < scoreDocs.length; i++) {
            Document matched = isearcher.doc(scoreDocs[i].doc);
            subjectUris.add(matched.get(FIELD_NAME_SUBJECT));
        }
        log.debug("\t finished asking index...");
    } catch (Exception e) {
        log.error(e.getLocalizedMessage() + " -> " + object, e);
    }
    return subjectUris;
}
/**
 * Fetches the next page of results with search-after paging and appends the
 * new ScoreDocs to the cached {@code docs} array so subsequent pages can
 * continue from the last hit.
 *
 * @return the results for the current page
 * @throws IOException on index access failure
 */
private SearchResults search() throws IOException {
    // execute search: continue after the last cached hit (null on the first page)
    ScoreDoc after = docs.length == 0 ? null : docs[docs.length - 1];
    TopDocs topDocs;
    if (sort != null) {
        topDocs = searcher.searchAfter(after, query, pageSize, sort);
    } else {
        // raise the threshold so totalHits is exact rather than a lower bound
        int hitsThreshold = exactHitsCount ? Integer.MAX_VALUE : DEFAULT_TOTAL_HITS_THRESHOLD;
        TopScoreDocCollector collector = TopScoreDocCollector.create(pageSize, after, hitsThreshold);
        searcher.search(query, collector);
        topDocs = collector.topDocs();
    }
    // reset total hits for the current query
    this.totalHits = topDocs.totalHits;
    // cache search results for later use: grow the docs array by this page's hits
    ScoreDoc[] newDocs = new ScoreDoc[docs.length + topDocs.scoreDocs.length];
    System.arraycopy(docs, 0, newDocs, 0, docs.length);
    System.arraycopy(topDocs.scoreDocs, 0, newDocs, docs.length, topDocs.scoreDocs.length);
    this.docs = newDocs;
    return SearchResults.of(topDocs.totalHits, topDocs.scoreDocs, currentPage * pageSize, searcher, fieldsToLoad);
}
/**
 * Builds the first-pass grouping collector: collection is restricted to
 * documents matching {@code query} (via FilterCollector) and the top
 * within-group documents are gathered either by score or by the configured
 * within-group sort.
 *
 * @throws IOException if the doc set or weighted sort cannot be built
 */
@Override
protected Collector createFirstPassCollector() throws IOException {
    DocSet groupFilt = searcher.getDocSet(query);
    int groupDocsToCollect = getMax(groupOffset, docsPerGroup, maxDoc);
    Collector subCollector;
    if (withinGroupSort == null || withinGroupSort.equals(Sort.RELEVANCE)) {
        // relevance order: a score collector already tracks scores itself
        subCollector = topCollector = TopScoreDocCollector.create(groupDocsToCollect, Integer.MAX_VALUE);
    } else {
        topCollector = TopFieldCollector.create(searcher.weightSort(withinGroupSort), groupDocsToCollect, Integer.MAX_VALUE);
        if (needScores) {
            // a field-sorted collector does not track scores; run a MaxScoreCollector alongside
            maxScoreCollector = new MaxScoreCollector();
            subCollector = MultiCollector.wrap(topCollector, maxScoreCollector);
        } else {
            subCollector = topCollector;
        }
    }
    collector = new FilterCollector(groupFilt, subCollector);
    return collector;
}
/**
 * Creates the collector chain for this pass: top docs by score or by the
 * configured sort (with an extra MaxScoreCollector when scores are needed
 * for a field sort), all wrapped in a FilterCollector restricted to
 * {@code docSet}.
 *
 * @return a single-element list holding the filter collector
 * @throws IOException if collector creation fails
 */
@Override
public List<Collector> create() throws IOException {
    Collector subCollector;
    if (sort == null || sort.equals(Sort.RELEVANCE)) {
        // relevance order: the score collector tracks scores itself
        subCollector = topDocsCollector = TopScoreDocCollector.create(docsToCollect, Integer.MAX_VALUE);
    } else {
        topDocsCollector = TopFieldCollector.create(sort, docsToCollect, Integer.MAX_VALUE);
        if (needScores) {
            // field sort does not track scores; collect the max score separately
            maxScoreCollector = new MaxScoreCollector();
            subCollector = MultiCollector.wrap(topDocsCollector, maxScoreCollector);
        } else {
            subCollector = topDocsCollector;
        }
    }
    filterCollector = new FilterCollector(docSet, subCollector);
    return Arrays.asList((Collector) filterCollector);
}
/**
 * Collector used for re-ranking via a {@link Rescorer}: collects the top
 * {@code max(reRankDocs, length)} hits (by score, or by the command's
 * rewritten sort) so the rescorer can reorder them afterwards.
 *
 * @param reRankDocs          how many of the top documents will be re-ranked
 * @param length              number of documents ultimately requested
 * @param reRankQueryRescorer rescorer applied to the collected hits
 * @param cmd                 original query command (supplies query, sort, minExactCount)
 * @param searcher            searcher used to rewrite the sort
 * @param boostedPriority     boosted docs — presumably query elevation, TODO confirm
 * @throws IOException if sort rewriting or collector creation fails
 */
@SuppressWarnings({"unchecked"})
public ReRankCollector(int reRankDocs,
    int length,
    Rescorer reRankQueryRescorer,
    QueryCommand cmd,
    IndexSearcher searcher,
    Set<BytesRef> boostedPriority) throws IOException {
  super(null);
  this.reRankDocs = reRankDocs;
  this.length = length;
  this.boostedPriority = boostedPriority;
  this.query = cmd.getQuery();
  Sort sort = cmd.getSort();
  if(sort == null) {
    this.sort = null;
    this.mainCollector = TopScoreDocCollector.create(Math.max(this.reRankDocs, length), cmd.getMinExactCount());
  } else {
    this.sort = sort = sort.rewrite(searcher);
    //scores are needed for Rescorer (regardless of whether sort needs it)
    this.mainCollector = TopFieldCollector.create(sort, Math.max(this.reRankDocs, length), cmd.getMinExactCount());
  }
  this.searcher = searcher;
  this.reRankQueryRescorer = reRankQueryRescorer;
}
/**
 * Runs {@code query} against the index and returns the {@code COLUMN_NAME}
 * field of the top 10 hits.
 *
 * @param query query to execute
 * @return field values of the matched documents, best-scoring first
 * @throws IOException on index access failure
 */
private List<String> returnMusics(Query query) throws IOException {
    int hitsPerPage = 10;
    // try-with-resources: the reader was previously never closed (leaked on
    // every call, and on any exception from search/doc)
    try (IndexReader reader = DirectoryReader.open(LuceneUtil.INSTANCE.getDirectory())) {
        IndexSearcher searcher = new IndexSearcher(reader);
        TopScoreDocCollector collector = TopScoreDocCollector.create(
                hitsPerPage, true);
        searcher.search(query, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;
        List<String> musics = new LinkedList<>();
        for (ScoreDoc hit : hits) {
            Document d = searcher.doc(hit.doc);
            musics.add(d.get(COLUMN_NAME));
        }
        return musics;
    }
}
/**
 * Runs {@code query} against the index and returns the
 * {@code COLUMN_NICk_NAME} field of the top 10 hits.
 *
 * @param query query to execute
 * @return field values of the matched documents, best-scoring first
 * @throws IOException on index access failure
 */
private List<String> returnResume(Query query) throws IOException {
    int hitsPerPage = 10;
    // try-with-resources: the reader was previously never closed (leaked on
    // every call, and on any exception from search/doc)
    try (IndexReader reader = DirectoryReader.open(LuceneUtil.INSTANCE.getDirectory())) {
        IndexSearcher searcher = new IndexSearcher(reader);
        TopScoreDocCollector collector = TopScoreDocCollector.create(
                hitsPerPage, true);
        searcher.search(query, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;
        List<String> resumeIDs = new LinkedList<>();
        for (ScoreDoc hit : hits) {
            Document d = searcher.doc(hit.doc);
            resumeIDs.add(d.get(COLUMN_NICk_NAME));
        }
        return resumeIDs;
    }
}
/**
 * Runs {@code bq} against the triple index and materialises up to
 * {@code maxNumberOfResults} hits as {@code Triple(subject, predicate,
 * object)}; the object is taken from the URI field, falling back to the
 * literal field when no URI is stored.
 *
 * @param maxNumberOfResults maximum number of hits to return
 * @param bq                 boolean query to execute
 * @return matched triples, best-scoring first
 * @throws IOException on index access failure
 */
private List<Triple> getFromIndex(int maxNumberOfResults, BooleanQuery bq) throws IOException {
    log.debug("\t start asking index...");
    TopScoreDocCollector collector = TopScoreDocCollector.create(maxNumberOfResults, true);
    isearcher.search(bq, collector);
    ScoreDoc[] hits = collector.topDocs().scoreDocs;
    List<Triple> triples = new ArrayList<Triple>();
    String s, p, o;
    for (int i = 0; i < hits.length; i++) {
        Document hitDoc = isearcher.doc(hits[i].doc);
        s = hitDoc.get(FIELD_NAME_SUBJECT);
        p = hitDoc.get(FIELD_NAME_PREDICATE);
        o = hitDoc.get(FIELD_NAME_OBJECT_URI);
        if (o == null) {
            // no URI object stored — fall back to the literal representation
            o = hitDoc.get(FIELD_NAME_OBJECT_LITERAL);
        }
        Triple triple = new Triple(s, p, o);
        triples.add(triple);
    }
    log.debug("\t finished asking index...");
    return triples;
}
/**
 * Search for bible chapters that match the given filter.
 *
 * @param queryString the query string to filter.
 * @param type ignored - may be null.
 * @return a list of all bible chapters that match the given filter.
 */
@Override
public BibleChapter[] filter(String queryString, FilterType type) {
    String sanctifyQueryString = SearchIndexUtils.makeLuceneQuery(queryString);
    // no chapters loaded, or the query sanitised down to nothing: return everything
    if(chapters.isEmpty() || sanctifyQueryString.isEmpty()) {
        return chapters.values().toArray(new BibleChapter[chapters.size()]);
    }
    List<BibleChapter> ret;
    try (DirectoryReader dr = DirectoryReader.open(index)) {
        IndexSearcher searcher = new IndexSearcher(dr);
        // large phrase queries can exceed the default clause limit
        BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);
        Query q = new ComplexPhraseQueryParser("text", analyzer).parse(sanctifyQueryString);
        // collect up to 10000 hits with an exact count up to the same threshold
        TopScoreDocCollector collector = TopScoreDocCollector.create(10000,10000);
        searcher.search(q, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;
        ret = new ArrayList<>();
        for(int i = 0; i < hits.length; ++i) {
            int docId = hits[i].doc;
            Document d = searcher.doc(docId);
            // the stored "number" field keys back into the chapters map
            BibleChapter chapter = chapters.get(Integer.parseInt(d.get("number")));
            ret.add(chapter);
        }
        return ret.toArray(new BibleChapter[ret.size()]);
    }
    catch (ParseException | IOException ex) {
        LOGGER.log(Level.WARNING, "Invalid query string: " + sanctifyQueryString, ex);
        return new BibleChapter[0];
    }
}
/**
 * Finds artifacts whose indexed dependency-class field references
 * {@code className} (matched via its crc32/base64 token) and appends a
 * {@link ClassUsage} per matching artifact version to {@code results}.
 *
 * NOTE(review): a single collector instance is shared across all indexing
 * contexts, with topDocs() invoked once per context — verify that hits
 * accumulated in earlier iterations are still reported as intended.
 *
 * @param className fully-qualified class name to look up usages of
 * @param indexer   indexer used to construct the query
 * @param contexts  repository indexing contexts to search
 * @param results   output list receiving one ClassUsage per hit
 * @throws IOException on index access failure
 */
static void search(String className, Indexer indexer, Collection<IndexingContext> contexts, List<? super ClassUsage> results) throws IOException {
    // class names are stored as crc32-base64 tokens of their internal form
    String searchString = crc32base64(className.replace('.', '/'));
    Query refClassQuery = indexer.constructQuery(ClassDependencyIndexCreator.FLD_NB_DEPENDENCY_CLASS.getOntology(), new StringSearchExpression(searchString));
    TopScoreDocCollector collector = TopScoreDocCollector.create(NexusRepositoryIndexerImpl.MAX_RESULT_COUNT, null);
    for (IndexingContext context : contexts) {
        IndexSearcher searcher = context.acquireIndexSearcher();
        try {
            searcher.search(refClassQuery, collector);
            ScoreDoc[] hits = collector.topDocs().scoreDocs;
            LOG.log(Level.FINER, "for {0} ~ {1} found {2} hits", new Object[] {className, searchString, hits.length});
            for (ScoreDoc hit : hits) {
                int docId = hit.doc;
                Document d = searcher.doc(docId);
                String fldValue = d.get(ClassDependencyIndexCreator.NB_DEPENDENCY_CLASSES);
                LOG.log(Level.FINER, "{0} uses: {1}", new Object[] {className, fldValue});
                Set<String> refClasses = parseField(searchString, fldValue, d.get(ArtifactInfo.NAMES));
                if (!refClasses.isEmpty()) {
                    ArtifactInfo ai = IndexUtils.constructArtifactInfo(d, context);
                    if (ai != null) {
                        ai.setRepository(context.getRepositoryId());
                        List<NBVersionInfo> version = NexusRepositoryIndexerImpl.convertToNBVersionInfo(Collections.singleton(ai));
                        if (!version.isEmpty()) {
                            results.add(new ClassUsage(version.get(0), refClasses));
                        }
                    }
                }
            }
        } finally {
            // searchers are ref-counted by the context; always release
            context.releaseIndexSearcher(searcher);
        }
    }
}
/**
 * Phrase-searches the object field for {@code object} and returns the
 * matching subject URIs; failures are logged and whatever was collected so
 * far is returned.
 */
public ArrayList<String> search(final String object) {
    ArrayList<String> subjects = Lists.newArrayList();
    try {
        log.debug("\t start asking index...");
        QueryParser parser = new QueryParser(FIELD_NAME_OBJECT, analyzer);
        TopScoreDocCollector topCollector = TopScoreDocCollector.create(numberOfDocsRetrievedFromIndex);
        isearcher.search(parser.createPhraseQuery(FIELD_NAME_OBJECT, object), topCollector);
        for (ScoreDoc scored : topCollector.topDocs().scoreDocs) {
            Document matched = isearcher.doc(scored.doc);
            subjects.add(matched.get(FIELD_NAME_SUBJECT));
        }
        log.debug("\t finished asking index...");
    } catch (Exception e) {
        log.error(e.getLocalizedMessage() + " -> " + object, e);
    }
    return subjects;
}
/**
 * Fuzzy-searches the object field for {@code object} and returns matching
 * subject URIs; stopwords immediately yield an empty immutable list and
 * index failures are logged.
 */
@Override
public List<String> search(final String object) {
    if (stopwords.contains(object.toLowerCase())) {
        log.debug("\t Stopword detected: |" + object + "|");
        // was System.out.println(...) — leftover stdout debugging; use the logger
        log.debug("returning immutable empty");
        return ImmutableList.of();
    }
    ArrayList<String> uris = Lists.newArrayList();
    try {
        log.debug("\t start asking index for |" + object + "|");
        Query q = new FuzzyQuery(new Term(FIELD_NAME_OBJECT, object), 0);
        TopScoreDocCollector collector = TopScoreDocCollector.create(numberOfDocsRetrievedFromIndex);
        isearcher.search(q, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;
        for (ScoreDoc hit : hits) {
            Document hitDoc = isearcher.doc(hit.doc);
            log.debug(object + "->" + hitDoc.get(FIELD_NAME_SUBJECT) + ", " + hitDoc.get(FIELD_NAME_OBJECT));
            uris.add(hitDoc.get(FIELD_NAME_SUBJECT));
        }
        log.debug("\t finished asking index...");
    } catch (Exception e) {
        log.error(e.getLocalizedMessage() + " -> " + object, e);
    }
    return uris;
}
/**
 * Chooses the collector implementation: the name "topScoreDoc" selects a
 * TopScoreDocCollector with exact hit counting, any other non-empty name is
 * instantiated reflectively as a Collector subclass, and an empty name
 * falls back to the parent's default.
 */
@Override
protected Collector createCollector() throws Exception {
    if (clnName.equalsIgnoreCase("topScoreDoc")) {
        return TopScoreDocCollector.create(numHits(), Integer.MAX_VALUE);
    }
    if (clnName.length() > 0) {
        return Class.forName(clnName).asSubclass(Collector.class).getConstructor().newInstance();
    }
    return super.createCollector();
}
/**
 * Sets up the supplier that builds a per-group top-docs collector: plain
 * score order for relevance sort, otherwise a field collector plus an
 * optional MaxScoreCollector when max scores are requested.
 *
 * @param withinGroupSort sort applied within each group
 * @param maxDocsPerGroup number of top documents kept per group
 * @param getMaxScores    whether the per-group max score must be tracked
 */
TopDocsReducer(Sort withinGroupSort,
    int maxDocsPerGroup, boolean getMaxScores) {
  this.needsScores = getMaxScores || withinGroupSort.needsScores();
  // reference equality is deliberate: Sort.RELEVANCE is a shared singleton
  if (withinGroupSort == Sort.RELEVANCE) {
    supplier = () -> new TopDocsAndMaxScoreCollector(true, TopScoreDocCollector.create(maxDocsPerGroup, Integer.MAX_VALUE), null);
  } else {
    supplier = () -> {
      TopFieldCollector topDocsCollector = TopFieldCollector.create(withinGroupSort, maxDocsPerGroup, Integer.MAX_VALUE); // TODO: disable exact counts?
      MaxScoreCollector maxScoreCollector = getMaxScores ? new MaxScoreCollector() : null;
      return new TopDocsAndMaxScoreCollector(false, topDocsCollector, maxScoreCollector);
    };
  }
}
/**
 * Indexes exactly IndexWriter.MAX_DOCS identical documents and verifies that
 * searching and doc-order sorting work at the hard document-count limit,
 * both before and after a force-merge.
 */
@Monster("takes over two hours")
public void testExactlyAtTrueLimit() throws Exception {
    Directory dir = newFSDirectory(createTempDir("2BDocs3"));
    IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(null));
    Document doc = new Document();
    doc.add(newStringField("field", "text", Field.Store.NO));
    for (int i = 0; i < IndexWriter.MAX_DOCS; i++) {
        iw.addDocument(doc);
        /*
        if (i%1000000 == 0) {
          System.out.println((i/1000000) + " M docs...");
        }
        */
    }
    iw.commit();
    // First unoptimized, then optimized:
    for(int i=0;i<2;i++) {
        DirectoryReader ir = DirectoryReader.open(dir);
        assertEquals(IndexWriter.MAX_DOCS, ir.maxDoc());
        assertEquals(IndexWriter.MAX_DOCS, ir.numDocs());
        IndexSearcher searcher = new IndexSearcher(ir);
        // exact hit count (threshold MAX_VALUE) so totalHits equals MAX_DOCS
        TopScoreDocCollector collector = TopScoreDocCollector.create(10, Integer.MAX_VALUE);
        searcher.search(new TermQuery(new Term("field", "text")), collector);
        TopDocs hits = collector.topDocs();
        assertEquals(IndexWriter.MAX_DOCS, hits.totalHits.value);
        // Sort by docID reversed:
        hits = searcher.search(new TermQuery(new Term("field", "text")), 10, new Sort(new SortField(null, SortField.Type.DOC, true)));
        assertEquals(IndexWriter.MAX_DOCS, hits.totalHits.value);
        assertEquals(10, hits.scoreDocs.length);
        // highest doc id comes first when doc order is reversed
        assertEquals(IndexWriter.MAX_DOCS-1, hits.scoreDocs[0].doc);
        ir.close();
        iw.forceMerge(1);
    }
    iw.close();
    dir.close();
}
/** Runs {@code tq} with a fresh exact-count collector and asserts the total hit count. */
private void assertIntRangeHitCount(String msg, Query tq, long expectedCount) throws Exception {
    TopScoreDocCollector collector = TopScoreDocCollector.create(1, Integer.MAX_VALUE);
    searcher.search(tq, collector);
    TopDocs tTopDocs = collector.topDocs();
    assertEquals(msg, expectedCount, tTopDocs.totalHits.value);
}

/**
 * Random-window range test: for 10-20 random [lower, upper] windows, checks
 * that the inclusive, exclusive, and both half-open variants of the int
 * range query each return exactly the expected number of documents (the
 * index holds consecutive int values).
 */
private void testRangeSplit(int precisionStep) throws Exception {
    String field = "ascfield" + precisionStep;
    // 10-20 random windows
    int num = TestUtil.nextInt(random(), 10, 20);
    for (int i = 0; i < num; i++) {
        int lower = (int) (random().nextDouble() * noDocs - noDocs / 2);
        int upper = (int) (random().nextDouble() * noDocs - noDocs / 2);
        if (lower > upper) {
            int a = lower; lower = upper; upper = a;
        }
        // test inclusive range
        assertIntRangeHitCount("Returned count of range query must be equal to inclusive range length",
            LegacyNumericRangeQuery.newIntRange(field, precisionStep, lower, upper, true, true),
            upper - lower + 1);
        // test exclusive range
        assertIntRangeHitCount("Returned count of range query must be equal to exclusive range length",
            LegacyNumericRangeQuery.newIntRange(field, precisionStep, lower, upper, false, false),
            Math.max(upper - lower - 1, 0));
        // test left exclusive range
        assertIntRangeHitCount("Returned count of range query must be equal to half exclusive range length",
            LegacyNumericRangeQuery.newIntRange(field, precisionStep, lower, upper, false, true),
            upper - lower);
        // test right exclusive range
        assertIntRangeHitCount("Returned count of range query must be equal to half exclusive range length",
            LegacyNumericRangeQuery.newIntRange(field, precisionStep, lower, upper, true, false),
            upper - lower);
    }
}
/** we fake a float test using int2float conversion of LegacyNumericUtils */
private void testFloatRange(int precisionStep) throws Exception {
    final String field = "ascfield" + precisionStep;
    final int lower = -1000, upper = +2000;
    // inclusive float range built from sortable-int conversions of the bounds
    Query rangeQuery = LegacyNumericRangeQuery.newFloatRange(field, precisionStep,
        NumericUtils.sortableIntToFloat(lower), NumericUtils.sortableIntToFloat(upper), true, true);
    TopScoreDocCollector hitCollector = TopScoreDocCollector.create(1, Integer.MAX_VALUE);
    searcher.search(rangeQuery, hitCollector);
    TopDocs found = hitCollector.topDocs();
    assertEquals("Returned count of range query must be equal to inclusive range length",
        upper - lower + 1, found.totalHits.value);
}
/**
 * Random-window range test (long variant): for 10-20 random [lower, upper]
 * windows, checks that the inclusive, exclusive, and both half-open
 * variants of the long range query return exactly the expected number of
 * documents (the index holds consecutive long values).
 */
private void testRangeSplit(int precisionStep) throws Exception {
    String field="ascfield"+precisionStep;
    // 10 random tests
    int num = TestUtil.nextInt(random(), 10, 20);
    for (int i = 0; i < num; i++) {
        long lower=(long)(random().nextDouble()*noDocs - noDocs/2);
        long upper=(long)(random().nextDouble()*noDocs - noDocs/2);
        if (lower>upper) {
            // swap so that lower <= upper
            long a=lower; lower=upper; upper=a;
        }
        // test inclusive range
        Query tq= LegacyNumericRangeQuery.newLongRange(field, precisionStep, lower, upper, true, true);
        TopScoreDocCollector collector = TopScoreDocCollector.create(1, Integer.MAX_VALUE);
        searcher.search(tq, collector);
        TopDocs tTopDocs = collector.topDocs();
        assertEquals("Returned count of range query must be equal to inclusive range length", upper-lower+1, tTopDocs.totalHits.value );
        // test exclusive range
        tq= LegacyNumericRangeQuery.newLongRange(field, precisionStep, lower, upper, false, false);
        collector = TopScoreDocCollector.create(1, Integer.MAX_VALUE);
        searcher.search(tq, collector);
        tTopDocs = collector.topDocs();
        assertEquals("Returned count of range query must be equal to exclusive range length", Math.max(upper-lower-1, 0), tTopDocs.totalHits.value );
        // test left exclusive range
        tq= LegacyNumericRangeQuery.newLongRange(field, precisionStep, lower, upper, false, true);
        collector = TopScoreDocCollector.create(1, Integer.MAX_VALUE);
        searcher.search(tq, collector);
        tTopDocs = collector.topDocs();
        assertEquals("Returned count of range query must be equal to half exclusive range length", upper-lower, tTopDocs.totalHits.value );
        // test right exclusive range
        tq= LegacyNumericRangeQuery.newLongRange(field, precisionStep, lower, upper, true, false);
        collector = TopScoreDocCollector.create(1, Integer.MAX_VALUE);
        searcher.search(tq, collector);
        tTopDocs = collector.topDocs();
        assertEquals("Returned count of range query must be equal to half exclusive range length", upper-lower, tTopDocs.totalHits.value );
    }
}
/** we fake a double test using long2double conversion of LegacyNumericUtils */
private void testDoubleRange(int precisionStep) throws Exception {
    final String field = "ascfield" + precisionStep;
    final long lower = -1000L, upper = +2000L;
    // inclusive double range built from sortable-long conversions of the bounds
    Query rangeQuery = LegacyNumericRangeQuery.newDoubleRange(field, precisionStep,
        NumericUtils.sortableLongToDouble(lower), NumericUtils.sortableLongToDouble(upper), true, true);
    TopScoreDocCollector hitCollector = TopScoreDocCollector.create(1, Integer.MAX_VALUE);
    searcher.search(rangeQuery, hitCollector);
    TopDocs found = hitCollector.topDocs();
    assertEquals("Returned count of range query must be equal to inclusive range length",
        upper - lower + 1, found.totalHits.value);
}
/**
 * Builds the per-search collector chain: a TopScoreDocCollector wrapped in
 * a StopExecutionCollector, with a SlowCollector added on top when slow
 * mode is enabled.
 */
@Override
public Collector newCollector() {
    Collector wrapped = new StopExecutionCollector(
        TopScoreDocCollector.create(_numHitsToCollect, _after, true), _running);
    return _runSlow ? new SlowCollector(wrapped) : wrapped;
}
/**
 * Unwraps decorator collectors (SlowCollector, StopExecutionCollector)
 * iteratively until the underlying TopScoreDocCollector is reached.
 *
 * @throws RuntimeException if an unexpected collector type is encountered
 */
private TopScoreDocCollector getTopScoreDocCollector(Collector collector) {
    Collector current = collector;
    while (true) {
        if (current instanceof SlowCollector) {
            current = ((SlowCollector) current).getCollector();
        } else if (current instanceof StopExecutionCollector) {
            current = ((StopExecutionCollector) current).getCollector();
        } else if (current instanceof TopScoreDocCollector) {
            return (TopScoreDocCollector) current;
        } else {
            throw new RuntimeException("Collector type [" + current + "] not supported.");
        }
    }
}
/**
 * @param query
 * - a query
 * @param field
 * - the field where to search the query
 * @return number of documents containing the text in query in the given
 * fields, or -1 if the index could not be queried
 */
public int getFreq(String query, String field) {
    Query q = null;
    searcher = getSearcher();
    // only the hit count matters, so collect a single top doc
    TopScoreDocCollector collector = TopScoreDocCollector.create(1, true);
    // try {
    // build a phrase query from the query's terms, stopwords removed
    Text t = new Text(query).disableStopwords();
    PhraseQuery pq = new PhraseQuery();
    int i = 0;
    for (String term : t.getTerms()) {
        // consecutive positions preserve the phrase's term order
        pq.add(new Term(field, term), i++);
    }
    q = pq;
    logger.debug(q.toString());
    // } catch (ParseException e) {
    // logger.error("querying the index: {} ", e.toString());
    // return -1;
    // }
    try {
        searcher.search(q, collector);
    } catch (IOException e) {
        logger.error("querying the index: {} ", e.toString());
        return -1;
    }
    return collector.getTotalHits();
}
/** Creates a score-ordered top-docs collector holding up to {@code size} hits. */
protected TopDocsCollector<? extends ScoreDoc> createTopDocsCollector(int size) throws IOException {
    return TopScoreDocCollector.create(size);
}
/**
 * Content-based recommendation: builds a text query from the current item
 * (or the whole session), searches the Lucene index, and returns the ids of
 * the best-matching items whose score is at least {@code minScore}.
 *
 * @param clickData current click plus session context
 * @return matched item ids, best-scoring first (empty on parse/IO errors)
 */
@Override
public LongArrayList recommendInternal(ClickData clickData) {
    //create a result list
    LongArrayList results = new LongArrayList();
    try {
        //determine the input query, which can either be based on the current item
        //or all items from the current session depending on the configuration
        String input;
        if (!wholeSession) {
            //extract the content from the current item
            input = extractContent(clickData.click.item);
        } else {
            //append the content of every item from the current user session
            //(StringBuilder avoids O(n^2) string concatenation)
            StringBuilder sessionContent = new StringBuilder();
            for (int i = 0; i < clickData.session.size(); i++) {
                sessionContent.append(" ").append(extractContent(clickData.session.get(i).item));
            }
            input = sessionContent.toString();
        }
        //avoid an exception that happens for too large queries
        BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);
        //create a query
        Query q = new QueryParser("text", analyzer)
                .parse(QueryParserUtil.escape(input));
        //set an unreasonably high retrieval amount, because we want a long recommendation list
        int hitsPerPage = 100000;
        //try-with-resources: the reader was previously leaked when search/parse threw
        try (IndexReader reader = DirectoryReader.open(index)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage);
            //execute the query
            searcher.search(q, collector);
            //iterate the hits and extract the item ids
            ScoreDoc[] hits = collector.topDocs().scoreDocs;
            //NOTE(review): starts at 1, presumably skipping hit 0 (the query item
            //itself matching its own content) — confirm this is intended
            for (int i = 1; i < hits.length; ++i) {
                if (hits[i].score < minScore) {
                    //stop retrieving, if the lucene score is too low
                    break;
                }
                int docId = hits[i].doc;
                Document d = searcher.doc(docId);
                results.add(Long.parseLong(d.get("id")));
            }
        }
    } catch (ParseException | IOException e) {
        e.printStackTrace();
    }
    //return the results
    return results;
}
/**
 * Creates the default collector: top hits by score, counting hits exactly
 * when total hits are requested and only approximately (threshold 1)
 * otherwise.
 */
protected Collector createCollector() throws Exception {
    return TopScoreDocCollector.create(numHits(), withTotalHits() ? Integer.MAX_VALUE : 1);
}
/** Returns the grouped results. Returns null if the
 * number of groups collected is <= groupOffset.
 *
 * <p><b>NOTE</b>: This collector is unable to compute
 * the groupValue per group so it will always be null.
 * This is normally not a problem, as you can obtain the
 * value just like you obtain other values for each
 * matching document (eg, via stored fields, via
 * DocValues, etc.)
 *
 * @param withinGroupSort The {@link Sort} used to sort
 * documents within each group.
 * @param groupOffset Which group to start from
 * @param withinGroupOffset Which document to start from
 * within each group
 * @param maxDocsPerGroup How many top documents to keep
 * within each group.
 */
public TopGroups<?> getTopGroups(Sort withinGroupSort, int groupOffset, int withinGroupOffset, int maxDocsPerGroup) throws IOException {
    //if (queueFull) {
    //System.out.println("getTopGroups groupOffset=" + groupOffset + " topNGroups=" + topNGroups);
    //}
    // flush any partially-collected group before reading the queue
    if (subDocUpto != 0) {
        processGroup();
    }
    if (groupOffset >= groupQueue.size()) {
        return null;
    }
    int totalGroupedHitCount = 0;
    // fake scorer used to replay buffered doc/score pairs into the per-group collector
    final ScoreAndDoc fakeScorer = new ScoreAndDoc();
    float maxScore = Float.MIN_VALUE;
    @SuppressWarnings({"unchecked","rawtypes"})
    final GroupDocs<Object>[] groups = new GroupDocs[groupQueue.size() - groupOffset];
    // the queue pops worst-first, so fill the result array from the back
    for(int downTo=groupQueue.size()-groupOffset-1;downTo>=0;downTo--) {
        final OneGroup og = groupQueue.pop();
        // At this point we hold all docs w/ in each group,
        // unsorted; we now sort them:
        final TopDocsCollector<?> collector;
        if (withinGroupSort.equals(Sort.RELEVANCE)) {
            // Sort by score
            if (!needsScores) {
                throw new IllegalArgumentException("cannot sort by relevance within group: needsScores=false");
            }
            collector = TopScoreDocCollector.create(maxDocsPerGroup, Integer.MAX_VALUE);
        } else {
            // Sort by fields
            collector = TopFieldCollector.create(withinGroupSort, maxDocsPerGroup, Integer.MAX_VALUE); // TODO: disable exact counts?
        }
        float groupMaxScore = needsScores ? Float.NEGATIVE_INFINITY : Float.NaN;
        LeafCollector leafCollector = collector.getLeafCollector(og.readerContext);
        leafCollector.setScorer(fakeScorer);
        // replay the buffered docs of this group through the collector
        for(int docIDX=0;docIDX<og.count;docIDX++) {
            final int doc = og.docs[docIDX];
            fakeScorer.doc = doc;
            if (needsScores) {
                fakeScorer.score = og.scores[docIDX];
                groupMaxScore = Math.max(groupMaxScore, fakeScorer.score);
            }
            leafCollector.collect(doc);
        }
        totalGroupedHitCount += og.count;
        // sort values that ranked this group in the group-level queue
        final Object[] groupSortValues;
        groupSortValues = new Comparable<?>[comparators.length];
        for(int sortFieldIDX=0;sortFieldIDX<comparators.length;sortFieldIDX++) {
            groupSortValues[sortFieldIDX] = comparators[sortFieldIDX].value(og.comparatorSlot);
        }
        final TopDocs topDocs = collector.topDocs(withinGroupOffset, maxDocsPerGroup);
        // TODO: we could aggregate scores across children
        // by Sum/Avg instead of passing NaN:
        groups[downTo] = new GroupDocs<>(Float.NaN,
            groupMaxScore,
            new TotalHits(og.count, TotalHits.Relation.EQUAL_TO),
            topDocs.scoreDocs,
            null,
            groupSortValues);
        maxScore = Math.max(maxScore, groupMaxScore);
    }
    /*
    while (groupQueue.size() != 0) {
      final OneGroup og = groupQueue.pop();
      //System.out.println("  leftover: og ord=" + og.groupOrd + " count=" + og.count);
      totalGroupedHitCount += og.count;
    }
    */
    return new TopGroups<>(new TopGroups<>(groupSort.getSort(),
        withinGroupSort.getSort(),
        totalHitCount, totalGroupedHitCount, groups, maxScore),
        totalGroupCount);
}
/**
 * Indexes five points and checks that a distance feature query scores the
 * two closest documents with the expected pivot-based formula
 * {@code boost * pivot / (pivot + distance)}, for two different origins.
 */
public void testBasics() throws IOException {
    Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig()
        .setMergePolicy(newLogMergePolicy(random().nextBoolean())));
    Document doc = new Document();
    LatLonPoint point = new LatLonPoint("foo", 0.0, 0.0);
    doc.add(point);
    LatLonDocValuesField docValue = new LatLonDocValuesField("foo",0.0, 0.0);
    doc.add(docValue);
    double pivotDistance = 5000;//5k
    // the same Document instance is re-added with updated field values
    point.setLocationValue(-7, -7);
    docValue.setLocationValue(-7, -7);
    w.addDocument(doc);
    point.setLocationValue(9, 9);
    docValue.setLocationValue(9, 9);
    w.addDocument(doc);
    point.setLocationValue(8, 8);
    docValue.setLocationValue(8, 8);
    w.addDocument(doc);
    point.setLocationValue(4, 4);
    docValue.setLocationValue(4, 4);
    w.addDocument(doc);
    point.setLocationValue(-1, -1);
    docValue.setLocationValue(-1, -1);
    w.addDocument(doc);
    DirectoryReader reader = w.getReader();
    IndexSearcher searcher = newSearcher(reader);
    Query q = LatLonPoint.newDistanceFeatureQuery("foo", 3, 10, 10, pivotDistance);
    // threshold 1: hit counting may stop early, only the top 2 docs matter
    TopScoreDocCollector collector = TopScoreDocCollector.create(2, null, 1);
    searcher.search(q, collector);
    TopDocs topHits = collector.topDocs();
    assertEquals(2, topHits.scoreDocs.length);
    // expected distances from the encoded (quantized) indexed positions
    double distance1 = SloppyMath.haversinMeters(GeoEncodingUtils.decodeLatitude(GeoEncodingUtils.encodeLatitude(9)) , GeoEncodingUtils.decodeLongitude(GeoEncodingUtils.encodeLongitude(9)), 10,10);
    double distance2 = SloppyMath.haversinMeters(GeoEncodingUtils.decodeLatitude(GeoEncodingUtils.encodeLatitude(8)) , GeoEncodingUtils.decodeLongitude(GeoEncodingUtils.encodeLongitude(8)), 10,10);
    CheckHits.checkEqual(q,
        new ScoreDoc[] {
            new ScoreDoc(1, (float) (3f * (pivotDistance / (pivotDistance + distance1)))),
            new ScoreDoc(2, (float) (3f * (pivotDistance / (pivotDistance + distance2))))
        },
        topHits.scoreDocs);
    distance1 = SloppyMath.haversinMeters(GeoEncodingUtils.decodeLatitude(GeoEncodingUtils.encodeLatitude(9)) , GeoEncodingUtils.decodeLongitude(GeoEncodingUtils.encodeLongitude(9)), 9,9);
    distance2 = SloppyMath.haversinMeters(GeoEncodingUtils.decodeLatitude(GeoEncodingUtils.encodeLatitude(8)) , GeoEncodingUtils.decodeLongitude(GeoEncodingUtils.encodeLongitude(8)), 9,9);
    // repeat with the origin placed exactly on one of the indexed points
    q = LatLonPoint.newDistanceFeatureQuery("foo", 3, 9, 9, pivotDistance);
    collector = TopScoreDocCollector.create(2, null, 1);
    searcher.search(q, collector);
    topHits = collector.topDocs();
    assertEquals(2, topHits.scoreDocs.length);
    CheckHits.checkExplanations(q, "", searcher);
    CheckHits.checkEqual(q,
        new ScoreDoc[] {
            new ScoreDoc(1, (float) (3f * (pivotDistance / (pivotDistance + distance1)))),
            new ScoreDoc(2, (float) (3f * (pivotDistance / (pivotDistance + distance2))))
        },
        topHits.scoreDocs);
    reader.close();
    w.close();
    dir.close();
}
/**
 * Indexes points on both sides of the antimeridian and checks that a
 * distance feature query anchored at longitude 179 scores the documents at
 * -179 and 176 (the two closest across/near the date line) with the
 * expected pivot-based formula.
 */
public void testCrossesDateLine() throws IOException {
    Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig()
        .setMergePolicy(newLogMergePolicy(random().nextBoolean())));
    Document doc = new Document();
    LatLonPoint point = new LatLonPoint("foo", 0.0, 0.0);
    doc.add(point);
    LatLonDocValuesField docValue = new LatLonDocValuesField("foo",0.0, 0.0);
    doc.add(docValue);
    double pivotDistance = 5000;//5k
    point.setLocationValue(0, -179);
    docValue.setLocationValue(0, -179);
    w.addDocument(doc);
    point.setLocationValue(0, 176);
    docValue.setLocationValue(0, 176);
    w.addDocument(doc);
    point.setLocationValue(0, -150);
    docValue.setLocationValue(0, -150);
    w.addDocument(doc);
    point.setLocationValue(0, -140);
    docValue.setLocationValue(0, -140);
    w.addDocument(doc);
    point.setLocationValue(0, 140);
    // fixed: was setLocationValue(01, 140) — octal 1, disagreeing with the
    // point field's latitude 0 for the same document
    docValue.setLocationValue(0, 140);
    w.addDocument(doc);
    DirectoryReader reader = w.getReader();
    IndexSearcher searcher = newSearcher(reader);
    Query q = LatLonPoint.newDistanceFeatureQuery("foo", 3, 0, 179, pivotDistance);
    TopScoreDocCollector collector = TopScoreDocCollector.create(2, null, 1);
    searcher.search(q, collector);
    TopDocs topHits = collector.topDocs();
    assertEquals(2, topHits.scoreDocs.length);
    // expected distances computed from the encoded (quantized) positions
    double distance1 = SloppyMath.haversinMeters(GeoEncodingUtils.decodeLatitude(GeoEncodingUtils.encodeLatitude(0)) , GeoEncodingUtils.decodeLongitude(GeoEncodingUtils.encodeLongitude(-179)), 0,179);
    double distance2 = SloppyMath.haversinMeters(GeoEncodingUtils.decodeLatitude(GeoEncodingUtils.encodeLatitude(0)) , GeoEncodingUtils.decodeLongitude(GeoEncodingUtils.encodeLongitude(176)), 0,179);
    CheckHits.checkEqual(q,
        new ScoreDoc[] {
            new ScoreDoc(0, (float) (3f * (pivotDistance / (pivotDistance + distance1)))),
            new ScoreDoc(1, (float) (3f * (pivotDistance / (pivotDistance + distance2))))
        },
        topHits.scoreDocs);
    reader.close();
    w.close();
    dir.close();
}
/**
 * Indexes two located documents plus one with no location and checks that a
 * distance feature query returns only the located documents, scored by the
 * pivot-based formula.
 */
public void testMissingValue() throws IOException {
    Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig()
        .setMergePolicy(newLogMergePolicy(random().nextBoolean())));
    Document doc = new Document();
    LatLonPoint point = new LatLonPoint("foo", 0, 0);
    doc.add(point);
    LatLonDocValuesField docValue = new LatLonDocValuesField("foo", 0, 0);
    doc.add(docValue);
    point.setLocationValue(3, 3);
    docValue.setLocationValue(3, 3);
    w.addDocument(doc);
    // doc 1 has no location fields at all
    w.addDocument(new Document());
    point.setLocationValue(7, 7);
    docValue.setLocationValue(7, 7);
    w.addDocument(doc);
    DirectoryReader reader = w.getReader();
    IndexSearcher searcher = newSearcher(reader);
    Query q = LatLonPoint.newDistanceFeatureQuery("foo", 3, 10, 10, 5);
    // ask for 3 hits; only the 2 located docs can match
    TopScoreDocCollector collector = TopScoreDocCollector.create(3, null, 1);
    searcher.search(q, collector);
    TopDocs topHits = collector.topDocs();
    assertEquals(2, topHits.scoreDocs.length);
    // expected distances from the encoded (quantized) indexed positions
    double distance1 = SloppyMath.haversinMeters(GeoEncodingUtils.decodeLatitude(GeoEncodingUtils.encodeLatitude(7)) , GeoEncodingUtils.decodeLongitude(GeoEncodingUtils.encodeLongitude(7)), 10,10);
    double distance2 = SloppyMath.haversinMeters(GeoEncodingUtils.decodeLatitude(GeoEncodingUtils.encodeLatitude(3)) , GeoEncodingUtils.decodeLongitude(GeoEncodingUtils.encodeLongitude(3)), 10,10);
    CheckHits.checkEqual(q,
        new ScoreDoc[] {
            new ScoreDoc(2, (float) (3f * (5. / (5. + distance1)))),
            new ScoreDoc(0, (float) (3f * (5. / (5. + distance2))))
        },
        topHits.scoreDocs);
    CheckHits.checkExplanations(q, "", searcher);
    reader.close();
    w.close();
    dir.close();
}