下面列出了 org.apache.lucene.search.LongValuesSource 类的 API 用法示例代码;也可以点击链接到 GitHub 查看完整源代码。
/**
 * Checks that a {@code DocumentValueSourceDictionary} built over an index to which no
 * documents were added yields an iterator with no entry, a weight of 0 and no payload.
 */
@Test
public void testValueSourceEmptyReader() throws IOException {
  Directory dir = newDirectory();
  Analyzer analyzer = new MockAnalyzer(random());
  IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
  iwc.setMergePolicy(newLogMergePolicy());
  // Commit without adding any document; presumably this makes sure an (empty) index
  // exists for the reader opened below.
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
  writer.commit();
  writer.close();
  IndexReader ir = DirectoryReader.open(dir);
  Dictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME, LongValuesSource.constant(10), PAYLOAD_FIELD_NAME);
  InputIterator inputIterator = dictionary.getEntryIterator();

  assertNull(inputIterator.next());
  // assertEquals takes (expected, actual); keeping that order makes a failure message
  // report the right value as "expected".
  assertEquals(0, inputIterator.weight());
  assertNull(inputIterator.payload());

  IOUtils.close(ir, analyzer, dir);
}
/**
 * Checks that a {@code DocumentValueSourceDictionary} weighted by a constant
 * {@code LongValuesSource} and built over an index to which no documents were added
 * yields an iterator with no entry, a weight of 0 and no payload.
 */
@Test
public void testLongValuesSourceEmptyReader() throws IOException {
  Directory dir = newDirectory();
  Analyzer analyzer = new MockAnalyzer(random());
  IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
  iwc.setMergePolicy(newLogMergePolicy());
  // Commit without adding any document; presumably this makes sure an (empty) index
  // exists for the reader opened below.
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
  writer.commit();
  writer.close();
  IndexReader ir = DirectoryReader.open(dir);
  Dictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME, LongValuesSource.constant(10), PAYLOAD_FIELD_NAME);
  InputIterator inputIterator = dictionary.getEntryIterator();

  assertNull(inputIterator.next());
  // assertEquals takes (expected, actual); keeping that order makes a failure message
  // report the right value as "expected".
  assertEquals(0, inputIterator.weight());
  assertNull(inputIterator.payload());

  IOUtils.close(ir, analyzer, dir);
}
/**
 * Create {@code LongValueFacetCounts} for the matching documents collected in {@code hits}.
 *
 * <p>When {@code valueSource} is non-null the values are read from it, and
 * {@code multiValued} must be {@code false}. When {@code valueSource} is null, counting is
 * delegated to the field-based {@code count} or {@code countMultiValued} helper, chosen by
 * {@code multiValued}.
 *
 * @param field       the field name; stored on this instance and passed to the field-based helpers
 * @param valueSource the source of per-document values, or null to count from {@code field}
 * @param hits        the collector whose matching documents are counted
 * @param multiValued whether multi-valued counting is requested
 * @throws IllegalArgumentException if {@code valueSource} is non-null and {@code multiValued} is true
 * @throws IOException if thrown while counting
 */
public LongValueFacetCounts(String field, LongValuesSource valueSource, FacetsCollector hits,
    boolean multiValued) throws IOException {
  this.field = field;
  if (valueSource != null) {
    // value source is always single valued
    if (multiValued) {
      throw new IllegalArgumentException("can only compute multi-valued facets directly from doc values (when valueSource is null)");
    }
    count(valueSource, hits.getMatchingDocs());
  } else if (multiValued) {
    countMultiValued(field, hits.getMatchingDocs());
  } else {
    count(field, hits.getMatchingDocs());
  }
}
/**
 * Visits every matching document of every segment and, for each one that has a value in
 * {@code valueSource}, passes that value to {@code increment} and bumps {@code totCount}.
 *
 * @param valueSource  the source of per-document long values
 * @param matchingDocs the per-segment matching documents to scan
 * @throws IOException if reading values or iterating documents fails
 */
private void count(LongValuesSource valueSource, List<MatchingDocs> matchingDocs) throws IOException {
  for (MatchingDocs segmentHits : matchingDocs) {
    LongValues values = valueSource.getValues(segmentHits.context, null);
    // NOTE: this is not as efficient as working directly with the doc values APIs in the sparse case
    // because we are doing a linear scan across all hits, but this API is more flexible since a
    // LongValuesSource can compute interesting values at query time
    DocIdSetIterator it = segmentHits.bits.iterator();
    int doc;
    while ((doc = it.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
      if (!values.advanceExact(doc)) {
        // No value for this document: skip it.
        continue;
      }
      increment(values.longValue());
      totCount++;
    }
  }
}
/**
 * Finishes the indexing phase: configures a {@code DirectSpellChecker}, opens a reader on
 * the writer, builds the {@code FuzzySuggester} from the {@code WORD_FIELD} terms, closes
 * the writer and opens a searcher on the directory.
 *
 * @throws RuntimeException wrapping any {@link IOException} raised along the way
 */
@Override
public void indexingDone() {
  try {
    spellChecker = new DirectSpellChecker();
    spellChecker.setMaxEdits(2);
    spellChecker.setAccuracy(0.1f);
    spellChecker.setMinPrefix(0);

    reader = DirectoryReader.open(writer);
    fuzzySuggester = new FuzzySuggester(directory, "", writer.getAnalyzer());

    // Weight source handed to the dictionary; it does not need scores.
    // NOTE(review): getValues returns null rather than a LongValues instance — confirm
    // the dictionary never dereferences it for this index.
    final LongValuesSource weights = new LongValuesSource() {
      @Override
      public boolean needsScores() {
        return false;
      }

      @Override
      public LongValues getValues(LeafReaderContext ctx, DoubleValues scores) throws IOException {
        return null;
      }
    };
    fuzzySuggester.build(new DocumentValueSourceDictionary(reader, WORD_FIELD, weights));

    writer.close();
    final DirectoryReader searchReader = DirectoryReader.open(directory);
    searcher = new IndexSearcher(searchReader);
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
/**
 * Scans every document id below {@code maxDoc} in every leaf of {@code reader} and, for each
 * one that has a value in {@code valueSource}, passes that value to {@code increment} and
 * bumps {@code totCount}.
 *
 * @param valueSource the source of per-document long values
 * @param field       not read by this method
 * @param reader      the reader whose leaves are scanned
 * @throws IOException if reading values fails
 */
private void countAll(LongValuesSource valueSource, String field, IndexReader reader) throws IOException {
  for (LeafReaderContext leaf : reader.leaves()) {
    LongValues values = valueSource.getValues(leaf, null);
    final int docLimit = leaf.reader().maxDoc();
    int doc = 0;
    while (doc < docLimit) {
      // Only documents that actually have a value are counted.
      if (values.advanceExact(doc)) {
        increment(values.longValue());
        totCount++;
      }
      doc++;
    }
  }
}
/**
 * Compiles {@code weightExpression} with {@code JavascriptCompiler}, binds each sort field's
 * name to the result of {@code fromSortField}, and returns the expression's values source
 * converted to a {@code LongValuesSource}.
 *
 * @param weightExpression the expression text to compile
 * @param sortFields       the sort fields whose names are bound for the expression
 * @return the long values source backed by the compiled expression
 * @throws RuntimeException wrapping the {@code ParseException} if compilation fails
 */
public LongValuesSource fromExpression(String weightExpression, Set<SortField> sortFields) {
  final Expression compiled;
  try {
    compiled = JavascriptCompiler.compile(weightExpression);
  } catch (ParseException e) {
    throw new RuntimeException(e);
  }

  final SimpleBindings fieldBindings = new SimpleBindings();
  for (SortField sf : sortFields) {
    fieldBindings.add(sf.getField(), fromSortField(sf));
  }
  return compiled.getDoubleValuesSource(fieldBindings).toLongValuesSource();
}
/**
 * Creates a parser that builds a {@code CoveringQuery} from local params: one term query
 * per comma-separated value, an optional wildcard-token term query, and (when missing values
 * are allowed) a query for documents with no value in the field. The minimum number of
 * matching sub-queries per document is read from the count field.
 */
@Override
public QParser createParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) {
  return new QParser(qstr, localParams, params, req) {
    @Override
    public Query parse() throws SyntaxError {
      final String setField = Preconditions.checkNotNull(localParams.get(SETVAL_FIELD_NAME));
      final String countField = Preconditions.checkNotNull(localParams.get(COUNT_FIELD_NAME));
      final String missingAllowedRaw = Preconditions.checkNotNull(localParams.get(MISSING_VAL_ALLOWED));
      final boolean missingAllowed = Boolean.parseBoolean(missingAllowedRaw);
      final String wildcard = localParams.get(WILDCARD_CHAR);

      final LongValuesSource requiredMatches = LongValuesSource.fromIntField(countField);
      final Collection<Query> candidates = new ArrayList<>();

      final String rawValues = Preconditions.checkNotNull(localParams.get(SETVAL_PARAM_NAME));
      for (String value : rawValues.split(",")) {
        candidates.add(new TermQuery(new Term(setField, value)));
      }

      if (wildcard != null && !wildcard.isEmpty()) {
        candidates.add(new TermQuery(new Term(setField, wildcard)));
      }

      if (missingAllowed) {
        // Testing for an empty field needs a small trick: Lucene does not allow pure negative
        // queries (Solr only allows them as a top level construct), so the negated
        // fieldName:* clause is combined with a match-all clause in one BooleanQuery.
        // That BooleanQuery is then handed to the CoveringQuery as one of its allowed matches.
        final BooleanClause everything = new BooleanClause(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
        final BooleanClause notHavingField =
            new BooleanClause(new WildcardQuery(new Term(setField, "*")), BooleanClause.Occur.MUST_NOT);
        final BooleanQuery.Builder emptyField = new BooleanQuery.Builder();
        emptyField.add(everything);
        emptyField.add(notHavingField);
        candidates.add(emptyField.build());
      }

      return new CoveringQuery(candidates, requiredMatches);
    }
  };
}
/**
 * Exposes this ValueSource as a {@code LongValuesSource}.
 *
 * @return a new {@code WrappedLongValuesSource} wrapping this instance
 */
public LongValuesSource asLongValuesSource() {
return new WrappedLongValuesSource(this);
}
/**
 * Returns this instance unchanged; the searcher is not consulted.
 */
@Override
public LongValuesSource rewrite(IndexSearcher searcher) throws IOException {
return this;
}
/**
 * Resolves the field's sum of total term frequencies against the searcher's index reader
 * and returns a {@code NoCacheConstantLongValuesSource} holding that value, with this
 * source recorded as its parent.
 */
@Override
public LongValuesSource rewrite(IndexSearcher searcher) throws IOException {
  final long sumTotalTermFreq = searcher.getIndexReader().getSumTotalTermFreq(field);
  return new NoCacheConstantLongValuesSource(sumTotalTermFreq, this);
}
/**
 * Stores the given value together with the source it was derived from.
 *
 * @param value  the long value to hold
 * @param parent the source recorded as this instance's parent
 */
private NoCacheConstantLongValuesSource(long value, LongValuesSource parent) {
  this.parent = parent;
  this.value = value;
}
/**
 * Returns this instance unchanged; the argument is not consulted.
 */
@Override
public LongValuesSource rewrite(IndexSearcher reader) throws IOException {
return this;
}
/**
 * Checks the values, the string form and the non-cacheability of
 * {@code IndexReaderFunctions.sumTotalTermFreq("text")}.
 */
public void testSumTotalTermFreq() throws Exception {
  final LongValuesSource source = IndexReaderFunctions.sumTotalTermFreq("text");
  final float[] expectedScores = {8f, 8f};
  assertHits(source.toDoubleValuesSource(), expectedScores);
  assertEquals("sumTotalTermFreq(text)", source.toString());
  assertCacheable(source, false);
}
/**
 * Wraps {@code vs} in a {@code FunctionScoreQuery} over all documents, creates its weight,
 * and asserts that the weight's cacheability on the first leaf equals {@code expected}.
 *
 * @param vs       the values source under test
 * @param expected the expected result of {@code Weight.isCacheable} on the first leaf
 */
void assertCacheable(LongValuesSource vs, boolean expected) throws Exception {
  final Query scoredQuery = new FunctionScoreQuery(new MatchAllDocsQuery(), vs.toDoubleValuesSource());
  final Weight weight = searcher.createWeight(scoredQuery, ScoreMode.COMPLETE, 1);
  final LeafReaderContext firstLeaf = reader.leaves().get(0);
  final boolean cacheable = weight.isCacheable(firstLeaf);
  assertEquals(expected, cacheable);
}
/**
 * Builds a range group selector over the {@code "long"} field, using a
 * {@code LongRangeFactory} constructed with (100, 100, 900).
 */
@Override
protected GroupSelector<LongRange> getGroupSelector() {
  final LongValuesSource values = LongValuesSource.fromLongField("long");
  final LongRangeFactory buckets = new LongRangeFactory(100, 100, 900);
  return new LongRangeGroupSelector(values, buckets);
}
/**
 * Creates a new dictionary with the contents of the fields named <code>field</code>
 * for the terms and uses the <code>weightsValueSource</code> supplied to determine the
 * score.
 *
 * @param reader             the index reader, passed to the superclass constructor
 * @param field              the name of the field supplying the terms
 * @param weightsValueSource the values source stored for determining the score
 */
public DocumentValueSourceDictionary(IndexReader reader, String field,
LongValuesSource weightsValueSource) {
// The remaining two superclass-constructor arguments are passed as null.
super(reader, field, null, null);
this.weightsValueSource = weightsValueSource;
}
/**
 * Stores the range, the optional fast-match query and the values source for this query.
 *
 * @param range          the range stored on this query
 * @param fastMatchQuery the fast-match query stored on this query
 * @param valueSource    the values source stored on this query
 */
ValueSourceQuery(LongRange range, Query fastMatchQuery, LongValuesSource valueSource) {
  this.valueSource = valueSource;
  this.fastMatchQuery = fastMatchQuery;
  this.range = range;
}
/**
 * Create {@code LongRangeFacetCounts}, using {@link LongValuesSource} from the specified field.
 *
 * <p>Delegates to the {@code (field, valueSource, hits, ranges)} constructor with
 * {@code LongValuesSource.fromLongField(field)} as the value source.
 *
 * @param field  the field name, also used to build the value source
 * @param hits   the collector passed on to the delegate constructor
 * @param ranges the ranges passed on to the delegate constructor
 * @throws IOException if thrown by the delegate constructor
 */
public LongRangeFacetCounts(String field, FacetsCollector hits, LongRange... ranges) throws IOException {
this(field, LongValuesSource.fromLongField(field), hits, ranges);
}
/**
 * Create {@code LongRangeFacetCounts}, using the provided {@link LongValuesSource}.
 *
 * <p>Delegates to the five-argument constructor, passing {@code null} as the fast-match query.
 *
 * @param field       the field name passed on to the delegate constructor
 * @param valueSource the source of per-document long values
 * @param hits        the collector passed on to the delegate constructor
 * @param ranges      the ranges passed on to the delegate constructor
 * @throws IOException if thrown by the delegate constructor
 */
public LongRangeFacetCounts(String field, LongValuesSource valueSource, FacetsCollector hits, LongRange... ranges) throws IOException {
this(field, valueSource, hits, null, ranges);
}
/**
 * Feeds the value of each matching document into a {@code LongRangeCounter}, fills
 * {@code counts} from it, and adjusts {@code totCount}.
 *
 * <p>For every segment, {@code hits.totalHits} is added to {@code totCount} up front. When
 * {@code fastMatchQuery} is non-null, only documents that are also returned by that query's
 * scorer for the segment are examined. Documents without a value are tallied in a local
 * missing count, which (plus the return value of {@code fillCounts}) is subtracted from
 * {@code totCount} at the end.
 *
 * @param valueSource  the source of per-document long values
 * @param matchingDocs the per-segment matching documents to scan
 * @throws IOException if reading values, creating the weight or iterating documents fails
 */
private void count(LongValuesSource valueSource, List<MatchingDocs> matchingDocs) throws IOException {
LongRange[] ranges = (LongRange[]) this.ranges;
LongRangeCounter counter = new LongRangeCounter(ranges);
int missingCount = 0;
for (MatchingDocs hits : matchingDocs) {
LongValues fv = valueSource.getValues(hits.context, null);
// Optimistically count every hit of the segment; missing ones are subtracted at the end.
totCount += hits.totalHits;
final DocIdSetIterator fastMatchDocs;
if (fastMatchQuery != null) {
// Build a scorer for the fast-match query on this segment, using a searcher over the
// top-level context with the query cache disabled.
final IndexReaderContext topLevelContext = ReaderUtil.getTopLevelContext(hits.context);
final IndexSearcher searcher = new IndexSearcher(topLevelContext);
searcher.setQueryCache(null);
final Weight fastMatchWeight = searcher.createWeight(searcher.rewrite(fastMatchQuery), ScoreMode.COMPLETE_NO_SCORES, 1);
Scorer s = fastMatchWeight.scorer(hits.context);
if (s == null) {
// No scorer for this segment: skip the segment entirely.
// NOTE(review): hits.totalHits was already added to totCount above and is not
// compensated here — confirm this is intended.
continue;
}
fastMatchDocs = s.iterator();
} else {
fastMatchDocs = null;
}
DocIdSetIterator docs = hits.bits.iterator();
// The loop header has no update clause: doc is advanced explicitly inside the body.
for (int doc = docs.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; ) {
if (fastMatchDocs != null) {
// Bring the fast-match iterator up to (or past) the current hit.
int fastMatchDoc = fastMatchDocs.docID();
if (fastMatchDoc < doc) {
fastMatchDoc = fastMatchDocs.advance(doc);
}
if (doc != fastMatchDoc) {
// The current hit is not in the fast-match set: jump the hits iterator forward
// to the fast-match position and re-check.
doc = docs.advance(fastMatchDoc);
continue;
}
}
// Skip missing docs:
if (fv.advanceExact(doc)) {
counter.add(fv.longValue());
} else {
missingCount++;
}
doc = docs.nextDoc();
}
}
// NOTE(review): the return value of fillCounts is treated as additional missing
// documents — presumably values that fell into none of the ranges; confirm against
// LongRangeCounter.
int x = counter.fillCounts(counts);
missingCount += x;
//System.out.println("totCount " + totCount + " x " + x + " missingCount " + missingCount);
totCount -= missingCount;
}
/**
 * Create {@code LongValueFacetCounts}, using the provided {@link LongValuesSource}.
 *
 * <p>Delegates to the four-argument constructor with {@code multiValued} set to {@code false}.
 *
 * <p>NOTE(review): the four-argument constructor calls {@code hits.getMatchingDocs()} in
 * every branch, so a null {@code hits} looks like it would fail there rather than count all
 * facets — confirm; the {@code IndexReader}-based constructor appears to be the way to count
 * every document.
 *
 * @param field       the field name passed on to the delegate constructor
 * @param valueSource the source of per-document long values
 * @param hits        the collector whose matching documents are counted
 * @throws IOException if thrown by the delegate constructor
 */
public LongValueFacetCounts(String field, LongValuesSource valueSource, FacetsCollector hits) throws IOException {
this(field, valueSource, hits, false);
}
/**
 * Counts all facet values for the provided {@link LongValuesSource}. This produces the same result as computing
 * facets on a {@link org.apache.lucene.search.MatchAllDocsQuery}, but is more efficient.
 *
 * @param field       the field name stored on this instance and passed to {@code countAll}
 * @param valueSource the source of per-document long values
 * @param reader      the reader passed to {@code countAll}
 * @throws IOException if thrown by {@code countAll}
 */
public LongValueFacetCounts(String field, LongValuesSource valueSource, IndexReader reader) throws IOException {
this.field = field;
countAll(valueSource, field, reader);
}
/**
 * Creates a constant value source returning the sumTotalTermFreq for a field
 *
 * @param field the field name passed to the new {@code SumTotalTermFreqValuesSource}
 * @return a new {@code SumTotalTermFreqValuesSource} for {@code field}
 * @see IndexReader#getSumTotalTermFreq(String)
 */
public static LongValuesSource sumTotalTermFreq(String field) {
return new SumTotalTermFreqValuesSource(field);
}
/**
 * Creates a new LongRangeGroupSelector.
 *
 * @param source       a LongValuesSource to retrieve long values per document
 * @param rangeFactory a LongRangeFactory that defines how to group the long values into range buckets
 */
public LongRangeGroupSelector(LongValuesSource source, LongRangeFactory rangeFactory) {
  this.rangeFactory = rangeFactory;
  this.source = source;
}
/**
 * Creates a new dictionary with the contents of the fields named <code>field</code>
 * for the terms, <code>payload</code> for the corresponding payloads, <code>contexts</code>
 * for the associated contexts and uses the <code>weightsValueSource</code> supplied
 * to determine the score.
 *
 * @param reader             the index reader, passed to the superclass constructor
 * @param field              the name of the field supplying the terms
 * @param weightsValueSource the values source stored for determining the score
 * @param payload            the name of the field supplying the payloads
 * @param contexts           the name of the field supplying the contexts
 */
public DocumentValueSourceDictionary(IndexReader reader, String field,
LongValuesSource weightsValueSource, String payload, String contexts) {
// The third superclass-constructor argument is passed as null.
super(reader, field, null, payload, contexts);
this.weightsValueSource = weightsValueSource;
}
/**
 * Creates a new dictionary with the contents of the fields named <code>field</code>
 * for the terms, <code>payload</code> for the corresponding payloads
 * and uses the <code>weightsValueSource</code> supplied to determine the
 * score.
 *
 * @param reader             the index reader, passed to the superclass constructor
 * @param field              the name of the field supplying the terms
 * @param weightsValueSource the values source stored for determining the score
 * @param payload            the name of the field supplying the payloads
 */
public DocumentValueSourceDictionary(IndexReader reader, String field,
LongValuesSource weightsValueSource, String payload) {
// The third superclass-constructor argument is passed as null.
super(reader, field, null, payload);
this.weightsValueSource = weightsValueSource;
}
/**
 * Create a Query that matches documents in this range
 *
 * The query will check all documents that match the provided match query,
 * or every document in the index if the match query is null.
 *
 * If the value source is static, eg an indexed numeric field, it may be
 * faster to use {@link org.apache.lucene.search.PointRangeQuery}
 *
 * @param fastMatchQuery a query to use as a filter
 * @param valueSource the source of values for the range check
 * @return a new {@code ValueSourceQuery} built from this range and the two arguments
 */
public Query getQuery(Query fastMatchQuery, LongValuesSource valueSource) {
return new ValueSourceQuery(this, fastMatchQuery, valueSource);
}
/** Create {@code LongRangeFacetCounts}, using the provided
* {@link LongValuesSource}, and using the provided Filter as
* a fastmatch: only documents passing the filter are
* checked for the matching ranges, which is helpful when
* the provided {@link LongValuesSource} is costly per-document,
* such as a geo distance. The filter must be
* random access (implement {@link DocIdSet#bits}). */
public LongRangeFacetCounts(String field, LongValuesSource valueSource, FacetsCollector hits, Query fastMatchQuery, LongRange... ranges) throws IOException {
super(field, ranges, fastMatchQuery);
count(valueSource, hits.getMatchingDocs());
}