下面列出了org.apache.lucene.search.SimpleCollector#org.apache.lucene.util.Bits 实例代码,或者点击链接到github查看源代码,也可以在右侧发表评论。
/**
 * Creates a tagger bound to the given terms and token stream.
 *
 * <p>Every argument is stored in the field of the same name. Four attributes
 * are then obtained from {@code tokenStream} via {@code addAttribute}, and the
 * stream is reset.
 *
 * @param terms             stored in {@code this.terms}
 * @param liveDocs          stored in {@code this.liveDocs}
 * @param tokenStream       stream the attributes are taken from; reset here
 * @param tagClusterReducer stored in {@code this.tagClusterReducer}
 * @param skipAltTokens     stored in {@code this.skipAltTokens}
 * @param ignoreStopWords   stored in {@code this.ignoreStopWords}
 * @throws IOException if resetting the token stream fails
 */
public Tagger(Terms terms, Bits liveDocs, TokenStream tokenStream,
              TagClusterReducer tagClusterReducer, boolean skipAltTokens,
              boolean ignoreStopWords) throws IOException {
  // Plain configuration state.
  this.terms = terms;
  this.liveDocs = liveDocs;
  this.tagClusterReducer = tagClusterReducer;
  this.skipAltTokens = skipAltTokens;
  this.ignoreStopWords = ignoreStopWords;

  // Token stream and the attributes taken from it (same registration order as before).
  this.tokenStream = tokenStream;
  this.byteRefAtt = tokenStream.addAttribute(TermToBytesRefAttribute.class);
  this.posIncAtt = tokenStream.addAttribute(PositionIncrementAttribute.class);
  this.offsetAtt = tokenStream.addAttribute(OffsetAttribute.class);
  this.taggingAtt = tokenStream.addAttribute(TaggingAttribute.class);
  tokenStream.reset();
}
/**
 * Builds a per-segment collector that forwards to {@code sub} only those
 * documents for which the "docs with value" bits return {@code false}.
 *
 * @param ctx the segment being collected
 * @param sub the downstream collector
 * @return a collector that calls {@code collectBucket} for documents without a value
 * @throws IOException if the values source cannot be read
 */
@Override
public LeafBucketCollector getLeafCollector(LeafReaderContext ctx,
    final LeafBucketCollector sub) throws IOException {
  // Without a values source no document has a value: use a Bits that matches nothing.
  final Bits docsWithValue = valuesSource != null
      ? valuesSource.docsWithValue(ctx)
      : new Bits.MatchNoBits(ctx.reader().maxDoc());
  return new LeafBucketCollectorBase(sub, docsWithValue) {
    @Override
    public void collect(int doc, long bucket) throws IOException {
      if (docsWithValue == null || docsWithValue.get(doc)) {
        return; // document has a value (or bits are absent): nothing to collect
      }
      collectBucket(sub, doc, bucket);
    }
  };
}
/**
 * Bulk-scores every hit of the given iterator. Kept separate from
 * {@link #scoreRange} to help out hotspot.
 * See <a href="https://issues.apache.org/jira/browse/LUCENE-5487">LUCENE-5487</a>.
 *
 * @param collector  receives each accepted document
 * @param iterator   iterator used when there is no two-phase iterator
 * @param twoPhase   optional two-phase iterator; may be {@code null}
 * @param acceptDocs optional filter; {@code null} accepts every document
 */
static void scoreAll(LeafCollector collector, DocIdSetIterator iterator, TwoPhaseIterator twoPhase, Bits acceptDocs) throws IOException {
  if (twoPhase != null) {
    // The scorer has an approximation: run the approximation first, then check acceptDocs, then confirm.
    final DocIdSetIterator approximation = twoPhase.approximation();
    int doc = approximation.nextDoc();
    while (doc != DocIdSetIterator.NO_MORE_DOCS) {
      if ((acceptDocs == null || acceptDocs.get(doc)) && twoPhase.matches()) {
        collector.collect(doc);
      }
      doc = approximation.nextDoc();
    }
    return;
  }

  int doc = iterator.nextDoc();
  while (doc != DocIdSetIterator.NO_MORE_DOCS) {
    if (acceptDocs == null || acceptDocs.get(doc)) {
      collector.collect(doc);
    }
    doc = iterator.nextDoc();
  }
}
/**
 * Creates a tagger bound to the given terms and token stream.
 *
 * <p>Every argument is stored in the field of the same name. Four attributes
 * are then obtained from {@code tokenStream} via {@code addAttribute}, and the
 * stream is reset.
 *
 * @param terms             stored in {@code this.terms}
 * @param liveDocs          stored in {@code this.liveDocs}
 * @param tokenStream       stream the attributes are taken from; reset here
 * @param tagClusterReducer stored in {@code this.tagClusterReducer}
 * @param skipAltTokens     stored in {@code this.skipAltTokens}
 * @param ignoreStopWords   stored in {@code this.ignoreStopWords}
 * @throws IOException if resetting the token stream fails
 */
public Tagger(Terms terms, Bits liveDocs, TokenStream tokenStream,
              TagClusterReducer tagClusterReducer, boolean skipAltTokens,
              boolean ignoreStopWords) throws IOException {
  // Plain configuration state.
  this.terms = terms;
  this.liveDocs = liveDocs;
  this.tagClusterReducer = tagClusterReducer;
  this.skipAltTokens = skipAltTokens;
  this.ignoreStopWords = ignoreStopWords;

  // Token stream and the attributes taken from it (same registration order as before).
  this.tokenStream = tokenStream;
  this.byteRefAtt = tokenStream.addAttribute(TermToBytesRefAttribute.class);
  this.posIncAtt = tokenStream.addAttribute(PositionIncrementAttribute.class);
  this.offsetAtt = tokenStream.addAttribute(OffsetAttribute.class);
  this.taggingAtt = tokenStream.addAttribute(TaggingAttribute.class);
  tokenStream.reset();
}
/**
 * Verifies that the live documents in {@code dir} carry exactly the record ids
 * flagged in {@code expecteds}.
 *
 * <p>Each live document with a {@code FIELD_RECORD_ID} value must have its id set
 * in {@code expecteds}; the bit is cleared once seen. After the scan no bit may
 * remain set. Note that {@code expecteds} is modified by this call.
 *
 * @param expecteds bit set of record ids that must be present; cleared as ids are found
 * @throws Exception if the index cannot be read or an assertion fails
 */
private void checkExpecteds(BitSet expecteds) throws Exception {
  // try-with-resources: the reader is released even when an assertion or an
  // I/O error aborts the scan (previously it leaked in that case).
  try (IndexReader r = DirectoryReader.open(dir)) {
    //Perhaps not the most efficient approach but meets our
    //needs here.
    final Bits liveDocs = MultiBits.getLiveDocs(r);
    for (int i = 0; i < r.maxDoc(); i++) {
      if (liveDocs == null || liveDocs.get(i)) {
        String sval = r.document(i).get(FIELD_RECORD_ID);
        if (sval != null) {
          int val = Integer.parseInt(sval);
          assertTrue("Did not expect document #" + val, expecteds.get(val));
          expecteds.set(val, false);
        }
      }
    }
  }
  assertEquals("Should have 0 docs remaining ", 0, expecteds.cardinality());
}
/**
 * Check the behaviour of compareBottom when docsWithField is null (this happens when all documents contain the
 * field).
 * <p>
 * The comparator's {@code docsWithField} is restored in a {@code finally} block so that a failing stub or
 * verification cannot leave the comparator with a null value.
 */
@Test
public void testCompareBottom_nullDocsWithField()
{
    // Set docsWithField to null to simulate all documents containing the field.
    Bits oldValue = textSortFieldComparator.docsWithField;
    textSortFieldComparator.docsWithField = null;
    try
    {
        // Set up the document to have an empty term.
        when(mockDocTerms.get(DOC)).thenReturn(new BytesRef());
        // Call the method under test.
        textSortFieldComparator.compareBottom(DOC);
        // Expect the EMPTY_TERM to be compared
        verify(mockCollator).compare(BOTTOM_STRING, "");
    }
    finally
    {
        // Reset docsWithField with the mock after the test, even when it failed.
        textSortFieldComparator.docsWithField = oldValue;
    }
}
/**
 * Get prefix &amp; leaf docs at this cell. Called when we've reached detailLevel.
 *
 * <p>Collects the docs of the current indexed cell and advances to the next term.
 * If the current cell was not a leaf and the next term is a leaf for the same
 * cell, those docs are collected as well and the union is returned.
 */
private SmallDocSet getDocs(Cell cell, Bits acceptContains) throws IOException {
  assert indexedCell.compareToNoLeaf(cell) == 0;
  // Leaf-ness is sampled before collecting; both paths then collect and advance once.
  final boolean onlyLeaf = indexedCell.isLeaf();
  final SmallDocSet firstDocs = collectDocs(acceptContains);
  final boolean advanced = nextTerm();
  if (onlyLeaf || !advanced) {
    return firstDocs;
  }
  //collect leaf too
  final boolean leafFollows = indexedCell.isLeaf() && indexedCell.compareToNoLeaf(cell) == 0;
  if (!leafFollows) {
    return firstDocs;
  }
  final SmallDocSet docsAtLeaf = collectDocs(acceptContains);
  nextTerm();
  return union(firstDocs, docsAtLeaf);
}
/**
 * Gathers the top-level ids of all live documents found in the given postings
 * into a {@code SortedIntDocSet}.
 *
 * @param leaves      reader leaves, indexed in parallel with {@code postList}
 * @param postList    one postings enum per leaf; {@code null} entries are skipped
 * @param maxPossible capacity of the id buffer; must be at least the number of live hits
 * @param firstReader index of the first leaf to process
 */
private static DocSet createSmallSet(List<LeafReaderContext> leaves, PostingsEnum[] postList, int maxPossible, int firstReader) throws IOException {
  final int[] ids = new int[maxPossible];
  int count = 0;
  for (int leafIdx = firstReader; leafIdx < postList.length; leafIdx++) {
    final PostingsEnum postings = postList[leafIdx];
    if (postings != null) {
      final LeafReaderContext ctx = leaves.get(leafIdx);
      final Bits liveDocs = ctx.reader().getLiveDocs();
      final int base = ctx.docBase;
      int subId;
      while ((subId = postings.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
        // null liveDocs means nothing is deleted in this leaf
        if (liveDocs == null || liveDocs.get(subId)) {
          ids[count++] = subId + base;
        }
      }
    }
  }
  return new SortedIntDocSet(ids, count);
}
/**
 * Returns a cacheable {@code DocIdSet} backed by {@code getFullySetBits(maxDoc)}
 * and {@code getFullySetDocIdSetIterator(maxDoc)}.
 *
 * @param maxDoc passed to both helpers
 */
public static DocIdSet getFullySetDocIdSet(int maxDoc) {
  // Created once up front and shared by every bits() call.
  final Bits allSet = getFullySetBits(maxDoc);
  return new DocIdSet() {
    @Override
    public boolean isCacheable() {
      return true;
    }

    @Override
    public Bits bits() throws IOException {
      return allSet;
    }

    @Override
    public DocIdSetIterator iterator() throws IOException {
      // A fresh iterator per call.
      return getFullySetDocIdSetIterator(maxDoc);
    }
  };
}
/**
 * Creates a new {@link SoftDeletesRetentionMergePolicy}.
 *
 * <p>Merges produced by the wrapped policy are wrapped so that a reader with
 * live docs gets the retention query applied on top of the wrapped merge's own
 * {@code wrapForMerge} result; readers without live docs are passed through.
 *
 * @param field the soft deletes field
 * @param retentionQuerySupplier a query supplier for the retention query
 * @param in the wrapped MergePolicy
 */
public SoftDeletesRetentionMergePolicy(String field, Supplier<Query> retentionQuerySupplier, MergePolicy in) {
  super(in, toWrap -> new MergePolicy.OneMerge(toWrap.segments) {
    @Override
    public CodecReader wrapForMerge(CodecReader reader) throws IOException {
      final CodecReader wrapped = toWrap.wrapForMerge(reader);
      if (reader.getLiveDocs() != null) {
        return applyRetentionQuery(field, retentionQuerySupplier.get(), wrapped);
      }
      // no deletes - just keep going
      return wrapped;
    }
  });
  // Null checks can only happen after the mandatory super(...) call.
  this.field = Objects.requireNonNull(field, "field must not be null");
  this.retentionQuerySupplier =
      Objects.requireNonNull(retentionQuerySupplier, "retentionQuerySupplier must not be null");
}
/**
 * Debug helper: prints, per segment, the segment info followed by one line for
 * every document id, marking documents that are not live with {@code DEL}.
 */
private static void printDocs(DirectoryReader r) throws Throwable {
  for (LeafReaderContext ctx : r.leaves()) {
    // TODO: improve this
    final LeafReader sub = ctx.reader();
    final Bits liveDocs = sub.getLiveDocs();
    System.out.println(" " + ((SegmentReader) sub).getSegmentInfo());
    final int maxDoc = sub.maxDoc();
    for (int docID = 0; docID < maxDoc; docID++) {
      final Document doc = sub.document(docID);
      // null liveDocs means the segment has no deletions
      final boolean live = liveDocs == null || liveDocs.get(docID);
      final String prefix = live ? " docID=" : " DEL docID=";
      System.out.println(prefix + docID + " id:" + doc.get("id"));
    }
  }
}
/** Returns a single {@link Bits} instance for this
 *  reader, merging live Documents on the
 *  fly.  This method will return null if the reader
 *  has no deletions.
 *
 *  <p>With exactly one leaf, that leaf's own live docs are returned directly.
 *
 *  <p><b>NOTE</b>: this is a very slow way to access live docs.
 *  For example, each Bits access will require a binary search.
 *  It's better to get the sub-readers and iterate through them
 *  yourself. */
public static Bits getLiveDocs(IndexReader reader) {
  if (!reader.hasDeletions()) {
    return null;
  }
  final List<LeafReaderContext> leaves = reader.leaves();
  final int size = leaves.size();
  assert size > 0 : "A reader with deletions must have at least one leave";
  if (size == 1) {
    return leaves.get(0).reader().getLiveDocs();
  }
  final Bits[] perLeaf = new Bits[size];
  final int[] starts = new int[size + 1];
  int slot = 0;
  for (LeafReaderContext ctx : leaves) {
    // record all liveDocs, even if they are null
    perLeaf[slot] = ctx.reader().getLiveDocs();
    starts[slot] = ctx.docBase;
    slot++;
  }
  // sentinel: end of the last leaf
  starts[size] = reader.maxDoc();
  return new MultiBits(perLeaf, starts, true);
}
/**
 * Builds a bit set, sized to the segment's {@code maxDoc}, with a bit set for
 * every document that has at least one value of {@code field} within
 * {@code [min, max]} (both inclusive).
 */
@Override
public Bits getBits(final LeafReaderContext context) throws IOException {
  final int maxDoc = context.reader().maxDoc();
  final FixedBitSet matching = new FixedBitSet(maxDoc);
  final SortedNumericDocValues values = DocValues.getSortedNumeric(context.reader(), field);
  for (int docID = values.nextDoc(); docID != NO_MORE_DOCS; docID = values.nextDoc()) {
    for (int remaining = values.docValueCount(); remaining > 0; remaining--) {
      final long v = values.nextValue();
      if (v < min || v > max) {
        continue;
      }
      // One in-range value is enough for this document.
      matching.set(docID);
      break;
    }
  }
  return matching;
}
/**
 * Collects every document of {@code iterator} accepted by {@code acceptDocs}.
 * The {@code min}/{@code max} arguments are currently not consulted.
 *
 * @return always {@code DocIdSetIterator.NO_MORE_DOCS}
 */
@Override
public int score(LeafCollector collector, Bits acceptDocs, int min, int max) throws IOException {
  // TODO: figure out if min/max can be used to optimize this and still work correctly with pause/resume
  // and also check if twoPhaseIterator can be used
  collector.setScorer(scorer);
  int doc;
  while ((doc = iterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
    if (acceptDocs != null && !acceptDocs.get(doc)) {
      continue; // rejected by the filter
    }
    collector.collect(doc);
  }
  return DocIdSetIterator.NO_MORE_DOCS;
}
/**
 * Scores all documents through an {@code AssertingLeafCollector}. Randomly
 * chooses between the ranged {@code score} overload and the wrapped scorer's
 * {@code score}; falls back to the latter when the ranged overload throws
 * {@code UnsupportedOperationException}.
 */
@Override
public void score(LeafCollector collector, Bits acceptDocs) throws IOException {
  assert max == 0;
  final LeafCollector checked = new AssertingLeafCollector(collector, 0, PostingsEnum.NO_MORE_DOCS);
  if (!random.nextBoolean()) {
    in.score(checked, acceptDocs);
    return;
  }
  try {
    final int next = score(checked, acceptDocs, 0, PostingsEnum.NO_MORE_DOCS);
    assert next == DocIdSetIterator.NO_MORE_DOCS;
  } catch (UnsupportedOperationException e) {
    // Ranged scoring unsupported: delegate the whole scoring instead.
    in.score(checked, acceptDocs);
  }
}
/**
 * Returns the documents-with-value bits for the geo points of this segment:
 * the singleton's own bits when the values unwrap to a singleton, otherwise
 * bits computed from the multi-valued view and the segment's {@code maxDoc}.
 */
@Override
public Bits docsWithValue(LeafReaderContext context) {
  final MultiGeoPointValues geoPoints = geoPointValues(context);
  if (org.elasticsearch.index.fielddata.FieldData.unwrapSingleton(geoPoints) == null) {
    return org.elasticsearch.index.fielddata.FieldData.docsWithValue(geoPoints, context.reader().maxDoc());
  }
  return org.elasticsearch.index.fielddata.FieldData.unwrapSingletonBits(geoPoints);
}
/**
 * Test helper: a {@code BulkScorer} over a fixed list of matching doc ids.
 * Each {@code score} call skips matches below {@code min}, collects accepted
 * matches below {@code max}, and returns either {@code NO_MORE_DOCS} (all
 * matches consumed) or a random value between {@code max} and the next match.
 * The position in {@code matches} is kept across calls.
 */
private static BulkScorer scorer(int... matches) {
  return new BulkScorer() {
    final ScoreAndDoc scorer = new ScoreAndDoc();
    int next = 0;

    @Override
    public int score(LeafCollector collector, Bits acceptDocs, int min, int max) throws IOException {
      collector.setScorer(scorer);
      // Skip matches that fall before the requested window.
      while (next < matches.length && matches[next] < min) {
        next++;
      }
      for (; next < matches.length && matches[next] < max; next++) {
        final int doc = matches[next];
        scorer.doc = doc;
        if (acceptDocs == null || acceptDocs.get(doc)) {
          collector.collect(doc);
        }
      }
      return next == matches.length
          ? DocIdSetIterator.NO_MORE_DOCS
          : RandomNumbers.randomIntBetween(random(), max, matches[next]);
    }

    @Override
    public long cost() {
      return matches.length;
    }
  };
}
/**
 * Asks every entry of {@code _facets} for a scorer with the given arguments.
 *
 * @return one scorer per facet, in facet order
 */
private Scorer[] getScorers(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer,
    Bits acceptDocs) throws IOException {
  final Scorer[] result = new Scorer[_facets.length];
  int idx = 0;
  while (idx < result.length) {
    result[idx] = _facets[idx].scorer(context, scoreDocsInOrder, topScorer, acceptDocs);
    idx++;
  }
  return result;
}
/**
 * Loads the binary doc values of {@code field} together with its
 * docs-with-field bits and exposes them through {@code FieldData.singleton}.
 *
 * @throws IllegalStateException wrapping any {@code IOException} raised while loading
 */
@Override
public SortedBinaryDocValues getBytesValues() {
  try {
    return FieldData.singleton(
        DocValues.getBinary(reader, field),
        DocValues.getDocsWithField(reader, field));
  } catch (IOException e) {
    throw new IllegalStateException("Cannot load doc values", e);
  }
}
/**
 * Remaps live docs through {@code sortMap}: starts from a fully set bit set of
 * the same length and clears the new position of every document that is not live.
 *
 * @param liveDocs live docs in the old order; must not be null
 * @param sortMap  old-to-new document mapping; must not be null
 */
private FixedBitSet sortLiveDocs(Bits liveDocs, Sorter.DocMap sortMap) {
  assert liveDocs != null && sortMap != null;
  final int numDocs = liveDocs.length();
  final FixedBitSet remapped = new FixedBitSet(numDocs);
  remapped.set(0, numDocs);
  for (int oldDoc = 0; oldDoc < numDocs; oldDoc++) {
    if (!liveDocs.get(oldDoc)) {
      remapped.clear(sortMap.oldToNew(oldDoc));
    }
  }
  return remapped;
}
/**
 * Computes which documents of a row should be fetched.
 *
 * <p>Bits are set by {@code applyFamilies}/{@code applyColumns} when the selector
 * names column families or columns; when it names neither, all
 * {@code numberOfDocsInRow} bits are set. If a filter is given, the result is
 * intersected with the mask derived from it. The resulting cardinality is
 * written to {@code totalRecords}.
 *
 * @return the bit set of documents to fetch
 */
private static OpenBitSet getDocsToFetch(AtomicReader atomicReader, Selector selector, int primeDocRowId,
    int numberOfDocsInRow, Bits liveDocs, Filter filter, AtomicInteger totalRecords) throws IOException {
  final Set<String> alreadyProcessed = new HashSet<String>();
  final OpenBitSet toFetch = new OpenBitSet(numberOfDocsInRow);

  OpenBitSet mask = null;
  if (filter != null) {
    final DocIdSet filtered = filter.getDocIdSet(atomicReader.getContext(), liveDocs);
    mask = getMask(filtered, primeDocRowId, numberOfDocsInRow);
  }

  final Set<String> families = selector.getColumnFamiliesToFetch();
  if (families != null) {
    applyFamilies(alreadyProcessed, toFetch, families, atomicReader, primeDocRowId, numberOfDocsInRow,
        liveDocs);
  }

  final Map<String, Set<String>> columns = selector.getColumnsToFetch();
  if (columns != null) {
    applyColumns(alreadyProcessed, toFetch, columns, atomicReader, primeDocRowId, numberOfDocsInRow, liveDocs);
  }

  // Nothing requested explicitly: fetch the whole row.
  if (families == null && columns == null) {
    toFetch.set(0, numberOfDocsInRow);
  }
  if (mask != null) {
    toFetch.intersect(mask);
  }
  totalRecords.set((int) toFetch.cardinality());
  return toFetch;
}
/**
 * Wraps the delegate's leaf collector so that only documents whose bit is set
 * in the filter-derived {@code Bits} are passed on.
 */
@Override
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
  final Scorer scorerForFilter = filter.scorer(context);
  final LeafCollector in = collector.getLeafCollector(context);
  final Bits accepted = Lucene.asSequentialAccessBits(context.reader().maxDoc(), scorerForFilter);
  return new FilterLeafCollector(in) {
    @Override
    public void collect(int doc) throws IOException {
      if (!accepted.get(doc)) {
        return; // filtered out
      }
      in.collect(doc);
    }
  };
}
/**
 * Loads the per-segment binary values and docs-with-field bits for {@code field}.
 * A {@code Bits.MatchAllBits} result is stored as {@code null} instead.
 *
 * @return this comparator
 */
@Override
public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException
{
    docTerms = DocValues.getBinary(context.reader(), field);
    final Bits withField = DocValues.getDocsWithField(context.reader(), field);
    docsWithField = (withField instanceof Bits.MatchAllBits) ? null : withField;
    return this;
}
/**
 * Loads the per-segment binary values and docs-with-field bits for {@code field}.
 * A {@code Bits.MatchAllBits} result is stored as {@code null} instead.
 *
 * @return this comparator
 */
@Override
public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException
{
    docTerms = DocValues.getBinary(context.reader(), field);
    final Bits withField = DocValues.getDocsWithField(context.reader(), field);
    docsWithField = (withField instanceof Bits.MatchAllBits) ? null : withField;
    return this;
}
/**
 * Wrapping a {@code SuperQuery} with a filter must yield a {@code SuperQuery}
 * whose inner query is a {@code FilteredQuery} over the original query and the
 * very same filter instance.
 */
@Test
public void testQueryFilterWrap1() throws IOException {
  final IndexReader reader = getIndexReader();
  final AccessControlFactory accessControlFactory = new FilterAccessControlFactory();
  final Collection<String> readAuthorizations = new ArrayList<String>();
  final Collection<String> discoverAuthorizations = new ArrayList<String>();
  final Set<String> discoverableFields = new HashSet<String>(Arrays.asList("rowid"));
  final BlurSecureIndexSearcher searcher = new BlurSecureIndexSearcher(reader, null, accessControlFactory,
      readAuthorizations, discoverAuthorizations, discoverableFields, null);

  final Query query = new TermQuery(new Term("a", "b"));
  // The filter is only checked for identity; it is never expected to be evaluated here.
  final Filter filter = new Filter() {
    @Override
    public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
      throw new RuntimeException("Not implemented.");
    }
  };

  final Term primeDocTerm = new Term(BlurConstants.PRIME_DOC, BlurConstants.PRIME_DOC_VALUE);
  final SuperQuery superQuery = new SuperQuery(query, ScoreType.SUPER, primeDocTerm);
  final Query wrapFilter = searcher.wrapFilter(superQuery, filter);
  System.out.println(wrapFilter);

  assertTrue(wrapFilter instanceof SuperQuery);
  final Query inner = ((SuperQuery) wrapFilter).getQuery();
  assertTrue(inner instanceof FilteredQuery);
  final FilteredQuery filteredQuery = (FilteredQuery) inner;
  assertEquals(filteredQuery.getQuery(), query);
  assertTrue(filteredQuery.getFilter() == filter);
}
/**
 * Returns a {@code Bits} of length {@code maxDoc} in which no bit is set.
 *
 * @param maxDoc the value reported by {@code length()}
 */
public static Bits getFullyEmptyBits(int maxDoc) {
  return new Bits() {
    @Override
    public int length() {
      return maxDoc;
    }

    @Override
    public boolean get(int docId) {
      // Empty by definition, whatever the index.
      return false;
    }
  };
}
/**
 * Returns a {@code Bits} of the given length in which every bit is set.
 *
 * @param length the value reported by {@code length()}
 */
public static Bits getMatchAll(final int length) {
  return new Bits() {
    @Override
    public boolean get(int docId) {
      // Matches everything, whatever the index.
      return true;
    }

    @Override
    public int length() {
      return length;
    }
  };
}
/**
 * This is the primary algorithm; recursive. Returns null if finds none.
 *
 * <p>Walks the next-level cells of {@code cell}. For each sub-cell a doc set is
 * computed (via {@code getDocs}, {@code getLeafDocs}, or a recursive call unioned
 * with the leaf docs) and then fed back in as {@code acceptContains} for the next
 * sub-cell, so the final result is the AND across sub-cells. As soon as one
 * sub-cell yields {@code null}, iteration stops and {@code null} is returned.
 *
 * @param cell the cell whose sub-cells are visited
 * @param acceptContains docs accepted so far; narrowed after each sub-cell
 * @return the combined result of the last sub-cell visited, or {@code null}
 */
private SmallDocSet visit(Cell cell, Bits acceptContains) throws IOException {
if (thisTerm == null)//signals all done
return null;
// Get the AND of all child results (into combinedSubResults)
SmallDocSet combinedSubResults = null;
// Optimization: use null subCellsFilter when we know cell is within the query shape.
Shape subCellsFilter = queryShape;
// Level 0 always keeps the query shape as filter; deeper cells drop it when
// their recorded relation is null or WITHIN.
if (cell.getLevel() != 0 && ((cell.getShapeRel() == null || cell.getShapeRel() == SpatialRelation.WITHIN))) {
subCellsFilter = null;
assert cell.getShape().relate(queryShape) == SpatialRelation.WITHIN;
}
CellIterator subCells = cell.getNextLevelCells(subCellsFilter);
while (subCells.hasNext()) {
Cell subCell = subCells.next();
if (!seek(subCell)) {
// seek reported no match for this sub-cell: the AND is empty
combinedSubResults = null;
} else if (subCell.getLevel() == detailLevel) {
// deepest level reached: take the docs at this cell
combinedSubResults = getDocs(subCell, acceptContains);
} else if (!multiOverlappingIndexedShapes &&
subCell.getShapeRel() == SpatialRelation.WITHIN) {
// sub-cell is within the query shape and shapes don't multi-overlap: leaf docs only, no recursion
combinedSubResults = getLeafDocs(subCell, acceptContains);
} else {
//OR the leaf docs with all child results
SmallDocSet leafDocs = getLeafDocs(subCell, acceptContains);
SmallDocSet subDocs = visit(subCell, acceptContains); //recursion
combinedSubResults = union(leafDocs, subDocs);
}
if (combinedSubResults == null)
break;
acceptContains = combinedSubResults;//has the 'AND' effect on next iteration
}
return combinedSubResults;
}
/**
 * Reads the live-docs bit set of a segment from the file named after the
 * segment name, {@code EXTENSION} and the segment's delete generation.
 *
 * <p>After the header check, {@code FixedBitSet.bits2words(maxDoc)} longs are read
 * and wrapped in a {@code FixedBitSet} of length {@code maxDoc}. The number of
 * cleared bits must equal {@code info.getDelCount()}, otherwise a
 * {@code CorruptIndexException} is thrown.
 *
 * @return a read-only view of the bits that were read
 * @throws IOException on read failure or detected corruption
 */
@Override
public Bits readLiveDocs(Directory dir, SegmentCommitInfo info, IOContext context) throws IOException {
long gen = info.getDelGen();
String name = IndexFileNames.fileNameFromGeneration(info.info.name, EXTENSION, gen);
final int length = info.info.maxDoc();
try (ChecksumIndexInput input = dir.openChecksumInput(name, context)) {
// Remembers any failure so the footer check in 'finally' can take it into account.
Throwable priorE = null;
try {
CodecUtil.checkIndexHeader(input, CODEC_NAME, VERSION_START, VERSION_CURRENT,
info.info.getId(), Long.toString(gen, Character.MAX_RADIX));
long data[] = new long[FixedBitSet.bits2words(length)];
for (int i = 0; i < data.length; i++) {
data[i] = input.readLong();
}
FixedBitSet fbs = new FixedBitSet(data, length);
// cleared bits == deleted docs; must agree with the count recorded for the segment
if (fbs.length() - fbs.cardinality() != info.getDelCount()) {
throw new CorruptIndexException("bits.deleted=" + (fbs.length() - fbs.cardinality()) +
" info.delcount=" + info.getDelCount(), input);
}
return fbs.asReadOnlyBits();
} catch (Throwable exception) {
priorE = exception;
} finally {
// Runs on both the success path (priorE == null) and the failure path.
CodecUtil.checkFooter(input, priorE);
}
}
// Only reachable if checkFooter returns normally after a caught failure.
// NOTE(review): presumably checkFooter always rethrows a non-null priorE - confirm.
throw new AssertionError();
}
/**
 * Converts a doc set into accept-docs bits, with two shortcuts:
 * {@code null} when the set is as large as {@code searcher.numDocs()} and a
 * match-nothing {@code Bits} when it is empty.
 */
private Bits getTopAcceptDocs(DocSet docSet, SolrIndexSearcher searcher) throws IOException {
  if (docSet.size() == searcher.numDocs()) {
    // means match everything (all live docs). This can speedup things a lot.
    return null;
  }
  if (docSet.size() == 0) {
    // can speedup things a lot
    return new Bits.MatchNoBits(searcher.maxDoc());
  }
  return docSet.getBits();
}