下面列出了org.apache.lucene.search.SimpleCollector#org.apache.lucene.util.FixedBitSet 实例代码,或者点击链接到github查看源代码,也可以在右侧发表评论。
/**
 * Round-trips a randomly populated {@link FixedBitSet} through an {@link IndexFileBitSet}
 * stored in an in-memory directory and checks that both iterate over the same bits.
 *
 * <p>The bit set and the directory are released even when an assertion or an I/O call
 * fails part-way through, so a failing run does not leave them open.
 *
 * @throws IOException if writing to or reading from the directory fails
 */
@Test
public void test() throws IOException {
  Random random = new Random(_seed);
  int numBits = random.nextInt(10000000);
  FixedBitSet fixedBitSet = new FixedBitSet(numBits);
  populate(random, numBits, fixedBitSet);
  String id = "id";
  String segmentName = "seg1";
  // try-with-resources closes the directory on every exit path.
  try (RAMDirectory directory = new RAMDirectory()) {
    IndexFileBitSet indexFileBitSet = new IndexFileBitSet(numBits, id, segmentName, directory);
    try {
      assertFalse(indexFileBitSet.exists());
      indexFileBitSet.create(fixedBitSet.iterator());
      indexFileBitSet.load();
      checkEquals(fixedBitSet.iterator(), indexFileBitSet.iterator(), numBits);
    } finally {
      // Always release the file-backed bit set, even if a check above failed.
      indexFileBitSet.close();
    }
    // Dump what was written, after the bit set has been closed (as before).
    String[] listAll = directory.listAll();
    for (String s : listAll) {
      System.out.println(s + " " + directory.fileLength(s));
    }
  }
}
/**
 * Rolls every document of {@code childInput} up to a parent taken from {@code parentList}.
 *
 * <p>For each input document the next set bit at or after it in the parent bit set is looked
 * up and collected. The input may itself contain parents: a parent, or anything below it,
 * rolls up to that same parent. Input documents at or before the most recently found parent
 * are skipped, so each parent is collected only once.
 *
 * @param childInput documents to roll up (children and, optionally, parents)
 * @param parentList bit set marking the parent documents
 * @param qcontext   supplies the searcher whose {@code maxDoc()} sizes the collector
 * @return the collected parents
 */
public static DocSet toParents(DocSet childInput, BitDocSet parentList, QueryContext qcontext) throws IOException {
  final FixedBitSet parentBits = parentList.getBits();
  final DocSetCollector parents = new DocSetCollector(qcontext.searcher().maxDoc());
  int lastParent = -1;
  for (DocIterator it = childInput.iterator(); it.hasNext(); ) {
    final int doc = it.nextDoc(); // TODO: skipping
    // Strictly greater: a doc equal to lastParent is a parent that was already handled.
    if (doc > lastParent) {
      lastParent = parentBits.nextSetBit(doc);
      if (lastParent != DocIdSetIterator.NO_MORE_DOCS) {
        // first time this parent is reached, so record it
        parents.collect(lastParent);
      }
    }
  }
  return parents.getDocSet();
}
/**
 * Builds a {@link SolrOwnerScorer} over the documents owned by {@code authority}.
 *
 * <p>Only USER authorities can own documents; any other authority type gets a scorer over
 * an empty doc set. For users, the owned doc set is read from the owner-lookup cache and,
 * on a miss, computed with a term query on the owner field and then cached.
 */
public static SolrOwnerScorer createOwnerScorer(Weight weight, LeafReaderContext context, SolrIndexSearcher searcher, String authority) throws IOException
{
    if (AuthorityType.getAuthorityType(authority) != AuthorityType.USER)
    {
        // Not a user: nothing can be owned, so score against an empty doc set.
        return new SolrOwnerScorer(weight, new BitDocSet(new FixedBitSet(0)), context, searcher);
    }

    DocSet owned = (DocSet) searcher.cacheLookup(CacheConstants.ALFRESCO_OWNERLOOKUP_CACHE, authority);
    if (owned == null)
    {
        // Cache miss: find the docs whose owner field equals the authority, then remember them.
        Term ownerTerm = new Term(QueryConstants.FIELD_OWNER, authority);
        owned = searcher.getDocSet(new TermQuery(ownerTerm));
        searcher.cacheInsert(CacheConstants.ALFRESCO_OWNERLOOKUP_CACHE, authority, owned);
    }
    return new SolrOwnerScorer(weight, owned, context, searcher);
}
/**
 * Creates one collector per set bit of {@code groupBits}, keyed by that bit's index, and
 * stores the arguments for later use.
 *
 * <p>When {@code docValues} is a {@code MultiSortedDocValues}, the typed reference and its
 * {@code mapping} are stored as well.
 */
public GroupExpandCollector(SortedDocValues docValues, FixedBitSet groupBits, IntHashSet collapsedSet, int limit, Sort sort) throws IOException {
  // Presize the map from the number of collapsed groups.
  groups = new LongObjectHashMap<>(collapsedSet.size());

  // The iterator's cost estimate is irrelevant here, hence 0.
  final DocIdSetIterator groupOrds = new BitSetIterator(groupBits, 0);
  for (int ord = groupOrds.nextDoc(); ord != DocIdSetIterator.NO_MORE_DOCS; ord = groupOrds.nextDoc()) {
    groups.put(ord, getCollector(limit, sort));
  }

  this.collapsedSet = collapsedSet;
  this.groupBits = groupBits;
  this.docValues = docValues;
  if (docValues instanceof MultiDocValues.MultiSortedDocValues) {
    final MultiDocValues.MultiSortedDocValues multi = (MultiDocValues.MultiSortedDocValues) docValues;
    this.multiSortedDocValues = multi;
    this.ordinalMap = multi.mapping;
  }
}
/**
 * Runs {@code query} and returns the matching documents as a bit set indexed by
 * top-level doc id (segment doc id plus the segment's {@code docBase}).
 * Scores are not requested.
 */
private Bits searchForDocBits(Query query) throws IOException {
  final int maxDoc = indexSearcher.getIndexReader().maxDoc();
  final FixedBitSet matches = new FixedBitSet(maxDoc);

  final SimpleCollector collector = new SimpleCollector() {
    /** docBase of the segment currently being collected. */
    private int docBase;

    @Override
    protected void doSetNextReader(LeafReaderContext context) throws IOException {
      docBase = context.docBase;
    }

    @Override
    public void collect(int doc) throws IOException {
      // translate the segment-local id into a top-level id
      matches.set(docBase + doc);
    }

    @Override
    public ScoreMode scoreMode() {
      return ScoreMode.COMPLETE_NO_SCORES;
    }
  };

  indexSearcher.search(query, collector);
  return matches;
}
/**
 * Starts a new segment: allocates a bit set sized to the segment's {@code maxDoc()},
 * makes it the {@code current} one, and stores it in {@code fixedBitSets} at the
 * segment's ordinal. The returned collector marks each hit in {@code current}
 * and increments {@code totalHits}; scorers are ignored.
 */
@Override
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
  final int segmentMaxDoc = context.reader().maxDoc();
  current = new FixedBitSet(segmentMaxDoc);
  fixedBitSets.add(context.ord, current);

  final LeafCollector segmentCollector = new LeafCollector() {
    @Override
    public void collect(int doc) throws IOException {
      // note: writes through the enclosing 'current' field
      current.set(doc);
      totalHits++;
    }

    @Override
    public void setScorer(Scorer scorer) throws IOException {
      // scores are not needed
    }
  };
  return segmentCollector;
}
/**
 * Returns a new set holding the documents of this set that are not in {@code other}.
 * This set's bits are cloned first, so neither operand is modified.
 */
@Override
public DocSet andNot(DocSet other) {
  final FixedBitSet remaining = bits.clone();

  if (!(other instanceof BitDocSet)) {
    // Generic path: clear each doc of the other set individually,
    // ignoring ids that fall beyond the end of our bit set.
    final int limit = remaining.length();
    for (DocIterator it = other.iterator(); it.hasNext(); ) {
      final int doc = it.nextDoc();
      if (doc < limit) {
        remaining.clear(doc);
      }
    }
    return new BitDocSet(remaining);
  }

  // Fast path: bit set against bit set.
  remaining.andNot(((BitDocSet) other).bits);
  return new BitDocSet(remaining);
}
/**
 * Builds a Solr {@link DocList} holding at most {@code rows} documents from
 * {@code matchDocIdsBS}, in ascending doc id order.
 *
 * <p>A DocList normally comes from a SolrIndexSearcher search, but here the exact set of
 * documents is already known and neither order nor score matters. Implementing DocList
 * directly on top of a bit set would mean implementing too many methods for little
 * benefit, so the ids are copied into a {@link DocSlice} instead.
 */
private DocList getDocList(int rows, FixedBitSet matchDocIdsBS) throws IOException {
  final int totalMatches = matchDocIdsBS.cardinality();
  final int returned = Math.min(rows, totalMatches);
  final int[] ids = new int[returned];

  final DocIdSetIterator it = new BitSetIterator(matchDocIdsBS, 1);
  int filled = 0;
  while (filled < returned) {
    ids[filled++] = it.nextDoc();
  }

  // no per-document scores (null), maximum score fixed at 1
  return new DocSlice(0, returned, ids, null, totalMatches, 1f, TotalHits.Relation.EQUAL_TO);
}
/**
 * For every query term found in {@code termsEnum}, marks the term's documents in
 * {@code matchingDocs} and stores the term's score for each of them.
 *
 * <p>Terms are visited in the order given by {@code ords}. A document reached through more
 * than one term keeps the score of the last term visited.
 */
protected void fillDocsAndScores(FixedBitSet matchingDocs, TermsEnum termsEnum) throws IOException {
  final BytesRef scratch = new BytesRef();
  PostingsEnum postings = null;
  for (int i = 0; i < terms.size(); i++) {
    if (!termsEnum.seekExact(terms.get(ords[i], scratch))) {
      continue; // term absent from this segment
    }
    postings = termsEnum.postings(postings, PostingsEnum.NONE);
    final float termScore = TermsIncludingScoreQuery.this.scores[ords[i]];
    int doc;
    while ((doc = postings.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
      matchingDocs.set(doc);
      // If the same doc is also related to another term, an earlier score is overwritten here.
      // That should only be possible in a many-to-many relation.
      scores[doc] = termScore;
    }
  }
}
/**
 * Exercises conjunctions and nested conjunctions over many small random bit sets,
 * since bugs tend to show up at boundary conditions. The index holds one empty document.
 */
public void testConjunctions() throws Exception {
  try (Directory dir = newDirectory()) {
    // Index a single empty document and commit it by closing the writer.
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
    writer.addDocument(new Document());
    writer.close();

    try (DirectoryReader reader = DirectoryReader.open(dir)) {
      IndexSearcher searcher = newSearcher(reader);
      searcher.setQueryCache(null);

      // Random draws are made in the same order as before so seeds stay reproducible.
      final int firstSize = atLeast(1000);
      final int secondSize = atLeast(10);
      FixedBitSet[] sets = randBitSets(firstSize, secondSize);

      final int iterations;
      if (TEST_NIGHTLY) {
        iterations = atLeast(10000);
      } else {
        iterations = atLeast(500);
      }

      doConjunctions(searcher, sets, iterations, atLeast(5));
      doNestedConjunctions(searcher, sets, iterations, atLeast(3), atLeast(3));
    }
  }
}
/**
 * Creates a constant-score weight that matches a pseudo-random subset of each segment.
 *
 * <p>Per segment, a {@link Random} seeded with {@code seed ^ docBase} is drawn once per
 * document, and the document matches when the draw is {@code <= density}. The weight is
 * never cacheable.
 */
@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
  return new ConstantScoreWeight(this, boost) {
    @Override
    public boolean isCacheable(LeafReaderContext ctx) {
      return false;
    }

    @Override
    public Scorer scorer(LeafReaderContext context) throws IOException {
      final int maxDoc = context.reader().maxDoc();
      final FixedBitSet accepted = new FixedBitSet(maxDoc);
      final Random rng = new Random(seed ^ context.docBase);
      int docID = 0;
      while (docID < maxDoc) {
        // exactly one draw per document
        if (rng.nextFloat() <= density) {
          accepted.set(docID);
        }
        docID++;
      }
      return new ConstantScoreScorer(
          this, score(), scoreMode, new BitSetIterator(accepted, accepted.approximateCardinality()));
    }
  };
}
/**
 * Writes one block to {@code out}.
 *
 * <p>Layout: the block number as a short, then {@code cardinality - 1} as a short, then
 * <ul>
 *   <li>at most {@code MAX_ARRAY_LENGTH} docs: each set doc id as a short;</li>
 *   <li>all 65536 docs set: nothing further;</li>
 *   <li>otherwise: the rank table (only when {@code denseRankPower != -1}) followed by
 *       the raw longs of {@code buffer}.</li>
 * </ul>
 */
private static void flush(
    int block, FixedBitSet buffer, int cardinality, byte denseRankPower, IndexOutput out) throws IOException {
  assert block >= 0 && block < 65536;
  out.writeShort((short) block);
  assert cardinality > 0 && cardinality <= 65536;
  out.writeShort((short) (cardinality - 1));

  if (cardinality <= MAX_ARRAY_LENGTH) {
    // Few docs: list them individually.
    final BitSetIterator docs = new BitSetIterator(buffer, cardinality);
    int doc;
    while ((doc = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
      out.writeShort((short) doc);
    }
    return;
  }

  if (cardinality == 65536) {
    // Every doc in the block is set: the header alone is enough.
    return;
  }

  if (denseRankPower != -1) {
    final byte[] rank = createRank(buffer, denseRankPower);
    out.writeBytes(rank, rank.length);
  }
  for (long word : buffer.getBits()) {
    out.writeLong(word);
  }
}
/**
 * Superposes two binary vectors probabilistically, according to their weights.
 *
 * <p>For each dimension the probability of a set bit in the result is the sum of the
 * normalised weights ({@code weight / (weight1 + weight2)}) of the inputs that have the bit
 * set. The random source is seeded from {@code v1}, so the same inputs always give the same
 * result.
 *
 * <p>The draw is compared with a strict {@code <}. {@link Random#nextDouble()} returns
 * values in {@code [0, 1)} and can return exactly {@code 0.0}, so the previous {@code <=}
 * could set a bit that neither input has (probability 0).
 *
 * @param v1      first vector; also supplies the dimension and the random seed
 * @param weight1 weight of {@code v1}
 * @param v2      second vector
 * @param weight2 weight of {@code v2}
 * @return a new binary vector of {@code v1}'s dimension
 */
public static Vector weightedSuperposition(
    BinaryVector v1, double weight1, BinaryVector v2, double weight2) {
  final int dimension = v1.getDimension();
  BinaryVector conclusion = (BinaryVector) VectorFactory.createZeroVector(VectorType.BINARY, dimension);
  FixedBitSet cVote = conclusion.bitSet;
  FixedBitSet v1vote = v1.bitSet;
  FixedBitSet v2vote = v2.bitSet;

  // new Random(seed) is specified to be equivalent to new Random() followed by setSeed(seed).
  Random random = new Random(Bobcat.asLong(v1.writeLongToString()));

  // Loop-invariant: hoisted out of the per-dimension loop.
  final double totalWeight = weight1 + weight2;

  for (int x = 0; x < dimension; x++) {
    double probability = 0;
    if (v1vote.get(x)) probability += weight1 / totalWeight;
    if (v2vote.get(x)) probability += weight2 / totalWeight;
    // One draw per dimension keeps the random sequence aligned with earlier versions.
    if (random.nextDouble() < probability) {
      cVote.set(x);
    }
  }
  return conclusion;
}
/**
 * Runs a {@code SegmentPartitioner} over every leaf of the top-level reader, gathers each
 * leaf's resulting bit set at the leaf's ordinal, and returns the weight of a constant-score
 * query over a {@code BitsFilter} built from those bit sets. Scores are not requested.
 *
 * @throws IOException wrapping any exception raised while partitioning a segment
 */
@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
  final SolrIndexSearcher solrSearcher = (SolrIndexSearcher) searcher;
  final List<LeafReaderContext> segments = solrSearcher.getTopReaderContext().leaves();
  final FixedBitSet[] perSegmentDocs = new FixedBitSet[segments.size()];

  try {
    for (LeafReaderContext segment : segments) {
      final SegmentPartitioner partitioner =
          new SegmentPartitioner(segment, worker, workers, keys, solrSearcher);
      partitioner.run();
      perSegmentDocs[partitioner.context.ord] = partitioner.docs;
    }
  } catch (Exception e) {
    // The first failure aborts the whole loop, exactly as before.
    throw new IOException(e);
  }

  final BitsFilter filter = new BitsFilter(perSegmentDocs);
  return searcher.rewrite(new ConstantScoreQuery(filter))
      .createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, boost);
}
/**
 * Returns the binary doc values of {@code field} with doc ids remapped through
 * {@code docMap}, or {@code null} when the wrapped reader has none for the field.
 *
 * <p>The remapped values are built once per field, under the {@code cachedBinaryDVs} lock,
 * by copying every value to its new doc id. Later calls reuse the cached copy.
 */
@Override
public BinaryDocValues getBinaryDocValues(String field) throws IOException {
  final BinaryDocValues source = in.getBinaryDocValues(field);
  if (source == null) {
    return null;
  }

  CachedBinaryDVs cached;
  synchronized (cachedBinaryDVs) {
    cached = cachedBinaryDVs.get(field);
    if (cached == null) {
      final int docCount = maxDoc();
      final FixedBitSet docsWithField = new FixedBitSet(docCount);
      final BytesRef[] values = new BytesRef[docCount];
      for (int oldDoc = source.nextDoc(); oldDoc != NO_MORE_DOCS; oldDoc = source.nextDoc()) {
        final int newDoc = docMap.oldToNew(oldDoc);
        docsWithField.set(newDoc);
        // deep copy of the value returned for the current doc
        values[newDoc] = BytesRef.deepCopyOf(source.binaryValue());
      }
      cached = new CachedBinaryDVs(values, docsWithField);
      cachedBinaryDVs.put(field, cached);
    }
  }
  return new SortingBinaryDocValues(cached);
}
/**
 * Compacts the first {@code length} entries of {@code arr} in place, dropping entries equal
 * to the previously kept value and entries rejected by {@code acceptDocs}.
 *
 * @param arr        values to compact; only adjacent duplicates are detected
 * @param length     number of leading entries to consider
 * @param acceptDocs filter on values, or {@code null} to accept everything
 * @return the number of entries kept, now stored in {@code arr[0..result)}
 */
private static int dedup(int[] arr, int length, FixedBitSet acceptDocs) {
  int kept = 0;
  int last = -1;
  int i = 0;
  while (i < length) {
    final int candidate = arr[i++];
    // assert candidate >= last;
    if (candidate == last) {
      continue; // repeat of the value just kept
    }
    if (acceptDocs != null && !acceptDocs.get(candidate)) {
      continue; // filtered out
    }
    arr[kept++] = candidate;
    last = candidate;
  }
  return kept;
}
/**
 * Returns the number of documents present in both this set and {@code other}.
 * Two bit sets are counted directly; any other implementation is asked to do the
 * counting itself.
 */
@Override
public int intersectionSize(DocSet other) {
  if (!(other instanceof BitDocSet)) {
    // they had better not call us back!
    return other.intersectionSize(this);
  }
  final FixedBitSet otherBits = ((BitDocSet) other).bits;
  return (int) FixedBitSet.intersectionCount(this.bits, otherBits);
}
/**
 * Starts a new segment: allocates a bit set sized to the segment's {@code maxDoc()},
 * makes it the active {@code set}, appends it to {@code sets}, and returns this
 * object as the leaf collector.
 */
@Override
public LeafCollector getLeafCollector(LeafReaderContext context)
    throws IOException {
  final int segmentMaxDoc = context.reader().maxDoc();
  final FixedBitSet segmentBits = new FixedBitSet(segmentMaxDoc);
  set = segmentBits;
  sets.add(segmentBits);
  return this;
}
/**
* Performs superposition from a particular row by sweeping a bitset across the voting record
* such that for any column in which the incoming bitset contains a '1', 1's are changed
* to 0's until a new 1 can be added, facilitating incrementation of the
* binary number represented in this column.
*
* <p>In other words, each column of the voting record is treated as a binary counter
* (row index = bit position) and this method adds one, carried upwards from
* {@code rowfloor}, to every column selected by {@code incomingBitSet}. The shared
* {@code tempSet} is used as scratch space and is overwritten.
*
* @param incomingBitSet the bitset to be added
* @param rowfloor the index of the place in the voting record to start the sweep at
*/
protected synchronized void superposeBitSetFromRowFloor(FixedBitSet incomingBitSet, int rowfloor) {
// Attempt to save space when minimum value across all columns > 0
// by decrementing across the board and raising the minimum where possible.
// NOTE(review): the value comes from getMaximumSharedWeight(); presumably the largest
// amount that can be subtracted from every column - confirm against that helper.
int max = getMaximumSharedWeight();
if (max > 0) {
decrement(max);
}
// Handle overflow: if any column that will be incremented
// contains all 1's, add a new row to the voting record.
// x xor x == 0, so the first xor clears tempSet; the second copies incomingBitSet into it.
tempSet.xor(tempSet);
tempSet.xor(incomingBitSet);
// After and-ing with every row from rowfloor upwards, only the incoming columns that hold
// a 1 in all of those rows remain set; the loop stops early once none are left.
for (int x = rowfloor; x < votingRecord.size() && tempSet.cardinality() > 0; x++) {
tempSet.and(votingRecord.get(x));
}
// Any surviving column would carry past the top row, so add one more row for the carry.
if (tempSet.cardinality() > 0) {
votingRecord.add(new FixedBitSet(dimension));
}
// Sweep copy of bitset to be added across rows of voting record.
// If a new '1' is added, this position in the copy is changed to zero
// and will not affect future rows.
// The xor step will transform 1's to 0's or vice versa for
// dimension in which the temporary bitset contains a '1'.
// Lowest row: flip every incoming column.
votingRecord.get(rowfloor).xor(incomingBitSet);
// Reset tempSet to a fresh copy of incomingBitSet; it now tracks the columns still carrying.
tempSet.xor(tempSet);
tempSet.xor(incomingBitSet);
for (int x = rowfloor + 1; x < votingRecord.size(); x++) {
tempSet.andNot(votingRecord.get(x-1)); //if 1 already added, eliminate dimension from tempSet
// Flip the columns that are still carrying into this row.
votingRecord.get(x).xor(tempSet);
// votingRecord.get(x).trimTrailingZeros(); //attempt to save in sparsely populated rows
}
}
/**
 * ORs each bit set of {@code bitsFilter} into the bit set at the same position in this
 * filter, modifying this filter's bit sets in place.
 *
 * @param bitsFilter the filter whose bit sets are merged in; it must hold at least as many
 *                   bit sets as this filter
 */
public void or(BitsFilter bitsFilter)
{
    final List<FixedBitSet> otherSets = bitsFilter.bitSets;
    int index = 0;
    while (index < bitSets.size())
    {
        final FixedBitSet mine = bitSets.get(index);
        final FixedBitSet theirs = otherSets.get(index);
        mine.or(theirs);
        index++;
    }
}
/**
 * Converts a bit-set backed doc set into a {@link SortedIntDocSet} holding the same
 * doc ids in ascending order.
 */
public static DocSet toSmallSet(BitDocSet bitSet) {
  final int count = bitSet.size();
  final int[] sorted = new int[count];
  final FixedBitSet bits = bitSet.getBits();

  int searchFrom = 0;
  int filled = 0;
  while (filled < count) {
    final int doc = bits.nextSetBit(searchFrom);
    sorted[filled++] = doc;
    searchFrom = doc + 1; // continue just past the bit we found
  }
  return new SortedIntDocSet(sorted);
}
/**
 * Discards the cached counts and clears every allocated bit set in {@code arr}.
 * The bit sets themselves are kept for reuse.
 */
@Override
public void reset() throws IOException {
  counts = null;
  for (FixedBitSet slotBits : arr) {
    if (slotBits != null) {
      slotBits.clear(0, slotBits.length());
    }
  }
}
/**
* Concludes the vote for a total of {@code target} votes and returns the resulting bit set.
*
* <p>The threshold used is {@code ceil(target / 2)} reduced by {@code minimum}. When
* {@code target} is even, every second column reported by
* {@code setTempSetToExactMatches} is removed from the result so that split votes go
* either way about half of the time. The shared {@code tempSet} is overwritten.
*
* @param target the total number of votes
* @return the bit set of winning columns
*/
protected synchronized FixedBitSet concludeVote(long target) {
// Half of the votes, rounded up ...
long target2 = (long) Math.ceil((double) target / (double) 2);
// ... expressed relative to 'minimum'.
// NOTE(review): presumably the count already factored out of every column - confirm.
target2 = target2 - minimum;
// Unlikely other than in testing: minimum more than half the votes
if (target2 < 0) {
// Threshold already exceeded everywhere: answer with all 'dimension' bits set.
FixedBitSet ans = new FixedBitSet(dimension);
ans.set(0, dimension);
return ans;
}
boolean even = (target % 2 == 0);
// Delegate to the (threshold, row) overload, starting from the top row of the record.
FixedBitSet result = concludeVote(target2, votingRecord.size() - 1);
if (even) {
// NOTE(review): presumably leaves in tempSet the columns whose count equals target2,
// i.e. the ties - confirm against setTempSetToExactMatches.
setTempSetToExactMatches(target2);
boolean switcher = true;
// 50% chance of being true with split vote.
// Walk the set bits of tempSet. switcher flips before each test, so the 1st, 3rd, ...
// bits stay in tempSet while the 2nd, 4th, ... are cleared from it.
int q = tempSet.nextSetBit(0);
while (q != DocIdSetIterator.NO_MORE_DOCS)
{
switcher = !switcher;
if (switcher) tempSet.clear(q);
// Do not call nextSetBit with an index at or past the end of the set.
if (q+1 >= tempSet.length()) q = DocIdSetIterator.NO_MORE_DOCS;
else q = tempSet.nextSetBit(q+1);
}
// Columns still set in tempSet are removed from the result.
result.andNot(tempSet);
}
return result;
}
/**
 * Builds the per-shard value for {@code slot}.
 *
 * <p>When a {@code factory} is configured, the result of {@code getShardHLL} is returned.
 * Otherwise a map is returned with the entries {@code "unique"} (the cached count, or the
 * cardinality of the slot's ordinal set), {@code "nTerms"} and, when at least one value is
 * present, {@code "vals"}: up to 100 of the values, in ascending ordinal order.
 */
@SuppressWarnings({"unchecked", "rawtypes"})
private Object getShardValue(int slot) throws IOException {
  if (factory != null) return getShardHLL(slot);

  final FixedBitSet ords = arr[slot];
  final int unique;
  if (counts == null) {
    unique = (ords == null) ? 0 : ords.cardinality();
  } else {
    unique = counts[slot];
  }

  final SimpleOrderedMap map = new SimpleOrderedMap();
  map.add("unique", unique);
  map.add("nTerms", nTerms);
  if (unique <= 0) {
    return map;
  }

  // TODO: make configurable
  // TODO: share values across buckets
  final int maxExplicit = 100;
  final List lst = new ArrayList(Math.min(unique, maxExplicit));
  final int maxOrd = ords.length();
  if (maxOrd > 0) {
    int ord = 0;
    while (lst.size() < maxExplicit) {
      ord = ords.nextSetBit(ord);
      if (ord == DocIdSetIterator.NO_MORE_DOCS) break;
      final BytesRef val = lookupOrd(ord);
      lst.add(field.getType().toObject(field, val));
      ord++;
      // never ask nextSetBit for an index at or past the end of the set
      if (ord >= maxOrd) break;
    }
  }
  map.add("vals", lst);
  return map;
}
/**
 * Returns a random doc id set over {@code sz} documents: a random number of draws
 * (fewer than {@code sz}) each set one random bit, so the same bit may be hit twice.
 */
public DocIdSet randSet(int sz) {
  final FixedBitSet bits = new FixedBitSet(sz);
  for (int remaining = r.nextInt(sz); remaining > 0; remaining--) {
    bits.set(r.nextInt(sz));
  }
  return new BitDocIdSet(bits);
}
/**
 * Stores the output bit set and score array, sizes the scratch buffer to one dimension,
 * and positions both iterators on their first element: the first query point is always
 * fetched, the first score only if one exists.
 */
private MergePointVisitor(PrefixCodedTerms sortedPackedPoints, FixedBitSet result, float[] scores) throws IOException {
  // outputs
  this.scores = scores;
  this.result = result;

  scratch.length = bytesPerDim;

  // inputs, each primed with its first element
  this.iterator = sortedPackedPoints.iterator();
  this.scoreIterator = aggregatedJoinScores.iterator();
  nextQueryPoint = iterator.next();
  if (!scoreIterator.hasNext()) {
    return;
  }
  nextScore = scoreIterator.next();
}
/**
 * Collects the documents matching any of the query points into a {@link DocSet}.
 *
 * <p>Each point is intersected with every segment that has point values for the field.
 * A point whose match count exceeds {@code maxDocFreq} is dropped entirely; otherwise its
 * global doc ids are added to the result. Live docs are applied when the set is built.
 */
public DocSet getDocSet(IndexSearcher searcher) throws IOException {
  final IndexReaderContext top = ReaderUtil.getTopLevelContext(searcher.getTopReaderContext());
  final List<LeafReaderContext> segs = top.leaves();
  final int maxDoc = top.reader().maxDoc();
  final DocSetBuilder builder = new DocSetBuilder(maxDoc, Math.min(64, (maxDoc >>> 10) + 4));

  // Point values per segment; null where the segment has none for this field.
  final int segCount = segs.size();
  final PointValues[] segPoints = new PointValues[segCount];
  for (int seg = 0; seg < segCount; seg++) {
    segPoints[seg] = segs.get(seg).reader().getPointValues(field);
  }

  final PointSetQuery.CutoffPointVisitor visitor =
      new PointSetQuery.CutoffPointVisitor(Math.min(maxDocFreq, maxDoc));
  final PrefixCodedTerms.TermIterator points = sortedPackedPoints.iterator();

  BytesRef point;
  nextPoint:
  while ((point = points.next()) != null) {
    visitor.setPoint(point);
    for (int seg = 0; seg < segCount; seg++) {
      final PointValues values = segPoints[seg];
      if (values != null) {
        visitor.setBase(segs.get(seg).docBase);
        values.intersect(visitor);
        if (visitor.getCount() > maxDocFreq) {
          // too frequent: skip this point altogether
          continue nextPoint;
        }
      }
    }
    final int collected = visitor.getCount();
    final int[] ids = visitor.getGlobalIds();
    for (int k = 0; k < collected; k++) {
      builder.add(ids[k]);
    }
  }

  return builder.build(getLiveDocs(searcher));
}
/**
 * Checks {@code BlockJoinSelector.wrap} for "docs with value" over a 20-document space:
 * of the six parents only doc 5 is expected to report a value.
 */
public void testDocsWithValue() {
  final BitSet parents = new FixedBitSet(20);
  for (int doc : new int[] {0, 5, 6, 10, 15, 19}) {
    parents.set(doc);
  }

  final BitSet children = new FixedBitSet(20);
  for (int doc : new int[] {2, 3, 4, 12, 17}) {
    children.set(doc);
  }

  // Docs 8 and 16 have a value but are not in the children set.
  final BitSet childDocsWithValue = new FixedBitSet(20);
  for (int doc : new int[] {2, 3, 4, 8, 16}) {
    childDocsWithValue.set(doc);
  }

  final Bits docsWithValue = BlockJoinSelector.wrap(childDocsWithValue, parents, children);
  assertFalse(docsWithValue.get(0));
  assertTrue(docsWithValue.get(5));
  for (int parent : new int[] {6, 10, 15, 19}) {
    assertFalse(docsWithValue.get(parent));
  }
}
/**
 * Checks MIN and MAX selection of sorted ordinals through {@code BlockJoinSelector.wrap}:
 * parent 5 is expected to yield 3 (min) and 7 (max), parent 15 yields 10 for both, and no
 * other parent has a value.
 */
public void testSortedSelector() throws IOException {
  final BitSet parents = new FixedBitSet(20);
  for (int doc : new int[] {0, 5, 6, 10, 15, 19}) {
    parents.set(doc);
  }

  final BitSet children = new FixedBitSet(20);
  for (int doc : new int[] {2, 3, 4, 12, 17}) {
    children.set(doc);
  }

  // {doc, ordinal} pairs; every other doc has ordinal -1. Doc 18 is not in the children set.
  final int[] ords = new int[20];
  Arrays.fill(ords, -1);
  final int[][] assigned = {{2, 5}, {3, 7}, {4, 3}, {12, 10}, {18, 10}};
  for (int[] pair : assigned) {
    ords[pair[0]] = pair[1];
  }

  final SortedDocValues mins = BlockJoinSelector.wrap(
      DocValues.singleton(new CannedSortedDocValues(ords)),
      BlockJoinSelector.Type.MIN,
      parents,
      toIter(children));
  assertEquals(5, nextDoc(mins,5));
  assertEquals(3, mins.ordValue());
  assertEquals(15, nextDoc(mins,15));
  assertEquals(10, mins.ordValue());
  assertNoMoreDoc(mins, 20);

  final SortedDocValues maxs = BlockJoinSelector.wrap(
      DocValues.singleton(new CannedSortedDocValues(ords)),
      BlockJoinSelector.Type.MAX,
      parents,
      toIter(children));
  assertEquals(5, nextDoc(maxs,5));
  assertEquals(7, maxs.ordValue());
  assertEquals(15, nextDoc(maxs,15));
  assertEquals(10, maxs.ordValue());
  assertNoMoreDoc( maxs,20);
}
/**
 * Returns the number of documents present in this set, in {@code other}, or in both.
 * Two bit sets are counted directly; any other implementation is asked to do the
 * counting itself.
 */
@Override
public int unionSize(DocSet other) {
  if (!(other instanceof BitDocSet)) {
    // they had better not call us back!
    return other.unionSize(this);
  }
  // When our own size is not known, counting the union directly is faster than
  // size + other.size - intersection_size.
  final FixedBitSet otherBits = ((BitDocSet) other).bits;
  return (int) FixedBitSet.unionCount(this.bits, otherBits);
}