The following lists code examples showing how to use the org.apache.lucene.index.DocsEnum API class; you can also click through to GitHub to view the full source of each example.
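For orientation before the project code below, here is a minimal, self-contained sketch of the pattern most of the examples share (Lucene 4.x API; the class and method names in this sketch are placeholders, not from any of the quoted projects): look the term up per index segment, iterate with nextDoc() until NO_MORE_DOCS, and add the leaf's docBase to map segment-local doc ids to global ids.

import java.io.IOException;
import java.util.List;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.DocIdSetIterator;

public final class DocsEnumSketch {
  // Prints the global (index-wide) id of every document containing field:text.
  public static void printGlobalDocIds(DirectoryReader reader, String field, String text) throws IOException {
    Term term = new Term(field, text);
    List<AtomicReaderContext> leaves = reader.getContext().leaves();
    for (AtomicReaderContext ctx : leaves) {
      // termDocsEnum returns null when this segment has no postings for the term
      DocsEnum docs = ctx.reader().termDocsEnum(term);
      if (docs == null) {
        continue;
      }
      int doc;
      while ((doc = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
        // docBase converts the segment-local doc id into a global one
        System.out.println(ctx.docBase + doc);
      }
    }
  }
}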
@Override
public void getDocumentsWithWordAsSet(String word, IntOpenHashSet documents) {
  DocsEnum docs = null;
  Term term = new Term(fieldName, word);
  try {
    int baseDocId;
    for (int i = 0; i < reader.length; i++) {
      docs = reader[i].termDocsEnum(term);
      baseDocId = contexts[i].docBase;
      if (docs != null) {
        while (docs.nextDoc() != DocsEnum.NO_MORE_DOCS) {
          documents.add(baseDocId + docs.docID());
        }
      }
    }
  } catch (IOException e) {
    LOGGER.error("Error while requesting documents for word \"" + word + "\".", e);
  }
}
@Override
public void getDocumentsWithWord(String word, IntArrayList documents) {
  DocsEnum docs = null;
  Term term = new Term(fieldName, word);
  try {
    int baseDocId;
    for (int i = 0; i < reader.length; i++) {
      docs = reader[i].termDocsEnum(term);
      baseDocId = contexts[i].docBase;
      if (docs != null) {
        while (docs.nextDoc() != DocsEnum.NO_MORE_DOCS) {
          documents.add(docs.docID() + baseDocId);
        }
      }
    }
  } catch (IOException e) {
    LOGGER.error("Error while requesting documents for word \"" + word + "\".", e);
  }
}
@Test
public void testTermDocIterable() throws IOException {
  for (int pass = 0; pass < 1; pass++) {
    for (int id = 0; id < BLOCKS; id++) {
      DocsEnum termDocs = reader.termDocsEnum(new Term("id", Integer.toString(id)));
      TermDocIterable iterable = new TermDocIterable(termDocs, reader);
      int count = 0;
      int i = 0;
      long s = System.nanoTime();
      for (Document document : iterable) {
        count++;
        assertEquals(i, Integer.parseInt(document.get("field")));
        i++;
      }
      long time = System.nanoTime() - s;
      System.out.println(time / 1000000.0 + " " + id + " " + pass);
      assertEquals(COUNT_PER_BLOCK, count);
    }
  }
}
private void applyDeletes(Directory directory, final IndexWriter indexWriter, IndexSearcherCloseable searcher,
    String shard, boolean emitDeletes, Configuration configuration) throws IOException {
  DirectoryReader newReader = DirectoryReader.open(directory);
  try {
    List<AtomicReaderContext> newLeaves = newReader.getContext().leaves();
    BlurPartitioner blurPartitioner = new BlurPartitioner();
    Text key = new Text();
    int numberOfShards = _shardContext.getTableContext().getDescriptor().getShardCount();
    int shardId = ShardUtil.getShardIndex(shard);
    // indexWriter must be final so the anonymous Action below can capture it
    Action action = new Action() {
      @Override
      public void found(AtomicReader reader, Bits liveDocs, TermsEnum termsEnum) throws IOException {
        DocsEnum docsEnum = termsEnum.docs(liveDocs, null);
        if (docsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
          indexWriter.deleteDocuments(new Term(BlurConstants.ROW_ID, BytesRef.deepCopyOf(termsEnum.term())));
        }
      }
    };
    LOG.info("Applying deletes for table [{0}] shard [{1}] new reader [{2}]", _table, shard, newReader);
    boolean skipCheckRowIds = isInternal(newReader);
    LOG.info("Skip rowid check [{0}] for table [{1}] shard [{2}] new reader [{3}]", skipCheckRowIds, _table, shard,
        newReader);
    for (AtomicReaderContext context : newLeaves) {
      AtomicReader newAtomicReader = context.reader();
      if (isFastRowIdDeleteSupported(newAtomicReader)) {
        runNewRowIdCheckAndDelete(indexWriter, emitDeletes, blurPartitioner, key, numberOfShards, shardId,
            newAtomicReader, skipCheckRowIds);
      } else {
        runOldMergeSortRowIdCheckAndDelete(emitDeletes, searcher.getIndexReader(), blurPartitioner, key,
            numberOfShards, shardId, action, newAtomicReader);
      }
    }
  } finally {
    newReader.close();
  }
}
private static void applyFamily(OpenBitSet bits, String family, AtomicReader atomicReader, int primeDocRowId,
    int numberOfDocsInRow, Bits liveDocs) throws IOException {
  Fields fields = atomicReader.fields();
  Terms terms = fields.terms(BlurConstants.FAMILY);
  TermsEnum iterator = terms.iterator(null);
  BytesRef text = new BytesRef(family);
  int lastDocId = primeDocRowId + numberOfDocsInRow;
  if (iterator.seekExact(text, true)) {
    DocsEnum docs = iterator.docs(liveDocs, null, DocsEnum.FLAG_NONE);
    int doc = primeDocRowId;
    while ((doc = docs.advance(doc)) < lastDocId) {
      bits.set(doc - primeDocRowId);
    }
  }
}
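The method above is a seek-then-iterate pattern: position a TermsEnum on one exact term, then pull a DocsEnum restricted by liveDocs. Stripped of the Blur-specific row/bitset logic, the skeleton looks roughly like this. It uses the same two-argument seekExact call shown above (the boolean useCache parameter existed in the Lucene 4.x line these projects use); the field and value are hypothetical.

import java.io.IOException;

import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BytesRef;

public final class SeekThenIterateSketch {
  // Counts live documents in one segment that contain the exact term field:value.
  public static int countLiveDocsWithTerm(AtomicReader reader, String field, String value) throws IOException {
    Fields fields = reader.fields(); // null when the segment has no postings at all
    if (fields == null) {
      return 0;
    }
    Terms terms = fields.terms(field); // null when the field is absent
    if (terms == null) {
      return 0;
    }
    TermsEnum termsEnum = terms.iterator(null);
    if (!termsEnum.seekExact(new BytesRef(value), true)) {
      return 0; // term not present in this segment
    }
    // FLAG_NONE: only doc ids are needed, not term frequencies
    DocsEnum docs = termsEnum.docs(reader.getLiveDocs(), null, DocsEnum.FLAG_NONE);
    int count = 0;
    while (docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
      count++;
    }
    return count;
  }
}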
public TermDocIterable(DocsEnum docsEnum, AtomicReader reader, ResetableDocumentStoredFieldVisitor fieldSelector) {
  if (docsEnum == null) {
    throw new NullPointerException("docsEnum can not be null.");
  }
  this.docsEnum = docsEnum;
  this.reader = reader;
  this.fieldSelector = fieldSelector;
}
@Override
public DocsEnum docs(Bits skipDocs, DocsEnum reuse, int flags) throws IOException {
  assert !eof;
  //if (DEBUG) {
  //  System.out.println("BTTR.docs seg=" + segment);
  //}
  currentFrame.decodeMetaData();
  //if (DEBUG) {
  //  System.out.println("  state=" + currentFrame.state);
  //}
  return postingsReader.docs(fieldInfo, currentFrame.state, skipDocs, reuse, flags);
}
@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
  AtomicReader reader = context.reader();
  List<DocIdSet> list = new ArrayList<DocIdSet>();
  Fields fields = reader.fields();
  Terms terms = fields.terms(_fieldName);
  if (terms == null) {
    // if the field is not present then show nothing.
    return DocIdSet.EMPTY_DOCIDSET;
  }
  TermsEnum iterator = terms.iterator(null);
  BytesRef bytesRef;
  DocumentVisibilityEvaluator visibilityEvaluator = new DocumentVisibilityEvaluator(_authorizations);
  while ((bytesRef = iterator.next()) != null) {
    if (isVisible(visibilityEvaluator, bytesRef)) {
      DocIdSet docIdSet = _filterCacheStrategy.getDocIdSet(_fieldName, bytesRef, reader);
      if (docIdSet != null) {
        list.add(docIdSet);
      } else {
        // Do not use acceptDocs because we want the acl cache to be version agnostic.
        DocsEnum docsEnum = iterator.docs(null, null);
        list.add(buildCache(reader, docsEnum, bytesRef));
      }
    }
  }
  return getLogicalOr(list);
}
private boolean checkDocs() throws IOException {
  DocsEnum maskDocsEnum = _maskTermsEnum.docs(null, null, DocsEnum.FLAG_NONE);
  DocsEnum docsEnum = in.docs(null, null, DocsEnum.FLAG_NONE);
  int docId;
  while ((docId = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
    if (maskDocsEnum.advance(docId) != docId) {
      return true;
    }
  }
  return false;
}
private boolean hasAccess(BytesRef term) throws IOException {
  DocsEnum docsEnum = in.docs(null, null, DocsEnum.FLAG_NONE);
  int docId;
  while ((docId = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
    if (_accessControlReader.hasAccess(ReadType.TERMS_ENUM, docId)) {
      return true;
    }
  }
  return false;
}
public synchronized void searchWithPosition(int queryTermsSeen) {
  if (null != this.reader) {
    if (null != this.reader.getContext()) {
      if (null != this.reader.getContext().leaves()) {
        Term term = new Term("tokens", this.searchTerm);
        for (AtomicReaderContext ctx : this.reader.getContext().leaves()) {
          int base = ctx.docBase;
          try {
            DocsAndPositionsEnum docEnum = MultiFields.getTermPositionsEnum(ctx.reader(),
                MultiFields.getLiveDocs(ctx.reader()), "tokens", term.bytes());
            if (null != docEnum) {
              int doc = DocsEnum.NO_MORE_DOCS;
              while ((doc = docEnum.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
                long docId = doc + base;
                CandidateSimInfo simInfo = null;
                if (this.simMap.containsKey(docId)) {
                  simInfo = this.simMap.get(docId);
                  simInfo.similarity = simInfo.similarity + Math.min(freqTerm, docEnum.freq());
                } else {
                  if (earlierDocs.contains(docId))
                    continue;
                  Document d = SearchManager.searcher.get(shard).getDocument(docId);
                  long candidateId = Long.parseLong(d.get("id"));
                  // Get rid of these early -- we're only looking for candidates
                  // whose ids are smaller than the query
                  if (candidateId >= this.queryId) {
                    earlierDocs.add(docId);
                    continue; // we reject the candidate
                  }
                  simInfo = new CandidateSimInfo();
                  simInfo.doc = d;
                  simInfo.candidateSize = Integer.parseInt(d.get("size"));
                  simInfo.similarity = Math.min(freqTerm, docEnum.freq());
                  this.simMap.put(docId, simInfo);
                }
                simInfo.queryMatchPosition = queryTermsSeen;
                int candidatePos = docEnum.nextPosition();
                simInfo.candidateMatchPosition = candidatePos + docEnum.freq();
                if (!Util.isSatisfyPosFilter(this.simMap.get(docId).similarity, this.querySize,
                    queryTermsSeen, simInfo.candidateSize, simInfo.candidateMatchPosition,
                    this.computedThreshold)) {
                  this.simMap.remove(docId);
                }
              }
            } else {
              logger.trace("docEnum is null, " + base + ", term: " + this.searchTerm + Util.debug_thread());
            }
          } catch (Exception e) {
            e.printStackTrace();
            logger.error("exception caught " + e.getMessage() + Util.debug_thread() + " search term:"
                + this.searchTerm);
          }
        }
      } else {
        logger.debug("leaves are null, " + this.searchTerm + Util.debug_thread());
      }
    } else {
      logger.debug("getContext is null, " + this.searchTerm + Util.debug_thread());
    }
  } else {
    logger.debug("this.reader is null, " + this.searchTerm + Util.debug_thread());
  }
}
protected void requestDocumentsWithWord(String word, IntObjectOpenHashMap<IntArrayList[]> positionsInDocs,
    IntIntOpenHashMap docLengths, int wordId, int numberOfWords) {
  DocsAndPositionsEnum docPosEnum = null;
  Term term = new Term(fieldName, word);
  int localDocId, globalDocId, baseDocId;
  IntArrayList[] positions;
  try {
    for (int i = 0; i < reader.length; i++) {
      docPosEnum = reader[i].termPositionsEnum(term);
      baseDocId = contexts[i].docBase;
      if (docPosEnum != null) {
        while (docPosEnum.nextDoc() != DocsEnum.NO_MORE_DOCS) {
          localDocId = docPosEnum.docID();
          globalDocId = localDocId + baseDocId;
          // if this is the first word and we found a new document
          if (!positionsInDocs.containsKey(globalDocId)) {
            positions = new IntArrayList[numberOfWords];
            positionsInDocs.put(globalDocId, positions);
          } else {
            positions = positionsInDocs.get(globalDocId);
          }
          if (positions[wordId] == null) {
            positions[wordId] = new IntArrayList();
          }
          // Go through the positions inside this document
          for (int p = 0; p < docPosEnum.freq(); ++p) {
            positions[wordId].add(docPosEnum.nextPosition());
          }
          if (!docLengths.containsKey(globalDocId)) {
            // Get the length of the document
            docLengths.put(globalDocId, reader[i].document(localDocId).getField(docLengthFieldName)
                .numericValue().intValue());
          }
        }
      }
    }
  } catch (IOException e) {
    LOGGER.error("Error while requesting documents for word \"" + word + "\".", e);
  }
}
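The method above leans on the DocsAndPositionsEnum contract: after each successful nextDoc(), nextPosition() may be called up to freq() times for that document. A minimal sketch of just that contract in isolation (Lucene 4.x; the class, field, and value names are hypothetical):

import java.io.IOException;

import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.DocIdSetIterator;

public final class PositionsSketch {
  // Prints every position at which field:value occurs, document by document.
  public static void printPositions(AtomicReader reader, String field, String value) throws IOException {
    // termPositionsEnum returns null when the term is absent or positions were not indexed
    DocsAndPositionsEnum postings = reader.termPositionsEnum(new Term(field, value));
    if (postings == null) {
      return;
    }
    int doc;
    while ((doc = postings.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
      int freq = postings.freq(); // nextPosition() may be called exactly freq() times
      for (int i = 0; i < freq; i++) {
        System.out.println("doc=" + doc + " position=" + postings.nextPosition());
      }
    }
  }
}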
public TermDocIterable(DocsEnum docsEnum, AtomicReader reader) {
  this(docsEnum, reader, new ResetableDocumentStoredFieldVisitor());
}
@Override
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
  checkRunningState();
  return _termsEnum.docs(liveDocs, reuse, flags);
}
@Override
public DocsEnum docs(Bits skipDocs, DocsEnum reuse, int flags) throws IOException {
  currentFrame.decodeMetaData();
  return postingsReader.docs(fieldInfo, currentFrame.termState, skipDocs, reuse, flags);
}
@Override
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
  Bits secureLiveDocs = getSecureLiveDocs(liveDocs, _maxDoc, _accessControlReader);
  return in.docs(secureLiveDocs, reuse, flags);
}
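The three docs() overrides above are variations on one decorator pattern: run a check or substitute some state, then delegate to the wrapped enum. A minimal sketch of such a wrapper built on Lucene 4.x's FilterAtomicReader.FilterTermsEnum (the class name and the substituted liveDocs are hypothetical, loosely modelled on the last override):

import java.io.IOException;

import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FilterAtomicReader.FilterTermsEnum;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.Bits;

// Sketch: a TermsEnum decorator that forces its own liveDocs onto every DocsEnum it hands out.
public final class RestrictedTermsEnum extends FilterTermsEnum {
  private final Bits restrictedLiveDocs; // hypothetical pre-computed visibility bits

  public RestrictedTermsEnum(TermsEnum in, Bits restrictedLiveDocs) {
    super(in);
    this.restrictedLiveDocs = restrictedLiveDocs;
  }

  @Override
  public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
    // Ignore the caller's liveDocs and substitute the restricted set,
    // mirroring the secureLiveDocs override above.
    return in.docs(restrictedLiveDocs, reuse, flags);
  }
}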