下面列出了org.apache.lucene.index.PostingsEnum#OFFSETS 实例代码,或者点击链接到github查看源代码,也可以在右侧发表评论。
/**
 * Requests a {@link PostingsEnum} from {@code termsEnum}, randomly varying the request.
 *
 * <p>One of three things happens, decided by {@code random}:
 * <ul>
 *   <li>the caller's {@code reuse} and {@code flags} are passed through unchanged;</li>
 *   <li>the caller's {@code reuse} is passed through with {@link PostingsEnum#FREQS} OR-ed into
 *       {@code flags};</li>
 *   <li>{@code reuse} and {@code flags} are both ignored and a fresh enum is requested with one of
 *       {@code POSITIONS}, {@code OFFSETS}, {@code PAYLOADS} or {@code ALL}.</li>
 * </ul>
 *
 * @param random    source of randomness driving the choice above
 * @param termsEnum terms enum whose {@code postings} method is invoked
 * @param reuse     enum offered for reuse; not used on the fully randomized path
 * @param flags     flags requested by the caller; not used on the fully randomized path
 * @return whatever {@code termsEnum.postings(...)} returns for the chosen arguments
 * @throws IOException if the underlying {@code postings} call throws it
 */
public static PostingsEnum docs(Random random, TermsEnum termsEnum, PostingsEnum reuse, int flags) throws IOException {
  // TODO: simplify this method? it would be easier to randomly either use the flags passed, or do the random selection,
  // FREQS should be part of the random selection instead of outside on its own?
  final boolean tweakRequest = random.nextBoolean();
  if (!tweakRequest) {
    return termsEnum.postings(reuse, flags);
  }

  final boolean pickRandomFlags = random.nextBoolean();
  if (!pickRandomFlags) {
    return termsEnum.postings(reuse, flags | PostingsEnum.FREQS);
  }

  // Fully randomized request: the caller's reuse and flags are deliberately dropped here.
  final int choice = random.nextInt(4);
  final int posFlags;
  if (choice == 0) {
    posFlags = PostingsEnum.POSITIONS;
  } else if (choice == 1) {
    posFlags = PostingsEnum.OFFSETS;
  } else if (choice == 2) {
    posFlags = PostingsEnum.PAYLOADS;
  } else {
    posFlags = PostingsEnum.ALL;
  }
  return termsEnum.postings(null, posFlags);
}
/**
 * Translates a bit mask of {@code IndexLookup.FLAG_*} values into the matching
 * {@link PostingsEnum} flag mask.
 *
 * @param flags bitwise OR of {@code IndexLookup.FLAG_FREQUENCIES}, {@code FLAG_POSITIONS},
 *              {@code FLAG_PAYLOADS} and/or {@code FLAG_OFFSETS}
 * @return {@link PostingsEnum#NONE} OR-ed with the {@code PostingsEnum} constant for each flag
 *         that is set in {@code flags}
 */
private int convertToLuceneFlags(int flags) {
  int luceneFlags = PostingsEnum.NONE;
  if ((flags & IndexLookup.FLAG_FREQUENCIES) > 0) {
    luceneFlags |= PostingsEnum.FREQS;
  }
  if ((flags & IndexLookup.FLAG_POSITIONS) > 0) {
    luceneFlags |= PostingsEnum.POSITIONS;
  }
  if ((flags & IndexLookup.FLAG_PAYLOADS) > 0) {
    luceneFlags |= PostingsEnum.PAYLOADS;
  }
  if ((flags & IndexLookup.FLAG_OFFSETS) > 0) {
    luceneFlags |= PostingsEnum.OFFSETS;
  }
  return luceneFlags;
}
/**
 * Returns a {@link Terms} view whose postings are always requested with
 * {@link PostingsEnum#OFFSETS} added to the caller's flags.
 *
 * <p>Note that the {@code field} argument is not consulted: the terms are always looked up for
 * {@code fieldName} (declared outside this method).
 *
 * @param field requested field name; ignored by this implementation
 * @return a filtering wrapper around {@code super.terms(fieldName)}
 * @throws IOException if the underlying lookup throws it
 */
@Override
public Terms terms(String field) throws IOException {
  // ensure the underlying PostingsEnum returns offsets. It's sad we have to do this to use the SpanCollector.
  final Terms delegateTerms = super.terms(fieldName);
  return new FilterTerms(delegateTerms) {
    @Override
    public TermsEnum iterator() throws IOException {
      final TermsEnum delegateEnum = in.iterator();
      return new FilterTermsEnum(delegateEnum) {
        @Override
        public PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException {
          // Force offsets on top of whatever the caller asked for.
          final int flagsWithOffsets = flags | PostingsEnum.OFFSETS;
          return super.postings(reuse, flagsWithOffsets);
        }
      };
    }
  };
}
/**
 * Sets the current field for writing; called when the writing switches to another field.
 *
 * <p>Records {@code fieldInfo} and derives from it which per-posting data is written
 * (frequencies, positions, offsets, payloads) as well as {@code enumFlags}, the
 * {@link PostingsEnum} flag mask matching that data. This method returns nothing.
 *
 * @param fieldInfo description of the field that is about to be written
 */
@Override
public void setField(FieldInfo fieldInfo) {
  this.fieldInfo = fieldInfo;
  indexOptions = fieldInfo.getIndexOptions();

  // Each index option level includes everything the previous level provides.
  writeFreqs = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
  writePositions = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
  writeOffsets = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
  writePayloads = fieldInfo.hasPayloads();

  final int selectedFlags;
  if (!writeFreqs) {
    selectedFlags = 0;
  } else if (!writePositions) {
    selectedFlags = PostingsEnum.FREQS;
  } else if (!writeOffsets) {
    // Positions are indexed but offsets are not.
    selectedFlags = writePayloads ? PostingsEnum.PAYLOADS : PostingsEnum.POSITIONS;
  } else {
    // Offsets are indexed; add payloads when the field carries them.
    selectedFlags = writePayloads
        ? (PostingsEnum.PAYLOADS | PostingsEnum.OFFSETS)
        : PostingsEnum.OFFSETS;
  }
  enumFlags = selectedFlags;
}
/**
 * Reports the postings data this component needs.
 *
 * @return the {@link PostingsEnum} flag mask combining {@code PAYLOADS} and {@code OFFSETS}
 */
@Override
public int getRequiredPostings() {
  final int payloadsAndOffsets = PostingsEnum.PAYLOADS | PostingsEnum.OFFSETS;
  return payloadsAndOffsets;
}
/**
 * Walks every term of {@code termsEnum} and appends the requested per-term details to a new
 * entry named {@code field} inside {@code docNL}.
 *
 * <p>Depending on {@code fieldOptions}, each term entry may receive: {@code "tf"},
 * {@code "positions"}, {@code "offsets"} (as {@code "start"}/{@code "end"} pairs),
 * {@code "payloads"} (Base64 encoded), {@code "df"} and {@code "tf-idf"}.
 *
 * @param docNL        container of the current document; receives the per-field list
 * @param fieldOptions selects which pieces of information are emitted
 * @param reader       used to look up the document frequency of each term
 * @param docID        not used by this method
 * @param termsEnum    enumeration of the terms to report
 * @param field        name of the field the terms belong to
 * @throws IOException if reading terms or postings fails
 */
private void mapOneVector(NamedList<Object> docNL, FieldOptions fieldOptions, IndexReader reader, int docID, TermsEnum termsEnum, String field) throws IOException {
  final NamedList<Object> fieldNL = new NamedList<>();
  docNL.add(field, fieldNL);

  PostingsEnum postings = null; // handed back to postings(...) for reuse on the next term
  for (BytesRef text = termsEnum.next(); text != null; text = termsEnum.next()) {
    final String term = text.utf8ToString();
    final NamedList<Object> termInfo = new NamedList<>();
    fieldNL.add(term, termInfo);

    final int freq = (int) termsEnum.totalTermFreq();
    if (fieldOptions.termFreq) {
      termInfo.add("tf", freq);
    }

    int postingsFlags = 0;
    if (fieldOptions.positions) {
      postingsFlags |= PostingsEnum.POSITIONS;
    }
    //payloads require offsets
    if (fieldOptions.offsets || fieldOptions.payloads) {
      postingsFlags |= PostingsEnum.OFFSETS;
    }
    if (fieldOptions.payloads) {
      postingsFlags |= PostingsEnum.PAYLOADS;
    }

    postings = termsEnum.postings(postings, postingsFlags);
    if (postings != null) {
      // Advance to the first document before any per-position data is read.
      postings.nextDoc();

      if (postingsFlags != 0) {
        // The sub-lists are created lazily so nothing is added when there is no data.
        NamedList<Integer> positionsNL = null;
        NamedList<Number> offsetsNL = null;
        NamedList<String> payloadsNL = null;

        for (int remaining = freq; remaining > 0; remaining--) {
          final int pos = postings.nextPosition();
          if (fieldOptions.positions && pos >= 0) {
            if (positionsNL == null) {
              positionsNL = new NamedList<>();
              termInfo.add("positions", positionsNL);
            }
            positionsNL.add("position", pos);
          }

          // A negative start offset is treated as "no offsets available".
          if (fieldOptions.offsets && postings.startOffset() >= 0) {
            if (offsetsNL == null) {
              offsetsNL = new NamedList<>();
              termInfo.add("offsets", offsetsNL);
            }
            offsetsNL.add("start", postings.startOffset());
            offsetsNL.add("end", postings.endOffset());
          }

          if (fieldOptions.payloads) {
            final BytesRef payload = postings.getPayload();
            if (payload != null) {
              if (payloadsNL == null) {
                payloadsNL = new NamedList<>();
                termInfo.add("payloads", payloadsNL);
              }
              payloadsNL.add("payload", Base64.byteArrayToBase64(payload.bytes, payload.offset, payload.length));
            }
          }
        }
      }
    }

    // The document frequency is only looked up when something actually needs it.
    int df = 0;
    if (fieldOptions.docFreq || fieldOptions.tfIdf) {
      df = reader.docFreq(new Term(field, text));
    }
    if (fieldOptions.docFreq) {
      termInfo.add("df", df);
    }
    // TODO: this is not TF/IDF by anyone's definition!
    if (fieldOptions.tfIdf) {
      final double tfIdfVal = ((double) freq) / df;
      termInfo.add("tf-idf", tfIdfVal);
    }
  }
}
/**
 * Reports the postings data this component needs.
 *
 * @return the {@link PostingsEnum} flag mask combining {@code PAYLOADS} and {@code OFFSETS}
 */
@Override
public int getRequiredPostings() {
  final int payloadsAndOffsets = PostingsEnum.PAYLOADS | PostingsEnum.OFFSETS;
  return payloadsAndOffsets;
}