The following are code examples of org.apache.lucene.index.PostingsEnum#getPayload(). You can follow the link to view the source on GitHub, or leave a comment on the right.
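All of the examples below share one call pattern: getPayload() is only meaningful after a call to nextPosition(), and it returns null when the current position carries no payload. Here is a minimal sketch of that pattern; the method name readPayloads and the term "lucene" are placeholders, and the snippet assumes a Terms instance for a field indexed with payloads (e.g. from a term vector or a LeafReader).

import java.io.IOException;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BytesRef;

static void readPayloads(Terms terms) throws IOException {
    TermsEnum termsEnum = terms.iterator();
    if (!termsEnum.seekExact(new BytesRef("lucene"))) { // placeholder term
        return;
    }
    PostingsEnum postings = termsEnum.postings(null, PostingsEnum.PAYLOADS);
    while (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        int freq = postings.freq();
        for (int i = 0; i < freq; i++) {
            postings.nextPosition();                  // getPayload() is undefined before this call
            BytesRef payload = postings.getPayload(); // null if this position has no payload
            if (payload != null) {
                // use payload.bytes[payload.offset .. payload.offset + payload.length)
            }
        }
    }
}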
private void initValues(Terms curTerms, PostingsEnum posEnum, int termFreq) throws IOException {
    for (int j = 0; j < termFreq; j++) {
        int nextPos = posEnum.nextPosition();
        if (curTerms.hasPositions()) {
            currentPositions[j] = nextPos;
        }
        if (curTerms.hasOffsets()) {
            currentStartOffset[j] = posEnum.startOffset();
            currentEndOffset[j] = posEnum.endOffset();
        }
        if (curTerms.hasPayloads()) {
            BytesRef curPayload = posEnum.getPayload();
            if (curPayload != null) {
                // honor the BytesRef offset; the payload bytes need not start at index 0
                currentPayloads[j] = new BytesArray(curPayload.bytes, curPayload.offset, curPayload.length);
            } else {
                currentPayloads[j] = null;
            }
        }
    }
}
static TermPosting of(int position, PostingsEnum penum) throws IOException {
    TermPosting posting = new TermPosting();
    // set position
    posting.position = position;
    // set offsets (if available)
    int sOffset = penum.startOffset();
    int eOffset = penum.endOffset();
    if (sOffset >= 0 && eOffset >= 0) {
        posting.startOffset = sOffset;
        posting.endOffset = eOffset;
    }
    // set payload (if available)
    if (penum.getPayload() != null) {
        posting.payload = BytesRef.deepCopyOf(penum.getPayload());
    }
    return posting;
}
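For context, a caller of of(...) is expected to advance positions itself; a hypothetical driver loop (assuming postingsEnum is already positioned on a document) might look like:

// Illustrative only: build one TermPosting per position of the current document.
int freq = postingsEnum.freq();
List<TermPosting> postings = new ArrayList<>(freq);
for (int i = 0; i < freq; i++) {
    int position = postingsEnum.nextPosition(); // also makes startOffset()/endOffset()/getPayload() valid
    postings.add(TermPosting.of(position, postingsEnum));
}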
@Override
public void collectLeaf(PostingsEnum postings, int position, Term term) throws IOException {
    if (!matches)
        return;
    if (upto >= payloadToMatch.size()) {
        matches = false;
        return;
    }
    BytesRef payload = postings.getPayload();
    if (payloadToMatch.get(upto) == null) {
        matches = payload == null;
        upto++;
        return;
    }
    if (payload == null) {
        matches = false;
        upto++;
        return;
    }
    matches = payloadToMatch.get(upto).bytesEquals(payload);
    upto++;
}
@Override
public void collectLeaf(PostingsEnum postings, int position, Term term) throws IOException {
    BytesRef payload = postings.getPayload();
    float payloadFactor = decoder.computePayloadFactor(payload);
    payloadScore = function.currentScore(docID(), getField(), in.startPosition(), in.endPosition(),
        payloadsSeen, payloadScore, payloadFactor);
    payloadsSeen++;
}
@Override
public void collectLeaf(PostingsEnum postings, int position, Term term) throws IOException {
    BytesRef payload = postings.getPayload();
    if (payload == null)
        return;
    final byte[] bytes = new byte[payload.length];
    System.arraycopy(payload.bytes, payload.offset, bytes, 0, payload.length);
    payloads.add(bytes);
}
private List<MWESentenceContext> collectTermOffsets(Terms termVectorLookup) throws IOException {
    List<MWESentenceContext> result = new ArrayList<>();
    TermsEnum tiRef = termVectorLookup.iterator();
    BytesRef luceneTerm = tiRef.next();
    while (luceneTerm != null) {
        if (luceneTerm.length == 0) {
            luceneTerm = tiRef.next();
            continue;
        }
        String tString = luceneTerm.utf8ToString();
        if (!allCandidates.contains(tString)) {
            luceneTerm = tiRef.next();
            continue;
        }
        PostingsEnum postingsEnum = tiRef.postings(null, PostingsEnum.ALL);
        //PostingsEnum postingsEnum = ti.postings(null, PostingsEnum.OFFSETS);
        int doc = postingsEnum.nextDoc(); //this should be just 1 doc, i.e., the constraint for getting this TV
        if (doc != PostingsEnum.NO_MORE_DOCS) {
            int totalOccurrence = postingsEnum.freq();
            for (int i = 0; i < totalOccurrence; i++) {
                postingsEnum.nextPosition();
                int start = postingsEnum.startOffset();
                int end = postingsEnum.endOffset();
                BytesRef payload = postingsEnum.getPayload();
                int sentenceId = -1;
                if (payload != null) {
                    sentenceId = new SentenceContext(MWEMetadata.deserialize(payload.utf8ToString())).getSentenceId();
                }
                result.add(new MWESentenceContext(tString, sentenceId, start, end));
            }
        }
        luceneTerm = tiRef.next();
    }
    Collections.sort(result);
    return result;
}
@Override
protected CustomScoreProvider getCustomScoreProvider(LeafReaderContext context) throws IOException {
    return new CustomScoreProvider(context) {
        @Override
        public float customScore(int docID, float subQueryScore, float valSrcScore) throws IOException {
            float score = 0;
            double docVectorNorm = 0;
            LeafReader reader = context.reader();
            Terms terms = reader.getTermVector(docID, field);
            if (vector.size() != terms.size()) {
                throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "indexed and input vector array must have same length");
            }
            TermsEnum iter = terms.iterator();
            BytesRef text;
            while ((text = iter.next()) != null) {
                String term = text.utf8ToString();
                float payloadValue = 0f;
                PostingsEnum postings = iter.postings(null, PostingsEnum.ALL);
                while (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                    int freq = postings.freq();
                    while (freq-- > 0) postings.nextPosition(); // advance to the last position; its payload holds the weight
                    BytesRef payload = postings.getPayload();
                    if (payload == null) {
                        continue; // guard against positions without a payload
                    }
                    payloadValue = PayloadHelper.decodeFloat(payload.bytes, payload.offset);
                    if (cosine)
                        docVectorNorm += Math.pow(payloadValue, 2.0);
                }
                score = (float) (score + payloadValue * (vector.get(Integer.parseInt(term))));
            }
            if (cosine) {
                if ((docVectorNorm == 0) || (queryVectorNorm == 0)) return 0f;
                return (float) (score / (Math.sqrt(docVectorNorm) * Math.sqrt(queryVectorNorm)));
            }
            return score;
        }
    };
}
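The PayloadHelper.decodeFloat call above assumes every payload was written as a 4-byte float at index time. The filter below is a minimal sketch of that encoding side; ConstantPayloadFilter and the 0.75f weight are made up for illustration, and a DelimitedPayloadTokenFilter with a FloatEncoder achieves the same thing for "term|0.75"-style input.

import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.payloads.PayloadHelper;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.util.BytesRef;

// Sketch: attach the same constant float payload to every token.
final class ConstantPayloadFilter extends TokenFilter {
    private final PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class);
    private final BytesRef payload = new BytesRef(PayloadHelper.encodeFloat(0.75f));

    ConstantPayloadFilter(TokenStream input) {
        super(input);
    }

    @Override
    public boolean incrementToken() throws IOException {
        if (!input.incrementToken()) {
            return false;
        }
        payloadAtt.setPayload(payload); // read back later via PostingsEnum#getPayload()
        return true;
    }
}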
@Override
public void collectLeaf(PostingsEnum postings, int position, Term term) throws IOException {
    if (postings.getPayload() != null) {
        payloads.add(BytesRef.deepCopyOf(postings.getPayload()));
    }
}
private void mapOneVector(NamedList<Object> docNL, FieldOptions fieldOptions, IndexReader reader, int docID, TermsEnum termsEnum, String field) throws IOException {
    NamedList<Object> fieldNL = new NamedList<>();
    docNL.add(field, fieldNL);
    BytesRef text;
    PostingsEnum dpEnum = null;
    while ((text = termsEnum.next()) != null) {
        String term = text.utf8ToString();
        NamedList<Object> termInfo = new NamedList<>();
        fieldNL.add(term, termInfo);
        final int freq = (int) termsEnum.totalTermFreq();
        if (fieldOptions.termFreq) {
            termInfo.add("tf", freq);
        }
        int dpEnumFlags = 0;
        dpEnumFlags |= fieldOptions.positions ? PostingsEnum.POSITIONS : 0;
        //payloads require offsets
        dpEnumFlags |= (fieldOptions.offsets || fieldOptions.payloads) ? PostingsEnum.OFFSETS : 0;
        dpEnumFlags |= fieldOptions.payloads ? PostingsEnum.PAYLOADS : 0;
        dpEnum = termsEnum.postings(dpEnum, dpEnumFlags);
        boolean atNextDoc = false;
        if (dpEnum != null) {
            dpEnum.nextDoc();
            atNextDoc = true;
        }
        if (atNextDoc && dpEnumFlags != 0) {
            NamedList<Integer> positionsNL = null;
            NamedList<Number> theOffsets = null;
            NamedList<String> thePayloads = null;
            for (int i = 0; i < freq; i++) {
                final int pos = dpEnum.nextPosition();
                if (fieldOptions.positions && pos >= 0) {
                    if (positionsNL == null) {
                        positionsNL = new NamedList<>();
                        termInfo.add("positions", positionsNL);
                    }
                    positionsNL.add("position", pos);
                }
                int startOffset = fieldOptions.offsets ? dpEnum.startOffset() : -1;
                if (startOffset >= 0) {
                    if (theOffsets == null) {
                        theOffsets = new NamedList<>();
                        termInfo.add("offsets", theOffsets);
                    }
                    theOffsets.add("start", dpEnum.startOffset());
                    theOffsets.add("end", dpEnum.endOffset());
                }
                BytesRef payload = fieldOptions.payloads ? dpEnum.getPayload() : null;
                if (payload != null) {
                    if (thePayloads == null) {
                        thePayloads = new NamedList<>();
                        termInfo.add("payloads", thePayloads);
                    }
                    thePayloads.add("payload", Base64.byteArrayToBase64(payload.bytes, payload.offset, payload.length));
                }
            }
        }
        int df = 0;
        if (fieldOptions.docFreq || fieldOptions.tfIdf) {
            df = reader.docFreq(new Term(field, text));
        }
        if (fieldOptions.docFreq) {
            termInfo.add("df", df);
        }
        // TODO: this is not TF/IDF by anyone's definition!
        if (fieldOptions.tfIdf) {
            double tfIdfVal = ((double) freq) / df;
            termInfo.add("tf-idf", tfIdfVal);
        }
    }
}
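Term-vector-based examples such as this one require the field to have been indexed with term vector payloads in the first place. A minimal (assumed) field configuration; field name and value are placeholders:

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;

FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
ft.setStoreTermVectors(true);
ft.setStoreTermVectorPositions(true);  // positions are required before payloads can be stored
ft.setStoreTermVectorOffsets(true);
ft.setStoreTermVectorPayloads(true);
ft.freeze();

Document doc = new Document();
doc.add(new Field("body", "some analyzed text", ft));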
private List<MWEInSentence> collectTermSentenceContext(Terms termVectorLookup,
        Map<Integer, Integer> sentenceBoundaries) throws IOException {
    List<MWEInSentence> result = new ArrayList<>();
    TermsEnum tiRef = termVectorLookup.iterator();
    BytesRef luceneTerm = tiRef.next();
    while (luceneTerm != null) {
        if (luceneTerm.length == 0) {
            luceneTerm = tiRef.next();
            continue;
        }
        String tString = luceneTerm.utf8ToString();
        if (!allCandidates.contains(tString)) {
            luceneTerm = tiRef.next();
            continue;
        }
        PostingsEnum postingsEnum = tiRef.postings(null, PostingsEnum.ALL);
        //PostingsEnum postingsEnum = ti.postings(null, PostingsEnum.OFFSETS);
        int doc = postingsEnum.nextDoc(); //this should be just 1 doc, i.e., the constraint for getting this TV
        if (doc != PostingsEnum.NO_MORE_DOCS) {
            int totalOccurrence = postingsEnum.freq();
            for (int i = 0; i < totalOccurrence; i++) {
                postingsEnum.nextPosition();
                int start = postingsEnum.startOffset();
                int end = postingsEnum.endOffset();
                BytesRef payload = postingsEnum.getPayload();
                SentenceContext sentenceContextInfo = null;
                if (payload != null) {
                    sentenceContextInfo = new SentenceContext(MWEMetadata.deserialize(payload.utf8ToString()));
                }
                if (sentenceContextInfo == null)
                    result.add(new MWEInSentence(tString, start, end, 0, 0, 0));
                else {
                    result.add(new MWEInSentence(tString, start, end,
                            sentenceContextInfo.getFirstTokenIdx(),
                            sentenceContextInfo.getLastTokenIdx(),
                            sentenceContextInfo.getSentenceId()));
                    Integer endBound = sentenceBoundaries.get(sentenceContextInfo.getSentenceId());
                    if (endBound == null || endBound < sentenceContextInfo.getLastTokenIdx())
                        sentenceBoundaries.put(sentenceContextInfo.getSentenceId(),
                                sentenceContextInfo.getLastTokenIdx());
                }
            }
        }
        luceneTerm = tiRef.next();
    }
    Collections.sort(result);
    return result;
}