下面列出了org.apache.lucene.index.PostingsEnum#endOffset ( ) 实例代码,或者点击链接到github查看源代码,也可以在右侧发表评论。
/**
 * Walks every occurrence of the current term and fills the per-occurrence
 * arrays (positions, offsets, payloads) for whichever features the field
 * actually indexed.
 *
 * @param curTerms terms source used to probe which features are available
 * @param posEnum  postings iterator already positioned on the current doc/term
 * @param termFreq number of occurrences to consume via {@code nextPosition()}
 * @throws IOException on low-level index read errors
 */
private void initValues(Terms curTerms, PostingsEnum posEnum, int termFreq) throws IOException {
    for (int slot = 0; slot < termFreq; slot++) {
        // nextPosition() must be called once per occurrence to advance the enum,
        // even when positions themselves are not stored.
        final int position = posEnum.nextPosition();
        if (curTerms.hasPositions()) {
            currentPositions[slot] = position;
        }
        if (curTerms.hasOffsets()) {
            currentStartOffset[slot] = posEnum.startOffset();
            currentEndOffset[slot] = posEnum.endOffset();
        }
        if (curTerms.hasPayloads()) {
            final BytesRef payload = posEnum.getPayload();
            // getPayload() may legitimately return null for an occurrence without payload
            currentPayloads[slot] = (payload == null)
                    ? null
                    : new BytesArray(payload.bytes, 0, payload.length);
        }
    }
}
/**
 * Builds a {@link TermPosting} for the current occurrence of the postings
 * iterator: always records the position, and records offsets/payload only
 * when the index actually provides them.
 *
 * @param position term position of the current occurrence
 * @param penum    postings iterator positioned on the occurrence to capture
 * @return a populated posting entry
 * @throws IOException if there is a low-level IO error
 */
static TermPosting of(int position, PostingsEnum penum) throws IOException {
    TermPosting posting = new TermPosting();
    posting.position = position;
    // Offsets are reported as -1 when the field was indexed without offsets.
    int sOffset = penum.startOffset();
    int eOffset = penum.endOffset();
    if (sOffset >= 0 && eOffset >= 0) {
        posting.startOffset = sOffset;
        posting.endOffset = eOffset;
    }
    // Fetch the payload once (the original called getPayload() twice) and
    // deep-copy it, since the returned BytesRef may be reused by the enum.
    BytesRef payload = penum.getPayload();
    if (payload != null) {
        posting.payload = BytesRef.deepCopyOf(payload);
    }
    return posting;
}
/**
 * Returns a new position entry representing the specified posting, and optionally, start and end offsets.
 * @param pos - term position
 * @param pe - positioned postings iterator
 * @return position entry
 * @throws IOException - if there is a low level IO error.
 */
static TermVectorPosition of(int pos, PostingsEnum pe) throws IOException {
    Objects.requireNonNull(pe);
    final int start = pe.startOffset();
    final int end = pe.endOffset();
    // Offsets come back as -1 when the field was indexed without them;
    // in that case fall back to the position-only representation.
    return (start >= 0 && end >= 0)
            ? new TermVectorPosition(pos, start, end)
            : new TermVectorPosition(pos);
}
/**
 * Scans a term vector and collects, for every candidate term, one context
 * entry per occurrence (character offsets plus the sentence id decoded from
 * the occurrence payload, or -1 when no payload is present).
 *
 * @param termVectorLookup term vector of a single document
 * @return sorted list of per-occurrence contexts
 * @throws IOException on low-level index read errors
 */
private List<MWESentenceContext> collectTermOffsets(Terms termVectorLookup) throws IOException {
    List<MWESentenceContext> result = new ArrayList<>();
    TermsEnum termsEnum = termVectorLookup.iterator();
    BytesRef termBytes;
    while ((termBytes = termsEnum.next()) != null) {
        if (termBytes.length == 0) {
            continue; // skip empty terms
        }
        String termString = termBytes.utf8ToString();
        if (!allCandidates.contains(termString)) {
            continue; // only candidate terms are of interest
        }
        PostingsEnum postings = termsEnum.postings(null, PostingsEnum.ALL);
        // a term vector covers exactly one document, so a single nextDoc() suffices
        if (postings.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
            int occurrences = postings.freq();
            for (int i = 0; i < occurrences; i++) {
                postings.nextPosition();
                int start = postings.startOffset();
                int end = postings.endOffset();
                BytesRef payload = postings.getPayload();
                int sentenceId = -1;
                if (payload != null) {
                    sentenceId = new SentenceContext(
                            MWEMetadata.deserialize(payload.utf8ToString())).getSentenceId();
                }
                result.add(new MWESentenceContext(termString, sentenceId, start, end));
            }
        }
    }
    Collections.sort(result);
    return result;
}
/**
 * Buckets every term occurrence of the document's term vector into one of
 * five windows around the matched span: primary, adjacent previous/following,
 * and secondary previous/following. Bigrams are additionally recorded for the
 * primary window. Type-prefixed terms (named-entity markers) are skipped.
 *
 * @param terms term vector for the document (may be null; then nothing is mapped)
 * @param spans the matched span iterator (currently unused; kept for interface
 *              compatibility — see the commented-out nextStartPosition() call)
 * @throws IOException on low-level index read errors
 */
public void map(Terms terms, Spans spans) throws IOException {
    // start/end of the primary window
    int primStart = spanStart - primaryWS;
    int primEnd = spanEnd + primaryWS;
    // start/end of the adjacent previous and following windows
    int adjLBStart = primStart - adjWS;
    int adjLBEnd = primStart - 1;   // don't overlap the primary window
    int adjUBStart = primEnd + 1;   // don't overlap the primary window
    int adjUBEnd = primEnd + adjWS;
    // start/end of the secondary previous and following windows
    int secLBStart = adjLBStart - secWS;
    int secLBEnd = adjLBStart - 1;  // don't overlap the adjacent window
    int secUBStart = adjUBEnd + 1;
    int secUBEnd = adjUBEnd + secWS;
    WindowTerm lastWT = null;
    // BUG FIX: the original wrote "if (terms != null) {}" — an empty body —
    // and then dereferenced terms unconditionally. Guard for real instead.
    if (terms == null) {
        return;
    }
    TermsEnum termsEnum = terms.iterator();
    BytesRef termref;
    while ((termref = termsEnum.next()) != null) {
        String term = termsEnum.term().utf8ToString();
        PostingsEnum postings = termsEnum.postings(null, PostingsEnum.PAYLOADS | PostingsEnum.OFFSETS);
        postings.nextDoc();
        // filter out the NE type markers; we don't need them here
        if (!term.startsWith(NameFilter.NE_PREFIX)
                && !term.startsWith(PassageRankingComponent.NE_PREFIX_LOWER)) {
            int position = postings.nextPosition();
            // upfront check so anything outside all windows is skipped cheaply
            if (position >= secLBStart && position <= secUBEnd) {
                // offsets aren't required, but they are nice to have.
                // NOTE(review): postings is never null here (nextDoc() above would
                // have thrown); the null branch is kept for interface parity only.
                WindowTerm wt = (postings != null)
                        ? new WindowTerm(term, position, postings.startOffset(), postings.endOffset())
                        : new WindowTerm(term, position);
                if (position >= primStart && position <= primEnd) {
                    // primary window
                    passage.terms.add(wt);
                    // bigrams are only kept for the primary window
                    if (lastWT != null) {
                        // offsets don't matter for bigrams
                        WindowTerm bigramWT = new WindowTerm(lastWT.term + "," + term, lastWT.position);
                        passage.bigrams.add(bigramWT);
                    }
                    lastWT = wt;
                } else if (position >= secLBStart && position <= secLBEnd) {
                    // secondary previous window
                    passage.secPrevTerms.add(wt);
                } else if (position >= secUBStart && position <= secUBEnd) {
                    // secondary following window
                    passage.secFollowTerms.add(wt);
                } else if (position >= adjLBStart && position <= adjLBEnd) {
                    // adjacent previous window
                    passage.prevTerms.add(wt);
                } else if (position >= adjUBStart && position <= adjUBEnd) {
                    // adjacent following window
                    passage.followTerms.add(wt);
                }
            }
        }
    }
}
/**
 * Creates a matches iterator over the positions of the current term in the
 * given document, keeping only occurrences whose payload passes {@code filter}.
 * Returns {@code null} when the term does not occur in the document.
 *
 * @param te  terms enum positioned on the term of interest
 * @param doc target document id
 * @return a payload-filtered matches iterator, or null if the doc is absent
 * @throws IOException on low-level index read errors
 */
private IntervalMatchesIterator matches(TermsEnum te, int doc) throws IOException {
    PostingsEnum postings = te.postings(null, PostingsEnum.ALL);
    if (postings.advance(doc) != doc) {
        return null; // term does not appear in this document
    }
    return new IntervalMatchesIterator() {
        int remaining = postings.freq(); // occurrences not yet consumed
        int current = -1;                // current position, or NO_MORE_INTERVALS

        @Override
        public boolean next() throws IOException {
            // scan forward until an occurrence's payload passes the filter
            while (remaining > 0) {
                remaining--;
                current = postings.nextPosition();
                if (filter.test(postings.getPayload())) {
                    return true;
                }
            }
            current = IntervalIterator.NO_MORE_INTERVALS;
            return false;
        }

        @Override
        public int gaps() {
            return 0; // a single term has no internal gaps
        }

        @Override
        public int width() {
            return 1; // a single term always spans one position
        }

        @Override
        public int startPosition() {
            return current;
        }

        @Override
        public int endPosition() {
            return current;
        }

        @Override
        public int startOffset() throws IOException {
            return postings.startOffset();
        }

        @Override
        public int endOffset() throws IOException {
            return postings.endOffset();
        }

        @Override
        public MatchesIterator getSubMatches() {
            return null; // leaf iterator: no sub-matches
        }

        @Override
        public Query getQuery() {
            throw new UnsupportedOperationException();
        }
    };
}
/**
 * Creates a matches iterator over all positions of the current term in the
 * given document, exposing offsets and the originating {@link TermQuery}.
 * Returns {@code null} when the term does not occur in the document.
 *
 * @param te    terms enum positioned on the term of interest
 * @param doc   target document id
 * @param field field the term belongs to (used to build the query)
 * @return a matches iterator, or null if the doc is absent
 * @throws IOException on low-level index read errors
 */
static IntervalMatchesIterator matches(TermsEnum te, int doc, String field) throws IOException {
    TermQuery termQuery = new TermQuery(new Term(field, te.term()));
    PostingsEnum postings = te.postings(null, PostingsEnum.OFFSETS);
    if (postings.advance(doc) != doc) {
        return null; // term does not appear in this document
    }
    return new IntervalMatchesIterator() {
        int remaining = postings.freq(); // occurrences not yet consumed
        int current = -1;                // current position, or NO_MORE_INTERVALS

        @Override
        public boolean next() throws IOException {
            if (remaining <= 0) {
                current = IntervalIterator.NO_MORE_INTERVALS;
                return false;
            }
            remaining--;
            current = postings.nextPosition();
            return true;
        }

        @Override
        public int gaps() {
            return 0; // a single term has no internal gaps
        }

        @Override
        public int width() {
            return 1; // a single term always spans one position
        }

        @Override
        public int startPosition() {
            return current;
        }

        @Override
        public int endPosition() {
            return current;
        }

        @Override
        public int startOffset() throws IOException {
            return postings.startOffset();
        }

        @Override
        public int endOffset() throws IOException {
            return postings.endOffset();
        }

        @Override
        public MatchesIterator getSubMatches() {
            return null; // leaf iterator: no sub-matches
        }

        @Override
        public Query getQuery() {
            return termQuery;
        }
    };
}
/**
 * Captures one term occurrence: its position plus the start/end character
 * offsets reported by the postings iterator at its current occurrence.
 *
 * @param pe       postings iterator positioned on the occurrence
 * @param position term position of the occurrence
 * @throws IOException on low-level index read errors
 */
public TermMatch(PostingsEnum pe, int position) throws IOException {
    this.startOffset = pe.startOffset();
    this.endOffset = pe.endOffset();
    this.position = position;
}
/**
 * Scans a term vector and collects, for every candidate term, one entry per
 * occurrence with character offsets and (when a payload is present) the
 * decoded sentence context. Also tracks, per sentence id, the largest last
 * token index seen, recorded into {@code sentenceBoundaries}.
 *
 * @param termVectorLookup   term vector of a single document
 * @param sentenceBoundaries out-param: sentence id -> max last-token index
 * @return sorted list of per-occurrence entries
 * @throws IOException on low-level index read errors
 */
private List<MWEInSentence> collectTermSentenceContext(Terms termVectorLookup,
                                                       Map<Integer, Integer> sentenceBoundaries) throws IOException {
    List<MWEInSentence> result = new ArrayList<>();
    TermsEnum termsEnum = termVectorLookup.iterator();
    BytesRef termBytes;
    while ((termBytes = termsEnum.next()) != null) {
        if (termBytes.length == 0) {
            continue; // skip empty terms
        }
        String termString = termBytes.utf8ToString();
        if (!allCandidates.contains(termString)) {
            continue; // only candidate terms are of interest
        }
        PostingsEnum postings = termsEnum.postings(null, PostingsEnum.ALL);
        // a term vector covers exactly one document, so a single nextDoc() suffices
        if (postings.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
            int occurrences = postings.freq();
            for (int i = 0; i < occurrences; i++) {
                postings.nextPosition();
                int start = postings.startOffset();
                int end = postings.endOffset();
                BytesRef payload = postings.getPayload();
                if (payload == null) {
                    // no sentence metadata attached to this occurrence
                    result.add(new MWEInSentence(termString, start, end, 0, 0, 0));
                } else {
                    SentenceContext ctx =
                            new SentenceContext(MWEMetadata.deserialize(payload.utf8ToString()));
                    result.add(new MWEInSentence(termString, start, end,
                            ctx.getFirstTokenIdx(),
                            ctx.getLastTokenIdx(),
                            ctx.getSentenceId()));
                    // keep the widest last-token index observed for this sentence
                    sentenceBoundaries.merge(ctx.getSentenceId(), ctx.getLastTokenIdx(), Math::max);
                }
            }
        }
    }
    Collections.sort(result);
    return result;
}