下面列出了 org.apache.lucene.index.Term#bytes() 的实例代码，您可以点击链接到 GitHub 查看源代码，也可以在右侧发表评论。
/**
 * Records the start/end offsets of the current posting under the entry keyed by the term's bytes.
 *
 * <p>Terms whose field is rejected by {@code fieldMatcher} are ignored. The first time a term is
 * seen, a new {@code SpanCollectedOffsetsEnum} is registered for it, unless the term is listed in
 * {@code positionInsensitiveTerms}, in which case nothing is recorded.
 *
 * @param postings positioned postings supplying the frequency and the offsets to record
 * @param position not used by this implementation
 * @param term the term being collected
 */
@Override
public void collectLeaf(PostingsEnum postings, int position, Term term) throws IOException {
  if (fieldMatcher.test(term.field()) == false) {
    return;
  }

  SpanCollectedOffsetsEnum collected = termToOffsetsEnums.get(term.bytes());
  if (collected == null) {
    // If it's pos insensitive we handle it outside of PhraseHelper. term.field() is from the Query.
    final boolean positionInsensitive = positionInsensitiveTerms.contains(term.bytes());
    if (positionInsensitive) {
      return;
    }
    // First occurrence of this term: create its offsets holder and register it.
    collected = new SpanCollectedOffsetsEnum(term.bytes(), postings.freq());
    termToOffsetsEnums.put(term.bytes(), collected);
  }

  collected.add(postings.startOffset(), postings.endOffset());
}
/**
 * SpanNearQuery([A, B], 0, true) = "A B".
 *
 * <p>Builds an ordered, zero-slop span-near query and an exact phrase query from the same two
 * random terms and compares them.
 */
public void testSpanNearVersusPhrase() throws Exception {
  final Term first = randomTerm();
  final Term second = randomTerm();
  final SpanQuery[] clauses = {
    spanQuery(new SpanTermQuery(first)),
    spanQuery(new SpanTermQuery(second))
  };
  final SpanQuery nearQuery = spanQuery(new SpanNearQuery(clauses, 0, true));
  final PhraseQuery phraseQuery = new PhraseQuery(first.field(), first.bytes(), second.bytes());
  if (!first.equals(second)) {
    // Distinct terms: compared with assertSameScores.
    assertSameScores(nearQuery, phraseQuery);
  } else {
    // Identical terms: compared with assertSameSet only.
    assertSameSet(nearQuery, phraseQuery);
  }
}
/**
 * Finds the document matching term {@code t} in reader {@code r}, after filtering the postings
 * with the reader's live-docs bits, and asserts that no second document matches.
 *
 * @param r the reader to search
 * @param t the term to look up
 * @return the matching doc id, or -1 when the field has no terms, the term is not found, or the
 *     filtered postings are empty
 */
protected int getFirstMatch(IndexReader r, Term t) throws IOException {
  final Terms fieldTerms = MultiTerms.getTerms(r, t.field());
  if (fieldTerms == null) {
    return -1;
  }

  final BytesRef wanted = t.bytes();
  final TermsEnum termsEnum = fieldTerms.iterator();
  if (termsEnum.seekExact(wanted) == false) {
    return -1;
  }

  final PostingsEnum filtered =
      BitsFilteredPostingsEnum.wrap(
          termsEnum.postings(null, PostingsEnum.NONE), MultiBits.getLiveDocs(r));

  final int firstDoc = filtered.nextDoc();
  if (firstDoc == DocIdSetIterator.NO_MORE_DOCS) {
    return -1;
  }

  // A match was found: there must not be a second one.
  final int secondDoc = filtered.nextDoc();
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, secondDoc);
  return firstDoc;
}
/**
 * "A B"~N ⊆ "A B"~N+1
 *
 * <p>Checked for every slop N from 0 through 9 using two random terms.
 */
public void testIncreasingSloppiness() throws Exception {
  final Term first = randomTerm();
  final Term second = randomTerm();
  for (int slop = 0; slop < 10; slop++) {
    final PhraseQuery tighter = new PhraseQuery(slop, first.field(), first.bytes(), second.bytes());
    final PhraseQuery looser =
        new PhraseQuery(slop + 1, first.field(), first.bytes(), second.bytes());
    assertSubsetOf(tighter, looser);
  }
}
/**
 * "A B C"~N ⊆ "A B C"~N+1
 *
 * <p>Checked for every slop N from 0 through 9 using three random terms. The subset relation is
 * asserted once per slop value, as in the two-term variant of this test.
 */
public void testIncreasingSloppiness3() throws Exception {
  Term t1 = randomTerm();
  Term t2 = randomTerm();
  Term t3 = randomTerm();
  for (int i = 0; i < 10; i++) {
    PhraseQuery q1 = new PhraseQuery(i, t1.field(), t1.bytes(), t2.bytes(), t3.bytes());
    PhraseQuery q2 = new PhraseQuery(i + 1, t1.field(), t1.bytes(), t2.bytes(), t3.bytes());
    assertSubsetOf(q1, q2);
  }
}
/**
 * "A A"~N ⊆ "A A"~N+1
 *
 * <p>Checked for every slop N from 0 through 9 using one random term repeated twice.
 */
public void testRepetitiveIncreasingSloppiness() throws Exception {
  final Term repeated = randomTerm();
  for (int slop = 0; slop < 10; slop++) {
    final PhraseQuery tighter =
        new PhraseQuery(slop, repeated.field(), repeated.bytes(), repeated.bytes());
    final PhraseQuery looser =
        new PhraseQuery(slop + 1, repeated.field(), repeated.bytes(), repeated.bytes());
    assertSubsetOf(tighter, looser);
  }
}
/**
 * "A A A"~N ⊆ "A A A"~N+1
 *
 * <p>Checked for every slop N from 0 through 9 using one random term repeated three times. The
 * subset relation is asserted once per slop value, as in the two-term variant of this test.
 */
public void testRepetitiveIncreasingSloppiness3() throws Exception {
  Term t = randomTerm();
  for (int i = 0; i < 10; i++) {
    PhraseQuery q1 = new PhraseQuery(i, t.field(), t.bytes(), t.bytes(), t.bytes());
    PhraseQuery q2 = new PhraseQuery(i + 1, t.field(), t.bytes(), t.bytes(), t.bytes());
    assertSubsetOf(q1, q2);
  }
}
/**
 * "A B" ⊆ (+A +B)
 *
 * <p>An exact phrase over two random terms is checked against a boolean query requiring both
 * terms.
 */
public void testExactPhraseVersusBooleanAnd() throws Exception {
  final Term first = randomTerm();
  final Term second = randomTerm();
  final PhraseQuery phrase = new PhraseQuery(first.field(), first.bytes(), second.bytes());

  final BooleanQuery.Builder conjunction = new BooleanQuery.Builder();
  conjunction.add(new TermQuery(first), Occur.MUST);
  conjunction.add(new TermQuery(second), Occur.MUST);

  assertSubsetOf(phrase, conjunction.build());
}
/**
 * "A B" ⊆ "A B"~1
 *
 * <p>An exact phrase over two random terms is checked against the same phrase with slop 1.
 */
public void testPhraseVersusSloppyPhrase() throws Exception {
  final Term first = randomTerm();
  final Term second = randomTerm();
  final PhraseQuery exact = new PhraseQuery(first.field(), first.bytes(), second.bytes());
  final PhraseQuery sloppy = new PhraseQuery(1, first.field(), first.bytes(), second.bytes());
  assertSubsetOf(exact, sloppy);
}
/**
 * "A B" ⊆ "A (B C)"
 *
 * <p>An exact phrase over two random terms is checked against a multi-phrase query whose second
 * position accepts either the second term or a third random term.
 */
public void testExactPhraseVersusMultiPhrase() throws Exception {
  final Term first = randomTerm();
  final Term second = randomTerm();
  final PhraseQuery exact = new PhraseQuery(first.field(), first.bytes(), second.bytes());

  final Term alternative = randomTerm();
  final MultiPhraseQuery.Builder multiPhrase = new MultiPhraseQuery.Builder();
  multiPhrase.add(first);
  final Term[] secondPosition = {second, alternative};
  multiPhrase.add(secondPosition);

  assertSubsetOf(exact, multiPhrase.build());
}
/**
 * "A B"~∞ = +A +B if A != B
 *
 * <p>A phrase with slop {@code Integer.MAX_VALUE} over two distinct random terms is compared with
 * a boolean query requiring both terms.
 */
public void testSloppyPhraseVersusBooleanAnd() throws Exception {
  final Term first = randomTerm();
  // semantics differ from SpanNear: SloppyPhrase handles repeats,
  // so we must ensure t1 != t2 — keep drawing until the second term differs.
  Term second = randomTerm();
  while (first.equals(second)) {
    second = randomTerm();
  }

  final PhraseQuery unboundedSlop =
      new PhraseQuery(Integer.MAX_VALUE, first.field(), first.bytes(), second.bytes());

  final BooleanQuery.Builder conjunction = new BooleanQuery.Builder();
  conjunction.add(new TermQuery(first), Occur.MUST);
  conjunction.add(new TermQuery(second), Occur.MUST);

  assertSameSet(unboundedSlop, conjunction.build());
}
/**
 * Phrase positions are relative.
 *
 * <p>A phrase built with default positions is compared with the same phrase built at explicit
 * positions 10000 and 10001.
 */
public void testPhraseRelativePositions() throws Exception {
  final Term first = randomTerm();
  final Term second = randomTerm();
  final PhraseQuery defaultPositions =
      new PhraseQuery(first.field(), first.bytes(), second.bytes());

  final PhraseQuery.Builder shifted = new PhraseQuery.Builder();
  shifted.add(first, 10000);
  shifted.add(second, 10001);
  final PhraseQuery shiftedPositions = shifted.build();

  assertSameScores(defaultPositions, shiftedPositions);
}
/**
 * Sloppy-phrase positions are relative.
 *
 * <p>A slop-2 phrase built with default positions is compared with the same slop-2 phrase built
 * at explicit positions 10000 and 10001.
 */
public void testSloppyPhraseRelativePositions() throws Exception {
  final Term first = randomTerm();
  final Term second = randomTerm();
  final PhraseQuery defaultPositions =
      new PhraseQuery(2, first.field(), first.bytes(), second.bytes());

  final PhraseQuery.Builder shifted = new PhraseQuery.Builder();
  shifted.add(first, 10000);
  shifted.add(second, 10001);
  shifted.setSlop(2);
  final PhraseQuery shiftedPositions = shifted.build();

  assertSameScores(defaultPositions, shiftedPositions);
}
/**
 * Called for each term in the field being uninverted.
 * Collects {@link #maxTermCounts} for all bigTerms as well as storing them in {@link #bigTerms}.
 * A term is treated as "big" when its document frequency exceeds {@code maxTermDocFreq}.
 *
 * @param te positioned at the current term.
 * @param termNum the ID/pointer/ordinal of the current term. Monotonically increasing between calls.
 * @throws IOException if reading term data or computing the doc set fails
 */
@Override
protected void visitTerm(TermsEnum te, int termNum) throws IOException {
  if (termNum >= maxTermCounts.length) {
    // resize by doubling - for very large number of unique terms, expanding
    // by 4K and resultant GC will dominate uninvert times.  Resize at end if material
    //
    // The new size is computed in long arithmetic: doubling an int length above 2^30 would
    // overflow to a negative value and make the allocation fail. The size is also at least
    // termNum + 1 so that an empty array still grows enough to hold the current slot.
    final long grownSize = Math.max(2L * maxTermCounts.length, termNum + 1L);
    int[] newMaxTermCounts = new int[(int) Math.min(Integer.MAX_VALUE - 16, grownSize)];
    // Only entries that exist in the old array can be copied.
    System.arraycopy(maxTermCounts, 0, newMaxTermCounts, 0, Math.min(termNum, maxTermCounts.length));
    maxTermCounts = newMaxTermCounts;
  }

  final BytesRef term = te.term();

  if (te.docFreq() > maxTermDocFreq) {
    Term t = new Term(field, term); // this makes a deep copy of the term bytes
    TopTerm topTerm = new TopTerm();
    topTerm.term = t.bytes();
    topTerm.termNum = termNum;
    topTerm.termQuery = new TermQuery(t);

    bigTerms.put(topTerm.termNum, topTerm);

    // The enumeration state is created on the first big term and reused afterwards.
    if (deState == null) {
      deState = new SolrIndexSearcher.DocsEnumState();
      deState.fieldName = field;
      deState.liveDocs = searcher.getLiveDocsBits();
      deState.termsEnum = te; // TODO: check for MultiTermsEnum in SolrIndexSearcher could now fail?
      deState.postingsEnum = postingsEnum;
      deState.minSetSizeCached = maxTermDocFreq;
    }

    postingsEnum = deState.postingsEnum;
    DocSet set = searcher.getDocSet(deState);
    // The doc-set size is recorded as this term's count.
    maxTermCounts[termNum] = set.size();
  }
}
/**
 * Builds a random query, recursing for the composite query types.
 *
 * <p>Once {@code level} equals 10 a {@code MatchAllDocsQuery} is returned so that recursion stops.
 * Otherwise one of six query shapes is picked at random: term, boolean, phrase, match-all,
 * constant-score wrapper, or disjunction-max.
 *
 * @param level current nesting depth; nested queries are built with {@code level + 1}
 * @return the randomly generated query
 */
private static Query buildRandomQuery(int level) {
  if (level == 10) {
    // at most 10 levels
    return new MatchAllDocsQuery();
  }
  final int shape = random().nextInt(6);
  switch (shape) {
    case 0: {
      return new TermQuery(randomTerm());
    }
    case 1: {
      final BooleanQuery.Builder booleanBuilder = new BooleanQuery.Builder();
      final int clauseCount = TestUtil.nextInt(random(), 1, 3);
      int shouldCount = 0;
      for (int clause = 0; clause < clauseCount; clause++) {
        final Occur occur = RandomPicks.randomFrom(random(), Occur.values());
        booleanBuilder.add(buildRandomQuery(level + 1), occur);
        if (occur == Occur.SHOULD) {
          shouldCount++;
        }
      }
      // The minimum-should-match value never exceeds the number of SHOULD clauses added.
      final int minimumShouldMatch = TestUtil.nextInt(random(), 0, shouldCount);
      booleanBuilder.setMinimumNumberShouldMatch(minimumShouldMatch);
      return booleanBuilder.build();
    }
    case 2: {
      final Term first = randomTerm();
      final Term second = randomTerm();
      final int slop = random().nextInt(2);
      return new PhraseQuery(slop, first.field(), first.bytes(), second.bytes());
    }
    case 3: {
      return new MatchAllDocsQuery();
    }
    case 4: {
      return new ConstantScoreQuery(buildRandomQuery(level + 1));
    }
    case 5: {
      final List<Query> alternatives = new ArrayList<>();
      final int alternativeCount = TestUtil.nextInt(random(), 1, 3);
      for (int n = 0; n < alternativeCount; n++) {
        alternatives.add(buildRandomQuery(level + 1));
      }
      return new DisjunctionMaxQuery(alternatives, random().nextFloat());
    }
    default:
      throw new AssertionError();
  }
}
/**
 * Creates the query from {@code term}: the term's field is handed to the superclass constructor
 * and the term's bytes are stored in {@code prefix}.
 *
 * @param term supplies both the field name and the prefix bytes
 */
DumbPrefixQuery(Term term) {
super(term.field());
prefix = term.bytes();
}
/**
 * Adapts a term-based query factory to the field-name-based {@code createQuery} overload.
 *
 * <p>The term's field is passed on as the field argument; for whatever field name the overload
 * supplies, {@code fn} is applied to a new {@link Term} made of that name and the original term's
 * bytes.
 *
 * @param term the term whose field and bytes are used
 * @param fn builds a query from a term
 * @return the query produced by the delegated {@code createQuery} call
 */
private Query createQuery(Term term, Function<Term, Query> fn) {
  final String termField = term.field();
  final BytesRef termBytes = term.bytes();
  return createQuery(termField, name -> {
    final Term rebound = new Term(name, termBytes);
    return fn.apply(rebound);
  });
}
/**
 * Builds the {@link TermStatistics} for a term from the supplied counts.
 *
 * <p>This can be overridden, for example, to return a term's statistics
 * across a distributed collection.
 *
 * @param term the term whose bytes identify the statistics
 * @param docFreq The document frequency of the term. It must be greater or equal to 1.
 * @param totalTermFreq The total term frequency.
 * @return A {@link TermStatistics} (never null).
 *
 * @lucene.experimental
 */
public TermStatistics termStatistics(Term term, int docFreq, long totalTermFreq) throws IOException {
  // This constructor will throw an exception if docFreq <= 0.
  final TermStatistics statistics = new TermStatistics(term.bytes(), docFreq, totalTermFreq);
  return statistics;
}