下面列出了如何使用 org.apache.lucene.index.FieldInvertState 类的 API 示例代码及写法，也可以点击链接到 GitHub 查看源代码。
/**
 * Checks, via assertions, that the given {@link FieldInvertState} is consistent, then
 * delegates to {@code delegate.computeNorm(state)} and asserts that the result is non-zero.
 *
 * <p>All checks are plain {@code assert} statements, so they only run when assertions are
 * enabled.
 *
 * @param state state of term processing for the field
 * @return the norm computed by {@code delegate}
 */
@Override
public long computeNorm(FieldInvertState state) {
// The state must be present and describe a non-empty field.
assert state != null;
assert state.getLength() > 0;
assert state.getPosition() >= 0;
assert state.getOffset() >= 0;
// The max term frequency can never exceed the field length.
assert state.getMaxTermFrequency() >= 0; // TODO: seems to be 0 for omitTFAP?
assert state.getMaxTermFrequency() <= state.getLength();
// Overlaps must be strictly fewer than the field length.
assert state.getNumOverlap() >= 0;
assert state.getNumOverlap() < state.getLength();
// There is at least one unique term, and no more unique terms than the field length.
assert state.getUniqueTermCount() > 0;
assert state.getUniqueTermCount() <= state.getLength();
long norm = delegate.computeNorm(state);
// The delegate must not return 0.
assert norm != 0;
return norm;
}
/**
 * Checks that {@link BooleanSimilarity} computes the same norm as a {@link BM25Similarity}
 * with overlap discounting enabled, for 100 randomly generated field states.
 */
public void testSameNormsAsBM25() {
  BooleanSimilarity sim1 = new BooleanSimilarity();
  BM25Similarity sim2 = new BM25Similarity();
  sim2.setDiscountOverlaps(true);
  for (int iter = 0; iter < 100; ++iter) {
    final int length = TestUtil.nextInt(random(), 1, 100);
    final int position = random().nextInt(length);
    final int numOverlaps = random().nextInt(length);
    final int maxTermFrequency = 1;
    final int uniqueTermCount = 1;
    FieldInvertState state =
        new FieldInvertState(
            Version.LATEST.major,
            "foo",
            IndexOptions.DOCS_AND_FREQS,
            position,
            length,
            numOverlaps,
            100,
            maxTermFrequency,
            uniqueTermCount);
    // computeNorm returns a long: compare the values exactly instead of widening both
    // sides to float through the (expected, actual, delta) overload.
    assertEquals(sim2.computeNorm(state), sim1.computeNorm(state));
  }
}
/**
 * Checks that {@link ClassicSimilarity} computes the same norm as a {@link BM25Similarity}
 * with overlap discounting enabled, for 100 randomly generated field states.
 */
public void testSameNormsAsBM25() {
  ClassicSimilarity sim1 = new ClassicSimilarity();
  BM25Similarity sim2 = new BM25Similarity();
  sim2.setDiscountOverlaps(true);
  for (int iter = 0; iter < 100; ++iter) {
    final int length = TestUtil.nextInt(random(), 1, 1000);
    final int position = random().nextInt(length);
    final int numOverlaps = random().nextInt(length);
    final int maxTermFrequency = 1;
    final int uniqueTermCount = 1;
    FieldInvertState state =
        new FieldInvertState(
            Version.LATEST.major,
            "foo",
            IndexOptions.DOCS_AND_FREQS,
            position,
            length,
            numOverlaps,
            100,
            maxTermFrequency,
            uniqueTermCount);
    // computeNorm returns a long: compare the values exactly instead of widening both
    // sides to float through the (expected, actual, delta) overload.
    assertEquals(sim2.computeNorm(state), sim1.computeNorm(state));
  }
}
/**
 * Returns <code>state.getBoost() * (1 / sqrt(numTerms))</code>, where
 * <code>numTerms</code> is {@link org.apache.lucene.index.FieldInvertState#getLength()} if
 * {@link #setDiscountOverlaps} is false, else it is
 * {@link org.apache.lucene.index.FieldInvertState#getLength()} minus
 * {@link org.apache.lucene.index.FieldInvertState#getNumOverlap()}.
 *
 * @param state state of the field whose length norm is computed
 * @return the boost multiplied by the inverse square root of the term count
 * @lucene.experimental
 */
@Override
public float lengthNorm(FieldInvertState state) {
  final int termCount =
      discountOverlaps ? state.getLength() - state.getNumOverlap() : state.getLength();
  final float inverseSqrt = (float) (1.0 / Math.sqrt(termCount));
  return state.getBoost() * inverseSqrt;
}
/**
 * Verifies that the norms exposed by a {@link MemoryIndex} reader are re-computed when the
 * similarity is replaced through {@code setSimilarity}.
 */
@Test
public void testSimilarities() throws IOException {
  MemoryIndex mi = new MemoryIndex();
  mi.addField("f1", "a long text field that contains many many terms", analyzer);
  IndexSearcher searcher = mi.createSearcher();
  LeafReader reader = (LeafReader) searcher.getIndexReader();

  NumericDocValues norms = reader.getNormValues("f1");
  assertEquals(0, norms.nextDoc());
  // Norms are longs; keep them as longs rather than converting them to float.
  long n1 = norms.longValue();

  // Norms are re-computed when we change the Similarity
  mi.setSimilarity(
      new Similarity() {
        @Override
        public long computeNorm(FieldInvertState state) {
          return 74;
        }

        @Override
        public SimScorer scorer(
            float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
          throw new UnsupportedOperationException();
        }
      });

  norms = reader.getNormValues("f1");
  assertEquals(0, norms.nextDoc());
  long n2 = norms.longValue();

  assertTrue(n1 != n2);
  TestUtil.checkReader(reader);
}
/**
 * Encodes the document length in the same way as {@link BM25Similarity}.
 *
 * <p>The unique term count is used when the field is indexed with {@link IndexOptions#DOCS}
 * and the index was created with major version 8 or later. Otherwise the field length is
 * used, minus the overlap count when {@code discountOverlaps} is set. The chosen value is
 * encoded with {@code SmallFloat.intToByte4}.
 *
 * @param state state of term processing for the field
 * @return the encoded length
 */
@Override
public final long computeNorm(FieldInvertState state) {
  if (state.getIndexOptions() == IndexOptions.DOCS && state.getIndexCreatedVersionMajor() >= 8) {
    return SmallFloat.intToByte4(state.getUniqueTermCount());
  }
  if (discountOverlaps) {
    return SmallFloat.intToByte4(state.getLength() - state.getNumOverlap());
  }
  return SmallFloat.intToByte4(state.getLength());
}
/**
 * Encodes a term count for the field with {@code SmallFloat.intToByte4}.
 *
 * <p>The term count is the unique term count when the field is indexed with
 * {@link IndexOptions#DOCS} and the index was created with major version 8 or later.
 * Otherwise it is the field length, minus the overlap count when {@code discountOverlaps}
 * is set.
 *
 * @param state state of term processing for the field
 * @return the encoded term count
 */
@Override
public final long computeNorm(FieldInvertState state) {
  final boolean countUniqueTerms =
      state.getIndexOptions() == IndexOptions.DOCS && state.getIndexCreatedVersionMajor() >= 8;
  final int termCount;
  if (countUniqueTerms) {
    termCount = state.getUniqueTermCount();
  } else {
    termCount = discountOverlaps ? state.getLength() - state.getNumOverlap() : state.getLength();
  }
  return SmallFloat.intToByte4(termCount);
}
/**
 * Encodes a term count for the field with {@code SmallFloat.intToByte4}.
 *
 * <p>Unless the field is indexed with {@link IndexOptions#DOCS} in an index created with
 * major version 8 or later, the term count is the field length, reduced by the overlap
 * count when {@code discountOverlaps} is set. In the remaining case the unique term count
 * is used instead.
 *
 * @param state state of term processing for the field
 * @return the encoded term count
 */
@Override
public final long computeNorm(FieldInvertState state) {
  if (state.getIndexOptions() != IndexOptions.DOCS || state.getIndexCreatedVersionMajor() < 8) {
    int fieldLength = state.getLength();
    if (discountOverlaps) {
      fieldLength -= state.getNumOverlap();
    }
    return SmallFloat.intToByte4(fieldLength);
  }
  return SmallFloat.intToByte4(state.getUniqueTermCount());
}
/**
 * Checks that a {@link DFRSimilarity} computes the same norm as {@link BM25Similarity} for a
 * field of length 5 with 2 overlaps, first with overlap discounting disabled and then with
 * it enabled.
 */
public void testDiscountOverlapsBoost() throws IOException {
  final BM25Similarity expected = new BM25Similarity();
  final SimilarityBase actual =
      new DFRSimilarity(new BasicModelIne(), new AfterEffectB(), new NormalizationH2());

  final FieldInvertState state =
      new FieldInvertState(Version.LATEST.major, "foo", IndexOptions.DOCS_AND_FREQS);
  state.setLength(5);
  state.setNumOverlap(2);

  // Same order as before: discounting off first, then on.
  for (boolean discount : new boolean[] {false, true}) {
    expected.setDiscountOverlaps(discount);
    actual.setDiscountOverlaps(discount);
    assertEquals(expected.computeNorm(state), actual.computeNorm(state));
  }
}
/**
 * Delegates norm computation to {@code bm25Similarity}.
 *
 * @param state state of term processing for the field
 * @return the norm computed by {@code bm25Similarity}
 */
@Override
public long computeNorm(FieldInvertState state) {
  final long delegatedNorm = bm25Similarity.computeNorm(state);
  return delegatedNorm;
}
/**
 * Delegates norm computation to {@code BM25_SIM}.
 *
 * @param state state of term processing for the field
 * @return the norm computed by {@code BM25_SIM}
 */
@Override
public long computeNorm(FieldInvertState state) {
  final long delegatedNorm = BM25_SIM.computeNorm(state);
  return delegatedNorm;
}
/**
 * Looks up the similarity registered for the field's name via {@code get} and lets it
 * compute the norm.
 *
 * @param state state of term processing for the field
 * @return the norm computed by the similarity returned from {@code get(state.getName())}
 */
@Override
public final long computeNorm(FieldInvertState state) {
  final String fieldName = state.getName();
  return get(fieldName).computeNorm(state);
}
/**
 * Computes the norm with the first similarity in {@code sims}; the other entries are not
 * consulted.
 *
 * @param state state of term processing for the field
 * @return the norm computed by {@code sims[0]}
 */
@Override
public long computeNorm(FieldInvertState state) {
  final long normFromFirst = sims[0].computeNorm(state);
  return normFromFirst;
}
/**
 * Returns the constant norm {@code 1}, regardless of the field state.
 *
 * @param state ignored
 * @return always {@code 1}
 */
@Override
public long computeNorm(FieldInvertState state) {
  return 1L;
}
/**
 * Returns the constant norm {@code 10}, regardless of the field state.
 *
 * @param state ignored
 * @return always {@code 10}
 */
@Override
public long computeNorm(FieldInvertState state) {
  return 10L;
}
/**
 * Produces the same norm, {@code 1}, for every field.
 *
 * @param state unused
 * @return the fixed value {@code 1}
 */
@Override
public long computeNorm(FieldInvertState state) {
  return 1L;
}
/**
 * Norm computation is not supported here.
 *
 * @param state unused
 * @return never returns normally
 * @throws UnsupportedOperationException always, with {@code UNSUPPORTED_MSG} as its message
 */
@Override
public long computeNorm(FieldInvertState state) {
  final UnsupportedOperationException unsupported =
      new UnsupportedOperationException(UNSUPPORTED_MSG);
  throw unsupported;
}
/**
 * Returns a fixed norm of {@code 1}.
 *
 * @param state unused
 * @return always {@code 1}
 */
@Override
public long computeNorm(FieldInvertState state) {
  // The actual norm value does not matter here; any constant would do.
  return 1L;
}
/**
 * Ignores the field state and reports a norm of {@code 1}.
 *
 * @param state unused
 * @return always {@code 1}
 */
@Override
public long computeNorm(FieldInvertState state) {
  return 1L;
}
/**
 * Uses the raw field length as the norm, without any encoding.
 *
 * @param state state of term processing for the field
 * @return {@code state.getLength()}
 */
@Override
public final long computeNorm(FieldInvertState state) {
  final int fieldLength = state.getLength();
  return fieldLength;
}
/**
 * Encodes the field's boost and term count with {@code encodeNormValue}.
 *
 * <p>The term count is the field length, minus the overlap count when
 * {@code discountOverlaps} is set.
 *
 * @param state state of term processing for the field
 * @return the value produced by {@code encodeNormValue}
 */
@Override
public final long computeNorm(FieldInvertState state) {
  int termCount = state.getLength();
  if (discountOverlaps) {
    termCount -= state.getNumOverlap();
  }
  return encodeNormValue(state.getBoost(), termCount);
}
/**
 * Reports the field length itself as the norm.
 *
 * @param state state of term processing for the field
 * @return {@code state.getLength()}
 */
@Override
public final long computeNorm(FieldInvertState state) {
  final int lengthAsNorm = state.getLength();
  return lengthAsNorm;
}
/**
 * Applies no length normalization: every field gets the factor {@code 1}.
 *
 * @param state unused
 * @return always {@code 1.0f}
 */
@Override
public float lengthNorm(FieldInvertState state) {
  return 1.0f;
}
/**
 * Length normalization is not implemented by this similarity.
 *
 * <p>Throws {@link UnsupportedOperationException}, which is a {@link RuntimeException}, so
 * callers that catch {@code RuntimeException} are unaffected.
 *
 * @param fieldInvertState unused
 * @return never returns normally
 * @throws UnsupportedOperationException always
 */
@Override
public float lengthNorm(FieldInvertState fieldInvertState) {
  throw new UnsupportedOperationException("not sure");
}
/**
 * Forwards the field state to {@code BM25_SIM} and returns its norm unchanged.
 *
 * @param state state of term processing for the field
 * @return the norm computed by {@code BM25_SIM}
 */
@Override
public long computeNorm(FieldInvertState state) {
  final long bm25Norm = BM25_SIM.computeNorm(state);
  return bm25Norm;
}
/**
 * Computes the normalization value for a field, given the accumulated
 * state of term processing for this field (see {@link FieldInvertState}).
 *
 * <p>Matches in longer fields are less precise, so implementations of this
 * method usually set smaller values when <code>state.getLength()</code> is large,
 * and larger values when <code>state.getLength()</code> is small.
 *
 * <p>Note that for a given term-document frequency, greater unsigned norms
 * must produce scores that are lower or equal, i.e. for any two encoded norms
 * {@code n1} and {@code n2} such that
 * {@code Long.compareUnsigned(n1, n2) > 0}, it must hold that
 * {@code SimScorer.score(freq, n1) <= SimScorer.score(freq, n2)}
 * for any legal {@code freq}.
 *
 * <p>{@code 0} is not a legal norm, so {@code 1} is the norm that produces
 * the highest scores.
 *
 * @lucene.experimental
 *
 * @param state current processing state for this field
 * @return computed norm value ({@code 0} is not a legal value)
 */
public abstract long computeNorm(FieldInvertState state);