下面列出了 org.apache.lucene.search.similarities.DefaultSimilarity 类的 API 实例代码及用法示例，也可以点击链接到 GitHub 查看源代码。
@Override
public Query rewrite(IndexReader reader) throws IOException {
    // Build the more-like-this helper, falling back to Lucene's default
    // similarity when none was explicitly configured on this query.
    final XMoreLikeThis mlt =
            new XMoreLikeThis(reader, similarity == null ? new DefaultSimilarity() : similarity);

    // Transfer every tuning knob from this query onto the helper.
    mlt.setFieldNames(moreLikeFields);
    mlt.setAnalyzer(analyzer);
    mlt.setMinTermFreq(minTermFrequency);
    mlt.setMinDocFreq(minDocFreq);
    mlt.setMaxDocFreq(maxDocFreq);
    mlt.setMaxQueryTerms(maxQueryTerms);
    mlt.setMinWordLen(minWordLen);
    mlt.setMaxWordLen(maxWordLen);
    mlt.setStopWords(stopWords);
    mlt.setBoost(boostTerms);
    mlt.setBoostFactor(boostTermsFactor);

    // Apply "unlike" text/fields only when at least one of them is present.
    final boolean hasUnlike = unlikeText != null || unlikeFields != null;
    if (hasUnlike) {
        handleUnlike(mlt, unlikeText, unlikeFields);
    }

    return createQuery(mlt);
}
/**
 * Opens (or creates) the context-analyzer index on the given directory.
 *
 * @param directory where the Lucene index lives
 * @param rescorer  rescoring strategy used by this analyzer index
 * @throws IOException if the index writer cannot be opened or committed
 */
public ContextAnalyzerIndex(Directory directory, Rescorer rescorer) throws IOException {
    this.indexDirectory = directory;
    this.analyzer = new CorpusAnalyzer();
    this.rescorer = rescorer;

    // Writer setup: append when an index already exists, and neutralize
    // length normalization so field length does not influence scoring.
    final IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_10_4, this.analyzer);
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    config.setSimilarity(new DefaultSimilarity() {
        @Override
        public float lengthNorm(FieldInvertState state) {
            return 1.f;
        }
    });
    this.indexWriter = new IndexWriter(this.indexDirectory, config);

    // An initial commit guarantees the index physically exists on disk.
    if (!DirectoryReader.indexExists(directory)) {
        this.indexWriter.commit();
    }
}
/**
 * Creates a filter over per-document term vectors.
 *
 * @param termVectorsByField term vectors for the document being filtered
 * @param topLevelFields     top-level (index-wide) fields for statistics
 * @param selectedFields     the subset of fields to consider
 * @param dfs                aggregated document frequencies, or {@code null}
 */
public TermVectorsFilter(Fields termVectorsByField, Fields topLevelFields, Set<String> selectedFields, @Nullable AggregatedDfs dfs) {
    this.fields = termVectorsByField;
    this.topLevelFields = topLevelFields;
    this.selectedFields = selectedFields;
    this.dfs = dfs;

    // Fresh per-instance accumulators and the scoring similarity.
    this.similarity = new DefaultSimilarity();
    this.scoreTerms = new HashMap<>();
    this.sizes = AtomicLongMap.create();
}
/**
 * {@inheritDoc}
 *
 * @return the {@link DefaultSimilarity} instance held by this provider
 */
@Override
public DefaultSimilarity get() {
return similarity;
}
/**
 * Constructor requiring an IndexReader.
 *
 * <p>Delegates to the two-argument constructor, supplying a fresh
 * {@link DefaultSimilarity} for term-weight computation.
 */
public XMoreLikeThis(IndexReader ir) {
this(ir, new DefaultSimilarity());
}
/**
 * Decodes a tuple into a term/document key (or full record).
 *
 * <p>The tuple key layout is {unicode sort key, compact term weight (1 byte),
 * docId (long)[, fieldId (int)]}; the fieldId component is present only when
 * {@code fieldsEnabled} is set.
 *
 * @param tuple   the tuple whose key encodes the term/document fields
 * @param keyOnly when {@code true}, materialize only the key fields
 * @return a read-only key view, or a full record when {@code keyOnly} is
 *         {@code false}
 */
protected ITermDocKey<V> deserialize(final ITuple tuple,
        final boolean keyOnly) {

    final ByteArrayBuffer kbuf = tuple.getKeyBuffer();

    /*
     * The byte offset of the docId in the key.
     *
     * Note: This is also the byte length of the match on the unicode sort
     * key, which appears at the head of the key.
     */
    final int docIdOffset = kbuf.limit() - Bytes.SIZEOF_LONG /* docId */
            - (fieldsEnabled ? Bytes.SIZEOF_INT/* fieldId */: 0);

    final V docId = (V) (Object) Long.valueOf(KeyBuilder.decodeLong(kbuf.array(),
            docIdOffset));

    // Decode the field identifier when fields are enabled (-1 otherwise).
    final int fieldId;
    if (fieldsEnabled) {
        fieldId = KeyBuilder.decodeShort(kbuf.array(), kbuf.limit()
                - Bytes.SIZEOF_INT);
    } else {
        fieldId = -1;
    }

    // The compact (single-byte) term weight immediately precedes the docId.
    final int termWeightOffset = docIdOffset - Bytes.SIZEOF_BYTE;
    final byte termWeightCompact = kbuf.getByte(termWeightOffset);

    /*
     * See: http://lucene.apache.org/core/5_1_0/core/org/apache/lucene/search/similarities/DefaultSimilarity.html
     *
     * For more information on the round-trip of normalized term weight.
     */
    final DefaultSimilarity similarity = new DefaultSimilarity();
    final double termWeight = similarity.decodeNormValue(termWeightCompact);

    if (keyOnly) {
        return new ReadOnlyTermDocKey(docId, fieldId, termWeight);
    }

    return new ReadOnlyTermDocRecord<V>(null/* token */, docId, fieldId,
            /* termFreq, */ termWeight);
}
/**
 * Decodes a tuple into a term/document key (or full record) where the
 * document identifier is an {@link IV}.
 *
 * <p>The tuple key layout is {unicode sort key, compact term weight (1 byte),
 * docId IV}; the IV's byte length is read from the head of the tuple value.
 *
 * @param tuple   the tuple whose key encodes the term/document fields
 * @param keyOnly when {@code true}, materialize only the key fields
 * @return a read-only key view, or a full record when {@code keyOnly} is
 *         {@code false}
 */
protected ITermDocKey deserialize(final ITuple tuple, final boolean keyOnly) {

    final ByteArrayBuffer kbuf = tuple.getKeyBuffer();

    // The byte length of the docId IV, stored at the head of the value.
    final int byteLength;
    try {
        byteLength = ShortPacker.unpackShort((DataInput) tuple
                .getValueStream());
    } catch (IOException ex) {
        throw new RuntimeException(ex);
    }

    // The IV occupies the trailing [byteLength] bytes of the key.
    final int docIdOffset = kbuf.limit() - byteLength;

    // Decode the IV.
    final IV docId = (IV) IVUtility.decodeFromOffset(kbuf.array(),
            docIdOffset);

    // The compact (single-byte) term weight immediately precedes the docId.
    final int termWeightOffset = docIdOffset - Bytes.SIZEOF_BYTE;
    final byte termWeightCompact = kbuf.getByte(termWeightOffset);

    /*
     * See: http://lucene.apache.org/core/old_versioned_docs/versions/3_0_2/api/all/org/apache/lucene/search/Similarity.html
     *
     * For more information on the round-trip of normalized term weight.
     */
    final DefaultSimilarity similarity = new DefaultSimilarity();
    final double termWeight = similarity.decodeNormValue(termWeightCompact);

    if (keyOnly) {
        return new ReadOnlyTermDocKey(docId, NO_FIELD, termWeight);
    }

    return new ReadOnlyTermDocRecord(null/* token */, docId, NO_FIELD,
            /* termFreq, */ termWeight);
}
/**
 * Serializes a term/document entry into its B+Tree key:
 * {unicode sort key of the token, compact term weight, docId[, fieldId]}.
 *
 * @param obj an {@code ITermDocKey<V>} entry
 * @return the encoded key bytes
 */
@Override
public byte[] serializeKey(final Object obj) {

    @SuppressWarnings("unchecked")
    final ITermDocKey<V> entry = (ITermDocKey<V>) obj;

    final String token = entry.getToken();
    final double localWeight = entry.getLocalTermWeight();

    /*
     * Compact the weight into a single byte; see
     * http://lucene.apache.org/core/5_1_0/core/org/apache/lucene/search/similarities/DefaultSimilarity.html
     * for the round-trip of the normalized term weight.
     */
    final DefaultSimilarity similarity = new DefaultSimilarity();
    final long compactWeight = similarity.encodeNormValue((float) localWeight);

    final V docId = entry.getDocId();

    final IKeyBuilder keyBuilder = getKeyBuilder();
    keyBuilder.reset();
    // the token text (or its successor as desired).
    keyBuilder.appendText(token, true/* unicode */, false/* successor */);
    keyBuilder.append(compactWeight);
    keyBuilder.append(docId);
    if (fieldsEnabled) {
        keyBuilder.append(entry.getFieldId());
    }
    final byte[] key = keyBuilder.getKey();

    if (log.isDebugEnabled()) {
        log.debug("{" + token + "," + docId
                + (fieldsEnabled ? "," + entry.getFieldId() : "")
                + "}, key=" + BytesUtil.toString(key));
    }

    return key;
}
/**
 * Serializes a term/document entry into its B+Tree key:
 * {unicode sort key of the token, compact term weight, docId IV}.
 *
 * @param obj an {@code ITermDocKey} entry whose document id is an IV
 * @return the encoded key bytes
 */
@Override
public byte[] serializeKey(final Object obj) {

    final ITermDocKey entry = (ITermDocKey) obj;

    final String token = entry.getToken();
    final double localWeight = entry.getLocalTermWeight();

    /*
     * Compact the weight into a single byte; see
     * http://lucene.apache.org/core/old_versioned_docs/versions/3_0_2/api/all/org/apache/lucene/search/Similarity.html
     * for the round-trip of the normalized term weight.
     */
    final DefaultSimilarity similarity = new DefaultSimilarity();
    final long compactWeight = similarity.encodeNormValue((float) localWeight);

    final IV docId = (IV) entry.getDocId();

    final IKeyBuilder keyBuilder = getKeyBuilder();
    keyBuilder.reset();
    // the token text (or its successor as desired).
    keyBuilder.appendText(token, true/* unicode */, false/* successor */);
    keyBuilder.append(compactWeight);
    IVUtility.encode(keyBuilder, docId);
    final byte[] key = keyBuilder.getKey();

    if (log.isDebugEnabled()) {
        log.debug("{" + token + "," + docId + "}, key="
                + BytesUtil.toString(key));
    }

    return key;
}