The following examples show how to use the org.apache.lucene.index.Fields API class. You can also follow the links to view the source code on GitHub.
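Fields is Lucene's flat iteration surface over per-field Terms: it is an Iterable<String> of field names, and terms(field) returns the Terms for one field (or null). Before the individual examples, here is a minimal traversal sketch, assuming Lucene 8.x; the index path and field handling are illustrative, not part of the examples below.

import java.io.IOException;
import java.nio.file.Paths;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.BytesRef;

public class FieldsTraversal {
    public static void main(String[] args) throws IOException {
        // "/path/to/index" is a placeholder; any index with term vectors enabled works.
        try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get("/path/to/index")))) {
            // Term vectors for one document are exposed as a Fields, or null if none were stored.
            Fields vectors = reader.getTermVectors(0);
            if (vectors == null) {
                return;
            }
            for (String field : vectors) {
                // terms(field) may still return null, so every caller checks it.
                Terms terms = vectors.terms(field);
                if (terms == null) {
                    continue;
                }
                TermsEnum termsEnum = terms.iterator();
                BytesRef term;
                while ((term = termsEnum.next()) != null) {
                    System.out.println(field + " -> " + term.utf8ToString());
                }
            }
        }
    }
}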
public DfsOnlyRequest(Fields termVectorsFields, String[] indices, String[] types, Set<String> selectedFields) throws IOException {
super(indices);
// build a search request with a query of all the terms
final BoolQueryBuilder boolBuilder = boolQuery();
for (String fieldName : termVectorsFields) {
if ((selectedFields != null) && (!selectedFields.contains(fieldName))) {
continue;
}
Terms terms = termVectorsFields.terms(fieldName);
TermsEnum iterator = terms.iterator();
while (iterator.next() != null) {
String text = iterator.term().utf8ToString();
boolBuilder.should(QueryBuilders.termQuery(fieldName, text));
}
}
// wrap a search request object
this.searchRequest = new SearchRequest(indices).types(types).source(new SearchSourceBuilder().query(boolBuilder));
}
/**
* A convenience method that first tries to get a {@link TokenStreamFromTermVector} for the
* specified docId, and then falls back to using the passed-in
* {@link org.apache.lucene.document.Document} to retrieve the TokenStream.
* This is useful when you already have the document, but would prefer to use
* the vector first.
*
* @param reader The {@link org.apache.lucene.index.IndexReader} to use to try
* and get the vector from
* @param docId The docId to retrieve.
* @param field The field to retrieve on the document
* @param document The document to fall back on
* @param analyzer The analyzer to use for creating the TokenStream if the
* vector doesn't exist
* @return The {@link org.apache.lucene.analysis.TokenStream} for the
* {@link org.apache.lucene.index.IndexableField} on the
* {@link org.apache.lucene.document.Document}
* @throws IOException if there was an error loading
*/
@Deprecated // maintenance reasons LUCENE-6445
public static TokenStream getAnyTokenStream(IndexReader reader, int docId,
String field, Document document, Analyzer analyzer) throws IOException {
TokenStream ts = null;
Fields vectors = reader.getTermVectors(docId);
if (vectors != null) {
Terms vector = vectors.terms(field);
if (vector != null) {
ts = getTokenStream(vector);
}
}
// No token info stored so fall back to analyzing raw content
if (ts == null) {
ts = getTokenStream(document, field, analyzer);
}
return ts;
}
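A hedged usage sketch for the overload above: TokenSources.getAnyTokenStream lives in the lucene-highlighter module, and the reader, docId, and analyzer here are assumed to come from the surrounding application; the field name "body" is illustrative.

// Consume whatever token stream the method found, term vector or re-analysis.
Document doc = reader.document(docId); // the already-loaded document to fall back on
try (TokenStream ts = TokenSources.getAnyTokenStream(reader, docId, "body", doc, analyzer)) {
    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
        System.out.println(termAtt.toString());
    }
    ts.end();
}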
/**
* A convenience method that tries a number of approaches to getting a token
* stream. The cost of discovering that there are no termVectors in the index is
* minimal (1000 invocations still register 0 ms), so this "lazy" approach to
* coding is acceptable.
*
* @return null if field not stored correctly
* @throws IOException If there is a low-level I/O error
*/
@Deprecated // maintenance reasons LUCENE-6445
public static TokenStream getAnyTokenStream(IndexReader reader, int docId,
String field, Analyzer analyzer) throws IOException {
TokenStream ts = null;
Fields vectors = reader.getTermVectors(docId);
if (vectors != null) {
Terms vector = vectors.terms(field);
if (vector != null) {
ts = getTokenStream(vector);
}
}
// No token info stored so fall back to analyzing raw content
if (ts == null) {
ts = getTokenStream(reader, docId, field, analyzer);
}
return ts;
}
/**
* Returns a {@link TokenStream} with positions and offsets constructed from
* field termvectors. If the field has no termvectors or offsets
* are not included in the termvector, return null. See {@link #getTokenStream(org.apache.lucene.index.Terms)}
* for an explanation of what happens when positions aren't present.
*
* @param reader the {@link IndexReader} to retrieve term vectors from
* @param docId the document to retrieve termvectors for
* @param field the field to retrieve termvectors for
* @return a {@link TokenStream}, or null if offsets are not available
* @throws IOException If there is a low-level I/O error
*
* @see #getTokenStream(org.apache.lucene.index.Terms)
*/
@Deprecated // maintenance reasons LUCENE-6445
public static TokenStream getTokenStreamWithOffsets(IndexReader reader, int docId,
String field) throws IOException {
Fields vectors = reader.getTermVectors(docId);
if (vectors == null) {
return null;
}
Terms vector = vectors.terms(field);
if (vector == null) {
return null;
}
if (!vector.hasOffsets()) {
return null;
}
return getTokenStream(vector);
}
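A short sketch of how a caller typically handles the null cases above, re-analyzing stored content when offsets are unavailable (reader, docId, and analyzer are assumed from context; "body" is an illustrative field name):

TokenStream ts = TokenSources.getTokenStreamWithOffsets(reader, docId, "body");
if (ts == null) {
    // No term vectors, or offsets were not indexed with them: re-analyze the stored value.
    ts = analyzer.tokenStream("body", reader.document(docId).get("body"));
}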
@Override
public void withTopDocs(IndexSearcher searcher, Query q, TopDocs hits) throws Exception {
IndexReader reader = searcher.getIndexReader();
highlighter.setFragmentScorer(new QueryScorer(q));
// highlighter.setTextFragmenter(); unfortunately no sentence mechanism, not even regex. Default here is trivial
for (ScoreDoc scoreDoc : docIdOrder(hits.scoreDocs)) {
Document document = reader.document(scoreDoc.doc, hlFields);
Fields tvFields = termVecs ? reader.getTermVectors(scoreDoc.doc) : null;
for (IndexableField indexableField : document) {
TokenStream tokenStream;
if (termVecs) {
tokenStream = TokenSources.getTokenStream(indexableField.name(), tvFields,
indexableField.stringValue(), analyzer, maxDocCharsToAnalyze);
} else {
tokenStream = analyzer.tokenStream(indexableField.name(), indexableField.stringValue());
}
// will close TokenStream:
String[] fragments = highlighter.getBestFragments(tokenStream, indexableField.stringValue(), maxFrags);
preventOptimizeAway = fragments.length;
}
}
}
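The benchmark method above assumes a pre-configured Highlighter. A minimal construction sketch follows; the names query, tokenStream, and text stand in for values from the surrounding code, the fragment size is illustrative, and the "trivial" default fragmenter the comment refers to is SimpleFragmenter:

Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(query));
highlighter.setTextFragmenter(new SimpleFragmenter(100)); // fixed-size fragments; no sentence awareness
// getBestFragments(TokenStream, String, int) may throw InvalidTokenOffsetsException.
String[] fragments = highlighter.getBestFragments(tokenStream, text, 3);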
@Override
public void write(Fields fields, NormsProducer norms) throws IOException {
for(String field : fields) {
Terms terms = fields.terms(field);
if (terms == null) {
continue;
}
TermsEnum termsEnum = terms.iterator();
TermsWriter termsWriter = addField(fieldInfos.fieldInfo(field));
while (true) {
BytesRef term = termsEnum.next();
if (term == null) {
break;
}
termsWriter.write(term, termsEnum, norms);
}
termsWriter.finish();
}
}
@Override
public void write(Fields fields, NormsProducer normsProducer) throws IOException {
BlockWriter blockWriter = new BlockWriter(blockOutput, targetNumBlockLines, deltaNumLines, blockEncoder);
ByteBuffersDataOutput fieldsOutput = new ByteBuffersDataOutput();
int fieldsNumber = 0;
for (String field : fields) {
Terms terms = fields.terms(field);
if (terms != null) {
TermsEnum termsEnum = terms.iterator();
FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
fieldsNumber += writeFieldTerms(blockWriter, fieldsOutput, termsEnum, fieldInfo, normsProducer);
}
}
writeFieldsMetadata(fieldsNumber, fieldsOutput);
CodecUtil.writeFooter(dictionaryOutput);
}
private Collection<FieldMetadata> writeSingleSegment(Fields fields, NormsProducer normsProducer, STBlockWriter blockWriter, IndexDictionary.Builder dictionaryBuilder) throws IOException {
List<FieldMetadata> fieldMetadataList = createFieldMetadataList(new FieldsIterator(fields, fieldInfos), maxDoc);
TermIteratorQueue<FieldTerms> fieldTermsQueue = createFieldTermsQueue(fields, fieldMetadataList);
List<TermIterator<FieldTerms>> groupedFieldTerms = new ArrayList<>(fieldTermsQueue.size());
List<FieldMetadataTermState> termStates = new ArrayList<>(fieldTermsQueue.size());
while (fieldTermsQueue.size() != 0) {
TermIterator<FieldTerms> topFieldTerms = fieldTermsQueue.popTerms();
BytesRef term = BytesRef.deepCopyOf(topFieldTerms.term);
groupByTerm(fieldTermsQueue, topFieldTerms, groupedFieldTerms);
writePostingLines(term, groupedFieldTerms, normsProducer, termStates);
blockWriter.addLine(term, termStates, dictionaryBuilder);
nextTermForIterators(groupedFieldTerms, fieldTermsQueue);
}
return fieldMetadataList;
}
/** Merges in the fields from the readers in
* <code>mergeState</code>. The default implementation skips
* and maps around deleted documents, and calls {@link #write(Fields,NormsProducer)}.
* Implementations can override this method for more sophisticated
* merging (bulk-byte copying, etc). */
public void merge(MergeState mergeState, NormsProducer norms) throws IOException {
final List<Fields> fields = new ArrayList<>();
final List<ReaderSlice> slices = new ArrayList<>();
int docBase = 0;
for(int readerIndex=0;readerIndex<mergeState.fieldsProducers.length;readerIndex++) {
final FieldsProducer f = mergeState.fieldsProducers[readerIndex];
final int maxDoc = mergeState.maxDocs[readerIndex];
f.checkIntegrity();
slices.add(new ReaderSlice(docBase, maxDoc, readerIndex));
fields.add(f);
docBase += maxDoc;
}
Fields mergedFields = new MappedMultiFields(mergeState,
new MultiFields(fields.toArray(Fields.EMPTY_ARRAY),
slices.toArray(ReaderSlice.EMPTY_ARRAY)));
write(mergedFields, norms);
}
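For orientation, here is a minimal FieldsConsumer sketch showing the shape of the write contract that the default merge above feeds with the merged Fields. It is purely illustrative (it only counts terms) and assumes Lucene 8.x, where write(Fields, NormsProducer) is the abstract method; a real codec must actually persist the postings here.

import java.io.IOException;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;

final class CountingFieldsConsumer extends FieldsConsumer {
    long termCount;

    @Override
    public void write(Fields fields, NormsProducer norms) throws IOException {
        // Field names arrive in sorted order; terms(field) may be null for ghost fields.
        for (String field : fields) {
            Terms terms = fields.terms(field);
            if (terms == null) {
                continue;
            }
            TermsEnum termsEnum = terms.iterator();
            while (termsEnum.next() != null) {
                termCount++;
            }
        }
    }

    @Override
    public void close() throws IOException {
        // Nothing to release in this sketch.
    }
}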
private void estimateTermVectors(Map<String, Object> result) throws IOException {
log.info("- estimating term vectors...");
Map<String, Map<String, Object>> stats = new HashMap<>();
for (LeafReaderContext leafReaderContext : reader.leaves()) {
LeafReader leafReader = leafReaderContext.reader();
Bits liveDocs = leafReader.getLiveDocs();
for (int docId = 0; docId < leafReader.maxDoc(); docId += samplingStep) {
if (liveDocs != null && !liveDocs.get(docId)) {
continue;
}
Fields termVectors = leafReader.getTermVectors(docId);
if (termVectors == null) {
continue;
}
for (String field : termVectors) {
Terms terms = termVectors.terms(field);
if (terms == null) {
continue;
}
estimateTermStats(field, terms, stats, true);
}
}
}
result.put(TERM_VECTORS, stats);
}
@Override
public void merge(MergeState mergeState) throws IOException {
final List<Fields> fields = new ArrayList<>();
final List<ReaderSlice> slices = new ArrayList<>();
int docBase = 0;
for (int readerIndex = 0; readerIndex < mergeState.fieldsProducers.length; readerIndex++) {
final FieldsProducer f = mergeState.fieldsProducers[readerIndex];
final int maxDoc = mergeState.maxDocs[readerIndex];
f.checkIntegrity();
slices.add(new ReaderSlice(docBase, maxDoc, readerIndex));
fields.add(f);
docBase += maxDoc;
}
Fields mergedFields = new MappedMultiFields(mergeState,
new MultiFields(fields.toArray(Fields.EMPTY_ARRAY),
slices.toArray(ReaderSlice.EMPTY_ARRAY)));
write(mergedFields);
}
private void runOldMergeSortRowIdCheckAndDelete(boolean emitDeletes, IndexReader currentIndexReader,
BlurPartitioner blurPartitioner, Text key, int numberOfShards, int shardId, Action action,
AtomicReader atomicReader) throws IOException {
MergeSortRowIdLookup lookup = new MergeSortRowIdLookup(currentIndexReader);
Fields fields = atomicReader.fields();
Terms terms = fields.terms(BlurConstants.ROW_ID);
if (terms != null) {
TermsEnum termsEnum = terms.iterator(null);
BytesRef ref = null;
while ((ref = termsEnum.next()) != null) {
key.set(ref.bytes, ref.offset, ref.length);
int partition = blurPartitioner.getPartition(key, null, numberOfShards);
if (shardId != partition) {
throw new IOException("Index is corrupted, RowIds are found in wrong shard, partition [" + partition
+ "] does not shard [" + shardId + "], this can happen when rows are not hashed correctly.");
}
if (emitDeletes) {
lookup.lookup(ref, action);
}
}
}
}
private IterableRow getIterableRow(String rowId, IndexSearcherCloseable searcher) throws IOException {
IndexReader indexReader = searcher.getIndexReader();
BytesRef rowIdRef = new BytesRef(rowId);
List<AtomicReaderTermsEnum> possibleRowIds = new ArrayList<AtomicReaderTermsEnum>();
for (AtomicReaderContext atomicReaderContext : indexReader.leaves()) {
AtomicReader atomicReader = atomicReaderContext.reader();
Fields fields = atomicReader.fields();
if (fields == null) {
continue;
}
Terms terms = fields.terms(BlurConstants.ROW_ID);
if (terms == null) {
continue;
}
TermsEnum termsEnum = terms.iterator(null);
if (!termsEnum.seekExact(rowIdRef, true)) {
continue;
}
// need atomic read as well...
possibleRowIds.add(new AtomicReaderTermsEnum(atomicReader, termsEnum));
}
if (possibleRowIds.isEmpty()) {
return null;
}
return new IterableRow(rowId, getRecords(possibleRowIds));
}
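seekExact is the key call in the method above: it positions the enum directly on the row id instead of scanning terms. A minimal sketch against the newer API (Lucene 7+, where LeafReader.terms(field) replaced fields().terms(field) and seekExact takes no reuse flag; the field and term are illustrative):

for (LeafReaderContext ctx : reader.leaves()) {
    Terms terms = ctx.reader().terms("rowid");
    if (terms == null) {
        continue;
    }
    TermsEnum termsEnum = terms.iterator();
    if (termsEnum.seekExact(new BytesRef("row-123"))) {
        PostingsEnum postings = termsEnum.postings(null, PostingsEnum.NONE);
        int doc;
        while ((doc = postings.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            System.out.println("matched segment-local doc " + doc);
        }
    }
}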
@Override
public Fields getTermVectors(final int docID) throws IOException {
final Fields fields = in.getTermVectors(docID);
if (!flsEnabled || fields == null) {
return fields;
}
return new Fields() {
@Override
public Iterator<String> iterator() {
return Iterators.<String> filter(fields.iterator(), new Predicate<String>() {
@Override
public boolean apply(final String input) {
return isFls(input);
}
});
}
@Override
public Terms terms(final String field) throws IOException {
if (!isFls(field)) {
return null;
}
return wrapTerms(field, in.terms(field));
}
@Override
public int size() {
return flsFieldInfos.size();
}
};
}
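The same field-level security filtering can be written without Guava by extending Lucene's FilterLeafReader.FilterFields. A sketch assuming a visibility predicate like the isFls check above; note that size() here recounts the visible fields rather than reporting a precomputed number:

import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.function.Predicate;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.FilterLeafReader;
import org.apache.lucene.index.Terms;

final class VisibleFields extends FilterLeafReader.FilterFields {
    private final Predicate<String> isVisible;

    VisibleFields(Fields in, Predicate<String> isVisible) {
        super(in);
        this.isVisible = isVisible;
    }

    @Override
    public Iterator<String> iterator() {
        List<String> visible = new ArrayList<>();
        for (String field : in) {
            if (isVisible.test(field)) {
                visible.add(field);
            }
        }
        return visible.iterator();
    }

    @Override
    public Terms terms(String field) throws IOException {
        return isVisible.test(field) ? in.terms(field) : null;
    }

    @Override
    public int size() {
        int count = 0;
        for (Iterator<String> it = iterator(); it.hasNext(); it.next()) {
            count++;
        }
        return count;
    }
}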
/**
* Reload method of the spellcheck listener.
* @param newSearcher the newly opened searcher to read terms from
* @param checker the spell checker whose dictionary is repopulated
* @throws IOException
* @throws SpellCheckException
*/
public void reload(SolrIndexSearcher newSearcher, SpellChecker checker)
throws IOException, SpellCheckException {
DirectoryReader productsIndexReader = newSearcher.getIndexReader();
Fields fields = MultiFields.getFields(productsIndexReader);
IndexSchema schema = newSearcher.getCore().getLatestSchema();
long time = System.currentTimeMillis();
for (String field : fields) {
if (!fieldArr.contains(field)) {
continue;
}
FieldType type = schema.getField(field).getType();
Terms terms = fields.terms(field);
if (terms == null) {
continue; // guard against ghost fields: the iterator can list a field that has no terms
}
int insertionsCount = 0;
for (TermsEnum iterator = terms.iterator(); iterator.next() != null; ) {
BytesRef term = iterator.term();
CharsRefBuilder charsRefBuilder = new CharsRefBuilder();
type.indexedToReadable(term, charsRefBuilder);
insertionsCount++;
checker.getDataHolder().addItem(
new DictionaryItem(charsRefBuilder.toString().trim(), (double) iterator.totalTermFreq(),
0.0));
}
log.info("Spellcheck Dictionary populated for Field Name {}, Count {}", field,
insertionsCount);
}
log.info("Data for SpellChecker was populated. Time={} ms",
(System.currentTimeMillis() - time));
}
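MultiFields.getFields (used above) merges all segments into one index-wide Fields view; it exists through Lucene 7.x, while Lucene 8 replaced it with per-field MultiTerms.getTerms. A short sketch of the index-wide traversal it enables (reader assumed from context):

Fields fields = MultiFields.getFields(reader); // one merged view across all segments
for (String field : fields) {
    Terms terms = fields.terms(field); // may be null for ghost fields
    if (terms == null) {
        continue;
    }
    for (TermsEnum it = terms.iterator(); it.next() != null; ) {
        System.out.println(field + ":" + it.term().utf8ToString() + " ttf=" + it.totalTermFreq());
    }
}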
public static Fields[] getFieldsFor(MultiTermVectorsResponse responses) throws IOException {
List<Fields> likeFields = new ArrayList<>();
for (MultiTermVectorsItemResponse response : responses) {
if (response.isFailed()) {
continue;
}
TermVectorsResponse getResponse = response.getResponse();
if (!getResponse.isExists()) {
continue;
}
likeFields.add(getResponse.getFields());
}
return likeFields.toArray(Fields.EMPTY_ARRAY);
}
public Fields getFields() throws IOException {
if (hasTermVectors() && isExists()) {
if (!sourceCopied) { // make the bytes safe
headerRef = headerRef.copyBytesArray();
termVectors = termVectors.copyBytesArray();
}
TermVectorsFields termVectorsFields = new TermVectorsFields(headerRef, termVectors);
hasScores = termVectorsFields.hasScores;
return termVectorsFields;
} else {
return new Fields() {
@Override
public Iterator<String> iterator() {
return Collections.emptyIterator();
}
@Override
public Terms terms(String field) throws IOException {
return null;
}
@Override
public int size() {
return 0;
}
};
}
}
private void buildField(XContentBuilder builder, final CharsRefBuilder spare, Fields theFields, Iterator<String> fieldIter) throws IOException {
String fieldName = fieldIter.next();
builder.startObject(fieldName);
Terms curTerms = theFields.terms(fieldName);
// write field statistics
buildFieldStatistics(builder, curTerms);
builder.startObject(FieldStrings.TERMS);
TermsEnum termIter = curTerms.iterator();
BoostAttribute boostAtt = termIter.attributes().addAttribute(BoostAttribute.class);
for (int i = 0; i < curTerms.size(); i++) {
buildTerm(builder, spare, curTerms, termIter, boostAtt);
}
builder.endObject();
builder.endObject();
}
public void setFields(Fields termVectorsByField, Set<String> selectedFields, EnumSet<Flag> flags, Fields topLevelFields, @Nullable AggregatedDfs dfs,
TermVectorsFilter termVectorsFilter) throws IOException {
TermVectorsWriter tvw = new TermVectorsWriter(this);
if (termVectorsByField != null) {
tvw.setFields(termVectorsByField, selectedFields, flags, topLevelFields, dfs, termVectorsFilter);
}
}
public TermVectorLeafReader(String field, Terms terms) {
fields = new Fields() {
@Override
public Iterator<String> iterator() {
return Collections.singletonList(field).iterator();
}
@Override
public Terms terms(String fld) throws IOException {
if (!field.equals(fld)) {
return null;
}
return terms;
}
@Override
public int size() {
return 1;
}
};
IndexOptions indexOptions;
if (!terms.hasFreqs()) {
indexOptions = IndexOptions.DOCS;
} else if (!terms.hasPositions()) {
indexOptions = IndexOptions.DOCS_AND_FREQS;
} else if (!terms.hasOffsets()) {
indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
} else {
indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
}
FieldInfo fieldInfo = new FieldInfo(field, 0,
true, true, terms.hasPayloads(),
indexOptions, DocValuesType.NONE, -1, Collections.emptyMap(), 0, 0, 0, false);
fieldInfos = new FieldInfos(new FieldInfo[]{fieldInfo});
}
@Override
public Fields getTermVectors(int docID) throws IOException {
if (docID != 0) {
return null;
}
return fields;
}
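A sketch of how this single-document view is typically constructed (TermVectorLeafReader is in the lucene-highlighter module; reader, docId, and the "body" field are assumed or illustrative):

Fields vectors = reader.getTermVectors(docId);
Terms vector = vectors == null ? null : vectors.terms("body");
if (vector != null) {
    // A one-document LeafReader whose docID 0 answers postings and term vector
    // APIs from the single field's term vector.
    LeafReader oneDocView = new TermVectorLeafReader("body", vector);
    Terms sameTerms = oneDocView.getTermVectors(0).terms("body");
}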
@Override
public Fields getTermVectors(int docID) throws IOException {
if (docID != lastDocId) {
lastDocId = docID;
tvFields = in.getTermVectors(docID);
}
return tvFields;
}
@Override
public LeafReader wrap(LeafReader reader) {
return new FilterLeafReader(reader) {
BitSet seenDocIDs = new BitSet();
@Override
public Fields getTermVectors(int docID) throws IOException {
// if we're invoked by ParallelLeafReader then we can't do our assertion. TODO see LUCENE-6868
if (callStackContains(ParallelLeafReader.class) == false
&& callStackContains(CheckIndex.class) == false) {
assertFalse("Should not request TVs for doc more than once.", seenDocIDs.get(docID));
seenDocIDs.set(docID);
}
return super.getTermVectors(docID);
}
@Override
public CacheHelper getCoreCacheHelper() {
return null;
}
@Override
public CacheHelper getReaderCacheHelper() {
return null;
}
};
}
@Override
public void write(Fields fields, NormsProducer norms) throws IOException {
for(String field : fields) {
Terms terms = fields.terms(field);
if (terms == null) {
continue;
}
FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
boolean hasFreq = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
TermsEnum termsEnum = terms.iterator();
TermsWriter termsWriter = new TermsWriter(fieldInfo);
long sumTotalTermFreq = 0;
long sumDocFreq = 0;
FixedBitSet docsSeen = new FixedBitSet(maxDoc);
while (true) {
BytesRef term = termsEnum.next();
if (term == null) {
break;
}
BlockTermState termState = postingsWriter.writeTerm(term, termsEnum, docsSeen, norms);
if (termState != null) {
termsWriter.finishTerm(term, termState);
sumTotalTermFreq += termState.totalTermFreq;
sumDocFreq += termState.docFreq;
}
}
termsWriter.finish(hasFreq ? sumTotalTermFreq : -1, sumDocFreq, docsSeen.cardinality());
}
}
@Override
public void write(Fields fields, NormsProducer norms) throws IOException {
String lastField = null;
for(String field : fields) {
assert lastField == null || lastField.compareTo(field) < 0;
lastField = field;
Terms terms = fields.terms(field);
if (terms == null) {
continue;
}
TermsEnum termsEnum = terms.iterator();
TermsWriter termsWriter = new TermsWriter(fieldInfos.fieldInfo(field));
while (true) {
BytesRef term = termsEnum.next();
if (term == null) {
break;
}
termsWriter.write(term, termsEnum, norms);
}
termsWriter.finish();
}
}
private TermIteratorQueue<FieldTerms> createFieldTermsQueue(Fields fields, List<FieldMetadata> fieldMetadataList) throws IOException {
TermIteratorQueue<FieldTerms> fieldQueue = new TermIteratorQueue<>(fieldMetadataList.size());
for (FieldMetadata fieldMetadata : fieldMetadataList) {
Terms terms = fields.terms(fieldMetadata.getFieldInfo().name);
if (terms != null) {
FieldTerms fieldTerms = new FieldTerms(fieldMetadata, terms.iterator());
if (fieldTerms.nextTerm()) {
// There is at least one term for the field.
fieldQueue.add(fieldTerms);
}
}
}
return fieldQueue;
}
/**
* Find words for a more-like-this query former.
*
* @param docNum the id of the lucene document from which to find terms
*/
private PriorityQueue<ScoreTerm> retrieveTerms(int docNum) throws IOException {
Map<String, Map<String, Int>> field2termFreqMap = new HashMap<>();
for (String fieldName : fieldNames) {
final Fields vectors = ir.getTermVectors(docNum);
final Terms vector;
if (vectors != null) {
vector = vectors.terms(fieldName);
} else {
vector = null;
}
// field does not store term vector info
if (vector == null) {
Document d = ir.document(docNum);
IndexableField[] fields = d.getFields(fieldName);
for (IndexableField field : fields) {
final String stringValue = field.stringValue();
if (stringValue != null) {
addTermFrequencies(new StringReader(stringValue), field2termFreqMap, fieldName);
}
}
} else {
addTermFrequencies(field2termFreqMap, vector, fieldName);
}
}
return createQueue(field2termFreqMap);
}
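retrieveTerms above is the private core of MoreLikeThis; user code reaches the same term-vector-with-fallback path through the public like(int docNum) API. A hedged usage sketch (field names and thresholds are illustrative):

MoreLikeThis mlt = new MoreLikeThis(reader);
mlt.setAnalyzer(analyzer); // needed for the fallback when fields store no term vectors
mlt.setFieldNames(new String[] {"body"});
mlt.setMinTermFreq(2); // ignore terms that are rare within the source document
mlt.setMinDocFreq(5); // ignore terms that are rare across the index
Query like = mlt.like(docNum); // boolean query over the highest-scoring terms
TopDocs similar = searcher.search(like, 10);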
@Override
public void write(Fields fields, NormsProducer norms) throws IOException {
delegateFieldsConsumer.write(fields, norms);
for (String field : fields) {
CompletionTermWriter termWriter = new CompletionTermWriter();
Terms terms = fields.terms(field);
if (terms == null) {
// this can happen from ghost fields, where the incoming Fields iterator claims a field exists but it does not
continue;
}
TermsEnum termsEnum = terms.iterator();
// write terms
BytesRef term;
while ((term = termsEnum.next()) != null) {
termWriter.write(term, termsEnum);
}
// store lookup, if needed
long filePointer = dictOut.getFilePointer();
if (termWriter.finish(dictOut)) {
seenFields.put(field, new CompletionMetaData(filePointer,
termWriter.minWeight,
termWriter.maxWeight,
termWriter.type));
}
}
}
@Override
public void write(Fields fields, NormsProducer norms) throws IOException {
String lastField = null;
for(String field : fields) {
assert lastField == null || lastField.compareTo(field) < 0;
lastField = field;
Terms terms = fields.terms(field);
if (terms == null) {
continue;
}
TermsEnum termsEnum = terms.iterator();
TermsWriter termsWriter = new TermsWriter(fieldInfos.fieldInfo(field));
while (true) {
BytesRef term = termsEnum.next();
if (term == null) {
break;
}
termsWriter.write(term, termsEnum, norms);
}
termsWriter.finish();
}
}
@Override
public void write(Fields fields, NormsProducer norms) throws IOException {
if (random.nextInt(100) == 0) {
throw new IOException("Fake IOException from FieldsConsumer.write()");
}
delegate.write(fields, norms);
}