Below are example usages of org.apache.lucene.codecs.FieldsProducer with org.apache.lucene.index.SegmentReadState. Follow the links to view the source code on GitHub, or leave a comment in the panel on the right.
@Override
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
  // Read the segment's postings with the delegate Lucene84 format.
  FieldsProducer delegate = PostingsFormat.forName("Lucene84").fieldsProducer(state);
  if (state.context.context == IOContext.Context.MERGE) {
    // Don't load postings for merge: hand back the raw delegate.
    return delegate;
  }
  FieldsProducer direct;
  try {
    delegate.checkIntegrity();
    // Materialize all postings into DirectFields, then release the delegate below.
    direct = new DirectFields(state, delegate, minSkipCount, lowFreqCutoff);
  } finally {
    delegate.close();
  }
  return direct;
}
@Override
public FieldsProducer fieldsProducer(SegmentReadState readState)
  throws IOException {
  // Recover the per-segment id written at index time.
  final String idFileName = IndexFileNames.segmentFileName(readState.segmentInfo.name, readState.segmentSuffix, ID_EXTENSION);
  IndexInput in = readState.directory.openInput(idFileName, readState.context);
  final int id;
  boolean ok = false;
  try {
    CodecUtil.checkHeader(in, RAM_ONLY_NAME, VERSION_START, VERSION_LATEST);
    id = in.readVInt();
    ok = true;
  } finally {
    if (ok) {
      IOUtils.close(in);
    } else {
      // Failed mid-read: close without masking the pending exception.
      IOUtils.closeWhileHandlingException(in);
    }
  }
  // 'state' here is the enclosing class's id->postings map, not the read state.
  synchronized (state) {
    return state.get(id);
  }
}
public void testRandom() throws Exception {
  // Run several independent random build/verify rounds.
  final int rounds = 5;
  for (int round = 0; round < rounds; round++) {
    Path tempPath = createTempDir("testPostingsFormat");
    Directory directory = newFSDirectory(tempPath);
    boolean withPayloads = random().nextBoolean();
    // TODO test thread safety of buildIndex too
    FieldsProducer producer = postingsTester.buildIndex(getCodec(), directory, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, withPayloads, false);
    postingsTester.testFields(producer);
    // NOTE: you can also test "weaker" index options than
    // you indexed with:
    postingsTester.testTerms(producer, EnumSet.allOf(RandomPostingsTester.Option.class), IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, false);
    producer.close();
    producer = null;
    directory.close();
  }
}
public void testPostingsEnumReuse() throws Exception {
  Path tempPath = createTempDir("testPostingsEnumReuse");
  Directory directory = newFSDirectory(tempPath);
  FieldsProducer producer = postingsTester.buildIndex(getCodec(), directory, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, random().nextBoolean(), true);
  // Pick an arbitrary indexed term by shuffling and taking the first entry.
  Collections.shuffle(postingsTester.allTerms, random());
  RandomPostingsTester.FieldAndTerm target = postingsTester.allTerms.get(0);
  TermsEnum termsEnum = producer.terms(target.field).iterator();
  termsEnum.seekExact(target.term);
  // Requesting more features (ALL) than the previous enum had (FREQS) must not reuse it.
  checkReuse(termsEnum, PostingsEnum.FREQS, PostingsEnum.ALL, false);
  if (isPostingsEnumReuseImplemented()) {
    checkReuse(termsEnum, PostingsEnum.ALL, PostingsEnum.ALL, true);
  }
  producer.close();
  directory.close();
}
/** Indexes all fields/terms at the specified
 * IndexOptions, and fully tests at that IndexOptions. */
public void testFull(Codec codec, Path path, IndexOptions options, boolean withPayloads) throws Exception {
  Directory dir = LuceneTestCase.newFSDirectory(path);
  // TODO test thread safety of buildIndex too
  FieldsProducer producer = buildIndex(codec, dir, options, withPayloads, true);
  testFields(producer);
  // Test each IndexOptions up to and including the one we indexed with.
  int maxIndexOption = Arrays.asList(IndexOptions.values()).indexOf(options);
  for (int i = 0; i <= maxIndexOption; i++) {
    IndexOptions testOptions = IndexOptions.values()[i];
    testTerms(producer, EnumSet.allOf(Option.class), testOptions, options, true);
    if (withPayloads) {
      // If we indexed w/ payloads, also test enums w/o accessing payloads:
      testTerms(producer, EnumSet.complementOf(EnumSet.of(Option.PAYLOADS)), testOptions, options, true);
    }
  }
  producer.close();
  dir.close();
}
@Override
public void merge(MergeState mergeState) throws IOException {
  // Collect each segment's fields plus a slice describing its doc-id range.
  final List<Fields> subs = new ArrayList<>();
  final List<ReaderSlice> subSlices = new ArrayList<>();
  int base = 0;
  for (int i = 0; i < mergeState.fieldsProducers.length; i++) {
    final FieldsProducer producer = mergeState.fieldsProducers[i];
    final int maxDoc = mergeState.maxDocs[i];
    producer.checkIntegrity(); // verify checksums before consuming postings
    subSlices.add(new ReaderSlice(base, maxDoc, i));
    subs.add(producer);
    base += maxDoc;
  }
  // View all per-segment fields as one multi-reader, remapped to merged doc ids.
  Fields merged = new MappedMultiFields(mergeState,
      new MultiFields(subs.toArray(Fields.EMPTY_ARRAY),
                      subSlices.toArray(ReaderSlice.EMPTY_ARRAY)));
  write(merged);
}
/**
 * Opens the suggest FST file for this segment and wires up the delegate
 * postings producer. When not merging, also loads the completion lookup.
 */
public CompletionFieldsProducer(SegmentReadState state) throws IOException {
  String suggestFSTFile = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, EXTENSION);
  IndexInput input = state.directory.openInput(suggestFSTFile, state.context);
  FieldsProducer delegateProducer = null;
  boolean success = false;
  try {
    // Fix: checkHeader previously ran BEFORE this try block, so a corrupt
    // header left 'input' open forever; it now runs inside, where the
    // finally clause guarantees the stream is closed on any failure.
    version = CodecUtil.checkHeader(input, CODEC_NAME, SUGGEST_CODEC_VERSION, SUGGEST_VERSION_CURRENT);
    PostingsFormat delegatePostingsFormat = PostingsFormat.forName(input.readString());
    String providerName = input.readString();
    CompletionLookupProvider completionLookupProvider = providers.get(providerName);
    if (completionLookupProvider == null) {
      throw new IllegalStateException("no provider with name [" + providerName + "] registered");
    }
    // TODO: we could clone the ReadState and make it always forward IOContext.MERGE to prevent unnecessary heap usage?
    delegateProducer = delegatePostingsFormat.fieldsProducer(state);
    /*
     * If we are merging we don't load the FSTs at all such that we
     * don't consume so much memory during merge
     */
    if (state.context.context != Context.MERGE) {
      // TODO: maybe we can do this in a fully lazy fashion based on some configuration
      // eventually we should have some kind of circuit breaker that prevents us from going OOM here
      // with some configuration
      this.lookupFactory = completionLookupProvider.load(input);
    } else {
      this.lookupFactory = null;
    }
    this.delegateProducer = delegateProducer;
    success = true;
  } finally {
    if (!success) {
      // Abort: release the delegate (if created) and the input without
      // masking the original exception.
      IOUtils.closeWhileHandlingException(delegateProducer, input);
    } else {
      IOUtils.close(input);
    }
  }
}
@Override
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
  // Ownership of the postings reader transfers to FSTTermsReader on success.
  PostingsReaderBase postingsReader = new Lucene84PostingsReader(state);
  try {
    return new FSTTermsReader(state, postingsReader);
  } catch (Throwable t) {
    // Construction failed: the terms reader never took ownership, release here.
    IOUtils.closeWhileHandlingException(postingsReader);
    throw t;
  }
}
@Override
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
  // The terms reader owns the postings reader once fully constructed.
  PostingsReaderBase postingsReader = new Lucene84PostingsReader(state);
  try {
    return new OrdsBlockTreeTermsReader(postingsReader, state);
  } catch (Throwable t) {
    // Opening the terms dictionary failed; release the reader we still own.
    IOUtils.closeWhileHandlingException(postingsReader);
    throw t;
  }
}
@Override
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
  // Once created, the uniform-split terms reader takes over the postings reader.
  PostingsReaderBase postingsReader = new Lucene84PostingsReader(state);
  try {
    return createUniformSplitTermsReader(postingsReader, state, blockDecoder);
  } catch (Throwable t) {
    // Terms reader construction failed; close the orphaned postings reader.
    IOUtils.closeWhileHandlingException(postingsReader);
    throw t;
  }
}
@Override
public void merge(MergeState mergeState, NormsProducer normsProducer) throws IOException {
// Optimized merge: stream terms directly from each segment's shared-terms block
// reader instead of going through the generic Fields-based merge.
if (mergeState.needsIndexSort) {
// This custom merging does not support sorted index.
// Fall back to the default merge, which is inefficient for this postings format.
super.merge(mergeState, normsProducer);
return;
}
FieldsProducer[] fieldsProducers = mergeState.fieldsProducers;
List<TermIterator<SegmentTerms>> segmentTermsList = new ArrayList<>(fieldsProducers.length);
for (int segmentIndex = 0; segmentIndex < fieldsProducers.length; segmentIndex++) {
FieldsProducer fieldsProducer = fieldsProducers[segmentIndex];
// Iterate the FieldInfo provided by mergeState.fieldInfos because they may be
// filtered by PerFieldMergeState.
for (FieldInfo fieldInfo : mergeState.fieldInfos[segmentIndex]) {
// Iterate all fields only to get the *first* Terms instanceof STUniformSplitTerms.
// See the break below.
Terms terms = fieldsProducer.terms(fieldInfo.name);
if (terms != null) {
if (!(terms instanceof STUniformSplitTerms)) {
// Terms is not directly an instance of STUniformSplitTerms, it is wrapped/filtered.
// Fall back to the default merge, which is inefficient for this postings format.
super.merge(mergeState, normsProducer);
return;
}
STUniformSplitTerms sharedTerms = (STUniformSplitTerms) terms;
// NOTE(review): one merging block reader per segment — a single shared-terms
// reader covers all fields of that segment; confirm against STUniformSplitTerms.
segmentTermsList.add(new SegmentTerms(
segmentIndex, sharedTerms.createMergingBlockReader(), mergeState.docMaps[segmentIndex]));
// We have the STUniformSplitTerms for the segment. Break the field
// loop to iterate the next segment.
break;
}
}
}
// Write the merged segment by interleaving all per-segment term iterators.
writeSegment((blockWriter, dictionaryBuilder) -> mergeSegments(mergeState, normsProducer, segmentTermsList, blockWriter, dictionaryBuilder));
}
@Override
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
  // The version-aware terms reader owns the postings reader on success.
  PostingsReaderBase postingsReader = new IDVersionPostingsReader();
  try {
    return new VersionBlockTreeTermsReader(postingsReader, state);
  } catch (Throwable t) {
    // Failed before ownership transferred; release the postings reader.
    IOUtils.closeWhileHandlingException(postingsReader);
    throw t;
  }
}
// Wraps a FieldsProducer so its accounting methods can be sanity-checked
// when assertions are enabled; a plain pass-through otherwise.
AssertingFieldsProducer(FieldsProducer in) {
this.in = in;
// do a few simple checks on init (no-ops when -ea is not set)
assert toString() != null;
assert ramBytesUsed() >= 0;
assert getChildResources() != null;
}
@Override
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
// Two-phase open: postings reader first, then the terms index, then the
// terms dictionary that takes ownership of both on success.
PostingsReaderBase postings = new Lucene84PostingsReader(state);
TermsIndexReaderBase indexReader;
boolean success = false;
try {
indexReader = new VariableGapTermsIndexReader(state);
success = true;
} finally {
// Terms index failed to open: only the postings reader exists yet; close it.
if (!success) {
postings.close();
}
}
success = false;
try {
FieldsProducer ret = new BlockTermsReader(indexReader, postings, state);
success = true;
return ret;
} finally {
if (!success) {
// Close both; the nested try guarantees indexReader.close() runs even
// if postings.close() throws.
try {
postings.close();
} finally {
indexReader.close();
}
}
}
}
@Override
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
// Two-phase open with a fixed-gap terms index; BlockTermsReader takes
// ownership of both sub-readers only when its constructor succeeds.
PostingsReaderBase postings = new Lucene84PostingsReader(state);
TermsIndexReaderBase indexReader;
boolean success = false;
try {
indexReader = new FixedGapTermsIndexReader(state);
success = true;
} finally {
// Index open failed: release the postings reader, the only resource so far.
if (!success) {
postings.close();
}
}
success = false;
try {
FieldsProducer ret = new BlockTermsReader(indexReader, postings, state);
success = true;
return ret;
} finally {
if (!success) {
// Close both resources; nested try ensures indexReader closes even if
// closing postings throws.
try {
postings.close();
} finally {
indexReader.close();
}
}
}
}
@Override
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
// Two-phase open (variable-gap terms index variant); on success the
// BlockTermsReader owns and later closes both sub-readers.
PostingsReaderBase postings = new Lucene84PostingsReader(state);
TermsIndexReaderBase indexReader;
boolean success = false;
try {
indexReader = new VariableGapTermsIndexReader(state);
success = true;
} finally {
// Index open failed: close the postings reader we still own.
if (!success) {
postings.close();
}
}
success = false;
try {
FieldsProducer ret = new BlockTermsReader(indexReader, postings, state);
success = true;
return ret;
} finally {
if (!success) {
// Close both; nested try keeps indexReader.close() running even if
// postings.close() throws first.
try {
postings.close();
} finally {
indexReader.close();
}
}
}
}
@Override
public void merge(MergeState mergeState, NormsProducer norms) throws IOException {
// Merge per-field: group the fields by their postings format and let each
// format merge only its own fields.
@SuppressWarnings("unchecked") Iterable<String> indexedFieldNames = () ->
new MergedIterator<>(true,
Arrays.stream(mergeState.fieldsProducers).map(FieldsProducer::iterator).toArray(Iterator[]::new));
Map<PostingsFormat, FieldsGroup> formatToGroups = buildFieldsGroupMapping(indexedFieldNames);
// Merge postings
PerFieldMergeState pfMergeState = new PerFieldMergeState(mergeState);
boolean success = false;
try {
for (Map.Entry<PostingsFormat, FieldsGroup> ent : formatToGroups.entrySet()) {
PostingsFormat format = ent.getKey();
final FieldsGroup group = ent.getValue();
FieldsConsumer consumer = format.fieldsConsumer(group.state);
// Consumers are registered in 'toClose' (enclosing-class field) so they can
// be released if any later format's merge fails.
toClose.add(consumer);
consumer.merge(pfMergeState.apply(group.fields), norms);
}
success = true;
} finally {
// Always restore the original MergeState arrays mutated by apply().
pfMergeState.reset();
if (!success) {
IOUtils.closeWhileHandlingException(toClose);
}
}
}
PerFieldMergeState(MergeState in) {
  this.in = in;
  this.orgMergeFieldInfos = in.mergeFieldInfos;
  // Snapshot the original arrays (shallow copies) so reset() can restore them
  // after apply() has swapped in per-format filtered views.
  this.orgFieldInfos = in.fieldInfos.clone();
  this.orgFieldsProducers = in.fieldsProducers.clone();
}
@Override
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
  // BlockTreeTermsReader takes ownership of the postings reader on success.
  PostingsReaderBase postingsReader = new Lucene84PostingsReader(state);
  try {
    return new BlockTreeTermsReader(postingsReader, state);
  } catch (Throwable t) {
    // Terms dictionary failed to open; close the reader we still own.
    IOUtils.closeWhileHandlingException(postingsReader);
    throw t;
  }
}
MergeReaderWrapper(CodecReader in) throws IOException {
  this.in = in;
  // For each codec producer, swap in its merge-optimized instance when present.
  FieldsProducer postings = in.getPostingsReader();
  this.fields = (postings == null) ? null : postings.getMergeInstance();

  NormsProducer normsReader = in.getNormsReader();
  this.norms = (normsReader == null) ? null : normsReader.getMergeInstance();

  DocValuesProducer docValuesReader = in.getDocValuesReader();
  this.docValues = (docValuesReader == null) ? null : docValuesReader.getMergeInstance();

  StoredFieldsReader storedFields = in.getFieldsReader();
  this.store = (storedFields == null) ? null : storedFields.getMergeInstance();

  TermVectorsReader vectorsReader = in.getTermVectorsReader();
  this.vectors = (vectorsReader == null) ? null : vectorsReader.getMergeInstance();
}
private static FieldsProducer readerToFieldsProducer(final LeafReader reader) throws IOException {
  // Gather the names of all indexed fields, sorted for deterministic iteration.
  final ArrayList<String> indexedFields = new ArrayList<>();
  for (FieldInfo fi : reader.getFieldInfos()) {
    if (fi.getIndexOptions() != IndexOptions.NONE) {
      indexedFields.add(fi.name);
    }
  }
  indexedFields.sort(null); // natural (lexicographic) order

  // Expose the reader as a FieldsProducer view; it owns no resources itself.
  return new FieldsProducer() {
    @Override
    public Iterator<String> iterator() {
      return indexedFields.iterator();
    }

    @Override
    public Terms terms(String field) throws IOException {
      return reader.terms(field);
    }

    @Override
    public int size() {
      return indexedFields.size();
    }

    @Override
    public void checkIntegrity() throws IOException {
      // We already checkIntegrity the entire reader up front
    }

    @Override
    public void close() {
      // Nothing to release; the underlying reader is managed by the caller.
    }

    @Override
    public long ramBytesUsed() {
      return 0;
    }
  };
}
@Override
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
  // Ownership of the postings reader passes to the terms reader on success.
  PostingsReaderBase postingsReader = new Lucene50PostingsReader(state);
  try {
    return new BlockTreeTermsReader(postingsReader, state);
  } catch (Throwable t) {
    // Construction failed; release the still-orphaned postings reader.
    IOUtils.closeWhileHandlingException(postingsReader);
    throw t;
  }
}
@Override
public FieldsProducer getMergeInstance() throws IOException {
// Forward merge-instance creation straight to the wrapped producer.
return delegateProducer.getMergeInstance();
}
@Override
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
// we can just return the delegate here since we didn't record bloom filters for
// the other fields.
return bloomPostings.fieldsProducer(state);
}
@Override
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
// Open the plain-text postings for this segment.
return new SimpleTextFieldsReader(state);
}
@Override
public FieldsProducer fieldsProducer(SegmentReadState state)
throws IOException {
// Wrap the segment's postings in the bloom-filtered producer.
return new BloomFilteredFieldsProducer(state);
}
// Factory hook: subclasses override this to supply an alternate terms reader
// (e.g. the shared-terms variant) while reusing the surrounding open logic.
protected FieldsProducer createUniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState state,
BlockDecoder blockDecoder) throws IOException {
return new UniformSplitTermsReader(postingsReader, state, blockDecoder, dictionaryOnHeap);
}
@Override
// Override of the factory hook: builds the shared-terms (ST) reader instead
// of the per-field UniformSplitTermsReader.
protected FieldsProducer createUniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState state,
BlockDecoder blockDecoder) throws IOException {
return new STUniformSplitTermsReader(postingsReader, state, blockDecoder, dictionaryOnHeap);
}
// Private ctor used to build a view sharing the given delegate and per-field
// readers (see getMergeInstance); no new resources are opened here.
private CompletionFieldsProducer(FieldsProducer delegateFieldsProducer, Map<String, CompletionsTermsReader> readers) {
this.delegateFieldsProducer = delegateFieldsProducer;
this.readers = readers;
}
@Override
public FieldsProducer getMergeInstance() {
// Merge instance shares this producer's delegate and readers.
// NOTE(review): close-ownership of the shared state stays with the original
// producer — confirm callers never close both instances.
return new CompletionFieldsProducer(delegateFieldsProducer, readers);
}