下面列出了org.apache.lucene.index.SegmentWriteState#org.apache.lucene.codecs.FieldsConsumer 实例代码,或者点击链接到github查看源代码,也可以在右侧发表评论。
/**
 * Creates the terms writer for a segment on top of a fresh {@link Lucene84PostingsWriter}.
 *
 * <p>If constructing the {@link OrdsBlockTreeTermsWriter} fails, the postings writer is released
 * through {@code IOUtils.closeWhileHandlingException} and the original failure is rethrown.
 *
 * @param state write state of the segment being written
 * @return a new {@link OrdsBlockTreeTermsWriter} configured with this format's block sizes
 * @throws IOException if one of the writers cannot be created
 */
@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
  PostingsWriterBase postings = new Lucene84PostingsWriter(state);
  try {
    return new OrdsBlockTreeTermsWriter(state, postings, minTermBlockSize, maxTermBlockSize);
  } catch (Throwable failure) {
    // The terms writer was never handed back to the caller, so close the postings writer here.
    IOUtils.closeWhileHandlingException(postings);
    throw failure;
  }
}
/**
 * Creates the terms writer for a segment on top of an {@link IDVersionPostingsWriter} built from
 * the segment's {@code liveDocs}.
 *
 * <p>If constructing the {@link VersionBlockTreeTermsWriter} fails, the postings writer is
 * released through {@code IOUtils.closeWhileHandlingException} and the failure is rethrown.
 *
 * @param state write state of the segment being written
 * @return a new {@link VersionBlockTreeTermsWriter} using {@code minTermsInBlock} and
 *     {@code maxTermsInBlock}
 * @throws IOException if the terms writer cannot be created
 */
@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
  PostingsWriterBase postings = new IDVersionPostingsWriter(state.liveDocs);
  try {
    return new VersionBlockTreeTermsWriter(state, postings, minTermsInBlock, maxTermsInBlock);
  } catch (Throwable failure) {
    // Construction failed: nobody else owns the postings writer, so close it before rethrowing.
    IOUtils.closeWhileHandlingException(postings);
    throw failure;
  }
}
/**
 * Builds a {@link UniformSplitTermsWriter} with the default block-line settings and the encoder
 * returned by {@code getBlockEncoder()}, instrumented so that encoding calls are recorded.
 *
 * @param segmentWriteState write state of the segment being written
 * @param postingsWriter postings writer the terms writer delegates to
 * @return the instrumented terms writer
 * @throws IOException if the terms writer cannot be created
 */
protected FieldsConsumer createFieldsConsumer(SegmentWriteState segmentWriteState, PostingsWriterBase postingsWriter) throws IOException {
  FieldsConsumer instrumentedWriter = new UniformSplitTermsWriter(
      postingsWriter,
      segmentWriteState,
      UniformSplitTermsWriter.DEFAULT_TARGET_NUM_BLOCK_LINES,
      UniformSplitTermsWriter.DEFAULT_DELTA_NUM_LINES,
      getBlockEncoder()) {

    /** Writes the fields metadata as usual, then records the fields-metadata encoding call. */
    @Override
    protected void writeEncodedFieldsMetadata(ByteBuffersDataOutput fieldsOutput) throws IOException {
      super.writeEncodedFieldsMetadata(fieldsOutput);
      recordFieldsMetadataEncodingCall();
    }

    /**
     * Records a block encoding call, writes the dictionary, then records the dictionary
     * encoding call.
     */
    @Override
    protected void writeDictionary(IndexDictionary.Builder dictionaryBuilder) throws IOException {
      recordBlockEncodingCall();
      super.writeDictionary(dictionaryBuilder);
      recordDictionaryEncodingCall();
    }
  };
  return instrumentedWriter;
}
/**
 * Builds an {@link STUniformSplitTermsWriter} with the default block-line settings and the
 * encoder returned by {@code getBlockEncoder()}, instrumented so that encoding calls are recorded.
 *
 * @param segmentWriteState write state of the segment being written
 * @param postingsWriter postings writer the terms writer delegates to
 * @return the instrumented terms writer
 * @throws IOException if the terms writer cannot be created
 */
protected FieldsConsumer createFieldsConsumer(SegmentWriteState segmentWriteState, PostingsWriterBase postingsWriter) throws IOException {
  FieldsConsumer instrumentedWriter = new STUniformSplitTermsWriter(
      postingsWriter,
      segmentWriteState,
      UniformSplitTermsWriter.DEFAULT_TARGET_NUM_BLOCK_LINES,
      UniformSplitTermsWriter.DEFAULT_DELTA_NUM_LINES,
      getBlockEncoder()) {

    /**
     * Records a block encoding call, writes the fields metadata, then records the
     * fields-metadata encoding call.
     */
    @Override
    protected void writeEncodedFieldsMetadata(ByteBuffersDataOutput fieldsOutput) throws IOException {
      recordBlockEncodingCall();
      super.writeEncodedFieldsMetadata(fieldsOutput);
      recordFieldsMetadataEncodingCall();
    }

    /**
     * Records a block encoding call, writes the dictionary, then records the dictionary
     * encoding call.
     */
    @Override
    protected void writeDictionary(IndexDictionary.Builder dictionaryBuilder) throws IOException {
      recordBlockEncodingCall();
      super.writeDictionary(dictionaryBuilder);
      recordDictionaryEncodingCall();
    }
  };
  return instrumentedWriter;
}
/**
 * Creates a {@link BlockTreeTermsWriter} for the segment, backed by a fresh
 * {@link Lucene84PostingsWriter}.
 *
 * <p>If the terms writer cannot be constructed, the postings writer is released through
 * {@code IOUtils.closeWhileHandlingException} and the original failure is rethrown.
 *
 * @param state write state of the segment being written
 * @return the terms writer, configured with {@code minTermBlockSize} and {@code maxTermBlockSize}
 * @throws IOException if one of the writers cannot be created
 */
@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
  PostingsWriterBase postings = new Lucene84PostingsWriter(state);
  try {
    return new BlockTreeTermsWriter(state, postings, minTermBlockSize, maxTermBlockSize);
  } catch (Throwable failure) {
    // Nothing else references the postings writer yet; close it before propagating.
    IOUtils.closeWhileHandlingException(postings);
    throw failure;
  }
}
/**
 * Creates a {@link BlockTreeTermsWriter} with the default block sizes, backed by a fresh
 * {@link Lucene50PostingsWriter}.
 *
 * <p>If the terms writer cannot be constructed, the postings writer is released through
 * {@code IOUtils.closeWhileHandlingException} and the original failure is rethrown.
 *
 * @param state write state of the segment being written
 * @return the terms writer
 * @throws IOException if one of the writers cannot be created
 */
@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
  PostingsWriterBase postings = new Lucene50PostingsWriter(state);
  try {
    return new BlockTreeTermsWriter(
        state,
        postings,
        BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE,
        BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE);
  } catch (Throwable failure) {
    // Nothing else references the postings writer yet; close it before propagating.
    IOUtils.closeWhileHandlingException(postings);
    throw failure;
  }
}
/**
 * Creates an {@link FSTTermsWriter} for the segment, backed by a fresh
 * {@link Lucene84PostingsWriter}.
 *
 * <p>If the terms writer cannot be constructed, the postings writer is released through
 * {@code IOUtils.closeWhileHandlingException} and the original failure is rethrown.
 *
 * @param state write state of the segment being written
 * @return the terms writer
 * @throws IOException if one of the writers cannot be created
 */
@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
  PostingsWriterBase postings = new Lucene84PostingsWriter(state);
  try {
    return new FSTTermsWriter(state, postings);
  } catch (Throwable failure) {
    // Nothing else references the postings writer yet; close it before propagating.
    IOUtils.closeWhileHandlingException(postings);
    throw failure;
  }
}
/**
 * Creates the uniform-split terms writer for the segment via
 * {@code createUniformSplitTermsWriter}, backed by a fresh {@link Lucene84PostingsWriter}.
 *
 * <p>If the factory call fails, the postings writer is released through
 * {@code IOUtils.closeWhileHandlingException} and the original failure is rethrown.
 *
 * @param state write state of the segment being written
 * @return the terms writer produced by the factory method
 * @throws IOException if one of the writers cannot be created
 */
@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
  PostingsWriterBase postings = new Lucene84PostingsWriter(state);
  try {
    return createUniformSplitTermsWriter(postings, state, targetNumBlockLines, deltaNumLines, blockEncoder);
  } catch (Throwable failure) {
    // Nothing else references the postings writer yet; close it before propagating.
    IOUtils.closeWhileHandlingException(postings);
    throw failure;
  }
}
/**
 * Creates a {@link CompletionFieldsConsumer} that wraps the delegate postings format.
 *
 * @param state write state of the segment being written
 * @return a consumer built from this format's name, the delegate format and {@code state}
 * @throws UnsupportedOperationException if {@code delegatePostingsFormat()} returns {@code null}
 * @throws IOException if the consumer cannot be created
 */
@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
  PostingsFormat delegate = delegatePostingsFormat();
  if (delegate != null) {
    return new CompletionFieldsConsumer(getName(), delegate, state);
  }
  // Without a delegate there is nothing to write the postings with.
  throw new UnsupportedOperationException("Error - " + getClass().getName()
      + " has been constructed without a choice of PostingsFormat");
}
/**
 * Allocates a new ID, writes it to the segment's ID file, and registers a fresh
 * {@link RAMPostings} under that ID in the shared {@code state} map.
 *
 * @param writeState write state of the segment being written
 * @return a {@link RAMFieldsConsumer} that fills the newly registered postings
 * @throws IOException if the ID file cannot be created, written or closed
 */
@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState writeState) throws IOException {
  final int id = nextID.getAndIncrement();

  // TODO -- ok to do this up front instead of
  // on close....? should be ok?

  // Write our ID:
  final String idFileName = IndexFileNames.segmentFileName(writeState.segmentInfo.name, writeState.segmentSuffix, ID_EXTENSION);
  IndexOutput idOut = writeState.directory.createOutput(idFileName, writeState.context);
  boolean idWritten = false;
  try {
    CodecUtil.writeHeader(idOut, RAM_ONLY_NAME, VERSION_LATEST);
    idOut.writeVInt(id);
    idWritten = true;
  } finally {
    if (idWritten) {
      // Normal path: a failure to close must surface to the caller.
      IOUtils.close(idOut);
    } else {
      // A write already failed; close without letting a secondary error replace it.
      IOUtils.closeWhileHandlingException(idOut);
    }
  }

  final RAMPostings ramPostings = new RAMPostings();
  final RAMFieldsConsumer ramConsumer = new RAMFieldsConsumer(writeState, ramPostings);
  synchronized (state) {
    state.put(id, ramPostings);
  }
  return ramConsumer;
}
/**
 * Creates a {@link BlockTermsWriter} whose terms index is a {@link VariableGapTermsIndexWriter}
 * driven by an {@code EveryNOrDocFreqTermSelector}, backed by a {@link Lucene84PostingsWriter}.
 *
 * <p>Cleanup on failure: if the terms index writer cannot be created, the postings writer is
 * closed; if the block terms writer cannot be created, the postings writer and then the terms
 * index writer are closed. In both cases an exception thrown by {@code close()} replaces the
 * original failure.
 *
 * @param state write state of the segment being written
 * @return the block terms writer
 * @throws IOException if any of the writers cannot be created or closed
 */
@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
  PostingsWriterBase postings = new Lucene84PostingsWriter(state);

  // TODO: should we make the terms index more easily
  // pluggable?  Ie so that this codec would record which
  // index impl was used, and switch on loading?
  // Or... you must make a new Codec for this?
  TermsIndexWriterBase termsIndex;
  try {
    termsIndex = new VariableGapTermsIndexWriter(state, new VariableGapTermsIndexWriter.EveryNOrDocFreqTermSelector(docFreqThreshold, termIndexInterval));
  } catch (Throwable failure) {
    postings.close();
    throw failure;
  }

  try {
    // Must use BlockTermsWriter (not BlockTree) because
    // BlockTree doesn't support ords (yet)...
    return new BlockTermsWriter(termsIndex, state, postings);
  } catch (Throwable failure) {
    try {
      postings.close();
    } finally {
      termsIndex.close();
    }
    throw failure;
  }
}
/**
 * Creates a {@link BlockTermsWriter} whose terms index is a {@link FixedGapTermsIndexWriter}
 * using {@code termIndexInterval}, backed by a {@link Lucene84PostingsWriter}.
 *
 * <p>Cleanup on failure: if the terms index writer cannot be created, the postings writer is
 * closed; if the block terms writer cannot be created, the postings writer and then the terms
 * index writer are closed. In both cases an exception thrown by {@code close()} replaces the
 * original failure.
 *
 * @param state write state of the segment being written
 * @return the block terms writer
 * @throws IOException if any of the writers cannot be created or closed
 */
@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
  PostingsWriterBase postings = new Lucene84PostingsWriter(state);

  // TODO: should we make the terms index more easily
  // pluggable?  Ie so that this codec would record which
  // index impl was used, and switch on loading?
  // Or... you must make a new Codec for this?
  TermsIndexWriterBase termsIndex;
  try {
    termsIndex = new FixedGapTermsIndexWriter(state, termIndexInterval);
  } catch (Throwable failure) {
    postings.close();
    throw failure;
  }

  try {
    // Must use BlockTermsWriter (not BlockTree) because
    // BlockTree doesn't support ords (yet)...
    return new BlockTermsWriter(termsIndex, state, postings);
  } catch (Throwable failure) {
    try {
      postings.close();
    } finally {
      termsIndex.close();
    }
    throw failure;
  }
}
/**
 * Creates a {@link BlockTermsWriter} whose terms index is a {@link VariableGapTermsIndexWriter}
 * driven by an {@code EveryNTermSelector}, backed by a {@link Lucene84PostingsWriter}.
 *
 * <p>Cleanup on failure: if the terms index writer cannot be created, the postings writer is
 * closed; if the block terms writer cannot be created, the postings writer and then the terms
 * index writer are closed. In both cases an exception thrown by {@code close()} replaces the
 * original failure.
 *
 * @param state write state of the segment being written
 * @return the block terms writer
 * @throws IOException if any of the writers cannot be created or closed
 */
@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
  PostingsWriterBase postings = new Lucene84PostingsWriter(state);

  // TODO: should we make the terms index more easily
  // pluggable?  Ie so that this codec would record which
  // index impl was used, and switch on loading?
  // Or... you must make a new Codec for this?
  TermsIndexWriterBase termsIndex;
  try {
    termsIndex = new VariableGapTermsIndexWriter(state, new VariableGapTermsIndexWriter.EveryNTermSelector(termIndexInterval));
  } catch (Throwable failure) {
    postings.close();
    throw failure;
  }

  try {
    // Must use BlockTermsWriter (not BlockTree) because
    // BlockTree doesn't support ords (yet)...
    return new BlockTermsWriter(termsIndex, state, postings);
  } catch (Throwable failure) {
    try {
      postings.close();
    } finally {
      termsIndex.close();
    }
    throw failure;
  }
}
/**
 * Wraps the delegate's consumer in a {@link CrankyFieldsConsumer}, or throws a fake
 * {@link IOException} when {@code random.nextInt(100)} yields 0.
 *
 * @param state write state of the segment being written
 * @return the delegate's consumer wrapped with the shared {@code random}
 * @throws IOException the injected fake failure, or any failure from the delegate
 */
@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
  final boolean injectFailure = random.nextInt(100) == 0;
  if (injectFailure) {
    throw new IOException("Fake IOException from PostingsFormat.fieldsConsumer()");
  }
  FieldsConsumer real = delegate.fieldsConsumer(state);
  return new CrankyFieldsConsumer(real, random);
}
/**
 * Writes the postings of every field through the consumer of the postings format its group maps
 * to.
 *
 * <p>Each consumer is added to {@code toClose} before it is used. If anything fails, everything
 * collected in {@code toClose} is released through {@code IOUtils.closeWhileHandlingException}
 * and the failure is rethrown; on success the consumers are left open.
 *
 * @param fields the fields to write
 * @param norms norms passed through to each per-format consumer
 * @throws IOException if creating a consumer or writing a group fails
 */
@Override
public void write(Fields fields, NormsProducer norms) throws IOException {
  Map<PostingsFormat, FieldsGroup> groupsByFormat = buildFieldsGroupMapping(fields);

  // Write postings
  try {
    for (Map.Entry<PostingsFormat, FieldsGroup> entry : groupsByFormat.entrySet()) {
      PostingsFormat postingsFormat = entry.getKey();
      final FieldsGroup group = entry.getValue();

      // Exposes only the fields from this group:
      Fields groupView = new FilterFields(fields) {
        @Override
        public Iterator<String> iterator() {
          return group.fields.iterator();
        }
      };

      FieldsConsumer groupConsumer = postingsFormat.fieldsConsumer(group.state);
      toClose.add(groupConsumer);
      groupConsumer.write(groupView, norms);
    }
  } catch (Throwable failure) {
    IOUtils.closeWhileHandlingException(toClose);
    throw failure;
  }
}
/**
 * Merges postings group by group, each through the consumer of the postings format its fields
 * map to.
 *
 * <p>The field names are the merged iteration over all {@code mergeState.fieldsProducers}. The
 * per-field merge state is always reset afterwards; if anything failed, everything collected in
 * {@code toClose} is then released through {@code IOUtils.closeWhileHandlingException}.
 *
 * @param mergeState state describing the segments being merged
 * @param norms norms passed through to each per-format consumer
 * @throws IOException if creating a consumer or merging a group fails
 */
@Override
public void merge(MergeState mergeState, NormsProducer norms) throws IOException {
  @SuppressWarnings("unchecked") Iterable<String> allFieldNames = () ->
      new MergedIterator<>(true,
          Arrays.stream(mergeState.fieldsProducers).map(FieldsProducer::iterator).toArray(Iterator[]::new));
  Map<PostingsFormat, FieldsGroup> groupsByFormat = buildFieldsGroupMapping(allFieldNames);

  // Merge postings
  PerFieldMergeState perFieldState = new PerFieldMergeState(mergeState);
  boolean allMerged = false;
  try {
    for (Map.Entry<PostingsFormat, FieldsGroup> entry : groupsByFormat.entrySet()) {
      PostingsFormat postingsFormat = entry.getKey();
      final FieldsGroup group = entry.getValue();

      FieldsConsumer groupConsumer = postingsFormat.fieldsConsumer(group.state);
      toClose.add(groupConsumer);
      groupConsumer.merge(perFieldState.apply(group.fields), norms);
    }
    allMerged = true;
  } finally {
    // Reset first, then clean up: this order matches what callers have always observed.
    perFieldState.reset();
    if (allMerged == false) {
      IOUtils.closeWhileHandlingException(toClose);
    }
  }
}
/**
 * Creates the fields consumer for a segment.
 *
 * <p>When a delegate postings format is set, its consumer is wrapped in a
 * {@link MtasFieldsConsumer}. Otherwise the postings format of the codec named
 * {@code delegateCodecName} is looked up and its consumer is returned unwrapped.
 *
 * @param state write state of the segment being written
 * @return the wrapped or plain consumer, as described above
 * @throws IOException if the underlying consumer cannot be created
 */
@Override
public final FieldsConsumer fieldsConsumer(SegmentWriteState state)
    throws IOException {
  if (delegatePostingsFormat == null) {
    return Codec.forName(delegateCodecName).postingsFormat().fieldsConsumer(state);
  }
  FieldsConsumer delegateConsumer = delegatePostingsFormat.fieldsConsumer(state);
  return new MtasFieldsConsumer(delegateConsumer, state, getName(),
      delegatePostingsFormat.getName());
}
/**
 * Writes the codec header to {@code output} and returns a consumer that serialises one
 * suggester FST per field into that same output.
 *
 * <p>The returned consumer appends, for every field that yields a non-null FST, the FST followed
 * by a fixed sequence of metadata values. On {@code close()} it appends a table of field names
 * and their start offsets, the position of that table, and the codec footer, then closes
 * {@code output}.
 *
 * @param output the index output to write to; closed when the returned consumer is closed
 * @return the consumer writing into {@code output}
 * @throws IOException if the header cannot be written
 */
@Override
public FieldsConsumer consumer(final IndexOutput output) throws IOException {
CodecUtil.writeHeader(output, CODEC_NAME, CODEC_VERSION_LATEST);
return new FieldsConsumer() {
// Field name -> file pointer at which that field's FST starts in output.
private Map<String, Long> fieldOffsets = new HashMap<>();
@Override
public void close() throws IOException {
try {
/*
* write the offsets per field such that we know where
* we need to load the FSTs from
*/
// Remember where the offsets table begins; it is written as a fixed-width long after the table.
long pointer = output.getFilePointer();
output.writeVInt(fieldOffsets.size());
for (Map.Entry<String, Long> entry : fieldOffsets.entrySet()) {
output.writeString(entry.getKey());
output.writeVLong(entry.getValue());
}
output.writeLong(pointer);
CodecUtil.writeFooter(output);
} finally {
// The output is closed whether or not writing the trailer succeeded.
IOUtils.close(output);
}
}
@Override
public void write(Fields fields) throws IOException {
for(String field : fields) {
Terms terms = fields.terms(field);
if (terms == null) {
continue;
}
TermsEnum termsEnum = terms.iterator();
// Passed back into postings() on each term so the enum can be reused.
PostingsEnum docsEnum = null;
final SuggestPayload spare = new SuggestPayload();
// Largest (position + 1) seen across all postings of this field.
int maxAnalyzedPathsForOneInput = 0;
final XAnalyzingSuggester.XBuilder builder = new XAnalyzingSuggester.XBuilder(maxSurfaceFormsPerAnalyzedForm, hasPayloads, XAnalyzingSuggester.PAYLOAD_SEP);
// Largest (docID + 1) seen for this field; only used by the assertion below.
int docCount = 0;
while (true) {
BytesRef term = termsEnum.next();
if (term == null) {
break;
}
docsEnum = termsEnum.postings(docsEnum, PostingsEnum.PAYLOADS);
builder.startTerm(term);
// Number of documents visited for the current term.
int docFreq = 0;
while (docsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
for (int i = 0; i < docsEnum.freq(); i++) {
final int position = docsEnum.nextPosition();
// Decode the payload at this position into spare and feed it to the builder.
AnalyzingCompletionLookupProvider.this.parsePayload(docsEnum.getPayload(), spare);
builder.addSurface(spare.surfaceForm.get(), spare.payload.get(), spare.weight);
// multi fields have the same surface form so we sum up here
maxAnalyzedPathsForOneInput = Math.max(maxAnalyzedPathsForOneInput, position + 1);
}
docFreq++;
docCount = Math.max(docCount, docsEnum.docID()+1);
}
builder.finishTerm(docFreq);
}
/*
* Here we are done processing the field and we can
* build the FST and write it to disk.
*/
FST<Pair<Long, BytesRef>> build = builder.build();
assert build != null || docCount == 0: "the FST is null but docCount is != 0 actual value: [" + docCount + "]";
/*
* it's possible that the FST is null if we have 2 segments that get merged
* and all docs that have a value in this field are deleted. This will cause
* a consumer to be created but it doesn't consume any values causing the FSTBuilder
* to return null.
*/
if (build != null) {
// Record where this field's FST starts before writing it.
fieldOffsets.put(field, output.getFilePointer());
build.save(output);
/* write some more meta-info */
output.writeVInt(maxAnalyzedPathsForOneInput);
output.writeVInt(maxSurfaceFormsPerAnalyzedForm);
output.writeInt(maxGraphExpansions); // can be negative
// Pack the three boolean settings into a single bit-flag value.
int options = 0;
options |= preserveSep ? SERIALIZE_PRESERVE_SEPARATORS : 0;
options |= hasPayloads ? SERIALIZE_HAS_PAYLOADS : 0;
options |= preservePositionIncrements ? SERIALIZE_PRESERVE_POSITION_INCREMENTS : 0;
output.writeVInt(options);
output.writeVInt(XAnalyzingSuggester.SEP_LABEL);
output.writeVInt(XAnalyzingSuggester.END_BYTE);
output.writeVInt(XAnalyzingSuggester.PAYLOAD_SEP);
output.writeVInt(XAnalyzingSuggester.HOLE_CHARACTER);
}
}
}
};
}
/**
 * Always fails: this implementation does not support writing.
 *
 * @param state write state of the segment (unused)
 * @return never returns normally
 * @throws UnsupportedOperationException on every call
 */
@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
throw new UnsupportedOperationException("this codec can only be used for reading");
}
/**
 * Delegates writing to the postings format registered under the name {@code "Lucene84"}.
 *
 * @param state write state of the segment being written
 * @return the consumer created by the looked-up format
 * @throws IOException if the delegate cannot create its consumer
 */
@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
  PostingsFormat lucene84 = PostingsFormat.forName("Lucene84");
  return lucene84.fieldsConsumer(state);
}
/**
 * Creates a {@link SimpleTextFieldsWriter} for the segment.
 *
 * @param state write state of the segment being written
 * @return the new writer
 * @throws IOException if the writer cannot be created
 */
@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
  FieldsConsumer textWriter = new SimpleTextFieldsWriter(state);
  return textWriter;
}
/**
 * Stores the wrapped consumer and the segment write state for later use.
 *
 * @param fieldsConsumer the consumer to delegate to
 * @param state write state of the segment being written
 */
public BloomFilteredFieldsConsumer(FieldsConsumer fieldsConsumer,
    SegmentWriteState state) {
  this.state = state;
  this.delegateFieldsConsumer = fieldsConsumer;
}
/**
 * Factory hook that creates the {@link UniformSplitTermsWriter}; the arguments are passed
 * straight through to its constructor.
 *
 * @param postingsWriter postings writer the terms writer delegates to
 * @param state write state of the segment being written
 * @param targetNumBlockLines forwarded to the terms writer
 * @param deltaNumLines forwarded to the terms writer
 * @param blockEncoder forwarded to the terms writer
 * @return the new terms writer
 * @throws IOException if the terms writer cannot be created
 */
protected FieldsConsumer createUniformSplitTermsWriter(PostingsWriterBase postingsWriter, SegmentWriteState state,
                                                       int targetNumBlockLines, int deltaNumLines, BlockEncoder blockEncoder) throws IOException {
  FieldsConsumer termsWriter =
      new UniformSplitTermsWriter(postingsWriter, state, targetNumBlockLines, deltaNumLines, blockEncoder);
  return termsWriter;
}
/**
 * Factory hook override that creates an {@link STUniformSplitTermsWriter}; the arguments are
 * passed straight through to its constructor.
 *
 * @param postingsWriter postings writer the terms writer delegates to
 * @param state write state of the segment being written
 * @param targetNumBlockLines forwarded to the terms writer
 * @param deltaNumLines forwarded to the terms writer
 * @param blockEncoder forwarded to the terms writer
 * @return the new terms writer
 * @throws IOException if the terms writer cannot be created
 */
@Override
protected FieldsConsumer createUniformSplitTermsWriter(PostingsWriterBase postingsWriter, SegmentWriteState state,
                                                       int targetNumBlockLines, int deltaNumLines, BlockEncoder blockEncoder) throws IOException {
  FieldsConsumer termsWriter =
      new STUniformSplitTermsWriter(postingsWriter, state, targetNumBlockLines, deltaNumLines, blockEncoder);
  return termsWriter;
}
/**
 * Wraps the consumer of the underlying format {@code in} in an {@link AssertingFieldsConsumer}.
 *
 * @param state write state of the segment being written
 * @return the wrapping consumer
 * @throws IOException if the underlying consumer cannot be created
 */
@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
  FieldsConsumer wrapped = in.fieldsConsumer(state);
  return new AssertingFieldsConsumer(state, wrapped);
}
/**
 * Stores the wrapped consumer together with the segment write state.
 *
 * @param writeState write state of the segment being written
 * @param in the consumer to wrap
 */
AssertingFieldsConsumer(SegmentWriteState writeState, FieldsConsumer in) {
  this.in = in;
  this.writeState = writeState;
}
/**
 * Forwards consumer creation to {@code delegate} unchanged.
 *
 * @param state write state of the segment being written
 * @return the consumer created by the delegate
 * @throws IOException if the delegate cannot create its consumer
 */
@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state)
    throws IOException {
  FieldsConsumer fromDelegate = delegate.fieldsConsumer(state);
  return fromDelegate;
}
/**
 * Stores the wrapped consumer and the random source.
 *
 * @param delegate the consumer to wrap
 * @param random the random source kept alongside it
 */
CrankyFieldsConsumer(FieldsConsumer delegate, Random random) {
  this.random = random;
  this.delegate = delegate;
}
/**
 * Creates a new {@code FieldsWriter} for the segment.
 *
 * @param state write state of the segment being written
 * @return the new writer
 * @throws IOException if the writer cannot be created
 */
@Override
public final FieldsConsumer fieldsConsumer(SegmentWriteState state)
    throws IOException {
  FieldsConsumer writer = new FieldsWriter(state);
  return writer;
}
/**
 * Merges the terms using a consumer obtained from the codec's postings format.
 *
 * <p>The consumer is opened in a try-with-resources statement, so it is closed whether or not
 * the merge succeeds.
 *
 * @param segmentWriteState write state of the segment being written
 * @param norms norms passed through to the consumer's merge
 * @throws IOException if the consumer cannot be created, the merge fails, or closing fails
 */
private void mergeTerms(SegmentWriteState segmentWriteState, NormsProducer norms) throws IOException {
  try (FieldsConsumer termsConsumer = codec.postingsFormat().fieldsConsumer(segmentWriteState)) {
    termsConsumer.merge(mergeState, norms);
  }
}