下面列出了org.apache.lucene.index.ConcurrentMergeScheduler#org.apache.lucene.codecs.PostingsFormat 实例代码,或者点击链接到github查看源代码,也可以在右侧发表评论。
/**
 * Opens the "Lucene84" postings for the given segment.
 *
 * <p>When the segment is opened for a merge, the on-disk producer is returned as-is.
 * Otherwise the on-disk postings are integrity-checked, a {@code DirectFields} is built
 * from them, and the on-disk producer is closed before returning.
 *
 * @param state read state of the segment being opened
 * @return the on-disk producer for merges, otherwise the {@code DirectFields} built from it
 * @throws IOException if opening, checking or loading the postings fails
 */
@Override
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
  final FieldsProducer onDisk = PostingsFormat.forName("Lucene84").fieldsProducer(state);
  if (state.context.context == IOContext.Context.MERGE) {
    // Don't load postings for merge:
    return onDisk;
  }
  try {
    onDisk.checkIntegrity();
    return new DirectFields(state, onDisk, minSkipCount, lowFreqCutoff);
  } finally {
    // The on-disk producer is closed whether or not loading succeeded.
    onDisk.close();
  }
}
/**
 * Creates the consumer: obtains the delegate's fields consumer, creates the dictionary
 * output for the segment and writes the index header to it.
 *
 * <p>If any of these steps fails, both the dictionary output and the delegate consumer
 * are closed while suppressing secondary exceptions.
 *
 * @param codecName name written into the dictionary file header
 * @param delegatePostingsFormat format whose consumer receives the delegated writes
 * @param state write state of the segment being written
 * @throws IOException if the delegate consumer or the dictionary output cannot be set up
 */
CompletionFieldsConsumer(String codecName, PostingsFormat delegatePostingsFormat, SegmentWriteState state) throws IOException {
  this.codecName = codecName;
  this.delegatePostingsFormatName = delegatePostingsFormat.getName();
  this.state = state;
  final String dictionaryFileName =
      IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, DICT_EXTENSION);
  boolean initialized = false;
  try {
    this.delegateFieldsConsumer = delegatePostingsFormat.fieldsConsumer(state);
    dictOut = state.directory.createOutput(dictionaryFileName, state.context);
    CodecUtil.writeIndexHeader(
        dictOut, codecName, COMPLETION_VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
    initialized = true;
  } finally {
    if (!initialized) {
      // Release whatever was opened before the failure.
      IOUtils.closeWhileHandlingException(dictOut, delegateFieldsConsumer);
    }
  }
}
/**
 * Builds an {@code IndexWriterConfig} whose codec uses a {@code Completion84PostingsFormat}
 * (with a randomly picked FST load mode) for the given suggest fields and the codec's
 * regular per-field format for every other field.
 *
 * @param analyzer analyzer for the writer config
 * @param suggestFields names of the fields that get the completion postings format
 * @return the configured writer config
 */
static IndexWriterConfig iwcWithSuggestField(Analyzer analyzer, final Set<String> suggestFields) {
  IndexWriterConfig config = newIndexWriterConfig(random(), analyzer);
  config.setMergePolicy(newLogMergePolicy());
  Codec suggestAwareCodec = new Lucene86Codec() {
    // Picked once per codec instance, then shared by all suggest fields.
    CompletionPostingsFormat.FSTLoadMode loadMode =
        RandomPicks.randomFrom(random(), CompletionPostingsFormat.FSTLoadMode.values());
    PostingsFormat completionFormat = new Completion84PostingsFormat(loadMode);

    @Override
    public PostingsFormat getPostingsFormatForField(String field) {
      return suggestFields.contains(field)
          ? completionFormat
          : super.getPostingsFormatForField(field);
    }
  };
  config.setCodec(suggestAwareCodec);
  return config;
}
/**
 * Check codec restrictions.
 *
 * <p>The check covers the codec's own name, every format name of a {@code RandomCodec}
 * (only when the avoid list is non-empty), the name of the codec's postings format, and
 * the value of {@code LuceneTestCase.TEST_POSTINGSFORMAT}.
 *
 * @param codec the codec to validate
 * @throws AssumptionViolatedException if the class does not work with a given codec.
 */
private void checkCodecRestrictions(Codec codec) {
  final String codecName = codec.getName();
  assumeFalse("Class not allowed to use codec: " + codecName + ".",
      shouldAvoidCodec(codecName));

  if (codec instanceof RandomCodec && !avoidCodecs.isEmpty()) {
    final RandomCodec randomCodec = (RandomCodec) codec;
    for (String formatName : randomCodec.formatNames) {
      assumeFalse("Class not allowed to use postings format: " + formatName + ".",
          shouldAvoidCodec(formatName));
    }
  }

  final String postingsFormatName = codec.postingsFormat().getName();
  assumeFalse("Class not allowed to use postings format: " + postingsFormatName + ".",
      shouldAvoidCodec(postingsFormatName));

  final String requestedFormat = LuceneTestCase.TEST_POSTINGSFORMAT;
  assumeFalse("Class not allowed to use postings format: " + requestedFormat + ".",
      shouldAvoidCodec(requestedFormat));
}
/**
 * Reloads all Lucene SPI implementations using the new classloader.
 * This method must be called after {@link #addToClassLoader(List)}
 * and before using this ResourceLoader.
 *
 * <p>Does nothing unless the reload flag is set; the flag is cleared before reloading.
 */
synchronized void reloadLuceneSPI() {
  // TODO improve to use a static Set<URL> to check when we need to
  if (needToReloadLuceneSPI) {
    needToReloadLuceneSPI = false; // reset
    log.debug("Reloading Lucene SPI");

    // Codecs:
    PostingsFormat.reloadPostingsFormats(this.classLoader);
    DocValuesFormat.reloadDocValuesFormats(this.classLoader);
    Codec.reloadCodecs(this.classLoader);

    // Analysis:
    CharFilterFactory.reloadCharFilters(this.classLoader);
    TokenFilterFactory.reloadTokenFilters(this.classLoader);
    TokenizerFactory.reloadTokenizers(this.classLoader);
  }
}
/**
 * Returns a {@code MtasCodecPostingsFormat} wrapping a concrete postings format.
 *
 * <p>If the delegate's format is not a {@code PerFieldPostingsFormat} it is wrapped
 * directly. Otherwise the default codec's format is used; when that is itself per-field,
 * its format for a {@code null} field is taken, falling back to "Lucene70" when that
 * lookup yields {@code null} or another per-field format.
 *
 * @return a new wrapping postings format
 */
@Override
public PostingsFormat postingsFormat() {
  initDelegate();
  if (!(delegate.postingsFormat() instanceof PerFieldPostingsFormat)) {
    return new MtasCodecPostingsFormat(delegate.postingsFormat());
  }

  PostingsFormat wrapped = Codec.getDefault().postingsFormat();
  if (wrapped instanceof PerFieldPostingsFormat) {
    wrapped = ((PerFieldPostingsFormat) wrapped).getPostingsFormatForField(null);
    if (wrapped == null || wrapped instanceof PerFieldPostingsFormat) {
      // fallback option
      wrapped = PostingsFormat.forName("Lucene70");
    }
  }
  return new MtasCodecPostingsFormat(wrapped);
}
/**
* Instantiates a new mtas codec postings format.
*
* @param delegate the delegate
*/
public MtasCodecPostingsFormat(PostingsFormat delegate) {
super(MtasCodec.MTAS_CODEC_NAME);
delegateCodecName = delegate.getName();
delegatePostingsFormat = delegate;
// preload to prevent NoClassDefFoundErrors
try {
Class.forName("mtas.codec.payload.MtasPayloadDecoder");
Class.forName("mtas.codec.payload.MtasBitInputStream");
Class.forName("mtas.analysis.token.MtasPosition");
Class.forName("mtas.analysis.token.MtasOffset");
Class.forName("mtas.codec.tree.MtasRBTree");
Class.forName("mtas.codec.MtasTerms");
Class.forName("mtas.codec.util.CodecInfo");
Class.forName("mtas.codec.tree.MtasTreeNodeId");
} catch (ClassNotFoundException e) {
log.error(e);
}
}
/**
 * Opens the suggest file of a segment, validates its header, and sets up the delegate
 * fields producer plus (outside of merges) the lookup factory loaded from the file.
 *
 * <p>The input is always closed before the constructor returns or throws; on failure the
 * delegate producer is closed as well, with secondary exceptions suppressed.
 *
 * @param state read state of the segment being opened
 * @throws IOException if the file cannot be opened or read, or the header check fails
 * @throws IllegalStateException if the provider named in the file is not registered
 */
public CompletionFieldsProducer(SegmentReadState state) throws IOException {
  String suggestFSTFile = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, EXTENSION);
  IndexInput input = state.directory.openInput(suggestFSTFile, state.context);
  FieldsProducer delegateProducer = null;
  boolean success = false;
  try {
    // The header check must run inside the try block: if it throws (corrupt file, version
    // mismatch), the finally clause below is what closes the already opened input.
    version = CodecUtil.checkHeader(input, CODEC_NAME, SUGGEST_CODEC_VERSION, SUGGEST_VERSION_CURRENT);
    PostingsFormat delegatePostingsFormat = PostingsFormat.forName(input.readString());
    String providerName = input.readString();
    CompletionLookupProvider completionLookupProvider = providers.get(providerName);
    if (completionLookupProvider == null) {
      throw new IllegalStateException("no provider with name [" + providerName + "] registered");
    }
    // TODO: we could clone the ReadState and make it always forward IOContext.MERGE to prevent unnecessary heap usage?
    delegateProducer = delegatePostingsFormat.fieldsProducer(state);
    /*
     * If we are merging we don't load the FSTs at all such that we
     * don't consume so much memory during merge
     */
    if (state.context.context != Context.MERGE) {
      // TODO: maybe we can do this in a fully lazy fashion based on some configuration
      // eventually we should have some kind of circuit breaker that prevents us from going OOM here
      // with some configuration
      this.lookupFactory = completionLookupProvider.load(input);
    } else {
      this.lookupFactory = null;
    }
    this.delegateProducer = delegateProducer;
    success = true;
  } finally {
    if (!success) {
      IOUtils.closeWhileHandlingException(delegateProducer, input);
    } else {
      IOUtils.close(input);
    }
  }
}
/**
 * Chooses the postings format for a field.
 *
 * <p>Completion fields get the format supplied by their field type, built around the
 * default format; every other field gets the default format. A warning is logged when no
 * mapping is found for the field.
 *
 * @param field name of the field
 * @return the postings format to use for the field
 */
@Override
public PostingsFormat getPostingsFormatForField(String field) {
  final MappedFieldType mapped = mapperService.indexName(field);
  if (mapped instanceof CompletionFieldMapper.CompletionFieldType) {
    // CompletionFieldMapper needs a special postings format
    final CompletionFieldMapper.CompletionFieldType completionType =
        (CompletionFieldMapper.CompletionFieldType) mapped;
    final PostingsFormat fallback = super.getPostingsFormatForField(field);
    return completionType.postingsFormat(fallback);
  }
  if (mapped == null) {
    logger.warn("no index mapper found for field: [{}] returning default postings format", field);
  }
  return super.getPostingsFormatForField(field);
}
/**
 * Returns the completion postings format, creating it around {@code in} on first use.
 * Later calls return the instance created first, whatever {@code in} they pass.
 *
 * @param in the format to wrap when the completion format has not been created yet
 * @return the cached completion postings format
 * @throws IllegalStateException if {@code in} is already a {@code Completion090PostingsFormat}
 */
public synchronized PostingsFormat postingsFormat(PostingsFormat in) {
  if (in instanceof Completion090PostingsFormat) {
    throw new IllegalStateException("Double wrapping of " + Completion090PostingsFormat.class);
  }
  if (postingsFormat != null) {
    return postingsFormat;
  }
  postingsFormat = new Completion090PostingsFormat(in, analyzingSuggestLookupProvider);
  return postingsFormat;
}
/**
* Reloads all Lucene SPI implementations using the new classloader.
* This method must be called after the new classloader has been created to
* register the services for use.
*
* <p>Postings formats, doc values formats and codecs are reloaded first, followed by the
* analysis factories (char filters, token filters, tokenizers).
*
* @param loader the classloader handed to each reload call
*/
static void reloadLuceneSPI(ClassLoader loader) {
// do NOT change the order of these method calls!
// Codecs:
PostingsFormat.reloadPostingsFormats(loader);
DocValuesFormat.reloadDocValuesFormats(loader);
Codec.reloadCodecs(loader);
// Analysis:
CharFilterFactory.reloadCharFilters(loader);
TokenFilterFactory.reloadTokenFilters(loader);
TokenizerFactory.reloadTokenizers(loader);
}
/**
 * Opens the bloom filter file of a segment: checks its header, opens the delegate
 * postings format named in the file, deserializes one bloom filter per recorded field,
 * then verifies the footer and closes the file.
 *
 * <p>On any failure the bloom input and the delegate producer are closed, suppressing
 * secondary exceptions.
 *
 * @param state read state of the segment being opened
 * @throws IOException if the bloom file or the delegate postings cannot be read
 */
public BloomFilteredFieldsProducer(SegmentReadState state)
    throws IOException {
  final String bloomFile = IndexFileNames.segmentFileName(
      state.segmentInfo.name, state.segmentSuffix, BLOOM_EXTENSION);
  ChecksumIndexInput bloomIn = null;
  boolean loaded = false;
  try {
    bloomIn = state.directory.openChecksumInput(bloomFile, state.context);
    CodecUtil.checkIndexHeader(bloomIn, BLOOM_CODEC_NAME, VERSION_START, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
    // // Load the hash function used in the BloomFilter
    // hashFunction = HashFunction.forName(bloomIn.readString());

    // Load the delegate postings format
    final String delegateName = bloomIn.readString();
    this.delegateFieldsProducer = PostingsFormat.forName(delegateName).fieldsProducer(state);

    // Each entry is a field number followed by the serialized filter.
    final int bloomCount = bloomIn.readInt();
    for (int n = 0; n < bloomCount; n++) {
      final int fieldNumber = bloomIn.readInt();
      final FuzzySet filter = FuzzySet.deserialize(bloomIn);
      final FieldInfo info = state.fieldInfos.fieldInfo(fieldNumber);
      bloomsByFieldName.put(info.name, filter);
    }
    CodecUtil.checkFooter(bloomIn);
    IOUtils.close(bloomIn);
    loaded = true;
  } finally {
    if (!loaded) {
      IOUtils.closeWhileHandlingException(bloomIn, delegateFieldsProducer);
    }
  }
}
/**
 * Opens the completion index and dictionary files of a segment.
 *
 * <p>Both file headers are checked, the delegate postings format named in the index file
 * is opened, and one {@code CompletionsTermsReader} per recorded suggest field is
 * registered. The index file is closed when the constructor finishes; on failure the
 * delegate producer and the dictionary input are closed as well, suppressing secondary
 * exceptions.
 *
 * @param codecName codec name expected in both file headers
 * @param state read state of the segment being opened
 * @param fstLoadMode load mode handed to every terms reader
 * @throws IOException if a file cannot be opened or read, or fails validation
 */
CompletionFieldsProducer(String codecName, SegmentReadState state, FSTLoadMode fstLoadMode) throws IOException {
  final String indexFileName =
      IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, INDEX_EXTENSION);
  delegateFieldsProducer = null;
  boolean opened = false;
  try (ChecksumIndexInput indexIn = state.directory.openChecksumInput(indexFileName, state.context)) {
    // open up dict file containing all fsts
    final String dictFileName =
        IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, DICT_EXTENSION);
    dictIn = state.directory.openInput(dictFileName, state.context);
    CodecUtil.checkIndexHeader(dictIn, codecName, COMPLETION_CODEC_VERSION, COMPLETION_VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
    // just validate the footer for the dictIn
    CodecUtil.retrieveChecksum(dictIn);

    // open up index file (fieldNumber, offset)
    CodecUtil.checkIndexHeader(indexIn, codecName, COMPLETION_CODEC_VERSION, COMPLETION_VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);

    // load delegate PF
    final String delegateName = indexIn.readString();
    delegateFieldsProducer = PostingsFormat.forName(delegateName).fieldsProducer(state);

    // read suggest field numbers and their offsets in the terms file from index
    final int fieldCount = indexIn.readVInt();
    readers = new HashMap<>(fieldCount);
    for (int n = 0; n < fieldCount; n++) {
      final int fieldNumber = indexIn.readVInt();
      final long offset = indexIn.readVLong();
      final long minWeight = indexIn.readVLong();
      final long maxWeight = indexIn.readVLong();
      final byte type = indexIn.readByte();
      final FieldInfo info = state.fieldInfos.fieldInfo(fieldNumber);
      // we don't load the FST yet
      readers.put(info.name, new CompletionsTermsReader(dictIn, offset, minWeight, maxWeight, type, fstLoadMode));
    }
    CodecUtil.checkFooter(indexIn);
    opened = true;
  } finally {
    if (!opened) {
      IOUtils.closeWhileHandlingException(delegateFieldsProducer, dictIn);
    }
  }
}
/**
 * Creates a {@code CompletionFieldsConsumer} that writes through the configured delegate
 * postings format.
 *
 * @param state write state of the segment being written
 * @return the consumer for the segment
 * @throws UnsupportedOperationException if no delegate postings format is available
 * @throws IOException if the consumer cannot be created
 */
@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
  final PostingsFormat delegate = delegatePostingsFormat();
  if (delegate != null) {
    return new CompletionFieldsConsumer(getName(), delegate, state);
  }
  throw new UnsupportedOperationException("Error - " + getClass().getName()
      + " has been constructed without a choice of PostingsFormat");
}
/**
 * Creates a format registered under the delegate's own name.
 *
 * @param delegate the postings format whose name is reused and which is stored as the delegate
 * @param random the random source stored for later use
 */
CrankyPostingsFormat(PostingsFormat delegate, Random random) {
  // we impersonate the passed-in codec, so we don't need to be in SPI,
  // and so we dont change file formats
  super(delegate.getName());
  this.random = random;
  this.delegate = delegate;
}
/**
 * Returns the postings format assigned to a field, assigning one on first request.
 *
 * <p>The assignment is derived from the per-field seed and the field name's hash code and
 * is remembered, so repeated calls for the same name return the same format.
 *
 * @param name name of the field
 * @return the postings format mapped to the field
 */
@Override
public PostingsFormat getPostingsFormatForField(String name) {
  PostingsFormat codec = previousMappings.get(name);
  if (codec == null) {
    // Take the remainder BEFORE the absolute value. Math.abs(Integer.MIN_VALUE) is still
    // negative, so abs-then-modulo could produce a negative index. For every other value
    // both orders select the same format, so existing mappings are unchanged.
    codec = formats.get(Math.abs((perFieldSeed ^ name.hashCode()) % formats.size()));
    previousMappings.put(name, codec);
    // Safety:
    assert previousMappings.size() < 10000: "test went insane";
  }
  return codec;
}
/**
 * Registers each given postings format, together with its name, unless its name is in the
 * avoid set.
 *
 * @param avoidCodecs names of formats that must be skipped
 * @param postings candidate formats
 */
private final void add(Set<String> avoidCodecs, PostingsFormat... postings) {
  for (PostingsFormat candidate : postings) {
    final String candidateName = candidate.getName();
    if (avoidCodecs.contains(candidateName)) {
      continue;
    }
    formats.add(candidate);
    formatNames.add(candidateName);
  }
}
/**
 * Return a Codec that can read any of the
 * default codecs and formats, but always writes in the specified
 * format.
 *
 * @param format the postings format returned for every field
 * @return an {@code AssertingCodec} that answers every per-field lookup with {@code format}
 */
public static Codec alwaysPostingsFormat(final PostingsFormat format) {
  // TODO: we really need for postings impls etc to announce themselves
  // (and maybe their params, too) to infostream on flush and merge.
  // otherwise in a real debugging situation we won't know whats going on!
  if (LuceneTestCase.VERBOSE) {
    System.out.println("forcing postings format to:" + format);
  }
  final Codec forcingCodec = new AssertingCodec() {
    @Override
    public PostingsFormat getPostingsFormatForField(String field) {
      return format;
    }
  };
  return forcingCodec;
}
/**
 * Returns a random postings format that supports term ordinals.
 *
 * @param r random source used to pick between the two candidates
 * @return a new {@code LuceneFixedGap} or {@code BlockTreeOrdsPostingsFormat}
 */
public static PostingsFormat getPostingsFormatWithOrds(Random r) {
  final int pick = r.nextInt(2);
  if (pick == 0) {
    return new LuceneFixedGap();
  }
  if (pick == 1) {
    return new BlockTreeOrdsPostingsFormat();
  }
  // TODO: these don't actually support ords!
  // if (pick == 2) return new FSTOrdPostingsFormat();
  throw new AssertionError();
}
/**
 * Returns the name of the postings format a codec uses for a field.
 *
 * @param codec the codec to inspect
 * @param field field name, consulted only when the codec's format is per-field
 * @return the per-field format's name, or the codec's single format name otherwise
 */
public static String getPostingsFormat(Codec codec, String field) {
  final PostingsFormat format = codec.postingsFormat();
  if (!(format instanceof PerFieldPostingsFormat)) {
    return format.getName();
  }
  final PerFieldPostingsFormat perField = (PerFieldPostingsFormat) format;
  return perField.getPostingsFormatForField(field).getName();
}
/**
 * Writes the postings of all fields, grouped by postings format.
 *
 * <p>Each group gets its own consumer, which sees only that group's fields. Consumers are
 * recorded in {@code toClose}; if writing fails they are closed, suppressing secondary
 * exceptions.
 *
 * @param fields the fields to write
 * @param norms norms passed through to each consumer
 * @throws IOException if a consumer cannot be created or fails while writing
 */
@Override
public void write(Fields fields, NormsProducer norms) throws IOException {
  final Map<PostingsFormat, FieldsGroup> groupsByFormat = buildFieldsGroupMapping(fields);

  // Write postings
  boolean completed = false;
  try {
    for (Map.Entry<PostingsFormat, FieldsGroup> entry : groupsByFormat.entrySet()) {
      final FieldsGroup group = entry.getValue();

      // Exposes only the fields from this group:
      final Fields groupView = new FilterFields(fields) {
        @Override
        public Iterator<String> iterator() {
          return group.fields.iterator();
        }
      };

      final FieldsConsumer groupConsumer = entry.getKey().fieldsConsumer(group.state);
      toClose.add(groupConsumer);
      groupConsumer.write(groupView, norms);
    }
    completed = true;
  } finally {
    if (!completed) {
      IOUtils.closeWhileHandlingException(toClose);
    }
  }
}
/**
 * Merges postings, grouped by postings format.
 *
 * <p>Field names are gathered from all producers of the merge state. Each group is merged
 * by its own consumer using a merge state restricted to that group's fields. The
 * per-field merge state is always reset afterwards; on failure the consumers recorded in
 * {@code toClose} are closed, suppressing secondary exceptions.
 *
 * @param mergeState the state of the merge
 * @param norms norms passed through to each consumer
 * @throws IOException if a consumer cannot be created or fails while merging
 */
@Override
public void merge(MergeState mergeState, NormsProducer norms) throws IOException {
  @SuppressWarnings("unchecked") Iterable<String> allFieldNames = () ->
      new MergedIterator<>(true,
          Arrays.stream(mergeState.fieldsProducers).map(FieldsProducer::iterator).toArray(Iterator[]::new));
  final Map<PostingsFormat, FieldsGroup> groupsByFormat = buildFieldsGroupMapping(allFieldNames);

  // Merge postings
  final PerFieldMergeState perFieldState = new PerFieldMergeState(mergeState);
  boolean completed = false;
  try {
    for (Map.Entry<PostingsFormat, FieldsGroup> entry : groupsByFormat.entrySet()) {
      final FieldsGroup group = entry.getValue();
      final FieldsConsumer groupConsumer = entry.getKey().fieldsConsumer(group.state);
      toClose.add(groupConsumer);
      groupConsumer.merge(perFieldState.apply(group.fields), norms);
    }
    completed = true;
  } finally {
    perFieldState.reset();
    if (!completed) {
      IOUtils.closeWhileHandlingException(toClose);
    }
  }
}
/**
 * Opens one fields producer per distinct format/suffix combination recorded in the field
 * infos and maps every indexed field that has a format attribute to its producer.
 *
 * <p>If anything fails, the producers opened so far are closed, suppressing secondary
 * exceptions.
 *
 * @param readState read state of the segment being opened
 * @throws IOException if a producer cannot be opened
 * @throws IllegalStateException if a field names a format but carries no suffix attribute
 */
public FieldsReader(final SegmentReadState readState) throws IOException {
  // Read _X.per and init each format:
  boolean opened = false;
  try {
    // Read field name -> format name
    for (FieldInfo info : readState.fieldInfos) {
      if (info.getIndexOptions() == IndexOptions.NONE) {
        continue;
      }
      final String fieldName = info.name;
      final String formatName = info.getAttribute(PER_FIELD_FORMAT_KEY);
      if (formatName == null) {
        // null formatName means the field is in fieldInfos, but has no postings!
        continue;
      }
      final String suffix = info.getAttribute(PER_FIELD_SUFFIX_KEY);
      if (suffix == null) {
        throw new IllegalStateException("missing attribute: " + PER_FIELD_SUFFIX_KEY + " for field: " + fieldName);
      }
      final PostingsFormat format = PostingsFormat.forName(formatName);
      final String producerKey = getSuffix(formatName, suffix);
      if (!formats.containsKey(producerKey)) {
        // First field seen for this format/suffix: open its producer.
        formats.put(producerKey, format.fieldsProducer(new SegmentReadState(readState, producerKey)));
      }
      fields.put(fieldName, formats.get(producerKey));
    }
    opened = true;
  } finally {
    if (!opened) {
      IOUtils.closeWhileHandlingException(formats.values());
    }
  }

  this.segment = readState.segmentInfo.name;
}
/**
 * Selects the postings format by field name.
 *
 * @param field name of the field
 * @return {@code direct} for the "id" field, {@code luceneDefault} for every other field
 */
@Override
public PostingsFormat getPostingsFormatForField(String field) {
  return field.equals("id") ? direct : luceneDefault;
}
/**
 * Runs the mixed-postings test with a codec that hands out a fresh
 * {@code DirectPostingsFormat} instance for the "id" and "date" fields on every lookup.
 */
public void testSameCodecDifferentInstance() throws Exception {
  final Codec perLookupInstances = new AssertingCodec() {
    @Override
    public PostingsFormat getPostingsFormatForField(String field) {
      if ("id".equals(field) || "date".equals(field)) {
        // A new instance on each call, never a shared one.
        return new DirectPostingsFormat();
      }
      return super.getPostingsFormatForField(field);
    }
  };
  doTestMixedPostings(perLookupInstances);
}
/**
 * Runs the mixed-postings test with a codec that uses {@code LuceneVarGapFixedInterval}
 * for both "id" and "date", constructed with argument 1 and 2 respectively.
 */
public void testSameCodecDifferentParams() throws Exception {
  final Codec differentParams = new AssertingCodec() {
    @Override
    public PostingsFormat getPostingsFormatForField(String field) {
      if ("id".equals(field)) {
        return new LuceneVarGapFixedInterval(1);
      }
      if ("date".equals(field)) {
        return new LuceneVarGapFixedInterval(2);
      }
      return super.getPostingsFormatForField(field);
    }
  };
  doTestMixedPostings(differentParams);
}
/**
 * Selects the postings format by field name.
 *
 * @param field name of the field
 * @return {@code directFormat} for the "id" field, {@code defaultFormat} for every other field
 */
@Override
public PostingsFormat getPostingsFormatForField(String field) {
  return field.equals("id") ? directFormat : defaultFormat;
}
/**
 * Selects the postings format by field name.
 *
 * @param field name of the field
 * @return {@code defaultFormat} for "field1", "field2" and "id"; {@code ramFormat} otherwise
 */
@Override
public PostingsFormat getPostingsFormatForField(String field) {
  switch (field) {
    case "field2":
    case "field1":
    case "id":
      return defaultFormat;
    default:
      return ramFormat;
  }
}
/**
 * Creates the fields consumer for a segment.
 *
 * <p>With a delegate postings format present, the delegate's consumer is wrapped in a
 * {@code MtasFieldsConsumer}. Without one, the consumer comes straight from the postings
 * format of the codec looked up by the stored delegate codec name.
 *
 * @param state write state of the segment being written
 * @return the consumer for the segment
 * @throws IOException if the consumer cannot be created
 */
@Override
public final FieldsConsumer fieldsConsumer(SegmentWriteState state)
    throws IOException {
  if (delegatePostingsFormat == null) {
    final PostingsFormat codecFormat = Codec.forName(delegateCodecName).postingsFormat();
    return codecFormat.fieldsConsumer(state);
  }
  final FieldsConsumer delegateConsumer = delegatePostingsFormat.fieldsConsumer(state);
  return new MtasFieldsConsumer(
      delegateConsumer, state, getName(), delegatePostingsFormat.getName());
}
/**
 * Returns the default postings format for the field, logging a warning first when the
 * mapper service has no field type for it.
 *
 * @param field name of the field
 * @return the postings format supplied by the superclass
 */
@Override
public PostingsFormat getPostingsFormatForField(String field) {
  if (mapperService.fullName(field) == null) {
    logger.warn("no index mapper found for field: [{}] returning default postings format", field);
  }
  return super.getPostingsFormatForField(field);
}