下面列出了org.apache.lucene.index.ConcurrentMergeScheduler#org.apache.lucene.codecs.Codec 实例代码,或者点击链接到github查看源代码,也可以在右侧发表评论。
/**
 * Looks up the codec of the segment named {@code name} in the commit with generation {@code commitGen}.
 *
 * @param commitGen generation of the commit whose segment infos are searched
 * @param name segment name to match
 * @return the matching segment's codec; empty when no segment infos are found for the commit
 *         or when no segment carries that name
 * @throws LukeException if loading the segment infos fails with an {@link IOException}
 */
@Override
public Optional<Codec> getSegmentCodec(long commitGen, String name) throws LukeException {
  final SegmentInfos segmentInfos;
  try {
    segmentInfos = findSegmentInfos(commitGen);
  } catch (IOException e) {
    throw new LukeException(String.format(Locale.ENGLISH, "Failed to load segment infos for commit generation %d", commitGen), e);
  }

  if (segmentInfos == null) {
    return Optional.empty();
  }

  return segmentInfos.asList().stream()
      .map(commitInfo -> commitInfo.info)
      .filter(info -> info.name.equals(name))
      .findAny()
      .map(info -> info.getCodec());
}
/**
 * Builds a randomized {@link IndexWriterConfig} whose codec routes every field listed in
 * {@code suggestFields} to a completion postings format (with a randomly picked FST load mode)
 * and leaves all other fields on the codec's regular per-field postings format.
 */
static IndexWriterConfig iwcWithSuggestField(Analyzer analyzer, final Set<String> suggestFields) {
  final IndexWriterConfig config = newIndexWriterConfig(random(), analyzer);
  config.setMergePolicy(newLogMergePolicy());
  config.setCodec(new Lucene86Codec() {
    // One completion format instance is shared by all suggest fields of this codec.
    final CompletionPostingsFormat.FSTLoadMode loadMode =
        RandomPicks.randomFrom(random(), CompletionPostingsFormat.FSTLoadMode.values());
    final PostingsFormat completionFormat = new Completion84PostingsFormat(loadMode);

    @Override
    public PostingsFormat getPostingsFormatForField(String field) {
      return suggestFields.contains(field)
          ? completionFormat
          : super.getPostingsFormatForField(field);
    }
  });
  return config;
}
/**
 * Indexes all fields/terms at the specified IndexOptions, and fully tests at that IndexOptions.
 *
 * <p>The index is built in a filesystem directory at {@code path}. Terms are then checked at
 * every {@link IndexOptions} level up to and including {@code options}; when payloads were
 * indexed, each level is additionally checked without the {@code PAYLOADS} option.
 *
 * @param codec codec used to build the index
 * @param path location of the filesystem directory
 * @param options index options the fields are indexed with
 * @param withPayloads whether payloads are indexed
 */
public void testFull(Codec codec, Path path, IndexOptions options, boolean withPayloads) throws Exception {
  // TODO test thread safety of buildIndex too
  // try-with-resources: the producer and the directory are released even when a check below fails
  // (producer first, then the directory it reads from).
  try (Directory dir = LuceneTestCase.newFSDirectory(path);
       FieldsProducer fieldsProducer = buildIndex(codec, dir, options, withPayloads, true)) {

    testFields(fieldsProducer);

    IndexOptions[] allOptions = IndexOptions.values();
    int maxIndexOption = Arrays.asList(allOptions).indexOf(options);

    for (int i = 0; i <= maxIndexOption; i++) {
      testTerms(fieldsProducer, EnumSet.allOf(Option.class), allOptions[i], options, true);
      if (withPayloads) {
        // If we indexed w/ payloads, also test enums w/o accessing payloads:
        testTerms(fieldsProducer, EnumSet.complementOf(EnumSet.of(Option.PAYLOADS)), allOptions[i], options, true);
      }
    }
  }
}
/** Verifies that the attribute map of a field info read back from the codec rejects modification. */
public void testImmutableAttributes() throws Exception {
  final Directory directory = newDirectory();
  final Codec codec = getCodec();
  final SegmentInfo segment = newSegmentInfo(directory, "_123");

  // Build a single indexed field carrying a couple of attributes.
  final FieldInfos.Builder fieldsBuilder = new FieldInfos.Builder(new FieldInfos.FieldNumbers(null));
  final FieldInfo field = fieldsBuilder.getOrAdd("field");
  field.setIndexOptions(TextField.TYPE_STORED.indexOptions());
  addAttributes(field);
  field.putAttribute("foo", "bar");
  field.putAttribute("bar", "baz");
  final FieldInfos written = fieldsBuilder.finish();

  // Round-trip through the codec's field infos format.
  codec.fieldInfosFormat().write(directory, segment, "", written, IOContext.DEFAULT);
  final FieldInfos readBack = codec.fieldInfosFormat().read(directory, segment, "", IOContext.DEFAULT);

  assertEquals(1, readBack.size());
  assertNotNull(readBack.fieldInfo("field"));
  final Map<String,String> attributes = readBack.fieldInfo("field").attributes();

  // shouldn't be able to modify attributes
  expectThrows(UnsupportedOperationException.class, () -> attributes.put("bogus", "bogus"));

  directory.close();
}
/**
 * Opens a term vectors reader with the default codec and checks that, for each of the documents
 * 0..4, the vector of {@code testFields[0]} enumerates exactly the terms in {@code testTerms},
 * in that order.
 */
public void testReader() throws IOException {
  // try-with-resources so the reader is closed even when one of the assertions fails
  try (TermVectorsReader reader = Codec.getDefault().termVectorsFormat()
      .vectorsReader(dir, seg.info, fieldInfos, newIOContext(random()))) {
    for (int j = 0; j < 5; j++) {
      Terms vector = reader.get(j).terms(testFields[0]);
      assertNotNull(vector);
      assertEquals(testTerms.length, vector.size());

      TermsEnum termsEnum = vector.iterator();
      for (int i = 0; i < testTerms.length; i++) {
        final BytesRef text = termsEnum.next();
        assertNotNull(text);
        assertEquals(testTerms[i], text.utf8ToString());
      }
      // the enum must be exhausted after the expected terms
      assertNull(termsEnum.next());
    }
  }
}
/** Verifies that segment info attributes survive a write/read round trip and come back unmodifiable. */
public void testAttributes() throws Exception {
  final Directory directory = newDirectory();
  final Codec codec = getCodec();
  final byte[] segmentId = StringHelper.randomId();

  final Map<String,String> expected = new HashMap<>();
  expected.put("key1", "value1");
  expected.put("key2", "value2");

  final SegmentInfo written = new SegmentInfo(directory, getVersions()[0], getVersions()[0], "_123", 1, false, codec,
      Collections.emptyMap(), segmentId, expected, null);
  written.setFiles(Collections.emptySet());
  codec.segmentInfoFormat().write(directory, written, IOContext.DEFAULT);

  final SegmentInfo readBack = codec.segmentInfoFormat().read(directory, "_123", segmentId, IOContext.DEFAULT);
  assertEquals(expected, readBack.getAttributes());

  // attributes map should be immutable
  expectThrows(UnsupportedOperationException.class, () -> readBack.getAttributes().put("bogus", "bogus"));

  directory.close();
}
/**
 * Check codec restrictions.
 *
 * <p>Checks, in order: the codec's own name, the format names of a {@code RandomCodec} (only when
 * the avoid list is non-empty), the codec's postings format name, and the postings format named
 * by {@code LuceneTestCase.TEST_POSTINGSFORMAT}.
 *
 * @throws AssumptionViolatedException if the class does not work with a given codec.
 */
private void checkCodecRestrictions(Codec codec) {
  final String codecName = codec.getName();
  assumeFalse("Class not allowed to use codec: " + codecName + ".",
      shouldAvoidCodec(codecName));

  if (codec instanceof RandomCodec && !avoidCodecs.isEmpty()) {
    final RandomCodec randomCodec = (RandomCodec) codec;
    for (String formatName : randomCodec.formatNames) {
      assumeFalse("Class not allowed to use postings format: " + formatName + ".",
          shouldAvoidCodec(formatName));
    }
  }

  final String postingsName = codec.postingsFormat().getName();
  assumeFalse("Class not allowed to use postings format: " + postingsName + ".",
      shouldAvoidCodec(postingsName));

  assumeFalse("Class not allowed to use postings format: " + LuceneTestCase.TEST_POSTINGSFORMAT + ".",
      shouldAvoidCodec(LuceneTestCase.TEST_POSTINGSFORMAT));
}
/**
 * Commits a single segment written as {@code Version.LUCENE_9_0_0} and checks that the re-read
 * commit reports that version as its minimum segment version and {@code Version.LATEST} as the
 * commit version.
 */
public void testVersionsOneSegment() throws IOException {
  final BaseDirectoryWrapper directory = newDirectory();
  directory.setCheckIndexOnClose(false);

  final byte[] segmentId = StringHelper.randomId();
  final Codec codec = Codec.getDefault();
  final SegmentInfos pending = new SegmentInfos(Version.LATEST.major);

  final SegmentInfo segment = new SegmentInfo(directory, Version.LUCENE_9_0_0, Version.LUCENE_9_0_0, "_0", 1, false, codec,
      Collections.emptyMap(), segmentId, Collections.emptyMap(), null);
  segment.setFiles(Collections.emptySet());
  codec.segmentInfoFormat().write(directory, segment, IOContext.DEFAULT);

  pending.add(new SegmentCommitInfo(segment, 0, 0, -1, -1, -1, StringHelper.randomId()));
  pending.commit(directory);

  final SegmentInfos committed = SegmentInfos.readLatestCommit(directory);
  assertEquals(Version.LUCENE_9_0_0, committed.getMinSegmentLuceneVersion());
  assertEquals(Version.LATEST, committed.getCommitLuceneVersion());

  directory.close();
}
/**
 * Reloads all Lucene SPI implementations using the new classloader.
 * This method must be called after {@link #addToClassLoader(List)}
 * and before using this ResourceLoader.
 *
 * <p>Does nothing unless the reload flag is set; the flag is cleared before reloading.
 */
synchronized void reloadLuceneSPI() {
  // TODO improve to use a static Set<URL> to check when we need to
  if (needToReloadLuceneSPI) {
    needToReloadLuceneSPI = false; // reset
    log.debug("Reloading Lucene SPI");

    final ClassLoader loader = this.classLoader;

    // Codecs:
    PostingsFormat.reloadPostingsFormats(loader);
    DocValuesFormat.reloadDocValuesFormats(loader);
    Codec.reloadCodecs(loader);

    // Analysis:
    CharFilterFactory.reloadCharFilters(loader);
    TokenFilterFactory.reloadTokenFilters(loader);
    TokenizerFactory.reloadTokenizers(loader);
  }
}
/**
 * Exercises pending deletes on a single segment: deleting a document reports {@code true} only
 * the first time, a previously obtained live-docs instance keeps showing later-deleted docs as
 * live, and a freshly obtained instance reflects every delete made so far.
 */
public void testDeleteDoc() throws IOException {
Directory dir = new ByteBuffersDirectory();
// segment created with 10 as its doc-count argument; docs 0..9 are addressed below
SegmentInfo si = new SegmentInfo(dir, Version.LATEST, Version.LATEST, "test", 10, false, Codec.getDefault(),
Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null);
SegmentCommitInfo commitInfo = new SegmentCommitInfo(si, 0, 0, -1, -1, -1, StringHelper.randomId());
PendingDeletes deletes = newPendingDeletes(commitInfo);
// no live docs are exposed before the first delete
assertNull(deletes.getLiveDocs());
// random doc drawn with bounds 0 and 7, so it cannot collide with docs 8 and 9 used further down
int docToDelete = TestUtil.nextInt(random(), 0, 7);
assertTrue(deletes.delete(docToDelete));
assertNotNull(deletes.getLiveDocs());
assertEquals(1, deletes.numPendingDeletes());
// keep this instance around: the following assertions treat it as a point-in-time view
Bits liveDocs = deletes.getLiveDocs();
assertFalse(liveDocs.get(docToDelete));
assertFalse(deletes.delete(docToDelete)); // delete again
assertTrue(liveDocs.get(8));
assertTrue(deletes.delete(8));
assertTrue(liveDocs.get(8)); // we have a snapshot
assertEquals(2, deletes.numPendingDeletes());
assertTrue(liveDocs.get(9));
assertTrue(deletes.delete(9));
// still live in the old instance, for the same reason as doc 8 above
assertTrue(liveDocs.get(9));
// now make sure new live docs see the deletions
liveDocs = deletes.getLiveDocs();
assertFalse(liveDocs.get(9));
assertFalse(liveDocs.get(8));
assertFalse(liveDocs.get(docToDelete));
assertEquals(3, deletes.numPendingDeletes());
dir.close();
}
/** Asking for the codec of a segment name that does not exist in the commit yields no codec. */
@Test
public void testSegmentCodec_invalid_name() {
  final CommitsImpl commits = new CommitsImpl(reader, indexDir.toString());
  assertFalse(commits.getSegmentCodec(1, "xxx").isPresent());
}
/**
 * Merges the two given segments into a new segment named {@code merged} using the default codec.
 *
 * <p>The merged segment's files are those created through a tracking wrapper around
 * {@code si1.info.dir}. When {@code useCompoundFile} is set, the compound file is written to
 * {@code dir} and the individual files are then deleted from {@code si1.info.dir}.
 *
 * @param dir directory the compound file is written to
 * @param si1 first source segment; its directory also receives the merged files
 * @param si2 second source segment
 * @param merged name of the resulting segment
 * @param useCompoundFile whether to pack the merged files into a compound file
 * @return commit info wrapping the merged segment
 */
private SegmentCommitInfo merge(Directory dir, SegmentCommitInfo si1, SegmentCommitInfo si2, String merged, boolean useCompoundFile)
    throws Exception {
  IOContext context = newIOContext(random(), new IOContext(new MergeInfo(-1, -1, false, -1)));

  final Codec codec = Codec.getDefault();
  TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(si1.info.dir);
  final SegmentInfo si = new SegmentInfo(si1.info.dir, Version.LATEST, null, merged, -1, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null);

  // try-with-resources: both readers are released even if the merge itself throws
  try (SegmentReader r1 = new SegmentReader(si1, Version.LATEST.major, context);
       SegmentReader r2 = new SegmentReader(si2, Version.LATEST.major, context)) {
    SegmentMerger merger = new SegmentMerger(Arrays.<CodecReader>asList(r1, r2),
        si, InfoStream.getDefault(), trackingDir,
        new FieldInfos.FieldNumbers(null), context);
    merger.merge();
  }

  si.setFiles(new HashSet<>(trackingDir.getCreatedFiles()));

  if (useCompoundFile) {
    // capture the per-file list before the compound file is written, then remove those files
    Collection<String> filesToDelete = si.files();
    codec.compoundFormat().write(dir, si, context);
    si.setUseCompoundFile(true);
    for (String name : filesToDelete) {
      si1.info.dir.deleteFile(name);
    }
  }

  return new SegmentCommitInfo(si, 0, 0, -1L, -1L, -1L, StringHelper.randomId());
}
/**
* Wrap the provided codec with crankiness.
* Try passing Asserting for the most fun.
*
* @param delegate codec to wrap; its name is reused as this codec's name
* @param random source of randomness kept by this codec
*/
public CrankyCodec(Codec delegate, Random random) {
// we impersonate the passed-in codec, so we don't need to be in SPI,
// and so we dont change file formats
super(delegate.getName(), delegate);
this.random = random;
}
/** Round-trips a large set of randomly configured fields through the codec's field infos format. */
public void testRandom() throws Exception {
  final Directory directory = newDirectory();
  final Codec codec = getCodec();
  final SegmentInfo segment = newSegmentInfo(directory, "_123");

  // generate a bunch of fields (duplicates collapse in the set)
  final int fieldCount = atLeast(2000);
  final Set<String> names = new HashSet<>();
  for (int remaining = fieldCount; remaining > 0; remaining--) {
    names.add(TestUtil.randomUnicodeString(random()));
  }

  final FieldInfos.Builder fieldsBuilder = new FieldInfos.Builder(new FieldInfos.FieldNumbers(null));
  for (String name : names) {
    final IndexableFieldType type = randomFieldType(random());
    final FieldInfo info = fieldsBuilder.getOrAdd(name);

    final IndexOptions indexOptions = type.indexOptions();
    final boolean indexed = indexOptions != IndexOptions.NONE;
    if (indexed) {
      info.setIndexOptions(indexOptions);
      if (type.omitNorms()) {
        info.setOmitsNorms();
      }
    }

    info.setDocValuesType(type.docValuesType());

    // payloads are only considered for fields indexed with at least positions
    if (indexed
        && indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0
        && random().nextBoolean()) {
      info.setStorePayloads();
    }

    addAttributes(info);
  }

  final FieldInfos written = fieldsBuilder.finish();
  codec.fieldInfosFormat().write(directory, segment, "", written, IOContext.DEFAULT);
  final FieldInfos readBack = codec.fieldInfosFormat().read(directory, segment, "", IOContext.DEFAULT);
  assertEquals(written, readBack);

  directory.close();
}
/**
 * Checks which doc values format the core's codec selects for three schema fields:
 * {@code string_disk_f} and {@code string_f} get the default format, {@code string_direct_f}
 * gets "Asserting".
 */
public void testDocValuesFormats() {
  // NOTE: Direct (and Disk) DocValues formats were removed, so we use "Asserting"
  // as a way to vet that the configuration actually matters.
  final Codec codec = h.getCore().getCodec();
  final Map<String, SchemaField> schemaFields = h.getCore().getLatestSchema().getFields();

  final SchemaField diskField = schemaFields.get("string_disk_f");
  final PerFieldDocValuesFormat perField = (PerFieldDocValuesFormat) codec.docValuesFormat();
  assertEquals(TestUtil.getDefaultDocValuesFormat().getName(),
      perField.getDocValuesFormatForField(diskField.getName()).getName());

  final SchemaField directField = schemaFields.get("string_direct_f");
  assertEquals("Asserting",
      perField.getDocValuesFormatForField(directField.getName()).getName());

  final SchemaField plainField = schemaFields.get("string_f");
  assertEquals(TestUtil.getDefaultDocValuesFormat().getName(),
      perField.getDocValuesFormatForField(plainField.getName()).getName());
}
/**
 * Returns the "Lucene84" codec wrapped so that its segment info format is replaced by
 * {@code Lucene70RWSegmentInfoFormat}.
 */
@Override
protected Codec getCodec() {
  final String codecName = "Lucene84";
  final Codec delegate = Codec.forName(codecName);
  return new FilterCodec(codecName, delegate) {
    @Override
    public SegmentInfoFormat segmentInfoFormat() {
      return new Lucene70RWSegmentInfoFormat();
    }
  };
}
/**
* Reloads all Lucene SPI implementations using the new classloader.
* This method must be called after the new classloader has been created to
* register the services for use.
*
* @param loader class loader handed to every reload call below
*/
static void reloadLuceneSPI(ClassLoader loader) {
// do NOT change the order of these method calls!
// NOTE(review): presumably codecs resolve postings/doc-values formats by name when they are
// loaded, which is why those two are reloaded first — confirm before touching the order.
// Codecs:
PostingsFormat.reloadPostingsFormats(loader);
DocValuesFormat.reloadDocValuesFormats(loader);
Codec.reloadCodecs(loader);
// Analysis:
CharFilterFactory.reloadCharFilters(loader);
TokenFilterFactory.reloadTokenFilters(loader);
TokenizerFactory.reloadTokenizers(loader);
}
/**
 * Checks which postings format the core's codec selects for three schema fields:
 * {@code string_direct_f} gets "Direct", {@code string_standard_f} and {@code string_f} get the
 * default format.
 */
public void testPostingsFormats() {
  final Codec codec = h.getCore().getCodec();
  final Map<String, SchemaField> schemaFields = h.getCore().getLatestSchema().getFields();

  final SchemaField directField = schemaFields.get("string_direct_f");
  final PerFieldPostingsFormat perField = (PerFieldPostingsFormat) codec.postingsFormat();
  assertEquals("Direct",
      perField.getPostingsFormatForField(directField.getName()).getName());

  final SchemaField standardField = schemaFields.get("string_standard_f");
  assertEquals(TestUtil.getDefaultPostingsFormat().getName(),
      perField.getPostingsFormatForField(standardField.getName()).getName());

  final SchemaField plainField = schemaFields.get("string_f");
  assertEquals(TestUtil.getDefaultPostingsFormat().getName(),
      perField.getPostingsFormatForField(plainField.getName()).getName());
}
/**
 * After suite cleanup (always invoked).
 *
 * <p>Puts back the saved default codec and info stream, and the saved locale and time zone
 * when they were recorded (non-null).
 */
@Override
protected void after() throws Exception {
  Codec.setDefault(savedCodec);
  InfoStream.setDefault(savedInfoStream);

  if (savedLocale != null) {
    Locale.setDefault(savedLocale);
  }

  if (savedTimeZone != null) {
    TimeZone.setDefault(savedTimeZone);
  }
}
/**
 * Returns the name of the doc values format the codec uses for {@code field}.
 *
 * <p>When the codec's doc values format is a {@code PerFieldDocValuesFormat}, the format it
 * selects for the field is reported; otherwise the codec-wide format is.
 */
public static String getDocValuesFormat(Codec codec, String field) {
  final DocValuesFormat codecFormat = codec.docValuesFormat();
  final DocValuesFormat effective = (codecFormat instanceof PerFieldDocValuesFormat)
      ? ((PerFieldDocValuesFormat) codecFormat).getDocValuesFormatForField(field)
      : codecFormat;
  return effective.getName();
}
/**
 * Returns a random compressing codec: {@code randomInstance} when {@code TEST_NIGHTLY} is set,
 * {@code reasonableInstance} otherwise.
 */
@Override
protected Codec getCodec() {
  return TEST_NIGHTLY
      ? CompressingCodec.randomInstance(random())
      : CompressingCodec.reasonableInstance(random());
}
/**
 * Sets the codec. Can only be called once.
 *
 * <p>The "once" rule is checked with an {@code assert} only, so it is enforced just when
 * assertions are enabled.
 *
 * @param codec codec to store; must not be {@code null}
 * @throws IllegalArgumentException if {@code codec} is {@code null}
 */
public void setCodec(Codec codec) {
// checked before the null test: a second call fails the assert regardless of its argument
assert this.codec == null;
if (codec == null) {
throw new IllegalArgumentException("codec must be non-null");
}
this.codec = codec;
}
/**
 * Reads a codec name from {@code input} and resolves it with {@code Codec.forName}.
 *
 * @throws IllegalArgumentException if the name cannot be resolved; for names starting with
 *         "Lucene" the exception carries a hint about the backward-codecs jar and wraps the
 *         original failure
 */
private static Codec readCodec(DataInput input) throws IOException {
  final String codecName = input.readString();
  try {
    return Codec.forName(codecName);
  } catch (IllegalArgumentException unknownCodec) {
    if (!codecName.startsWith("Lucene")) {
      throw unknownCodec;
    }
    // maybe it's an old default codec that moved
    throw new IllegalArgumentException("Could not load codec '" + codecName + "'. Did you forget to add lucene-backward-codecs.jar?", unknownCodec);
  }
}
/**
* Set the {@link Codec}.
*
* <p>
* Only takes effect when IndexWriter is first created.
*
* @param codec codec to use; must not be {@code null}
* @return this config, so calls can be chained
* @throws IllegalArgumentException if {@code codec} is {@code null}
*/
public IndexWriterConfig setCodec(Codec codec) {
if (codec == null) {
throw new IllegalArgumentException("codec must not be null");
}
this.codec = codec;
return this;
}
/**
 * Runs the mixed-postings test with a codec that hands out a separate
 * {@code DirectPostingsFormat} instance for each lookup of the "id" and "date" fields.
 */
public void testSameCodecDifferentInstance() throws Exception {
  doTestMixedPostings(new AssertingCodec() {
    @Override
    public PostingsFormat getPostingsFormatForField(String field) {
      final boolean useDirect = "id".equals(field) || "date".equals(field);
      // a fresh instance on every call is the point of this test
      return useDirect ? new DirectPostingsFormat() : super.getPostingsFormatForField(field);
    }
  });
}
/**
 * Returns the index writer for this project, opening the index on the first call.
 *
 * <p>On first use this registers the index in {@code OPEN_INDEXES}, collects the enabled
 * annotation features, builds the per-field analyzer (MTAS tokenizer for the content field,
 * {@code StandardAnalyzer} otherwise), creates the index directory, opens the writer with the
 * MTAS codec and performs an initial commit. The writer is only published to
 * {@code _indexWriter} after that commit succeeded.
 *
 * <p>If opening the writer or the initial commit fails, the writer is rolled back and the
 * directory is closed before the exception is propagated, so that no file handle or write lock
 * stays behind and a later call can retry.
 *
 * @return the open index writer
 * @throws IOException if the index directory cannot be created or the index cannot be opened
 */
private synchronized IndexWriter getIndexWriter() throws IOException
{
    if (_indexWriter != null) {
        return _indexWriter;
    }

    log.debug("Opening index for project [{}]({})", project.getName(), project.getId());

    // NOTE(review): the index stays registered here even if opening fails below — confirm
    // whether callers rely on that before changing it.
    OPEN_INDEXES.put(project.getId(), this);

    // Initialize and populate the hash maps for the layers and features
    features = schemaService.listAnnotationFeature(project).stream()
            .filter(feat -> feat.getLayer().isEnabled())
            .filter(feat -> feat.isEnabled())
            .collect(Collectors.toList());

    // Add the project id to the configuration
    JSONObject jsonParserConfiguration = new JSONObject();
    jsonParserConfiguration.put(PARAM_PROJECT_ID, project.getId());

    // Tokenizer parameters
    Map<String, String> tokenizerArguments = new HashMap<>();
    tokenizerArguments.put(ARGUMENT_PARSER, MtasUimaParser.class.getName());
    tokenizerArguments.put(ARGUMENT_PARSER_ARGS, jsonParserConfiguration.toString());

    // Build analyzer
    Analyzer mtasAnalyzer = CustomAnalyzer.builder()
            .withTokenizer(MtasTokenizerFactory.class, tokenizerArguments)
            .build();

    Map<String, Analyzer> analyzerPerField = new HashMap<>();
    analyzerPerField.put(FIELD_CONTENT, mtasAnalyzer);

    PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new StandardAnalyzer(),
            analyzerPerField);

    // Build IndexWriter
    FileUtils.forceMkdir(getIndexDir());
    IndexWriterConfig config = new IndexWriterConfig(analyzer);
    config.setCodec(Codec.forName(MTAS_CODEC_NAME));

    FSDirectory directory = FSDirectory.open(getIndexDir().toPath());
    IndexWriter indexWriter = null;
    try {
        indexWriter = new IndexWriter(directory, config);

        // Initialize the index
        indexWriter.commit();
    }
    catch (IOException | RuntimeException e) {
        // Release whatever was acquired so far; cleanup problems must not mask the cause.
        if (indexWriter != null) {
            try {
                // rollback closes the writer without attempting another commit
                indexWriter.rollback();
            }
            catch (IOException | RuntimeException cleanupFailure) {
                e.addSuppressed(cleanupFailure);
            }
        }
        try {
            directory.close();
        }
        catch (IOException | RuntimeException cleanupFailure) {
            e.addSuppressed(cleanupFailure);
        }
        throw e;
    }

    // After the index has been initialized, assign the _indexWriter - this is also used
    // by isOpen() to check if the index writer is available.
    _indexWriter = indexWriter;

    return _indexWriter;
}
/**
 * Test toString method: checks the rendered string for all four combinations of empty/non-empty
 * diagnostics and attributes.
 */
public void testToString() throws Throwable{
  final Directory directory = newDirectory();
  final Codec codec = Codec.getDefault();

  // diagnostics map
  final Map<String, String> diagnostics = Map.of("key1", "value1", "key2", "value2");
  // attributes map
  final Map<String,String> attributes = Map.of("akey1", "value1", "akey2", "value2");

  // pieces every expected string is assembled from
  final String common = "TEST(" + Version.LATEST.toString() + ")" + ":C10000" + ":[indexSort=<doc>]";
  final String diagnosticsPart = ":[diagnostics=" + diagnostics + "]";
  final String attributesPart = ":[attributes=" + attributes + "]";

  // diagnostics X, attributes X
  SegmentInfo info = new SegmentInfo(directory, Version.LATEST, Version.LATEST, "TEST", 10000, false, codec,
      Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), Sort.INDEXORDER);
  assertEquals(common, info.toString());

  // diagnostics O, attributes X
  info = new SegmentInfo(directory, Version.LATEST, Version.LATEST, "TEST", 10000, false, codec,
      diagnostics, StringHelper.randomId(), new HashMap<>(), Sort.INDEXORDER);
  assertEquals(common + diagnosticsPart, info.toString());

  // diagnostics X, attributes O
  info = new SegmentInfo(directory, Version.LATEST, Version.LATEST, "TEST", 10000, false, codec,
      Collections.emptyMap(), StringHelper.randomId(), attributes, Sort.INDEXORDER);
  assertEquals(common + attributesPart, info.toString());

  // diagnostics O, attributes O
  info = new SegmentInfo(directory, Version.LATEST, Version.LATEST, "TEST", 10000, false, codec,
      diagnostics, StringHelper.randomId(), attributes, Sort.INDEXORDER);
  assertEquals(common + diagnosticsPart + attributesPart, info.toString());

  directory.close();
}
/** Returns the codec obtained from {@code TestUtil.getDefaultCodec()}. */
@Override
protected Codec getCodec() {
return TestUtil.getDefaultCodec();
}
/** Looks up the codec registered under the name "Lucene84". */
private static Codec getCodec() {
return Codec.forName("Lucene84");
}
/** Returns the codec held in the {@code codec} field. */
@Override
protected Codec getCodec() {
return codec;
}