The following lists example code showing how to use the org.apache.lucene.index.LogByteSizeMergePolicy API class; you can also follow the link to view the source code on GitHub.
/**
 * Indexes all text files under the directories named in {@code args},
 * writing the index into {@code INDEX_DIR}.
 *
 * @param INDEX_DIR directory in which the Lucene index is created
 * @param args      one or more document-directory paths to index
 * @throws IOException if a document directory is missing/unreadable or indexing fails
 */
public void main(final File INDEX_DIR, final String[] args) throws IOException {
IndexWriterConfig config = new IndexWriterConfig();
config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
config.setMergePolicy(new LogByteSizeMergePolicy());
// try-with-resources: the original leaked the writer (and its directory
// lock) whenever an exception was thrown before close().
try (IndexWriter writer = new IndexWriter(
    FSDirectory.open(Paths.get(INDEX_DIR.getCanonicalPath())), config)) {
for (String path : args) {
final File docDir = new File(path);
if (!docDir.exists() || !docDir.canRead()) {
System.out.println("Document directory '" + docDir.getAbsolutePath() + "' does not exist or is not readable, please check the path");
throw new IOException("Cannot read from document directory");
}
indexDocs(writer, docDir);
}
// Merge down to one segment once, after all directories are indexed;
// the original re-ran this expensive merge inside the loop on every pass.
System.out.println("Optimizing...");
writer.forceMerge(1);
}
}
/**
 * Opens an {@link IndexWriter} over {@code indexPath} using a
 * {@link SmartChineseAnalyzer} and a tuned size-based log merge policy.
 *
 * @param indexPath filesystem path of the index directory
 * @param create    {@code true} to always create a fresh index,
 *                  {@code false} to append to an existing one (or create it)
 * @return an open, configured IndexWriter — the caller is responsible for closing it
 * @throws IOException if the index directory cannot be opened
 */
public static IndexWriter getIndexWriter(String indexPath, boolean create) throws IOException {
Directory dir = FSDirectory.open(Paths.get(indexPath));
Analyzer analyzer = new SmartChineseAnalyzer();
IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
LogMergePolicy mergePolicy = new LogByteSizeMergePolicy();
mergePolicy.setMergeFactor(50);
mergePolicy.setMaxMergeDocs(5000);
// BUG FIX: the tuned policy was configured but never installed on the
// config, so the writer silently used the default merge policy instead.
iwc.setMergePolicy(mergePolicy);
iwc.setOpenMode(create
    ? IndexWriterConfig.OpenMode.CREATE
    : IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
return new IndexWriter(dir, iwc);
}
/**
 * Randomly picks either the byte-size or the doc-count flavour of the log
 * merge policy, publishes the matching factory class name through the system
 * property read by the test config, and verifies the resulting policy type.
 */
public void testLogMergePolicyFactoryConfig() throws Exception {
final Class<? extends LogMergePolicy> mpClass;
final Class<? extends MergePolicyFactory> mpfClass;
if (random().nextBoolean()) {
mpClass = LogByteSizeMergePolicy.class;
mpfClass = LogByteSizeMergePolicyFactory.class;
} else {
mpClass = LogDocMergePolicy.class;
mpfClass = LogDocMergePolicyFactory.class;
}
System.setProperty("solr.test.log.merge.policy.factory", mpfClass.getName());
implTestLogMergePolicyConfig("solrconfig-logmergepolicyfactory.xml", mpClass);
}
@Test
public void testOpenIfChangedMergedSegment() throws Exception {
// Regression test: openIfChanged() after all index segments were merged
// used to trip a bug in ParentArray (originally caught by the random
// testOpenIfChangedManySegments); this variant is deterministic.
Directory taxoDir = newDirectory();
// Keep a handle on the IndexWriter so we can forceMerge below; the
// DirectoryTaxonomyWriter takes ownership and closes it, so we don't.
final IndexWriter indexWriter = new IndexWriter(taxoDir,
new IndexWriterConfig(new MockAnalyzer(random()))
.setMergePolicy(new LogByteSizeMergePolicy()));
DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir) {
@Override
protected IndexWriter openIndexWriter(Directory directory,
IndexWriterConfig config) throws IOException {
return indexWriter;
}
};
TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
assertEquals(1, taxoReader.getSize());
assertEquals(1, taxoReader.getParallelTaxonomyArrays().parents().length);
// Adding a category and then force-merging flushes the writer and collapses
// the index to a single segment; ParentArray.initFromReader used to fail
// here by assuming there are no parents.
taxoWriter.addCategory(new FacetLabel("1"));
indexWriter.forceMerge(1);
// This openIfChanged() call is the one that used to hit the bug.
TaxonomyReader reopened = TaxonomyReader.openIfChanged(taxoReader);
assertNotNull(reopened);
taxoReader.close();
taxoReader = reopened;
assertEquals(2, taxoReader.getSize());
assertEquals(2, taxoReader.getParallelTaxonomyArrays().parents().length);
taxoReader.close();
taxoWriter.close();
taxoDir.close();
}
@Test
public void testOpenIfChangedNoChangesButSegmentMerges() throws Exception {
// Regression test: openIfChanged() when the taxonomy content is unchanged
// but segments were merged. The NRT reader is reopened, and ParentArray
// used to assert that the new reader holds more ordinals than the old
// TaxonomyReader reported.
Directory taxoDir = newDirectory();
// Keep a handle on the IndexWriter so we can forceMerge below; the
// DirectoryTaxonomyWriter takes ownership and closes it, so we don't.
final IndexWriter indexWriter = new IndexWriter(taxoDir,
new IndexWriterConfig(new MockAnalyzer(random()))
.setMergePolicy(new LogByteSizeMergePolicy()));
DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir) {
@Override
protected IndexWriter openIndexWriter(Directory directory,
IndexWriterConfig config) throws IOException {
return indexWriter;
}
};
// Add one category so opening the reader below flushes a new segment.
taxoWriter.addCategory(new FacetLabel("a"));
TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
assertEquals(2, taxoReader.getSize());
assertEquals(2, taxoReader.getParallelTaxonomyArrays().parents().length);
// Collapse all segments so the NRT reader believes something changed.
indexWriter.forceMerge(1);
// This openIfChanged() call used to hit the wrong assert in ParentArray's ctor.
TaxonomyReader reopened = TaxonomyReader.openIfChanged(taxoReader);
assertNotNull(reopened);
taxoReader.close();
taxoReader = reopened;
assertEquals(2, taxoReader.getSize());
assertEquals(2, taxoReader.getParallelTaxonomyArrays().parents().length);
taxoReader.close();
taxoWriter.close();
taxoDir.close();
}
/** Supplies the merge policy under test: a size-based log merge policy. */
@Override
protected MergePolicy getMergePolicyInstance() {
MergePolicy policy = new LogByteSizeMergePolicy();
return policy;
}
/**
 * Builds the spell-check dictionary from an external source file.
 * <p>If {@code fieldTypeName} resolves to a field type in the schema, each
 * line of the source is analyzed with that type's index analyzer into an
 * in-memory index, which backs a {@link HighFrequencyDictionary}. Otherwise
 * the source is wrapped directly in a {@link PlainTextDictionary}, honoring
 * {@code characterEncoding} when set.
 * <p>IOExceptions are logged, not rethrown (best-effort load is preserved).
 *
 * @param core     the SolrCore whose resource loader supplies the source file
 * @param searcher current searcher, or {@code null} to use the core's latest schema
 */
private void loadExternalFileDictionary(SolrCore core, SolrIndexSearcher searcher) {
try {
IndexSchema schema = null == searcher ? core.getLatestSchema() : searcher.getSchema();
// Get the field's analyzer
if (fieldTypeName != null && schema.getFieldTypeNoEx(fieldTypeName) != null) {
FieldType fieldType = schema.getFieldTypes().get(fieldTypeName);
// Do index-time analysis using the given fieldType's analyzer
Directory ramDir = new ByteBuffersDirectory();
LogMergePolicy mp = new LogByteSizeMergePolicy();
mp.setMergeFactor(300);
// try-with-resources: the original leaked the writer if getLines(),
// addDocument() or forceMerge() threw before close() was reached.
try (IndexWriter writer = new IndexWriter(
ramDir,
new IndexWriterConfig(fieldType.getIndexAnalyzer()).
setMaxBufferedDocs(150).
setMergePolicy(mp).
setOpenMode(IndexWriterConfig.OpenMode.CREATE)
// TODO: if we enable this, codec gets angry since field won't exist in the schema
// .setCodec(core.getCodec())
)) {
List<String> lines = core.getResourceLoader().getLines(sourceLocation, characterEncoding);
for (String s : lines) {
Document d = new Document();
d.add(new TextField(WORD_FIELD_NAME, s, Field.Store.NO));
writer.addDocument(d);
}
writer.forceMerge(1);
}
dictionary = new HighFrequencyDictionary(DirectoryReader.open(ramDir),
WORD_FIELD_NAME, 0.0f);
} else {
// check if character encoding is defined
if (characterEncoding == null) {
dictionary = new PlainTextDictionary(core.getResourceLoader().openResource(sourceLocation));
} else {
dictionary = new PlainTextDictionary(new InputStreamReader(core.getResourceLoader().openResource(sourceLocation), characterEncoding));
}
}
} catch (IOException e) {
log.error( "Unable to load spellings", e);
}
}
/**
 * Create the {@link IndexWriterConfig} that would be used for opening the internal index writer.
 * <br>Extensions can configure the {@link IndexWriter} as they see fit,
 * including setting a {@link org.apache.lucene.index.MergeScheduler merge-scheduler}, or
 * {@link org.apache.lucene.index.IndexDeletionPolicy deletion-policy}, different RAM size
 * etc.<br>
 * <br><b>NOTE:</b> internal docids of the configured index must not be altered.
 * For that reason, categories are never deleted from the taxonomy index, and the
 * merge policy in effect must never merge non-adjacent segments.
 *
 * @see #openIndexWriter(Directory, IndexWriterConfig)
 *
 * @param openMode see {@link OpenMode}
 */
protected IndexWriterConfig createIndexWriterConfig(OpenMode openMode) {
// TODO: should we use a more optimized Codec?
// The taxonomy has a unique structure, where each term is associated with one document
// Make sure we use a MergePolicy which always merges adjacent segments and thus
// keeps the doc IDs ordered as well (this is crucial for the taxonomy index).
// A null analyzer is fine here: the taxonomy index adds terms directly and
// never runs text analysis.
return new IndexWriterConfig(null).setOpenMode(openMode).setMergePolicy(
new LogByteSizeMergePolicy());
}