Listed below are example snippets showing how to use the org.apache.lucene.index.LogMergePolicy API class, or follow the links to view the source code on GitHub.
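As a quick orientation before the collected snippets, here is a minimal, self-contained sketch of the knobs the examples below keep tuning (mergeFactor, maxMergeDocs, noCFSRatio). The index path, analyzer, and field name are placeholders of my own, not taken from any example:

import java.nio.file.Paths;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LogByteSizeMergePolicy;
import org.apache.lucene.index.LogMergePolicy;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class LogMergePolicyDemo {
  public static void main(String[] args) throws Exception {
    // LogByteSizeMergePolicy is one of the two concrete LogMergePolicy subclasses;
    // it buckets segments into levels by byte size (LogDocMergePolicy uses doc counts).
    LogMergePolicy mp = new LogByteSizeMergePolicy();
    mp.setMergeFactor(10);       // how many same-level segments trigger a merge
    mp.setMaxMergeDocs(100_000); // segments with more docs than this are never merged
    mp.setNoCFSRatio(0.0);       // never pack merged segments into a compound file

    try (Directory dir = FSDirectory.open(Paths.get("/tmp/lmp-demo"))) {
      IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer())
          .setMergePolicy(mp); // the policy takes effect only if set on the config
      try (IndexWriter writer = new IndexWriter(dir, iwc)) {
        Document doc = new Document();
        doc.add(new TextField("content", "hello merge policy", Field.Store.NO));
        writer.addDocument(doc);
      }
    }
  }
}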
private static IndexWriter initWriter(int id, Random random, Path indexPath, boolean doCheckIndexOnClose) throws IOException {
  Directory dir = SimpleReplicaNode.getDirectory(random, id, indexPath, doCheckIndexOnClose);
  MockAnalyzer analyzer = new MockAnalyzer(random);
  analyzer.setMaxTokenLength(TestUtil.nextInt(random, 1, IndexWriter.MAX_TERM_LENGTH));
  IndexWriterConfig iwc = LuceneTestCase.newIndexWriterConfig(random, analyzer);
  MergePolicy mp = iwc.getMergePolicy();
  //iwc.setInfoStream(new PrintStreamInfoStream(System.out));

  // Force more frequent merging so we stress merge warming:
  if (mp instanceof TieredMergePolicy) {
    TieredMergePolicy tmp = (TieredMergePolicy) mp;
    tmp.setSegmentsPerTier(3);
    tmp.setMaxMergeAtOnce(3);
  } else if (mp instanceof LogMergePolicy) {
    LogMergePolicy lmp = (LogMergePolicy) mp;
    lmp.setMergeFactor(3);
  }

  IndexWriter writer = new IndexWriter(dir, iwc);
  TestUtil.reduceOpenFiles(writer);
  return writer;
}
/** Just tries to configure things to keep the open file
 *  count lowish. */
public static void reduceOpenFiles(IndexWriter w) {
  // keep number of open files lowish
  MergePolicy mp = w.getConfig().getMergePolicy();
  mp.setNoCFSRatio(1.0);
  if (mp instanceof LogMergePolicy) {
    LogMergePolicy lmp = (LogMergePolicy) mp;
    lmp.setMergeFactor(Math.min(5, lmp.getMergeFactor()));
  } else if (mp instanceof TieredMergePolicy) {
    TieredMergePolicy tmp = (TieredMergePolicy) mp;
    tmp.setMaxMergeAtOnce(Math.min(5, tmp.getMaxMergeAtOnce()));
    tmp.setSegmentsPerTier(Math.min(5, tmp.getSegmentsPerTier()));
  }
  MergeScheduler ms = w.getConfig().getMergeScheduler();
  if (ms instanceof ConcurrentMergeScheduler) {
    // cap concurrent merges and merge threads so merging holds fewer files open at once
    ((ConcurrentMergeScheduler) ms).setMaxMergesAndThreads(3, 2);
  }
}
private void implTestLogMergePolicyConfig(String solrConfigFileName,
                                          Class<? extends LogMergePolicy> mpClass) throws Exception {

  initCore(solrConfigFileName, "schema-minimal.xml");

  IndexWriterConfig iwc = solrConfig.indexConfig.toIndexWriterConfig(h.getCore());

  // verify some props set to -1 get lucene internal defaults
  assertEquals(-1, solrConfig.indexConfig.maxBufferedDocs);
  assertEquals(IndexWriterConfig.DISABLE_AUTO_FLUSH,
               iwc.getMaxBufferedDocs());
  assertEquals(-1, solrConfig.indexConfig.ramBufferSizeMB, 0.0D);
  assertEquals(IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB,
               iwc.getRAMBufferSizeMB(), 0.0D);

  LogMergePolicy logMP = assertAndCast(mpClass, iwc.getMergePolicy());
  assertEquals(11, logMP.getMergeFactor());
  assertEquals(456, logMP.getMaxMergeDocs());
}
public static IndexWriter getIndexWriter(String indexPath, boolean create) throws IOException {
  Directory dir = FSDirectory.open(Paths.get(indexPath));
  Analyzer analyzer = new SmartChineseAnalyzer();
  IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
  LogMergePolicy mergePolicy = new LogByteSizeMergePolicy();
  mergePolicy.setMergeFactor(50);
  mergePolicy.setMaxMergeDocs(5000);
  // the configured policy must be set on the config, or it is silently ignored
  iwc.setMergePolicy(mergePolicy);
  if (create) {
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
  } else {
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
  }
  return new IndexWriter(dir, iwc);
}
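A hypothetical caller of the helper above might look like the following; the path and field name are illustrative only, and IndexWriter is Closeable, so try-with-resources applies:

// Hypothetical usage of getIndexWriter(...) above; "/tmp/demo-index" is a placeholder path.
try (IndexWriter w = getIndexWriter("/tmp/demo-index", true)) {
  Document doc = new Document();
  doc.add(new TextField("content", "an example document", Field.Store.NO));
  w.addDocument(doc);
}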
public void testLogMergePolicyFactoryConfig() throws Exception {
  final boolean byteSizeMP = random().nextBoolean();
  final Class<? extends LogMergePolicy> mpClass = byteSizeMP
      ? LogByteSizeMergePolicy.class : LogDocMergePolicy.class;
  final Class<? extends MergePolicyFactory> mpfClass = byteSizeMP
      ? LogByteSizeMergePolicyFactory.class : LogDocMergePolicyFactory.class;

  System.setProperty("solr.test.log.merge.policy.factory", mpfClass.getName());
  implTestLogMergePolicyConfig("solrconfig-logmergepolicyfactory.xml", mpClass);
}
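The test above randomly picks between the two concrete LogMergePolicy variants. For reference, this sketch shows the setters specific to each; the values are illustrative, not recommendations:

// Size-levelled: segments are bucketed into merge levels by byte size.
LogByteSizeMergePolicy bySize = new LogByteSizeMergePolicy();
bySize.setMinMergeMB(1.6);    // segments below this size all land in the lowest level
bySize.setMaxMergeMB(2048.0); // segments above this size are excluded from merging

// Doc-count-levelled: same algorithm, but levels are computed from document counts.
LogDocMergePolicy byDocs = new LogDocMergePolicy();
byDocs.setMinMergeDocs(1000); // doc-count analogue of setMinMergeMB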
/**
 * Test that IndexWriter settings stick.
 */
public void testIndexWriterSettings() throws Exception {
  // 1. alg definition (required in every "logic" test)
  String algLines[] = {
      "# ----- properties ",
      "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
      "docs.file=" + getReuters20LinesFile(),
      "content.source.log.step=3",
      "ram.flush.mb=-1",
      "max.buffered=2",
      "compound=cmpnd:true:false",
      "doc.term.vector=vector:false:true",
      "content.source.forever=false",
      "directory=ByteBuffersDirectory",
      "doc.stored=false",
      "merge.factor=3",
      "doc.tokenized=false",
      "debug.level=1",
      "# ----- alg ",
      "{ \"Rounds\"",
      "  ResetSystemErase",
      "  CreateIndex",
      "  { \"AddDocs\" AddDoc > : * ",
      "  NewRound",
      "} : 2",
  };

  // 2. execute the algorithm (required in every "logic" test)
  Benchmark benchmark = execBenchmark(algLines);

  final IndexWriter writer = benchmark.getRunData().getIndexWriter();
  assertEquals(2, writer.getConfig().getMaxBufferedDocs());
  assertEquals(IndexWriterConfig.DISABLE_AUTO_FLUSH, (int) writer.getConfig().getRAMBufferSizeMB());
  assertEquals(3, ((LogMergePolicy) writer.getConfig().getMergePolicy()).getMergeFactor());
  assertEquals(0.0d, writer.getConfig().getMergePolicy().getNoCFSRatio(), 0.0);
  writer.close();

  Directory dir = benchmark.getRunData().getDirectory();
  IndexReader reader = DirectoryReader.open(dir);
  Fields tfv = reader.getTermVectors(0);
  assertNotNull(tfv);
  assertTrue(tfv.size() > 0);
  reader.close();
}
@Test
public void testOpenIfChangedManySegments() throws Exception {
  // test openIfChanged() when the taxonomy contains many segments
  Directory dir = newDirectory();

  DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriter(dir) {
    @Override
    protected IndexWriterConfig createIndexWriterConfig(OpenMode openMode) {
      IndexWriterConfig conf = super.createIndexWriterConfig(openMode);
      LogMergePolicy lmp = (LogMergePolicy) conf.getMergePolicy();
      lmp.setMergeFactor(2);
      return conf;
    }
  };
  TaxonomyReader reader = new DirectoryTaxonomyReader(writer);

  int numRounds = random().nextInt(10) + 10;
  int numCategories = 1; // one for root
  for (int i = 0; i < numRounds; i++) {
    int numCats = random().nextInt(4) + 1;
    for (int j = 0; j < numCats; j++) {
      writer.addCategory(new FacetLabel(Integer.toString(i), Integer.toString(j)));
    }
    numCategories += numCats + 1 /* one for round-parent */;
    TaxonomyReader newtr = TaxonomyReader.openIfChanged(reader);
    assertNotNull(newtr);
    reader.close();
    reader = newtr;

    // assert categories
    assertEquals(numCategories, reader.getSize());
    int roundOrdinal = reader.getOrdinal(new FacetLabel(Integer.toString(i)));
    int[] parents = reader.getParallelTaxonomyArrays().parents();
    assertEquals(0, parents[roundOrdinal]); // round's parent is root
    for (int j = 0; j < numCats; j++) {
      int ord = reader.getOrdinal(new FacetLabel(Integer.toString(i), Integer.toString(j)));
      assertEquals(roundOrdinal, parents[ord]); // category's parent is the round
    }
  }

  reader.close();
  writer.close();
  dir.close();
}
public void testSubclassConcurrentMergeScheduler() throws IOException {
  MockDirectoryWrapper dir = newMockDirectory();
  dir.failOn(new FailOnlyOnMerge());

  Document doc = new Document();
  Field idField = newStringField("id", "", Field.Store.YES);
  doc.add(idField);

  IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()))
      .setMergeScheduler(new MyMergeScheduler())
      .setMaxBufferedDocs(2).setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH)
      .setMergePolicy(newLogMergePolicy());

  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  infoStream = new PrintStreamInfoStream(new PrintStream(baos, true, IOUtils.UTF_8));
  iwc.setInfoStream(infoStream);

  IndexWriter writer = new IndexWriter(dir, iwc);
  LogMergePolicy logMP = (LogMergePolicy) writer.getConfig().getMergePolicy();
  logMP.setMergeFactor(10);
  for (int i = 0; i < 20; i++) {
    writer.addDocument(doc);
  }

  try {
    ((MyMergeScheduler) writer.getConfig().getMergeScheduler()).sync();
  } catch (IllegalStateException ise) {
    // OK
  }
  writer.rollback();

  try {
    assertTrue(mergeThreadCreated);
    assertTrue(mergeCalled);
    assertTrue(excCalled);
  } catch (AssertionError ae) {
    System.out.println("TEST FAILED; IW infoStream output:");
    System.out.println(baos.toString(IOUtils.UTF_8));
    throw ae;
  }
  dir.close();
}
private void loadExternalFileDictionary(SolrCore core, SolrIndexSearcher searcher) {
  try {
    IndexSchema schema = null == searcher ? core.getLatestSchema() : searcher.getSchema();
    // Get the field's analyzer
    if (fieldTypeName != null && schema.getFieldTypeNoEx(fieldTypeName) != null) {
      FieldType fieldType = schema.getFieldTypes().get(fieldTypeName);
      // Do index-time analysis using the given fieldType's analyzer
      Directory ramDir = new ByteBuffersDirectory();

      LogMergePolicy mp = new LogByteSizeMergePolicy();
      mp.setMergeFactor(300);

      IndexWriter writer = new IndexWriter(
          ramDir,
          new IndexWriterConfig(fieldType.getIndexAnalyzer()).
              setMaxBufferedDocs(150).
              setMergePolicy(mp).
              setOpenMode(IndexWriterConfig.OpenMode.CREATE)
          // TODO: if we enable this, codec gets angry since field won't exist in the schema
          // .setCodec(core.getCodec())
      );

      List<String> lines = core.getResourceLoader().getLines(sourceLocation, characterEncoding);
      for (String s : lines) {
        Document d = new Document();
        d.add(new TextField(WORD_FIELD_NAME, s, Field.Store.NO));
        writer.addDocument(d);
      }
      writer.forceMerge(1);
      writer.close();

      dictionary = new HighFrequencyDictionary(DirectoryReader.open(ramDir),
          WORD_FIELD_NAME, 0.0f);
    } else {
      // check if character encoding is defined
      if (characterEncoding == null) {
        dictionary = new PlainTextDictionary(core.getResourceLoader().openResource(sourceLocation));
      } else {
        dictionary = new PlainTextDictionary(new InputStreamReader(core.getResourceLoader().openResource(sourceLocation), characterEncoding));
      }
    }
  } catch (IOException e) {
    log.error("Unable to load spellings", e);
  }
}
@Override
public void close(TaskAttemptContext context) throws IOException {
  LOG.debug("Task " + context.getTaskAttemptID() + " merging into dstDir: " + workDir + ", srcDirs: " + shards);
  writeShardNumberFile(context);
  heartBeater.needHeartBeat();
  try {
    Directory mergedIndex = new HdfsDirectory(workDir, context.getConfiguration());

    // TODO: shouldn't we pull the Version from the solrconfig.xml?
    IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_CURRENT, null)
        .setOpenMode(OpenMode.CREATE).setUseCompoundFile(false)
        //.setMergePolicy(mergePolicy) // TODO: grab tuned MergePolicy from solrconfig.xml?
        //.setMergeScheduler(...) // TODO: grab tuned MergeScheduler from solrconfig.xml?
        ;

    if (LOG.isDebugEnabled()) {
      writerConfig.setInfoStream(System.out);
    }
    // writerConfig.setRAMBufferSizeMB(100); // improve performance
    // writerConfig.setMaxThreadStates(1);

    // disable compound file to improve performance
    // also see http://lucene.472066.n3.nabble.com/Questions-on-compound-file-format-td489105.html
    // also see defaults in SolrIndexConfig
    MergePolicy mergePolicy = writerConfig.getMergePolicy();
    LOG.debug("mergePolicy was: {}", mergePolicy);
    if (mergePolicy instanceof TieredMergePolicy) {
      ((TieredMergePolicy) mergePolicy).setNoCFSRatio(0.0);
      // ((TieredMergePolicy) mergePolicy).setMaxMergeAtOnceExplicit(10000);
      // ((TieredMergePolicy) mergePolicy).setMaxMergeAtOnce(10000);
      // ((TieredMergePolicy) mergePolicy).setSegmentsPerTier(10000);
    } else if (mergePolicy instanceof LogMergePolicy) {
      ((LogMergePolicy) mergePolicy).setNoCFSRatio(0.0);
    }
    LOG.info("Using mergePolicy: {}", mergePolicy);

    IndexWriter writer = new IndexWriter(mergedIndex, writerConfig);

    Directory[] indexes = new Directory[shards.size()];
    for (int i = 0; i < shards.size(); i++) {
      indexes[i] = new HdfsDirectory(shards.get(i), context.getConfiguration());
    }

    context.setStatus("Logically merging " + shards.size() + " shards into one shard");
    LOG.info("Logically merging " + shards.size() + " shards into one shard: " + workDir);
    long start = System.nanoTime();

    writer.addIndexes(indexes);
    // TODO: avoid intermediate copying of files into dst directory; rename the files into the dir instead (cp -> rename)
    // This can improve performance and turns this phase into a true "logical" merge, completing in constant time.
    // See https://issues.apache.org/jira/browse/LUCENE-4746

    if (LOG.isDebugEnabled()) {
      context.getCounter(SolrCounters.class.getName(), SolrCounters.LOGICAL_TREE_MERGE_TIME.toString())
          .increment(TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start));
    }
    float secs = (System.nanoTime() - start) / 1e9f;
    LOG.info("Logical merge took {} secs", secs);

    int maxSegments = context.getConfiguration().getInt(TreeMergeMapper.MAX_SEGMENTS_ON_TREE_MERGE, Integer.MAX_VALUE);
    context.setStatus("Optimizing Solr: forcing mtree merge down to " + maxSegments + " segments");
    LOG.info("Optimizing Solr: forcing tree merge down to {} segments", maxSegments);
    start = System.nanoTime();
    if (maxSegments < Integer.MAX_VALUE) {
      writer.forceMerge(maxSegments);
      // TODO: consider perf enhancement for no-deletes merges: bulk-copy the postings data
      // see http://lucene.472066.n3.nabble.com/Experience-with-large-merge-factors-tp1637832p1647046.html
    }
    if (LOG.isDebugEnabled()) {
      context.getCounter(SolrCounters.class.getName(), SolrCounters.PHYSICAL_TREE_MERGE_TIME.toString())
          .increment(TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start));
    }
    secs = (System.nanoTime() - start) / 1e9f;
    LOG.info("Optimizing Solr: done forcing tree merge down to {} segments in {} secs", maxSegments, secs);

    start = System.nanoTime();
    LOG.info("Optimizing Solr: Closing index writer");
    writer.close();
    secs = (System.nanoTime() - start) / 1e9f;
    LOG.info("Optimizing Solr: Done closing index writer in {} secs", secs);
    context.setStatus("Done");
  } finally {
    heartBeater.cancelHeartBeat();
    heartBeater.close();
  }
}