Below are example usages of org.apache.lucene.index.IndexWriterConfig#getMergePolicy(), collected from open-source projects.
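getMergePolicy() returns the MergePolicy currently held by the config (a TieredMergePolicy by default), so callers typically inspect or downcast it and tune it in place before opening the writer. A minimal sketch of that pattern, assuming a Lucene 5+ IndexWriterConfig that takes only an Analyzer, and a hypothetical indexPath and method name:

private static IndexWriter openTunedWriter(Path indexPath) throws IOException {
  Directory dir = FSDirectory.open(indexPath);
  IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());
  MergePolicy mp = config.getMergePolicy(); // TieredMergePolicy unless overridden
  if (mp instanceof TieredMergePolicy) {
    // the config keeps the same instance, so tuning it in place is enough
    ((TieredMergePolicy) mp).setSegmentsPerTier(5.0);
  }
  return new IndexWriter(dir, config);
}

In the first example below, a test helper forces more frequent merging by shrinking the merge factors of whichever policy the randomized config produced: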
private static IndexWriter initWriter(int id, Random random, Path indexPath, boolean doCheckIndexOnClose) throws IOException {
Directory dir = SimpleReplicaNode.getDirectory(random, id, indexPath, doCheckIndexOnClose);
MockAnalyzer analyzer = new MockAnalyzer(random);
analyzer.setMaxTokenLength(TestUtil.nextInt(random, 1, IndexWriter.MAX_TERM_LENGTH));
IndexWriterConfig iwc = LuceneTestCase.newIndexWriterConfig(random, analyzer);
MergePolicy mp = iwc.getMergePolicy();
//iwc.setInfoStream(new PrintStreamInfoStream(System.out));
// Force more frequent merging so we stress merge warming:
if (mp instanceof TieredMergePolicy) {
TieredMergePolicy tmp = (TieredMergePolicy) mp;
tmp.setSegmentsPerTier(3);
tmp.setMaxMergeAtOnce(3);
} else if (mp instanceof LogMergePolicy) {
LogMergePolicy lmp = (LogMergePolicy) mp;
lmp.setMergeFactor(3);
}
IndexWriter writer = new IndexWriter(dir, iwc);
TestUtil.reduceOpenFiles(writer);
return writer;
}
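The next test verifies that a TieredMergePolicy and a ConcurrentMergeScheduler declared in solrconfig.xml survive the translation into an IndexWriterConfig: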
@Test
public void testTieredMPSolrIndexConfigCreation() throws Exception {
String solrConfigFileName = solrConfigFileNameTieredMergePolicyFactory;
SolrConfig solrConfig = new SolrConfig(instanceDir, solrConfigFileName);
SolrIndexConfig solrIndexConfig = new SolrIndexConfig(solrConfig, null, null);
IndexSchema indexSchema = IndexSchemaFactory.buildIndexSchema(schemaFileName, solrConfig);
h.getCore().setLatestSchema(indexSchema);
IndexWriterConfig iwc = solrIndexConfig.toIndexWriterConfig(h.getCore());
assertNotNull("null mp", iwc.getMergePolicy());
assertTrue("mp is not TieredMergePolicy", iwc.getMergePolicy() instanceof TieredMergePolicy);
TieredMergePolicy mp = (TieredMergePolicy) iwc.getMergePolicy();
assertEquals("mp.maxMergeAtOnceExplicit", 19, mp.getMaxMergeAtOnceExplicit());
assertEquals("mp.segmentsPerTier",9,(int)mp.getSegmentsPerTier());
assertNotNull("null ms", iwc.getMergeScheduler());
assertTrue("ms is not CMS", iwc.getMergeScheduler() instanceof ConcurrentMergeScheduler);
ConcurrentMergeScheduler ms = (ConcurrentMergeScheduler) iwc.getMergeScheduler();
assertEquals("ms.maxMergeCount", 987, ms.getMaxMergeCount());
assertEquals("ms.maxThreadCount", 42, ms.getMaxThreadCount());
assertEquals("ms.isAutoIOThrottle", true, ms.getAutoIOThrottle());
}
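A companion test checks that a SortingMergePolicy built from solrconfig.xml carries the expected Sort: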
public void testSortingMPSolrIndexConfigCreation() throws Exception {
final String expectedFieldName = "timestamp_i_dvo";
final SortField.Type expectedFieldType = SortField.Type.INT;
final boolean expectedFieldSortDescending = true;
SolrConfig solrConfig = new SolrConfig(instanceDir, solrConfigFileNameSortingMergePolicyFactory);
SolrIndexConfig solrIndexConfig = new SolrIndexConfig(solrConfig, null, null);
assertNotNull(solrIndexConfig);
IndexSchema indexSchema = IndexSchemaFactory.buildIndexSchema(schemaFileName, solrConfig);
h.getCore().setLatestSchema(indexSchema);
IndexWriterConfig iwc = solrIndexConfig.toIndexWriterConfig(h.getCore());
final MergePolicy mergePolicy = iwc.getMergePolicy();
assertNotNull("null mergePolicy", mergePolicy);
assertTrue("mergePolicy ("+mergePolicy+") is not a SortingMergePolicy", mergePolicy instanceof SortingMergePolicy);
final SortingMergePolicy sortingMergePolicy = (SortingMergePolicy) mergePolicy;
final Sort expected = new Sort(new SortField(expectedFieldName, expectedFieldType, expectedFieldSortDescending));
final Sort actual = sortingMergePolicy.getSort();
assertEquals("SortingMergePolicy.getSort", expected, actual);
}
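Here a single-document shard is written to HDFS, with compound files disabled on the config's default TieredMergePolicy: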
private static void createShard(Configuration configuration, int i, Path path, int totalShardCount)
throws IOException {
HdfsDirectory hdfsDirectory = new HdfsDirectory(configuration, path);
IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_43, new KeywordAnalyzer());
TieredMergePolicy mergePolicy = (TieredMergePolicy) conf.getMergePolicy();
mergePolicy.setUseCompoundFile(false);
IndexWriter indexWriter = new IndexWriter(hdfsDirectory, conf);
Partitioner<IntWritable, IntWritable> partitioner = new HashPartitioner<IntWritable, IntWritable>();
int partition = partitioner.getPartition(new IntWritable(i), null, totalShardCount);
assertEquals(i, partition);
Document doc = getDoc(i);
indexWriter.addDocument(doc);
indexWriter.close();
}
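This setup method clones one such config across a commit writer and a main writer, both backed by HdfsDirectory: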
private void setupWriter(Configuration configuration) throws IOException {
TableDescriptor tableDescriptor = new TableDescriptor();
tableDescriptor.setName("test-table");
String uuid = UUID.randomUUID().toString();
tableDescriptor.setTableUri(new Path(_base, "table-table").toUri().toString());
tableDescriptor.setShardCount(2);
TableContext tableContext = TableContext.create(tableDescriptor);
ShardContext shardContext = ShardContext.create(tableContext, "shard-00000000");
Path tablePath = new Path(_base, "table-table");
_shardPath = new Path(tablePath, "shard-00000000");
String indexDirName = "index_" + uuid;
_path = new Path(_shardPath, indexDirName + ".commit");
_fileSystem.mkdirs(_path);
_badRowIdsPath = new Path(_shardPath, indexDirName + ".badrowids");
_badIndexPath = new Path(_shardPath, indexDirName + ".badindex");
_inUsePath = new Path(_shardPath, indexDirName + ".inuse");
Directory commitDirectory = new HdfsDirectory(configuration, _path);
_mainDirectory = new HdfsDirectory(configuration, _shardPath);
_fieldManager = tableContext.getFieldManager();
Analyzer analyzerForIndex = _fieldManager.getAnalyzerForIndex();
IndexWriterConfig conf = new IndexWriterConfig(LUCENE_VERSION, analyzerForIndex);
// conf.setMergePolicy(NoMergePolicy.NO_COMPOUND_FILES);
TieredMergePolicy mergePolicy = (TieredMergePolicy) conf.getMergePolicy();
mergePolicy.setUseCompoundFile(false);
_commitWriter = new IndexWriter(commitDirectory, conf.clone());
// Make sure there's an empty index...
new IndexWriter(_mainDirectory, conf.clone()).close();
_mainWriter = new IndexWriter(_mainDirectory, conf.clone());
BufferStore.initNewBuffer(128, 128 * 128);
_indexImporter = new IndexImporter(_timer, getBlurIndex(shardContext, _mainDirectory), shardContext,
TimeUnit.MINUTES, 10, 10, null, _mainDirectory);
}
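The following taxonomy test lowers the LogMergePolicy merge factor to 2 so the taxonomy index spreads across many segments, then repeatedly reopens the reader: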
@Test
public void testOpenIfChangedManySegments() throws Exception {
// test openIfChanged() when the taxonomy contains many segments
Directory dir = newDirectory();
DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriter(dir) {
@Override
protected IndexWriterConfig createIndexWriterConfig(OpenMode openMode) {
IndexWriterConfig conf = super.createIndexWriterConfig(openMode);
LogMergePolicy lmp = (LogMergePolicy) conf.getMergePolicy();
lmp.setMergeFactor(2);
return conf;
}
};
TaxonomyReader reader = new DirectoryTaxonomyReader(writer);
int numRounds = random().nextInt(10) + 10;
int numCategories = 1; // one for root
for (int i = 0; i < numRounds; i++) {
int numCats = random().nextInt(4) + 1;
for (int j = 0; j < numCats; j++) {
writer.addCategory(new FacetLabel(Integer.toString(i), Integer.toString(j)));
}
numCategories += numCats + 1 /* one for round-parent */;
TaxonomyReader newtr = TaxonomyReader.openIfChanged(reader);
assertNotNull(newtr);
reader.close();
reader = newtr;
// assert categories
assertEquals(numCategories, reader.getSize());
int roundOrdinal = reader.getOrdinal(new FacetLabel(Integer.toString(i)));
int[] parents = reader.getParallelTaxonomyArrays().parents();
assertEquals(0, parents[roundOrdinal]); // round's parent is root
for (int j = 0; j < numCats; j++) {
int ord = reader.getOrdinal(new FacetLabel(Integer.toString(i), Integer.toString(j)));
assertEquals(roundOrdinal, parents[ord]); // category's parent is its round
}
}
reader.close();
writer.close();
dir.close();
}
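Next, the compound-file ratio is toggled through the policy returned by getMergePolicy() before a search test runs: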
private void doTest(Random random, PrintWriter out, boolean useCompoundFiles, int MAX_DOCS) throws Exception {
Directory directory = newDirectory();
Analyzer analyzer = new MockAnalyzer(random);
IndexWriterConfig conf = newIndexWriterConfig(analyzer);
final MergePolicy mp = conf.getMergePolicy();
mp.setNoCFSRatio(useCompoundFiles ? 1.0 : 0.0);
IndexWriter writer = new IndexWriter(directory, conf);
if (VERBOSE) {
System.out.println("TEST: now build index MAX_DOCS=" + MAX_DOCS);
}
for (int j = 0; j < MAX_DOCS; j++) {
Document d = new Document();
d.add(newTextField(PRIORITY_FIELD, HIGH_PRIORITY, Field.Store.YES));
d.add(new StoredField(ID_FIELD, j));
d.add(new NumericDocValuesField(ID_FIELD, j));
writer.addDocument(d);
}
writer.close();
// try a search without OR
IndexReader reader = DirectoryReader.open(directory);
IndexSearcher searcher = newSearcher(reader);
Query query = new TermQuery(new Term(PRIORITY_FIELD, HIGH_PRIORITY));
out.println("Query: " + query.toString(PRIORITY_FIELD));
if (VERBOSE) {
System.out.println("TEST: search query=" + query);
}
final Sort sort = new Sort(SortField.FIELD_SCORE,
new SortField(ID_FIELD, SortField.Type.INT));
ScoreDoc[] hits = searcher.search(query, MAX_DOCS, sort).scoreDocs;
printHits(out, hits, searcher);
checkHits(hits, MAX_DOCS, searcher);
// try a new search with OR
searcher = newSearcher(reader);
hits = null;
BooleanQuery.Builder booleanQuery = new BooleanQuery.Builder();
booleanQuery.add(new TermQuery(new Term(PRIORITY_FIELD, HIGH_PRIORITY)), BooleanClause.Occur.SHOULD);
booleanQuery.add(new TermQuery(new Term(PRIORITY_FIELD, MED_PRIORITY)), BooleanClause.Occur.SHOULD);
out.println("Query: " + booleanQuery.build().toString(PRIORITY_FIELD));
hits = searcher.search(booleanQuery.build(), MAX_DOCS, sort).scoreDocs;
printHits(out, hits, searcher);
checkHits(hits, MAX_DOCS, searcher);
reader.close();
directory.close();
}
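This random-index builder swaps out a MockRandomMergePolicy (whose wrapped leaf readers interfere with merging) for a TieredMergePolicy and pins a SerialMergeScheduler so runs are reproducible: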
private void createRandomIndex(boolean singleSortedSegment) throws IOException {
dir = newDirectory();
numDocs = atLeast(150);
final int numTerms = TestUtil.nextInt(random(), 1, numDocs / 5);
Set<String> randomTerms = new HashSet<>();
while (randomTerms.size() < numTerms) {
randomTerms.add(TestUtil.randomSimpleString(random()));
}
terms = new ArrayList<>(randomTerms);
final long seed = random().nextLong();
final IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(new Random(seed)));
if (iwc.getMergePolicy() instanceof MockRandomMergePolicy) {
// MockRandomMP randomly wraps the leaf readers which makes merging angry
iwc.setMergePolicy(newTieredMergePolicy());
}
iwc.setMergeScheduler(new SerialMergeScheduler()); // for reproducible tests
iwc.setIndexSort(sort);
iw = new RandomIndexWriter(new Random(seed), dir, iwc);
iw.setDoRandomForceMerge(false); // don't do this, it may happen anyway with MockRandomMP
for (int i = 0; i < numDocs; ++i) {
final Document doc = randomDocument();
iw.addDocument(doc);
if (i == numDocs / 2 || (i != numDocs - 1 && random().nextInt(8) == 0)) {
iw.commit();
}
if (random().nextInt(15) == 0) {
final String term = RandomPicks.randomFrom(random(), terms);
iw.deleteDocuments(new Term("s", term));
}
}
if (singleSortedSegment) {
iw.forceMerge(1);
}
else if (random().nextBoolean()) {
iw.forceMerge(FORCE_MERGE_MAX_SEGMENT_COUNT);
}
reader = iw.getReader();
if (reader.numDocs() == 0) {
iw.addDocument(new Document());
reader.close();
reader = iw.getReader();
}
}
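In the map-reduce merge step below, the config's default merge policy is inspected and its compound-file ratio zeroed before the shards are merged into a single index: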
@Override
public void close(TaskAttemptContext context) throws IOException {
LOG.debug("Task " + context.getTaskAttemptID() + " merging into dstDir: " + workDir + ", srcDirs: " + shards);
writeShardNumberFile(context);
heartBeater.needHeartBeat();
try {
Directory mergedIndex = new HdfsDirectory(workDir, context.getConfiguration());
// TODO: shouldn't we pull the Version from the solrconfig.xml?
IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_CURRENT, null)
.setOpenMode(OpenMode.CREATE).setUseCompoundFile(false)
//.setMergePolicy(mergePolicy) // TODO: grab tuned MergePolicy from solrconfig.xml?
//.setMergeScheduler(...) // TODO: grab tuned MergeScheduler from solrconfig.xml?
;
if (LOG.isDebugEnabled()) {
writerConfig.setInfoStream(System.out);
}
// writerConfig.setRAMBufferSizeMB(100); // improve performance
// writerConfig.setMaxThreadStates(1);
// disable compound file to improve performance
// also see http://lucene.472066.n3.nabble.com/Questions-on-compound-file-format-td489105.html
// also see defaults in SolrIndexConfig
MergePolicy mergePolicy = writerConfig.getMergePolicy();
LOG.debug("mergePolicy was: {}", mergePolicy);
if (mergePolicy instanceof TieredMergePolicy) {
((TieredMergePolicy) mergePolicy).setNoCFSRatio(0.0);
// ((TieredMergePolicy) mergePolicy).setMaxMergeAtOnceExplicit(10000);
// ((TieredMergePolicy) mergePolicy).setMaxMergeAtOnce(10000);
// ((TieredMergePolicy) mergePolicy).setSegmentsPerTier(10000);
} else if (mergePolicy instanceof LogMergePolicy) {
((LogMergePolicy) mergePolicy).setNoCFSRatio(0.0);
}
LOG.info("Using mergePolicy: {}", mergePolicy);
IndexWriter writer = new IndexWriter(mergedIndex, writerConfig);
Directory[] indexes = new Directory[shards.size()];
for (int i = 0; i < shards.size(); i++) {
indexes[i] = new HdfsDirectory(shards.get(i), context.getConfiguration());
}
context.setStatus("Logically merging " + shards.size() + " shards into one shard");
LOG.info("Logically merging " + shards.size() + " shards into one shard: " + workDir);
long start = System.nanoTime();
writer.addIndexes(indexes);
// TODO: avoid intermediate copying of files into dst directory; rename the files into the dir instead (cp -> rename)
// This can improve performance and turns this phase into a true "logical" merge, completing in constant time.
// See https://issues.apache.org/jira/browse/LUCENE-4746
if (LOG.isDebugEnabled()) {
context.getCounter(SolrCounters.class.getName(), SolrCounters.LOGICAL_TREE_MERGE_TIME.toString()).increment((System.nanoTime() - start) / 1000000); // start is in nanos; counter records millis
}
float secs = (System.nanoTime() - start) / 1.0e9f; // nanos to seconds
LOG.info("Logical merge took {} secs", secs);
int maxSegments = context.getConfiguration().getInt(TreeMergeMapper.MAX_SEGMENTS_ON_TREE_MERGE, Integer.MAX_VALUE);
context.setStatus("Optimizing Solr: forcing mtree merge down to " + maxSegments + " segments");
LOG.info("Optimizing Solr: forcing tree merge down to {} segments", maxSegments);
start = System.nanoTime();
if (maxSegments < Integer.MAX_VALUE) {
writer.forceMerge(maxSegments);
// TODO: consider perf enhancement for no-deletes merges: bulk-copy the postings data
// see http://lucene.472066.n3.nabble.com/Experience-with-large-merge-factors-tp1637832p1647046.html
}
if (LOG.isDebugEnabled()) {
context.getCounter(SolrCounters.class.getName(), SolrCounters.PHYSICAL_TREE_MERGE_TIME.toString()).increment((System.nanoTime() - start) / 1000000); // start is in nanos; counter records millis
}
secs = (System.nanoTime() - start) / 1.0e9f;
LOG.info("Optimizing Solr: done forcing tree merge down to {} segments in {} secs", maxSegments, secs);
start = System.nanoTime();
LOG.info("Optimizing Solr: Closing index writer");
writer.close();
secs = (System.nanoTime() - start) / 1.0e9f;
LOG.info("Optimizing Solr: Done closing index writer in {} secs", secs);
context.setStatus("Done");
} finally {
heartBeater.cancelHeartBeat();
heartBeater.close();
}
}
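Finally, a commit/reopen stress test reuses one cloned config whose TieredMergePolicy has compound files disabled: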
@Test
public void testMultipleCommitsAndReopens() throws IOException {
IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_43, new KeywordAnalyzer());
conf.setMergeScheduler(new SerialMergeScheduler());
TieredMergePolicy mergePolicy = (TieredMergePolicy) conf.getMergePolicy();
mergePolicy.setUseCompoundFile(false);
Set<String> fileSet = new TreeSet<String>();
long seed = new Random().nextLong();
System.out.println("Seed:" + seed);
Random random = new Random(seed);
int docCount = 0;
int passes = 10;
byte[] segmentsGenContents = null;
for (int run = 0; run < passes; run++) {
final FastHdfsKeyValueDirectory directory = new FastHdfsKeyValueDirectory(false, _timer, _configuration,
new Path(_path, "test_multiple_commits_reopens"));
if (segmentsGenContents != null) {
byte[] segmentsGenContentsCurrent = readSegmentsGen(directory);
assertTrue(Arrays.equals(segmentsGenContents, segmentsGenContentsCurrent));
}
assertFiles(fileSet, run, -1, directory);
assertEquals(docCount, getDocumentCount(directory));
IndexWriter writer = new IndexWriter(directory, conf.clone());
int numberOfCommits = random.nextInt(100);
for (int i = 0; i < numberOfCommits; i++) {
assertFiles(fileSet, run, i, directory);
addDocuments(writer, random.nextInt(100));
// Before Commit
writer.commit();
// After Commit
// Set files after commit
{
fileSet.clear();
List<IndexCommit> listCommits = DirectoryReader.listCommits(directory);
assertEquals(1, listCommits.size());
IndexCommit indexCommit = listCommits.get(0);
fileSet.addAll(indexCommit.getFileNames());
}
segmentsGenContents = readSegmentsGen(directory);
}
docCount = getDocumentCount(directory);
}
}