org.apache.lucene.index.IndexWriterConfig#getMergePolicy() 源码实例 Demo

下面列出了 org.apache.lucene.index.IndexWriterConfig#getMergePolicy() 的实例代码，或者点击链接到 github 查看源代码，也可以在右侧发表评论。

源代码1 项目: lucene-solr   文件: SimplePrimaryNode.java
/**
 * Creates an {@link IndexWriter} over a test directory for the given node id.
 * The merge policy coming out of {@code newIndexWriterConfig} is tuned to merge
 * aggressively (factor/tier of 3) so that merge warming is exercised often.
 */
private static IndexWriter initWriter(int id, Random random, Path indexPath, boolean doCheckIndexOnClose) throws IOException {
  Directory dir = SimpleReplicaNode.getDirectory(random, id, indexPath, doCheckIndexOnClose);

  MockAnalyzer analyzer = new MockAnalyzer(random);
  analyzer.setMaxTokenLength(TestUtil.nextInt(random, 1, IndexWriter.MAX_TERM_LENGTH));
  IndexWriterConfig iwc = LuceneTestCase.newIndexWriterConfig(random, analyzer);
  //iwc.setInfoStream(new PrintStreamInfoStream(System.out));

  // Force more frequent merging so we stress merge warming:
  MergePolicy policy = iwc.getMergePolicy();
  if (policy instanceof TieredMergePolicy) {
    TieredMergePolicy tiered = (TieredMergePolicy) policy;
    tiered.setSegmentsPerTier(3);
    tiered.setMaxMergeAtOnce(3);
  } else if (policy instanceof LogMergePolicy) {
    ((LogMergePolicy) policy).setMergeFactor(3);
  }

  IndexWriter writer = new IndexWriter(dir, iwc);
  TestUtil.reduceOpenFiles(writer);
  return writer;
}
 
源代码2 项目: lucene-solr   文件: SolrIndexConfigTest.java
@Test
public void testTieredMPSolrIndexConfigCreation() throws Exception {
  // Build the IndexWriterConfig from the tiered-merge-policy-factory solrconfig.
  SolrConfig solrConfig = new SolrConfig(instanceDir, solrConfigFileNameTieredMergePolicyFactory);
  SolrIndexConfig solrIndexConfig = new SolrIndexConfig(solrConfig, null, null);
  IndexSchema indexSchema = IndexSchemaFactory.buildIndexSchema(schemaFileName, solrConfig);

  h.getCore().setLatestSchema(indexSchema);
  IndexWriterConfig writerConfig = solrIndexConfig.toIndexWriterConfig(h.getCore());

  // The configured merge policy must be a TieredMergePolicy with the tuned knobs.
  assertNotNull("null mp", writerConfig.getMergePolicy());
  assertTrue("mp is not TieredMergePolicy", writerConfig.getMergePolicy() instanceof TieredMergePolicy);
  TieredMergePolicy tieredPolicy = (TieredMergePolicy) writerConfig.getMergePolicy();
  assertEquals("mp.maxMergeAtOnceExplicit", 19, tieredPolicy.getMaxMergeAtOnceExplicit());
  assertEquals("mp.segmentsPerTier", 9, (int) tieredPolicy.getSegmentsPerTier());

  // The configured scheduler must be a ConcurrentMergeScheduler with the tuned knobs.
  assertNotNull("null ms", writerConfig.getMergeScheduler());
  assertTrue("ms is not CMS", writerConfig.getMergeScheduler() instanceof ConcurrentMergeScheduler);
  ConcurrentMergeScheduler scheduler = (ConcurrentMergeScheduler) writerConfig.getMergeScheduler();
  assertEquals("ms.maxMergeCount", 987, scheduler.getMaxMergeCount());
  assertEquals("ms.maxThreadCount", 42, scheduler.getMaxThreadCount());
  assertEquals("ms.isAutoIOThrottle", true, scheduler.getAutoIOThrottle());
}
 
源代码3 项目: lucene-solr   文件: SolrIndexConfigTest.java
public void testSortingMPSolrIndexConfigCreation() throws Exception {
  // Expected sort described by the sorting-merge-policy-factory solrconfig.
  final String expectedFieldName = "timestamp_i_dvo";
  final SortField.Type expectedFieldType = SortField.Type.INT;
  final boolean expectedFieldSortDescending = true;

  SolrConfig solrConfig = new SolrConfig(instanceDir, solrConfigFileNameSortingMergePolicyFactory);
  SolrIndexConfig solrIndexConfig = new SolrIndexConfig(solrConfig, null, null);
  assertNotNull(solrIndexConfig);
  IndexSchema indexSchema = IndexSchemaFactory.buildIndexSchema(schemaFileName, solrConfig);

  h.getCore().setLatestSchema(indexSchema);
  IndexWriterConfig writerConfig = solrIndexConfig.toIndexWriterConfig(h.getCore());

  // The factory must have produced a SortingMergePolicy wrapping the expected sort.
  final MergePolicy mergePolicy = writerConfig.getMergePolicy();
  assertNotNull("null mergePolicy", mergePolicy);
  assertTrue("mergePolicy ("+mergePolicy+") is not a SortingMergePolicy", mergePolicy instanceof SortingMergePolicy);
  final Sort expected = new Sort(new SortField(expectedFieldName, expectedFieldType, expectedFieldSortDescending));
  assertEquals("SortingMergePolicy.getSort", expected, ((SortingMergePolicy) mergePolicy).getSort());
}
 
/**
 * Writes a one-document index shard at {@code path}, after verifying that the
 * hash partitioner would route document {@code i} to shard {@code i}.
 */
private static void createShard(Configuration configuration, int i, Path path, int totalShardCount)
    throws IOException {
  HdfsDirectory shardDirectory = new HdfsDirectory(configuration, path);
  IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_43, new KeywordAnalyzer());
  // Default policy is tiered; disable compound files for this test index.
  ((TieredMergePolicy) writerConfig.getMergePolicy()).setUseCompoundFile(false);
  IndexWriter indexWriter = new IndexWriter(shardDirectory, writerConfig);

  // Sanity check: partitioning by doc id must land on this shard.
  Partitioner<IntWritable, IntWritable> partitioner = new HashPartitioner<IntWritable, IntWritable>();
  assertEquals(i, partitioner.getPartition(new IntWritable(i), null, totalShardCount));

  indexWriter.addDocument(getDoc(i));
  indexWriter.close();
}
 
/**
 * Builds the table/shard layout on HDFS and opens the commit and main index
 * writers used by the import tests. Also starts the {@code IndexImporter}
 * against the main directory.
 */
private void setupWriter(Configuration configuration) throws IOException {
  TableDescriptor tableDescriptor = new TableDescriptor();
  tableDescriptor.setName("test-table");
  String indexUuid = UUID.randomUUID().toString();

  tableDescriptor.setTableUri(new Path(_base, "table-table").toUri().toString());
  tableDescriptor.setShardCount(2);

  TableContext tableContext = TableContext.create(tableDescriptor);
  ShardContext shardContext = ShardContext.create(tableContext, "shard-00000000");

  // Directory layout: <base>/table-table/shard-00000000/index_<uuid>.{commit,badrowids,badindex,inuse}
  Path tablePath = new Path(_base, "table-table");
  _shardPath = new Path(tablePath, "shard-00000000");
  String indexDirName = "index_" + indexUuid;
  _path = new Path(_shardPath, indexDirName + ".commit");
  _fileSystem.mkdirs(_path);
  _badRowIdsPath = new Path(_shardPath, indexDirName + ".badrowids");
  _badIndexPath = new Path(_shardPath, indexDirName + ".badindex");
  _inUsePath = new Path(_shardPath, indexDirName + ".inuse");

  Directory commitDirectory = new HdfsDirectory(configuration, _path);
  _mainDirectory = new HdfsDirectory(configuration, _shardPath);
  _fieldManager = tableContext.getFieldManager();
  Analyzer indexAnalyzer = _fieldManager.getAnalyzerForIndex();
  IndexWriterConfig writerConfig = new IndexWriterConfig(LUCENE_VERSION, indexAnalyzer);
  // conf.setMergePolicy(NoMergePolicy.NO_COMPOUND_FILES);
  ((TieredMergePolicy) writerConfig.getMergePolicy()).setUseCompoundFile(false);
  _commitWriter = new IndexWriter(commitDirectory, writerConfig.clone());

  // Make sure there's an empty index...
  new IndexWriter(_mainDirectory, writerConfig.clone()).close();
  _mainWriter = new IndexWriter(_mainDirectory, writerConfig.clone());
  BufferStore.initNewBuffer(128, 128 * 128);

  _indexImporter = new IndexImporter(_timer, getBlurIndex(shardContext, _mainDirectory), shardContext,
      TimeUnit.MINUTES, 10, 10, null, _mainDirectory);
}
 
源代码6 项目: lucene-solr   文件: TestDirectoryTaxonomyReader.java
@Test
public void testOpenIfChangedManySegments() throws Exception {
  // test openIfChanged() when the taxonomy contains many segments
  Directory dir = newDirectory();

  // Use a tiny merge factor so the taxonomy index accumulates many segments.
  DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriter(dir) {
    @Override
    protected IndexWriterConfig createIndexWriterConfig(OpenMode openMode) {
      IndexWriterConfig conf = super.createIndexWriterConfig(openMode);
      ((LogMergePolicy) conf.getMergePolicy()).setMergeFactor(2);
      return conf;
    }
  };
  TaxonomyReader reader = new DirectoryTaxonomyReader(writer);

  int numRounds = random().nextInt(10) + 10;
  int expectedCategories = 1; // one for root
  for (int round = 0; round < numRounds; round++) {
    int numCats = random().nextInt(4) + 1;
    for (int cat = 0; cat < numCats; cat++) {
      writer.addCategory(new FacetLabel(Integer.toString(round), Integer.toString(cat)));
    }
    expectedCategories += numCats + 1 /* one for round-parent */;

    // Reopen must observe the newly added categories.
    TaxonomyReader refreshed = TaxonomyReader.openIfChanged(reader);
    assertNotNull(refreshed);
    reader.close();
    reader = refreshed;

    // assert categories
    assertEquals(expectedCategories, reader.getSize());
    int roundOrdinal = reader.getOrdinal(new FacetLabel(Integer.toString(round)));
    int[] parents = reader.getParallelTaxonomyArrays().parents();
    assertEquals(0, parents[roundOrdinal]); // round's parent is root
    for (int cat = 0; cat < numCats; cat++) {
      int ord = reader.getOrdinal(new FacetLabel(Integer.toString(round), Integer.toString(cat)));
      assertEquals(roundOrdinal, parents[ord]); // each category's parent is its round
    }
  }

  reader.close();
  writer.close();
  dir.close();
}
 
源代码7 项目: lucene-solr   文件: TestSearchForDuplicates.java
/**
 * Indexes MAX_DOCS priority documents (optionally as compound files), then runs
 * a plain term query and a boolean OR query over the priority field, printing
 * and checking the hits for each.
 */
private void doTest(Random random, PrintWriter out, boolean useCompoundFiles, int MAX_DOCS) throws Exception {
    Directory directory = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random);
    IndexWriterConfig conf = newIndexWriterConfig(analyzer);
    // noCFSRatio of 1.0 forces compound files; 0.0 forbids them.
    conf.getMergePolicy().setNoCFSRatio(useCompoundFiles ? 1.0 : 0.0);
    IndexWriter writer = new IndexWriter(directory, conf);
    if (VERBOSE) {
      System.out.println("TEST: now build index MAX_DOCS=" + MAX_DOCS);
    }

    for (int docId = 0; docId < MAX_DOCS; docId++) {
      Document d = new Document();
      d.add(newTextField(PRIORITY_FIELD, HIGH_PRIORITY, Field.Store.YES));
      d.add(new StoredField(ID_FIELD, docId));
      d.add(new NumericDocValuesField(ID_FIELD, docId));
      writer.addDocument(d);
    }
    writer.close();

    // try a search without OR
    IndexReader reader = DirectoryReader.open(directory);
    IndexSearcher searcher = newSearcher(reader);

    Query query = new TermQuery(new Term(PRIORITY_FIELD, HIGH_PRIORITY));
    out.println("Query: " + query.toString(PRIORITY_FIELD));
    if (VERBOSE) {
      System.out.println("TEST: search query=" + query);
    }

    // Score first, then doc id, so ties are deterministic.
    final Sort sort = new Sort(SortField.FIELD_SCORE,
                               new SortField(ID_FIELD, SortField.Type.INT));

    ScoreDoc[] hits = searcher.search(query, MAX_DOCS, sort).scoreDocs;
    printHits(out, hits, searcher);
    checkHits(hits, MAX_DOCS, searcher);

    // try a new search with OR
    searcher = newSearcher(reader);
    hits = null;

    BooleanQuery.Builder booleanQuery = new BooleanQuery.Builder();
    booleanQuery.add(new TermQuery(new Term(PRIORITY_FIELD, HIGH_PRIORITY)), BooleanClause.Occur.SHOULD);
    booleanQuery.add(new TermQuery(new Term(PRIORITY_FIELD, MED_PRIORITY)), BooleanClause.Occur.SHOULD);
    out.println("Query: " + booleanQuery.build().toString(PRIORITY_FIELD));

    hits = searcher.search(booleanQuery.build(), MAX_DOCS, sort).scoreDocs;
    printHits(out, hits, searcher);
    checkHits(hits, MAX_DOCS, searcher);

    reader.close();
    directory.close();
}
 
/**
 * Populates {@code dir}/{@code iw}/{@code reader} with a randomly built,
 * index-sorted index: random docs, interleaved commits and deletes, and
 * (optionally) a force-merge down to one sorted segment.
 */
private void createRandomIndex(boolean singleSortedSegment) throws IOException {
  dir = newDirectory();
  numDocs = atLeast(150);

  // Draw a small pool of random terms to index and delete against.
  final int numTerms = TestUtil.nextInt(random(), 1, numDocs / 5);
  Set<String> randomTerms = new HashSet<>();
  while (randomTerms.size() < numTerms) {
    randomTerms.add(TestUtil.randomSimpleString(random()));
  }
  terms = new ArrayList<>(randomTerms);

  final long seed = random().nextLong();
  final IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(new Random(seed)));
  if (iwc.getMergePolicy() instanceof MockRandomMergePolicy) {
    // MockRandomMP randomly wraps the leaf readers which makes merging angry
    iwc.setMergePolicy(newTieredMergePolicy());
  }
  iwc.setMergeScheduler(new SerialMergeScheduler()); // for reproducible tests
  iwc.setIndexSort(sort);
  iw = new RandomIndexWriter(new Random(seed), dir, iwc);
  iw.setDoRandomForceMerge(false); // don't do this, it may happen anyway with MockRandomMP

  for (int docNum = 0; docNum < numDocs; ++docNum) {
    iw.addDocument(randomDocument());
    // Commit at the midpoint and occasionally elsewhere (never on the last doc).
    if (docNum == numDocs / 2 || (docNum != numDocs - 1 && random().nextInt(8) == 0)) {
      iw.commit();
    }
    // Occasionally delete by a random term from the pool.
    if (random().nextInt(15) == 0) {
      iw.deleteDocuments(new Term("s", RandomPicks.randomFrom(random(), terms)));
    }
  }

  if (singleSortedSegment) {
    iw.forceMerge(1);
  } else if (random().nextBoolean()) {
    iw.forceMerge(FORCE_MERGE_MAX_SEGMENT_COUNT);
  }

  reader = iw.getReader();
  // Guarantee a non-empty index for the callers.
  if (reader.numDocs() == 0) {
    iw.addDocument(new Document());
    reader.close();
    reader = iw.getReader();
  }
}
 
源代码9 项目: examples   文件: TreeMergeOutputFormat.java
@Override
    /**
     * Merges all source shard indexes into the destination directory: a fast
     * "logical" merge via addIndexes, followed by an optional forceMerge down
     * to the configured segment count.
     *
     * Fixes vs. previous version:
     *  - elapsed seconds used (float)(10^9): '^' is XOR, so the divisor was 3,
     *    not 1e9 — replaced with 1_000_000_000.0f;
     *  - timing counters subtracted System.nanoTime() start values from
     *    System.currentTimeMillis(), mixing two unrelated clocks — both now
     *    derive millis from the nanoTime delta.
     */
    public void close(TaskAttemptContext context) throws IOException {
      LOG.debug("Task " + context.getTaskAttemptID() + " merging into dstDir: " + workDir + ", srcDirs: " + shards);
      writeShardNumberFile(context);      
      heartBeater.needHeartBeat();
      try {
        Directory mergedIndex = new HdfsDirectory(workDir, context.getConfiguration());
        
        // TODO: shouldn't we pull the Version from the solrconfig.xml?
        IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_CURRENT, null)
            .setOpenMode(OpenMode.CREATE).setUseCompoundFile(false)
            //.setMergePolicy(mergePolicy) // TODO: grab tuned MergePolicy from solrconfig.xml?
            //.setMergeScheduler(...) // TODO: grab tuned MergeScheduler from solrconfig.xml?
            ;
          
        if (LOG.isDebugEnabled()) {
          writerConfig.setInfoStream(System.out);
        }
//        writerConfig.setRAMBufferSizeMB(100); // improve performance
//        writerConfig.setMaxThreadStates(1);
        
        // disable compound file to improve performance
        // also see http://lucene.472066.n3.nabble.com/Questions-on-compound-file-format-td489105.html
        // also see defaults in SolrIndexConfig
        MergePolicy mergePolicy = writerConfig.getMergePolicy();
        LOG.debug("mergePolicy was: {}", mergePolicy);
        if (mergePolicy instanceof TieredMergePolicy) {
          ((TieredMergePolicy) mergePolicy).setNoCFSRatio(0.0);
//          ((TieredMergePolicy) mergePolicy).setMaxMergeAtOnceExplicit(10000);          
//          ((TieredMergePolicy) mergePolicy).setMaxMergeAtOnce(10000);       
//          ((TieredMergePolicy) mergePolicy).setSegmentsPerTier(10000);
        } else if (mergePolicy instanceof LogMergePolicy) {
          ((LogMergePolicy) mergePolicy).setNoCFSRatio(0.0);
        }
        LOG.info("Using mergePolicy: {}", mergePolicy);
        
        IndexWriter writer = new IndexWriter(mergedIndex, writerConfig);
        
        Directory[] indexes = new Directory[shards.size()];
        for (int i = 0; i < shards.size(); i++) {
          indexes[i] = new HdfsDirectory(shards.get(i), context.getConfiguration());
        }

        context.setStatus("Logically merging " + shards.size() + " shards into one shard");
        LOG.info("Logically merging " + shards.size() + " shards into one shard: " + workDir);
        long start = System.nanoTime();
        
        writer.addIndexes(indexes); 
        // TODO: avoid intermediate copying of files into dst directory; rename the files into the dir instead (cp -> rename) 
        // This can improve performance and turns this phase into a true "logical" merge, completing in constant time.
        // See https://issues.apache.org/jira/browse/LUCENE-4746
        
        if (LOG.isDebugEnabled()) {
          // Counter expects millis; derive from the nanoTime delta (never mix clocks).
          context.getCounter(SolrCounters.class.getName(), SolrCounters.LOGICAL_TREE_MERGE_TIME.toString()).increment((System.nanoTime() - start) / 1_000_000);
        }
        float secs = (System.nanoTime() - start) / 1_000_000_000.0f; // nanos -> seconds
        LOG.info("Logical merge took {} secs", secs);        
        int maxSegments = context.getConfiguration().getInt(TreeMergeMapper.MAX_SEGMENTS_ON_TREE_MERGE, Integer.MAX_VALUE);
        context.setStatus("Optimizing Solr: forcing mtree merge down to " + maxSegments + " segments");
        LOG.info("Optimizing Solr: forcing tree merge down to {} segments", maxSegments);
        start = System.nanoTime();
        if (maxSegments < Integer.MAX_VALUE) {
          writer.forceMerge(maxSegments); 
          // TODO: consider perf enhancement for no-deletes merges: bulk-copy the postings data 
          // see http://lucene.472066.n3.nabble.com/Experience-with-large-merge-factors-tp1637832p1647046.html
        }
        if (LOG.isDebugEnabled()) {
          // Same millis-from-nanos conversion as above.
          context.getCounter(SolrCounters.class.getName(), SolrCounters.PHYSICAL_TREE_MERGE_TIME.toString()).increment((System.nanoTime() - start) / 1_000_000);
        }
        secs = (System.nanoTime() - start) / 1_000_000_000.0f;
        LOG.info("Optimizing Solr: done forcing tree merge down to {} segments in {} secs", maxSegments, secs);
        
        start = System.nanoTime();
        LOG.info("Optimizing Solr: Closing index writer");
        writer.close();
        secs = (System.nanoTime() - start) / 1_000_000_000.0f;
        LOG.info("Optimizing Solr: Done closing index writer in {} secs", secs);
        context.setStatus("Done");
      } finally {
        heartBeater.cancelHeartBeat();
        heartBeater.close();
      }
    }
 
@Test
public void testMulipleCommitsAndReopens() throws IOException {
  // Shared writer config: serial merges, tiered policy, no compound files.
  IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_43, new KeywordAnalyzer());
  writerConfig.setMergeScheduler(new SerialMergeScheduler());
  ((TieredMergePolicy) writerConfig.getMergePolicy()).setUseCompoundFile(false);

  Set<String> committedFiles = new TreeSet<String>();
  long seed = new Random().nextLong();
  System.out.println("Seed:" + seed);
  Random random = new Random(seed);
  int docCount = 0;
  int passes = 10;
  byte[] segmentsGenContents = null;

  for (int run = 0; run < passes; run++) {
    final FastHdfsKeyValueDirectory directory = new FastHdfsKeyValueDirectory(false, _timer, _configuration,
        new Path(_path, "test_multiple_commits_reopens"));

    // The segments.gen contents must survive a directory reopen unchanged.
    if (segmentsGenContents != null) {
      assertTrue(Arrays.equals(segmentsGenContents, readSegmentsGen(directory)));
    }
    assertFiles(committedFiles, run, -1, directory);
    assertEquals(docCount, getDocumentCount(directory));

    IndexWriter writer = new IndexWriter(directory, writerConfig.clone());
    int numberOfCommits = random.nextInt(100);
    for (int commitNum = 0; commitNum < numberOfCommits; commitNum++) {
      assertFiles(committedFiles, run, commitNum, directory);
      addDocuments(writer, random.nextInt(100));
      // Before Commit
      writer.commit();
      // After Commit

      // Snapshot the file set belonging to the single live commit point.
      committedFiles.clear();
      List<IndexCommit> listCommits = DirectoryReader.listCommits(directory);
      assertEquals(1, listCommits.size());
      committedFiles.addAll(listCommits.get(0).getFileNames());

      segmentsGenContents = readSegmentsGen(directory);
    }
    docCount = getDocumentCount(directory);
  }
}