Source code examples for the class org.apache.lucene.index.LogByteSizeMergePolicy

Listed below are examples showing how to use the org.apache.lucene.index.LogByteSizeMergePolicy API; alternatively, follow the links to view the original source code on GitHub.
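
Before the project examples, here is a minimal sketch (not taken from any project below) of the main tuning knobs that LogByteSizeMergePolicy adds or inherits; the values are illustrative, not recommendations.

// Minimal sketch; the values are illustrative only.
LogByteSizeMergePolicy policy = new LogByteSizeMergePolicy();
policy.setMergeFactor(10);      // merge this many same-level segments at once (inherited from LogMergePolicy)
policy.setMinMergeMB(1.6);      // segments smaller than this are bucketed into the lowest level
policy.setMaxMergeMB(2048.0);   // segments larger than this are excluded from normal merges
IndexWriterConfig config = new IndexWriterConfig().setMergePolicy(policy);
try (IndexWriter writer = new IndexWriter(new ByteBuffersDirectory(), config)) {
    // index documents as usual; segment merging follows the policy above
}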

Example 1 Project: dacapobench File: Index.java
/**
 * Index all text files under a directory.
 */
public void main(final File INDEX_DIR, final String[] args) throws IOException {
  IndexWriterConfig IWConfig = new IndexWriterConfig();
  IWConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
  IWConfig.setMergePolicy(new LogByteSizeMergePolicy());
  IndexWriter writer = new IndexWriter(FSDirectory.open(Paths.get(INDEX_DIR.getCanonicalPath())), IWConfig);
  for (int arg = 0; arg < args.length; arg++) {
    final File docDir = new File(args[arg]);
    if (!docDir.exists() || !docDir.canRead()) {
      System.out.println("Document directory '" + docDir.getAbsolutePath() + "' does not exist or is not readable, please check the path");
      throw new IOException("Cannot read from document directory");
    }

    indexDocs(writer, docDir);
    System.out.println("Optimizing...");
    writer.forceMerge(1);
  }
  writer.close();
}
 
Example 2 Project: everywhere File: IndexUtil.java
public static IndexWriter getIndexWriter(String indexPath, boolean create) throws IOException {
    Directory dir = FSDirectory.open(Paths.get(indexPath));
    Analyzer analyzer = new SmartChineseAnalyzer();
    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    LogMergePolicy mergePolicy = new LogByteSizeMergePolicy();
    mergePolicy.setMergeFactor(50);
    mergePolicy.setMaxMergeDocs(5000);
    if (create) {
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    } else {
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    }
    // attach the merge policy built above; without this call it is never applied
    iwc.setMergePolicy(mergePolicy);
    return new IndexWriter(dir, iwc);
}
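
A possible use of this helper (the index path is hypothetical, for illustration only):

// Hypothetical usage of IndexUtil.getIndexWriter above; the path is made up.
try (IndexWriter writer = IndexUtil.getIndexWriter("/tmp/everywhere-index", true)) {
    Document doc = new Document();
    doc.add(new TextField("content", "hello world", Field.Store.YES));
    writer.addDocument(doc);
    writer.commit();
}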
 
Example 3 Project: lucene-solr File: TestMergePolicyConfig.java
public void testLogMergePolicyFactoryConfig() throws Exception {

    final boolean byteSizeMP = random().nextBoolean();
    final Class<? extends LogMergePolicy> mpClass = byteSizeMP
        ? LogByteSizeMergePolicy.class : LogDocMergePolicy.class;
    final Class<? extends MergePolicyFactory> mpfClass = byteSizeMP
        ? LogByteSizeMergePolicyFactory.class : LogDocMergePolicyFactory.class;

    System.setProperty("solr.test.log.merge.policy.factory", mpfClass.getName());

    implTestLogMergePolicyConfig("solrconfig-logmergepolicyfactory.xml", mpClass);
  }
 
Example 4 Project: lucene-solr File: TestDirectoryTaxonomyReader.java
@Test
public void testOpenIfChangedMergedSegment() throws Exception {
  // test openIfChanged() when all index segments were merged - this used to
  // be a bug in ParentArray, caught by testOpenIfChangedManySegments - but
  // unlike that test, this one is not randomized
  Directory dir = newDirectory();
  
  // hold onto IW to forceMerge
  // note how we don't close it, since DTW will close it.
  final IndexWriter iw = new IndexWriter(dir,
      new IndexWriterConfig(new MockAnalyzer(random()))
          .setMergePolicy(new LogByteSizeMergePolicy()));
  DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriter(dir) {
    @Override
    protected IndexWriter openIndexWriter(Directory directory,
        IndexWriterConfig config) throws IOException {
      return iw;
    }
  };
  
  TaxonomyReader reader = new DirectoryTaxonomyReader(writer);
  assertEquals(1, reader.getSize());
  assertEquals(1, reader.getParallelTaxonomyArrays().parents().length);

  // add category and call forceMerge -- this should flush IW and merge segments down to 1
  // in ParentArray.initFromReader, this used to fail assuming there are no parents.
  writer.addCategory(new FacetLabel("1"));
  iw.forceMerge(1);
  
  // now calling openIfChanged should trip on the bug
  TaxonomyReader newtr = TaxonomyReader.openIfChanged(reader);
  assertNotNull(newtr);
  reader.close();
  reader = newtr;
  assertEquals(2, reader.getSize());
  assertEquals(2, reader.getParallelTaxonomyArrays().parents().length);
  
  reader.close();
  writer.close();
  dir.close();
}
 
Example 5 Project: lucene-solr File: TestDirectoryTaxonomyReader.java
@Test
public void testOpenIfChangedNoChangesButSegmentMerges() throws Exception {
  // test openIfChanged() when the taxonomy hasn't really changed, but segments
  // were merged. The NRT reader will be reopened, and ParentArray used to
  // wrongly assert that the new reader contains more ordinals than the old
  // TaxonomyReader did
  Directory dir = newDirectory();
  
  // hold onto IW to forceMerge
  // note how we don't close it, since DTW will close it.
  final IndexWriter iw = new IndexWriter(dir,
      new IndexWriterConfig(new MockAnalyzer(random()))
          .setMergePolicy(new LogByteSizeMergePolicy()));
  DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriter(dir) {
    @Override
    protected IndexWriter openIndexWriter(Directory directory,
        IndexWriterConfig config) throws IOException {
      return iw;
    }
  };
  
  // add a category so that the following DTR open will cause a flush and 
  // a new segment will be created
  writer.addCategory(new FacetLabel("a"));
  
  TaxonomyReader reader = new DirectoryTaxonomyReader(writer);
  assertEquals(2, reader.getSize());
  assertEquals(2, reader.getParallelTaxonomyArrays().parents().length);

  // merge all the segments so that NRT reader thinks there's a change 
  iw.forceMerge(1);
  
  // now calling openIfChanged should trip on the wrong assert in ParentArray's ctor
  TaxonomyReader newtr = TaxonomyReader.openIfChanged(reader);
  assertNotNull(newtr);
  reader.close();
  reader = newtr;
  assertEquals(2, reader.getSize());
  assertEquals(2, reader.getParallelTaxonomyArrays().parents().length);
  
  reader.close();
  writer.close();
  dir.close();
}
 
Example 6
@Override
protected MergePolicy getMergePolicyInstance() {
  return new LogByteSizeMergePolicy();
}
 
Example 7 Project: lucene-solr File: FileBasedSpellChecker.java
private void loadExternalFileDictionary(SolrCore core, SolrIndexSearcher searcher) {
  try {
    IndexSchema schema = null == searcher ? core.getLatestSchema() : searcher.getSchema();
    // Get the field's analyzer
    if (fieldTypeName != null && schema.getFieldTypeNoEx(fieldTypeName) != null) {
      FieldType fieldType = schema.getFieldTypes().get(fieldTypeName);
      // Do index-time analysis using the given fieldType's analyzer
      Directory ramDir = new ByteBuffersDirectory();

      LogMergePolicy mp = new LogByteSizeMergePolicy();
      mp.setMergeFactor(300);

      IndexWriter writer = new IndexWriter(
          ramDir,
          new IndexWriterConfig(fieldType.getIndexAnalyzer()).
              setMaxBufferedDocs(150).
              setMergePolicy(mp).
              setOpenMode(IndexWriterConfig.OpenMode.CREATE)
              // TODO: if we enable this, codec gets angry since field won't exist in the schema
              // .setCodec(core.getCodec())
      );

      List<String> lines = core.getResourceLoader().getLines(sourceLocation, characterEncoding);

      for (String s : lines) {
        Document d = new Document();
        d.add(new TextField(WORD_FIELD_NAME, s, Field.Store.NO));
        writer.addDocument(d);
      }
      writer.forceMerge(1);
      writer.close();

      dictionary = new HighFrequencyDictionary(DirectoryReader.open(ramDir),
              WORD_FIELD_NAME, 0.0f);
    } else {
      // check if character encoding is defined
      if (characterEncoding == null) {
        dictionary = new PlainTextDictionary(core.getResourceLoader().openResource(sourceLocation));
      } else {
        dictionary = new PlainTextDictionary(new InputStreamReader(core.getResourceLoader().openResource(sourceLocation), characterEncoding));
      }
    }


  } catch (IOException e) {
    log.error( "Unable to load spellings", e);
  }
}
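
As a follow-up not present in the original class, here is a hedged sketch of one way the resulting HighFrequencyDictionary could be consumed, via Lucene's SpellChecker from the suggest module; the misspelled query is made up.

// Sketch only: feed the dictionary into a SpellChecker and ask for suggestions.
Directory spellIndexDir = new ByteBuffersDirectory();
try (SpellChecker checker = new SpellChecker(spellIndexDir)) {
    // fullMerge=true collapses the spell-check index to a single segment when done
    checker.indexDictionary(dictionary, new IndexWriterConfig(), true);
    String[] suggestions = checker.suggestSimilar("exmaple", 5);
}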
 
Example 8 Project: lucene-solr File: DirectoryTaxonomyWriter.java
/**
 * Create the {@link IndexWriterConfig} that would be used for opening the internal index writer.
 * <br>Extensions can configure the {@link IndexWriter} as they see fit,
 * including setting a {@link org.apache.lucene.index.MergeScheduler merge-scheduler}, an
 * {@link org.apache.lucene.index.IndexDeletionPolicy deletion-policy}, a different RAM
 * buffer size, etc.<br>
 * <br><b>NOTE:</b> the internal doc IDs of the configured index must not be altered.
 * To that end, categories are never deleted from the taxonomy index.
 * In addition, the merge policy in effect must never merge non-adjacent segments.
 * 
 * @see #openIndexWriter(Directory, IndexWriterConfig)
 * 
 * @param openMode see {@link OpenMode}
 */
protected IndexWriterConfig createIndexWriterConfig(OpenMode openMode) {
  // TODO: should we use a more optimized Codec?
  // The taxonomy has a unique structure, where each term is associated with one document

  // Make sure we use a MergePolicy which always merges adjacent segments and thus
  // keeps the doc IDs ordered as well (this is crucial for the taxonomy index).
  return new IndexWriterConfig(null).setOpenMode(openMode).setMergePolicy(
      new LogByteSizeMergePolicy());
}
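
A hedged sketch of such an extension (the size limit is illustrative); note that it still installs a LogByteSizeMergePolicy, so the adjacent-segments requirement above is preserved:

// Sketch only: override createIndexWriterConfig to tune the taxonomy's merge policy.
DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir) {
    @Override
    protected IndexWriterConfig createIndexWriterConfig(OpenMode openMode) {
        LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy();
        mp.setMaxMergeMB(64.0); // illustrative; keeps individual taxonomy merges small
        return super.createIndexWriterConfig(openMode).setMergePolicy(mp);
    }
};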
 