org.apache.lucene.index.IndexWriterConfig#setRAMBufferSizeMB() Code Examples

Below are example usages of org.apache.lucene.index.IndexWriterConfig#setRAMBufferSizeMB(), collected from open-source projects on GitHub.
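
As a quick orientation before the project examples, here is a minimal, self-contained sketch of the call itself (the index path and the 256 MB value are illustrative assumptions, not taken from any project below). Once the documents buffered in memory exceed the configured budget, IndexWriter flushes them to a new segment; if the buffer size is never set, Lucene uses IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB (16 MB).

import java.nio.file.Paths;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.FSDirectory;

public class RamBufferSizeDemo {
  public static void main(String[] args) throws Exception {
    IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());
    // Flush buffered documents to a new segment once they exceed ~256 MB
    // (illustrative value; the Lucene default is 16 MB).
    config.setRAMBufferSizeMB(256);
    try (IndexWriter writer = new IndexWriter(
        FSDirectory.open(Paths.get("/tmp/demo-index")), config)) {
      // ... add documents here ...
      writer.commit();
    }
  }
}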

Example 1  Project: SourcererCC   File: WordFrequencyStore.java
public void prepareIndex() throws IOException {
    File globalWFMDir = new File(Util.GTPM_INDEX_DIR);
    if (!globalWFMDir.exists()) {
        Util.createDirs(Util.GTPM_INDEX_DIR);
    }
    KeywordAnalyzer keywordAnalyzer = new KeywordAnalyzer();
    IndexWriterConfig wfmIndexWriterConfig = new IndexWriterConfig(Version.LUCENE_46, keywordAnalyzer);
    wfmIndexWriterConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
    wfmIndexWriterConfig.setRAMBufferSizeMB(1024); // buffer up to 1 GB of documents in RAM before flushing

    logger.info("PREPARE INDEX");
    try {
        wfmIndexWriter = new IndexWriter(FSDirectory.open(new File(Util.GTPM_INDEX_DIR)), wfmIndexWriterConfig);
        wfmIndexWriter.commit();
        wfmIndexer = new DocumentMaker(wfmIndexWriter);
    } catch (IOException e) {
        // only logged; wfmIndexWriter stays null if opening the index fails
        e.printStackTrace();
    }
}
 
Example 2  Project: vscode-extension   File: test.java
private IndexWriterConfig getIndexWriterConfig() {
    final IndexWriterConfig iwc = new IndexWriterConfig(engineConfig.getAnalyzer());
    iwc.setCommitOnClose(false); // by default we don't commit on close
    iwc.setOpenMode(IndexWriterConfig.OpenMode.APPEND);
    iwc.setIndexDeletionPolicy(combinedDeletionPolicy);
    // with tests.verbose, lucene sets this up: plumb to align with filesystem stream
    boolean verbose = false;
    try {
        verbose = Boolean.parseBoolean(System.getProperty("tests.verbose"));
    } catch (Exception ignore) {
    }
    iwc.setInfoStream(verbose ? InfoStream.getDefault() : new LoggerInfoStream(logger));
    iwc.setMergeScheduler(mergeScheduler);
    // Give us the opportunity to upgrade old segments while performing
    // background merges
    MergePolicy mergePolicy = config().getMergePolicy();
    // always configure soft-deletes field so an engine with soft-deletes disabled can open a Lucene index with soft-deletes.
    iwc.setSoftDeletesField(Lucene.SOFT_DELETES_FIELD);
    if (softDeleteEnabled) {
        mergePolicy = new RecoverySourcePruneMergePolicy(SourceFieldMapper.RECOVERY_SOURCE_NAME, softDeletesPolicy::getRetentionQuery,
            new SoftDeletesRetentionMergePolicy(Lucene.SOFT_DELETES_FIELD, softDeletesPolicy::getRetentionQuery, mergePolicy));
    }
    iwc.setMergePolicy(new ElasticsearchMergePolicy(mergePolicy));
    iwc.setSimilarity(engineConfig.getSimilarity());
    iwc.setRAMBufferSizeMB(engineConfig.getIndexingBufferSize().getMbFrac());
    iwc.setCodec(engineConfig.getCodec());
    iwc.setUseCompoundFile(true); // always use compound on flush - reduces # of file-handles on refresh
    if (config().getIndexSort() != null) {
        iwc.setIndexSort(config().getIndexSort());
    }
    return iwc;
}
 
Example 3  Project: linden   File: LindenConfig.java
public IndexWriterConfig createIndexWriterConfig() throws IOException {
  IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LATEST, getIndexAnalyzerInstance());
  indexWriterConfig.setRAMBufferSizeMB(48); // flush to a new segment once buffered docs exceed 48 MB

  MergePolicy mergePolicy = getPluginManager().getInstance(LindenConfigBuilder.MERGE_POLICY, MergePolicy.class);
  if (mergePolicy != null) {
    indexWriterConfig.setMergePolicy(mergePolicy);
  }
  LOGGER.info("Merge policy : {}", mergePolicy == null ? "Default" : mergePolicy);

  ConcurrentMergeScheduler cms = new ConcurrentMergeScheduler();
  cms.setMaxMergesAndThreads(8, 1); // allow up to 8 pending merges, run at most 1 merge thread
  indexWriterConfig.setMergeScheduler(cms);
  return indexWriterConfig;
}
 
Example 4  Project: lucene-solr   File: TestLucene80DocValuesFormat.java
private IndexWriter createFastIndexWriter(Directory dir, int maxBufferedDocs) throws IOException {
  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
  conf.setMaxBufferedDocs(maxBufferedDocs);
  conf.setRAMBufferSizeMB(-1); // -1 == IndexWriterConfig.DISABLE_AUTO_FLUSH: flushes driven solely by maxBufferedDocs
  conf.setMergePolicy(newLogMergePolicy(random().nextBoolean()));
  return new IndexWriter(dir, conf);
}
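
A note on the -1 used in these lucene-solr tests: -1 is IndexWriterConfig.DISABLE_AUTO_FLUSH, which turns off RAM-based flushing entirely so that segment boundaries are controlled deterministically by setMaxBufferedDocs alone; at least one of the two flush triggers must remain enabled.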
 
Example 5  Project: lucene-solr   File: TestLucene80DocValuesFormat.java
private void doTestSparseNumericBlocksOfVariousBitsPerValue(double density) throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
  conf.setMaxBufferedDocs(atLeast(Lucene80DocValuesFormat.NUMERIC_BLOCK_SIZE));
  conf.setRAMBufferSizeMB(-1); // -1 == IndexWriterConfig.DISABLE_AUTO_FLUSH: flushes driven solely by maxBufferedDocs
  conf.setMergePolicy(newLogMergePolicy(random().nextBoolean()));
  IndexWriter writer = new IndexWriter(dir, conf);
  Document doc = new Document();
  Field storedField = newStringField("stored", "", Field.Store.YES);
  Field dvField = new NumericDocValuesField("dv", 0);
  doc.add(storedField);
  doc.add(dvField);

  final int numDocs = atLeast(Lucene80DocValuesFormat.NUMERIC_BLOCK_SIZE*3);
  final LongSupplier longs = blocksOfVariousBPV();
  for (int i = 0; i < numDocs; i++) {
    if (random().nextDouble() > density) {
      writer.addDocument(new Document());
      continue;
    }
    long value = longs.getAsLong();
    storedField.setStringValue(Long.toString(value));
    dvField.setLongValue(value);
    writer.addDocument(doc);
  }

  writer.forceMerge(1);

  writer.close();
  
  // compare
  assertDVIterate(dir);
  assertDVAdvance(dir, 1); // Tests all jump-lengths from 1 to maxDoc (quite slow ~= 1 minute for 200K docs)

  dir.close();
}
 
Example 6  Project: lucene-solr   File: SolrIndexConfig.java
public IndexWriterConfig toIndexWriterConfig(SolrCore core) throws IOException {
  IndexSchema schema = core.getLatestSchema();
  IndexWriterConfig iwc = new IndexWriterConfig(new DelayedSchemaAnalyzer(core));
  if (maxBufferedDocs != -1)
    iwc.setMaxBufferedDocs(maxBufferedDocs);

  if (ramBufferSizeMB != -1)
    iwc.setRAMBufferSizeMB(ramBufferSizeMB);

  if (ramPerThreadHardLimitMB != -1) {
    iwc.setRAMPerThreadHardLimitMB(ramPerThreadHardLimitMB);
  }

  iwc.setSimilarity(schema.getSimilarity());
  MergePolicy mergePolicy = buildMergePolicy(core.getResourceLoader(), schema);
  iwc.setMergePolicy(mergePolicy);
  MergeScheduler mergeScheduler = buildMergeScheduler(core.getResourceLoader());
  iwc.setMergeScheduler(mergeScheduler);
  iwc.setInfoStream(infoStream);

  if (mergePolicy instanceof SortingMergePolicy) {
    Sort indexSort = ((SortingMergePolicy) mergePolicy).getSort();
    iwc.setIndexSort(indexSort);
  }

  iwc.setUseCompoundFile(useCompoundFile);

  if (mergedSegmentWarmerInfo != null) {
    // TODO: add infostream -> normal logging system (there is an issue somewhere)
    @SuppressWarnings({"rawtypes"})
    IndexReaderWarmer warmer = core.getResourceLoader().newInstance(mergedSegmentWarmerInfo.className,
                                                                      IndexReaderWarmer.class,
                                                                      null,
                                                                      new Class[] { InfoStream.class },
                                                                      new Object[] { iwc.getInfoStream() });
    iwc.setMergedSegmentWarmer(warmer);
  }

  return iwc;
}
 
Example 7  Project: incubator-pinot   File: TextSearchQueriesTest.java
@Test
public void testMultiThreadedLuceneRealtime() throws Exception {
  File indexFile = new File(INDEX_DIR.getPath() + "/realtime-test3.index");
  Directory indexDirectory = FSDirectory.open(indexFile.toPath());
  StandardAnalyzer standardAnalyzer = new StandardAnalyzer();
  // create and open a writer
  IndexWriterConfig indexWriterConfig = new IndexWriterConfig(standardAnalyzer);
  indexWriterConfig.setRAMBufferSizeMB(500); // allow up to 500 MB of buffered docs before flushing
  IndexWriter indexWriter = new IndexWriter(indexDirectory, indexWriterConfig);

  // create an NRT index reader
  SearcherManager searcherManager = new SearcherManager(indexWriter, false, false, null);

  // background thread to refresh NRT reader
  ControlledRealTimeReopenThread<IndexSearcher> controlledRealTimeReopenThread =
      new ControlledRealTimeReopenThread<>(indexWriter, searcherManager, 0.01, 0.01);
  controlledRealTimeReopenThread.start();

  // start writer and reader
  Thread writer = new Thread(new RealtimeWriter(indexWriter));
  Thread realtimeReader = new Thread(new RealtimeReader(searcherManager, standardAnalyzer));

  writer.start();
  realtimeReader.start();

  writer.join();
  realtimeReader.join();
  controlledRealTimeReopenThread.join();
}
 
Example 8  Project: crate   File: InternalEngine.java
private IndexWriterConfig getIndexWriterConfig() {
    final IndexWriterConfig iwc = new IndexWriterConfig(engineConfig.getAnalyzer());
    iwc.setCommitOnClose(false); // by default we don't commit on close
    iwc.setOpenMode(IndexWriterConfig.OpenMode.APPEND);
    iwc.setReaderAttributes(getReaderAttributes(store.directory()));
    iwc.setIndexDeletionPolicy(combinedDeletionPolicy);
    // with tests.verbose, lucene sets this up: plumb to align with filesystem stream
    boolean verbose = false;
    try {
        verbose = Boolean.parseBoolean(System.getProperty("tests.verbose"));
    } catch (Exception ignore) {
        // ignored
    }
    iwc.setInfoStream(verbose ? InfoStream.getDefault() : new LoggerInfoStream(logger));
    iwc.setMergeScheduler(mergeScheduler);
    // Give us the opportunity to upgrade old segments while performing
    // background merges
    MergePolicy mergePolicy = config().getMergePolicy();
    // always configure soft-deletes field so an engine with soft-deletes disabled can open a Lucene index with soft-deletes.
    iwc.setSoftDeletesField(Lucene.SOFT_DELETES_FIELD);
    if (softDeleteEnabled) {
        mergePolicy = new RecoverySourcePruneMergePolicy(SourceFieldMapper.RECOVERY_SOURCE_NAME, softDeletesPolicy::getRetentionQuery,
            new SoftDeletesRetentionMergePolicy(Lucene.SOFT_DELETES_FIELD, softDeletesPolicy::getRetentionQuery, mergePolicy));
    }
    iwc.setMergePolicy(new ElasticsearchMergePolicy(mergePolicy));
    iwc.setRAMBufferSizeMB(engineConfig.getIndexingBufferSize().getMbFrac());
    iwc.setCodec(engineConfig.getCodec());
    iwc.setUseCompoundFile(true); // always use compound on flush - reduces # of file-handles on refresh
    return iwc;
}
 
Example 9  Project: act   File: DocumentIndexer.java
public static void main(String[] args) throws Exception {
  System.out.println("Starting up...");
  System.out.flush();
  Options opts = new Options();
  opts.addOption(Option.builder("i").
      longOpt("input").hasArg().required().desc("Input file or directory to index").build());
  opts.addOption(Option.builder("x").
      longOpt("index").hasArg().required().desc("Path to index file to generate").build());
  opts.addOption(Option.builder("h").longOpt("help").desc("Print this help message and exit").build());
  opts.addOption(Option.builder("v").longOpt("verbose").desc("Print verbose log output").build());

  HelpFormatter helpFormatter = new HelpFormatter();
  CommandLineParser cmdLineParser = new DefaultParser();
  CommandLine cmdLine = null;
  try {
    cmdLine = cmdLineParser.parse(opts, args);
  } catch (ParseException e) {
    System.out.println("Caught exception when parsing command line: " + e.getMessage());
    helpFormatter.printHelp("DocumentIndexer", opts);
    System.exit(1);
  }

  if (cmdLine.hasOption("help")) {
    helpFormatter.printHelp("DocumentIndexer", opts);
    System.exit(0);
  }

  if (cmdLine.hasOption("verbose")) {
    // With help from http://stackoverflow.com/questions/23434252/programmatically-change-log-level-in-log4j2
    LoggerContext ctx = (LoggerContext) LogManager.getContext(false);
    Configuration ctxConfig = ctx.getConfiguration();
    LoggerConfig logConfig = ctxConfig.getLoggerConfig(LogManager.ROOT_LOGGER_NAME);
    logConfig.setLevel(Level.DEBUG);

    ctx.updateLoggers();
    LOGGER.debug("Verbose logging enabled");
  }

  LOGGER.info("Opening index at " + cmdLine.getOptionValue("index"));
  Directory indexDir = FSDirectory.open(new File(cmdLine.getOptionValue("index")).toPath());

  /* The standard analyzer is too aggressive with chemical entities (it strips structural annotations, for one
   * thing), and the whitespace analyzer doesn't do any case normalization or stop word elimination.  This custom
   * analyzer appears to treat chemical entities better than the standard analyzer without admitting too much
   * cruft to the index. */
  Analyzer analyzer = CustomAnalyzer.builder().
      withTokenizer("whitespace").
      addTokenFilter("lowercase").
      addTokenFilter("stop").
      build();

  IndexWriterConfig writerConfig = new IndexWriterConfig(analyzer);
  writerConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
  writerConfig.setRAMBufferSizeMB(1 << 10); // 1 << 10 == 1024 MB (1 GB) RAM buffer
  IndexWriter indexWriter = new IndexWriter(indexDir, writerConfig);

  String inputFileOrDir = cmdLine.getOptionValue("input");
  File splitFileOrDir = new File(inputFileOrDir);
  if (!(splitFileOrDir.exists())) {
    LOGGER.error("Unable to find directory at " + inputFileOrDir);
    System.exit(1);
  }

  DocumentIndexer indexer = new DocumentIndexer(indexWriter);
  PatentCorpusReader corpusReader = new PatentCorpusReader(indexer, splitFileOrDir);
  corpusReader.readPatentCorpus();
  indexer.commitAndClose();
}
 
Example 10  Project: lucene-solr   File: TestLucene80DocValuesFormat.java
private void doTestSortedNumericBlocksOfVariousBitsPerValue(LongSupplier counts) throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
  conf.setMaxBufferedDocs(atLeast(Lucene80DocValuesFormat.NUMERIC_BLOCK_SIZE));
  conf.setRAMBufferSizeMB(-1); // -1 == IndexWriterConfig.DISABLE_AUTO_FLUSH: flushes driven solely by maxBufferedDocs
  conf.setMergePolicy(newLogMergePolicy(random().nextBoolean()));
  IndexWriter writer = new IndexWriter(dir, conf);
  
  final int numDocs = atLeast(Lucene80DocValuesFormat.NUMERIC_BLOCK_SIZE*3);
  final LongSupplier values = blocksOfVariousBPV();
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();
    
    int valueCount = (int) counts.getAsLong();
    long[] valueArray = new long[valueCount];
    for (int j = 0; j < valueCount; j++) {
      long value = values.getAsLong();
      valueArray[j] = value;
      doc.add(new SortedNumericDocValuesField("dv", value));
    }
    Arrays.sort(valueArray);
    for (int j = 0; j < valueCount; j++) {
      doc.add(new StoredField("stored", Long.toString(valueArray[j])));
    }
    writer.addDocument(doc);
    if (random().nextInt(31) == 0) {
      writer.commit();
    }
  }
  writer.forceMerge(1);

  writer.close();
  
  // compare
  DirectoryReader ir = DirectoryReader.open(dir);
  TestUtil.checkReader(ir);
  for (LeafReaderContext context : ir.leaves()) {
    LeafReader r = context.reader();
    SortedNumericDocValues docValues = DocValues.getSortedNumeric(r, "dv");
    for (int i = 0; i < r.maxDoc(); i++) {
      if (i > docValues.docID()) {
        docValues.nextDoc();
      }
      String[] expected = r.document(i).getValues("stored");
      if (i < docValues.docID()) {
        assertEquals(0, expected.length);
      } else {
        String[] actual = new String[docValues.docValueCount()];
        for (int j = 0; j < actual.length; j++) {
          actual[j] = Long.toString(docValues.nextValue());
        }
        assertArrayEquals(expected, actual);
      }
    }
  }
  ir.close();
  dir.close();
}
 
Example 11  Project: incubator-pinot   File: LuceneTextIndexCreator.java
/**
 * Called by {@link org.apache.pinot.core.segment.creator.impl.SegmentColumnarIndexCreator}
 * when building an offline segment. Similar to how it creates per column
 * dictionary, forward and inverted index, a text index is also created
 * if text search is enabled on a column.
 * @param column column name
 * @param segmentIndexDir segment index directory
 * @param commit true if the index should be committed (at the end, after all documents
 *               have been added), false if the index should not be committed
 * Note on commit:
 *               Once {@link org.apache.pinot.core.segment.creator.impl.SegmentColumnarIndexCreator}
 *               finishes indexing all documents/rows for the segment, we need to commit and close
 *               the Lucene index, which will internally persist the index on disk, do the necessary
 *               resource cleanup, etc. We commit during {@link InvertedIndexCreator#seal()}
 *               and close during {@link InvertedIndexCreator#close()}.
 *               This Lucene index writer is used by both the offline and realtime flows (during
 *               indexing of the in-memory MutableSegment and later during conversion to offline).
 *               Since realtime segment conversion goes through the offline indexing path
 *               and does everything (indexing, commit, close, etc.), there is no need to commit
 *               the index from the realtime side. So when the realtime segment is destroyed
 *               (which is after it has been committed and converted to offline), we close this
 *               Lucene index writer to release resources but don't commit.
 *               This is why the commit flag is part of the constructor.
 */
public LuceneTextIndexCreator(String column, File segmentIndexDir, boolean commit) {
  _textColumn = column;
  try {
    // segment generation is always in V1 and later we convert (as part of post creation processing)
    // to V3 if segmentVersion is set to V3 in SegmentGeneratorConfig.
    File indexFile = getV1TextIndexFile(segmentIndexDir);
    _indexDirectory = FSDirectory.open(indexFile.toPath());
    StandardAnalyzer standardAnalyzer = new StandardAnalyzer(ENGLISH_STOP_WORDS_SET);
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(standardAnalyzer);
    indexWriterConfig.setRAMBufferSizeMB(LUCENE_INDEX_MAX_BUFFER_SIZE_MB);
    indexWriterConfig.setCommitOnClose(commit);
    _indexWriter = new IndexWriter(_indexDirectory, indexWriterConfig);
  } catch (Exception e) {
    LOGGER.error("Failed to instantiate Lucene text index creator for column {}, exception {}", column, e.getMessage());
    throw new RuntimeException(e);
  }
}