Listed below are example usages of org.apache.lucene.index.IndexWriterConfig#setRAMBufferSizeMB(), drawn from open-source projects.
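For orientation before the snippets: setRAMBufferSizeMB controls how much RAM IndexWriter may use to buffer added documents and deletions before it flushes a new segment. The default is IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB (16 MB), and passing IndexWriterConfig.DISABLE_AUTO_FLUSH (-1) disables size-based flushing entirely, as several of the test snippets below do to force flushing by document count instead. A minimal, self-contained sketch against the Lucene 8.x API; the class name and the 64 MB value are illustrative only, and the in-memory ByteBuffersDirectory is used purely for the demo:

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;

public class RamBufferSizeDemo {
  public static void main(String[] args) throws Exception {
    Directory dir = new ByteBuffersDirectory(); // in-memory directory, demo only
    IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());
    // Flush a new segment once buffered documents and deletes exceed 64 MB.
    // The default is IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB (16 MB);
    // IndexWriterConfig.DISABLE_AUTO_FLUSH (-1) turns size-based flushing off.
    config.setRAMBufferSizeMB(64);
    try (IndexWriter writer = new IndexWriter(dir, config)) {
      Document doc = new Document();
      doc.add(new TextField("body", "hello lucene", Field.Store.NO));
      writer.addDocument(doc);
      writer.commit();
    }
    dir.close();
  }
}

The value is a double in megabytes, so fractional sizes are accepted; the Solr example below also pairs it with setRAMPerThreadHardLimitMB, which caps the buffer of a single indexing thread.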
public void prepareIndex() throws IOException {
  File globalWFMDIr = new File(Util.GTPM_INDEX_DIR);
  if (!globalWFMDIr.exists()) {
    Util.createDirs(Util.GTPM_INDEX_DIR);
  }
  KeywordAnalyzer keywordAnalyzer = new KeywordAnalyzer();
  IndexWriterConfig wfmIndexWriterConfig = new IndexWriterConfig(Version.LUCENE_46, keywordAnalyzer);
  wfmIndexWriterConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
  wfmIndexWriterConfig.setRAMBufferSizeMB(1024);
  logger.info("PREPARE INDEX");
  try {
    wfmIndexWriter = new IndexWriter(FSDirectory.open(new File(Util.GTPM_INDEX_DIR)), wfmIndexWriterConfig);
    wfmIndexWriter.commit();
    wfmIndexer = new DocumentMaker(wfmIndexWriter);
  } catch (IOException e) {
    e.printStackTrace();
  }
}
private IndexWriterConfig getIndexWriterConfig() {
  final IndexWriterConfig iwc = new IndexWriterConfig(engineConfig.getAnalyzer());
  iwc.setCommitOnClose(false); // we by default don't commit on close
  iwc.setOpenMode(IndexWriterConfig.OpenMode.APPEND);
  iwc.setIndexDeletionPolicy(combinedDeletionPolicy);
  // with tests.verbose, lucene sets this up: plumb to align with filesystem stream
  boolean verbose = false;
  try {
    verbose = Boolean.parseBoolean(System.getProperty("tests.verbose"));
  } catch (Exception ignore) {
    // ignored
  }
  iwc.setInfoStream(verbose ? InfoStream.getDefault() : new LoggerInfoStream(logger));
  iwc.setMergeScheduler(mergeScheduler);
  // Give us the opportunity to upgrade old segments while performing
  // background merges
  MergePolicy mergePolicy = config().getMergePolicy();
  // always configure soft-deletes field so an engine with soft-deletes disabled can open a Lucene index with soft-deletes.
  iwc.setSoftDeletesField(Lucene.SOFT_DELETES_FIELD);
  if (softDeleteEnabled) {
    mergePolicy = new RecoverySourcePruneMergePolicy(SourceFieldMapper.RECOVERY_SOURCE_NAME, softDeletesPolicy::getRetentionQuery,
        new SoftDeletesRetentionMergePolicy(Lucene.SOFT_DELETES_FIELD, softDeletesPolicy::getRetentionQuery, mergePolicy));
  }
  iwc.setMergePolicy(new ElasticsearchMergePolicy(mergePolicy));
  iwc.setSimilarity(engineConfig.getSimilarity());
  iwc.setRAMBufferSizeMB(engineConfig.getIndexingBufferSize().getMbFrac());
  iwc.setCodec(engineConfig.getCodec());
  iwc.setUseCompoundFile(true); // always use compound on flush - reduces # of file-handles on refresh
  if (config().getIndexSort() != null) {
    iwc.setIndexSort(config().getIndexSort());
  }
  return iwc;
}
public IndexWriterConfig createIndexWriterConfig() throws IOException {
  IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LATEST, getIndexAnalyzerInstance());
  indexWriterConfig.setRAMBufferSizeMB(48);
  MergePolicy mergePolicy = getPluginManager().getInstance(LindenConfigBuilder.MERGE_POLICY, MergePolicy.class);
  if (mergePolicy != null) {
    indexWriterConfig.setMergePolicy(mergePolicy);
  }
  LOGGER.info("Merge policy: {}", mergePolicy == null ? "Default" : mergePolicy);
  ConcurrentMergeScheduler cms = new ConcurrentMergeScheduler();
  cms.setMaxMergesAndThreads(8, 1);
  indexWriterConfig.setMergeScheduler(cms);
  return indexWriterConfig;
}
private IndexWriter createFastIndexWriter(Directory dir, int maxBufferedDocs) throws IOException {
  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
  conf.setMaxBufferedDocs(maxBufferedDocs);
  // -1 == IndexWriterConfig.DISABLE_AUTO_FLUSH: flush by document count only
  conf.setRAMBufferSizeMB(-1);
  conf.setMergePolicy(newLogMergePolicy(random().nextBoolean()));
  return new IndexWriter(dir, conf);
}
private void doTestSparseNumericBlocksOfVariousBitsPerValue(double density) throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
  conf.setMaxBufferedDocs(atLeast(Lucene80DocValuesFormat.NUMERIC_BLOCK_SIZE));
  conf.setRAMBufferSizeMB(-1);
  conf.setMergePolicy(newLogMergePolicy(random().nextBoolean()));
  IndexWriter writer = new IndexWriter(dir, conf);
  Document doc = new Document();
  Field storedField = newStringField("stored", "", Field.Store.YES);
  Field dvField = new NumericDocValuesField("dv", 0);
  doc.add(storedField);
  doc.add(dvField);
  final int numDocs = atLeast(Lucene80DocValuesFormat.NUMERIC_BLOCK_SIZE * 3);
  final LongSupplier longs = blocksOfVariousBPV();
  for (int i = 0; i < numDocs; i++) {
    if (random().nextDouble() > density) {
      writer.addDocument(new Document());
      continue;
    }
    long value = longs.getAsLong();
    storedField.setStringValue(Long.toString(value));
    dvField.setLongValue(value);
    writer.addDocument(doc);
  }
  writer.forceMerge(1);
  writer.close();
  // compare
  assertDVIterate(dir);
  assertDVAdvance(dir, 1); // tests all jump-lengths from 1 to maxDoc (quite slow ~= 1 minute for 200K docs)
  dir.close();
}
public IndexWriterConfig toIndexWriterConfig(SolrCore core) throws IOException {
  IndexSchema schema = core.getLatestSchema();
  IndexWriterConfig iwc = new IndexWriterConfig(new DelayedSchemaAnalyzer(core));
  if (maxBufferedDocs != -1) {
    iwc.setMaxBufferedDocs(maxBufferedDocs);
  }
  if (ramBufferSizeMB != -1) {
    iwc.setRAMBufferSizeMB(ramBufferSizeMB);
  }
  if (ramPerThreadHardLimitMB != -1) {
    iwc.setRAMPerThreadHardLimitMB(ramPerThreadHardLimitMB);
  }
  iwc.setSimilarity(schema.getSimilarity());
  MergePolicy mergePolicy = buildMergePolicy(core.getResourceLoader(), schema);
  iwc.setMergePolicy(mergePolicy);
  MergeScheduler mergeScheduler = buildMergeScheduler(core.getResourceLoader());
  iwc.setMergeScheduler(mergeScheduler);
  iwc.setInfoStream(infoStream);
  if (mergePolicy instanceof SortingMergePolicy) {
    Sort indexSort = ((SortingMergePolicy) mergePolicy).getSort();
    iwc.setIndexSort(indexSort);
  }
  iwc.setUseCompoundFile(useCompoundFile);
  if (mergedSegmentWarmerInfo != null) {
    // TODO: add infostream -> normal logging system (there is an issue somewhere)
    @SuppressWarnings({"rawtypes"})
    IndexReaderWarmer warmer = core.getResourceLoader().newInstance(mergedSegmentWarmerInfo.className,
        IndexReaderWarmer.class,
        null,
        new Class[] { InfoStream.class },
        new Object[] { iwc.getInfoStream() });
    iwc.setMergedSegmentWarmer(warmer);
  }
  return iwc;
}
@Test
public void testMultiThreadedLuceneRealtime() throws Exception {
  File indexFile = new File(INDEX_DIR.getPath() + "/realtime-test3.index");
  Directory indexDirectory = FSDirectory.open(indexFile.toPath());
  StandardAnalyzer standardAnalyzer = new StandardAnalyzer();
  // create and open a writer
  IndexWriterConfig indexWriterConfig = new IndexWriterConfig(standardAnalyzer);
  indexWriterConfig.setRAMBufferSizeMB(500);
  IndexWriter indexWriter = new IndexWriter(indexDirectory, indexWriterConfig);
  // create an NRT index reader
  SearcherManager searcherManager = new SearcherManager(indexWriter, false, false, null);
  // background thread to refresh the NRT reader
  ControlledRealTimeReopenThread<IndexSearcher> controlledRealTimeReopenThread =
      new ControlledRealTimeReopenThread<>(indexWriter, searcherManager, 0.01, 0.01);
  controlledRealTimeReopenThread.start();
  // start writer and reader
  Thread writer = new Thread(new RealtimeWriter(indexWriter));
  Thread realtimeReader = new Thread(new RealtimeReader(searcherManager, standardAnalyzer));
  writer.start();
  realtimeReader.start();
  writer.join();
  realtimeReader.join();
  // close() signals the reopen loop to finish and joins the thread;
  // a bare join() would block forever because the loop only exits on close()
  controlledRealTimeReopenThread.close();
}
private IndexWriterConfig getIndexWriterConfig() {
  final IndexWriterConfig iwc = new IndexWriterConfig(engineConfig.getAnalyzer());
  iwc.setCommitOnClose(false); // we by default don't commit on close
  iwc.setOpenMode(IndexWriterConfig.OpenMode.APPEND);
  iwc.setReaderAttributes(getReaderAttributes(store.directory()));
  iwc.setIndexDeletionPolicy(combinedDeletionPolicy);
  // with tests.verbose, lucene sets this up: plumb to align with filesystem stream
  boolean verbose = false;
  try {
    verbose = Boolean.parseBoolean(System.getProperty("tests.verbose"));
  } catch (Exception ignore) {
    // ignored
  }
  iwc.setInfoStream(verbose ? InfoStream.getDefault() : new LoggerInfoStream(logger));
  iwc.setMergeScheduler(mergeScheduler);
  // Give us the opportunity to upgrade old segments while performing
  // background merges
  MergePolicy mergePolicy = config().getMergePolicy();
  // always configure soft-deletes field so an engine with soft-deletes disabled can open a Lucene index with soft-deletes.
  iwc.setSoftDeletesField(Lucene.SOFT_DELETES_FIELD);
  if (softDeleteEnabled) {
    mergePolicy = new RecoverySourcePruneMergePolicy(SourceFieldMapper.RECOVERY_SOURCE_NAME, softDeletesPolicy::getRetentionQuery,
        new SoftDeletesRetentionMergePolicy(Lucene.SOFT_DELETES_FIELD, softDeletesPolicy::getRetentionQuery, mergePolicy));
  }
  iwc.setMergePolicy(new ElasticsearchMergePolicy(mergePolicy));
  iwc.setRAMBufferSizeMB(engineConfig.getIndexingBufferSize().getMbFrac());
  iwc.setCodec(engineConfig.getCodec());
  iwc.setUseCompoundFile(true); // always use compound on flush - reduces # of file-handles on refresh
  return iwc;
}
public static void main(String[] args) throws Exception {
  System.out.println("Starting up...");
  System.out.flush();
  Options opts = new Options();
  opts.addOption(Option.builder("i")
      .longOpt("input").hasArg().required().desc("Input file or directory to index").build());
  opts.addOption(Option.builder("x")
      .longOpt("index").hasArg().required().desc("Path to index file to generate").build());
  opts.addOption(Option.builder("h").longOpt("help").desc("Print this help message and exit").build());
  opts.addOption(Option.builder("v").longOpt("verbose").desc("Print verbose log output").build());
  HelpFormatter helpFormatter = new HelpFormatter();
  CommandLineParser cmdLineParser = new DefaultParser();
  CommandLine cmdLine = null;
  try {
    cmdLine = cmdLineParser.parse(opts, args);
  } catch (ParseException e) {
    System.out.println("Caught exception when parsing command line: " + e.getMessage());
    helpFormatter.printHelp("DocumentIndexer", opts);
    System.exit(1);
  }
  if (cmdLine.hasOption("help")) {
    helpFormatter.printHelp("DocumentIndexer", opts);
    System.exit(0);
  }
  if (cmdLine.hasOption("verbose")) {
    // With help from http://stackoverflow.com/questions/23434252/programmatically-change-log-level-in-log4j2
    LoggerContext ctx = (LoggerContext) LogManager.getContext(false);
    Configuration ctxConfig = ctx.getConfiguration();
    LoggerConfig logConfig = ctxConfig.getLoggerConfig(LogManager.ROOT_LOGGER_NAME);
    logConfig.setLevel(Level.DEBUG);
    ctx.updateLoggers();
    LOGGER.debug("Verbose logging enabled");
  }
  LOGGER.info("Opening index at " + cmdLine.getOptionValue("index"));
  Directory indexDir = FSDirectory.open(new File(cmdLine.getOptionValue("index")).toPath());
  /* The standard analyzer is too aggressive with chemical entities (it strips structural annotations, for one
   * thing), and the whitespace analyzer doesn't do any case normalization or stop word elimination. This custom
   * analyzer appears to treat chemical entities better than the standard analyzer without admitting too much
   * cruft to the index. */
  Analyzer analyzer = CustomAnalyzer.builder()
      .withTokenizer("whitespace")
      .addTokenFilter("lowercase")
      .addTokenFilter("stop")
      .build();
  IndexWriterConfig writerConfig = new IndexWriterConfig(analyzer);
  writerConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
  writerConfig.setRAMBufferSizeMB(1 << 10); // 1024 MB
  IndexWriter indexWriter = new IndexWriter(indexDir, writerConfig);
  String inputFileOrDir = cmdLine.getOptionValue("input");
  File splitFileOrDir = new File(inputFileOrDir);
  if (!splitFileOrDir.exists()) {
    LOGGER.error("Unable to find directory at " + inputFileOrDir);
    System.exit(1);
  }
  DocumentIndexer indexer = new DocumentIndexer(indexWriter);
  PatentCorpusReader corpusReader = new PatentCorpusReader(indexer, splitFileOrDir);
  corpusReader.readPatentCorpus();
  indexer.commitAndClose();
}
private void doTestSortedNumericBlocksOfVariousBitsPerValue(LongSupplier counts) throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
  conf.setMaxBufferedDocs(atLeast(Lucene80DocValuesFormat.NUMERIC_BLOCK_SIZE));
  conf.setRAMBufferSizeMB(-1);
  conf.setMergePolicy(newLogMergePolicy(random().nextBoolean()));
  IndexWriter writer = new IndexWriter(dir, conf);
  final int numDocs = atLeast(Lucene80DocValuesFormat.NUMERIC_BLOCK_SIZE * 3);
  final LongSupplier values = blocksOfVariousBPV();
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();
    int valueCount = (int) counts.getAsLong();
    long[] valueArray = new long[valueCount];
    for (int j = 0; j < valueCount; j++) {
      long value = values.getAsLong();
      valueArray[j] = value;
      doc.add(new SortedNumericDocValuesField("dv", value));
    }
    Arrays.sort(valueArray);
    for (int j = 0; j < valueCount; j++) {
      doc.add(new StoredField("stored", Long.toString(valueArray[j])));
    }
    writer.addDocument(doc);
    if (random().nextInt(31) == 0) {
      writer.commit();
    }
  }
  writer.forceMerge(1);
  writer.close();
  // compare
  DirectoryReader ir = DirectoryReader.open(dir);
  TestUtil.checkReader(ir);
  for (LeafReaderContext context : ir.leaves()) {
    LeafReader r = context.reader();
    SortedNumericDocValues docValues = DocValues.getSortedNumeric(r, "dv");
    for (int i = 0; i < r.maxDoc(); i++) {
      if (i > docValues.docID()) {
        docValues.nextDoc();
      }
      String[] expected = r.document(i).getValues("stored");
      if (i < docValues.docID()) {
        assertEquals(0, expected.length);
      } else {
        String[] actual = new String[docValues.docValueCount()];
        for (int j = 0; j < actual.length; j++) {
          actual[j] = Long.toString(docValues.nextValue());
        }
        assertArrayEquals(expected, actual);
      }
    }
  }
  ir.close();
  dir.close();
}
/**
 * Called by {@link org.apache.pinot.core.segment.creator.impl.SegmentColumnarIndexCreator}
 * when building an offline segment. Similar to how it creates the per-column
 * dictionary, forward and inverted indexes, a text index is also created
 * if text search is enabled on a column.
 * @param column column name
 * @param segmentIndexDir segment index directory
 * @param commit true if the index should be committed (at the end, after all documents have
 *               been added), false if the index should not be committed
 * Note on commit:
 * Once {@link org.apache.pinot.core.segment.creator.impl.SegmentColumnarIndexCreator}
 * finishes indexing all documents/rows for the segment, we need to commit and close
 * the Lucene index, which will internally persist the index on disk, do the necessary
 * resource cleanup, etc. We commit during {@link InvertedIndexCreator#seal()}
 * and close during {@link InvertedIndexCreator#close()}.
 * This Lucene index writer is used by both offline and realtime (both during
 * indexing of the in-memory MutableSegment and later during conversion to offline).
 * Since realtime segment conversion goes through the offline indexing path
 * again and does everything (indexing, commit, close, etc.), there is
 * no need to commit the index from the realtime side. So when the realtime segment
 * is destroyed (which is after the realtime segment has been committed and converted
 * to offline), we close this Lucene index writer to release resources but don't commit.
 * This is the reason the commit flag is part of the constructor.
 */
public LuceneTextIndexCreator(String column, File segmentIndexDir, boolean commit) {
  _textColumn = column;
  try {
    // segment generation is always in V1, and later we convert (as part of post-creation processing)
    // to V3 if segmentVersion is set to V3 in SegmentGeneratorConfig
    File indexFile = getV1TextIndexFile(segmentIndexDir);
    _indexDirectory = FSDirectory.open(indexFile.toPath());
    StandardAnalyzer standardAnalyzer = new StandardAnalyzer(ENGLISH_STOP_WORDS_SET);
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(standardAnalyzer);
    indexWriterConfig.setRAMBufferSizeMB(LUCENE_INDEX_MAX_BUFFER_SIZE_MB);
    indexWriterConfig.setCommitOnClose(commit);
    _indexWriter = new IndexWriter(_indexDirectory, indexWriterConfig);
  } catch (Exception e) {
    LOGGER.error("Failed to instantiate Lucene text index creator for column {}, exception {}", column, e.getMessage());
    throw new RuntimeException(e);
  }
}
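The commit flag above feeds straight into setCommitOnClose. A hedged sketch of the two lifecycles the Javadoc describes, assuming seal() and close() behave as the InvertedIndexCreator methods it names; the "reviewText" column name and the directory parameters are hypothetical placeholders:

import java.io.File;
import java.io.IOException;

public class TextIndexLifecycleSketch {
  // Offline segment build: commit == true, so the index is persisted on disk.
  static void offlineBuild(File segmentDir) throws IOException {
    LuceneTextIndexCreator creator = new LuceneTextIndexCreator("reviewText", segmentDir, true);
    // ... add all documents/rows for the segment ...
    creator.seal();  // commits the Lucene index (per the Javadoc above)
    creator.close(); // releases the writer and directory
  }

  // Realtime segment teardown: commit == false, so setCommitOnClose(false)
  // lets close() release resources without writing a commit point; the
  // offline conversion re-indexes everything anyway.
  static void realtimeTeardown(File consumingDir) throws IOException {
    LuceneTextIndexCreator creator = new LuceneTextIndexCreator("reviewText", consumingDir, false);
    // ... rows were indexed while the segment was consuming ...
    creator.close();
  }
}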