Listed below are example usages of org.apache.lucene.index.IndexWriterConfig#setMaxBufferedDocs(), collected from open-source projects; you can follow the links to view the full source code on GitHub.
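
As a quick orientation before the collected examples, here is the API in isolation: setMaxBufferedDocs controls how many documents IndexWriter buffers in RAM before flushing a new segment, and it works alongside the RAM-size trigger set via setRAMBufferSizeMB. The following is a minimal sketch assuming Lucene 8.x; the analyzer, directory type, and the threshold of 1000 are illustrative choices, not taken from any of the examples below.

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;

public class SetMaxBufferedDocsExample { // illustrative class name
  public static void main(String[] args) throws Exception {
    Directory dir = new ByteBuffersDirectory();
    IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
    // Flush a new segment whenever 1000 docs are buffered; the value must be
    // at least 2, or IndexWriterConfig.DISABLE_AUTO_FLUSH to turn this trigger off.
    iwc.setMaxBufferedDocs(1000);
    // Disable the RAM-size trigger so flushing is driven by doc count alone.
    iwc.setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
    try (IndexWriter writer = new IndexWriter(dir, iwc)) {
      Document doc = new Document();
      doc.add(new StringField("id", "1", Field.Store.YES));
      writer.addDocument(doc);
      writer.commit();
    }
    dir.close();
  }
}
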
private void verify(Object... shapes) throws Exception {
  IndexWriterConfig iwc = newIndexWriterConfig();
  // serial merges, else random seeds may not reproduce:
  iwc.setMergeScheduler(new SerialMergeScheduler());
  // raise a tiny maxBufferedDocs, else large inputs can hit O(N^2) merging:
  int mbd = iwc.getMaxBufferedDocs();
  if (mbd != -1 && mbd < shapes.length / 100) {
    iwc.setMaxBufferedDocs(shapes.length / 100);
  }
  Directory dir;
  if (shapes.length > 1000) {
    dir = newFSDirectory(createTempDir(getClass().getSimpleName()));
  } else {
    dir = newDirectory();
  }
  IndexWriter w = new IndexWriter(dir, iwc);
  // index random polygons
  indexRandomShapes(w, shapes);
  // query testing
  final IndexReader reader = DirectoryReader.open(w);
  // test random bbox queries
  verifyRandomQueries(reader, shapes);
  IOUtils.close(w, reader, dir);
}

protected static void createIndex(boolean doMultiSegment) throws Exception {
  if (VERBOSE) {
    System.out.println("TEST: setUp");
  }
  // prepare a small index with just a few documents
  dir = newDirectory();
  anlzr = new MockAnalyzer(random());
  IndexWriterConfig iwc = newIndexWriterConfig(anlzr).setMergePolicy(newLogMergePolicy());
  if (doMultiSegment) {
    iwc.setMaxBufferedDocs(TestUtil.nextInt(random(), 2, 7));
  }
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
  // add docs out of natural ID order, to verify that docs are really ordered by score
  int remaining = N_DOCS;
  boolean[] done = new boolean[N_DOCS];
  int i = 0;
  while (remaining > 0) {
    if (done[i]) {
      throw new Exception("to set up this test correctly, N_DOCS=" + N_DOCS + " must be prime and greater than 2!");
    }
    addDoc(iw, i);
    done[i] = true;
    i = (i + 4) % N_DOCS;
    remaining--;
  }
  if (!doMultiSegment) {
    if (VERBOSE) {
      System.out.println("TEST: setUp full merge");
    }
    iw.forceMerge(1);
  }
  iw.close();
  if (VERBOSE) {
    System.out.println("TEST: setUp done close");
  }
}

private IndexWriter createFastIndexWriter(Directory dir, int maxBufferedDocs) throws IOException {
  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
  conf.setMaxBufferedDocs(maxBufferedDocs);
  // -1 == IndexWriterConfig.DISABLE_AUTO_FLUSH: flush by doc count only, not by RAM usage
  conf.setRAMBufferSizeMB(-1);
  conf.setMergePolicy(newLogMergePolicy(random().nextBoolean()));
  return new IndexWriter(dir, conf);
}

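As a hypothetical illustration (the loop bound, flush threshold, and field name below are assumptions, not part of the example above), a caller could use this helper to force frequent flushes and spread a handful of documents across several small segments:

// Hypothetical usage of the helper above: flush every 5 buffered docs so
// that 20 documents end up in several small segments.
Directory dir = newDirectory();
IndexWriter w = createFastIndexWriter(dir, 5);
for (int i = 0; i < 20; i++) {
  Document doc = new Document();
  doc.add(newStringField("id", Integer.toString(i), Field.Store.NO));
  w.addDocument(doc);
}
w.close();
dir.close();
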
private void doTestSparseNumericBlocksOfVariousBitsPerValue(double density) throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
  conf.setMaxBufferedDocs(atLeast(Lucene80DocValuesFormat.NUMERIC_BLOCK_SIZE));
  conf.setRAMBufferSizeMB(-1);
  conf.setMergePolicy(newLogMergePolicy(random().nextBoolean()));
  IndexWriter writer = new IndexWriter(dir, conf);
  Document doc = new Document();
  Field storedField = newStringField("stored", "", Field.Store.YES);
  Field dvField = new NumericDocValuesField("dv", 0);
  doc.add(storedField);
  doc.add(dvField);
  final int numDocs = atLeast(Lucene80DocValuesFormat.NUMERIC_BLOCK_SIZE * 3);
  final LongSupplier longs = blocksOfVariousBPV();
  for (int i = 0; i < numDocs; i++) {
    if (random().nextDouble() > density) {
      writer.addDocument(new Document());
      continue;
    }
    long value = longs.getAsLong();
    storedField.setStringValue(Long.toString(value));
    dvField.setLongValue(value);
    writer.addDocument(doc);
  }
  writer.forceMerge(1);
  writer.close();
  // compare
  assertDVIterate(dir);
  assertDVAdvance(dir, 1); // tests all jump lengths from 1 to maxDoc (quite slow, ~1 minute for 200K docs)
  dir.close();
}

public IndexWriterConfig toIndexWriterConfig(SolrCore core) throws IOException {
  IndexSchema schema = core.getLatestSchema();
  IndexWriterConfig iwc = new IndexWriterConfig(new DelayedSchemaAnalyzer(core));
  if (maxBufferedDocs != -1) {
    iwc.setMaxBufferedDocs(maxBufferedDocs);
  }
  if (ramBufferSizeMB != -1) {
    iwc.setRAMBufferSizeMB(ramBufferSizeMB);
  }
  if (ramPerThreadHardLimitMB != -1) {
    iwc.setRAMPerThreadHardLimitMB(ramPerThreadHardLimitMB);
  }
  iwc.setSimilarity(schema.getSimilarity());
  MergePolicy mergePolicy = buildMergePolicy(core.getResourceLoader(), schema);
  iwc.setMergePolicy(mergePolicy);
  MergeScheduler mergeScheduler = buildMergeScheduler(core.getResourceLoader());
  iwc.setMergeScheduler(mergeScheduler);
  iwc.setInfoStream(infoStream);
  if (mergePolicy instanceof SortingMergePolicy) {
    Sort indexSort = ((SortingMergePolicy) mergePolicy).getSort();
    iwc.setIndexSort(indexSort);
  }
  iwc.setUseCompoundFile(useCompoundFile);
  if (mergedSegmentWarmerInfo != null) {
    // TODO: add infostream -> normal logging system (there is an issue somewhere)
    @SuppressWarnings({"rawtypes"})
    IndexReaderWarmer warmer = core.getResourceLoader().newInstance(mergedSegmentWarmerInfo.className,
        IndexReaderWarmer.class,
        null,
        new Class[] { InfoStream.class },
        new Object[] { iwc.getInfoStream() });
    iwc.setMergedSegmentWarmer(warmer);
  }
  return iwc;
}

public void testNRT() throws Exception {
  Directory dir = newDirectory();
  Directory taxoDir = newDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
  // Don't allow tiny maxBufferedDocs; it can make this
  // test too slow:
  iwc.setMaxBufferedDocs(Math.max(500, iwc.getMaxBufferedDocs()));
  // MockRandom/AlcoholicMergePolicy are too slow:
  TieredMergePolicy tmp = new TieredMergePolicy();
  tmp.setFloorSegmentMB(.001);
  iwc.setMergePolicy(tmp);
  final IndexWriter w = new IndexWriter(dir, iwc);
  final DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(taxoDir);
  final FacetsConfig config = new FacetsConfig();
  config.setMultiValued("field", true);
  final AtomicBoolean stop = new AtomicBoolean();
  // How many unique facets to index before stopping:
  final int ordLimit = TEST_NIGHTLY ? 100000 : 6000;
  Thread indexer = new IndexerThread(w, config, tw, null, ordLimit, stop);
  final SearcherTaxonomyManager mgr = new SearcherTaxonomyManager(w, true, null, tw);
  Thread reopener = new Thread() {
    @Override
    public void run() {
      while (!stop.get()) {
        try {
          // Sleep for up to 20 msec:
          Thread.sleep(random().nextInt(20));
          if (VERBOSE) {
            System.out.println("TEST: reopen");
          }
          mgr.maybeRefresh();
          if (VERBOSE) {
            System.out.println("TEST: reopen done");
          }
        } catch (Exception ioe) {
          throw new RuntimeException(ioe);
        }
      }
    }
  };
  reopener.setName("reopener");
  reopener.start();
  indexer.setName("indexer");
  indexer.start();
  try {
    while (!stop.get()) {
      SearcherAndTaxonomy pair = mgr.acquire();
      try {
        //System.out.println("search maxOrd=" + pair.taxonomyReader.getSize());
        FacetsCollector sfc = new FacetsCollector();
        pair.searcher.search(new MatchAllDocsQuery(), sfc);
        Facets facets = getTaxonomyFacetCounts(pair.taxonomyReader, config, sfc);
        FacetResult result = facets.getTopChildren(10, "field");
        if (pair.searcher.getIndexReader().numDocs() > 0) {
          //System.out.println(pair.taxonomyReader.getSize());
          assertTrue(result.childCount > 0);
          assertTrue(result.labelValues.length > 0);
        }
        //if (VERBOSE) {
        //System.out.println("TEST: facets=" + FacetTestUtils.toString(results.get(0)));
        //}
      } finally {
        mgr.release(pair);
      }
    }
  } finally {
    indexer.join();
    reopener.join();
  }
  if (VERBOSE) {
    System.out.println("TEST: now stop");
  }
  w.close();
  IOUtils.close(mgr, tw, taxoDir, dir);
}

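The acquire/release discipline inside the search loop above is the general pattern for SearcherTaxonomyManager, distilled here as a sketch (variable names follow the test above):

SearcherAndTaxonomy pair = mgr.acquire();
try {
  FacetsCollector fc = new FacetsCollector();
  pair.searcher.search(new MatchAllDocsQuery(), fc);
  // ... compute facets against pair.taxonomyReader ...
} finally {
  mgr.release(pair); // always release, even if the search throws
}
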
public void testDeletePartiallyWrittenFilesIfAbort() throws IOException {
  Directory dir = newDirectory();
  IndexWriterConfig iwConf = newIndexWriterConfig(new MockAnalyzer(random()));
  iwConf.setMaxBufferedDocs(RandomNumbers.randomIntBetween(random(), 2, 30));
  iwConf.setCodec(getCodec());
  // disable CFS because this test checks file names
  iwConf.setMergePolicy(newLogMergePolicy(false));
  iwConf.setUseCompoundFile(false);
  // Cannot use RIW because this test wants CFS to stay off:
  IndexWriter iw = new IndexWriter(dir, iwConf);
  final Document validDoc = new Document();
  validDoc.add(new IntPoint("id", 0));
  validDoc.add(new StoredField("id", 0));
  iw.addDocument(validDoc);
  iw.commit();
  // make sure that #writeField will fail to trigger an abort
  final Document invalidDoc = new Document();
  FieldType fieldType = new FieldType();
  fieldType.setStored(true);
  invalidDoc.add(new Field("invalid", fieldType) {
    @Override
    public String stringValue() {
      // TODO: really bad & scary that this causes IW to
      // abort the segment!! We should fix this.
      return null;
    }
  });
  try {
    iw.addDocument(invalidDoc);
    iw.commit();
  } catch (IllegalArgumentException iae) {
    // expected
    assertEquals(iae, iw.getTragicException());
  }
  // Writer should be closed by tragedy
  assertFalse(iw.isOpen());
  dir.close();
}

private void doTestSortedNumericBlocksOfVariousBitsPerValue(LongSupplier counts) throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
  conf.setMaxBufferedDocs(atLeast(Lucene80DocValuesFormat.NUMERIC_BLOCK_SIZE));
  conf.setRAMBufferSizeMB(-1);
  conf.setMergePolicy(newLogMergePolicy(random().nextBoolean()));
  IndexWriter writer = new IndexWriter(dir, conf);
  final int numDocs = atLeast(Lucene80DocValuesFormat.NUMERIC_BLOCK_SIZE * 3);
  final LongSupplier values = blocksOfVariousBPV();
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();
    int valueCount = (int) counts.getAsLong();
    long[] valueArray = new long[valueCount];
    for (int j = 0; j < valueCount; j++) {
      long value = values.getAsLong();
      valueArray[j] = value;
      doc.add(new SortedNumericDocValuesField("dv", value));
    }
    Arrays.sort(valueArray);
    for (int j = 0; j < valueCount; j++) {
      doc.add(new StoredField("stored", Long.toString(valueArray[j])));
    }
    writer.addDocument(doc);
    if (random().nextInt(31) == 0) {
      writer.commit();
    }
  }
  writer.forceMerge(1);
  writer.close();
  // compare
  DirectoryReader ir = DirectoryReader.open(dir);
  TestUtil.checkReader(ir);
  for (LeafReaderContext context : ir.leaves()) {
    LeafReader r = context.reader();
    SortedNumericDocValues docValues = DocValues.getSortedNumeric(r, "dv");
    for (int i = 0; i < r.maxDoc(); i++) {
      if (i > docValues.docID()) {
        docValues.nextDoc();
      }
      String[] expected = r.document(i).getValues("stored");
      if (i < docValues.docID()) {
        assertEquals(0, expected.length);
      } else {
        String[] actual = new String[docValues.docValueCount()];
        for (int j = 0; j < actual.length; j++) {
          actual[j] = Long.toString(docValues.nextValue());
        }
        assertArrayEquals(expected, actual);
      }
    }
  }
  ir.close();
  dir.close();
}

@Test
public void testChangeCodecAndMerge() throws IOException {
  Directory dir = newDirectory();
  if (VERBOSE) {
    System.out.println("TEST: make new index");
  }
  IndexWriterConfig iwconf = newIndexWriterConfig(new MockAnalyzer(random()))
      .setOpenMode(OpenMode.CREATE).setCodec(new MockCodec());
  iwconf.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
  //((LogMergePolicy) iwconf.getMergePolicy()).setMergeFactor(10);
  IndexWriter writer = newWriter(dir, iwconf);
  addDocs(writer, 10);
  writer.commit();
  assertQuery(new Term("content", "aaa"), dir, 10);
  if (VERBOSE) {
    System.out.println("TEST: addDocs3");
  }
  addDocs3(writer, 10);
  writer.commit();
  writer.close();
  assertQuery(new Term("content", "ccc"), dir, 10);
  assertQuery(new Term("content", "aaa"), dir, 10);
  Codec codec = iwconf.getCodec();
  iwconf = newIndexWriterConfig(new MockAnalyzer(random()))
      .setOpenMode(OpenMode.APPEND).setCodec(codec);
  //((LogMergePolicy) iwconf.getMergePolicy()).setNoCFSRatio(0.0);
  //((LogMergePolicy) iwconf.getMergePolicy()).setMergeFactor(10);
  iwconf.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
  iwconf.setCodec(new MockCodec()); // uses standard for field content
  writer = newWriter(dir, iwconf);
  // swap in new codec for currently written segments
  if (VERBOSE) {
    System.out.println("TEST: add docs w/ Standard codec for content field");
  }
  addDocs2(writer, 10);
  writer.commit();
  codec = iwconf.getCodec();
  assertEquals(30, writer.getDocStats().maxDoc);
  assertQuery(new Term("content", "bbb"), dir, 10);
  assertQuery(new Term("content", "ccc"), dir, 10);
  assertQuery(new Term("content", "aaa"), dir, 10);
  if (VERBOSE) {
    System.out.println("TEST: add more docs w/ new codec");
  }
  addDocs2(writer, 10);
  writer.commit();
  assertQuery(new Term("content", "ccc"), dir, 10);
  assertQuery(new Term("content", "bbb"), dir, 20);
  assertQuery(new Term("content", "aaa"), dir, 10);
  assertEquals(40, writer.getDocStats().maxDoc);
  if (VERBOSE) {
    System.out.println("TEST: now optimize");
  }
  writer.forceMerge(1);
  assertEquals(40, writer.getDocStats().maxDoc);
  writer.close();
  assertQuery(new Term("content", "ccc"), dir, 10);
  assertQuery(new Term("content", "bbb"), dir, 20);
  assertQuery(new Term("content", "aaa"), dir, 10);
  dir.close();
}

/** test we can search for a polygon with a large number of vertices */
public void testLargeVertexPolygon() throws Exception {
  int numVertices = TEST_NIGHTLY ? TestUtil.nextInt(random(), 200000, 500000) : TestUtil.nextInt(random(), 20000, 50000);
  IndexWriterConfig iwc = newIndexWriterConfig();
  iwc.setMergeScheduler(new SerialMergeScheduler());
  int mbd = iwc.getMaxBufferedDocs();
  if (mbd != -1 && mbd < numVertices / 100) {
    iwc.setMaxBufferedDocs(numVertices / 100);
  }
  Directory dir = newFSDirectory(createTempDir(getClass().getSimpleName()));
  IndexWriter writer = new IndexWriter(dir, iwc);
  // add a random polygon without a hole
  Polygon p = GeoTestUtil.createRegularPolygon(0, 90, atLeast(1000000), numVertices);
  Document document = new Document();
  addPolygonsToDoc(FIELDNAME, document, p);
  writer.addDocument(document);
  // add a random polygon with a hole
  Polygon inner = new Polygon(new double[] {-1d, -1d, 1d, 1d, -1d},
      new double[] {-91d, -89d, -89d, -91d, -91d});
  Polygon outer = GeoTestUtil.createRegularPolygon(0, -90, atLeast(1000000), numVertices);
  document = new Document();
  addPolygonsToDoc(FIELDNAME, document, new Polygon(outer.getPolyLats(), outer.getPolyLons(), inner));
  writer.addDocument(document);
  ////// search //////
  // search an intersecting bbox
  IndexReader reader = DirectoryReader.open(writer);
  writer.close();
  IndexSearcher searcher = newSearcher(reader);
  Query q = newRectQuery(FIELDNAME, -1d, 1d, p.minLon, p.maxLon);
  assertEquals(1, searcher.count(q));
  // search a disjoint bbox
  q = newRectQuery(FIELDNAME, p.minLat - 1d, p.minLat + 1d, p.minLon - 1d, p.minLon + 1d);
  assertEquals(0, searcher.count(q));
  // search a bbox in the hole
  q = newRectQuery(FIELDNAME, inner.minLat + 1e-6, inner.maxLat - 1e-6, inner.minLon + 1e-6, inner.maxLon - 1e-6);
  assertEquals(0, searcher.count(q));
  IOUtils.close(reader, dir);
}