下面列出了怎么用org.apache.lucene.index.SerialMergeScheduler的API类实例代码及写法,或者点击链接到github查看源代码。
/**
 * Indexes the given shapes and then runs randomized queries against them.
 *
 * @param shapes shapes to index; the count also drives buffer sizing and the
 *               choice of on-disk vs. in-memory directory
 * @throws Exception on any indexing or search failure
 */
private void verify(Object... shapes) throws Exception {
  // Serial merging keeps runs reproducible for a given random seed.
  IndexWriterConfig iwc = newIndexWriterConfig();
  iwc.setMergeScheduler(new SerialMergeScheduler());
  // Grow the doc buffer when it is small relative to the shape count.
  final int maxBuffered = iwc.getMaxBufferedDocs();
  if (maxBuffered != -1 && maxBuffered < shapes.length / 100) {
    iwc.setMaxBufferedDocs(shapes.length / 100);
  }
  // Large inputs go to disk; small ones stay in RAM.
  final Directory dir =
      shapes.length > 1000
          ? newFSDirectory(createTempDir(getClass().getSimpleName()))
          : newDirectory();
  IndexWriter w = new IndexWriter(dir, iwc);
  // index random polygons
  indexRandomShapes(w, shapes);
  // query testing: open a reader over the writer and run random bbox queries
  final IndexReader reader = DirectoryReader.open(w);
  verifyRandomQueries(reader, shapes);
  IOUtils.close(w, reader, dir);
}
/**
 * Randomized nearest-neighbor test: indexes random (quantized) lat/lon points,
 * then for many random query points compares
 * {@code LatLonPointPrototypeQueries.nearest} and a distance {@code Sort}
 * against a brute-force haversine ranking.
 *
 * @throws Exception on any indexing or search failure
 */
public void testNearestNeighborRandom() throws Exception {
  int numPoints = atLeast(1000);
  Directory dir;
  if (numPoints > 100000) {
    // very large point counts go on disk to avoid heap pressure
    dir = newFSDirectory(createTempDir(getClass().getSimpleName()));
  } else {
    dir = newDirectory();
  }
  double[] lats = new double[numPoints];
  double[] lons = new double[numPoints];
  IndexWriterConfig iwc = getIndexWriterConfig();
  iwc.setMergePolicy(newLogMergePolicy());
  // serial merging keeps runs reproducible for a given seed
  iwc.setMergeScheduler(new SerialMergeScheduler());
  RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
  for (int id = 0; id < numPoints; id++) {
    // quantize up front so the brute-force expectation matches index-time precision
    lats[id] = quantizeLat(GeoTestUtil.nextLatitude());
    lons[id] = quantizeLon(GeoTestUtil.nextLongitude());
    Document doc = new Document();
    doc.add(new LatLonPoint("point", lats[id], lons[id]));
    doc.add(new LatLonDocValuesField("point", lats[id], lons[id]));
    doc.add(new StoredField("id", id));
    w.addDocument(doc);
  }
  if (random().nextBoolean()) {
    w.forceMerge(1);
  }
  DirectoryReader r = w.getReader();
  if (VERBOSE) {
    System.out.println("TEST: reader=" + r);
  }
  // can't wrap because we require Lucene60PointsFormat directly but e.g. ParallelReader wraps with its own points impl:
  IndexSearcher s = newSearcher(r, false);
  int iters = atLeast(100);
  for (int iter = 0; iter < iters; iter++) {
    if (VERBOSE) {
      System.out.println("\nTEST: iter=" + iter);
    }
    double pointLat = GeoTestUtil.nextLatitude();
    double pointLon = GeoTestUtil.nextLongitude();
    // dumb brute force search to get the expected result:
    FieldDoc[] expectedHits = new FieldDoc[lats.length];
    for (int id = 0; id < lats.length; id++) {
      double distance = SloppyMath.haversinMeters(pointLat, pointLon, lats[id], lons[id]);
      expectedHits[id] = new FieldDoc(id, 0.0f, new Object[] {Double.valueOf(distance)});
    }
    // Sort ascending by distance, tie-breaking on smaller docID. Comparator
    // chaining replaces the original hand-rolled comparator that used
    // subtraction-based int comparison (an overflow-prone anti-pattern).
    Arrays.sort(
        expectedHits,
        Comparator.<FieldDoc>comparingDouble(fd -> ((Double) fd.fields[0]).doubleValue())
            .thenComparingInt(fd -> fd.doc));
    int topN = TestUtil.nextInt(random(), 1, lats.length);
    if (VERBOSE) {
      System.out.println("\nhits for pointLat=" + pointLat + " pointLon=" + pointLon);
    }
    // Also test with MatchAllDocsQuery, sorting by distance:
    TopFieldDocs fieldDocs = s.search(new MatchAllDocsQuery(), topN, new Sort(LatLonDocValuesField.newDistanceSort("point", pointLat, pointLon)));
    ScoreDoc[] hits = LatLonPointPrototypeQueries.nearest(s, "point", pointLat, pointLon, topN).scoreDocs;
    for (int i = 0; i < topN; i++) {
      FieldDoc expected = expectedHits[i];
      FieldDoc expected2 = (FieldDoc) fieldDocs.scoreDocs[i];
      FieldDoc actual = (FieldDoc) hits[i];
      Document actualDoc = r.document(actual.doc);
      if (VERBOSE) {
        System.out.println("hit " + i);
        System.out.println("  expected id=" + expected.doc + " lat=" + lats[expected.doc] + " lon=" + lons[expected.doc]
            + " distance=" + ((Double) expected.fields[0]).doubleValue() + " meters");
        System.out.println("  actual id=" + actualDoc.getField("id") + " distance=" + actual.fields[0] + " meters");
      }
      // nearest() must agree with brute force on both docID and distance
      assertEquals(expected.doc, actual.doc);
      assertEquals(((Double) expected.fields[0]).doubleValue(), ((Double) actual.fields[0]).doubleValue(), 0.0);
      // and the distance Sort must agree with nearest()
      assertEquals(expected2.doc, actual.doc);
      assertEquals(((Double) expected2.fields[0]).doubleValue(), ((Double) actual.fields[0]).doubleValue(), 0.0);
    }
  }
  r.close();
  w.close();
  dir.close();
}
/**
 * Verifies that rectangle query boundaries are inclusive: points placed exactly
 * on the (quantized) edges and corners of a box must match the box query, a
 * one-ulp-larger box must still match them all, and a one-ulp-smaller box must
 * match none of them.
 *
 * @throws Exception on any indexing or search failure
 */
public void testRectBoundariesAreInclusive() throws Exception {
  Rectangle rect;
  // TODO: why this dateline leniency???
  while (true) {
    rect = nextBox();
    if (rect.crossesDateline() == false) {
      break;
    }
  }
  // this test works in quantized space: for testing inclusiveness of exact edges it must be aware of index-time quantization!
  rect = new Rectangle(quantizeLat(rect.minLat), quantizeLat(rect.maxLat), quantizeLon(rect.minLon), quantizeLon(rect.maxLon));
  Directory dir = newDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig();
  // Else seeds may not reproduce:
  iwc.setMergeScheduler(new SerialMergeScheduler());
  RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
  // index 8 points: the 4 corners plus the 4 edge midpoints (center skipped)
  for (int x = 0; x < 3; x++) {
    double lat;
    if (x == 0) {
      lat = rect.minLat;
    } else if (x == 1) {
      lat = quantizeLat((rect.minLat + rect.maxLat) / 2.0);
    } else {
      lat = rect.maxLat;
    }
    for (int y = 0; y < 3; y++) {
      double lon;
      if (y == 0) {
        lon = rect.minLon;
      } else if (y == 1) {
        if (x == 1) {
          continue; // skip the interior center point
        }
        lon = quantizeLon((rect.minLon + rect.maxLon) / 2.0);
      } else {
        lon = rect.maxLon;
      }
      Document doc = new Document();
      addPointToDoc(FIELD_NAME, doc, lat, lon);
      w.addDocument(doc);
    }
  }
  IndexReader r = w.getReader();
  IndexSearcher s = newSearcher(r, false);
  // exact edge cases
  assertEquals(8, s.count(newRectQuery(FIELD_NAME, rect.minLat, rect.maxLat, rect.minLon, rect.maxLon)));
  // expand 1 ulp in each direction if possible and test a slightly larger box!
  if (rect.minLat != -90) {
    assertEquals(8, s.count(newRectQuery(FIELD_NAME, Math.nextDown(rect.minLat), rect.maxLat, rect.minLon, rect.maxLon)));
  }
  if (rect.maxLat != 90) {
    assertEquals(8, s.count(newRectQuery(FIELD_NAME, rect.minLat, Math.nextUp(rect.maxLat), rect.minLon, rect.maxLon)));
  }
  if (rect.minLon != -180) {
    assertEquals(8, s.count(newRectQuery(FIELD_NAME, rect.minLat, rect.maxLat, Math.nextDown(rect.minLon), rect.maxLon)));
  }
  if (rect.maxLon != 180) {
    assertEquals(8, s.count(newRectQuery(FIELD_NAME, rect.minLat, rect.maxLat, rect.minLon, Math.nextUp(rect.maxLon))));
  }
  // now shrink 1 ulp in each direction if possible: it should not include bogus stuff
  // we can't shrink if values are already at extremes, and
  // we can't do this if rectangle is actually a line or we will create a cross-dateline query
  // BUGFIX: the extreme that prevents Math.nextUp(rect.minLon) is +180 (the
  // original condition had a typo "rect.minLon != 80"), symmetric with the
  // "rect.maxLon != -180" guard for Math.nextDown(rect.maxLon).
  if (rect.minLat != 90 && rect.maxLat != -90 && rect.minLon != 180 && rect.maxLon != -180 && rect.minLon != rect.maxLon) {
    // note we put points on "sides" not just "corners" so we just shrink all 4 at once for now: it should exclude all points!
    assertEquals(0, s.count(newRectQuery(FIELD_NAME, Math.nextUp(rect.minLat),
        Math.nextDown(rect.maxLat),
        Math.nextUp(rect.minLon),
        Math.nextDown(rect.maxLon))));
  }
  r.close();
  w.close();
  dir.close();
}
/**
 * Verifies that XY rectangle query boundaries are inclusive: the 8 points on a
 * random box's edges/corners match the exact box query and a one-ulp-larger box.
 *
 * @throws Exception on any indexing or search failure
 */
public void testRectBoundariesAreInclusive() throws Exception {
  XYRectangle rect = ShapeTestUtil.nextBox(random());
  Directory dir = newDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig();
  // Else seeds may not reproduce:
  iwc.setMergeScheduler(new SerialMergeScheduler());
  RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
  // Candidate coordinates: min edge, midpoint, max edge. Midpoints are
  // computed in double precision before narrowing back to float.
  final float midY = (float) (((double) rect.minY + rect.maxY) / 2.0);
  final float midX = (float) (((double) rect.minX + rect.maxX) / 2.0);
  final float[] ys = {rect.minY, midY, rect.maxY};
  final float[] xs = {rect.minX, midX, rect.maxX};
  for (int row = 0; row < 3; row++) {
    for (int col = 0; col < 3; col++) {
      if (row == 1 && col == 1) {
        continue; // interior center point: only the 8 boundary points are wanted
      }
      Document doc = new Document();
      addPointToDoc(FIELD_NAME, doc, xs[col], ys[row]);
      w.addDocument(doc);
    }
  }
  IndexReader r = w.getReader();
  IndexSearcher s = newSearcher(r, false);
  // exact edge cases
  assertEquals(8, s.count(newRectQuery(FIELD_NAME, rect.minX, rect.maxX, rect.minY, rect.maxY)));
  // expand 1 ulp in each direction if possible and test a slightly larger box!
  if (rect.minX != -Float.MAX_VALUE) {
    assertEquals(8, s.count(newRectQuery(FIELD_NAME, Math.nextDown(rect.minX), rect.maxX, rect.minY, rect.maxY)));
  }
  if (rect.maxX != Float.MAX_VALUE) {
    assertEquals(8, s.count(newRectQuery(FIELD_NAME, rect.minX, Math.nextUp(rect.maxX), rect.minY, rect.maxY)));
  }
  if (rect.minY != -Float.MAX_VALUE) {
    assertEquals(8, s.count(newRectQuery(FIELD_NAME, rect.minX, rect.maxX, Math.nextDown(rect.minY), rect.maxY)));
  }
  if (rect.maxY != Float.MAX_VALUE) {
    assertEquals(8, s.count(newRectQuery(FIELD_NAME, rect.minX, rect.maxX, rect.minY, Math.nextUp(rect.maxY))));
  }
  r.close();
  w.close();
  dir.close();
}
/**
 * Builds a randomized index into the instance fields {@code dir}, {@code terms},
 * {@code iw} and {@code reader}, applying the field {@code sort} as the index sort.
 * Left as-is structurally: correctness of seed reproduction depends on the exact
 * ordering of random() calls below.
 *
 * @param singleSortedSegment when true, force-merge down to a single segment
 * @throws IOException on indexing failure
 */
private void createRandomIndex(boolean singleSortedSegment) throws IOException {
dir = newDirectory();
numDocs = atLeast(150);
// between 1 and numDocs/5 distinct random terms, deduplicated via a Set
final int numTerms = TestUtil.nextInt(random(), 1, numDocs / 5);
Set<String> randomTerms = new HashSet<>();
while (randomTerms.size() < numTerms) {
randomTerms.add(TestUtil.randomSimpleString(random()));
}
terms = new ArrayList<>(randomTerms);
// fix one seed and reuse it for both the analyzer and the writer so the
// whole run reproduces from a single value
final long seed = random().nextLong();
final IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(new Random(seed)));
if (iwc.getMergePolicy() instanceof MockRandomMergePolicy) {
// MockRandomMP randomly wraps the leaf readers which makes merging angry
iwc.setMergePolicy(newTieredMergePolicy());
}
iwc.setMergeScheduler(new SerialMergeScheduler()); // for reproducible tests
iwc.setIndexSort(sort);
iw = new RandomIndexWriter(new Random(seed), dir, iwc);
iw.setDoRandomForceMerge(false); // don't do this, it may happen anyway with MockRandomMP
for (int i = 0; i < numDocs; ++i) {
final Document doc = randomDocument();
iw.addDocument(doc);
// commit at the midpoint and occasionally at random (never on the last doc)
if (i == numDocs / 2 || (i != numDocs - 1 && random().nextInt(8) == 0)) {
iw.commit();
}
// occasionally delete by a random term to exercise merges with deletions
if (random().nextInt(15) == 0) {
final String term = RandomPicks.randomFrom(random(), terms);
iw.deleteDocuments(new Term("s", term));
}
}
if (singleSortedSegment) {
iw.forceMerge(1);
}
else if (random().nextBoolean()) {
iw.forceMerge(FORCE_MERGE_MAX_SEGMENT_COUNT);
}
reader = iw.getReader();
// guarantee the reader is non-empty: random deletions may have removed everything
if (reader.numDocs() == 0) {
iw.addDocument(new Document());
reader.close();
reader = iw.getReader();
}
}
/** test we can search for a point with a large number of vertices*/
public void testLargeVertexPolygon() throws Exception {
  // nightly runs use an order of magnitude more vertices
  final int vertexCount = TEST_NIGHTLY
      ? TestUtil.nextInt(random(), 200000, 500000)
      : TestUtil.nextInt(random(), 20000, 50000);
  IndexWriterConfig config = newIndexWriterConfig();
  config.setMergeScheduler(new SerialMergeScheduler());
  // grow the doc buffer when it is small relative to the vertex count
  final int maxBuffered = config.getMaxBufferedDocs();
  if (maxBuffered != -1 && maxBuffered < vertexCount / 100) {
    config.setMaxBufferedDocs(vertexCount / 100);
  }
  Directory dir = newFSDirectory(createTempDir(getClass().getSimpleName()));
  IndexWriter writer = new IndexWriter(dir, config);
  // doc 1: a random regular polygon (no hole) centered at the north pole
  Polygon p = GeoTestUtil.createRegularPolygon(0, 90, atLeast(1000000), vertexCount);
  Document document = new Document();
  addPolygonsToDoc(FIELDNAME, document, p);
  writer.addDocument(document);
  // doc 2: a random regular polygon at the south pole with a small hole cut out
  Polygon inner = new Polygon(new double[] {-1d, -1d, 1d, 1d, -1d},
      new double[] {-91d, -89d, -89d, -91.0, -91.0});
  Polygon outer = GeoTestUtil.createRegularPolygon(0, -90, atLeast(1000000), vertexCount);
  document = new Document();
  addPolygonsToDoc(FIELDNAME, document, new Polygon(outer.getPolyLats(), outer.getPolyLons(), inner));
  writer.addDocument(document);
  ////// search /////
  IndexReader reader = DirectoryReader.open(writer);
  writer.close();
  IndexSearcher searcher = newSearcher(reader);
  // a bbox intersecting the first polygon matches exactly one doc
  Query q = newRectQuery(FIELDNAME, -1d, 1d, p.minLon, p.maxLon);
  assertEquals(1, searcher.count(q));
  // a disjoint bbox matches nothing
  q = newRectQuery(FIELDNAME, p.minLat - 1d, p.minLat + 1, p.minLon - 1d, p.minLon + 1d);
  assertEquals(0, searcher.count(q));
  // a bbox strictly inside the hole matches nothing
  q = newRectQuery(FIELDNAME, inner.minLat + 1e-6, inner.maxLat - 1e-6, inner.minLon + 1e-6, inner.maxLon - 1e-6);
  assertEquals(0, searcher.count(q));
  IOUtils.close(reader, dir);
}
@Test
public void testMulipleCommitsAndReopens() throws IOException {
  // Serial merging plus non-compound files make the on-disk file set predictable.
  IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_43, new KeywordAnalyzer());
  conf.setMergeScheduler(new SerialMergeScheduler());
  TieredMergePolicy mergePolicy = (TieredMergePolicy) conf.getMergePolicy();
  mergePolicy.setUseCompoundFile(false);
  Set<String> fileSet = new TreeSet<String>();
  // Print the seed so failing runs can be replayed.
  long seed = new Random().nextLong();
  System.out.println("Seed:" + seed);
  Random random = new Random(seed);
  int docCount = 0;
  int passes = 10;
  byte[] segmentsGenContents = null;
  for (int run = 0; run < passes; run++) {
    // Re-open the directory each pass to simulate a reopen across restarts.
    final FastHdfsKeyValueDirectory directory =
        new FastHdfsKeyValueDirectory(false, _timer, _configuration,
            new Path(_path, "test_multiple_commits_reopens"));
    // segments.gen contents must survive the reopen unchanged
    if (segmentsGenContents != null) {
      byte[] currentContents = readSegmentsGen(directory);
      assertTrue(Arrays.equals(segmentsGenContents, currentContents));
    }
    assertFiles(fileSet, run, -1, directory);
    assertEquals(docCount, getDocumentCount(directory));
    // NOTE(review): the writer is never closed — presumably this directory
    // implementation does not enforce write locks across passes; confirm.
    IndexWriter writer = new IndexWriter(directory, conf.clone());
    int numberOfCommits = random.nextInt(100);
    for (int commit = 0; commit < numberOfCommits; commit++) {
      assertFiles(fileSet, run, commit, directory);
      addDocuments(writer, random.nextInt(100));
      // Before Commit
      writer.commit();
      // After Commit: record the file set of the single surviving commit point
      fileSet.clear();
      List<IndexCommit> listCommits = DirectoryReader.listCommits(directory);
      assertEquals(1, listCommits.size());
      fileSet.addAll(listCommits.get(0).getFileNames());
      segmentsGenContents = readSegmentsGen(directory);
    }
    docCount = getDocumentCount(directory);
  }
}