The following are example usages of org.apache.lucene.index.IndexWriter#forceMerge().
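All of the snippets below share one pattern: add (or delete) documents through an IndexWriter, then call forceMerge(n) to merge the index down to at most n segments. Before the real examples, here is a minimal, self-contained sketch of that pattern; the class name, field name, and document contents are illustrative only, not taken from any example below:

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;

public class ForceMergeExample {
  public static void main(String[] args) throws Exception {
    // Resources are closed in reverse order: the writer first, then the directory.
    try (Directory dir = new ByteBuffersDirectory();
         IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
      for (int i = 0; i < 100; i++) {
        Document doc = new Document();
        doc.add(new TextField("body", "document number " + i, Field.Store.NO));
        writer.addDocument(doc);
        if ((i + 1) % 10 == 0) {
          writer.commit(); // flush periodically so the index really has several segments
        }
      }
      // Merge everything down to a single segment. This is an expensive, blocking
      // call, best reserved for indexes that will rarely change again.
      writer.forceMerge(1);
    }
  }
}

// The collected examples follow. This first one indexes random shapes, interleaves
// random deletes, and sometimes force-merges down to a single segment at the end.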
protected void indexRandomShapes(IndexWriter w, Object... shapes) throws Exception {
Set<Integer> deleted = new HashSet<>();
for (int id = 0; id < shapes.length; ++id) {
Document doc = new Document();
doc.add(newStringField("id", "" + id, Field.Store.NO));
doc.add(new NumericDocValuesField("id", id));
if (shapes[id] != null) {
addShapeToDoc(FIELD_NAME, doc, shapes[id]);
}
w.addDocument(doc);
if (id > 0 && random().nextInt(100) == 42) {
int idToDelete = random().nextInt(id);
w.deleteDocuments(new Term("id", ""+idToDelete));
deleted.add(idToDelete);
if (VERBOSE) {
System.out.println(" delete id=" + idToDelete);
}
}
}
if (randomBoolean()) {
w.forceMerge(1);
}
}
/**
 * Sets up a RAM-resident Directory and adds documents (using English.intToEnglish()) with three
 * fields (FIELD, MULTI_FIELD, and NO_PAYLOAD_FIELD), analyzed with the PayloadAnalyzer.
 * @param random The source of randomness for the MockDirectoryWrapper
 * @param similarity The Similarity class to use in the Searcher
 * @param numDocs The number of docs to add
 * @return An IndexSearcher
 */
// TODO: randomize
public IndexSearcher setUp(Random random, Similarity similarity, int numDocs) throws IOException {
Directory directory = new MockDirectoryWrapper(random, new ByteBuffersDirectory());
PayloadAnalyzer analyzer = new PayloadAnalyzer();
// TODO randomize this
IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(
analyzer).setSimilarity(similarity));
// writer.infoStream = System.out;
for (int i = 0; i < numDocs; i++) {
Document doc = new Document();
doc.add(new TextField(FIELD, English.intToEnglish(i), Field.Store.YES));
doc.add(new TextField(MULTI_FIELD, English.intToEnglish(i) + " " + English.intToEnglish(i), Field.Store.YES));
doc.add(new TextField(NO_PAYLOAD_FIELD, English.intToEnglish(i), Field.Store.YES));
writer.addDocument(doc);
}
writer.forceMerge(1);
reader = DirectoryReader.open(writer);
writer.close();
IndexSearcher searcher = LuceneTestCase.newSearcher(LuceneTestCase.getOnlyLeafReader(reader));
searcher.setSimilarity(similarity);
return searcher;
}
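// Fills dir with nDocs documents drawn from nTerms single-character terms whose
// selection probability follows a power law (earlier terms land in fewer documents),
// then force-merges to one segment.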
public void createRandomTerms(int nDocs, int nTerms, double power, Directory dir) throws Exception {
int[] freq = new int[nTerms];
Term[] terms = new Term[nTerms];
for (int i=0; i<nTerms; i++) {
int f = (nTerms+1)-i; // make first terms less frequent
freq[i] = (int)Math.ceil(Math.pow(f,power));
terms[i] = new Term("f",Character.toString((char)('A'+i)));
}
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())).setOpenMode(OpenMode.CREATE));
for (int i=0; i<nDocs; i++) {
Document d = new Document();
for (int j=0; j<nTerms; j++) {
if (random().nextInt(freq[j]) == 0) {
d.add(newStringField("f", terms[j].text(), Field.Store.NO));
//System.out.println(d);
}
}
iw.addDocument(d);
}
iw.forceMerge(1);
iw.close();
}
/** Test that we throw an exception on a multi-valued field: uninverting it as SORTED would create a corrupt reader, so SORTED_SET must be used instead. */
public void testMultiValuedField() throws IOException {
Directory indexStore = newDirectory();
IndexWriter writer = new IndexWriter(indexStore, newIndexWriterConfig(new MockAnalyzer(random())));
for(int i=0; i<5; i++) {
Document doc = new Document();
doc.add(new StringField("string", "a"+i, Field.Store.NO));
doc.add(new StringField("string", "b"+i, Field.Store.NO));
writer.addDocument(doc);
}
writer.forceMerge(1); // enforce one segment to have a higher unique term count in all cases
writer.close();
Sort sort = new Sort(
new SortField("string", SortField.Type.STRING),
SortField.FIELD_DOC);
IndexReader reader = UninvertingReader.wrap(DirectoryReader.open(indexStore),
Collections.singletonMap("string", Type.SORTED));
IndexSearcher searcher = new IndexSearcher(reader);
expectThrows(IllegalStateException.class, () -> {
searcher.search(new MatchAllDocsQuery(), 500, sort);
});
reader.close();
indexStore.close();
}
/** Make sure the final sub-block(s) are not skipped. */
public void testFinalBlock() throws Exception {
Directory d = newDirectory();
IndexWriter w = new IndexWriter(d, new IndexWriterConfig(new MockAnalyzer(random())));
for(int i=0;i<25;i++) {
Document doc = new Document();
doc.add(newStringField("field", Character.toString((char) (97+i)), Field.Store.NO));
doc.add(newStringField("field", "z" + Character.toString((char) (97+i)), Field.Store.NO));
w.addDocument(doc);
}
w.forceMerge(1);
DirectoryReader r = DirectoryReader.open(w);
assertEquals(1, r.leaves().size());
FieldReader field = (FieldReader) r.leaves().get(0).reader().terms("field");
// We should see exactly two blocks: one root block (prefix empty string) and one block for z* terms (prefix z):
Stats stats = field.getStats();
assertEquals(0, stats.floorBlockCount);
assertEquals(2, stats.nonFloorBlockCount);
r.close();
w.close();
d.close();
}
public void testCustomMergeScheduler() throws Exception {
// We don't really need to execute anything, just make sure the custom
// MergeScheduler compiles. But also ensure it can actually be used, i.e.
// that it has no hidden dependencies, so don't use any randomized APIs!
Directory dir = new ByteBuffersDirectory();
IndexWriterConfig conf = new IndexWriterConfig(null);
conf.setMergeScheduler(new ReportingMergeScheduler());
IndexWriter writer = new IndexWriter(dir, conf);
writer.addDocument(new Document());
writer.commit(); // trigger flush
writer.addDocument(new Document());
writer.commit(); // trigger flush
writer.forceMerge(1);
writer.close();
dir.close();
}
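// Nightly: indexes one point per document (1-3 dimensions) and checks counts for
// range queries that match nearly every document, which exercises the "inverse"
// evaluation path that visits the documents outside the range; per the inline
// comment, multiple BKD leaves are needed to enable it, hence the large doc count.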
@Nightly
public void testInversePointRange() throws IOException {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
final int numDims = TestUtil.nextInt(random(), 1, 3);
final int numDocs = atLeast(10 * BKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE); // we need multiple leaves to enable this optimization
for (int i = 0; i < numDocs; ++i) {
Document doc = new Document();
int[] values = new int[numDims];
Arrays.fill(values, i);
doc.add(new IntPoint("f", values));
w.addDocument(doc);
}
w.forceMerge(1);
IndexReader r = DirectoryReader.open(w);
w.close();
IndexSearcher searcher = newSearcher(r);
int[] low = new int[numDims];
int[] high = new int[numDims];
Arrays.fill(high, numDocs - 2);
assertEquals(high[0] - low[0] + 1, searcher.count(IntPoint.newRangeQuery("f", low, high)));
Arrays.fill(low, 1);
assertEquals(high[0] - low[0] + 1, searcher.count(IntPoint.newRangeQuery("f", low, high)));
Arrays.fill(high, numDocs - 1);
assertEquals(high[0] - low[0] + 1, searcher.count(IntPoint.newRangeQuery("f", low, high)));
Arrays.fill(low, BKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE + 1);
assertEquals(high[0] - low[0] + 1, searcher.count(IntPoint.newRangeQuery("f", low, high)));
Arrays.fill(high, numDocs - BKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE);
assertEquals(high[0] - low[0] + 1, searcher.count(IntPoint.newRangeQuery("f", low, high)));
r.close();
dir.close();
}
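// A CLI command handler (Namespace appears to come from argparse4j): force-merges
// down to the number of segments given by the "num" argument, commits, and
// refreshes the shared reader, or reports that the index was opened read-only.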
@Override
public void execute(Namespace args, PrintStream out) throws Exception {
int count = args.getInt("num");
IndexWriter writer = ctx.getIndexWriter();
if (writer != null) {
writer.forceMerge(count, true);
writer.commit();
ctx.refreshReader();
}
else {
out.println("unable to open index writer, index is in readonly mode");
}
}
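// Exercises sparse numeric doc values: only a 'density' fraction of documents carry
// a value, values are produced in blocks of varying bits-per-value, and the index
// is force-merged to one segment before iteration and advance() are verified.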
private void doTestSparseNumericBlocksOfVariousBitsPerValue(double density) throws Exception {
Directory dir = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
conf.setMaxBufferedDocs(atLeast(Lucene80DocValuesFormat.NUMERIC_BLOCK_SIZE));
conf.setRAMBufferSizeMB(-1);
conf.setMergePolicy(newLogMergePolicy(random().nextBoolean()));
IndexWriter writer = new IndexWriter(dir, conf);
Document doc = new Document();
Field storedField = newStringField("stored", "", Field.Store.YES);
Field dvField = new NumericDocValuesField("dv", 0);
doc.add(storedField);
doc.add(dvField);
final int numDocs = atLeast(Lucene80DocValuesFormat.NUMERIC_BLOCK_SIZE*3);
final LongSupplier longs = blocksOfVariousBPV();
for (int i = 0; i < numDocs; i++) {
if (random().nextDouble() > density) {
writer.addDocument(new Document());
continue;
}
long value = longs.getAsLong();
storedField.setStringValue(Long.toString(value));
dvField.setLongValue(value);
writer.addDocument(doc);
}
writer.forceMerge(1);
writer.close();
// compare
assertDVIterate(dir);
assertDVAdvance(dir, 1); // Tests all jump-lengths from 1 to maxDoc (quite slow ~= 1 minute for 200K docs)
dir.close();
}
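// Shared @BeforeClass fixture: indexes NUM_DOCS documents with descending numeric
// point fields plus sometimes-skipped random unicode string fields (one of them
// multi-valued), then force-merges because the tests rely on a single segment and
// stable docid order.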
@BeforeClass
public static void beforeClass() throws Exception {
NUM_DOCS = atLeast(500);
NUM_ORDS = atLeast(2);
directory = newDirectory();
IndexWriter writer= new IndexWriter(directory, new IndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(new LogDocMergePolicy()));
long theLong = Long.MAX_VALUE;
double theDouble = Double.MAX_VALUE;
int theInt = Integer.MAX_VALUE;
float theFloat = Float.MAX_VALUE;
unicodeStrings = new String[NUM_DOCS];
multiValued = new BytesRef[NUM_DOCS][NUM_ORDS];
if (VERBOSE) {
System.out.println("TEST: setUp");
}
for (int i = 0; i < NUM_DOCS; i++){
Document doc = new Document();
doc.add(new LongPoint("theLong", theLong--));
doc.add(new DoublePoint("theDouble", theDouble--));
doc.add(new IntPoint("theInt", theInt--));
doc.add(new FloatPoint("theFloat", theFloat--));
if (i%2 == 0) {
doc.add(new IntPoint("sparse", i));
}
if (i%2 == 0) {
doc.add(new IntPoint("numInt", i));
}
// sometimes skip the field:
if (random().nextInt(40) != 17) {
unicodeStrings[i] = generateString(i);
doc.add(newStringField("theRandomUnicodeString", unicodeStrings[i], Field.Store.YES));
}
// sometimes skip the field:
if (random().nextInt(10) != 8) {
for (int j = 0; j < NUM_ORDS; j++) {
String newValue = generateString(i);
multiValued[i][j] = new BytesRef(newValue);
doc.add(newStringField("theRandomUnicodeMultiValuedField", newValue, Field.Store.YES));
}
Arrays.sort(multiValued[i]);
}
writer.addDocument(doc);
}
writer.forceMerge(1); // this test relies on one segment and docid order
IndexReader r = DirectoryReader.open(writer);
assertEquals(1, r.leaves().size());
reader = r.leaves().get(0).reader();
TestUtil.checkReader(reader);
writer.close();
}
public void testUseIndexForSelectiveMultiValueQueries() throws IOException {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig()
// relies on costs and PointValues.estimateCost so we need the default codec
.setCodec(TestUtil.getDefaultCodec()));
for (int i = 0; i < 2000; ++i) {
Document doc = new Document();
if (i < 1000) {
doc.add(new StringField("f1", "bar", Store.NO));
for (int j =0; j < 500; j++) {
doc.add(new LongPoint("f2", 42L));
doc.add(new SortedNumericDocValuesField("f2", 42L));
}
} else if (i == 1001) {
doc.add(new StringField("f1", "foo", Store.NO));
doc.add(new LongPoint("f2", 2L));
doc.add(new SortedNumericDocValuesField("f2", 42L));
} else {
doc.add(new StringField("f1", "bar", Store.NO));
for (int j =0; j < 100; j++) {
doc.add(new LongPoint("f2", 2L));
doc.add(new SortedNumericDocValuesField("f2", 2L));
}
}
w.addDocument(doc);
}
w.forceMerge(1);
IndexReader reader = DirectoryReader.open(w);
IndexSearcher searcher = newSearcher(reader);
searcher.setQueryCache(null);
// The term query is less selective, so the IndexOrDocValuesQuery should use points
final Query q1 = new BooleanQuery.Builder()
.add(new TermQuery(new Term("f1", "bar")), Occur.MUST)
.add(new IndexOrDocValuesQuery(LongPoint.newExactQuery("f2", 2), SortedNumericDocValuesField.newSlowRangeQuery("f2", 2L, 2L)), Occur.MUST)
.build();
final Weight w1 = searcher.createWeight(searcher.rewrite(q1), ScoreMode.COMPLETE, 1);
final Scorer s1 = w1.scorer(searcher.getIndexReader().leaves().get(0));
assertNull(s1.twoPhaseIterator()); // means we use points
// The term query is less selective, so the IndexOrDocValuesQuery should use points
final Query q2 = new BooleanQuery.Builder()
.add(new TermQuery(new Term("f1", "bar")), Occur.MUST)
.add(new IndexOrDocValuesQuery(LongPoint.newExactQuery("f2", 42), SortedNumericDocValuesField.newSlowRangeQuery("f2", 42, 42L)), Occur.MUST)
.build();
final Weight w2 = searcher.createWeight(searcher.rewrite(q2), ScoreMode.COMPLETE, 1);
final Scorer s2 = w2.scorer(searcher.getIndexReader().leaves().get(0));
assertNull(s2.twoPhaseIterator()); // means we use points
// The term query is more selective, so the IndexOrDocValuesQuery should use doc values
final Query q3 = new BooleanQuery.Builder()
.add(new TermQuery(new Term("f1", "foo")), Occur.MUST)
.add(new IndexOrDocValuesQuery(LongPoint.newExactQuery("f2", 42), SortedNumericDocValuesField.newSlowRangeQuery("f2", 42, 42L)), Occur.MUST)
.build();
final Weight w3 = searcher.createWeight(searcher.rewrite(q3), ScoreMode.COMPLETE, 1);
final Scorer s3 = w3.scorer(searcher.getIndexReader().leaves().get(0));
assertNotNull(s3.twoPhaseIterator()); // means we use doc values
reader.close();
w.close();
dir.close();
}
@Test
public void testOpenIfChangedMergedSegment() throws Exception {
// Tests openIfChanged() when all index segments were merged. This used to
// trip a bug in ParentArray that was caught by testOpenIfChangedManySegments;
// unlike that test, this one is not randomized.
Directory dir = newDirectory();
// hold onto the IndexWriter so we can forceMerge; note that we don't close it,
// since the DirectoryTaxonomyWriter will close it.
final IndexWriter iw = new IndexWriter(dir,
new IndexWriterConfig(new MockAnalyzer(random()))
.setMergePolicy(new LogByteSizeMergePolicy()));
DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriter(dir) {
@Override
protected IndexWriter openIndexWriter(Directory directory,
IndexWriterConfig config) throws IOException {
return iw;
}
};
TaxonomyReader reader = new DirectoryTaxonomyReader(writer);
assertEquals(1, reader.getSize());
assertEquals(1, reader.getParallelTaxonomyArrays().parents().length);
// add category and call forceMerge -- this should flush IW and merge segments down to 1
// in ParentArray.initFromReader, this used to fail assuming there are no parents.
writer.addCategory(new FacetLabel("1"));
iw.forceMerge(1);
// now calling openIfChanged should trip on the bug
TaxonomyReader newtr = TaxonomyReader.openIfChanged(reader);
assertNotNull(newtr);
reader.close();
reader = newtr;
assertEquals(2, reader.getSize());
assertEquals(2, reader.getParallelTaxonomyArrays().parents().length);
reader.close();
writer.close();
dir.close();
}
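// Nightly: indexes two SORTED_SET values per document with maxDoc just below, at,
// and just above the direct-monotonic block size, recording the expected
// (deduplicated, sorted) values in a side buffer and replaying them against the
// force-merged segment.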
@Nightly
public void testSortedSetAroundBlockSize() throws IOException {
final int frontier = 1 << Lucene80DocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT;
for (int maxDoc = frontier - 1; maxDoc <= frontier + 1; ++maxDoc) {
final Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(newLogMergePolicy()));
ByteBuffersDataOutput out = new ByteBuffersDataOutput();
Document doc = new Document();
SortedSetDocValuesField field1 = new SortedSetDocValuesField("sset", new BytesRef());
doc.add(field1);
SortedSetDocValuesField field2 = new SortedSetDocValuesField("sset", new BytesRef());
doc.add(field2);
for (int i = 0; i < maxDoc; ++i) {
BytesRef s1 = new BytesRef(TestUtil.randomSimpleString(random(), 2));
BytesRef s2 = new BytesRef(TestUtil.randomSimpleString(random(), 2));
field1.setBytesValue(s1);
field2.setBytesValue(s2);
w.addDocument(doc);
Set<BytesRef> set = new TreeSet<>(Arrays.asList(s1, s2));
out.writeVInt(set.size());
for (BytesRef ref : set) {
out.writeVInt(ref.length);
out.writeBytes(ref.bytes, ref.offset, ref.length);
}
}
w.forceMerge(1);
DirectoryReader r = DirectoryReader.open(w);
w.close();
LeafReader sr = getOnlyLeafReader(r);
assertEquals(maxDoc, sr.maxDoc());
SortedSetDocValues values = sr.getSortedSetDocValues("sset");
assertNotNull(values);
ByteBuffersDataInput in = out.toDataInput();
BytesRefBuilder b = new BytesRefBuilder();
for (int i = 0; i < maxDoc; ++i) {
assertEquals(i, values.nextDoc());
final int numValues = in.readVInt();
for (int j = 0; j < numValues; ++j) {
b.setLength(in.readVInt());
b.grow(b.length());
in.readBytes(b.bytes(), 0, b.length());
assertEquals(b.get(), values.lookupOrd(values.nextOrd()));
}
assertEquals(SortedSetDocValues.NO_MORE_ORDS, values.nextOrd());
}
r.close();
dir.close();
}
}
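// The same block-size boundary test for SORTED_NUMERIC doc values: two values per
// document, with the expected min/max pair written to a side buffer and read back
// in order from the merged segment.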
@Nightly
public void testSortedNumericAroundBlockSize() throws IOException {
final int frontier = 1 << Lucene80DocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT;
for (int maxDoc = frontier - 1; maxDoc <= frontier + 1; ++maxDoc) {
final Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(newLogMergePolicy()));
ByteBuffersDataOutput buffer = new ByteBuffersDataOutput();
Document doc = new Document();
SortedNumericDocValuesField field1 = new SortedNumericDocValuesField("snum", 0L);
doc.add(field1);
SortedNumericDocValuesField field2 = new SortedNumericDocValuesField("snum", 0L);
doc.add(field2);
for (int i = 0; i < maxDoc; ++i) {
long s1 = random().nextInt(100);
long s2 = random().nextInt(100);
field1.setLongValue(s1);
field2.setLongValue(s2);
w.addDocument(doc);
buffer.writeVLong(Math.min(s1, s2));
buffer.writeVLong(Math.max(s1, s2));
}
w.forceMerge(1);
DirectoryReader r = DirectoryReader.open(w);
w.close();
LeafReader sr = getOnlyLeafReader(r);
assertEquals(maxDoc, sr.maxDoc());
SortedNumericDocValues values = sr.getSortedNumericDocValues("snum");
assertNotNull(values);
ByteBuffersDataInput dataInput = buffer.toDataInput();
for (int i = 0; i < maxDoc; ++i) {
assertEquals(i, values.nextDoc());
assertEquals(2, values.docValueCount());
assertEquals(dataInput.readVLong(), values.nextValue());
assertEquals(dataInput.readVLong(), values.nextValue());
}
r.close();
dir.close();
}
}
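// Indexes multi-valued numeric doc values with varying bits-per-value and random
// intermediate commits, force-merges, then cross-checks the doc values against
// parallel stored fields.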
private void doTestSortedNumericBlocksOfVariousBitsPerValue(LongSupplier counts) throws Exception {
Directory dir = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
conf.setMaxBufferedDocs(atLeast(Lucene80DocValuesFormat.NUMERIC_BLOCK_SIZE));
conf.setRAMBufferSizeMB(-1);
conf.setMergePolicy(newLogMergePolicy(random().nextBoolean()));
IndexWriter writer = new IndexWriter(dir, conf);
final int numDocs = atLeast(Lucene80DocValuesFormat.NUMERIC_BLOCK_SIZE*3);
final LongSupplier values = blocksOfVariousBPV();
for (int i = 0; i < numDocs; i++) {
Document doc = new Document();
int valueCount = (int) counts.getAsLong();
long[] valueArray = new long[valueCount];
for (int j = 0; j < valueCount; j++) {
long value = values.getAsLong();
valueArray[j] = value;
doc.add(new SortedNumericDocValuesField("dv", value));
}
Arrays.sort(valueArray);
for (int j = 0; j < valueCount; j++) {
doc.add(new StoredField("stored", Long.toString(valueArray[j])));
}
writer.addDocument(doc);
if (random().nextInt(31) == 0) {
writer.commit();
}
}
writer.forceMerge(1);
writer.close();
// compare
DirectoryReader ir = DirectoryReader.open(dir);
TestUtil.checkReader(ir);
for (LeafReaderContext context : ir.leaves()) {
LeafReader r = context.reader();
SortedNumericDocValues docValues = DocValues.getSortedNumeric(r, "dv");
for (int i = 0; i < r.maxDoc(); i++) {
if (i > docValues.docID()) {
docValues.nextDoc();
}
String[] expected = r.document(i).getValues("stored");
if (i < docValues.docID()) {
assertEquals(0, expected.length);
} else {
String[] actual = new String[docValues.docValueCount()];
for (int j = 0; j < actual.length; j++) {
actual[j] = Long.toString(docValues.nextValue());
}
assertArrayEquals(expected, actual);
}
}
}
ir.close();
dir.close();
}
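// Feeds a required-optional scorer documents whose "a" frequency (and, with the
// freq == score similarity, whose score) keeps growing, sets a min-competitive
// score, and verifies that every block is still visited with scores identical to
// the equivalent BooleanQuery.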
public void testMaxBlock() throws IOException {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(newLogMergePolicy()));
FieldType ft = new FieldType();
ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
ft.setTokenized(true);
ft.freeze();
for (int i = 0; i < 1024; i++) {
// create documents with an increasing number of As, and sometimes a single B
Document doc = new Document();
doc.add(new Field("foo", new TermFreqTokenStream("a", i+1), ft));
if (random().nextFloat() < 0.5f) {
doc.add(new Field("foo", new TermFreqTokenStream("b", 1), ft));
}
w.addDocument(doc);
}
w.forceMerge(1);
w.close();
IndexReader reader = DirectoryReader.open(dir);
IndexSearcher searcher = newSearcher(reader);
searcher.setSimilarity(new TestSimilarity.SimpleSimilarity()); // freq == score
final Query reqQ = new TermQuery(new Term("foo", "a"));
final Query optQ = new TermQuery(new Term("foo", "b"));
final Query boolQ = new BooleanQuery.Builder()
.add(reqQ, Occur.MUST)
.add(optQ, Occur.SHOULD)
.build();
Scorer actual = reqOptScorer(searcher, reqQ, optQ, true);
Scorer expected = searcher
.createWeight(boolQ, ScoreMode.COMPLETE, 1)
.scorer(searcher.getIndexReader().leaves().get(0));
actual.setMinCompetitiveScore(Math.nextUp(1f));
// Checks that all blocks are fully visited
for (int i = 0; i < 1024; i++) {
assertEquals(i, actual.iterator().nextDoc());
assertEquals(i, expected.iterator().nextDoc());
assertEquals(actual.score(),expected.score(), 0);
}
reader.close();
dir.close();
}
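// Indexes with a custom per-field codec (per the inline comments, the default
// postings format for field1 and a memory postings format for field2), deletes two
// documents, force-merges, and verifies counts through NRT readers along the way.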
public void testPerFieldCodec() throws Exception {
final int NUM_DOCS = atLeast(173);
if (VERBOSE) {
System.out.println("TEST: NUM_DOCS=" + NUM_DOCS);
}
BaseDirectoryWrapper dir = newDirectory();
dir.setCheckIndexOnClose(false); // we use a custom codec provider
IndexWriter w = new IndexWriter(
dir,
newIndexWriterConfig(new MockAnalyzer(random())).
setCodec(new CustomPerFieldCodec()).
setMergePolicy(newLogMergePolicy(3))
);
Document doc = new Document();
// uses default codec:
doc.add(newTextField("field1", "this field uses the standard codec as the test", Field.Store.NO));
// uses memory codec:
Field field2 = newTextField("field2", "this field uses the memory codec as the test", Field.Store.NO);
doc.add(field2);
Field idField = newStringField("id", "", Field.Store.NO);
doc.add(idField);
for(int i=0;i<NUM_DOCS;i++) {
idField.setStringValue(""+i);
w.addDocument(doc);
if ((i+1)%10 == 0) {
w.commit();
}
}
if (VERBOSE) {
System.out.println("TEST: now delete id=77");
}
w.deleteDocuments(new Term("id", "77"));
IndexReader r = DirectoryReader.open(w);
assertEquals(NUM_DOCS-1, r.numDocs());
IndexSearcher s = newSearcher(r);
assertEquals(NUM_DOCS-1, s.count(new TermQuery(new Term("field1", "standard"))));
assertEquals(NUM_DOCS-1, s.count(new TermQuery(new Term("field2", "memory"))));
r.close();
if (VERBOSE) {
System.out.println("\nTEST: now delete 2nd doc");
}
w.deleteDocuments(new Term("id", "44"));
if (VERBOSE) {
System.out.println("\nTEST: now force merge");
}
w.forceMerge(1);
if (VERBOSE) {
System.out.println("\nTEST: now open reader");
}
r = DirectoryReader.open(w);
assertEquals(NUM_DOCS-2, r.maxDoc());
assertEquals(NUM_DOCS-2, r.numDocs());
s = newSearcher(r);
assertEquals(NUM_DOCS-2, s.count(new TermQuery(new Term("field1", "standard"))));
assertEquals(NUM_DOCS-2, s.count(new TermQuery(new Term("field2", "memory"))));
assertEquals(1, s.count(new TermQuery(new Term("id", "76"))));
assertEquals(0, s.count(new TermQuery(new Term("id", "77"))));
assertEquals(0, s.count(new TermQuery(new Term("id", "44"))));
if (VERBOSE) {
System.out.println("\nTEST: now close NRT reader");
}
r.close();
w.close();
dir.close();
}
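// Solr test: builds a standalone spelling index in an alternate location, points
// IndexBasedSpellChecker at it through the LOCATION parameter, and checks that
// "flesh" is corrected to "flash" while out-of-index terms yield no suggestions.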
@Test
@SuppressWarnings({"unchecked"})
public void testAlternateLocation() throws Exception {
String[] ALT_DOCS = new String[]{
"jumpin jack flash",
"Sargent Peppers Lonely Hearts Club Band",
"Born to Run",
"Thunder Road",
"Londons Burning",
"A Horse with No Name",
"Sweet Caroline"
};
IndexBasedSpellChecker checker = new IndexBasedSpellChecker();
@SuppressWarnings({"rawtypes"})
NamedList spellchecker = new NamedList();
spellchecker.add("classname", IndexBasedSpellChecker.class.getName());
File tmpDir = createTempDir().toFile();
File indexDir = new File(tmpDir, "spellingIdx");
//create a standalone index
File altIndexDir = new File(tmpDir, "alternateIdx" + new Date().getTime());
Directory dir = newFSDirectory(altIndexDir.toPath());
IndexWriter iw = new IndexWriter(
dir,
new IndexWriterConfig(new WhitespaceAnalyzer())
);
for (int i = 0; i < ALT_DOCS.length; i++) {
Document doc = new Document();
doc.add(new TextField("title", ALT_DOCS[i], Field.Store.YES));
iw.addDocument(doc);
}
iw.forceMerge(1);
iw.close();
dir.close();
indexDir.mkdirs();
spellchecker.add(AbstractLuceneSpellChecker.INDEX_DIR, indexDir.getAbsolutePath());
spellchecker.add(AbstractLuceneSpellChecker.LOCATION, altIndexDir.getAbsolutePath());
spellchecker.add(AbstractLuceneSpellChecker.FIELD, "title");
spellchecker.add(AbstractLuceneSpellChecker.SPELLCHECKER_ARG_NAME, spellchecker);
SolrCore core = h.getCore();
String dictName = checker.init(spellchecker, core);
assertTrue(dictName + " is not equal to " + SolrSpellChecker.DEFAULT_DICTIONARY_NAME,
    dictName.equals(SolrSpellChecker.DEFAULT_DICTIONARY_NAME));
h.getCore().withSearcher(searcher -> {
checker.build(core, searcher);
IndexReader reader = searcher.getIndexReader();
Collection<Token> tokens = queryConverter.convert("flesh");
SpellingOptions spellOpts = new SpellingOptions(tokens, reader, 1, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, true, 0.5f, null);
SpellingResult result = checker.getSuggestions(spellOpts);
assertTrue("result is null and it shouldn't be", result != null);
//should be lowercased, b/c we are using a lowercasing analyzer
Map<String, Integer> suggestions = result.get(spellOpts.tokens.iterator().next());
assertTrue("flesh is null and it shouldn't be", suggestions != null);
assertTrue("flesh Size: " + suggestions.size() + " is not: " + 1, suggestions.size() == 1);
Map.Entry<String, Integer> entry = suggestions.entrySet().iterator().next();
assertTrue(entry.getKey() + " is not equal to " + "flash", entry.getKey().equals("flash"));
assertTrue(entry.getValue() + " does not equal: " + 1, entry.getValue() == 1);
//test something not in the spell checker
spellOpts.tokens = queryConverter.convert("super");
result = checker.getSuggestions(spellOpts);
assertTrue("result is null and it shouldn't be", result != null);
suggestions = result.get(spellOpts.tokens.iterator().next());
assertTrue("suggestions size should be 0", suggestions.size()==0);
spellOpts.tokens = queryConverter.convert("Caroline");
result = checker.getSuggestions(spellOpts);
assertTrue("result is null and it shouldn't be", result != null);
suggestions = result.get(spellOpts.tokens.iterator().next());
assertTrue("suggestions is not null and it should be", suggestions == null);
return null;
});
}
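// Builds a small fixed index of "A"/"B" documents and steps a required-optional
// scorer through it under increasing min-competitive scores, checking which
// documents remain competitive at each threshold.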
public void testMaxScoreSegment() throws IOException {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(newLogMergePolicy()));
for (String[] values : Arrays.asList(
new String[]{ "A" }, // 0
new String[]{ "A" }, // 1
new String[]{ }, // 2
new String[]{ "A", "B" }, // 3
new String[]{ "A" }, // 4
new String[]{ "B" }, // 5
new String[]{ "A", "B" }, // 6
new String[]{ "B" } // 7
)) {
Document doc = new Document();
for (String value : values) {
doc.add(new StringField("foo", value, Store.NO));
}
w.addDocument(doc);
}
w.forceMerge(1);
w.close();
IndexReader reader = DirectoryReader.open(dir);
IndexSearcher searcher = newSearcher(reader);
final Query reqQ = new ConstantScoreQuery(new TermQuery(new Term("foo", "A")));
final Query optQ = new ConstantScoreQuery(new TermQuery(new Term("foo", "B")));
Scorer scorer = reqOptScorer(searcher, reqQ, optQ, false);
assertEquals(0, scorer.iterator().nextDoc());
assertEquals(1, scorer.score(), 0);
assertEquals(1, scorer.iterator().nextDoc());
assertEquals(1, scorer.score(), 0);
assertEquals(3, scorer.iterator().nextDoc());
assertEquals(2, scorer.score(), 0);
assertEquals(4, scorer.iterator().nextDoc());
assertEquals(1, scorer.score(), 0);
assertEquals(6, scorer.iterator().nextDoc());
assertEquals(2, scorer.score(), 0);
assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc());
scorer = reqOptScorer(searcher, reqQ, optQ, false);
scorer.setMinCompetitiveScore(Math.nextDown(1f));
assertEquals(0, scorer.iterator().nextDoc());
assertEquals(1, scorer.score(), 0);
assertEquals(1, scorer.iterator().nextDoc());
assertEquals(1, scorer.score(), 0);
assertEquals(3, scorer.iterator().nextDoc());
assertEquals(2, scorer.score(), 0);
assertEquals(4, scorer.iterator().nextDoc());
assertEquals(1, scorer.score(), 0);
assertEquals(6, scorer.iterator().nextDoc());
assertEquals(2, scorer.score(), 0);
assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc());
scorer = reqOptScorer(searcher, reqQ, optQ, false);
scorer.setMinCompetitiveScore(Math.nextUp(1f));
assertEquals(3, scorer.iterator().nextDoc());
assertEquals(2, scorer.score(), 0);
assertEquals(6, scorer.iterator().nextDoc());
assertEquals(2, scorer.score(), 0);
assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc());
scorer = reqOptScorer(searcher, reqQ, optQ, true);
scorer.setMinCompetitiveScore(Math.nextUp(2f));
assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc());
reader.close();
dir.close();
}
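// Single-valued counterpart of testUseIndexForSelectiveMultiValueQueries: the
// IndexOrDocValuesQuery should fall back to doc values behind the rare "foo" term
// and use points behind the common "bar" term.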
public void testUseIndexForSelectiveQueries() throws IOException {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig()
// relies on costs and PointValues.estimateCost so we need the default codec
.setCodec(TestUtil.getDefaultCodec()));
for (int i = 0; i < 2000; ++i) {
Document doc = new Document();
if (i == 42) {
doc.add(new StringField("f1", "bar", Store.NO));
doc.add(new LongPoint("f2", 42L));
doc.add(new NumericDocValuesField("f2", 42L));
} else if (i == 100) {
doc.add(new StringField("f1", "foo", Store.NO));
doc.add(new LongPoint("f2", 2L));
doc.add(new NumericDocValuesField("f2", 2L));
} else {
doc.add(new StringField("f1", "bar", Store.NO));
doc.add(new LongPoint("f2", 2L));
doc.add(new NumericDocValuesField("f2", 2L));
}
w.addDocument(doc);
}
w.forceMerge(1);
IndexReader reader = DirectoryReader.open(w);
IndexSearcher searcher = newSearcher(reader);
searcher.setQueryCache(null);
// The term query is more selective, so the IndexOrDocValuesQuery should use doc values
final Query q1 = new BooleanQuery.Builder()
.add(new TermQuery(new Term("f1", "foo")), Occur.MUST)
.add(new IndexOrDocValuesQuery(LongPoint.newExactQuery("f2", 2), NumericDocValuesField.newSlowRangeQuery("f2", 2L, 2L)), Occur.MUST)
.build();
final Weight w1 = searcher.createWeight(searcher.rewrite(q1), ScoreMode.COMPLETE, 1);
final Scorer s1 = w1.scorer(searcher.getIndexReader().leaves().get(0));
assertNotNull(s1.twoPhaseIterator()); // means we use doc values
// The term query is less selective, so the IndexOrDocValuesQuery should use points
final Query q2 = new BooleanQuery.Builder()
.add(new TermQuery(new Term("f1", "bar")), Occur.MUST)
.add(new IndexOrDocValuesQuery(LongPoint.newExactQuery("f2", 42), NumericDocValuesField.newSlowRangeQuery("f2", 42L, 42L)), Occur.MUST)
.build();
final Weight w2 = searcher.createWeight(searcher.rewrite(q2), ScoreMode.COMPLETE, 1);
final Scorer s2 = w2.scorer(searcher.getIndexReader().leaves().get(0));
assertNull(s2.twoPhaseIterator()); // means we use points
reader.close();
w.close();
dir.close();
}