Listed below are example usages of org.apache.lucene.document.FieldType, drawn from open-source projects; the full source of each snippet can be found on GitHub.
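Before the collected snippets, here is a minimal sketch of the pattern nearly all of them share: copy a template FieldType (such as TextField.TYPE_NOT_STORED), customize its indexing and term-vector options, optionally freeze() it, and pass it to a Field. The field name and text below are placeholders, not taken from any snippet.

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexOptions;

Document sketch() {
  // Start from a template type, then customize it.
  FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
  ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
  ft.setStoreTermVectors(true);
  ft.freeze(); // any further setter call now throws IllegalStateException
  Document doc = new Document();
  doc.add(new Field("body", "placeholder text", ft));
  return doc;
}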
private Document newDocument() {
  Document doc = new Document();
  for (IndexOptions option : IndexOptions.values()) {
    if (option == IndexOptions.NONE) {
      continue;
    }
    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
    // turn on tvs for a cross-check, since we rely upon checkindex in this test (for now)
    ft.setStoreTermVectors(true);
    ft.setStoreTermVectorOffsets(true);
    ft.setStoreTermVectorPositions(true);
    ft.setStoreTermVectorPayloads(true);
    ft.setIndexOptions(option);
    doc.add(new Field(option.toString(), "", ft));
  }
  return doc;
}
public static String termVectorOptionsToString(FieldType fieldType) {
  if (!fieldType.storeTermVectors()) {
    return "no";
  } else if (!fieldType.storeTermVectorOffsets() && !fieldType.storeTermVectorPositions()) {
    return "yes";
  } else if (fieldType.storeTermVectorOffsets() && !fieldType.storeTermVectorPositions()) {
    return "with_offsets";
  } else {
    StringBuilder builder = new StringBuilder("with");
    if (fieldType.storeTermVectorPositions()) {
      builder.append("_positions");
    }
    if (fieldType.storeTermVectorOffsets()) {
      builder.append("_offsets");
    }
    if (fieldType.storeTermVectorPayloads()) {
      builder.append("_payloads");
    }
    return builder.toString();
  }
}
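As a worked example of the branches above (the variable name is invented): enabling term vectors together with positions, offsets, and payloads reaches the StringBuilder branch, vectors alone yield "yes", and no vectors yield "no".

FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
ft.setStoreTermVectors(true);
ft.setStoreTermVectorPositions(true);
ft.setStoreTermVectorOffsets(true);
ft.setStoreTermVectorPayloads(true);
String s = termVectorOptionsToString(ft);
// s is "with_positions_offsets_payloads"; with vectors alone it would be
// "yes", and with storeTermVectors(false) it would be "no".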
public void testNoAbortOnBadTVSettings() throws Exception {
  Directory dir = newDirectory();
  // Don't use RandomIndexWriter because we want to be sure both docs go to 1 seg:
  IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
  IndexWriter iw = new IndexWriter(dir, iwc);
  Document doc = new Document();
  iw.addDocument(doc);
  FieldType ft = new FieldType(StoredField.TYPE);
  ft.setStoreTermVectors(true);
  ft.freeze();
  doc.add(new Field("field", "value", ft));
  expectThrows(IllegalArgumentException.class, () -> {
    iw.addDocument(doc);
  });
  IndexReader r = DirectoryReader.open(iw);
  // Make sure the exc didn't lose our first document:
  assertEquals(1, r.numDocs());
  iw.close();
  r.close();
  dir.close();
}
public static Document Document(File f)
    throws java.io.FileNotFoundException {
  Document doc = new Document();
  doc.add(new StoredField("path", f.getPath()));
  doc.add(new StoredField("modified",
      DateTools.timeToString(f.lastModified(), DateTools.Resolution.MINUTE)));
  // create new FieldType to store term positions (TextField is not sufficiently configurable)
  FieldType ft = new FieldType();
  ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
  ft.setTokenized(true);
  ft.setStoreTermVectors(true);
  ft.setStoreTermVectorPositions(true);
  Field contentsField = new Field("contents", new FileReader(f), ft);
  doc.add(contentsField);
  return doc;
}
public void testExcIndexingDocBeforeDocValues() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
  IndexWriter w = new IndexWriter(dir, iwc);
  Document doc = new Document();
  FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
  ft.setDocValuesType(DocValuesType.SORTED);
  ft.freeze();
  Field field = new Field("test", "value", ft);
  field.setTokenStream(new TokenStream() {
    @Override
    public boolean incrementToken() {
      throw new RuntimeException("no");
    }
  });
  doc.add(field);
  expectThrows(RuntimeException.class, () -> {
    w.addDocument(doc);
  });
  w.addDocument(new Document());
  w.close();
  dir.close();
}
public void testInfiniteFreq1() throws Exception {
  String document = "drug druggy drug drug drug";
  Directory dir = newDirectory();
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
  Document doc = new Document();
  doc.add(newField("lyrics", document, new FieldType(TextField.TYPE_NOT_STORED)));
  iw.addDocument(doc);
  IndexReader ir = iw.getReader();
  iw.close();
  IndexSearcher is = newSearcher(ir);
  PhraseQuery.Builder builder = new PhraseQuery.Builder();
  builder.add(new Term("lyrics", "drug"), 1);
  builder.add(new Term("lyrics", "drug"), 3);
  builder.setSlop(1);
  PhraseQuery pq = builder.build();
  // "drug the drug"~1
  assertSaneScoring(pq, is);
  ir.close();
  dir.close();
}
public void testFieldInvertState() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
  iwc.setSimilarity(NeverForgetsSimilarity.INSTANCE);
  IndexWriter w = new IndexWriter(dir, iwc);
  Document doc = new Document();
  FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
  fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
  Field field = new Field("field",
      new CannedTermFreqs(new String[] {"foo", "bar", "foo", "bar"},
          new int[] {42, 128, 17, 100}),
      fieldType);
  doc.add(field);
  w.addDocument(doc);
  FieldInvertState fis = NeverForgetsSimilarity.INSTANCE.lastState;
  assertEquals(228, fis.getMaxTermFrequency());
  assertEquals(2, fis.getUniqueTermCount());
  assertEquals(0, fis.getNumOverlap());
  assertEquals(287, fis.getLength());
  IOUtils.close(w, dir);
}
public void testLegalbutVeryLargeOffsets() throws Exception {
  Directory dir = newDirectory();
  IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
  Document doc = new Document();
  Token t1 = new Token("foo", 0, Integer.MAX_VALUE - 500);
  if (random().nextBoolean()) {
    t1.setPayload(new BytesRef("test"));
  }
  Token t2 = new Token("foo", Integer.MAX_VALUE - 500, Integer.MAX_VALUE);
  TokenStream tokenStream = new CannedTokenStream(
      new Token[] { t1, t2 }
  );
  FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
  ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
  // store some term vectors for the checkindex cross-check
  ft.setStoreTermVectors(true);
  ft.setStoreTermVectorPositions(true);
  ft.setStoreTermVectorOffsets(true);
  Field field = new Field("foo", tokenStream, ft);
  doc.add(field);
  iw.addDocument(doc);
  iw.close();
  dir.close();
}
private IndexReader indexSomeFields() throws IOException {
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
  FieldType ft = new FieldType();
  ft.setIndexOptions(IndexOptions.NONE);
  ft.setTokenized(false);
  ft.setStored(true);
  ft.freeze();
  Field title = new Field("title", "", ft);
  Field text = new Field("text", "", ft);
  Field category = new Field("category", "", ft);
  Document doc = new Document();
  doc.add(title);
  doc.add(text);
  doc.add(category);
  title.setStringValue("This is the title field.");
  text.setStringValue("This is the text field. You can put some text if you want.");
  category.setStringValue("This is the category field.");
  iw.addDocument(doc);
  IndexReader ir = iw.getReader();
  iw.close();
  return ir;
}
public DocState(boolean reuseFields, FieldType ft, FieldType bodyFt) {
  this.reuseFields = reuseFields;
  if (reuseFields) {
    fields = new HashMap<>();
    numericFields = new HashMap<>();
    // Initialize the map with the default fields.
    fields.put(BODY_FIELD, new Field(BODY_FIELD, "", bodyFt));
    fields.put(TITLE_FIELD, new Field(TITLE_FIELD, "", ft));
    fields.put(DATE_FIELD, new Field(DATE_FIELD, "", ft));
    fields.put(ID_FIELD, new StringField(ID_FIELD, "", Field.Store.YES));
    fields.put(NAME_FIELD, new Field(NAME_FIELD, "", ft));
    numericFields.put(DATE_MSEC_FIELD, new LongPoint(DATE_MSEC_FIELD, 0L));
    numericFields.put(TIME_SEC_FIELD, new IntPoint(TIME_SEC_FIELD, 0));
    doc = new Document();
  } else {
    numericFields = null;
    fields = null;
    doc = null;
  }
}
private Document newGeoDocument(OIdentifiable oIdentifiable, Shape shape) {
  FieldType ft = new FieldType();
  ft.setIndexed(true);
  ft.setStored(true);
  Document doc = new Document();
  doc.add(OLuceneIndexType.createField(RID, oIdentifiable.getIdentity().toString(), Field.Store.YES,
      Field.Index.NOT_ANALYZED_NO_NORMS));
  for (IndexableField f : strategy.createIndexableFields(shape)) {
    doc.add(f);
  }
  doc.add(new StoredField(strategy.getFieldName(), ctx.toString(shape)));
  return doc;
}
public void testIndexingPointsAndDocValues() throws Exception {
  FieldType type = new FieldType();
  type.setDimensions(1, 4);
  type.setDocValuesType(DocValuesType.BINARY);
  type.freeze();
  Document doc = new Document();
  byte[] packedPoint = "term".getBytes(StandardCharsets.UTF_8);
  doc.add(new BinaryPoint("field", packedPoint, type));
  MemoryIndex mi = MemoryIndex.fromDocument(doc, analyzer);
  LeafReader leafReader = mi.createSearcher().getIndexReader().leaves().get(0).reader();
  assertEquals(1, leafReader.getPointValues("field").size());
  assertArrayEquals(packedPoint, leafReader.getPointValues("field").getMinPackedValue());
  assertArrayEquals(packedPoint, leafReader.getPointValues("field").getMaxPackedValue());
  BinaryDocValues dvs = leafReader.getBinaryDocValues("field");
  assertEquals(0, dvs.nextDoc());
  assertEquals("term", dvs.binaryValue().utf8ToString());
}
private Document buildDocument(BytesRef text, Set<BytesRef> contexts, long weight, BytesRef payload) throws IOException {
  String textString = text.utf8ToString();
  Document doc = new Document();
  FieldType ft = getTextFieldType();
  doc.add(new Field(TEXT_FIELD_NAME, textString, ft));
  if (minPrefixChars > 0) {
    doc.add(new Field(TEXTGRAMS_FIELD_NAME, textString, ft));
  }
  doc.add(new StringField(EXACT_TEXT_FIELD_NAME, textString, Field.Store.NO));
  doc.add(new BinaryDocValuesField(TEXT_FIELD_NAME, text));
  doc.add(new NumericDocValuesField("weight", weight));
  if (payload != null) {
    doc.add(new BinaryDocValuesField("payloads", payload));
  }
  if (contexts != null) {
    for (BytesRef context : contexts) {
      doc.add(new StringField(CONTEXTS_FIELD_NAME, context, Field.Store.NO));
      doc.add(new SortedSetDocValuesField(CONTEXTS_FIELD_NAME, context));
    }
  }
  return doc;
}
public void testIllegalVectorPositionsWithoutIndexed() throws Exception {
  Directory dir = newDirectory();
  MockAnalyzer a = new MockAnalyzer(random());
  a.setEnableChecks(false);
  RandomIndexWriter w = new RandomIndexWriter(random(), dir, a);
  FieldType ft = new FieldType(StoredField.TYPE);
  ft.setStoreTermVectorPositions(true);
  Document doc = new Document();
  doc.add(new Field("field", "value", ft));
  IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
    w.addDocument(doc);
  });
  assertEquals("cannot store term vector positions for a field that is not indexed (field=\"field\")", expected.getMessage());
  w.close();
  dir.close();
}
public static Document createDocument(int n, String indexName, int numFields) {
  StringBuilder sb = new StringBuilder();
  FieldType customType = new FieldType(TextField.TYPE_STORED);
  customType.setStoreTermVectors(true);
  customType.setStoreTermVectorPositions(true);
  customType.setStoreTermVectorOffsets(true);
  FieldType customType1 = new FieldType(StringField.TYPE_STORED);
  customType1.setStoreTermVectors(true);
  customType1.setStoreTermVectorPositions(true);
  customType1.setStoreTermVectorOffsets(true);
  final Document doc = new Document();
  doc.add(new Field("id", Integer.toString(n), customType1));
  doc.add(new Field("indexname", indexName, customType1));
  sb.append("a");
  sb.append(n);
  doc.add(new Field("field1", sb.toString(), customType));
  sb.append(" b");
  sb.append(n);
  for (int i = 1; i < numFields; i++) {
    doc.add(new Field("field" + (i + 1), sb.toString(), customType));
  }
  return doc;
}
private void addDocumentBlock(int id, int count, IndexWriter writer) throws IOException {
  FieldType fieldType = new FieldType();
  fieldType.setIndexed(true);
  fieldType.setOmitNorms(true);
  fieldType.setTokenized(false);
  fieldType.setStored(true);
  FieldType fieldTypeNoIndex = new FieldType();
  fieldTypeNoIndex.setStored(true);
  fieldTypeNoIndex.setIndexed(false);
  for (int i = 0; i < count; i++) {
    Document document = new Document();
    document.add(new Field("id", Integer.toString(id), fieldType));
    document.add(new Field("field", Integer.toString(i), fieldType));
    for (int j = 0; j < 100; j++) {
      document.add(new Field("field" + j, "testing here testing here testing here testing here testing here testing here testing here", fieldTypeNoIndex));
    }
    writer.addDocument(document);
  }
}
@Override
public void configure(String fieldNameForThisInstance, Map<String, String> properties, Configuration configuration) {
  String precisionStepStr = properties.get(NUMERIC_PRECISION_STEP);
  if (precisionStepStr != null) {
    _precisionStep = Integer.parseInt(precisionStepStr);
    _typeStored = new FieldType(LongField.TYPE_STORED);
    _typeStored.setNumericPrecisionStep(_precisionStep);
    _typeStored.freeze();
    _typeNotStored = new FieldType(LongField.TYPE_NOT_STORED);
    _typeNotStored.setNumericPrecisionStep(_precisionStep);
    _typeNotStored.freeze();
  } else {
    _typeStored = LongField.TYPE_STORED;
    _typeNotStored = LongField.TYPE_NOT_STORED;
  }
}
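The configure method above targets the legacy trie-numeric API (LongField and setNumericPrecisionStep, which existed through Lucene 5.x before point fields replaced them). A hedged sketch of how the two frozen types would then be consumed; the field name and value are invented:

// Assumes the same legacy Lucene 4.x/5.x API as the snippet above.
long millis = System.currentTimeMillis();
Field stored = new LongField("timestamp", millis, _typeStored);       // indexed and stored
Field notStored = new LongField("timestamp", millis, _typeNotStored); // indexed only
// A smaller precision step indexes more trie terms per value, making
// NumericRangeQuery faster at the cost of a larger index.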
public void testNonStringTermHandling() throws IOException {
  FieldType ft = new FieldType();
  ft.setTokenized(true);
  ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
  try (Monitor monitor = newMonitor()) {
    monitor.register(new MonitorQuery("1", new TermQuery(new Term("f", NON_STRING_TERM))));
    Document doc = new Document();
    doc.add(new Field("f", new NonStringTokenStream(), ft));
    MatchingQueries<QueryMatch> m = monitor.match(doc, QueryMatch.SIMPLE_MATCHER);
    assertEquals(1, m.getMatchCount());
    assertEquals(1, m.getQueriesRun());
  }
}
@Override
public Iterable<IndexableField> convert(LuceneContext context, String path, Field field, LuceneIndex annotation, Type type, Object data) {
  Collection<IndexableField> indexables = new LinkedList<>();
  FieldType configuration = new FieldType();
  configuration.setIndexOptions(IndexOptions.DOCS);
  if (annotation.analyze()) {
    configuration.setTokenized(true);
    LuceneTerm negative = annotation.negative();
    if (negative.offset()) {
      configuration.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
    } else if (negative.position()) {
      configuration.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
    } else if (negative.frequency()) {
      configuration.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
    }
    LuceneTerm positive = annotation.positive();
    if (positive.offset()) {
      configuration.setStoreTermVectorOffsets(true);
    }
    if (positive.position()) {
      configuration.setStoreTermVectorPositions(true);
    }
    if (positive.frequency()) {
      configuration.setStoreTermVectors(true);
    }
  }
  indexables.add(new org.apache.lucene.document.Field(path, (String) data, configuration));
  return indexables;
}
@Override
public GeoPointFieldMapper build(BuilderContext context, String simpleName, MappedFieldType fieldType,
    MappedFieldType defaultFieldType, Settings indexSettings, ContentPath.Type pathType, DoubleFieldMapper latMapper,
    DoubleFieldMapper lonMapper, StringFieldMapper geoHashMapper, MultiFields multiFields, Explicit<Boolean> ignoreMalformed,
    CopyTo copyTo) {
  fieldType.setTokenized(false);
  if (context.indexCreatedVersion().before(Version.V_2_3_0)) {
    fieldType.setNumericPrecisionStep(GeoPointField.PRECISION_STEP);
    fieldType.setNumericType(FieldType.NumericType.LONG);
  }
  setupFieldType(context);
  return new GeoPointFieldMapper(simpleName, fieldType, defaultFieldType, indexSettings, pathType, latMapper, lonMapper,
      geoHashMapper, multiFields, ignoreMalformed, copyTo);
}
public void testInvalidProx() throws Exception {
  Directory dir = newDirectory();
  IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())));
  Document doc = new Document();
  FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
  Field field = new Field("field",
      new CannedTermFreqs(new String[] {"foo", "bar", "foo", "bar"},
          new int[] {42, 128, 17, 100}),
      fieldType);
  doc.add(field);
  Exception e = expectThrows(IllegalStateException.class, () -> { w.addDocument(doc); });
  assertEquals("field \"field\": cannot index positions while using custom TermFrequencyAttribute", e.getMessage());
  IOUtils.close(w, dir);
}
public void testSlopWithHoles() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
  FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
  customType.setOmitNorms(true);
  Field f = new Field("lyrics", "", customType);
  Document doc = new Document();
  doc.add(f);
  f.setStringValue("drug drug");
  iw.addDocument(doc);
  f.setStringValue("drug druggy drug");
  iw.addDocument(doc);
  f.setStringValue("drug druggy druggy drug");
  iw.addDocument(doc);
  f.setStringValue("drug druggy drug druggy drug");
  iw.addDocument(doc);
  IndexReader ir = iw.getReader();
  iw.close();
  IndexSearcher is = newSearcher(ir);
  PhraseQuery.Builder builder = new PhraseQuery.Builder();
  builder.add(new Term("lyrics", "drug"), 1);
  builder.add(new Term("lyrics", "drug"), 4);
  PhraseQuery pq = builder.build();
  // "drug the drug"~1
  assertEquals(1, is.search(pq, 4).totalHits.value);
  builder.setSlop(1);
  pq = builder.build();
  assertEquals(3, is.search(pq, 4).totalHits.value);
  builder.setSlop(2);
  pq = builder.build();
  assertEquals(4, is.search(pq, 4).totalHits.value);
  ir.close();
  dir.close();
}
public void testSimpleHighlightTest() throws IOException {
  Directory dir = newDirectory();
  IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
  Document doc = new Document();
  FieldType type = new FieldType(TextField.TYPE_STORED);
  type.setStoreTermVectorOffsets(true);
  type.setStoreTermVectorPositions(true);
  type.setStoreTermVectors(true);
  type.freeze();
  Field field = new Field("field", "This is a test where foo is highlighed and should be highlighted", type);
  doc.add(field);
  writer.addDocument(doc);
  FastVectorHighlighter highlighter = new FastVectorHighlighter();
  IndexReader reader = DirectoryReader.open(writer);
  int docId = 0;
  FieldQuery fieldQuery = highlighter.getFieldQuery(new TermQuery(new Term("field", "foo")), reader);
  String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 54, 1);
  // highlighted results are centered
  assertEquals("This is a test where <b>foo</b> is highlighed and should be highlighted", bestFragments[0]);
  bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 52, 1);
  assertEquals("This is a test where <b>foo</b> is highlighed and should be", bestFragments[0]);
  bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 30, 1);
  assertEquals("a test where <b>foo</b> is highlighed", bestFragments[0]);
  reader.close();
  writer.close();
  dir.close();
}
public void testFunctionScoreQueryHighlight() throws IOException {
  Directory dir = newDirectory();
  IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
  Document doc = new Document();
  FieldType type = new FieldType(TextField.TYPE_STORED);
  type.setStoreTermVectorOffsets(true);
  type.setStoreTermVectorPositions(true);
  type.setStoreTermVectors(true);
  type.freeze();
  Field field = new Field("field", "This is a test where foo is highlighed and should be highlighted", type);
  doc.add(field);
  writer.addDocument(doc);
  FastVectorHighlighter highlighter = new FastVectorHighlighter();
  IndexReader reader = DirectoryReader.open(writer);
  int docId = 0;
  FieldQuery fieldQuery = highlighter.getFieldQuery(
      new FunctionScoreQuery(new TermQuery(new Term("field", "foo")), DoubleValuesSource.constant(1)), reader);
  String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 54, 1);
  // highlighted results are centered
  assertEquals("This is a test where <b>foo</b> is highlighed and should be highlighted", bestFragments[0]);
  bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 52, 1);
  assertEquals("This is a test where <b>foo</b> is highlighed and should be", bestFragments[0]);
  bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 30, 1);
  assertEquals("a test where <b>foo</b> is highlighed", bestFragments[0]);
  reader.close();
  writer.close();
  dir.close();
}
public void testEndOffsetPositionCharAnalyzer() throws Exception {
  Directory dir = newDirectory();
  IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
  Document doc = new Document();
  FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
  customType.setStoreTermVectors(true);
  customType.setStoreTermVectorPositions(true);
  customType.setStoreTermVectorOffsets(true);
  Field f = newField("field", "abcd ", customType);
  doc.add(f);
  doc.add(f);
  w.addDocument(doc);
  w.close();
  IndexReader r = DirectoryReader.open(dir);
  TermsEnum termsEnum = r.getTermVectors(0).terms("field").iterator();
  assertNotNull(termsEnum.next());
  PostingsEnum dpEnum = termsEnum.postings(null, PostingsEnum.ALL);
  assertEquals(2, termsEnum.totalTermFreq());
  assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
  dpEnum.nextPosition();
  assertEquals(0, dpEnum.startOffset());
  assertEquals(4, dpEnum.endOffset());
  dpEnum.nextPosition();
  assertEquals(8, dpEnum.startOffset());
  assertEquals(12, dpEnum.endOffset());
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, dpEnum.nextDoc());
  r.close();
  dir.close();
}
public void testConcurrentPhrase() throws IOException, InvalidTokenOffsetsException {
  final String TEXT = "the fox jumped";
  final Directory directory = newDirectory();
  final IndexWriter indexWriter = new IndexWriter(directory,
      newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)));
  try {
    final Document document = new Document();
    FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
    customType.setStoreTermVectorOffsets(true);
    customType.setStoreTermVectorPositions(true);
    customType.setStoreTermVectors(true);
    document.add(new Field(FIELD, new TokenStreamConcurrent(), customType));
    indexWriter.addDocument(document);
  } finally {
    indexWriter.close();
  }
  final IndexReader indexReader = DirectoryReader.open(directory);
  try {
    assertEquals(1, indexReader.numDocs());
    final IndexSearcher indexSearcher = newSearcher(indexReader);
    final PhraseQuery phraseQuery = new PhraseQuery(FIELD, "fox", "jumped");
    TopDocs hits = indexSearcher.search(phraseQuery, 1);
    assertEquals(1, hits.totalHits.value);
    final Highlighter highlighter = new Highlighter(
        new SimpleHTMLFormatter(), new SimpleHTMLEncoder(),
        new QueryScorer(phraseQuery));
    final TokenStream tokenStream =
        TokenSources.getTermVectorTokenStreamOrNull(FIELD, indexReader.getTermVectors(0), -1);
    assertEquals(highlighter.getBestFragment(new TokenStreamConcurrent(), TEXT),
        highlighter.getBestFragment(tokenStream, TEXT));
  } finally {
    indexReader.close();
    directory.close();
  }
}
public void testSparsePhraseWithNoPositions() throws IOException, InvalidTokenOffsetsException {
  final String TEXT = "the fox did not jump";
  final Directory directory = newDirectory();
  final IndexWriter indexWriter = new IndexWriter(directory,
      newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)));
  try {
    final Document document = new Document();
    FieldType customType = new FieldType(TextField.TYPE_STORED);
    customType.setStoreTermVectorOffsets(true);
    customType.setStoreTermVectors(true);
    document.add(new Field(FIELD, TEXT, customType));
    indexWriter.addDocument(document);
  } finally {
    indexWriter.close();
  }
  final IndexReader indexReader = DirectoryReader.open(directory);
  try {
    assertEquals(1, indexReader.numDocs());
    final IndexSearcher indexSearcher = newSearcher(indexReader);
    final PhraseQuery phraseQuery = new PhraseQuery(1, FIELD, "did", "jump");
    TopDocs hits = indexSearcher.search(phraseQuery, 1);
    assertEquals(1, hits.totalHits.value);
    final Highlighter highlighter = new Highlighter(
        new SimpleHTMLFormatter(), new SimpleHTMLEncoder(),
        new QueryScorer(phraseQuery));
    final TokenStream tokenStream =
        TokenSources.getTermVectorTokenStreamOrNull(FIELD, indexReader.getTermVectors(0), -1);
    assertEquals("the fox <B>did</B> not <B>jump</B>",
        highlighter.getBestFragment(tokenStream, TEXT));
  } finally {
    indexReader.close();
    directory.close();
  }
}
@Override
public void stringField(FieldInfo fieldInfo, String value) throws IOException {
  Predicate<String> readAsBytes = ResultContext.READASBYTES.get();
  if (readAsBytes != null && readAsBytes.test(fieldInfo.name)) {
    final FieldType ft = new FieldType(TextField.TYPE_STORED);
    ft.setStoreTermVectors(fieldInfo.hasVectors());
    ft.setOmitNorms(fieldInfo.omitsNorms());
    ft.setIndexOptions(fieldInfo.getIndexOptions());
    Objects.requireNonNull(value, "String value should not be null");
    doc.add(new StoredField(fieldInfo.name, value, ft));
  } else {
    super.stringField(fieldInfo, value);
  }
}
private Document createDoc(int id) {
  Document doc = new Document();
  FieldType ft = new FieldType();
  ft.setTokenized(true);
  ft.setStored(true);
  ft.setIndexOptions(IndexOptions.DOCS);
  doc.add(new Field("id", "" + id, ft));
  FieldType ft2 = new FieldType();
  ft2.setTokenized(true);
  ft2.setStored(true);
  ft2.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
  doc.add(new Field("name", "echo ensh id " + id, ft2));
  return doc;
}