下面列出了org.apache.lucene.search.highlight.Scorer#org.apache.lucene.search.highlight.InvalidTokenOffsetsException 实例代码,或者点击链接到github查看源代码,也可以在右侧发表评论。
/**
* This method intended for use with
* <code>testHighlightingWithDefaultField()</code>
*/
private String highlightField(Query query, String fieldName,
String text) throws IOException, InvalidTokenOffsetsException {
try (MockAnalyzer mockAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE,true,
MockTokenFilter.ENGLISH_STOPSET); TokenStream tokenStream = mockAnalyzer.tokenStream(fieldName, text)) {
// Assuming "<B>", "</B>" used to highlight
SimpleHTMLFormatter formatter = new SimpleHTMLFormatter();
MyQueryScorer scorer = new MyQueryScorer(query, fieldName, FIELD_NAME);
Highlighter highlighter = new Highlighter(formatter, scorer);
highlighter.setTextFragmenter(new SimpleFragmenter(Integer.MAX_VALUE));
String rv = highlighter.getBestFragments(tokenStream, text, 1,
"(FIELD TEXT TRUNCATED)");
return rv.length() == 0 ? text : rv;
}
}
public void testHighlightCustomQuery() throws IOException,
InvalidTokenOffsetsException {
String s1 = "I call our world Flatland, not because we call it so,";
// Verify that a query against the default field results in text being
// highlighted
// regardless of the field name.
CustomQuery q = new CustomQuery(new Term(FIELD_NAME, "world"));
String expected = "I call our <B>world</B> Flatland, not because we call it so,";
String observed = highlightField(q, "SOME_FIELD_NAME", s1);
if (VERBOSE)
System.out.println("Expected: \"" + expected + "\n" + "Observed: \""
+ observed);
assertEquals(
"Query in the default field results in text for *ANY* field being highlighted",
expected, observed);
// Verify that a query against a named field does not result in any
// highlighting
// when the query field name differs from the name of the field being
// highlighted,
// which in this example happens to be the default field name.
q = new CustomQuery(new Term("text", "world"));
expected = s1;
observed = highlightField(q, FIELD_NAME, s1);
if (VERBOSE)
System.out.println("Expected: \"" + expected + "\n" + "Observed: \""
+ observed);
assertEquals(
"Query in a named field does not result in highlighting when that field isn't in the query",
s1, highlightField(q, FIELD_NAME, s1));
}
/**
* Highlight (bold,color) query words in result-document. Set HighlightResult for content or description.
*
* @param query
* @param analyzer
* @param doc
* @param resultDocument
* @throws IOException
*/
private void doHighlight(final Query query, final Analyzer analyzer, final Document doc, final ResultDocument resultDocument) throws IOException {
final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(HIGHLIGHT_PRE_TAG, HIGHLIGHT_POST_TAG), new QueryScorer(query));
// Get 3 best fragments of content and seperate with a "..."
try {
// highlight content
final String content = doc.get(AbstractOlatDocument.CONTENT_FIELD_NAME);
TokenStream tokenStream = analyzer.tokenStream(AbstractOlatDocument.CONTENT_FIELD_NAME, new StringReader(content));
String highlightResult = highlighter.getBestFragments(tokenStream, content, 3, HIGHLIGHT_SEPARATOR);
// if no highlightResult is in content => look in description
if (highlightResult.length() == 0) {
final String description = doc.get(AbstractOlatDocument.DESCRIPTION_FIELD_NAME);
tokenStream = analyzer.tokenStream(AbstractOlatDocument.DESCRIPTION_FIELD_NAME, new StringReader(description));
highlightResult = highlighter.getBestFragments(tokenStream, description, 3, HIGHLIGHT_SEPARATOR);
resultDocument.setHighlightingDescription(true);
}
resultDocument.setHighlightResult(highlightResult);
// highlight title
final String title = doc.get(AbstractOlatDocument.TITLE_FIELD_NAME);
tokenStream = analyzer.tokenStream(AbstractOlatDocument.TITLE_FIELD_NAME, new StringReader(title));
final String highlightTitle = highlighter.getBestFragments(tokenStream, title, 3, " ");
resultDocument.setHighlightTitle(highlightTitle);
} catch (final InvalidTokenOffsetsException e) {
log.warn("", e);
}
}
/**
* Highlight (bold,color) query words in result-document. Set HighlightResult for content or description.
*
* @param query
* @param analyzer
* @param doc
* @param resultDocument
* @throws IOException
*/
private void doHighlight(final Query query, final Analyzer analyzer, final Document doc, final ResultDocument resultDocument) throws IOException {
final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(HIGHLIGHT_PRE_TAG, HIGHLIGHT_POST_TAG), new QueryScorer(query));
// Get 3 best fragments of content and seperate with a "..."
try {
// highlight content
final String content = doc.get(AbstractOlatDocument.CONTENT_FIELD_NAME);
TokenStream tokenStream = analyzer.tokenStream(AbstractOlatDocument.CONTENT_FIELD_NAME, new StringReader(content));
String highlightResult = highlighter.getBestFragments(tokenStream, content, 3, HIGHLIGHT_SEPARATOR);
// if no highlightResult is in content => look in description
if (highlightResult.length() == 0) {
final String description = doc.get(AbstractOlatDocument.DESCRIPTION_FIELD_NAME);
tokenStream = analyzer.tokenStream(AbstractOlatDocument.DESCRIPTION_FIELD_NAME, new StringReader(description));
highlightResult = highlighter.getBestFragments(tokenStream, description, 3, HIGHLIGHT_SEPARATOR);
resultDocument.setHighlightingDescription(true);
}
resultDocument.setHighlightResult(highlightResult);
// highlight title
final String title = doc.get(AbstractOlatDocument.TITLE_FIELD_NAME);
tokenStream = analyzer.tokenStream(AbstractOlatDocument.TITLE_FIELD_NAME, new StringReader(title));
final String highlightTitle = highlighter.getBestFragments(tokenStream, title, 3, " ");
resultDocument.setHighlightTitle(highlightTitle);
} catch (final InvalidTokenOffsetsException e) {
log.warn("", e);
}
}
/**
* NOTE: This method will not preserve the correct field types.
*
* @param preTag
* @param postTag
*/
public static Document highlight(int docId, Document document, Query query, FieldManager fieldManager,
IndexReader reader, String preTag, String postTag) throws IOException, InvalidTokenOffsetsException {
String fieldLessFieldName = fieldManager.getFieldLessFieldName();
Query fixedQuery = fixSuperQuery(query, null, fieldLessFieldName);
Analyzer analyzer = fieldManager.getAnalyzerForQuery();
SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter(preTag, postTag);
Document result = new Document();
for (IndexableField f : document) {
String name = f.name();
if (fieldLessFieldName.equals(name) || FIELDS_NOT_TO_HIGHLIGHT.contains(name)) {
result.add(f);
continue;
}
String text = f.stringValue();
Number numericValue = f.numericValue();
Query fieldFixedQuery;
if (fieldManager.isFieldLessIndexed(name)) {
fieldFixedQuery = fixSuperQuery(query, name, fieldLessFieldName);
} else {
fieldFixedQuery = fixedQuery;
}
if (numericValue != null) {
if (shouldNumberBeHighlighted(name, numericValue, fieldFixedQuery)) {
String numberHighlight = preTag + text + postTag;
result.add(new StringField(name, numberHighlight, Store.YES));
}
} else {
Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(fieldFixedQuery, name));
TokenStream tokenStream = TokenSources.getAnyTokenStream(reader, docId, name, analyzer);
TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, text, false, 10);
for (int j = 0; j < frag.length; j++) {
if ((frag[j] != null) && (frag[j].getScore() > 0)) {
result.add(new StringField(name, frag[j].toString(), Store.YES));
}
}
}
}
return result;
}