下面列出了 org.apache.lucene.search.highlight.SimpleHTMLFormatter 类的 API 用法示例代码；也可以点击链接到 GitHub 查看完整源代码。
/**
 * Highlights {@code text} for the given query and returns the highlighted
 * string, or the untouched {@code text} when the highlighter produced nothing.
 * <p>
 * Helper intended for use with <code>testHighlightingWithDefaultField()</code>.
 *
 * @param query     query whose matches are highlighted
 * @param fieldName field the text is analyzed and scored under
 * @param text      raw text to highlight
 * @return the single best fragment (the fragmenter is sized so the whole text
 *         is one fragment), or {@code text} itself if the result is empty
 * @throws IOException                  propagated from analysis/highlighting
 * @throws InvalidTokenOffsetsException propagated from the highlighter
 */
private String highlightField(Query query, String fieldName, String text)
        throws IOException, InvalidTokenOffsetsException {
    try (MockAnalyzer analyzer =
                 new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET);
         TokenStream stream = analyzer.tokenStream(fieldName, text)) {
        // The no-arg formatter is assumed to wrap matches in "<B>" ... "</B>".
        Highlighter highlighter = new Highlighter(
                new SimpleHTMLFormatter(),
                new MyQueryScorer(query, fieldName, FIELD_NAME));
        // One fragment large enough to hold the entire text.
        highlighter.setTextFragmenter(new SimpleFragmenter(Integer.MAX_VALUE));

        String highlighted = highlighter.getBestFragments(stream, text, 1, "(FIELD TEXT TRUNCATED)");
        if (highlighted.length() == 0) {
            return text;
        }
        return highlighted;
    }
}
/**
 * Builds one {@link LumongoHighlighter} per highlight request.
 *
 * @param highlightRequests requests describing field, tags and fragment length
 * @param q                 query whose terms are scored for highlighting
 * @return an immutable empty list when there are no requests; otherwise a new
 *         list with one configured highlighter per request, in request order
 */
private List<LumongoHighlighter> getHighlighterList(List<HighlightRequest> highlightRequests, Query q) {
    if (highlightRequests.isEmpty()) {
        return Collections.emptyList();
    }

    List<LumongoHighlighter> result = new ArrayList<>();
    for (HighlightRequest request : highlightRequests) {
        // Score only against the requested field and expand multi-term queries.
        QueryScorer scorer = new QueryScorer(q, request.getField());
        scorer.setExpandMultiTermQuery(true);

        LumongoHighlighter lumongoHighlighter = new LumongoHighlighter(
                new SimpleHTMLFormatter(request.getPreTag(), request.getPostTag()),
                scorer,
                request);
        lumongoHighlighter.setTextFragmenter(
                new SimpleSpanFragmenter(scorer, request.getFragmentLength()));

        result.add(lumongoHighlighter);
    }
    return result;
}
/**
 * Searches the index stored at {@code indexDir} for {@code q} in the
 * {@code "desc"} field and prints, for each of the top 10 hits, the
 * {@code "city"} and {@code "desc"} values followed by the best highlighted
 * fragment of {@code "desc"}.
 * <p>
 * The directory, the index reader and the analyzer are opened in a
 * try-with-resources statement so they are released even when parsing,
 * searching or highlighting throws.
 *
 * @param indexDir file-system path of the Lucene index
 * @param q        query string, parsed with a {@code QueryParser} on "desc"
 * @throws Exception on any I/O, parse or highlighting failure
 */
public static void search(String indexDir, String q) throws Exception {
    try (Directory dir = FSDirectory.open(Paths.get(indexDir));
         IndexReader reader = DirectoryReader.open(dir);
         // Analyzer analyzer = new StandardAnalyzer(); // standard analyzer alternative
         SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer()) {
        IndexSearcher is = new IndexSearcher(reader);
        QueryParser parser = new QueryParser("desc", analyzer);
        Query query = parser.parse(q);

        // Time only the search call itself.
        long start = System.currentTimeMillis();
        TopDocs hits = is.search(query, 10);
        long end = System.currentTimeMillis();
        System.out.println("匹配 " + q + " ,总共花费" + (end - start) + "毫秒" + "查询到" + hits.totalHits + "个记录");

        QueryScorer scorer = new QueryScorer(query);
        Fragmenter fragmenter = new SimpleSpanFragmenter(scorer);
        SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<b><font color='red'>", "</font></b>");
        Highlighter highlighter = new Highlighter(simpleHTMLFormatter, scorer);
        highlighter.setTextFragmenter(fragmenter);

        for (ScoreDoc scoreDoc : hits.scoreDocs) {
            Document doc = is.doc(scoreDoc.doc);
            String desc = doc.get("desc");
            System.out.println(doc.get("city"));
            System.out.println(desc);
            // Documents without a stored "desc" value cannot be highlighted.
            if (desc != null) {
                TokenStream tokenStream = analyzer.tokenStream("desc", new StringReader(desc));
                System.out.println(highlighter.getBestFragment(tokenStream, desc));
            }
        }
    }
}
/**
 * Installs a highlighter for the current {@code query} that wraps matches in
 * {@code SearchFields.HIGHLIGHTER_PRE_TAG} / {@code HIGHLIGHTER_POST_TAG}.
 *
 * @param property not used by this implementation
 */
@Override
@Deprecated
public void highlight(IRI property) {
    QueryScorer scorer = new QueryScorer(query);
    highlighter = new Highlighter(
            new SimpleHTMLFormatter(SearchFields.HIGHLIGHTER_PRE_TAG, SearchFields.HIGHLIGHTER_POST_TAG),
            scorer);
}
/**
 * Creates the HTML formatter for {@code fieldName}.
 * <p>
 * The pre/post tags are read from the request parameters (falling back to this
 * component's {@code defaults}), and default to {@code <em>} / {@code </em>}
 * when neither supplies them. Each call increments the request counter.
 *
 * @param fieldName field the formatter is requested for
 * @param params    request parameters
 * @return a {@link SimpleHTMLFormatter} configured with the resolved tags
 */
@Override
public Formatter getFormatter(String fieldName, SolrParams params) {
    numRequests.inc();

    SolrParams effective = SolrParams.wrapDefaults(params, defaults);
    String preTag = effective.getFieldParam(fieldName, HighlightParams.SIMPLE_PRE, "<em>");
    String postTag = effective.getFieldParam(fieldName, HighlightParams.SIMPLE_POST, "</em>");
    return new SimpleHTMLFormatter(preTag, postTag);
}
/**
 * Highlights the query terms in a result document.
 * <p>
 * Up to three best fragments of the content field are joined with
 * {@code HIGHLIGHT_SEPARATOR}. When the content yields no highlight (or the
 * field is absent), the description field is tried instead and the result
 * document is flagged as highlighting the description. The title is
 * highlighted separately. A field that is missing from {@code doc} is skipped
 * rather than causing a {@link NullPointerException}.
 *
 * @param query          query whose terms are highlighted
 * @param analyzer       analyzer used to tokenize the stored field values
 * @param doc            Lucene document holding the stored fields
 * @param resultDocument target that receives the highlighted text
 * @throws IOException if tokenizing a field fails
 */
private void doHighlight(final Query query, final Analyzer analyzer, final Document doc, final ResultDocument resultDocument) throws IOException {
    final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(HIGHLIGHT_PRE_TAG, HIGHLIGHT_POST_TAG), new QueryScorer(query));
    try {
        // Highlight content: take the 3 best fragments, joined by the separator.
        String highlightResult = "";
        final String content = doc.get(AbstractOlatDocument.CONTENT_FIELD_NAME);
        if (content != null) {
            final TokenStream contentStream = analyzer.tokenStream(AbstractOlatDocument.CONTENT_FIELD_NAME, new StringReader(content));
            highlightResult = highlighter.getBestFragments(contentStream, content, 3, HIGHLIGHT_SEPARATOR);
        }

        // Nothing highlighted in the content => look in the description.
        if (highlightResult.length() == 0) {
            final String description = doc.get(AbstractOlatDocument.DESCRIPTION_FIELD_NAME);
            if (description != null) {
                final TokenStream descriptionStream = analyzer.tokenStream(AbstractOlatDocument.DESCRIPTION_FIELD_NAME, new StringReader(description));
                highlightResult = highlighter.getBestFragments(descriptionStream, description, 3, HIGHLIGHT_SEPARATOR);
                resultDocument.setHighlightingDescription(true);
            }
        }
        resultDocument.setHighlightResult(highlightResult);

        // Highlight title.
        final String title = doc.get(AbstractOlatDocument.TITLE_FIELD_NAME);
        if (title != null) {
            final TokenStream titleStream = analyzer.tokenStream(AbstractOlatDocument.TITLE_FIELD_NAME, new StringReader(title));
            final String highlightTitle = highlighter.getBestFragments(titleStream, title, 3, " ");
            resultDocument.setHighlightTitle(highlightTitle);
        }
    } catch (final InvalidTokenOffsetsException e) {
        // Best effort: a highlighting failure must not break result rendering.
        log.warn("Could not highlight search result", e);
    }
}
/**
 * Highlights the query terms in a result document.
 * <p>
 * Up to three best fragments of the content field are joined with
 * {@code HIGHLIGHT_SEPARATOR}. When the content yields no highlight (or the
 * field is absent), the description field is tried instead and the result
 * document is flagged as highlighting the description. The title is
 * highlighted separately. A field that is missing from {@code doc} is skipped
 * rather than causing a {@link NullPointerException}.
 *
 * @param query          query whose terms are highlighted
 * @param analyzer       analyzer used to tokenize the stored field values
 * @param doc            Lucene document holding the stored fields
 * @param resultDocument target that receives the highlighted text
 * @throws IOException if tokenizing a field fails
 */
private void doHighlight(final Query query, final Analyzer analyzer, final Document doc, final ResultDocument resultDocument) throws IOException {
    final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(HIGHLIGHT_PRE_TAG, HIGHLIGHT_POST_TAG), new QueryScorer(query));
    try {
        // Highlight content: take the 3 best fragments, joined by the separator.
        String highlightResult = "";
        final String content = doc.get(AbstractOlatDocument.CONTENT_FIELD_NAME);
        if (content != null) {
            final TokenStream contentStream = analyzer.tokenStream(AbstractOlatDocument.CONTENT_FIELD_NAME, new StringReader(content));
            highlightResult = highlighter.getBestFragments(contentStream, content, 3, HIGHLIGHT_SEPARATOR);
        }

        // Nothing highlighted in the content => look in the description.
        if (highlightResult.length() == 0) {
            final String description = doc.get(AbstractOlatDocument.DESCRIPTION_FIELD_NAME);
            if (description != null) {
                final TokenStream descriptionStream = analyzer.tokenStream(AbstractOlatDocument.DESCRIPTION_FIELD_NAME, new StringReader(description));
                highlightResult = highlighter.getBestFragments(descriptionStream, description, 3, HIGHLIGHT_SEPARATOR);
                resultDocument.setHighlightingDescription(true);
            }
        }
        resultDocument.setHighlightResult(highlightResult);

        // Highlight title.
        final String title = doc.get(AbstractOlatDocument.TITLE_FIELD_NAME);
        if (title != null) {
            final TokenStream titleStream = analyzer.tokenStream(AbstractOlatDocument.TITLE_FIELD_NAME, new StringReader(title));
            final String highlightTitle = highlighter.getBestFragments(titleStream, title, 3, " ");
            resultDocument.setHighlightTitle(highlightTitle);
        }
    } catch (final InvalidTokenOffsetsException e) {
        // Best effort: a highlighting failure must not break result rendering.
        log.warn("Could not highlight search result", e);
    }
}
/**
 * Builds a new document containing the highlighted form of {@code document}.
 * <p>
 * NOTE: This method will not preserve the correct field types; every
 * highlighted value is added as a stored {@code StringField}.
 * <ul>
 * <li>The field-less field and any field listed in
 * {@code FIELDS_NOT_TO_HIGHLIGHT} are copied over unchanged.</li>
 * <li>A numeric field is added, wrapped in the tags, only when
 * {@code shouldNumberBeHighlighted} accepts it.</li>
 * <li>For any other field, each non-null fragment with a positive score
 * (at most 10 are requested) is added as its own field.</li>
 * </ul>
 *
 * @param docId        id of the document inside {@code reader}
 * @param document     source document whose fields are examined
 * @param query        query to highlight
 * @param fieldManager supplies the field-less field name, the query analyzer
 *                     and whether a field is field-less indexed
 * @param reader       index reader used to obtain token streams
 * @param preTag       text inserted before each highlighted term
 * @param postTag      text inserted after each highlighted term
 * @return the newly built document
 * @throws IOException                  propagated from token stream access or highlighting
 * @throws InvalidTokenOffsetsException propagated from the highlighter
 */
public static Document highlight(int docId, Document document, Query query, FieldManager fieldManager,
        IndexReader reader, String preTag, String postTag) throws IOException, InvalidTokenOffsetsException {
    final String fieldLessFieldName = fieldManager.getFieldLessFieldName();
    final Query defaultQuery = fixSuperQuery(query, null, fieldLessFieldName);
    final Analyzer analyzer = fieldManager.getAnalyzerForQuery();
    final SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter(preTag, postTag);
    final Document highlighted = new Document();

    for (IndexableField field : document) {
        final String name = field.name();

        // Pass-through fields are copied as-is.
        if (fieldLessFieldName.equals(name) || FIELDS_NOT_TO_HIGHLIGHT.contains(name)) {
            highlighted.add(field);
            continue;
        }

        final String text = field.stringValue();
        final Number numericValue = field.numericValue();
        // Field-less indexed fields get a query rewritten for that field.
        final Query fieldQuery = fieldManager.isFieldLessIndexed(name)
                ? fixSuperQuery(query, name, fieldLessFieldName)
                : defaultQuery;

        if (numericValue != null) {
            if (shouldNumberBeHighlighted(name, numericValue, fieldQuery)) {
                highlighted.add(new StringField(name, preTag + text + postTag, Store.YES));
            }
            continue;
        }

        final Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(fieldQuery, name));
        final TokenStream tokenStream = TokenSources.getAnyTokenStream(reader, docId, name, analyzer);
        for (TextFragment fragment : highlighter.getBestTextFragments(tokenStream, text, false, 10)) {
            if (fragment != null && fragment.getScore() > 0) {
                highlighted.add(new StringField(name, fragment.toString(), Store.YES));
            }
        }
    }
    return highlighted;
}
/**
 * Full-text searches posts and returns one page of results with the matched
 * terms in title and summary wrapped in a red {@code <span>}.
 * <p>
 * The query is a fuzzy keyword query (edit distance up to 1, prefix length 1)
 * over the {@code title}, {@code summary} and {@code tags} fields. The
 * analyzer used for highlighting is closed once the page has been mapped.
 * Highlighting is best effort: a failure is logged and the post keeps its
 * original text.
 *
 * @param pageable paging information (offset and page size)
 * @param term     search term
 * @return the requested page; the total is taken from the query's result size
 * @throws Exception propagated from query construction or execution
 */
@Override
@PostStatusFilter
public Page<PostVO> search(Pageable pageable, String term) throws Exception {
    FullTextEntityManager fullTextEntityManager = Search.getFullTextEntityManager(entityManager);
    QueryBuilder builder = fullTextEntityManager.getSearchFactory().buildQueryBuilder().forEntity(Post.class).get();

    Query luceneQuery = builder
            .keyword()
            .fuzzy()
            .withEditDistanceUpTo(1)
            .withPrefixLength(1)
            .onFields("title", "summary", "tags")
            .matching(term).createQuery();

    FullTextQuery query = fullTextEntityManager.createFullTextQuery(luceneQuery, Post.class);
    query.setFirstResult((int) pageable.getOffset());
    query.setMaxResults(pageable.getPageSize());

    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<span style='color:red;'>", "</span>");
    QueryScorer scorer = new QueryScorer(luceneQuery);
    Highlighter highlighter = new Highlighter(formatter, scorer);

    List<Post> list = query.getResultList();
    List<PostVO> rets;
    // The analyzer holds resources; release it as soon as highlighting is done.
    try (SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer()) {
        rets = list.stream().map(po -> {
            PostVO post = BeanMapUtils.copy(po);
            try {
                // Apply highlighting; skip empty source text so a missing title
                // cannot prevent the summary from being highlighted.
                if (StringUtils.isNotEmpty(post.getTitle())) {
                    String title = highlighter.getBestFragment(analyzer, "title", post.getTitle());
                    if (StringUtils.isNotEmpty(title)) {
                        post.setTitle(title);
                    }
                }
                if (StringUtils.isNotEmpty(post.getSummary())) {
                    String summary = highlighter.getBestFragment(analyzer, "summary", post.getSummary());
                    if (StringUtils.isNotEmpty(summary)) {
                        post.setSummary(summary);
                    }
                }
            } catch (Exception e) {
                log.error(e.getMessage(), e);
            }
            return post;
        }).collect(Collectors.toList());
    }

    buildUsers(rets);
    return new PageImpl<>(rets, pageable, query.getResultSize());
}
/**
 * Demo entry point: searches {@code Book} entities for "实战" across
 * {@code name}, {@code description} and {@code authors.name} through a
 * Hibernate Search {@code FullTextSession}, highlights the matches and prints
 * the first page (5 results).
 * <p>
 * Changes from the naive version: the highlight closing tag is well-formed
 * ({@code </font></b>}), one analyzer instance is reused, null text is skipped
 * so one missing value does not abort the remaining fields, highlighting
 * failures are reported on stderr, and the session and session factory are
 * closed in {@code finally} blocks.
 *
 * @param args unused
 * @throws Exception on context start-up, query parsing or search failure
 */
public static void main(String[] args) throws Exception{
    ApplicationContext applicationContext = new ClassPathXmlApplicationContext("applicationContext.xml");
    SessionFactory sessionFactory = applicationContext.getBean("hibernate4sessionFactory", SessionFactory.class);
    try {
        FullTextSession fullTextSession = Search.getFullTextSession(sessionFactory.openSession());
        try {
            // Alternative: query name, description and authors.name via the Hibernate Search API.
            // QueryBuilder qb = fullTextEntityManager.getSearchFactory().buildQueryBuilder().forEntity(Book.class ).get();
            // Query luceneQuery = qb.keyword().onFields("name","description","authors.name").matching("移动互联网").createQuery();

            // Query the same fields via the Lucene API, using the Paoding analyzer.
            Analyzer analyzer = new PaodingAnalyzer();
            MultiFieldQueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_36, new String[]{"name","description","authors.name"}, analyzer);
            Query luceneQuery = queryParser.parse("实战");

            FullTextQuery fullTextQuery = fullTextSession.createFullTextQuery(luceneQuery, Book.class);
            // Page size.
            fullTextQuery.setMaxResults(5);
            // Offset of the current page.
            fullTextQuery.setFirstResult(0);

            // Highlighter setup.
            SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<b><font color='red'>", "</font></b>");
            QueryScorer queryScorer = new QueryScorer(luceneQuery);
            Highlighter highlighter = new Highlighter(formatter, queryScorer);

            @SuppressWarnings("unchecked")
            List<Book> resultList = fullTextQuery.list();
            System.out.println("共查找到["+resultList.size()+"]条记录");
            for (Book book : resultList) {
                try {
                    // Highlight name.
                    if (book.getName() != null) {
                        String highlightedName = highlighter.getBestFragment(analyzer, "name", book.getName());
                        if (highlightedName != null) {
                            book.setName(highlightedName);
                        }
                    }
                    // Highlight authors.name.
                    Set<Author> authors = book.getAuthors();
                    if (authors != null) {
                        for (Author author : authors) {
                            if (author.getName() == null) {
                                continue;
                            }
                            String highlightedAuthor = highlighter.getBestFragment(analyzer, "authors.name", author.getName());
                            if (highlightedAuthor != null) {
                                author.setName(highlightedAuthor);
                            }
                        }
                    }
                    // Highlight description.
                    if (book.getDescription() != null) {
                        String highlightedDescription = highlighter.getBestFragment(analyzer, "description", book.getDescription());
                        if (highlightedDescription != null) {
                            book.setDescription(highlightedDescription);
                        }
                    }
                } catch (Exception e) {
                    // Best effort: keep the unhighlighted values, but do not hide the failure.
                    System.err.println("Highlighting failed for book '" + book.getName() + "': " + e);
                }
                System.out.println("书名:"+book.getName()+"\n描述:"+book.getDescription()+"\n出版日期:"+book.getPublicationDate());
                System.out.println("----------------------------------------------------------");
            }
        } finally {
            fullTextSession.close();
        }
    } finally {
        sessionFactory.close();
    }
}
/**
 * Full-text searches {@code Book} entities through a Hibernate Search
 * {@code FullTextSession} and returns one page of results with the matched
 * terms highlighted in name, author names and description.
 * <p>
 * The total hit count is stored in the result before paging is applied.
 * Results are sorted by {@code id} as an int field with the reverse flag set.
 * Highlighting is best effort: null text is skipped, and a failure is reported
 * on stderr while the book keeps its original values.
 * <p>
 * NOTE(review): the {@code field} argument is currently ignored; the searched
 * fields are fixed to name, description and authors.name. Confirm whether
 * callers expect it to be honoured.
 * NOTE(review): highlighted text is written back through the entity setters;
 * verify these entities cannot be flushed to the database in this state.
 *
 * @param keyword  query string, parsed with a {@code MultiFieldQueryParser}
 * @param start    index of the first result to return
 * @param pagesize maximum number of results to return
 * @param analyzer analyzer used for both parsing and highlighting
 * @param field    unused (see note above)
 * @return the page of books together with the total hit count
 * @throws Exception on query parsing or search failure
 */
@Override
public QueryResult<Book> query(String keyword, int start, int pagesize,Analyzer analyzer,String...field) throws Exception{
    QueryResult<Book> queryResult = new QueryResult<Book>();
    List<Book> books = new ArrayList<Book>();
    FullTextSession fullTextSession = Search.getFullTextSession(getSession());

    // Alternative: query the requested fields via the Hibernate Search API.
    //QueryBuilder qb = fullTextSession.getSearchFactory().buildQueryBuilder().forEntity(Book.class ).get();
    //Query luceneQuery = qb.keyword().onFields(field).matching(keyword).createQuery();

    // Query name, description and authors.name via the Lucene API.
    MultiFieldQueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_36,new String[]{"name","description","authors.name"}, analyzer);
    Query luceneQuery = queryParser.parse(keyword);
    FullTextQuery fullTextQuery = fullTextSession.createFullTextQuery(luceneQuery);

    int searchresultsize = fullTextQuery.getResultSize();
    queryResult.setSearchresultsize(searchresultsize);
    System.out.println("共查找到["+searchresultsize+"]条记录");

    fullTextQuery.setFirstResult(start);
    fullTextQuery.setMaxResults(pagesize);
    // Sort by id.
    fullTextQuery.setSort(new Sort(new SortField("id", SortField.INT ,true)));

    // Highlighter setup.
    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<b><font color='red'>", "</font></b>");
    QueryScorer queryScorer = new QueryScorer(luceneQuery);
    Highlighter highlighter = new Highlighter(formatter, queryScorer);

    @SuppressWarnings("unchecked")
    List<Book> tempresult = fullTextQuery.list();
    for (Book book : tempresult) {
        try {
            // Highlight name.
            if (book.getName() != null) {
                String highlightedName = highlighter.getBestFragment(analyzer, "name", book.getName());
                if (highlightedName != null) {
                    book.setName(highlightedName);
                }
            }
            // Highlight authors.name.
            Set<Author> authors = book.getAuthors();
            if (authors != null) {
                for (Author author : authors) {
                    if (author.getName() == null) {
                        continue;
                    }
                    String highlightedAuthor = highlighter.getBestFragment(analyzer, "authors.name", author.getName());
                    if (highlightedAuthor != null) {
                        author.setName(highlightedAuthor);
                    }
                }
            }
            // Highlight description.
            if (book.getDescription() != null) {
                String highlightedDescription = highlighter.getBestFragment(analyzer, "description", book.getDescription());
                if (highlightedDescription != null) {
                    book.setDescription(highlightedDescription);
                }
            }
        } catch (Exception e) {
            // Best effort: keep the unhighlighted values, but do not hide the failure.
            System.err.println("Highlighting failed for book '" + book.getName() + "': " + e);
        }
        books.add(book);
        System.out.println("书名:"+book.getName()+"\n描述:"+book.getDescription()+"\n出版日期:"+book.getPublicationDate());
        System.out.println("----------------------------------------------------------");
    }
    queryResult.setSearchresult(books);
    return queryResult;
}
/**
 * Demo entry point: searches {@code Book} entities for "实战" across
 * {@code name}, {@code description} and {@code authors.name} through a
 * Hibernate Search {@code FullTextEntityManager} and highlights the matches
 * of the first page (5 results).
 * <p>
 * Changes from the naive version: the highlight closing tag is well-formed
 * ({@code </font></b>}), one analyzer instance is reused, null text is skipped
 * so one missing value does not abort the remaining fields, highlighting
 * failures are reported on stderr, and the entity manager and its factory are
 * closed in {@code finally} blocks.
 *
 * @param args unused
 * @throws Exception on context start-up, query parsing or search failure
 */
public static void main(String[] args) throws Exception{
    ApplicationContext applicationContext = new ClassPathXmlApplicationContext("applicationContext.xml");
    EntityManagerFactory entityManagerFactory = applicationContext.getBean("entityManagerFactory", EntityManagerFactory.class);
    try {
        FullTextEntityManager fullTextEntityManager = Search.getFullTextEntityManager(entityManagerFactory.createEntityManager());
        try {
            // Alternative: query name, description and authors.name via the Hibernate Search API.
            // QueryBuilder qb = fullTextEntityManager.getSearchFactory().buildQueryBuilder().forEntity(Book.class ).get();
            // Query luceneQuery = qb.keyword().onFields("name","description","authors.name").matching("移动互联网").createQuery();

            // Query the same fields via the Lucene API, using the Paoding analyzer.
            Analyzer analyzer = new PaodingAnalyzer();
            MultiFieldQueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_36, new String[]{"name","description","authors.name"}, analyzer);
            Query luceneQuery = queryParser.parse("实战");

            FullTextQuery fullTextQuery = fullTextEntityManager.createFullTextQuery(luceneQuery, Book.class);
            // Page size.
            fullTextQuery.setMaxResults(5);
            // Offset of the current page.
            fullTextQuery.setFirstResult(0);

            // Highlighter setup.
            SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<b><font color='red'>", "</font></b>");
            QueryScorer queryScorer = new QueryScorer(luceneQuery);
            Highlighter highlighter = new Highlighter(formatter, queryScorer);

            @SuppressWarnings("unchecked")
            List<Book> resultList = fullTextQuery.getResultList();
            for (Book book : resultList) {
                try {
                    // Highlight name.
                    if (book.getName() != null) {
                        String highlightedName = highlighter.getBestFragment(analyzer, "name", book.getName());
                        if (highlightedName != null) {
                            book.setName(highlightedName);
                        }
                    }
                    // Highlight authors.name.
                    Set<Author> authors = book.getAuthors();
                    if (authors != null) {
                        for (Author author : authors) {
                            if (author.getName() == null) {
                                continue;
                            }
                            String highlightedAuthor = highlighter.getBestFragment(analyzer, "authors.name", author.getName());
                            if (highlightedAuthor != null) {
                                author.setName(highlightedAuthor);
                            }
                        }
                    }
                    // Highlight description.
                    if (book.getDescription() != null) {
                        String highlightedDescription = highlighter.getBestFragment(analyzer, "description", book.getDescription());
                        if (highlightedDescription != null) {
                            book.setDescription(highlightedDescription);
                        }
                    }
                } catch (Exception e) {
                    // Best effort: keep the unhighlighted values, but do not hide the failure.
                    System.err.println("Highlighting failed for book '" + book.getName() + "': " + e);
                }
            }
        } finally {
            fullTextEntityManager.close();
        }
    } finally {
        entityManagerFactory.close();
    }
}
/**
 * Full-text searches {@code Book} entities through a Hibernate Search
 * {@code FullTextEntityManager} and returns one page of results with the
 * matched terms highlighted in name, author names and description.
 * <p>
 * The total hit count is stored in the result before paging is applied.
 * Results are sorted by {@code id} as an int field with the reverse flag set.
 * Highlighting is best effort: null text is skipped, and a failure is reported
 * on stderr while the book keeps its original values.
 * <p>
 * NOTE(review): the {@code field} argument is currently ignored; the searched
 * fields are fixed to name, description and authors.name. Confirm whether
 * callers expect it to be honoured.
 * NOTE(review): highlighted text is written back through the entity setters;
 * verify these entities cannot be flushed to the database in this state.
 *
 * @param keyword  query string, parsed with a {@code MultiFieldQueryParser}
 * @param start    index of the first result to return
 * @param pagesize maximum number of results to return
 * @param analyzer analyzer used for both parsing and highlighting
 * @param field    unused (see note above)
 * @return the page of books together with the total hit count
 * @throws Exception on query parsing or search failure
 */
@Override
public QueryResult<Book> query(String keyword, int start, int pagesize,Analyzer analyzer,String...field) throws Exception{
    QueryResult<Book> queryResult = new QueryResult<Book>();
    List<Book> books = new ArrayList<Book>();
    FullTextEntityManager fullTextEntityManager = Search.getFullTextEntityManager(em);

    // Alternative: query the requested fields via the Hibernate Search API.
    //QueryBuilder qb = fullTextSession.getSearchFactory().buildQueryBuilder().forEntity(Book.class ).get();
    //Query luceneQuery = qb.keyword().onFields(field).matching(keyword).createQuery();

    // Query name, description and authors.name via the Lucene API.
    MultiFieldQueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_36,new String[]{"name","description","authors.name"}, analyzer);
    Query luceneQuery = queryParser.parse(keyword);
    FullTextQuery fullTextQuery = fullTextEntityManager.createFullTextQuery(luceneQuery);

    int searchresultsize = fullTextQuery.getResultSize();
    queryResult.setSearchresultsize(searchresultsize);

    fullTextQuery.setFirstResult(start);
    fullTextQuery.setMaxResults(pagesize);
    // Sort by id.
    fullTextQuery.setSort(new Sort(new SortField("id", SortField.INT ,true)));

    // Highlighter setup.
    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<b><font color='red'>", "</font></b>");
    QueryScorer queryScorer = new QueryScorer(luceneQuery);
    Highlighter highlighter = new Highlighter(formatter, queryScorer);

    @SuppressWarnings("unchecked")
    List<Book> tempresult = fullTextQuery.getResultList();
    for (Book book : tempresult) {
        try {
            // Highlight name.
            if (book.getName() != null) {
                String highlightedName = highlighter.getBestFragment(analyzer, "name", book.getName());
                if (highlightedName != null) {
                    book.setName(highlightedName);
                }
            }
            // Highlight authors.name.
            Set<Author> authors = book.getAuthors();
            if (authors != null) {
                for (Author author : authors) {
                    if (author.getName() == null) {
                        continue;
                    }
                    String highlightedAuthor = highlighter.getBestFragment(analyzer, "authors.name", author.getName());
                    if (highlightedAuthor != null) {
                        author.setName(highlightedAuthor);
                    }
                }
            }
            // Highlight description.
            if (book.getDescription() != null) {
                String highlightedDescription = highlighter.getBestFragment(analyzer, "description", book.getDescription());
                if (highlightedDescription != null) {
                    book.setDescription(highlightedDescription);
                }
            }
        } catch (Exception e) {
            // Best effort: keep the unhighlighted values, but do not hide the failure.
            System.err.println("Highlighting failed for book '" + book.getName() + "': " + e);
        }
        books.add(book);
    }
    queryResult.setSearchresult(books);
    return queryResult;
}