The following examples show how to use the org.apache.lucene.index.memory.MemoryIndex API class, with sample code and usage patterns; you can also follow the links to view the full source code on GitHub.
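Before the project snippets below, a minimal self-contained sketch of the basic MemoryIndex round trip may help; the class name, field name, text, and query term here are illustrative only:

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.search.TermQuery;

public class MemoryIndexSketch {
    public static void main(String[] args) {
        // A MemoryIndex holds exactly one transient, in-memory document.
        MemoryIndex index = new MemoryIndex();
        index.addField("contents", "the quick brown fox", new StandardAnalyzer());
        // search(Query) scores that single document; 0.0f means no match.
        float score = index.search(new TermQuery(new Term("contents", "fox")));
        System.out.println("score = " + score);
    }
}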
/**
* Search for the user-specified query expression in the current page.
* @throws Exception if an error occurs.
*/
private void search() throws Exception {
final QueryParser parser = new QueryParser("contents", new StandardAnalyzer());
final Query q = parser.parse(query);
final MemoryIndex index = new MemoryIndex();
final Link link = new Link(url);
final PageData pageData = new SimpleHttpClientParser().load(link);
index.addField("contents", pageData.getData().toString(), new StandardAnalyzer());
final IndexSearcher searcher = index.createSearcher();
final Hits hits = searcher.search(q);
@SuppressWarnings("rawtypes")
final Iterator it = hits.iterator();
float relevance = 0f;
// Record a match only when the query returned at least one hit.
if (it.hasNext()) {
while (it.hasNext()) {
final Hit hit = (Hit) it.next();
// Convert the raw score into a percentage with one decimal place.
relevance += ((float) Math.round(hit.getScore() * 1000)) / 10;
}
matchedLinks.add(new LinkMatch(url, relevance));
}
}
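Note that Hits and Hit belong to the Lucene 2.x search API and were removed in Lucene 3.0; against a current Lucene the same accumulation is usually written over TopDocs. A sketch under that assumption (the helper name and the result window of 10 are made up):

import java.io.IOException;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;

// Hypothetical helper reproducing the percentage-style accumulation above.
static float relevance(MemoryIndex index, Query q) throws IOException {
    IndexSearcher searcher = index.createSearcher();
    TopDocs topDocs = searcher.search(q, 10); // 10 = assumed result window
    float relevance = 0f;
    for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
        relevance += ((float) Math.round(scoreDoc.score * 1000)) / 10;
    }
    return relevance;
}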
MemoryIndex indexDoc(ParseContext.Document d, Analyzer analyzer, MemoryIndex memoryIndex) {
for (IndexableField field : d.getFields()) {
if (field.fieldType().indexOptions() == IndexOptions.NONE && field.name().equals(UidFieldMapper.NAME)) {
continue;
}
try {
// TODO: instead of passing null here, we can have a CTL<Map<String,TokenStream>> and pass previous,
// like the indexer does
try (TokenStream tokenStream = field.tokenStream(analyzer, null)) {
if (tokenStream != null) {
memoryIndex.addField(field.name(), tokenStream, field.boost());
}
}
} catch (IOException e) {
throw new ElasticsearchException("Failed to create token stream", e);
}
}
return memoryIndex;
}
@Override
public void prepare(PercolateContext context, ParsedDocument parsedDocument) {
MemoryIndex memoryIndex = cache.get();
for (IndexableField field : parsedDocument.rootDoc().getFields()) {
if (field.fieldType().indexOptions() == IndexOptions.NONE && field.name().equals(UidFieldMapper.NAME)) {
continue;
}
try {
Analyzer analyzer = context.mapperService().documentMapper(parsedDocument.type()).mappers().indexAnalyzer();
// TODO: instead of passing null here, we can have a CTL<Map<String,TokenStream>> and pass previous,
// like the indexer does
try (TokenStream tokenStream = field.tokenStream(analyzer, null)) {
if (tokenStream != null) {
memoryIndex.addField(field.name(), tokenStream, field.boost());
}
}
} catch (Exception e) {
throw new ElasticsearchException("Failed to create token stream for [" + field.name() + "]", e);
}
}
context.initialize(new DocEngineSearcher(memoryIndex), parsedDocument);
}
private Fields generateTermVectors(Collection<GetField> getFields, boolean withOffsets, @Nullable Map<String, String> perFieldAnalyzer, Set<String> fields)
throws IOException {
/* store document in memory index */
MemoryIndex index = new MemoryIndex(withOffsets);
for (GetField getField : getFields) {
String field = getField.getName();
if (fields.contains(field) == false) {
// some fields are returned even when not asked for, e.g. _timestamp
continue;
}
Analyzer analyzer = getAnalyzerAtField(field, perFieldAnalyzer);
for (Object text : getField.getValues()) {
index.addField(field, text.toString(), analyzer);
}
}
/* and read vectors from it */
return MultiFields.getFields(index.createSearcher().getIndexReader());
}
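The Fields instance returned above is an ordinary term-vector view over the one-document in-memory segment, so callers can walk it like any other term-vector source. A sketch of consuming it (the helper name is hypothetical):

import java.io.IOException;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;

// Hypothetical consumer: print every term of every field in the generated vectors.
static void dump(Fields fields) throws IOException {
    for (String field : fields) {
        Terms terms = fields.terms(field);
        if (terms == null) continue;
        TermsEnum te = terms.iterator();
        for (BytesRef term = te.next(); term != null; term = te.next()) {
            System.out.println(field + " -> " + term.utf8ToString());
        }
    }
}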
@Inject
public PercolatorService(Settings settings, IndexNameExpressionResolver indexNameExpressionResolver, IndicesService indicesService,
PageCacheRecycler pageCacheRecycler, BigArrays bigArrays,
HighlightPhase highlightPhase, ClusterService clusterService,
AggregationPhase aggregationPhase, ScriptService scriptService,
MappingUpdatedAction mappingUpdatedAction) {
super(settings);
this.indexNameExpressionResolver = indexNameExpressionResolver;
this.parseFieldMatcher = new ParseFieldMatcher(settings);
this.indicesService = indicesService;
this.pageCacheRecycler = pageCacheRecycler;
this.bigArrays = bigArrays;
this.clusterService = clusterService;
this.highlightPhase = highlightPhase;
this.aggregationPhase = aggregationPhase;
this.scriptService = scriptService;
this.mappingUpdatedAction = mappingUpdatedAction;
this.sortParseElement = new SortParseElement();
final long maxReuseBytes = settings.getAsBytesSize("indices.memory.memory_index.size_per_thread", new ByteSizeValue(1, ByteSizeUnit.MB)).bytes();
cache = new CloseableThreadLocal<MemoryIndex>() {
@Override
protected MemoryIndex initialValue() {
// TODO: should we expose payloads as an option? should offsets be turned on always?
return new ExtendedMemoryIndex(true, false, maxReuseBytes);
}
};
single = new SingleDocumentPercolatorIndex(cache);
multi = new MultiDocumentPercolatorIndex(cache);
percolatorTypes = new IntObjectHashMap<>(6);
percolatorTypes.put(countPercolator.id(), countPercolator);
percolatorTypes.put(queryCountPercolator.id(), queryCountPercolator);
percolatorTypes.put(matchPercolator.id(), matchPercolator);
percolatorTypes.put(queryPercolator.id(), queryPercolator);
percolatorTypes.put(scoringPercolator.id(), scoringPercolator);
percolatorTypes.put(topMatchingPercolator.id(), topMatchingPercolator);
}
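The constructor caches one ExtendedMemoryIndex per thread via CloseableThreadLocal so each percolation can recycle buffers instead of allocating a fresh index; the usual reuse pattern pairs addField() with reset() between documents. A minimal sketch of that pattern (the method, field name, and documents are illustrative):

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.memory.MemoryIndex;

// Hypothetical reuse loop: one MemoryIndex per thread, reset between documents.
static void indexEach(String[] docs, MemoryIndex index) {
    StandardAnalyzer analyzer = new StandardAnalyzer();
    for (String doc : docs) {
        index.addField("body", doc, analyzer); // "body" is an assumed field name
        // ... run queries against the populated index here ...
        index.reset(); // recycle internal buffers instead of allocating a new index
    }
}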
public MemoryIndexOffsetStrategy(UHComponents components, Analyzer analyzer) {
super(components, analyzer);
boolean storePayloads = components.getPhraseHelper().hasPositionSensitivity(); // might be needed
memoryIndex = new MemoryIndex(true, storePayloads); // true == store offsets
memIndexLeafReader = (LeafReader) memoryIndex.createSearcher().getIndexReader(); // appears to be re-usable
// preFilter for MemoryIndex
preMemIndexFilterAutomaton = buildCombinedAutomaton(components);
}
private SingletonDocumentBatch(Analyzer analyzer, Document doc) {
MemoryIndex memoryindex = new MemoryIndex(true, true);
for (IndexableField field : doc) {
memoryindex.addField(field, analyzer);
}
memoryindex.freeze();
reader = (LeafReader) memoryindex.createSearcher().getIndexReader();
}
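SingletonDocumentBatch freezes the MemoryIndex before exposing its reader: freeze() prepares the index for querying (including from multiple threads) and disallows further addField() calls. A minimal build-freeze-read sketch of the same sequence (field name and content are illustrative):

import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.memory.MemoryIndex;

// Minimal build-freeze-read sequence outside the batch class.
static LeafReader frozenReader() {
    MemoryIndex mi = new MemoryIndex(true, true); // store offsets and payloads
    mi.addField("f", "hello world", new WhitespaceAnalyzer());
    mi.freeze(); // prepare for querying; further modification is disallowed
    return (LeafReader) mi.createSearcher().getIndexReader();
}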
public void buildTermVector(int docid) throws IOException {
/* Rebuild the "content" field of the stored document inside a MemoryIndex
and print per-term statistics (docFreq, totalTermFreq). */
Set<String> fieldList = new HashSet<>();
fieldList.add("content");
Document doc = reader.document(docid, fieldList);
MemoryIndex mi = MemoryIndex.fromDocument(doc, new StandardAnalyzer());
IndexReader mr = mi.createSearcher().getIndexReader();
Terms t = mr.leaves().get(0).reader().terms("content");
if ((t != null) && (t.size() > 0)) {
TermsEnum te = t.iterator();
BytesRef term = null;
System.out.println(t.size());
while ((term = te.next()) != null) {
System.out.println("BytesRef: " + term.utf8ToString());
System.out.println("docFreq: " + te.docFreq());
System.out.println("totalTermFreq: " + te.totalTermFreq());
}
}
}
@Test
public void testRewrite() throws IOException {
MemoryIndex memoryIndex = new MemoryIndex();
TaggedQuery taggedQuery = new TaggedQuery(new TermQuery(new Term("field", "value")), "tag");
Query rewrittenQuery = taggedQuery.rewrite(memoryIndex.createSearcher().getTopReaderContext().reader());
assertTrue(rewrittenQuery instanceof TermQuery);
assertEquals("field", ((TermQuery) rewrittenQuery).getTerm().field());
assertEquals("value", ((TermQuery) rewrittenQuery).getTerm().text());
}
MultiDocumentPercolatorIndex(CloseableThreadLocal<MemoryIndex> cache) {
this.cache = cache;
}
private DocSearcher(IndexSearcher searcher, MemoryIndex rootDocMemoryIndex) {
super("percolate", searcher);
this.rootDocMemoryIndex = rootDocMemoryIndex;
}
SingleDocumentPercolatorIndex(CloseableThreadLocal<MemoryIndex> cache) {
this.cache = cache;
}
public DocEngineSearcher(MemoryIndex memoryIndex) {
super("percolate", memoryIndex.createSearcher());
this.memoryIndex = memoryIndex;
}
public void testQueryBuilder() throws IOException {
IndexWriterConfig iwc = new IndexWriterConfig(new KeywordAnalyzer());
Presearcher presearcher = createPresearcher();
Directory dir = new ByteBuffersDirectory();
IndexWriter writer = new IndexWriter(dir, iwc);
MonitorConfiguration config = new MonitorConfiguration(){
@Override
public IndexWriter buildIndexWriter() {
return writer;
}
};
try (Monitor monitor = new Monitor(ANALYZER, presearcher, config)) {
monitor.register(new MonitorQuery("1", parse("f:test")));
try (IndexReader reader = DirectoryReader.open(writer, false, false)) {
MemoryIndex mindex = new MemoryIndex();
mindex.addField("f", "this is a test document", WHITESPACE);
LeafReader docsReader = (LeafReader) mindex.createSearcher().getIndexReader();
QueryIndex.QueryTermFilter termFilter = new QueryIndex.QueryTermFilter(reader);
BooleanQuery q = (BooleanQuery) presearcher.buildQuery(docsReader, termFilter);
BooleanQuery expected = new BooleanQuery.Builder()
.add(should(new BooleanQuery.Builder()
.add(must(new BooleanQuery.Builder().add(should(new TermInSetQuery("f_0", new BytesRef("test")))).build()))
.add(must(new BooleanQuery.Builder().add(should(new TermInSetQuery("f_1", new BytesRef("test")))).build()))
.add(must(new BooleanQuery.Builder().add(should(new TermInSetQuery("f_2", new BytesRef("test")))).build()))
.add(must(new BooleanQuery.Builder().add(should(new TermInSetQuery("f_3", new BytesRef("test")))).build()))
.build()))
.add(should(new TermQuery(new Term("__anytokenfield", "__ANYTOKEN__"))))
.build();
assertEquals(expected, q);
}
}
}
public void testQueryBuilder() throws IOException {
Presearcher presearcher = createPresearcher();
IndexWriterConfig iwc = new IndexWriterConfig(new KeywordAnalyzer());
Directory dir = new ByteBuffersDirectory();
IndexWriter writer = new IndexWriter(dir, iwc);
MonitorConfiguration config = new MonitorConfiguration(){
@Override
public IndexWriter buildIndexWriter() {
return writer;
}
};
try (Monitor monitor = new Monitor(ANALYZER, presearcher, config)) {
monitor.register(new MonitorQuery("1", parse("f:test")));
try (IndexReader reader = DirectoryReader.open(writer, false, false)) {
MemoryIndex mindex = new MemoryIndex();
mindex.addField("f", "this is a test document", WHITESPACE);
mindex.addField("g", "#######", ANALYZER); // analyzes away to empty field
LeafReader docsReader = (LeafReader) mindex.createSearcher().getIndexReader();
QueryIndex.QueryTermFilter termFilter = new QueryIndex.QueryTermFilter(reader);
BooleanQuery q = (BooleanQuery) presearcher.buildQuery(docsReader, termFilter);
BooleanQuery expected = new BooleanQuery.Builder()
.add(should(new BooleanQuery.Builder()
.add(should(new TermInSetQuery("f", new BytesRef("test")))).build()))
.add(should(new TermQuery(new Term("__anytokenfield", "__ANYTOKEN__"))))
.build();
assertEquals(expected, q);
}
}
}
public Map<String, Map<String, List<Integer>>> buildTermVectorWithPosition(int docid, Set<String> fields) throws IOException {
Map<String, Map<String, List<Integer>>> fieldToTermVector = new HashMap<>();
Document doc = reader.document(docid, fields);
MemoryIndex mi = MemoryIndex.fromDocument(doc, new StandardAnalyzer());
IndexReader mr = mi.createSearcher().getIndexReader();
for (LeafReaderContext leafContext : mr.leaves()) {
LeafReader leaf = leafContext.reader();
for (String field : fields) {
Map<String, List<Integer>> termToPositions = new HashMap<>();
Terms t = leaf.terms(field);
if (t != null) {
fieldToTermVector.put(field, termToPositions);
TermsEnum tenum = t.iterator();
BytesRef termBytes = null;
PostingsEnum postings = null;
while ((termBytes = tenum.next()) != null) {
List<Integer> positions = new ArrayList<>();
termToPositions.put(termBytes.utf8ToString(), positions);
// Request positions explicitly; the plain postings(reuse) call only guarantees frequencies.
postings = tenum.postings(postings, PostingsEnum.POSITIONS);
postings.advance(0); // MemoryIndex holds a single document, docid 0
for (int i = 0; i < postings.freq(); i++) {
positions.add(postings.nextPosition());
}
}
}
}
}
return fieldToTermVector;
}
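A short call-site sketch for the method above, assumed to live in the same class (the method name is hypothetical, and docid 0 with a "content" field is illustrative):

// Hypothetical call site: print each term of the "content" field of doc 0 with its positions.
public void printPositions() throws IOException {
    Map<String, Map<String, List<Integer>>> vectors =
            buildTermVectorWithPosition(0, java.util.Collections.singleton("content"));
    for (Map.Entry<String, Map<String, List<Integer>>> field : vectors.entrySet()) {
        for (Map.Entry<String, List<Integer>> term : field.getValue().entrySet()) {
            System.out.println(field.getKey() + "/" + term.getKey() + " @ " + term.getValue());
        }
    }
}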