The following are example usages of org.apache.lucene.analysis.core.WhitespaceAnalyzer, collected from open-source projects.
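As a quick orientation before the examples, here is a minimal, self-contained sketch (not taken from the projects below) of what WhitespaceAnalyzer does: it tokenizes purely on whitespace, with no lowercasing and no stop-word removal. The field name "body" and the sample text are illustrative only.

import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

// Minimal sketch: tokenize a string with WhitespaceAnalyzer and print each token.
public void demoWhitespaceAnalyzer() throws IOException {
  try (Analyzer analyzer = new WhitespaceAnalyzer();
       TokenStream ts = analyzer.tokenStream("body", "Hello   Lucene\tWorld")) {
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    ts.reset();                              // a TokenStream must be reset before consuming
    while (ts.incrementToken()) {
      System.out.println(term.toString());   // prints "Hello", "Lucene", "World"
    }
    ts.end();
  }
}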
public void testRamDirectory() throws IOException {
  long start = System.currentTimeMillis();
  IndexWriterConfig indexWriterConfig = new IndexWriterConfig(new WhitespaceAnalyzer())
      .setOpenMode(IndexWriterConfig.OpenMode.CREATE);
  RAMDirectory ramDirectory = new RAMDirectory();
  IndexWriter indexWriter = new IndexWriter(ramDirectory, indexWriterConfig);
  for (int i = 0; i < 10000000; i++) {
    indexWriter.addDocument(addDocument(i));
  }
  indexWriter.commit();
  indexWriter.close();
  long end = System.currentTimeMillis();
  log.error("RamDirectory consumes {}s!", (end - start) / 1000);
  start = System.currentTimeMillis();
  IndexSearcher indexSearcher = new IndexSearcher(DirectoryReader.open(ramDirectory));
  int total = 0;
  for (int i = 0; i < 10000000; i++) {
    TermQuery key1 = new TermQuery(new Term("key1", "key" + i));
    TopDocs search = indexSearcher.search(key1, 10);
    total += search.totalHits;
  }
  System.out.println(total);
  end = System.currentTimeMillis();
  log.error("RamDirectory search consumes {}ms!", (end - start));
}
public void testMMapDirectory() throws IOException {
  long start = System.currentTimeMillis();
  IndexWriterConfig indexWriterConfig = new IndexWriterConfig(new WhitespaceAnalyzer())
      .setOpenMode(IndexWriterConfig.OpenMode.CREATE);
  FSDirectory open = FSDirectory.open(Paths.get("E:/testlucene"));
  IndexWriter indexWriter = new IndexWriter(open, indexWriterConfig);
  for (int i = 0; i < 10000000; i++) {
    indexWriter.addDocument(addDocument(i));
  }
  indexWriter.commit();
  indexWriter.close();
  long end = System.currentTimeMillis();
  log.error("MMapDirectory consumes {}s!", (end - start) / 1000);
  start = System.currentTimeMillis();
  IndexSearcher indexSearcher = new IndexSearcher(DirectoryReader.open(open));
  int total = 0;
  for (int i = 0; i < 10000000; i++) {
    TermQuery key1 = new TermQuery(new Term("key1", "key" + i));
    TopDocs search = indexSearcher.search(key1, 10);
    total += search.totalHits;
  }
  System.out.println(total);
  end = System.currentTimeMillis();
  log.error("MMapDirectory search consumes {}ms!", (end - start));
}
private Document createTestNormsDocument(boolean setNormsProp,
    boolean normsPropVal, boolean setBodyNormsProp, boolean bodyNormsVal)
    throws Exception {
  Properties props = new Properties();
  // Indexing configuration.
  props.setProperty("analyzer", WhitespaceAnalyzer.class.getName());
  props.setProperty("directory", "ByteBuffersDirectory");
  if (setNormsProp) {
    props.setProperty("doc.tokenized.norms", Boolean.toString(normsPropVal));
  }
  if (setBodyNormsProp) {
    props.setProperty("doc.body.tokenized.norms", Boolean.toString(bodyNormsVal));
  }
  // Create PerfRunData
  Config config = new Config(props);
  DocMaker dm = new DocMaker();
  dm.setConfig(config, new OneDocSource());
  return dm.makeDocument();
}
/** Build the example index. */
public void index() throws IOException {
  IndexWriter indexWriter = new IndexWriter(indexDir,
      new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE));
  // Add documents with a fake timestamp, 1000 sec before
  // "now", 2000 sec before "now", ...:
  for (int i = 0; i < 100; i++) {
    Document doc = new Document();
    long then = nowSec - i * 1000;
    // Add as doc values field, so we can compute range facets:
    doc.add(new NumericDocValuesField("timestamp", then));
    // Add as numeric field so we can drill-down:
    doc.add(new LongPoint("timestamp", then));
    indexWriter.addDocument(doc);
  }
  // Open near-real-time searcher
  searcher = new IndexSearcher(DirectoryReader.open(indexWriter));
  indexWriter.close();
}
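The NumericDocValuesField above is what makes range faceting possible. As a complement, here is a minimal sketch of how such an index might be faceted; the range labels and boundaries are illustrative assumptions, while FacetsCollector and LongRangeFacetCounts are the standard Lucene facet-module classes.

// A sketch of assumed usage (not part of the original example): count documents
// falling into time ranges using the "timestamp" doc values field built above.
public FacetResult countTimestampRanges(IndexSearcher searcher, long nowSec) throws IOException {
  FacetsCollector fc = new FacetsCollector();
  // Collect facet hits for all documents.
  FacetsCollector.search(searcher, new MatchAllDocsQuery(), 10, fc);
  Facets facets = new LongRangeFacetCounts("timestamp", fc,
      new LongRange("Past hour", nowSec - 3600, true, nowSec, true),
      new LongRange("Past six hours", nowSec - 6 * 3600, true, nowSec, true),
      new LongRange("Past day", nowSec - 24 * 3600, true, nowSec, true));
  return facets.getTopChildren(10, "timestamp");
}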
/**
 * Create an index writer.
 *
 * @param dir - index directory
 * @param analyzer - analyzer used by the index writer
 * @param useCompound - if true, compound index files are used
 * @param keepAllCommits - if true, all commit generations are kept
 * @param ps - information stream
 * @return new index writer
 * @throws IOException - if there is a low-level IO error.
 */
public static IndexWriter createWriter(Directory dir, Analyzer analyzer, boolean useCompound,
    boolean keepAllCommits, PrintStream ps) throws IOException {
  Objects.requireNonNull(dir);
  IndexWriterConfig config = new IndexWriterConfig(analyzer == null ? new WhitespaceAnalyzer() : analyzer);
  config.setUseCompoundFile(useCompound);
  if (ps != null) {
    config.setInfoStream(ps);
  }
  if (keepAllCommits) {
    config.setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE);
  } else {
    config.setIndexDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy());
  }
  return new IndexWriter(dir, config);
}
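For illustration, a caller might use the helper above like this; the index path and argument choices are assumptions, not part of the original source.

// Hypothetical usage of createWriter; the path "/tmp/example-index" is illustrative.
try (Directory dir = FSDirectory.open(Paths.get("/tmp/example-index"))) {
  IndexWriter writer = createWriter(dir, null /* falls back to WhitespaceAnalyzer */,
      true /* compound files */, false /* keep only last commit */, null /* no info stream */);
  writer.addDocument(new Document());
  writer.commit();
  writer.close();
}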
public void testTermStreamWrapping() throws IOException {
  CustomQueryHandler handler
      = new RegexpQueryHandler("FOO", 10, "__wibble__", Collections.singleton("field1"));
  try (Analyzer input = new WhitespaceAnalyzer()) {
    // field1 is in the excluded set, so nothing should happen
    assertTokenStreamContents(handler.wrapTermStream("field1", input.tokenStream("field1", "hello world")),
        new String[]{ "hello", "world" });
    // field2 is not excluded
    assertTokenStreamContents(handler.wrapTermStream("field2", input.tokenStream("field2", "harm alarm asdasasdasdasd")),
        new String[]{
            "harm", "harmFOO", "harFOO", "haFOO", "hFOO", "armFOO", "arFOO", "aFOO", "rmFOO", "rFOO", "mFOO", "FOO",
            "alarm", "alarmFOO", "alarFOO", "alaFOO", "alFOO", "larmFOO", "larFOO", "laFOO", "lFOO",
            "asdasasdasdasd", "__wibble__"
        });
  }
}
@Test
@SuppressWarnings({"rawtypes"})
public void testUnicode() {
  SpellingQueryConverter converter = new SpellingQueryConverter();
  converter.init(new NamedList());
  converter.setAnalyzer(new WhitespaceAnalyzer());
  // Chinese text value
  Collection<Token> tokens = converter.convert("text_field:我购买了道具和服装。");
  assertTrue("tokens is null and it shouldn't be", tokens != null);
  assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());
  tokens = converter.convert("text_购field:我购买了道具和服装。");
  assertTrue("tokens is null and it shouldn't be", tokens != null);
  assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());
  tokens = converter.convert("text_field:我购xyz买了道具和服装。");
  assertTrue("tokens is null and it shouldn't be", tokens != null);
  assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());
}
@Test
@SuppressWarnings({"rawtypes"})
public void testMultipleClauses() {
  SpellingQueryConverter converter = new SpellingQueryConverter();
  converter.init(new NamedList());
  converter.setAnalyzer(new WhitespaceAnalyzer());
  // two field:value pairs should give two tokens
  Collection<Token> tokens = converter.convert("买text_field:我购买了道具和服装。 field2:bar");
  assertTrue("tokens is null and it shouldn't be", tokens != null);
  assertEquals("tokens Size: " + tokens.size() + " is not 2", 2, tokens.size());
  // a field:value pair and a search term should give two tokens
  tokens = converter.convert("text_field:我购买了道具和服装。 bar");
  assertTrue("tokens is null and it shouldn't be", tokens != null);
  assertEquals("tokens Size: " + tokens.size() + " is not 2", 2, tokens.size());
}
@Test
public void testOffsetWindowTokenFilter() throws Exception {
  String[] multivalued = { "a b c d", "e f g", "h", "i j k l m n" };
  try (Analyzer a1 = new WhitespaceAnalyzer()) {
    TokenStream tokenStream = a1.tokenStream("", "a b c d e f g h i j k l m n");
    try (DefaultSolrHighlighter.OffsetWindowTokenFilter tots = new DefaultSolrHighlighter.OffsetWindowTokenFilter(tokenStream)) {
      for (String v : multivalued) {
        TokenStream ts1 = tots.advanceToNextWindowOfLength(v.length());
        ts1.reset();
        try (Analyzer a2 = new WhitespaceAnalyzer()) {
          TokenStream ts2 = a2.tokenStream("", v);
          ts2.reset();
          while (ts1.incrementToken()) {
            assertTrue(ts2.incrementToken());
            assertEquals(ts1, ts2);
          }
          assertFalse(ts2.incrementToken());
        }
      }
    }
  }
}
@Test
public void testAnalyzeMultiTerm() {
  // No terms provided by the StopFilter (stop word) for the multi-term part.
  // This is supported. Check TextField.analyzeMultiTerm returns null (and does not throw an exception).
  BytesRef termBytes = TextField.analyzeMultiTerm("field", "the", new StopAnalyzer(EnglishAnalyzer.ENGLISH_STOP_WORDS_SET));
  assertNull(termBytes);
  // One term provided by the WhitespaceTokenizer for the multi-term part.
  // This is the regular case. Check TextField.analyzeMultiTerm returns it (and does not throw an exception).
  termBytes = TextField.analyzeMultiTerm("field", "Sol", new WhitespaceAnalyzer());
  assertEquals("Sol", termBytes.utf8ToString());
  // Two terms provided by the WhitespaceTokenizer for the multi-term part.
  // This is not allowed. Expect an exception.
  SolrException exception = expectThrows(SolrException.class,
      () -> TextField.analyzeMultiTerm("field", "term1 term2", new WhitespaceAnalyzer()));
  assertEquals("Unexpected error code", SolrException.ErrorCode.BAD_REQUEST.code, exception.code());
}
@Test
public void shouldTurnAnalysisOn() throws SyntaxError, IOException {
  when(localParamsMock.get("command")).thenReturn("smembers");
  when(localParamsMock.get("key")).thenReturn("simpleKey");
  when(localParamsMock.getBool("useAnalyzer", false)).thenReturn(true);
  when(localParamsMock.get(QueryParsing.V)).thenReturn("string_field");
  when(requestMock.getSchema()).thenReturn(schema);
  when(schema.getQueryAnalyzer()).thenReturn(new WhitespaceAnalyzer());
  when(jedisMock.smembers(anyString())).thenReturn(new HashSet<>(Arrays.asList("123 124", "321")));
  redisQParser = new RedisQParser("string_field", localParamsMock, paramsMock, requestMock, commandHandler);
  final Query query = redisQParser.parse();
  verify(jedisMock).smembers("simpleKey");
  IndexSearcher searcher = new IndexSearcher(new MultiReader());
  final Set<Term> terms = extractTerms(searcher, query);
  Assert.assertEquals(3, terms.size());
}
@Test
public void shouldRetryWhenRedisFailed() throws SyntaxError, IOException {
  when(localParamsMock.get("command")).thenReturn("smembers");
  when(localParamsMock.get("key")).thenReturn("simpleKey");
  when(localParamsMock.getBool("useAnalyzer", false)).thenReturn(false);
  when(localParamsMock.get("retries")).thenReturn("2");
  when(localParamsMock.get(QueryParsing.V)).thenReturn("string_field");
  when(requestMock.getSchema()).thenReturn(schema);
  when(schema.getQueryAnalyzer()).thenReturn(new WhitespaceAnalyzer());
  when(jedisPoolMock.getResource()).thenReturn(jedisFailingMock).thenReturn(jedisMock);
  when(jedisFailingMock.smembers("simpleKey")).thenThrow(new JedisException("Synthetic exception"));
  when(jedisMock.smembers("simpleKey")).thenReturn(new HashSet<String>(Collections.singletonList("value")));
  redisQParser = new RedisQParser("string_field", localParamsMock, paramsMock, requestMock,
      new RetryingCommandHandler(jedisPoolMock, 1));
  final Query query = redisQParser.parse();
  IndexSearcher searcher = new IndexSearcher(new MultiReader());
  final Set<Term> terms = extractTerms(searcher, query);
  Assert.assertEquals(1, terms.size());
}
@Test
public void shouldUseTermsQuery() throws SyntaxError, IOException {
  when(localParamsMock.get("command")).thenReturn("smembers");
  when(localParamsMock.get("key")).thenReturn("simpleKey");
  when(localParamsMock.get("ignoreScore")).thenReturn("true");
  when(localParamsMock.getBool("useAnalyzer", false)).thenReturn(true);
  when(localParamsMock.get(QueryParsing.V)).thenReturn("string_field");
  when(requestMock.getSchema()).thenReturn(schema);
  when(schema.getQueryAnalyzer()).thenReturn(new WhitespaceAnalyzer());
  when(jedisMock.smembers(anyString())).thenReturn(new HashSet<>(Arrays.asList("123 124", "321", "322", "323", "324",
      "325", "326", "327", "328", "329", "330", "331", "332", "333", "334", "335", "336", "337", "338")));
  redisQParser = new RedisQParser("string_field", localParamsMock, paramsMock, requestMock, commandHandler);
  final Query query = redisQParser.parse();
  verify(jedisMock).smembers("simpleKey");
  IndexSearcher searcher = new IndexSearcher(new MultiReader());
  Query rewrittenQuery = searcher.rewrite(query);
  assertTrue(rewrittenQuery instanceof TermsQuery);
}
protected Query doToQuery(QueryShardContext context) throws IOException {
  // Analyzer analyzer = context.getMapperService().searchAnalyzer();
  Analyzer analyzer = new WhitespaceAnalyzer();
  try (TokenStream source = analyzer.tokenStream(fieldName, value.toString())) {
    CachingTokenFilter stream = new CachingTokenFilter(new LowerCaseFilter(source));
    TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
    if (termAtt == null) {
      return null;
    }
    List<CustomSpanTermQuery> clauses = new ArrayList<>();
    stream.reset();
    while (stream.incrementToken()) {
      Term term = new Term(fieldName, termAtt.getBytesRef());
      clauses.add(new CustomSpanTermQuery(term));
    }
    return new PhraseCountQuery(clauses.toArray(new CustomSpanTermQuery[clauses.size()]), slop, inOrder, weightedCount);
  } catch (IOException e) {
    throw new RuntimeException("Error analyzing query text", e);
  }
}
@Test
public void testDocValuesFormat() throws IOException {
  RAMDirectory directory = new RAMDirectory();
  IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_43, new WhitespaceAnalyzer(Version.LUCENE_43));
  conf.setCodec(new Blur024Codec());
  IndexWriter writer = new IndexWriter(directory, conf);
  Document doc = new Document();
  doc.add(new StringField("f", "v", Store.YES));
  doc.add(new SortedDocValuesField("f", new BytesRef("v")));
  writer.addDocument(doc);
  writer.close();
  DirectoryReader reader = DirectoryReader.open(directory);
  AtomicReaderContext context = reader.leaves().get(0);
  AtomicReader atomicReader = context.reader();
  SortedDocValues sortedDocValues = atomicReader.getSortedDocValues("f");
  assertTrue(sortedDocValues.getClass().getName().startsWith(DiskDocValuesProducer.class.getName()));
  reader.close();
}
@Test
public void test5() throws ParseException, IOException {
  parser = new SuperParser(LUCENE_VERSION, getFieldManager(new WhitespaceAnalyzer(LUCENE_VERSION)), true, null,
      ScoreType.SUPER, new Term("_primedoc_"));
  Query query = parser.parse("<a.a:a a.d:{e TO f} a.b:b a.test:hello\\<> -<g.c:c g.d:d>");
  BooleanQuery booleanQuery1 = new BooleanQuery();
  booleanQuery1.add(new TermQuery(new Term("a.a", "a")), Occur.SHOULD);
  booleanQuery1.add(new TermRangeQuery("a.d", new BytesRef("e"), new BytesRef("f"), false, false), Occur.SHOULD);
  booleanQuery1.add(new TermQuery(new Term("a.b", "b")), Occur.SHOULD);
  // std analyzer took the "<" out
  booleanQuery1.add(new TermQuery(new Term("a.test", "hello<")), Occur.SHOULD);
  BooleanQuery booleanQuery2 = new BooleanQuery();
  booleanQuery2.add(new TermQuery(new Term("g.c", "c")), Occur.SHOULD);
  booleanQuery2.add(new TermQuery(new Term("g.d", "d")), Occur.SHOULD);
  SuperQuery superQuery1 = new SuperQuery(booleanQuery1, ScoreType.SUPER, new Term("_primedoc_"));
  SuperQuery superQuery2 = new SuperQuery(booleanQuery2, ScoreType.SUPER, new Term("_primedoc_"));
  BooleanQuery booleanQuery = new BooleanQuery();
  booleanQuery.add(superQuery1, Occur.SHOULD);
  booleanQuery.add(superQuery2, Occur.MUST_NOT);
  assertQuery(booleanQuery, query);
}
@Override
public void setUp() throws Exception {
  super.setUp();
  queryHelper = new QueryHelper() {
    protected QueryParser getQueryParser() {
      ExtendableQueryParser queryParser = new ExtendableQueryParser(Constants.DEFAULT_FIELD, new WhitespaceAnalyzer());
      queryParser.setAllowLeadingWildcard(true);
      queryParser.setDefaultOperator(QueryParser.Operator.AND);
      return queryParser;
    }
  };
  File file = File.createTempFile("test", ".properties");
  file.deleteOnExit();
  FileUtil.writeBytes(file.getAbsolutePath(), "ldap.security.principal=%[email protected]".getBytes("UTF-8"));
  DynamicProperties systemProps = new DynamicProperties(file);
  ComponentUtil.register(systemProps, "systemProperties");
  ComponentUtil.register(new SystemHelper(), "systemHelper");
  ComponentUtil.register(new VirtualHostHelper(), "virtualHostHelper");
  ComponentUtil.register(new KeyMatchHelper(), "keyMatchHelper");
  inject(queryHelper);
  queryHelper.init();
}
public void testRedisDirectoryWithRemoteJedisPool() throws IOException {
  long start = System.currentTimeMillis();
  IndexWriterConfig indexWriterConfig = new IndexWriterConfig(new WhitespaceAnalyzer())
      .setOpenMode(IndexWriterConfig.OpenMode.CREATE);
  JedisPool jedisPool = new JedisPool(new JedisPoolConfig(), "10.97.19.55", 6379, Constants.TIME_OUT);
  RedisDirectory redisDirectory = new RedisDirectory(new JedisPoolStream(jedisPool));
  IndexWriter indexWriter = new IndexWriter(redisDirectory, indexWriterConfig);
  for (int i = 0; i < 5000000; i++) {
    indexWriter.addDocument(addDocument(i));
  }
  indexWriter.commit();
  indexWriter.close();
  redisDirectory.close();
  long end = System.currentTimeMillis();
  log.error("RedisDirectoryWithJedisPool consumes {}s!", (end - start) / 1000);
  start = System.currentTimeMillis();
  IndexSearcher indexSearcher = new IndexSearcher(DirectoryReader.open(new RedisDirectory(new JedisStream("localhost",
      6379))));
  int total = 0;
  for (int i = 0; i < 1000000; i++) {
    TermQuery key1 = new TermQuery(new Term("key1", "key" + i));
    TopDocs search = indexSearcher.search(key1, 10);
    total += search.totalHits;
  }
  System.out.println(total);
  end = System.currentTimeMillis();
  log.error("RedisDirectoryWithJedisPool search consumes {}ms!", (end - start));
}
public void testRedisDirectoryWithJedisPool() throws IOException {
  long start = System.currentTimeMillis();
  IndexWriterConfig indexWriterConfig = new IndexWriterConfig(new WhitespaceAnalyzer())
      .setOpenMode(IndexWriterConfig.OpenMode.CREATE);
  //indexWriterConfig.setInfoStream(System.out);
  //indexWriterConfig.setRAMBufferSizeMB(2048);
  //LogByteSizeMergePolicy logByteSizeMergePolicy = new LogByteSizeMergePolicy();
  //logByteSizeMergePolicy.setMinMergeMB(1);
  //logByteSizeMergePolicy.setMaxMergeMB(64);
  //logByteSizeMergePolicy.setMaxCFSSegmentSizeMB(64);
  //indexWriterConfig.setRAMBufferSizeMB(1024).setMergePolicy(logByteSizeMergePolicy).setUseCompoundFile(false);
  //GenericObjectPoolConfig genericObjectPoolConfig = new GenericObjectPoolConfig();
  // Maximum wait time for acquiring a connection from the pool:
  //genericObjectPoolConfig.setMaxWaitMillis(3000);
  // 10s connection timeout:
  JedisPool jedisPool = new JedisPool(new JedisPoolConfig(), "localhost", 6379, Constants.TIME_OUT);
  RedisDirectory redisDirectory = new RedisDirectory(new JedisPoolStream(jedisPool));
  IndexWriter indexWriter = new IndexWriter(redisDirectory, indexWriterConfig);
  for (int i = 0; i < 10000000; i++) {
    indexWriter.addDocument(addDocument(i));
  }
  indexWriter.commit();
  indexWriter.close();
  redisDirectory.close();
  long end = System.currentTimeMillis();
  log.error("RedisDirectoryWithJedisPool consumes {}s!", (end - start) / 1000);
  start = System.currentTimeMillis();
  IndexSearcher indexSearcher = new IndexSearcher(DirectoryReader.open(new RedisDirectory(new JedisStream("localhost",
      6379))));
  int total = 0;
  for (int i = 0; i < 10000000; i++) {
    TermQuery key1 = new TermQuery(new Term("key1", "key" + i));
    TopDocs search = indexSearcher.search(key1, 10);
    total += search.totalHits;
  }
  System.out.println(total);
  end = System.currentTimeMillis();
  log.error("RedisDirectoryWithJedisPool search consumes {}ms!", (end - start));
}
public void testRedisDirectoryWithJedis() throws IOException {
  long start = System.currentTimeMillis();
  IndexWriterConfig indexWriterConfig = new IndexWriterConfig(new WhitespaceAnalyzer())
      .setOpenMode(IndexWriterConfig.OpenMode.CREATE);
  //indexWriterConfig.setInfoStream(System.out);
  //indexWriterConfig.setRAMBufferSizeMB(2048);
  //LogByteSizeMergePolicy logByteSizeMergePolicy = new LogByteSizeMergePolicy();
  //logByteSizeMergePolicy.setMinMergeMB(1);
  //logByteSizeMergePolicy.setMaxMergeMB(64);
  //logByteSizeMergePolicy.setMaxCFSSegmentSizeMB(64);
  //indexWriterConfig.setRAMBufferSizeMB(1024).setMergePolicy(logByteSizeMergePolicy).setUseCompoundFile(false);
  //GenericObjectPoolConfig genericObjectPoolConfig = new GenericObjectPoolConfig();
  // Maximum wait time for acquiring a connection from the pool:
  //genericObjectPoolConfig.setMaxWaitMillis(3000);
  // 10s connection timeout:
  RedisDirectory redisDirectory = new RedisDirectory(new JedisStream("localhost", 6379));
  IndexWriter indexWriter = new IndexWriter(redisDirectory, indexWriterConfig);
  for (int i = 0; i < 10000000; i++) {
    indexWriter.addDocument(addDocument(i));
  }
  indexWriter.commit();
  indexWriter.close();
  redisDirectory.close();
  long end = System.currentTimeMillis();
  log.error("RedisDirectoryWithJedis consumes {}s!", (end - start) / 1000);
  start = System.currentTimeMillis();
  IndexSearcher indexSearcher = new IndexSearcher(DirectoryReader.open(new RedisDirectory(new JedisStream("localhost",
      6379))));
  int total = 0;
  for (int i = 0; i < 10000000; i++) {
    TermQuery key1 = new TermQuery(new Term("key1", "key" + i));
    TopDocs search = indexSearcher.search(key1, 10);
    total += search.totalHits;
  }
  System.out.println(total);
  end = System.currentTimeMillis();
  log.error("RedisDirectoryWithJedis search consumes {}ms!", (end - start));
}
private void doTestIndexProperties(boolean setIndexProps,
    boolean indexPropsVal, int numExpectedResults) throws Exception {
  Properties props = new Properties();
  // Indexing configuration.
  props.setProperty("analyzer", WhitespaceAnalyzer.class.getName());
  props.setProperty("content.source", OneDocSource.class.getName());
  props.setProperty("directory", "ByteBuffersDirectory");
  if (setIndexProps) {
    props.setProperty("doc.index.props", Boolean.toString(indexPropsVal));
  }
  // Create PerfRunData
  Config config = new Config(props);
  PerfRunData runData = new PerfRunData(config);
  TaskSequence tasks = new TaskSequence(runData, getTestName(), null, false);
  tasks.addTask(new CreateIndexTask(runData));
  tasks.addTask(new AddDocTask(runData));
  tasks.addTask(new CloseIndexTask(runData));
  tasks.doLogic();
  IndexReader reader = DirectoryReader.open(runData.getDirectory());
  IndexSearcher searcher = newSearcher(reader);
  TopDocs td = searcher.search(new TermQuery(new Term("key", "value")), 10);
  assertEquals(numExpectedResults, td.totalHits.value);
  reader.close();
}
@Test
@SuppressWarnings({"rawtypes"})
public void test() throws Exception {
  SpellingQueryConverter converter = new SpellingQueryConverter();
  converter.init(new NamedList());
  converter.setAnalyzer(new WhitespaceAnalyzer());
  Collection<Token> tokens = converter.convert("field:foo");
  assertTrue("tokens is null and it shouldn't be", tokens != null);
  assertTrue("tokens Size: " + tokens.size() + " is not: " + 1, tokens.size() == 1);
}
@Test
@SuppressWarnings({"rawtypes"})
public void testNumeric() throws Exception {
  SpellingQueryConverter converter = new SpellingQueryConverter();
  converter.init(new NamedList());
  converter.setAnalyzer(new WhitespaceAnalyzer());
  String[] queries = {"12345", "foo:12345", "12345 67890", "foo:(12345 67890)", "foo:(life 67890)", "12345 life",
      "+12345 +life", "-12345 life"};
  int[] tokensToExpect = {1, 1, 2, 2, 2, 2, 2, 2};
  for (int i = 0; i < queries.length; i++) {
    Collection<Token> tokens = converter.convert(queries[i]);
    assertTrue("tokens Size: " + tokens.size() + " is not: " + tokensToExpect[i], tokens.size() == tokensToExpect[i]);
  }
}
@Override
public Collection<Token> convert(String origQuery) {
  Collection<Token> result = new HashSet<>();
  try (WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer();
       TokenStream ts = analyzer.tokenStream("", origQuery)) {
    // TODO: support custom attributes
    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
    OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
    TypeAttribute typeAtt = ts.addAttribute(TypeAttribute.class);
    FlagsAttribute flagsAtt = ts.addAttribute(FlagsAttribute.class);
    PayloadAttribute payloadAtt = ts.addAttribute(PayloadAttribute.class);
    PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
      Token tok = new Token();
      tok.copyBuffer(termAtt.buffer(), 0, termAtt.length());
      tok.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
      tok.setFlags(flagsAtt.getFlags());
      tok.setPayload(payloadAtt.getPayload());
      tok.setPositionIncrement(posIncAtt.getPositionIncrement());
      tok.setType(typeAtt.type());
      result.add(tok);
    }
    ts.end();
    return result;
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
/** Index all text files under a directory. */
public static void main(String[] args) {
  String usage = "java pitt.search.lucene.LuceneIndexFromTriples [triples text file] ";
  if (args.length == 0) {
    System.err.println("Usage: " + usage);
    System.exit(1);
  }
  FlagConfig flagConfig = FlagConfig.getFlagConfig(args);
  // Allow for the specification of a directory to write the index to.
  if (flagConfig.luceneindexpath().length() > 0) {
    INDEX_DIR = FileSystems.getDefault().getPath(flagConfig.luceneindexpath());
  }
  if (Files.exists(INDEX_DIR)) {
    throw new IllegalArgumentException(
        "Cannot save index to '" + INDEX_DIR + "' directory, please delete it first");
  }
  try {
    // Create IndexWriter using a WhitespaceAnalyzer without any stopword list.
    IndexWriterConfig writerConfig = new IndexWriterConfig(new WhitespaceAnalyzer());
    IndexWriter writer = new IndexWriter(FSDirectory.open(INDEX_DIR), writerConfig);
    final File triplesTextFile = new File(args[0]);
    if (!triplesTextFile.exists() || !triplesTextFile.canRead()) {
      writer.close();
      throw new IOException("Document file '" + triplesTextFile.getAbsolutePath() +
          "' does not exist or is not readable, please check the path");
    }
    System.out.println("Indexing to directory '" + INDEX_DIR + "'...");
    indexDoc(writer, triplesTextFile);
    writer.close();
  } catch (IOException e) {
    System.out.println(" caught a " + e.getClass() +
        "\n with message: " + e.getMessage());
  }
}