Below are example usages of org.apache.lucene.analysis.standard.StandardAnalyzer collected from open-source projects; follow the GitHub links to view the original source, or leave a comment in the sidebar.
public static void main(String[] args) throws Exception {
    // Open (or create) the index directory under /tmp/index.
    try (Directory index = new NIOFSDirectory(Paths.get("/tmp/index"))) {
        // Write a single document with a tokenized field and a keyword field.
        try (IndexWriter indexWriter = new IndexWriter(index, new IndexWriterConfig(new StandardAnalyzer()))) {
            Document document = new Document();
            document.add(new TextField("blog", "yuzhouwan.com", Field.Store.YES));
            document.add(new StringField("github", "asdf2014", Field.Store.YES));
            indexWriter.addDocument(document);
            indexWriter.commit();
        }
        // Query the "blog" field and print the stored value of every hit.
        try (DirectoryReader directoryReader = DirectoryReader.open(index)) {
            IndexSearcher indexSearcher = new IndexSearcher(directoryReader);
            Query parsedQuery = new QueryParser("blog", new StandardAnalyzer()).parse("yuzhouwan.com");
            for (ScoreDoc scoreDoc : indexSearcher.search(parsedQuery, 1000).scoreDocs) {
                System.out.println(indexSearcher.doc(scoreDoc.doc).get("blog"));
            }
        }
    }
}
@Test
public void shouldAddTermsFromSortOrderAsc() throws SyntaxError, IOException {
    // Configure local params for an ascending SORT command on "simpleKey".
    when(localParamsMock.get("command")).thenReturn("sort");
    when(localParamsMock.get("key")).thenReturn("simpleKey");
    when(localParamsMock.get("order")).thenReturn("asc");
    when(localParamsMock.get(QueryParsing.V)).thenReturn("string_field");
    when(jedisMock.sort(anyString(), any(SortingParams.class))).thenReturn(Arrays.asList("123", "321"));
    when(requestMock.getSchema()).thenReturn(schema);
    when(schema.getQueryAnalyzer()).thenReturn(new StandardAnalyzer());
    redisQParser = new RedisQParser("string_field", localParamsMock, paramsMock, requestMock, commandHandler);
    final Query parsedQuery = redisQParser.parse();
    // The SORT must have been issued on the expected key with ascending order.
    final ArgumentCaptor<SortingParams> sortCaptor = ArgumentCaptor.forClass(SortingParams.class);
    verify(jedisMock).sort(eq("simpleKey"), sortCaptor.capture());
    Assert.assertEquals(getSortingParamString(new SortingParams().asc()), getSortingParamString(sortCaptor.getValue()));
    // Both returned Redis values must surface as query terms.
    final IndexSearcher dummySearcher = new IndexSearcher(new MultiReader());
    final Set<Term> queryTerms = extractTerms(dummySearcher, parsedQuery);
    Assert.assertEquals(2, queryTerms.size());
}
/**
 * Search for the user-specified query expression in the current page.
 * Downloads the page behind {@code url}, indexes its contents into an
 * in-memory index, and records a {@code LinkMatch} when the query matches.
 * @throws Exception if an error occurs.
 */
private void search() throws Exception {
    final Query parsedQuery = new QueryParser("contents", new StandardAnalyzer()).parse(query);
    // Build a single-field, in-memory index from the downloaded page body.
    final MemoryIndex memoryIndex = new MemoryIndex();
    final PageData pageData = new SimpleHttpClientParser().load(new Link(url));
    memoryIndex.addField("contents", pageData.getData().toString(), new StandardAnalyzer());
    final IndexSearcher pageSearcher = memoryIndex.createSearcher();
    final Hits hits = pageSearcher.search(parsedQuery);
    @SuppressWarnings("rawtypes")
    final Iterator hitIterator = hits.iterator();
    // Accumulate a percentage-style relevance over all hits; record the link
    // only when the page matched at least once.
    final boolean matched = hitIterator.hasNext();
    float relevance = 0f;
    while (hitIterator.hasNext()) {
        final Hit hit = (Hit) hitIterator.next();
        relevance += ((float) Math.round(hit.getScore() * 1000)) / 10;
    }
    if (matched) {
        matchedLinks.add(new LinkMatch(url, relevance));
    }
}
@Test
public void shouldReturnEmptyQueryOnEmptyListOfSunion() throws SyntaxError, IOException {
    // Configure a SUNION over two keys whose union is empty.
    when(localParamsMock.get("command")).thenReturn("sunion");
    when(localParamsMock.get("key")).thenReturn("key1");
    when(localParamsMock.get("key1")).thenReturn("key2");
    when(localParamsMock.getParameterNamesIterator()).thenReturn(Arrays.asList("command", "key", "key1").iterator());
    when(localParamsMock.get(QueryParsing.V)).thenReturn("string_field");
    when(jedisMock.sunion(anyString(), anyString())).thenReturn(new HashSet<String>());
    when(requestMock.getSchema()).thenReturn(schema);
    when(schema.getQueryAnalyzer()).thenReturn(new StandardAnalyzer());
    redisQParser = new RedisQParser("string_field", localParamsMock, paramsMock, requestMock, commandHandler);
    final Query parsedQuery = redisQParser.parse();
    verify(jedisMock).sunion("key1", "key2");
    // An empty Redis result must translate into a query with no terms.
    final IndexSearcher dummySearcher = new IndexSearcher(new MultiReader());
    final Set<Term> queryTerms = extractTerms(dummySearcher, parsedQuery);
    Assert.assertEquals(0, queryTerms.size());
}
@Test
public void shouldDeflateGzipAndParseJsonTermsFromRedisOnGetCommand() throws SyntaxError, IOException {
    // GET returns a gzip-compressed JSON array of three numbers.
    when(localParamsMock.get("command")).thenReturn("get");
    when(localParamsMock.get("key")).thenReturn("simpleKey");
    when(localParamsMock.get("compression")).thenReturn("gzip");
    when(localParamsMock.get("serialization")).thenReturn("json");
    when(localParamsMock.get(QueryParsing.V)).thenReturn("string_field");
    when(jedisMock.get(any(byte[].class))).thenReturn(Compressor.compressGzip("[100,200,300]".getBytes()));
    when(requestMock.getSchema()).thenReturn(schema);
    when(schema.getQueryAnalyzer()).thenReturn(new StandardAnalyzer());
    redisQParser = new RedisQParser("string_field", localParamsMock, paramsMock, requestMock, commandHandler);
    final Query parsedQuery = redisQParser.parse();
    verify(jedisMock).get("simpleKey".getBytes());
    // Each JSON array element should become a separate query term.
    final IndexSearcher dummySearcher = new IndexSearcher(new MultiReader());
    final Set<Term> queryTerms = extractTerms(dummySearcher, parsedQuery);
    Assert.assertEquals(3, queryTerms.size());
}
/**
 * Tokenizes the input line with a StandardAnalyzer (English stop words
 * removed) and emits each distinct (fileName, word) pair at most once,
 * using {@code mapTable} as the cross-record dedup set.
 *
 * @param key     the file name for this record.
 * @param value   the line of text to tokenize.
 * @param context MapReduce context used to emit (fileName, word) pairs.
 * @throws IOException          if the context write fails.
 * @throws InterruptedException if the context write is interrupted.
 */
public void map(Text key, Text value, Context context) throws IOException, InterruptedException {
    String line = value.toString();
    // Standard tokenizer with the default English stop-word set, so noise
    // words such as "the", "a", etc. are eliminated. A stemming tokenizer
    // (e.g. Porter) could be substituted here.
    CharArraySet noise = EnglishAnalyzer.getDefaultStopSet();
    Analyzer analyzer = new StandardAnalyzer(noise);
    // The file name is identical for every token of this record: set it once
    // instead of once per token (loop-invariant hoisted).
    fileName.set(key);
    for (String token : Tokenizer.tokenizeString(analyzer, line)) {
        word.set(token);
        // NOTE(review): the dedup key is a raw concatenation, so distinct
        // pairs could collide (e.g. "ab"+"c" vs "a"+"bc"); kept as-is since
        // other code may build keys the same way — confirm before changing.
        String dedupKey = fileName.toString() + word.toString();
        if (!mapTable.containsKey(dedupKey)) {
            context.write(fileName, word);
            mapTable.put(dedupKey, new IntWritable(1));
        }
    }
}
@Override
String[] getExtraProperties() {
    // Property-name prefix under which per-language analyzer options live.
    String analyzer = ConfigurableAnalyzerFactory.Options.ANALYZER;
    // Flat key/value pairs: each option key is immediately followed by its value.
    return new String[]{
        // Use the configurable factory so per-language analyzers can be declared.
        FullTextIndex.Options.ANALYZER_FACTORY_CLASS, ConfigurableAnalyzerFactory.class.getName(),
        // "_" appears to be the catch-all language tag, aliased to the
        // "x-empty" configuration — TODO confirm against the factory's docs.
        analyzer+"_."+AnalyzerOptions.LIKE, "x-empty",
        // "x-empty": analyzer that produces no tokens.
        analyzer+"x-empty."+AnalyzerOptions.ANALYZER_CLASS, EmptyAnalyzer.class.getName(),
        // "x-terms": pattern-based tokenization on non-word characters.
        analyzer+"x-terms."+AnalyzerOptions.PATTERN, "\\W+",
        // "x-splits": term-completion analyzer splitting on spaces, with
        // sub-word boundaries at letter/number and case transitions.
        analyzer+"x-splits."+AnalyzerOptions.ANALYZER_CLASS, TermCompletionAnalyzer.class.getName(),
        analyzer+"x-splits."+AnalyzerOptions.STOPWORDS, AnalyzerOptions.STOPWORDS_VALUE_NONE,
        analyzer+"x-splits."+AnalyzerOptions.WORD_BOUNDARY, " ",
        analyzer+"x-splits."+AnalyzerOptions.SUB_WORD_BOUNDARY, "(?<!\\p{L}|\\p{N})(?=\\p{L}|\\p{N})|(?<!\\p{Lu})(?=\\p{Lu})|(?<=\\p{N})(?=\\p{L})",
        // "x-hyphen": splits on hyphens/dots, keeps soft hyphens ("false").
        analyzer+"x-hyphen."+AnalyzerOptions.SUB_WORD_BOUNDARY, "[-.]",
        analyzer+"x-hyphen."+AnalyzerOptions.SOFT_HYPHENS, "-",
        analyzer+"x-hyphen."+AnalyzerOptions.WORD_BOUNDARY, " ",
        analyzer+"x-hyphen."+AnalyzerOptions.ALWAYS_REMOVE_SOFT_HYPHENS, "false",
        // "x-hyphen2": same as above but always removes soft hyphens ("true").
        analyzer+"x-hyphen2."+AnalyzerOptions.SUB_WORD_BOUNDARY, "[-.]",
        analyzer+"x-hyphen2."+AnalyzerOptions.SOFT_HYPHENS, "-",
        analyzer+"x-hyphen2."+AnalyzerOptions.WORD_BOUNDARY, " ",
        analyzer+"x-hyphen2."+AnalyzerOptions.ALWAYS_REMOVE_SOFT_HYPHENS, "true",
        // "x-keywords": whole input as a single token.
        analyzer+"x-keywords."+AnalyzerOptions.ANALYZER_CLASS, KeywordAnalyzer.class.getName(),
        // "en-x-de": StandardAnalyzer with the German stop-word list.
        analyzer+"en-x-de."+AnalyzerOptions.ANALYZER_CLASS, StandardAnalyzer.class.getName(),
        analyzer+"en-x-de."+AnalyzerOptions.STOPWORDS, GermanAnalyzer.class.getName(),
    };
}
@Test
public void shouldAddTermsFromRedisOnLrangeCommandCustomMinAndMax() throws SyntaxError, IOException {
    // LRANGE with explicit min/max bounds of 2 and 3.
    when(localParamsMock.get("command")).thenReturn("lrange");
    when(localParamsMock.get("min")).thenReturn("2");
    when(localParamsMock.get("max")).thenReturn("3");
    when(localParamsMock.get("key")).thenReturn("simpleKey");
    when(localParamsMock.get(QueryParsing.V)).thenReturn("string_field");
    when(jedisMock.lrange(anyString(), anyLong(), anyLong())).thenReturn(Arrays.asList("123", "321"));
    when(requestMock.getSchema()).thenReturn(schema);
    when(schema.getQueryAnalyzer()).thenReturn(new StandardAnalyzer());
    redisQParser = new RedisQParser("string_field", localParamsMock, paramsMock, requestMock, commandHandler);
    final Query parsedQuery = redisQParser.parse();
    // The custom bounds must be forwarded to Redis verbatim.
    verify(jedisMock).lrange("simpleKey", 2, 3);
    // Both returned Redis values must surface as query terms.
    final IndexSearcher dummySearcher = new IndexSearcher(new MultiReader());
    final Set<Term> queryTerms = extractTerms(dummySearcher, parsedQuery);
    Assert.assertEquals(2, queryTerms.size());
}
@Inject
public LucenePerUserWaveViewHandlerImpl(IndexDirectory directory,
    ReadableWaveletDataProvider waveletProvider,
    @Named(CoreSettingsNames.WAVE_SERVER_DOMAIN) String domain,
    @IndexExecutor Executor executor) {
    // Wires up the Lucene index writer plus the near-real-time (NRT) search
    // infrastructure for per-user wave views.
    this.waveletProvider = waveletProvider;
    this.executor = executor;
    analyzer = new StandardAnalyzer(LUCENE_VERSION);
    try {
        IndexWriterConfig indexConfig = new IndexWriterConfig(LUCENE_VERSION, analyzer);
        // Reuse an existing index if present, otherwise create a fresh one.
        indexConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
        indexWriter = new IndexWriter(directory.getDirectory(), indexConfig);
        nrtManager = new NRTManager(indexWriter, new WaveSearchWarmer(domain));
    } catch (IOException ex) {
        // Index setup failures are fatal for this handler; surface as unchecked.
        throw new IndexException(ex);
    }
    // Background thread that periodically reopens NRT readers so searches see
    // recent writes within the MIN/MAX staleness bounds.
    nrtManagerReopenThread = new NRTManagerReopenThread(nrtManager, MAX_STALE_SEC, MIN_STALE_SEC);
    nrtManagerReopenThread.start();
}
@Test
public void shouldReturnEmptyQueryOnEmptyListOfSinter() throws SyntaxError, IOException {
    // Configure a SINTER over two keys whose intersection is empty.
    when(localParamsMock.get("command")).thenReturn("sinter");
    when(localParamsMock.get("key")).thenReturn("key1");
    when(localParamsMock.get("key1")).thenReturn("key2");
    when(localParamsMock.getParameterNamesIterator()).thenReturn(Arrays.asList("command", "key", "key1").iterator());
    when(localParamsMock.get(QueryParsing.V)).thenReturn("string_field");
    when(jedisMock.sinter(anyString(), anyString())).thenReturn(new HashSet<String>());
    when(requestMock.getSchema()).thenReturn(schema);
    when(schema.getQueryAnalyzer()).thenReturn(new StandardAnalyzer());
    redisQParser = new RedisQParser("string_field", localParamsMock, paramsMock, requestMock, commandHandler);
    final Query parsedQuery = redisQParser.parse();
    verify(jedisMock).sinter("key1", "key2");
    // An empty Redis result must translate into a query with no terms.
    final IndexSearcher dummySearcher = new IndexSearcher(new MultiReader());
    final Set<Term> queryTerms = extractTerms(dummySearcher, parsedQuery);
    Assert.assertEquals(0, queryTerms.size());
}
/**
 * (Re)builds the Lucene index for the given site/channel over a date range.
 * If an index already exists, stale entries for the range are deleted first;
 * otherwise a fresh index is created.
 *
 * @param siteId    site to index.
 * @param channelId channel to index.
 * @param startDate range start (inclusive semantics per LuceneContent).
 * @param endDate   range end.
 * @param startId   first content id to index.
 * @param max       maximum number of contents to index in this batch.
 * @param dir       Lucene directory holding the index.
 * @return the id of the last content indexed, for batch continuation.
 * @throws IOException    on index I/O failure.
 * @throws ParseException on query parsing failure.
 */
@Transactional(readOnly = true)
public Integer createIndex(Integer siteId, Integer channelId,
        Date startDate, Date endDate, Integer startId, Integer max,
        Directory dir) throws IOException, ParseException {
    boolean exist = IndexReader.indexExists(dir);
    // try-with-resources guarantees the writer is closed even if indexing fails
    // (replaces the manual try/finally close).
    try (IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(
            Version.LUCENE_30), !exist, IndexWriter.MaxFieldLength.LIMITED)) {
        if (exist) {
            // Remove stale entries for the range before re-indexing.
            LuceneContent.delete(siteId, channelId, startDate, endDate, writer);
        }
        Integer lastId = luceneContentDao.index(writer, siteId, channelId,
                startDate, endDate, startId, max);
        writer.optimize();
        return lastId;
    }
}
@Test
public void shouldAddTermsFromRedisOnZrangeCommandWithCustomRange() throws SyntaxError, IOException {
    // ZRANGE with an explicit custom range of [1, 100].
    when(localParamsMock.get("command")).thenReturn("zrange");
    when(localParamsMock.get("key")).thenReturn("simpleKey");
    when(localParamsMock.get("range_start")).thenReturn("1");
    when(localParamsMock.get("range_end")).thenReturn("100");
    when(localParamsMock.get(QueryParsing.V)).thenReturn("string_field");
    when(jedisMock.zrangeWithScores(anyString(), anyLong(), anyLong()))
            .thenReturn(new HashSet<>(Arrays.asList(new Tuple("123", (double) 1.0f), new Tuple("321", (double) 1.0f))));
    when(requestMock.getSchema()).thenReturn(schema);
    when(schema.getQueryAnalyzer()).thenReturn(new StandardAnalyzer());
    redisQParser = new RedisQParser("string_field", localParamsMock, paramsMock, requestMock, commandHandler);
    final Query parsedQuery = redisQParser.parse();
    // The custom range must be forwarded to Redis verbatim.
    verify(jedisMock).zrangeWithScores("simpleKey", 1, 100);
    // Both scored members must surface as query terms.
    final IndexSearcher dummySearcher = new IndexSearcher(new MultiReader());
    final Set<Term> queryTerms = extractTerms(dummySearcher, parsedQuery);
    Assert.assertEquals(2, queryTerms.size());
}
/**
 * Initializes this component from the resource specifier: reads the
 * "hits" and "stoplist-path" parameters, loads the stop list from the
 * classpath (one entry per line, trimmed), and builds a
 * StandardAnalyzer-backed QueryParser for the "text" field.
 *
 * @return true (per the superclass initialize contract).
 * @throws ResourceInitializationException if the stop list cannot be read.
 */
@Override
public boolean initialize(ResourceSpecifier aSpecifier, Map<String, Object> aAdditionalParams)
        throws ResourceInitializationException {
    super.initialize(aSpecifier, aAdditionalParams);
    hits = Integer.class.cast(getParameterValue("hits"));
    // query constructor
    String stoplistPath = String.class.cast(getParameterValue("stoplist-path"));
    try {
        // Load the stop list relative to this class's classpath location.
        stoplist = Resources.readLines(getClass().getResource(stoplistPath), UTF_8).stream()
                .map(String::trim).collect(toSet());
    } catch (IOException e) {
        throw new ResourceInitializationException(e);
    }
    analyzer = new StandardAnalyzer();
    parser = new QueryParser("text", analyzer);
    return true;
}
@Test
public void shouldAddTermsFromRedisOnLrangeCommandEmptyMinAndMaxFallsBackToDefault() throws SyntaxError, IOException {
    // LRANGE with blank min/max params should fall back to the full range.
    when(localParamsMock.get("command")).thenReturn("lrange");
    when(localParamsMock.get("min")).thenReturn("");
    when(localParamsMock.get("max")).thenReturn("");
    when(localParamsMock.get("key")).thenReturn("simpleKey");
    when(localParamsMock.get(QueryParsing.V)).thenReturn("string_field");
    when(jedisMock.lrange(anyString(), anyLong(), anyLong())).thenReturn(Arrays.asList("123", "321"));
    when(requestMock.getSchema()).thenReturn(schema);
    when(schema.getQueryAnalyzer()).thenReturn(new StandardAnalyzer());
    redisQParser = new RedisQParser("string_field", localParamsMock, paramsMock, requestMock, commandHandler);
    final Query parsedQuery = redisQParser.parse();
    // Defaults are 0 and -1, i.e. the whole list.
    verify(jedisMock).lrange("simpleKey", 0, -1);
    // Both returned Redis values must surface as query terms.
    final IndexSearcher dummySearcher = new IndexSearcher(new MultiReader());
    final Set<Term> queryTerms = extractTerms(dummySearcher, parsedQuery);
    Assert.assertEquals(2, queryTerms.size());
}
/**
 * Initializes this component from the UIMA context: reads the hit limit,
 * constructs the query analyzer and query-string constructor from config
 * parameters (defaulting to StandardAnalyzer and
 * LuceneQueryStringConstructor), builds a QueryParser for the "text"
 * field, and loads document logistic-regression feature weights from the
 * first line of the configured resource.
 *
 * @throws ResourceInitializationException if the weights file cannot be read.
 */
@Override
public void initialize(UimaContext context) throws ResourceInitializationException {
    super.initialize(context);
    // Default to 100 hits when the "hits" parameter is absent.
    hits = UimaContextHelper.getConfigParameterIntValue(context, "hits", 100);
    analyzer = UimaContextHelper.createObjectFromConfigParameter(context, "query-analyzer",
            "query-analyzer-params", StandardAnalyzer.class, Analyzer.class);
    queryStringConstructor = UimaContextHelper.createObjectFromConfigParameter(context,
            "query-string-constructor", "query-string-constructor-params",
            LuceneQueryStringConstructor.class, QueryStringConstructor.class);
    parser = new QueryParser("text", analyzer);
    // load parameters
    String param = UimaContextHelper.getConfigParameterStringValue(context, "doc-logreg-params");
    try {
        // First line only: tab-separated doubles become the weight vector.
        docFeatWeights = Resources.readLines(getClass().getResource(param), UTF_8).stream().limit(1)
                .map(line -> line.split("\t")).flatMap(Arrays::stream)
                .mapToDouble(Double::parseDouble).toArray();
    } catch (IOException e) {
        throw new ResourceInitializationException(e);
    }
}
@Test
public void shouldReturnEmptyQueryOnEmptyListOfHget() throws SyntaxError, IOException {
    // HGET for a field that does not exist (Redis returns null).
    when(localParamsMock.get("command")).thenReturn("hget");
    when(localParamsMock.get("key")).thenReturn("simpleKey");
    when(localParamsMock.get("field")).thenReturn("f1");
    when(localParamsMock.get(QueryParsing.V)).thenReturn("string_field");
    when(jedisMock.hget(anyString(), anyString())).thenReturn(null);
    when(requestMock.getSchema()).thenReturn(schema);
    when(schema.getQueryAnalyzer()).thenReturn(new StandardAnalyzer());
    redisQParser = new RedisQParser("string_field", localParamsMock, paramsMock, requestMock, commandHandler);
    final Query parsedQuery = redisQParser.parse();
    verify(jedisMock).hget("simpleKey", "f1");
    // A null Redis result must translate into a query with no terms.
    final IndexSearcher dummySearcher = new IndexSearcher(new MultiReader());
    final Set<Term> queryTerms = extractTerms(dummySearcher, parsedQuery);
    Assert.assertEquals(0, queryTerms.size());
}
@Test
public void shouldAddTermsFromSort() throws SyntaxError, IOException {
    // Plain SORT command with no ordering or limit options.
    when(localParamsMock.get("command")).thenReturn("sort");
    when(localParamsMock.get("key")).thenReturn("simpleKey");
    when(localParamsMock.get(QueryParsing.V)).thenReturn("string_field");
    when(jedisMock.sort(anyString(), any(SortingParams.class))).thenReturn(Arrays.asList("123", "321"));
    when(requestMock.getSchema()).thenReturn(schema);
    when(schema.getQueryAnalyzer()).thenReturn(new StandardAnalyzer());
    redisQParser = new RedisQParser("string_field", localParamsMock, paramsMock, requestMock, commandHandler);
    final Query parsedQuery = redisQParser.parse();
    // SORT must have been issued with default (empty) sorting parameters.
    final ArgumentCaptor<SortingParams> sortCaptor = ArgumentCaptor.forClass(SortingParams.class);
    verify(jedisMock).sort(eq("simpleKey"), sortCaptor.capture());
    Assert.assertEquals(getSortingParamString(new SortingParams()), getSortingParamString(sortCaptor.getValue()));
    // Both returned Redis values must surface as query terms.
    final IndexSearcher dummySearcher = new IndexSearcher(new MultiReader());
    final Set<Term> queryTerms = extractTerms(dummySearcher, parsedQuery);
    Assert.assertEquals(2, queryTerms.size());
}
/**
 * Runs a "more like this" (MLT) search for the document number entered in
 * the doc-num field and renders the results in the results table.
 * Throws a LukeException when no document number has been entered.
 */
private void doMLTSearch() {
    if (Objects.isNull(mltDocFTF.getValue())) {
        throw new LukeException("Doc num is not set.");
    }
    int docNum = (int) mltDocFTF.getValue();
    // Pull the MLT configuration from the MLT tab, falling back to defaults.
    MLTConfig mltConfig = operatorRegistry.get(MLTTabOperator.class)
        .map(MLTTabOperator::getConfig)
        .orElse(new MLTConfig.Builder().build());
    // Use the analyzer selected on the Analysis tab, or StandardAnalyzer.
    Analyzer analyzer = operatorRegistry.get(AnalysisTabOperator.class)
        .map(AnalysisTabOperator::getCurrentAnalyzer)
        .orElse(new StandardAnalyzer());
    Query query = searchModel.mltQuery(docNum, mltConfig, analyzer);
    // Load only the fields chosen on the Field Values tab (empty = defaults).
    Set<String> fieldsToLoad = operatorRegistry.get(FieldValuesTabOperator.class)
        .map(FieldValuesTabOperator::getFieldsToLoad)
        .orElse(Collections.emptySet());
    SearchResults results = searchModel.search(query, new SimilarityConfig.Builder().build(), fieldsToLoad, DEFAULT_PAGE_SIZE, false);
    // Reset the results table to single selection with default column widths.
    TableUtils.setupTable(resultsTable, ListSelectionModel.SINGLE_SELECTION, new SearchResultsTableModel(), null,
        SearchResultsTableModel.Column.DOCID.getColumnWidth(),
        SearchResultsTableModel.Column.SCORE.getColumnWidth());
    populateResults(results);
    messageBroker.clearStatusMessage();
}
@Test
public void shouldAddTermsFromRedisOnHmgetCommand() throws SyntaxError, IOException {
    // HMGET on one hash field returning a single value.
    when(localParamsMock.get("command")).thenReturn("hmget");
    when(localParamsMock.get("key")).thenReturn("hash");
    when(localParamsMock.get("field")).thenReturn("field1");
    when(localParamsMock.getParameterNamesIterator()).thenReturn(Arrays.asList("command", "key", "field").iterator());
    when(localParamsMock.get(QueryParsing.V)).thenReturn("string_field");
    when(jedisMock.hmget(anyString(), anyString())).thenReturn(Arrays.asList("123"));
    when(requestMock.getSchema()).thenReturn(schema);
    when(schema.getQueryAnalyzer()).thenReturn(new StandardAnalyzer());
    redisQParser = new RedisQParser("string_field", localParamsMock, paramsMock, requestMock, commandHandler);
    final Query parsedQuery = redisQParser.parse();
    verify(jedisMock).hmget("hash", "field1");
    // The single hash value must surface as exactly one query term.
    final IndexSearcher dummySearcher = new IndexSearcher(new MultiReader());
    final Set<Term> queryTerms = extractTerms(dummySearcher, parsedQuery);
    Assert.assertEquals(1, queryTerms.size());
}
@Test
public void shouldAddTermsFromSortOffset() throws SyntaxError, IOException {
    // SORT with an offset of 100 and no explicit count.
    when(localParamsMock.get("command")).thenReturn("sort");
    when(localParamsMock.get("key")).thenReturn("simpleKey");
    when(localParamsMock.get("offset")).thenReturn("100");
    when(localParamsMock.get(QueryParsing.V)).thenReturn("string_field");
    when(jedisMock.sort(anyString(), any(SortingParams.class))).thenReturn(Arrays.asList("123", "321"));
    when(requestMock.getSchema()).thenReturn(schema);
    when(schema.getQueryAnalyzer()).thenReturn(new StandardAnalyzer());
    redisQParser = new RedisQParser("string_field", localParamsMock, paramsMock, requestMock, commandHandler);
    final Query parsedQuery = redisQParser.parse();
    // SORT must carry a LIMIT of (offset=100, count=0).
    final ArgumentCaptor<SortingParams> sortCaptor = ArgumentCaptor.forClass(SortingParams.class);
    verify(jedisMock).sort(eq("simpleKey"), sortCaptor.capture());
    Assert.assertEquals(getSortingParamString(new SortingParams().limit(100, 0)),
            getSortingParamString(sortCaptor.getValue()));
    // Both returned Redis values must surface as query terms.
    final IndexSearcher dummySearcher = new IndexSearcher(new MultiReader());
    final Set<Term> queryTerms = extractTerms(dummySearcher, parsedQuery);
    Assert.assertEquals(2, queryTerms.size());
}
/**
 * Parses the same free-form query with the n-gram query parser and the
 * standard query parser and logs both resulting queries for comparison.
 *
 * @throws Exception if either parser rejects the query string.
 */
public void testFreeFormQueryParse() throws Exception {
    // Fix: never wrap a string literal in `new String(...)` — it forces a
    // redundant copy; the literal itself is already a String.
    final String queryString = "name:spell -description:another";
    log.info("Original query: " + queryString);
    NGramQueryParser parser = new NGramQueryParser("name",
            new NGramAnalyzer(min_ngram, max_ngram), true);
    Query q = parser.parse(queryString);
    log.info("NGramQueryParser parsed query: " + q.toString());
    // Parse the same expression with the stock parser for comparison.
    QueryParser origParser = new QueryParser("name", new StandardAnalyzer());
    q = origParser.parse(queryString);
    log.info("QueryParser parsed query = " + q.toString());
}
@Test
public void shouldReturnEmptyQueryOnEmptyListOfSrandmember() throws SyntaxError, IOException {
    // SRANDMEMBER on a set that yields no members.
    when(localParamsMock.get("command")).thenReturn("srandmember");
    when(localParamsMock.get("key")).thenReturn("simpleKey");
    when(localParamsMock.get(QueryParsing.V)).thenReturn("string_field");
    when(jedisMock.srandmember(anyString(), anyInt())).thenReturn(new ArrayList<String>());
    when(requestMock.getSchema()).thenReturn(schema);
    when(schema.getQueryAnalyzer()).thenReturn(new StandardAnalyzer());
    redisQParser = new RedisQParser("string_field", localParamsMock, paramsMock, requestMock, commandHandler);
    final Query parsedQuery = redisQParser.parse();
    // Default sample size is a single member.
    verify(jedisMock).srandmember("simpleKey", 1);
    // An empty Redis result must translate into a query with no terms.
    final IndexSearcher dummySearcher = new IndexSearcher(new MultiReader());
    final Set<Term> queryTerms = extractTerms(dummySearcher, parsedQuery);
    Assert.assertEquals(0, queryTerms.size());
}
/**
 * Deletes the index entries for the given content id, if an index exists.
 *
 * @param contentId id of the content whose entries are removed.
 * @param dir       Lucene directory holding the index.
 * @throws IOException    on index I/O failure.
 * @throws ParseException on query parsing failure.
 */
@Transactional(readOnly = true)
public void deleteIndex(Integer contentId, Directory dir)
        throws IOException, ParseException {
    // Nothing to do when no index has been created yet.
    if (IndexReader.indexExists(dir)) {
        // try-with-resources closes the writer even when deletion fails
        // (replaces the manual try/finally close).
        try (IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(
                Version.LUCENE_30), false,
                IndexWriter.MaxFieldLength.LIMITED)) {
            LuceneContent.delete(contentId, writer);
        }
    }
}
/**
 * Indexes a file of newline-delimited JSON documents into a Lucene index.
 * Prints a progress dot every 100 documents and the final count.
 *
 * @param args args[0] = source file of JSON lines, args[1] = index directory.
 * @throws Exception on any I/O or JSON-parsing failure.
 */
public static void main(String[] args) throws Exception {
    if (args.length != 2) {
        System.out.println("usage: source_file index_dir");
        System.exit(1);
    }
    ObjectMapper mapper = new ObjectMapper();
    // Fix: the original leaked the reader, writer and directory when an
    // exception occurred mid-indexing; try-with-resources closes all three.
    try (BufferedReader reader = new BufferedReader(new FileReader(new File(args[0])));
            Directory dir = FSDirectory.open(FileSystems.getDefault().getPath(args[1]));
            IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
        int count = 0;
        String line;
        while ((line = reader.readLine()) != null) {
            JsonNode json = mapper.readTree(line);
            writer.addDocument(buildDoc(json));
            count++;
            // Progress marker every 100 documents.
            if (count % 100 == 0) {
                System.out.print(".");
            }
        }
        System.out.println(count + " docs indexed");
        writer.commit();
    }
}
@Test
public void shouldAddTermsFromRedisOnLrangeCommand() throws SyntaxError, IOException {
    // Plain LRANGE with no min/max params.
    when(localParamsMock.get("command")).thenReturn("lrange");
    when(localParamsMock.get("key")).thenReturn("simpleKey");
    when(localParamsMock.get(QueryParsing.V)).thenReturn("string_field");
    when(jedisMock.lrange(anyString(), anyLong(), anyLong())).thenReturn(Arrays.asList("123", "321"));
    when(requestMock.getSchema()).thenReturn(schema);
    when(schema.getQueryAnalyzer()).thenReturn(new StandardAnalyzer());
    redisQParser = new RedisQParser("string_field", localParamsMock, paramsMock, requestMock, commandHandler);
    final Query parsedQuery = redisQParser.parse();
    // Defaults are 0 and -1, i.e. the whole list.
    verify(jedisMock).lrange("simpleKey", 0, -1);
    // Both returned Redis values must surface as query terms.
    final IndexSearcher dummySearcher = new IndexSearcher(new MultiReader());
    final Set<Term> queryTerms = extractTerms(dummySearcher, parsedQuery);
    Assert.assertEquals(2, queryTerms.size());
}
/**
 * Tokenizes the file at {@code filePath} and returns a term-frequency map of
 * the tokens passing the minimum-length filter. Also records the per-file
 * message count in {@code messageCountDistribution} and adds it to the
 * running {@code messageCount} total.
 *
 * @param i        index of the file, used as the distribution key.
 * @param filePath path of the file to read.
 * @return map from lowercase token to its occurrence count (empty on I/O error).
 */
public HashMap<String,Short> indexFile(int i, String filePath) {
    HashMap<String,Short> map = new HashMap<>();
    try {
        List<String> lines = FileUtils.readLines(new File(filePath));
        Analyzer analyzer = new StandardAnalyzer();
        int messageCountFile = 0;
        for (String line : lines) {
            // Fix: "!mention || (mention && contains)" is logically just
            // "!mention || contains" — the inner "mention &&" was redundant.
            if (!mention || line.contains("@")) {
                messageCountFile++;
                String cleanLine = line.toLowerCase();
                for (String token : Tokenizer.tokenizeString(analyzer, cleanLine)) {
                    if (token.length() >= minWordLength) {
                        // Increment the term count; Short arithmetic, so counts
                        // above 32767 would wrap (unchanged from the original).
                        Short count = map.get(token);
                        map.put(token, count == null ? (short) 1 : (short) (count + 1));
                    }
                }
            }
        }
        messageCountDistribution.put(i, messageCountFile);
        messageCount += messageCountFile;
    } catch (IOException ex) {
        Logger.getLogger(Indexer.class.getName()).log(Level.SEVERE, null, ex);
    }
    return map;
}
@Test
public void givenPrefixQueryWhenFetchedDocumentThenCorrect() {
    // Index two documents that both contain a term starting with "intro".
    final InMemoryLuceneIndex index = new InMemoryLuceneIndex(new RAMDirectory(), new StandardAnalyzer());
    index.indexDocument("article", "Lucene introduction");
    index.indexDocument("article", "Introduction to Lucene");
    // A prefix query on "intro" should match both documents.
    final Query prefixQuery = new PrefixQuery(new Term("body", "intro"));
    final List<Document> matches = index.searchIndex(prefixQuery);
    Assert.assertEquals(2, matches.size());
}
// Builds a searcher over a classpath-backed directory plus a multi-field
// query parser for the "name" and "description" fields.
public IndexSearcher() {
    try {
        searcher = new org.apache.lucene.search.IndexSearcher(new ClasspathDirectory());
    } catch (IOException e) {
        // NOTE(review): the exception is only printed, leaving `searcher`
        // null — any later use would NPE. Consider rethrowing; kept as-is.
        e.printStackTrace();
    }
    analyzer = new StandardAnalyzer(Version.LUCENE_31);
    parser = new MultiFieldQueryParser(Version.LUCENE_31, new String[]{"name","description"}, analyzer);
}
/**
 * End-to-end smoke test: index one document in a temp directory, then run
 * term, stored-field and phrase queries against it.
 */
public void testDemo() throws IOException {
    // A term long enough to exercise indexing of oversized tokens.
    String longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm";
    String text = "This is the text to be indexed. " + longTerm;
    Path indexPath = Files.createTempDirectory("tempIndex");
    try (Directory dir = FSDirectory.open(indexPath)) {
        // Index a single document with one stored text field.
        Analyzer analyzer = new StandardAnalyzer();
        try (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(analyzer))) {
            Document document = new Document();
            document.add(newTextField("fieldname", text, Field.Store.YES));
            writer.addDocument(document);
        }
        // Now search the index.
        try (IndexReader reader = DirectoryReader.open(dir)) {
            IndexSearcher searcher = newSearcher(reader);
            assertEquals(1, searcher.count(new TermQuery(new Term("fieldname", longTerm))));
            TopDocs topDocs = searcher.search(new TermQuery(new Term("fieldname", "text")), 1);
            assertEquals(1, topDocs.totalHits.value);
            // Every hit must carry the stored field contents.
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                Document hitDoc = searcher.doc(scoreDoc.doc);
                assertEquals(text, hitDoc.get("fieldname"));
            }
            // Test simple phrase query.
            assertEquals(1, searcher.count(new PhraseQuery("fieldname", "to", "be")));
        }
    }
    IOUtils.rm(indexPath);
}
/**
 * Rebuilds the Lucene index for the configured dictionary: deletes the old
 * index directory, streams the dictionary file through the configured
 * parser into a freshly created IndexWriter, and optimizes the result.
 *
 * @throws IOException if reading the dictionary or writing the index fails.
 */
private void indexWithLucene() throws IOException {
    System.out.println("Deleting old Lucene index");
    FileUtils.deleteDirectory(new File(LUCENE_INDEX));
    System.out.println("Indexing with Lucene");
    final BufferedReader dictionary = config.newReader();
    try {
        final Directory directory = FSDirectory.open(new File(LUCENE_INDEX));
        try {
            // create=true: always start from an empty index.
            final IndexWriter luceneWriter = new IndexWriter(directory,
                new StandardAnalyzer(LuceneSearch.LUCENE_VERSION), true,
                IndexWriter.MaxFieldLength.UNLIMITED);
            try {
                // Parser choice depends on the configured dictionary file type.
                final IDictParser parser = config.fileType.newParser(config);
                indexWithLucene(dictionary, luceneWriter, parser);
                System.out.println("Optimizing Lucene index");
                luceneWriter.optimize();
            } finally {
                luceneWriter.close();
            }
        } finally {
            // Best-effort close; directory close failures are swallowed.
            closeQuietly(directory);
        }
    } finally {
        IOUtils.closeQuietly(dictionary);
    }
    System.out.println("Finished Lucene indexing");
}