Listed below are example usages of the org.apache.lucene.index.IndexWriter API class, showing instance code and common patterns; you can also follow the links to GitHub to view the original source.
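Before the individual examples, here is a minimal end-to-end sketch of the IndexWriter lifecycle for orientation. It is not taken from any of the projects below; it assumes a recent Lucene release (8.x or later) and uses placeholder field names.

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;

public class IndexWriterBasics {
    public static void main(String[] args) throws Exception {
        Directory dir = new ByteBuffersDirectory(); // in-memory directory
        IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer())
            .setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        try (IndexWriter writer = new IndexWriter(dir, config)) { // close() also commits by default
            Document doc = new Document();
            doc.add(new StringField("id", "1", Field.Store.YES));          // exact-match, stored
            doc.add(new TextField("body", "hello lucene", Field.Store.NO)); // analyzed, not stored
            writer.addDocument(doc);
            writer.commit(); // make the change durable and visible to newly opened readers
        }
    }
}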
@Test
public void testSingleTerm() throws IOException {
RAMDirectory d = new RAMDirectory();
IndexWriter w = new IndexWriter(d, null, true, IndexWriter.MaxFieldLength.LIMITED);
Document doc = new Document();
doc.add(new Field("int", "1", Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS));
w.addDocument(doc);
w.close();
IndexReader r = IndexReader.open(d);
LuceneUnsortedIntTermDocIterator iter = LuceneUnsortedIntTermDocIterator.create(r, "int");
assertTrue(iter.nextTerm());
assertEquals(1, iter.term());
int[] docs = new int[2];
assertEquals(1, iter.nextDocs(docs));
assertEquals(0, docs[0]);
assertFalse(iter.nextTerm());
r.close();
}
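This first snippet targets the pre-4.0 API (the four-argument constructor with MaxFieldLength, and RAMDirectory). For reference, a rough modern equivalent of the same writer setup, assuming Lucene 8+ where the create flag is expressed through OpenMode and RAMDirectory is replaced by ByteBuffersDirectory:

// Sketch of a modern equivalent of the writer setup above (not from the original project).
Directory d = new ByteBuffersDirectory();
IndexWriterConfig cfg = new IndexWriterConfig(new KeywordAnalyzer()) // stands in for the null analyzer
    .setOpenMode(IndexWriterConfig.OpenMode.CREATE);                 // "create = true" in the old constructor
IndexWriter w = new IndexWriter(d, cfg);
Document doc = new Document();
doc.add(new StringField("int", "1", Field.Store.NO)); // StringField replaces Field.Index.NOT_ANALYZED_NO_NORMS
w.addDocument(doc);
w.close();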
public void testCustomLockFactory() throws IOException {
MockLockFactory lf = new MockLockFactory();
Directory dir = new MockDirectoryWrapper(random(), new ByteBuffersDirectory(lf));
IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())));
// add 100 documents
for (int i = 0; i < 100; i++) {
addDoc(writer);
}
// Only the write lock should have been created (modern Lucene has no separate commit lock):
assertEquals("# of unique locks created (after instantiating IndexWriter)",
1, lf.locksCreated.size());
writer.close();
}
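MockLockFactory here is test scaffolding; the mechanism under test is that IndexWriter acquires its write lock through the Directory's LockFactory. A minimal sketch of the same mechanism with stock classes (SingleInstanceLockFactory and obtainLock are real Lucene APIs; the snippet itself is illustrative):

Directory dir = new ByteBuffersDirectory(new SingleInstanceLockFactory());
try (Lock writeLock = dir.obtainLock(IndexWriter.WRITE_LOCK_NAME)) {
    // While this lock is held, opening an IndexWriter on dir
    // fails with LockObtainFailedException.
}
// Lock released on close; an IndexWriter can now be opened normally.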
@Override
public void delete(DeleteUpdateCommand cmd) throws IOException {
TestInjection.injectDirectUpdateLatch();
deleteByIdCommands.increment();
deleteByIdCommandsCumulative.mark();
if ((cmd.getFlags() & UpdateCommand.IGNORE_INDEXWRITER) != 0 ) {
if (ulog != null) ulog.delete(cmd);
return;
}
Term deleteTerm = getIdTerm(cmd.getIndexedId(), false);
// SolrCore.verbose("deleteDocuments",deleteTerm,writer);
RefCounted<IndexWriter> iw = solrCoreState.getIndexWriter(core);
try {
iw.get().deleteDocuments(deleteTerm);
} finally {
iw.decref();
}
// SolrCore.verbose("deleteDocuments",deleteTerm,"DONE");
if (ulog != null) ulog.delete(cmd);
updateDeleteTrackers(cmd);
}
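Stripped of Solr's update log, metrics, and ref-counted writer access, the underlying Lucene operation is a plain delete call. A minimal sketch (the writer variable, field names, and values are illustrative):

// Delete by exact term, e.g. on a unique-id field...
writer.deleteDocuments(new Term("id", "doc-42"));
// ...or delete everything matching a query:
writer.deleteDocuments(new TermQuery(new Term("status", "stale")));
writer.commit(); // deletes become visible to readers opened after this point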
public void addFileToIndex(String filepath) throws IOException, URISyntaxException {
    Path path = Paths.get(getClass().getClassLoader().getResource(filepath).toURI());
    File file = path.toFile();
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
    // try-with-resources so the writer and reader are closed even if indexing fails
    try (IndexWriter indexWriter = new IndexWriter(indexDirectory, indexWriterConfig);
         FileReader fileReader = new FileReader(file)) {
        Document document = new Document();
        document.add(new TextField("contents", fileReader));
        document.add(new StringField("path", file.getPath(), Field.Store.YES));
        document.add(new StringField("filename", file.getName(), Field.Store.YES));
        indexWriter.addDocument(document);
    }
}
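Note that a TextField constructed from a Reader is tokenized for search but can never be stored, which is why the example stores path and filename in separate StringFields. If the raw contents must also be retrievable at search time, one option (a sketch, not part of the original example) is to read the file into a String first:

// Read the file eagerly so the contents can be both indexed and stored.
String contents = new String(Files.readAllBytes(file.toPath()), StandardCharsets.UTF_8);
document.add(new TextField("contents", contents, Field.Store.YES));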
/**
* Add a number of chapters to the index. This is much more efficient than
* calling add() repeatedly because it just uses one writer rather than
* opening and closing one for each individual operation.
*
* @param bibleList the list of chapters to add.
*/
@Override
public void addAll(Collection<? extends BibleChapter> bibleList) {
try (IndexWriter writer = new IndexWriter(index, new IndexWriterConfig(analyzer))) {
for(BibleChapter chapter : bibleList) {
Document doc = new Document();
doc.add(new TextField("text", chapter.getText(), Field.Store.NO));
doc.add(new TextField("number", Integer.toString(chapter.getID()), Field.Store.YES));
writer.addDocument(doc);
chapters.put(chapter.getID(), chapter);
LOGGER.log(Level.FINE, "Added bible chapter to index: {0}", chapter.getID());
}
}
catch (IOException ex) {
LOGGER.log(Level.SEVERE, "Couldn't add value to index", ex);
}
}
private void updateIndex(Session session, FullTextIndexInfo indexInfo, Iterable<byte[]> rows) throws IOException {
StoreAdapter adapter = store.createAdapter(session);
QueryContext queryContext = new SimpleQueryContext(adapter);
QueryBindings queryBindings = queryContext.createBindings();
Cursor cursor = null;
IndexWriter writer = indexInfo.getIndexer().getWriter();
try(RowIndexer rowIndexer = new RowIndexer(indexInfo, writer, true)) {
Operator operator = indexInfo.getOperator();
Iterator<byte[]> it = rows.iterator();
while(it.hasNext()) {
byte[] row = it.next();
Row hkeyRow = toHKeyRow(row, indexInfo.getHKeyRowType(), adapter);
queryBindings.setRow(0, hkeyRow);
cursor = API.cursor(operator, queryContext, queryBindings);
rowIndexer.updateDocument(cursor, row);
it.remove();
}
} finally {
if(cursor != null && !cursor.isClosed()) {
cursor.close();
}
}
}
@Inject
public LucenePerUserWaveViewHandlerImpl(IndexDirectory directory,
ReadableWaveletDataProvider waveletProvider,
@Named(CoreSettingsNames.WAVE_SERVER_DOMAIN) String domain,
@IndexExecutor Executor executor) {
this.waveletProvider = waveletProvider;
this.executor = executor;
analyzer = new StandardAnalyzer(LUCENE_VERSION);
try {
IndexWriterConfig indexConfig = new IndexWriterConfig(LUCENE_VERSION, analyzer);
indexConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
indexWriter = new IndexWriter(directory.getDirectory(), indexConfig);
nrtManager = new NRTManager(indexWriter, new WaveSearchWarmer(domain));
} catch (IOException ex) {
throw new IndexException(ex);
}
nrtManagerReopenThread = new NRTManagerReopenThread(nrtManager, MAX_STALE_SEC, MIN_STALE_SEC);
nrtManagerReopenThread.start();
}
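NRTManager and NRTManagerReopenThread belong to the Lucene 4.x line and were later removed. On current releases, the equivalent near-real-time setup would look roughly like the following sketch (reusing MAX_STALE_SEC and MIN_STALE_SEC from the original; not a drop-in replacement for this class):

indexWriter = new IndexWriter(directory.getDirectory(), indexConfig);
// SearcherManager plus ControlledRealTimeReopenThread replace NRTManager/NRTManagerReopenThread.
SearcherManager searcherManager = new SearcherManager(indexWriter, new SearcherFactory());
ControlledRealTimeReopenThread<IndexSearcher> reopenThread =
    new ControlledRealTimeReopenThread<>(indexWriter, searcherManager, MAX_STALE_SEC, MIN_STALE_SEC);
reopenThread.setDaemon(true);
reopenThread.start();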
/**
* Created by {@link org.apache.pinot.core.indexsegment.mutable.MutableSegmentImpl}
* for each column on which text index has been enabled
* @param column column name
* @param segmentIndexDir realtime segment consumer dir
* @param segmentName realtime segment name
*/
public RealtimeLuceneTextIndexReader(String column, File segmentIndexDir, String segmentName) {
_column = column;
_segmentName = segmentName;
try {
// indexCreator.close() is necessary for cleaning up the resources associated with lucene
// index writer that was indexing data realtime. We close the indexCreator
// when the realtime segment is destroyed (we would have already committed the
// segment and converted it into offline before destroy is invoked)
// So committing the lucene index for the realtime in-memory segment is not necessary
// as it is already part of the offline segment after the conversion.
// This is why "commitOnClose" is set to false when creating the lucene index writer
// for realtime
_indexCreator =
new LuceneTextIndexCreator(column, new File(segmentIndexDir.getAbsolutePath() + "/" + segmentName),
false /* commitOnClose */);
IndexWriter indexWriter = _indexCreator.getIndexWriter();
_searcherManager = new SearcherManager(indexWriter, false, false, null);
} catch (Exception e) {
LOGGER.error("Failed to instantiate realtime Lucene index reader for column {}, exception {}", column,
e.getMessage());
throw new RuntimeException(e);
}
StandardAnalyzer analyzer = new StandardAnalyzer();
_queryParser = new QueryParser(column, analyzer);
}
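A SearcherManager built directly on an IndexWriter (as above) serves searchers over the writer's as-yet-uncommitted state. For context, a typical refresh-and-search cycle against this class's fields would look like the following illustrative sketch (not part of the Pinot source):

_searcherManager.maybeRefresh(); // pick up documents indexed since the last refresh
IndexSearcher searcher = _searcherManager.acquire();
try {
    TopDocs hits = searcher.search(new TermQuery(new Term(_column, "lucene")), 10);
    // ... consume hits.scoreDocs ...
} finally {
    _searcherManager.release(searcher); // always release what was acquired
}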
public void testExceptionDuringRefresh() throws Exception {
Directory indexDir = newDirectory();
Directory taxoDir = newDirectory();
IndexWriter w = new IndexWriter(indexDir, newIndexWriterConfig(new MockAnalyzer(random())));
DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(taxoDir);
w.commit();
tw.commit();
SearcherTaxonomyManager mgr = new SearcherTaxonomyManager(indexDir, taxoDir, null);
tw.addCategory(new FacetLabel("a", "b"));
w.addDocument(new Document());
tw.commit();
w.commit();
// intentionally corrupt the taxo index:
SegmentInfos infos = SegmentInfos.readLatestCommit(taxoDir);
taxoDir.deleteFile(infos.getSegmentsFileName());
expectThrows(IndexNotFoundException.class, mgr::maybeRefreshBlocking);
IOUtils.close(w, tw, mgr, indexDir, taxoDir);
}
@Override
public synchronized void clear() throws IOException
{
// Disable the scheduler temporarily to avoid new commits getting scheduled
if (schedulerService != null) {
schedulerService.shutdown();
}
// Remove all data from the index
IndexWriter indexWriter = getIndexWriter();
indexWriter.deleteAll();
// Close the index temporarily because we want the IndexWriter to be re-initialized on the
// next access in order to pick up the current layer configuration of the project.
close();
}
/**
* Reads the current stored translog ID from the IW commit data. Returns null if no
* translog ID is present in the commit.
*/
@Nullable
private Translog.TranslogGeneration loadTranslogIdFromCommit(IndexWriter writer) throws IOException {
// commit on a just opened writer will commit even if there are no changes done to it
// we rely on that for the commit data translog id key
final Map<String, String> commitUserData = writer.getCommitData();
if (commitUserData.containsKey("translog_id")) {
assert commitUserData.containsKey(Translog.TRANSLOG_UUID_KEY) == false : "legacy commit contains translog UUID";
return new Translog.TranslogGeneration(null, Long.parseLong(commitUserData.get("translog_id")));
} else if (commitUserData.containsKey(Translog.TRANSLOG_GENERATION_KEY)) {
if (commitUserData.containsKey(Translog.TRANSLOG_UUID_KEY) == false) {
throw new IllegalStateException("commit doesn't contain translog UUID");
}
final String translogUUID = commitUserData.get(Translog.TRANSLOG_UUID_KEY);
final long translogGen = Long.parseLong(commitUserData.get(Translog.TRANSLOG_GENERATION_KEY));
return new Translog.TranslogGeneration(translogUUID, translogGen);
}
return null;
}
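writer.getCommitData() is the older API; recent Lucene releases expose commit user data through the live-commit-data methods instead. A short sketch of reading and writing it on a modern writer (the key and value are illustrative):

// Attach custom key/value pairs to the next commit point:
Map<String, String> userData = new HashMap<>();
userData.put("translog_generation", "42");
writer.setLiveCommitData(userData.entrySet());
writer.commit();
// Read them back:
for (Map.Entry<String, String> entry : writer.getLiveCommitData()) {
    System.out.println(entry.getKey() + " = " + entry.getValue());
}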
private static float computeNorm(Similarity sim, int length) throws IOException {
String value = IntStream.range(0, length).mapToObj(i -> "a").collect(Collectors.joining(" "));
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setSimilarity(sim));
w.addDocument(Collections.singleton(newTextField("foo", value, Store.NO)));
DirectoryReader reader = DirectoryReader.open(w);
w.close();
IndexSearcher searcher = new IndexSearcher(reader);
searcher.setSimilarity(sim);
Explanation expl = searcher.explain(new TermQuery(new Term("foo", "a")), 0);
reader.close();
dir.close();
Explanation norm = findExplanation(expl, "fieldNorm");
assertNotNull(norm);
return norm.getValue().floatValue();
}
public void testListenerCalled() throws Exception {
Directory dir = newDirectory();
IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(null));
final AtomicBoolean afterRefreshCalled = new AtomicBoolean(false);
SearcherManager sm = new SearcherManager(iw, new SearcherFactory());
sm.addListener(new ReferenceManager.RefreshListener() {
@Override
public void beforeRefresh() {
}
@Override
public void afterRefresh(boolean didRefresh) {
if (didRefresh) {
afterRefreshCalled.set(true);
}
}
});
iw.addDocument(new Document());
iw.commit();
assertFalse(afterRefreshCalled.get());
sm.maybeRefreshBlocking();
assertTrue(afterRefreshCalled.get());
sm.close();
iw.close();
dir.close();
}
public void testStressLocks() throws Exception {
Path tempPath = createTempDir();
assumeFalse("cannot handle buggy Files.delete", TestUtil.hasWindowsFS(tempPath));
Directory dir = getDirectory(tempPath);
// First create a 1 doc index:
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())).setOpenMode(OpenMode.CREATE));
addDoc(w);
w.close();
int numIterations = atLeast(20);
WriterThread writer = new WriterThread(numIterations, dir);
SearcherThread searcher = new SearcherThread(numIterations, dir);
writer.start();
searcher.start();
writer.join();
searcher.join();
assertTrue("IndexWriter hit unexpected exceptions", !writer.hitException);
assertTrue("IndexSearcher hit unexpected exceptions", !searcher.hitException);
dir.close();
}
@Test
public void testSearchNoScores() throws Exception {
IndexWriter w = new IndexWriter(new ByteBuffersDirectory(), new IndexWriterConfig(new KeywordAnalyzer()));
KeywordFieldMapper.KeywordFieldType fieldType = new KeywordFieldMapper.KeywordFieldType();
fieldType.setName("x");
fieldType.freeze();
for (int i = 0; i < 3; i++) {
addDoc(w, fieldType, "Arthur");
}
addDoc(w, fieldType, "Arthur"); // not "Arthur" to lower score
w.commit();
IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(w, true, true));
List<LuceneCollectorExpression<?>> columnReferences = Collections.singletonList(new ScoreCollectorExpression());
Query query = fieldType.termsQuery(Collections.singletonList("Arthur"), null);
LuceneOrderedDocCollector collector = collector(searcher, columnReferences, query, null, false);
KeyIterable<ShardId, Row> result = collector.collect();
assertThat(Iterables.size(result), is(2));
Iterator<Row> values = result.iterator();
assertThat(values.next().get(0), Matchers.is(Float.NaN));
assertThat(values.next().get(0), Matchers.is(Float.NaN));
}
@Test
public void testBackwardsCompatibility() throws Exception {
// tests that if the taxonomy index doesn't have the INDEX_EPOCH
// property (as is the case for pre-3.6 indexes), everything still works.
Directory dir = newDirectory();
// create an empty index first, so that DirTaxoWriter initializes indexEpoch to 1.
new IndexWriter(dir, new IndexWriterConfig(null)).close();
DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE_OR_APPEND, NO_OP_CACHE);
taxoWriter.close();
DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(dir);
assertEquals(1, Integer.parseInt(taxoReader.getCommitUserData().get(DirectoryTaxonomyWriter.INDEX_EPOCH)));
assertNull(TaxonomyReader.openIfChanged(taxoReader));
taxoReader.close();
dir.close();
}
public MergeScheduler getMergeScheduler() {
return new MergeScheduler() {
private final String _id = UUID.randomUUID().toString();
@Override
public void merge(IndexWriter writer) throws IOException {
addMerges(_id, writer);
}
@Override
public void close() throws IOException {
remove(_id);
}
@Override
public MergeScheduler clone() {
return getMergeScheduler();
}
};
}
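The merge(IndexWriter) callback dates this scheduler to an older Lucene; recent releases instead hand the scheduler a MergeScheduler.MergeSource. Under that API, the same anonymous scheduler would be sketched roughly as follows (addMerges and remove are the surrounding class's own helpers; the drain loop replaces the writer-based delegation):

return new MergeScheduler() {
    private final String _id = UUID.randomUUID().toString();
    @Override
    public void merge(MergeSource mergeSource, MergeTrigger trigger) throws IOException {
        // Drain and run pending merges; a real implementation might hand them to addMerges(_id, ...).
        MergePolicy.OneMerge merge;
        while ((merge = mergeSource.getNextMerge()) != null) {
            mergeSource.merge(merge);
        }
    }
    @Override
    public void close() throws IOException {
        remove(_id);
    }
};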
public void prepareIndex() throws IOException {
File globalWFMDir = new File(Util.GTPM_INDEX_DIR);
if (!globalWFMDir.exists()) {
Util.createDirs(Util.GTPM_INDEX_DIR);
}
KeywordAnalyzer keywordAnalyzer = new KeywordAnalyzer();
IndexWriterConfig wfmIndexWriterConfig = new IndexWriterConfig(Version.LUCENE_46, keywordAnalyzer);
wfmIndexWriterConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
wfmIndexWriterConfig.setRAMBufferSizeMB(1024);
logger.info("PREPARE INDEX");
try {
wfmIndexWriter = new IndexWriter(FSDirectory.open(new File(Util.GTPM_INDEX_DIR)), wfmIndexWriterConfig);
wfmIndexWriter.commit();
wfmIndexer = new DocumentMaker(wfmIndexWriter);
} catch (IOException e) {
e.printStackTrace();
}
}
/**
* Manage closing and unregistering an index writer.
*
* @param id String
* @throws IOException
*/
public void closeDeltaIndexWriter(String id) throws IOException
{
if (id == null)
{
throw new IndexerException("\"null\" is not a valid identifier for a transaction");
}
// No lock required, as the delta is applied by one thread. The delta is
// still active.
IndexWriter writer = indexWriters.remove(id);
if (writer != null)
{
writer.close();
}
}
static void indexDocs(final IndexWriter writer, Path path) throws IOException {
if (Files.isDirectory(path)) {
Files.walkFileTree(path, new SimpleFileVisitor<Path>() {
@Override
public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
try {
indexDoc(writer, file, attrs.lastModifiedTime().toMillis());
} catch (IOException ignore) {
}
return FileVisitResult.CONTINUE;
}
}
);
} else {
indexDoc(writer, path, Files.getLastModifiedTime(path).toMillis());
}
}
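The indexDoc helper is not shown here; in the Lucene demo this visitor comes from, it looks roughly like the following sketch (field names follow the demo's conventions):

static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
    try (InputStream stream = Files.newInputStream(file)) {
        Document doc = new Document();
        doc.add(new StringField("path", file.toString(), Field.Store.YES));
        doc.add(new LongPoint("modified", lastModified));
        doc.add(new TextField("contents",
            new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))));
        // updateDocument is an atomic delete-then-add keyed on the path term,
        // so re-indexing the same file does not create duplicates.
        writer.updateDocument(new Term("path", file.toString()), doc);
    }
}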
@Test
public void testMultipleWritersOpenOnSameDirectory() throws IOException {
IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_43, new KeywordAnalyzer());
FastHdfsKeyValueDirectory directory = new FastHdfsKeyValueDirectory(false, _timer, _configuration, new Path(_path,
"test_multiple"));
IndexWriter writer1 = new IndexWriter(directory, config.clone());
addDoc(writer1, getDoc(1));
IndexWriter writer2 = new IndexWriter(directory, config.clone());
addDoc(writer2, getDoc(2));
writer1.close();
writer2.close();
DirectoryReader reader = DirectoryReader.open(directory);
int maxDoc = reader.maxDoc();
assertEquals(1, maxDoc);
Document document = reader.document(0);
assertEquals("2", document.get("id"));
reader.close();
}
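This test passes only because FastHdfsKeyValueDirectory tolerates two concurrent writers (the last close wins, which is why a single document with id "2" survives). With a standard Directory the second IndexWriter would fail to acquire write.lock; a sketch of the default behavior (the path is a placeholder, expectThrows as in LuceneTestCase):

Directory fsDir = FSDirectory.open(Paths.get("/tmp/lock-demo"));
IndexWriter first = new IndexWriter(fsDir, new IndexWriterConfig(new KeywordAnalyzer()));
// The default lock factory makes the second open attempt fail fast instead of corrupting the index.
expectThrows(LockObtainFailedException.class,
    () -> new IndexWriter(fsDir, new IndexWriterConfig(new KeywordAnalyzer())));
first.close();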
public void testRamDirectory() throws IOException {
long start = System.currentTimeMillis();
IndexWriterConfig indexWriterConfig = new IndexWriterConfig(new WhitespaceAnalyzer())
.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
RAMDirectory ramDirectory = new RAMDirectory();
IndexWriter indexWriter = new IndexWriter(ramDirectory, indexWriterConfig);
for (int i = 0; i < 10000000; i++) {
indexWriter.addDocument(addDocument(i));
}
indexWriter.commit();
indexWriter.close();
long end = System.currentTimeMillis();
log.error("RamDirectory consumes {}s!", (end - start) / 1000);
start = System.currentTimeMillis();
IndexSearcher indexSearcher = new IndexSearcher(DirectoryReader.open(ramDirectory));
int total = 0;
for (int i = 0; i < 10000000; i++) {
TermQuery key1 = new TermQuery(new Term("key1", "key" + i));
TopDocs search = indexSearcher.search(key1, 10);
total += search.totalHits;
}
System.out.println(total);
end = System.currentTimeMillis();
log.error("RamDirectory search consumes {}ms!", (end - start));
}
private static void addIPv6Values(IndexWriter writer) throws IOException {
for (int i = 10; i < 20; i++) {
Document doc = new Document();
doc.add(new StringField("_id", Integer.toString(i), Field.Store.NO));
InetAddress address = InetAddresses.forString("7bd0:8082:2df8:487e:e0df:e7b5:9362:" + Integer.toHexString(i));
doc.add(new SortedSetDocValuesField(IP_COLUMN, new BytesRef(InetAddressPoint.encode(address))));
writer.addDocument(doc);
}
}
private IndexReader getIndexReader() throws IOException {
IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_43, new KeywordAnalyzer());
Directory dir = new RAMDirectory();
IndexWriter writer = new IndexWriter(dir, conf);
writer.close();
return DirectoryReader.open(dir);
}
public void testDeleteAll() throws Exception {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
SearcherManager mgr = new SearcherManager(w, new SearcherFactory());
nrtDeletesThread = new ControlledRealTimeReopenThread<>(w, mgr, 0.1, 0.01);
nrtDeletesThread.setName("NRTDeletes Reopen Thread");
nrtDeletesThread.setDaemon(true);
nrtDeletesThread.start();
long gen1 = w.addDocument(new Document());
long gen2 = w.deleteAll();
nrtDeletesThread.waitForGeneration(gen2);
IOUtils.close(nrtDeletesThread, mgr, w, dir);
}
@Override
public IndexWriter createIndexWriter(Directory directory, ODocument metadata) throws IOException {
Analyzer analyzer = getAnalyzer(metadata);
Version version = getLuceneVersion(metadata);
IndexWriterConfig iwc = new IndexWriterConfig(version, analyzer);
iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
return new IndexWriter(directory, iwc);
}
public void testSeparateIndexedFields() throws Exception {
Directory indexDir = newDirectory();
Directory taxoDir = newDirectory();
DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
IndexWriter iw = new IndexWriter(indexDir, newIndexWriterConfig(new MockAnalyzer(random())));
FacetsConfig config = new FacetsConfig();
config.setIndexFieldName("b", "$b");
for(int i = atLeast(30); i > 0; --i) {
Document doc = new Document();
doc.add(new StringField("f", "v", Field.Store.NO));
doc.add(new FacetField("a", "1"));
doc.add(new FacetField("b", "1"));
iw.addDocument(config.build(taxoWriter, doc));
}
DirectoryReader r = DirectoryReader.open(iw);
DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
FacetsCollector sfc = new FacetsCollector();
newSearcher(r).search(new MatchAllDocsQuery(), sfc);
Facets facets1 = getTaxonomyFacetCounts(taxoReader, config, sfc);
Facets facets2 = getTaxonomyFacetCounts(taxoReader, config, sfc, "$b");
assertEquals(r.maxDoc(), facets1.getTopChildren(10, "a").value.intValue());
assertEquals(r.maxDoc(), facets2.getTopChildren(10, "b").value.intValue());
iw.close();
IOUtils.close(taxoWriter, taxoReader, taxoDir, r, indexDir);
}
public void setDoc(String id, Document doc) throws HongsException {
IndexWriter iw = getWriter();
try {
iw.updateDocument(new Term("@" + Cnst.ID_KEY, id), doc);
} catch (IOException ex) {
throw new HongsException(ex);
}
if (!REFLUX_MODE) {
commit();
}
}
private IndexWriter getIndexWriter(String name, String lang)
throws CorruptIndexException, LockObtainFailedException,
IOException {
String path = indexWorkDir + name;
File f = new File(path);
f.mkdirs();
Analyzer analyzer = getAnalyzer(name, lang);
IndexWriter writer = new IndexWriter(path, analyzer);
writer.setUseCompoundFile(true);
return writer;
}
@Override
public int doLogic() throws Exception {
IndexWriter iw = getRunData().getIndexWriter();
if (iw != null) {
if (commitUserData != null) {
iw.setLiveCommitData(commitUserData.entrySet());
}
iw.commit();
}
return 1;
}