Listed below are example usages of the org.apache.lucene.index.IndexWriter#optimize() method, collected from open-source projects.
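All of the examples share the same lifecycle: open a writer, add documents, call optimize() to merge the index down to fewer segments, and close the writer. A minimal sketch of that pattern follows (the path and the Lucene 3.x-era constructor are illustrative assumptions, not taken from any one example):

// a minimal sketch of the open -> add -> optimize -> close pattern (Lucene 3.x era API)
Directory indexDir = FSDirectory.open(new File("/path/to/index")); // illustrative path
IndexWriter writer = new IndexWriter(indexDir, new StandardAnalyzer(Version.LUCENE_30),
        true, IndexWriter.MaxFieldLength.LIMITED);
try {
    Document doc = new Document();
    doc.add(new Field("title", "example", Field.Store.YES, Field.Index.ANALYZED));
    writer.addDocument(doc);
    writer.optimize(); // merge segments for faster searches
} finally {
    writer.close();    // always release the index write lock
}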
@Transactional(readOnly = true)
public Integer createIndex(Integer siteId, Integer channelId,
        Date startDate, Date endDate, Integer startId, Integer max,
        Directory dir) throws IOException, ParseException {
    boolean exist = IndexReader.indexExists(dir);
    IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(
            Version.LUCENE_30), !exist, IndexWriter.MaxFieldLength.LIMITED);
    try {
        if (exist) {
            LuceneContent.delete(siteId, channelId, startDate, endDate,
                    writer);
        }
        Integer lastId = luceneContentDao.index(writer, siteId, channelId,
                startDate, endDate, startId, max);
        writer.optimize();
        return lastId;
    } finally {
        writer.close();
    }
}
/**
 * Test of indexing with Lucene.
 * @throws Exception if an error is thrown while executing.
 */
public static void luceneIndex() throws Exception {
    // setting default parameters
    final int depth = 3;
    // create Lucene index writer
    final IndexWriter writer = new IndexWriter(index, new StandardAnalyzer(), true);
    writer.setUseCompoundFile(true);
    writer.setMaxFieldLength(1000000);
    // common crawler settings
    final Crawler crawler = new Crawler();
    crawler.setLinkFilter(new ServerFilter(server));
    crawler.setModel(new MaxDepthModel(depth));
    crawler.addParserListener(new IParserEventListener() {
        @Override
        public void parse(final ParserEvent event) {
            print("Parsing link: " + event.getLink());
        }
    });
    // create Lucene parsing listener and add it
    final LuceneParserEventListener listener = new LuceneParserEventListener(writer);
    crawler.addParserListener(listener);
    // start crawler
    crawler.start(server, startPage);
    // optimize and close the Lucene index
    writer.optimize();
    writer.close();
}
/**
 * Indexes the data from the given reader.
 * @param reader Source index reader, from which autocomplete words are obtained for the defined field
 * @param field the field of the source index reader to index for autocompletion
 * @param mergeFactor mergeFactor to use when indexing
 * @param ramMB the max amount of memory in MB to use
 * @param optimize whether or not the autocomplete index should be optimized
 * @throws AlreadyClosedException if the Autocompleter is already closed
 * @throws IOException
 */
public final void indexDictionary(IndexReader reader, String field, int mergeFactor, int ramMB, boolean optimize) throws IOException {
    synchronized (modifyCurrentIndexLock) {
        ensureOpen();
        final Directory dir = this.autoCompleteIndex;
        final Dictionary dict = new LuceneDictionary(reader, field);
        // note: the mergeFactor parameter is accepted but not applied in this snippet
        final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Version.LUCENE_CURRENT, new WhitespaceAnalyzer(Version.LUCENE_CURRENT)).setRAMBufferSizeMB(ramMB));
        IndexSearcher indexSearcher = obtainSearcher();
        final List<IndexReader> readers = new ArrayList<IndexReader>();
        if (indexSearcher.maxDoc() > 0) {
            ReaderUtil.gatherSubReaders(readers, indexSearcher.getIndexReader());
        }
        // clear the index
        writer.deleteAll();
        try {
            Iterator<String> iter = dict.getWordsIterator();
            while (iter.hasNext()) {
                String word = iter.next();
                // index the word together with its document frequency in the source index
                Document doc = createDocument(word, reader.docFreq(new Term(field, word)));
                writer.addDocument(doc);
            }
        } finally {
            releaseSearcher(indexSearcher);
        }
        // optionally optimize, then close the writer
        if (optimize) {
            writer.optimize();
        }
        writer.close();
        // also re-open the autocomplete index to see our own changes when the next suggestion
        // is fetched:
        swapSearcher(dir);
    }
}
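A minimal call sketch for the method above, assuming an instance of the enclosing class named autocompleter (a hypothetical variable) and an illustrative source index path:

IndexReader sourceReader = IndexReader.open(FSDirectory.open(new File("/path/to/source-index")));
try {
    // build the autocomplete dictionary from the "title" field with mergeFactor 300
    // and a 16 MB RAM buffer, then optimize the autocomplete index
    autocompleter.indexDictionary(sourceReader, "title", 300, 16, true);
} finally {
    sourceReader.close();
}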
public boolean createCourseIndex() {
    List<Course> list = this.getCourses();
    try {
        Directory directory = FSDirectory.getDirectory(INDEXPATH);
        IndexWriter indexWriter = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
        for (Course course : list) {
            Document doc = new Document();
            String courseTitle = course.getCourseTitle() == null ? "" : course.getCourseTitle().trim();
            String courseIntro = course.getCourseIntro() == null ? "" : course.getCourseIntro();
            String courseId = course.getCourseId() == null ? "" : course.getCourseId();
            String type = course.getType() == null ? "" : course.getType();
            String courseState = course.getCourseState() == null ? "" : course.getCourseState();
            doc.add(new Field("courseIntro", courseIntro, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES));
            doc.add(new Field("courseTitle", courseTitle, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES));
            doc.add(new Field("courseId", courseId, Field.Store.COMPRESS, Field.Index.NO));
            doc.add(new Field("type", type, Field.Store.COMPRESS, Field.Index.NO));
            doc.add(new Field("courseState", courseState, Field.Store.COMPRESS, Field.Index.NO));
            indexWriter.addDocument(doc);
        }
        indexWriter.optimize();
        indexWriter.close();
        return true;
    } catch (Exception e) {
        logger.error("createCourseIndex error.", e);
        return false;
    }
}
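Note that the example above closes the writer only on the success path; if addDocument or optimize throws, the index write lock is left held. A safer shape for the tail of the method, a minimal sketch using the same indexWriter variable:

try {
    // ... add documents as above ...
    indexWriter.optimize();
    return true;
} finally {
    indexWriter.close(); // release the write lock even when indexing fails
}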
private void indexWithLucene() throws IOException {
    System.out.println("Deleting old Lucene index");
    FileUtils.deleteDirectory(new File(LUCENE_INDEX));
    System.out.println("Indexing with Lucene");
    final BufferedReader dictionary = config.newReader();
    try {
        final Directory directory = FSDirectory.open(new File(LUCENE_INDEX));
        try {
            final IndexWriter luceneWriter = new IndexWriter(directory,
                    new StandardAnalyzer(LuceneSearch.LUCENE_VERSION), true,
                    IndexWriter.MaxFieldLength.UNLIMITED);
            try {
                final IDictParser parser = config.fileType.newParser(config);
                indexWithLucene(dictionary, luceneWriter, parser);
                System.out.println("Optimizing Lucene index");
                luceneWriter.optimize();
            } finally {
                luceneWriter.close();
            }
        } finally {
            closeQuietly(directory);
        }
    } finally {
        IOUtils.closeQuietly(dictionary);
    }
    System.out.println("Finished Lucene indexing");
}
public void indexforentity() throws Exception {
    if (EntityFragmentFields.entityId2Name == null) {
        EntityFragmentFields.load();
    }
    long startTime = new Date().getTime();
    // KB index updated to DBpedia2015. by husen 2016-04-08
    // KB index updated to DBpedia2016. by husen 2018-8-22
    File indexDir_en = new File("D:/husen/gAnswer/data/DBpedia2016/lucene/entity_fragment_index");
    File sourceDir_en = new File("D:/husen/gAnswer/data/DBpedia2016/fragments/entity_RDF_fragment/16entity_fragment.txt");
    Analyzer luceneAnalyzer_en = new StandardAnalyzer();
    IndexWriter indexWriter_en = new IndexWriter(indexDir_en, luceneAnalyzer_en, true);
    int mergeFactor = 100000;            // default 10
    int maxBufferedDoc = 1000;           // default 10
    int maxMergeDoc = Integer.MAX_VALUE; // INF
    //indexWriter.DEFAULT_MERGE_FACTOR = mergeFactor;
    indexWriter_en.setMergeFactor(mergeFactor);
    indexWriter_en.setMaxBufferedDocs(maxBufferedDoc);
    indexWriter_en.setMaxMergeDocs(maxMergeDoc);
    FileInputStream file = new FileInputStream(sourceDir_en);
    InputStreamReader in = new InputStreamReader(file, "UTF-8");
    BufferedReader br = new BufferedReader(in);
    int count = 0;
    String line;
    while ((line = br.readLine()) != null) {
        count++;
        if (count % 100000 == 0) {
            System.out.println(count);
        }
        String[] temp = line.split("\t");
        if (temp.length != 2) {
            continue;
        }
        int entity_id = Integer.parseInt(temp[0]);
        if (!EntityFragmentFields.entityId2Name.containsKey(entity_id)) {
            continue;
        }
        String entity_name = EntityFragmentFields.entityId2Name.get(entity_id);
        String entity_fragment = temp[1];
        entity_name = entity_name.replace("____", " ");
        entity_name = entity_name.replace("__", " ");
        entity_name = entity_name.replace("_", " ");
        Document document = new Document();
        Field EntityName = new Field("EntityName", entity_name, Field.Store.YES,
                Field.Index.TOKENIZED,
                Field.TermVector.WITH_POSITIONS_OFFSETS);
        Field EntityId = new Field("EntityId", String.valueOf(entity_id),
                Field.Store.YES, Field.Index.NO);
        Field EntityFragment = new Field("EntityFragment", entity_fragment,
                Field.Store.YES, Field.Index.NO);
        document.add(EntityName);
        document.add(EntityId);
        document.add(EntityFragment);
        indexWriter_en.addDocument(document);
    }
    indexWriter_en.optimize();
    indexWriter_en.close();
    br.close();
    // report the time taken to build the index
    long endTime = new Date().getTime();
    System.out.println("entity_name index has been built -> " + count + " Time: " + (endTime - startTime));
}
public static void buildIndex(HashMap<String, ArrayList<Integer>> typeShortName2IdList) throws Exception {
    long startTime = new Date().getTime();
    File indexDir_li = new File("D:/husen/gAnswer/data/DBpedia2016/lucene/type_fragment_index");
    Analyzer luceneAnalyzer_li = new StandardAnalyzer();
    IndexWriter indexWriter_li = new IndexWriter(indexDir_li, luceneAnalyzer_li, true);
    int mergeFactor = 100000;
    int maxBufferedDoc = 1000;
    int maxMergeDoc = Integer.MAX_VALUE;
    //indexWriter.DEFAULT_MERGE_FACTOR = mergeFactor;
    indexWriter_li.setMergeFactor(mergeFactor);
    indexWriter_li.setMaxBufferedDocs(maxBufferedDoc);
    indexWriter_li.setMaxMergeDocs(maxMergeDoc);
    int count = 0;
    Iterator<String> it = typeShortName2IdList.keySet().iterator();
    while (it.hasNext()) {
        String sn = it.next();
        if (sn.length() == 0) {
            continue;
        }
        count++;
        StringBuilder splittedSn = new StringBuilder();
        if (sn.contains("_")) {
            String nsn = sn.replace("_", " ");
            splittedSn.append(nsn.toLowerCase());
        } else {
            // split a camel-case name at every character that is not a lowercase letter
            int last = 0, i = 0;
            for (i = 0; i < sn.length(); i++) {
                if (!(sn.charAt(i) >= 'a' && sn.charAt(i) <= 'z')) {
                    splittedSn.append(sn.substring(last, i).toLowerCase());
                    splittedSn.append(' ');
                    last = i;
                }
            }
            splittedSn.append(sn.substring(last, i).toLowerCase());
            while (splittedSn.charAt(0) == ' ') {
                splittedSn.deleteCharAt(0);
            }
        }
        System.out.println("SplittedType: " + splittedSn);
        Document document = new Document();
        Field SplittedTypeShortName = new Field("SplittedTypeShortName", splittedSn.toString(),
                Field.Store.YES,
                Field.Index.TOKENIZED,
                Field.TermVector.WITH_POSITIONS_OFFSETS);
        Field TypeShortName = new Field("TypeShortName", sn,
                Field.Store.YES, Field.Index.NO);
        document.add(SplittedTypeShortName);
        document.add(TypeShortName);
        indexWriter_li.addDocument(document);
    }
    indexWriter_li.optimize();
    indexWriter_li.close();
    // report the time taken to build the index
    long endTime = new Date().getTime();
    System.out.println("TypeShortName index has been built -> " + count + " Time: " + (endTime - startTime));
}
@Override
public void makeIndex(String lang, File workingDir) throws IOException {
    log.info("Loading support datasets...");
    File all_anchors = new WikipediaAnchorParser(lang).getFile();
    long numAnchors = ExternalSortUtils.wcl(all_anchors);
    AnchorIterator iterator = new AnchorIterator(all_anchors);
    IntSet people = new PeopleWIDs(lang).getDataset();
    // IndexSearcher articles = Indexes.getSearcher(RepositoryDirs.WIKIPEDIA.getPath(lang));
    IndexSearcher articles = openWikipediaIndex(lang);
    // QueryParser queryParser = new QueryParser(Version.LUCENE_34, WikipediaIndexer.FIELD_BODY, new WhitespaceAnalyzer(Version.LUCENE_34));
    QueryParser queryParser = new QueryParser(Version.LUCENE_34, WikipediaIndexer.FIELD_BODY, new StandardAnalyzer(Version.LUCENE_34, new HashSet<String>()));
    IndexWriter index = new IndexWriter(FSDirectory.open(workingDir.getAbsoluteFile()), new IndexWriterConfig(Version.LUCENE_34, new KeywordAnalyzer()));
    Document doc = new Document();
    Field fId = new Field(FIELD_ID, "", Store.YES, Index.NOT_ANALYZED);
    Field fText = new Field(FIELD_TEXT, "", Store.YES, Index.NOT_ANALYZED);
    Field fObject = new Field(FIELD_OBJECT, "", Store.YES, Index.NO);
    doc.add(fId);
    doc.add(fText);
    doc.add(fObject);
    // Field fOriginal = new Field(FIELD_ORIGINAL, "", Store.YES, Index.ANALYZED);
    // Field fWID = new Field(FIELD_WID, "", Store.NO, Index.ANALYZED);
    PLogger plog = new PLogger(log, Step.TEN_MINUTES, "lines", "anchors", "searches", "indexed", "0-freq", "dropped");
    plog.setEnd(0, numAnchors);
    plog.start("Support datasets loaded, now parsing...");
    int id = 0;
    while (iterator.next()) {
        plog.update(0, iterator.scroll);
        plog.update(1);
        String anchorText = iterator.anchor;
        int freq = freq(iterator.originals, articles, queryParser);
        plog.update(2, iterator.originals.size());
        if (freq == 0) plog.update(4);
        Anchor anchorObj = Anchor.build(id, iterator.links, freq, people);
        if (anchorObj == null) {
            plog.update(5);
            continue;
        }
        String anchorSerial = Anchor.serialize(anchorObj);
        fId.setValue(Integer.toString(++id));
        fText.setValue(anchorText);
        fObject.setValue(anchorSerial);
        for (int page : anchorObj) {
            Field fWID = new Field(FIELD_WID, Integer.toString(page), Store.YES, Index.NOT_ANALYZED);
            // fWID.setBoost(iterator.links.get(page));
            doc.add(fWID);
        }
        for (String original : iterator.originals) {
            doc.add(new Field(FIELD_ORIGINAL, original, Store.YES, Index.NOT_ANALYZED));
        }
        index.addDocument(doc);
        plog.update(3);
        doc.removeFields(FIELD_ORIGINAL);
        doc.removeFields(FIELD_WID);
    }
    plog.stop();
    iterator.close();
    log.info("Now optimizing...");
    index.optimize();
    index.close();
    log.info("Done.");
}
@Override
public void makeIndex(String lang, File workingDir) throws IOException {
    IndexReader articles = Indexes.getReader(RepositoryDirs.WIKIPEDIA.getPath(lang));
    Int2ObjectMap<String> bestAnchorMap = new BestAnchors(lang).getDataset();
    IndexWriter index = new IndexWriter(new SimpleFSDirectory(workingDir), new IndexWriterConfig(Version.LUCENE_34, new KeywordAnalyzer()));
    Document doc = new Document();
    Field fWID = new Field(FIELD_WID, "", Store.YES, Index.NOT_ANALYZED);
    Field fTitle = new Field(FIELD_TITLE, "", Store.YES, Index.NOT_ANALYZED);
    Field fAbstract = new Field(FIELD_ABSTRACT, "", Store.YES, Index.NO);
    Field fBestAnchor = new Field(FIELD_BEST_ANCHOR, "", Store.YES, Index.NO);
    doc.add(fWID);
    doc.add(fTitle);
    doc.add(fAbstract);
    doc.add(fBestAnchor);
    int max = articles.maxDoc();
    PLogger plog = new PLogger(log, Step.TEN_MINUTES, "pages", "indexed", "noBest");
    plog.setEnd(max);
    plog.start("Start indexing...");
    for (int i = 0; i < max; i++) {
        plog.update(0);
        Document oldDoc = articles.document(i);
        PageType type = PageType.valueOf(oldDoc.get(WikipediaIndexer.FIELD_TYPE));
        if (type == PageType.TOPIC) {
            int wid = Integer.parseInt(oldDoc.get(WikipediaIndexer.FIELD_WID));
            fWID.setValue(oldDoc.get(WikipediaIndexer.FIELD_WID));
            fAbstract.setValue(oldDoc.get(WikipediaIndexer.FIELD_ABSTRACT));
            fTitle.setValue(oldDoc.get(WikipediaIndexer.FIELD_TITLE));
            String bestAnchor = bestAnchorMap.get(wid);
            if (bestAnchor == null || bestAnchor.length() == 0) plog.update(2);
            fBestAnchor.setValue(bestAnchor == null ? "" : bestAnchor);
            String[] cats = oldDoc.getValues(WikipediaIndexer.FIELD_CAT);
            if (cats != null) {
                for (int j = 0; j < cats.length; j++) {
                    doc.add(new Field(FIELD_CAT, cats[j], Store.YES, Index.NOT_ANALYZED));
                }
            }
            index.addDocument(doc);
            plog.update(1);
            doc.removeFields(FIELD_CAT);
        }
    }
    plog.stop();
    log.info("Now optimizing...");
    index.optimize();
    index.close();
    // we cannot call this because the index is still in the temporary dir,
    // so TopicDocs would be created using the old index
    // log.info("Index Done, now creating WID->DOC_ID map");
    //
    // TopicDocs td = new TopicDocs(lang);
    // td.forceParsing();
    log.info("Done.");
}
/**
 * Creates a new spell-check index based on the search index.
 */
public void createSpellIndex() {
    if (isSpellCheckEnabled) {
        IndexReader indexReader = null;
        try {
            log.info("Start generating Spell-Index...");
            long startSpellIndexTime = 0;
            if (log.isDebugEnabled()) {
                startSpellIndexTime = System.currentTimeMillis();
            }
            final Directory indexDir = FSDirectory.open(new File(indexPath));
            indexReader = IndexReader.open(indexDir);
            // 1. Create content spellIndex
            final File spellDictionaryFile = new File(spellDictionaryPath);
            final Directory contentSpellIndexDirectory = FSDirectory.open(new File(spellDictionaryPath + CONTENT_PATH));
            final SpellChecker contentSpellChecker = new SpellChecker(contentSpellIndexDirectory);
            final Dictionary contentDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.CONTENT_FIELD_NAME);
            contentSpellChecker.indexDictionary(contentDictionary);
            // 2. Create title spellIndex
            final Directory titleSpellIndexDirectory = FSDirectory.open(new File(spellDictionaryPath + TITLE_PATH));
            final SpellChecker titleSpellChecker = new SpellChecker(titleSpellIndexDirectory);
            final Dictionary titleDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.TITLE_FIELD_NAME);
            titleSpellChecker.indexDictionary(titleDictionary);
            // 3. Create description spellIndex
            final Directory descriptionSpellIndexDirectory = FSDirectory.open(new File(spellDictionaryPath + DESCRIPTION_PATH));
            final SpellChecker descriptionSpellChecker = new SpellChecker(descriptionSpellIndexDirectory);
            final Dictionary descriptionDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.DESCRIPTION_FIELD_NAME);
            descriptionSpellChecker.indexDictionary(descriptionDictionary);
            // 4. Create author spellIndex
            final Directory authorSpellIndexDirectory = FSDirectory.open(new File(spellDictionaryPath + AUTHOR_PATH));
            final SpellChecker authorSpellChecker = new SpellChecker(authorSpellIndexDirectory);
            final Dictionary authorDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.AUTHOR_FIELD_NAME);
            authorSpellChecker.indexDictionary(authorDictionary);
            // Merge all partial spell indexes (content, title etc.) into one common spell index
            final Directory spellIndexDirectory = FSDirectory.open(spellDictionaryFile);
            final IndexWriter merger = new IndexWriter(spellIndexDirectory, new StandardAnalyzer(Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.UNLIMITED);
            final Directory[] directories = { contentSpellIndexDirectory, titleSpellIndexDirectory, descriptionSpellIndexDirectory, authorSpellIndexDirectory };
            merger.addIndexesNoOptimize(directories);
            merger.optimize();
            merger.close();
            spellChecker = new SpellChecker(spellIndexDirectory);
            spellChecker.setAccuracy(0.7f);
            if (log.isDebugEnabled()) {
                log.debug("SpellIndex created in " + (System.currentTimeMillis() - startSpellIndexTime) + "ms");
            }
            log.info("New generated Spell-Index ready to use.");
        } catch (final IOException ioEx) {
            log.warn("Can not create SpellIndex", ioEx);
        } finally {
            if (indexReader != null) {
                try {
                    indexReader.close();
                } catch (final IOException e) {
                    log.warn("Can not close indexReader properly", e);
                }
            }
        }
    }
}
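Once the merged index is optimized and the SpellChecker is assigned, it can serve suggestions. A minimal usage sketch against the spellChecker field set above (the query term and suggestion count are illustrative):

// ask for up to 5 words similar to a possibly misspelled query term
String[] suggestions = spellChecker.suggestSimilar("lucene", 5);
for (String suggestion : suggestions) {
    System.out.println(suggestion);
}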
/**
 * Creates the index-writer object. In multi-threaded mode, creates an array of index workers. Starts indexing with the main index as the root object and indexes all
 * elements recursively. At the end, optimizes and closes the new index. The new index is stored in [temporary-index-path]/main.
 *
 * @throws InterruptedException
 */
private void doIndex() throws InterruptedException {
    try {
        final File tempIndexDir = new File(tempIndexPath);
        final Directory indexPath = FSDirectory.open(new File(tempIndexDir, "main"));
        final Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
        indexWriter = new IndexWriter(indexPath, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
        indexWriter.deleteAll();
        indexWriter.setMergeFactor(INDEX_MERGE_FACTOR); // for better performance
        indexWriter.setRAMBufferSizeMB(ramBufferSizeMB); // for better performance, set to 48MB (see Lucene doc "how to make indexing faster")
        log.info("IndexWriter config RAMBufferSizeMB=" + indexWriter.getRAMBufferSizeMB());
        indexWriter.setUseCompoundFile(useCompoundFile); // for better performance (see Lucene doc "how to make indexing faster")
        log.info("IndexWriter config UseCompoundFile=" + indexWriter.getUseCompoundFile());
        // Create IndexWriterWorker
        log.info("Running with " + numberIndexWriter + " IndexerWriterWorker");
        indexWriterWorkers = new IndexWriterWorker[numberIndexWriter];
        final Directory[] partIndexDirs = new Directory[numberIndexWriter];
        for (int i = 0; i < numberIndexWriter; i++) {
            final IndexWriterWorker indexWriterWorker = new IndexWriterWorker(i, tempIndexDir, this);
            indexWriterWorkers[i] = indexWriterWorker;
            indexWriterWorkers[i].start();
            partIndexDirs[i] = indexWriterWorkers[i].getIndexDir();
        }
        final SearchResourceContext searchResourceContext = new SearchResourceContext();
        log.info("doIndex start. OlatFullIndexer with Debug output");
        mainIndexer.doIndex(searchResourceContext, null /* no parent */, this);
        log.info("Wait until every folder indexer is finished");
        DBFactory.getInstance().commitAndCloseSession();
        // check if every folder indexer is finished; max waiting time 10 min (= waitingCount limit of 60)
        int waitingCount = 0;
        final int MAX_WAITING_COUNT = 60; // = 10 min
        while (FolderIndexerWorkerPool.getInstance().isIndexerRunning() && (waitingCount++ < MAX_WAITING_COUNT)) {
            Thread.sleep(10000);
        }
        if (waitingCount >= MAX_WAITING_COUNT) {
            log.info("Finished with max waiting time!");
        }
        log.info("Set Finish-flag for each indexWriterWorkers");
        // Set Finish-flag
        for (int i = 0; i < numberIndexWriter; i++) {
            indexWriterWorkers[i].finishIndexing();
        }
        log.info("Wait until every indexworker is finished");
        // check if every index worker is finished; max waiting time 10 min (= waitingCount limit of 60)
        waitingCount = 0;
        while (!areIndexingDone() && (waitingCount++ < MAX_WAITING_COUNT)) {
            Thread.sleep(10000);
        }
        if (waitingCount >= MAX_WAITING_COUNT) {
            log.info("Finished with max waiting time!");
        }
        // Merge all partial indexes
        DBFactory.getInstance().commitAndCloseSession();
        if (partIndexDirs.length > 0) {
            log.info("Start merging part Indexes");
            indexWriter.addIndexesNoOptimize(partIndexDirs);
            log.info("Added all part Indexes");
        }
        fullIndexerStatus.setIndexSize(indexWriter.maxDoc());
        indexWriter.optimize();
        indexWriter.close();
    } catch (final IOException e) {
        log.warn("Can not create IndexWriter, indexname=" + tempIndexPath, e);
    } finally {
        DBFactory.getInstance().commitAndCloseSession();
        log.debug("doIndex: commit & close session");
    }
}
/**
 * Creates a new spell-check index based on the search index.
 */
public static void createSpellIndex(final SearchModule searchModule) {
    final String tempSearchIndexPath = searchModule.getTempSearchIndexPath();
    final String tempSpellCheckIndexPath = searchModule.getTempSpellCheckerIndexPath();
    IndexReader indexReader = null;
    try {
        log.info("Start generating spell check index ...");
        long startSpellIndexTime = 0;
        if (log.isDebugEnabled()) {
            startSpellIndexTime = System.currentTimeMillis();
        }
        final Directory indexDir = FSDirectory.open(new File(tempSearchIndexPath, "main"));
        indexReader = IndexReader.open(indexDir);
        // 1. Create content spellIndex
        log.info("Generating 'content' spell check index ...");
        final File contentSpellIndexPath = new File(tempSpellCheckIndexPath + CONTENT_PATH);
        FileUtils.deleteDirsAndFiles(contentSpellIndexPath, true, true);
        final Directory contentSpellIndexDirectory = FSDirectory.open(contentSpellIndexPath);
        final SpellChecker contentSpellChecker = new SpellChecker(contentSpellIndexDirectory);
        final Dictionary contentDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.CONTENT_FIELD_NAME);
        contentSpellChecker.indexDictionary(contentDictionary);
        // 2. Create title spellIndex
        log.info("Generating 'title' spell check index ...");
        final File titleSpellIndexPath = new File(tempSpellCheckIndexPath + TITLE_PATH);
        FileUtils.deleteDirsAndFiles(titleSpellIndexPath, true, true);
        final Directory titleSpellIndexDirectory = FSDirectory.open(titleSpellIndexPath);
        final SpellChecker titleSpellChecker = new SpellChecker(titleSpellIndexDirectory);
        final Dictionary titleDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.TITLE_FIELD_NAME);
        titleSpellChecker.indexDictionary(titleDictionary);
        // 3. Create description spellIndex
        log.info("Generating 'description' spell check index ...");
        final File descriptionSpellIndexPath = new File(tempSpellCheckIndexPath + DESCRIPTION_PATH);
        FileUtils.deleteDirsAndFiles(descriptionSpellIndexPath, true, true);
        final Directory descriptionSpellIndexDirectory = FSDirectory.open(descriptionSpellIndexPath);
        final SpellChecker descriptionSpellChecker = new SpellChecker(descriptionSpellIndexDirectory);
        final Dictionary descriptionDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.DESCRIPTION_FIELD_NAME);
        descriptionSpellChecker.indexDictionary(descriptionDictionary);
        // 4. Create author spellIndex
        log.info("Generating 'author' spell check index ...");
        final File authorSpellIndexPath = new File(tempSpellCheckIndexPath + AUTHOR_PATH);
        FileUtils.deleteDirsAndFiles(authorSpellIndexPath, true, true);
        final Directory authorSpellIndexDirectory = FSDirectory.open(authorSpellIndexPath);
        final SpellChecker authorSpellChecker = new SpellChecker(authorSpellIndexDirectory);
        final Dictionary authorDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.AUTHOR_FIELD_NAME);
        authorSpellChecker.indexDictionary(authorDictionary);
        log.info("Merging spell check indices ...");
        // Merge all partial spell indexes (content, title etc.) into one common spell index
        final File tempSpellCheckIndexDir = new File(tempSpellCheckIndexPath);
        FileUtils.deleteDirsAndFiles(tempSpellCheckIndexDir, true, true);
        final Directory tempSpellIndexDirectory = FSDirectory.open(tempSpellCheckIndexDir);
        final IndexWriter merger = new IndexWriter(tempSpellIndexDirectory, new StandardAnalyzer(Version.LUCENE_30), true, IndexWriter.MaxFieldLength.UNLIMITED);
        final Directory[] directories = { contentSpellIndexDirectory, titleSpellIndexDirectory, descriptionSpellIndexDirectory, authorSpellIndexDirectory };
        merger.addIndexesNoOptimize(directories);
        log.info("Optimizing spell check index ...");
        merger.optimize();
        merger.close();
        tempSpellIndexDirectory.close();
        contentSpellChecker.close();
        contentSpellIndexDirectory.close();
        titleSpellChecker.close();
        titleSpellIndexDirectory.close();
        descriptionSpellChecker.close();
        descriptionSpellIndexDirectory.close();
        authorSpellChecker.close();
        authorSpellIndexDirectory.close();
        FileUtils.deleteDirsAndFiles(contentSpellIndexPath, true, true);
        FileUtils.deleteDirsAndFiles(titleSpellIndexPath, true, true);
        FileUtils.deleteDirsAndFiles(descriptionSpellIndexPath, true, true);
        FileUtils.deleteDirsAndFiles(authorSpellIndexPath, true, true);
        if (log.isDebugEnabled()) {
            log.debug("Spell check index created in " + (System.currentTimeMillis() - startSpellIndexTime) + " ms.");
        }
    } catch (final IOException ioEx) {
        log.warn("Can not create spell check index.", ioEx);
    } finally {
        if (indexReader != null) {
            try {
                indexReader.close();
            } catch (final IOException e) {
                log.warn("Can not close indexReader properly", e);
            }
        }
    }
}
public static void main(String[] argv) {
    try {
        GlobalVar gv = GlobalVar.getGlobalVar();
        // get args
        File indexDir = gv.getIndexDir();
        File localDir = gv.getLocalDir();
        File root = gv.getLocalRoot();
        boolean hasWrappers = false;
        String usage = OfflineSearchIndexer.class.getName() + " [-wrappers]";
        for (int i = 0; i < argv.length; i++) {
            if (argv[i].equals("-wrappers")) { // parse -wrappers option
                log.info("wrappers set true");
                hasWrappers = true;
            } else {
                log.error("Incorrect arguments in the command line");
                System.err.println(usage);
                System.err.println(" -wrappers means the directory contains wrappers saved in an earlier run of seal");
                return;
            }
        }
        // check args
        if (root != null && !System.getenv("PWD").equals(root.getPath())) {
            log.error("to build an index relative to " + root + " run OfflineSearchIndexer from that directory, and make localDir a relative path");
            System.exit(-1);
        }
        if (root == null && !localDir.isAbsolute()) {
            log.warn("to build an absolute index make localDir an absolute path - this index will be relative to " + System.getenv("PWD"));
        }
        if (indexDir.exists()) {
            log.error("Cannot save index to '" + indexDir + "' directory, please delete it first");
            System.exit(-1);
        }
        if (!localDir.exists() || !localDir.canRead()) {
            System.out.println("Document directory '" + localDir.getAbsolutePath() + "' does not exist or is not readable, please check the path");
            System.exit(-1);
        }
        Date start = new Date();
        IndexWriter writer = new IndexWriter(FSDirectory.open(indexDir), new StandardAnalyzer(Version.LUCENE_30), true, IndexWriter.MaxFieldLength.LIMITED);
        System.out.println("Indexing to directory '" + indexDir + "'...");
        indexDocs(writer, localDir, hasWrappers);
        System.out.println("Optimizing...");
        writer.optimize();
        writer.close();
        Date end = new Date();
        log.info("indexed " + numIndexed + " of " + numFiles + " files");
        log.info((end.getTime() - start.getTime()) + " total milliseconds");
    } catch (Exception e) {
        log.error("caught a " + e.getClass() + " with message: " + e.getMessage(), e);
    }
}