org.apache.lucene.index.IndexWriter#optimize() source code examples

Listed below are example usages of org.apache.lucene.index.IndexWriter#optimize(), collected from open-source projects; each example names the project and file on GitHub where the full source can be viewed.
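Every example below follows the same basic pattern: open an IndexWriter, add (or delete) documents, call optimize() to merge the index segments, and close the writer in a finally block. As a minimal, self-contained sketch of that pattern, assuming a Lucene 3.x classpath (the index path and field name here are hypothetical):

import java.io.File;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

public class OptimizeDemo {
    public static void main(String[] args) throws Exception {
        // Hypothetical on-disk index location
        Directory dir = FSDirectory.open(new File("/tmp/demo-index"));
        IndexWriter writer = new IndexWriter(dir,
                new StandardAnalyzer(Version.LUCENE_30),
                true, IndexWriter.MaxFieldLength.LIMITED);
        try {
            Document doc = new Document();
            doc.add(new Field("body", "hello lucene",
                    Field.Store.YES, Field.Index.ANALYZED));
            writer.addDocument(doc);
            // Merge all segments into a single segment; historically done
            // after bulk indexing to speed up subsequent searches.
            writer.optimize();
        } finally {
            writer.close();
        }
    }
}

Calling optimize() after bulk indexing was the conventional way to collapse many small segments into one for faster searching, at the cost of a long merge.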

Example 1  Project: Lottery  File: LuceneContentSvcImpl.java
@Transactional(readOnly = true)
public Integer createIndex(Integer siteId, Integer channelId,
		Date startDate, Date endDate, Integer startId, Integer max,
		Directory dir) throws IOException, ParseException {
	boolean exist = IndexReader.indexExists(dir);
	IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(
			Version.LUCENE_30), !exist, IndexWriter.MaxFieldLength.LIMITED);
	try {
		if (exist) {
			LuceneContent.delete(siteId, channelId, startDate, endDate,
					writer);
		}
		Integer lastId = luceneContentDao.index(writer, siteId, channelId,
				startDate, endDate, startId, max);
		writer.optimize();
		return lastId;
	} finally {
		writer.close();
	}
}
 
Example 2  Project: JPPF  File: CrawlerTest.java
/**
 * Test of indexing with Lucene.
 * @throws Exception if an error is thrown while executing.
 */
public static void luceneIndex() throws Exception {
  // setting default parameters
  final int depth = 3;

  // create Lucene index writer
  final IndexWriter writer = new IndexWriter(index, new StandardAnalyzer(), true);
  writer.setUseCompoundFile(true);
  writer.setMaxFieldLength(1000000);

  // common crawler settings
  final Crawler crawler = new Crawler();
  crawler.setLinkFilter(new ServerFilter(server));
  crawler.setModel(new MaxDepthModel(depth));
  crawler.addParserListener(new IParserEventListener() {
    @Override
    public void parse(final ParserEvent event) {
      print("Parsing link: " + event.getLink());
    }
  });

  // create Lucene parsing listener and add it
  final LuceneParserEventListener listener = new LuceneParserEventListener(writer);
  crawler.addParserListener(listener);

  // start crawler
  crawler.start(server, startPage);

  // Optimizing Lucene index
  writer.optimize();
  writer.close();
}
 
Example 3  Project: webdsl  File: AutoCompleter.java
/**
 * Indexes the data from the given reader.
 * @param reader source index reader, from which autocomplete words are obtained for the defined field
 * @param field the field of the source index reader to index for autocompletion
 * @param mergeFactor mergeFactor to use when indexing
 * @param ramMB the max amount of memory in MB to use
 * @param optimize whether or not the autocomplete index should be optimized
 * @throws AlreadyClosedException if the AutoCompleter is already closed
 * @throws IOException
 */
 public final void indexDictionary(IndexReader reader, String field, int mergeFactor, int ramMB, boolean optimize) throws IOException {
   synchronized (modifyCurrentIndexLock) {
     ensureOpen();
     final Directory dir = this.autoCompleteIndex;
     final Dictionary dict = new LuceneDictionary(reader, field);
     final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Version.LUCENE_CURRENT, new WhitespaceAnalyzer(Version.LUCENE_CURRENT)).setRAMBufferSizeMB(ramMB));
     IndexSearcher indexSearcher = obtainSearcher();
     final List<IndexReader> readers = new ArrayList<IndexReader>();

     if (searcher.maxDoc() > 0) {
       ReaderUtil.gatherSubReaders(readers, searcher.getIndexReader());
     }

     //clear the index
     writer.deleteAll();

     try {
       Iterator<String> iter = dict.getWordsIterator();

       while (iter.hasNext()) {
         String word = iter.next();

         // ok index the word
         Document doc = createDocument(word, reader.docFreq(new Term(field, word)));
         writer.addDocument(doc);
       }
     } finally {
       releaseSearcher(indexSearcher);
     }
     // close writer
     if (optimize)
       writer.optimize();
     writer.close();
     // also re-open the autocomplete index to see our own changes when the next suggestion
     // is fetched:
     swapSearcher(dir);
   }
 }
 
Example 4  Project: TinyMooc  File: CourseServiceImpl.java
public boolean createCourseIndex() {
    List<Course> list = this.getCourses();
    try {
        Directory directory = FSDirectory.getDirectory(INDEXPATH);
        IndexWriter indexWriter = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
        for (Course course : list) {
            Document doc = new Document();
            String courseTitle = course.getCourseTitle() == null ? "" : course.getCourseTitle().trim();
            String courseIntro = course.getCourseIntro() == null ? "" : course.getCourseIntro();
            String courseId = course.getCourseId() == null ? "" : course.getCourseId();
            String type = course.getType() == null ? "" : course.getType();
            String courseState = course.getCourseState() == null ? "" : course.getCourseState();
            doc.add(new Field("courseIntro", courseIntro, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES));
            doc.add(new Field("courseTitle", courseTitle, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES));
            doc.add(new Field("courseId", courseId, Field.Store.COMPRESS, Field.Index.NO));
            doc.add(new Field("type", type, Field.Store.COMPRESS, Field.Index.NO));
            doc.add(new Field("courseState", courseState, Field.Store.COMPRESS, Field.Index.NO));

            indexWriter.addDocument(doc);
        }
        indexWriter.optimize();
        indexWriter.close();
        return true;
    } catch (Exception e) {
        logger.error("createCourseIndex error.");
        return false;
    }
}
 
Example 5  Project: aedict  File: Main.java
private void indexWithLucene() throws IOException {
    System.out.println("Deleting old Lucene index");
    FileUtils.deleteDirectory(new File(LUCENE_INDEX));
    System.out.println("Indexing with Lucene");
    final BufferedReader dictionary = config.newReader();
    try {
        final Directory directory = FSDirectory.open(new File(LUCENE_INDEX));
        try {
            final IndexWriter luceneWriter = new IndexWriter(directory,
                    new StandardAnalyzer(LuceneSearch.LUCENE_VERSION), true,
                    IndexWriter.MaxFieldLength.UNLIMITED);
            try {
                final IDictParser parser = config.fileType.newParser(config);
                indexWithLucene(dictionary, luceneWriter, parser);
                System.out.println("Optimizing Lucene index");
                luceneWriter.optimize();
            } finally {
                luceneWriter.close();
            }
        } finally {
            closeQuietly(directory);
        }
    } finally {
        IOUtils.closeQuietly(dictionary);
    }
    System.out.println("Finished Lucene indexing");
}
 
Example 6  Project: gAnswer  File: BuildIndexForEntityFragments.java
public void indexforentity() throws Exception
{
	if(EntityFragmentFields.entityId2Name == null)
		EntityFragmentFields.load();
	
	long startTime = new Date().getTime();
	
	//Try update KB index to DBpedia2015. by husen 2016-04-08
	//Try update KB index to DBpedia2016. by husen 2018-8-22
	File indexDir_en = new File("D:/husen/gAnswer/data/DBpedia2016/lucene/entity_fragment_index");
	File sourceDir_en = new File("D:/husen/gAnswer/data/DBpedia2016/fragments/entity_RDF_fragment/16entity_fragment.txt");
	
	Analyzer luceneAnalyzer_en = new StandardAnalyzer();  
	IndexWriter indexWriter_en = new IndexWriter(indexDir_en, luceneAnalyzer_en,true); 
	
	int mergeFactor = 100000;    //default 10
	int maxBufferedDoc = 1000;   //default 10
	int maxMergeDoc = Integer.MAX_VALUE;  //INF
	
	//indexWriter.DEFAULT_MERGE_FACTOR = mergeFactor;
	indexWriter_en.setMergeFactor(mergeFactor);
	indexWriter_en.setMaxBufferedDocs(maxBufferedDoc);
	indexWriter_en.setMaxMergeDocs(maxMergeDoc);		
	
	
	FileInputStream file = new FileInputStream(sourceDir_en);		
	InputStreamReader in = new InputStreamReader(file,"UTF-8");	
	BufferedReader br = new BufferedReader(in);		
	
	int count = 0;
	while(true)
	{
		String _line = br.readLine();
		if(_line == null) break;
		count++;
		if(count % 100000 == 0)
			System.out.println(count);				
		
		String line = _line;		
		String[] temp = line.split("\t");
		
		if(temp.length != 2)
			continue;
		else
		{
			int entity_id = Integer.parseInt(temp[0]);
			if(!EntityFragmentFields.entityId2Name.containsKey(entity_id))
				continue;
			
			String entity_name = EntityFragmentFields.entityId2Name.get(entity_id);
			String entity_fragment = temp[1];
			entity_name = entity_name.replace("____", " ");
			entity_name = entity_name.replace("__", " ");
			entity_name = entity_name.replace("_", " ");
		
				
			Document document = new Document(); 
			
			Field EntityName = new Field("EntityName", entity_name, Field.Store.YES,
					Field.Index.TOKENIZED,
					Field.TermVector.WITH_POSITIONS_OFFSETS);	
			Field EntityId = new Field("EntityId", String.valueOf(entity_id),
					Field.Store.YES, Field.Index.NO);
			Field EntityFragment = new Field("EntityFragment", entity_fragment,
					Field.Store.YES, Field.Index.NO);
			
			document.add(EntityName);
			document.add(EntityId);
			document.add(EntityFragment);
			indexWriter_en.addDocument(document);
		}			
	}
	
	indexWriter_en.optimize();
	indexWriter_en.close();
	br.close();

	// report how long the index build took
	long endTime = new Date().getTime();
	System.out.println("entity_name index has build ->" + count + " " + "Time:" + (endTime - startTime));
}
 
Example 7  Project: gAnswer  File: BuildIndexForTypeShortName.java
public static void buildIndex(HashMap<String, ArrayList<Integer>> typeShortName2IdList) throws Exception
{
	long startTime = new Date().getTime();
	File indexDir_li = new File("D:/husen/gAnswer/data/DBpedia2016/lucene/type_fragment_index");
	
	Analyzer luceneAnalyzer_li = new StandardAnalyzer();  
	IndexWriter indexWriter_li = new IndexWriter(indexDir_li, luceneAnalyzer_li,true); 
	
	int mergeFactor = 100000;
	int maxBufferedDoc = 1000;
	int maxMergeDoc = Integer.MAX_VALUE;
	
	//indexWriter.DEFAULT_MERGE_FACTOR = mergeFactor;
	indexWriter_li.setMergeFactor(mergeFactor);
	indexWriter_li.setMaxBufferedDocs(maxBufferedDoc);
	indexWriter_li.setMaxMergeDocs(maxMergeDoc);
	
	int count = 0;
	Iterator<String> it = typeShortName2IdList.keySet().iterator();
	while (it.hasNext()) 
	{
		String sn = it.next();
		if (sn.length() == 0) {
			continue;
		}
		
		count ++;
	
		StringBuilder splittedSn = new StringBuilder("");
		
		if(sn.contains("_"))
		{
			String nsn = sn.replace("_", " ");
			splittedSn.append(nsn.toLowerCase());
		}
		else
		{
			int last = 0, i = 0;
			for(i = 0; i < sn.length(); i ++) 
			{
			// if it is not a lowercase letter, split here.
				if(!(sn.charAt(i)>='a' && sn.charAt(i)<='z')) 
				{
					splittedSn.append(sn.substring(last, i).toLowerCase());
					splittedSn.append(' ');
					last = i;
				}
			}
			splittedSn.append(sn.substring(last, i).toLowerCase());
			while(splittedSn.charAt(0) == ' ') {
				splittedSn.deleteCharAt(0);
			}
		}
		
		System.out.println("SplitttedType: "+splittedSn);
		
		Document document = new Document(); 

		Field SplittedTypeShortName = new Field("SplittedTypeShortName", splittedSn.toString(), 
				Field.Store.YES,
				Field.Index.TOKENIZED,
				Field.TermVector.WITH_POSITIONS_OFFSETS);			
		Field TypeShortName = new Field("TypeShortName", sn,
				Field.Store.YES, Field.Index.NO);
		
		document.add(SplittedTypeShortName);
		document.add(TypeShortName);
		indexWriter_li.addDocument(document);	
	}
			
	indexWriter_li.optimize();
	indexWriter_li.close();

	// report how long the index build took
	long endTime = new Date().getTime();
	System.out.println("TypeShortName index has been built -> " + count + " Time: " + (endTime - startTime));
}
 
Example 8  Project: tagme  File: AnchorIndexer.java
@Override
	public void makeIndex(String lang, File workingDir) throws IOException
	{
		log.info("Loading support datasets...");
		
		File all_anchors = new WikipediaAnchorParser(lang).getFile();
		long numAnchors = ExternalSortUtils.wcl(all_anchors);
		AnchorIterator iterator = new AnchorIterator(all_anchors);
		
		IntSet people = new PeopleWIDs(lang).getDataset();
		
//		IndexSearcher articles = Indexes.getSearcher(RepositoryDirs.WIKIPEDIA.getPath(lang));
		IndexSearcher articles = openWikipediaIndex(lang);
		//QueryParser queryParser = new QueryParser(Version.LUCENE_34, WikipediaIndexer.FIELD_BODY, new WhitespaceAnalyzer(Version.LUCENE_34));
		QueryParser queryParser = new QueryParser(Version.LUCENE_34, WikipediaIndexer.FIELD_BODY, new StandardAnalyzer(Version.LUCENE_34, new HashSet<String>()));
		
		IndexWriter index = new IndexWriter(FSDirectory.open(workingDir.getAbsoluteFile()), new IndexWriterConfig(Version.LUCENE_34, new KeywordAnalyzer()));
		Document doc = new Document();
		Field fId = new Field(FIELD_ID, "", Store.YES, Index.NOT_ANALYZED);
		Field fText = new Field(FIELD_TEXT, "", Store.YES, Index.NOT_ANALYZED);
		Field fObject = new Field(FIELD_OBJECT, "", Store.YES, Index.NO);
		
		doc.add(fId);
		doc.add(fText);
		doc.add(fObject);
		
//		Field fOriginal = new Field(FIELD_ORIGINAL, "", Store.YES, Index.ANALYZED);
//		Field fWID = new Field(FIELD_WID, "", Store.NO, Index.ANALYZED);
		
		PLogger plog = new PLogger(log, Step.TEN_MINUTES, "lines", "anchors", "searches", "indexed", "0-freq","dropped");
		plog.setEnd(0, numAnchors);
		plog.start("Support datasets loaded, now parsing...");
		int id=0;
		while(iterator.next())
		{
			plog.update(0, iterator.scroll);
			plog.update(1);
			String anchorText = iterator.anchor;
			
			int freq = freq(iterator.originals, articles, queryParser);
			plog.update(2, iterator.originals.size());
			if (freq == 0) plog.update(4);
			
			Anchor anchorObj = Anchor.build(id, iterator.links, freq, people);
			if (anchorObj == null){
				plog.update(5);
				continue;
			}
			
			String anchorSerial = Anchor.serialize(anchorObj);
			fId.setValue(Integer.toString(++id));
			fText.setValue(anchorText);
			fObject.setValue(anchorSerial);
			
			for(int page : anchorObj){
				Field fWID = new Field(FIELD_WID, Integer.toString(page), Store.YES, Index.NOT_ANALYZED);
//				fWID.setBoost(iterator.links.get(page));
				doc.add(fWID);
			}
			for(String original : iterator.originals) {
				doc.add(new Field(FIELD_ORIGINAL, original, Store.YES, Index.NOT_ANALYZED));
			}
			
			index.addDocument(doc);
			plog.update(3);
			
			doc.removeFields(FIELD_ORIGINAL);
			doc.removeFields(FIELD_WID);
		}
		plog.stop();
		iterator.close();
		
		log.info("Now optimizing...");
		index.optimize();
		
		index.close();
		log.info("Done.");
	}
 
Example 9  Project: tagme  File: TopicIndexer.java
@Override
	public void makeIndex(String lang, File workingDir) throws IOException
	{
		
		IndexReader articles = Indexes.getReader(RepositoryDirs.WIKIPEDIA.getPath(lang));
		Int2ObjectMap<String> bestAnchorMap = new BestAnchors(lang).getDataset();
		
		IndexWriter index = new IndexWriter(new SimpleFSDirectory(workingDir), new IndexWriterConfig(Version.LUCENE_34, new KeywordAnalyzer()));
		Document doc = new Document();
		Field fWID = new Field(FIELD_WID, "", Store.YES, Index.NOT_ANALYZED);
		Field fTitle = new Field(FIELD_TITLE, "", Store.YES, Index.NOT_ANALYZED);
		Field fAbstract = new Field(FIELD_ABSTRACT, "", Store.YES, Index.NO);
		Field fBestAnchor = new Field(FIELD_BEST_ANCHOR, "", Store.YES, Index.NO);
		doc.add(fWID);
		doc.add(fTitle);
		doc.add(fAbstract);
		doc.add(fBestAnchor);
				
		
		int max = articles.maxDoc();
		PLogger plog = new PLogger(log, Step.TEN_MINUTES, "pages", "indexed", "noBest");
		plog.setEnd(max);
		plog.start("Start indexing...");
		
		for(int i=0; i<max; i++)
		{
			plog.update(0);
			Document oldDoc = articles.document(i);
			PageType type = PageType.valueOf(oldDoc.get(WikipediaIndexer.FIELD_TYPE));
			if (type == PageType.TOPIC)
			{
				int wid = Integer.parseInt(oldDoc.get(WikipediaIndexer.FIELD_WID));
				fWID.setValue(oldDoc.get(WikipediaIndexer.FIELD_WID));
				fAbstract.setValue(oldDoc.get(WikipediaIndexer.FIELD_ABSTRACT));
				fTitle.setValue(oldDoc.get(WikipediaIndexer.FIELD_TITLE));
				
				String bestAnchor = bestAnchorMap.get(wid);
				if (bestAnchor == null || bestAnchor.length() == 0) plog.update(2);
				fBestAnchor.setValue(bestAnchor==null?"":bestAnchor);
				
				String[] cats = oldDoc.getValues(WikipediaIndexer.FIELD_CAT);
				if (cats != null) {
					for (int j=0; j<cats.length; j++)
						doc.add(new Field(FIELD_CAT, cats[j], Store.YES, Index.NOT_ANALYZED));
				}
				
				index.addDocument(doc);
				plog.update(1);
				
				doc.removeFields(FIELD_CAT);
			}
		}
		
		plog.stop();
		
		log.info("Now optimizing...");
		index.optimize();
		
		index.close();
		
		//we cannot call this because the index is still in the temporary dir
		//so TopicDocs will be created using old index
//		log.info("Index Done, now creating WID->DOC_ID map");
//		
//		TopicDocs td = new TopicDocs(lang);
//		td.forceParsing();
		
		log.info("Done.");
	}
 
Example 10  Project: olat  File: SearchSpellChecker.java
/**
 * Creates a new spell-check index based on search-index
 */
public void createSpellIndex() {
    if (isSpellCheckEnabled) {
        IndexReader indexReader = null;
        try {
            log.info("Start generating Spell-Index...");
            long startSpellIndexTime = 0;
            if (log.isDebugEnabled()) {
                startSpellIndexTime = System.currentTimeMillis();
            }
            final Directory indexDir = FSDirectory.open(new File(indexPath));
            indexReader = IndexReader.open(indexDir);
            // 1. Create content spellIndex
            final File spellDictionaryFile = new File(spellDictionaryPath);
            final Directory contentSpellIndexDirectory = FSDirectory.open(new File(spellDictionaryPath + CONTENT_PATH));// true
            final SpellChecker contentSpellChecker = new SpellChecker(contentSpellIndexDirectory);
            final Dictionary contentDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.CONTENT_FIELD_NAME);
            contentSpellChecker.indexDictionary(contentDictionary);
            // 2. Create title spellIndex
            final Directory titleSpellIndexDirectory = FSDirectory.open(new File(spellDictionaryPath + TITLE_PATH));// true
            final SpellChecker titleSpellChecker = new SpellChecker(titleSpellIndexDirectory);
            final Dictionary titleDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.TITLE_FIELD_NAME);
            titleSpellChecker.indexDictionary(titleDictionary);
            // 3. Create description spellIndex
            final Directory descriptionSpellIndexDirectory = FSDirectory.open(new File(spellDictionaryPath + DESCRIPTION_PATH));// true
            final SpellChecker descriptionSpellChecker = new SpellChecker(descriptionSpellIndexDirectory);
            final Dictionary descriptionDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.DESCRIPTION_FIELD_NAME);
            descriptionSpellChecker.indexDictionary(descriptionDictionary);
            // 4. Create author spellIndex
            final Directory authorSpellIndexDirectory = FSDirectory.open(new File(spellDictionaryPath + AUTHOR_PATH));// true
            final SpellChecker authorSpellChecker = new SpellChecker(authorSpellIndexDirectory);
            final Dictionary authorDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.AUTHOR_FIELD_NAME);
            authorSpellChecker.indexDictionary(authorDictionary);

            // Merge all part spell indexes (content,title etc.) to one common spell index
            final Directory spellIndexDirectory = FSDirectory.open(spellDictionaryFile);// true
            final IndexWriter merger = new IndexWriter(spellIndexDirectory, new StandardAnalyzer(Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.UNLIMITED);
            final Directory[] directories = { contentSpellIndexDirectory, titleSpellIndexDirectory, descriptionSpellIndexDirectory, authorSpellIndexDirectory };
            merger.addIndexesNoOptimize(directories);
            merger.optimize();
            merger.close();
            spellChecker = new SpellChecker(spellIndexDirectory);
            spellChecker.setAccuracy(0.7f);
            if (log.isDebugEnabled()) {
                log.debug("SpellIndex created in " + (System.currentTimeMillis() - startSpellIndexTime) + "ms");
            }
            log.info("New generated Spell-Index ready to use.");
        } catch (final IOException ioEx) {
            log.warn("Can not create SpellIndex", ioEx);
        } finally {
            if (indexReader != null) {
                try {
                    indexReader.close();
                } catch (final IOException e) {
                    log.warn("Can not close indexReader properly", e);
                }
            }
        }
    }
}
 
Example 11  Project: olat  File: OlatFullIndexer.java
/**
 * Create the index-writer object. In multi-threaded mode, creates an array of index-workers. Starts indexing with the main-index as the root object and
 * recursively indexes all elements. At the end, optimizes and closes the new index. The new index is stored in [temporary-index-path]/main.
 * 
 * @throws InterruptedException
 */
private void doIndex() throws InterruptedException {
    try {
        final File tempIndexDir = new File(tempIndexPath);
        final Directory indexPath = FSDirectory.open(new File(tempIndexDir, "main"));
        final Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
        indexWriter = new IndexWriter(indexPath, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
        indexWriter.deleteAll();
        indexWriter.setMergeFactor(INDEX_MERGE_FACTOR); // for better performance
        indexWriter.setRAMBufferSizeMB(ramBufferSizeMB); // for better performance, set to 48MB (see Lucene docs "How to make indexing faster")
        log.info("IndexWriter config RAMBufferSizeMB=" + indexWriter.getRAMBufferSizeMB());
        indexWriter.setUseCompoundFile(useCompoundFile); // for better performance (see Lucene docs "How to make indexing faster")
        log.info("IndexWriter config UseCompoundFile=" + indexWriter.getUseCompoundFile());
        // Create IndexWriterWorker
        log.info("Running with " + numberIndexWriter + " IndexerWriterWorker");
        indexWriterWorkers = new IndexWriterWorker[numberIndexWriter];
        final Directory[] partIndexDirs = new Directory[numberIndexWriter];
        for (int i = 0; i < numberIndexWriter; i++) {
            final IndexWriterWorker indexWriterWorker = new IndexWriterWorker(i, tempIndexDir, this);
            indexWriterWorkers[i] = indexWriterWorker;
            indexWriterWorkers[i].start();
            partIndexDirs[i] = indexWriterWorkers[i].getIndexDir();
        }

        final SearchResourceContext searchResourceContext = new SearchResourceContext();
        log.info("doIndex start. OlatFullIndexer with Debug output");
        mainIndexer.doIndex(searchResourceContext, null /* no parent */, this);

        log.info("Wait until every folder indexer is finished");

        DBFactory.getInstance().commitAndCloseSession();
        // check if every folder indexer is finished max waiting-time 10Min (=waitingCount-limit = 60)
        int waitingCount = 0;
        final int MAX_WAITING_COUNT = 60;// = 10Min
        while (FolderIndexerWorkerPool.getInstance().isIndexerRunning() && (waitingCount++ < MAX_WAITING_COUNT)) {
            Thread.sleep(10000);
        }
        if (waitingCount >= MAX_WAITING_COUNT) {
            log.info("Finished with max waiting time!");
        }
        log.info("Set Finish-flag for each indexWriterWorkers");
        // Set Finish-flag
        for (int i = 0; i < numberIndexWriter; i++) {
            indexWriterWorkers[i].finishIndexing();
        }

        log.info("Wait until every indexworker is finished");
        // check if every indexworker is finished max waiting-time 10Min (=waitingCount-limit = 60)
        waitingCount = 0;
        while (!areIndexingDone() && (waitingCount++ < MAX_WAITING_COUNT)) {
            Thread.sleep(10000);
        }
        if (waitingCount >= MAX_WAITING_COUNT) {
            log.info("Finished with max waiting time!");
        }

        // Merge all partIndex
        DBFactory.getInstance().commitAndCloseSession();
        if (partIndexDirs.length > 0) {
            log.info("Start merging part Indexes");
            indexWriter.addIndexesNoOptimize(partIndexDirs);
            log.info("Added all part Indexes");
        }
        fullIndexerStatus.setIndexSize(indexWriter.maxDoc());
        indexWriter.optimize();
        indexWriter.close();
    } catch (final IOException e) {
        e.printStackTrace();
        log.warn("Can not create IndexWriter, indexname=" + tempIndexPath, e);
    } finally {
        DBFactory.getInstance().commitAndCloseSession();
        log.debug("doIndex: commit & close session");
    }
}
 
Example 12  Project: olat  File: SearchSpellChecker.java
/**
 * Creates a new spell-check index based on search-index
 */
public static void createSpellIndex(final SearchModule searchModule) {
    final String tempSearchIndexPath = searchModule.getTempSearchIndexPath();
    final String tempSpellCheckIndexPath = searchModule.getTempSpellCheckerIndexPath();

    IndexReader indexReader = null;
    try {
        log.info("Start generating spell check index ...");

        long startSpellIndexTime = 0;
        if (log.isDebugEnabled()) {
            startSpellIndexTime = System.currentTimeMillis();
        }
        final Directory indexDir = FSDirectory.open(new File(tempSearchIndexPath, "main"));
        indexReader = IndexReader.open(indexDir);

        // 1. Create content spellIndex
        log.info("Generating 'content' spell check index ...");
        final File contentSpellIndexPath = new File(tempSpellCheckIndexPath + CONTENT_PATH);
        FileUtils.deleteDirsAndFiles(contentSpellIndexPath, true, true);
        final Directory contentSpellIndexDirectory = FSDirectory.open(contentSpellIndexPath);
        final SpellChecker contentSpellChecker = new SpellChecker(contentSpellIndexDirectory);
        final Dictionary contentDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.CONTENT_FIELD_NAME);
        contentSpellChecker.indexDictionary(contentDictionary);

        // 2. Create title spellIndex
        log.info("Generating 'title' spell check index ...");
        final File titleSpellIndexPath = new File(tempSpellCheckIndexPath + TITLE_PATH);
        FileUtils.deleteDirsAndFiles(titleSpellIndexPath, true, true);
        final Directory titleSpellIndexDirectory = FSDirectory.open(titleSpellIndexPath);
        final SpellChecker titleSpellChecker = new SpellChecker(titleSpellIndexDirectory);
        final Dictionary titleDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.TITLE_FIELD_NAME);
        titleSpellChecker.indexDictionary(titleDictionary);

        // 3. Create description spellIndex
        log.info("Generating 'description' spell check index ...");
        final File descriptionSpellIndexPath = new File(tempSpellCheckIndexPath + DESCRIPTION_PATH);
        FileUtils.deleteDirsAndFiles(descriptionSpellIndexPath, true, true);
        final Directory descriptionSpellIndexDirectory = FSDirectory.open(descriptionSpellIndexPath);
        final SpellChecker descriptionSpellChecker = new SpellChecker(descriptionSpellIndexDirectory);
        final Dictionary descriptionDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.DESCRIPTION_FIELD_NAME);
        descriptionSpellChecker.indexDictionary(descriptionDictionary);

        // 4. Create author spellIndex
        log.info("Generating 'author' spell check index ...");
        final File authorSpellIndexPath = new File(tempSpellCheckIndexPath + AUTHOR_PATH);
        FileUtils.deleteDirsAndFiles(authorSpellIndexPath, true, true);
        final Directory authorSpellIndexDirectory = FSDirectory.open(authorSpellIndexPath);
        final SpellChecker authorSpellChecker = new SpellChecker(authorSpellIndexDirectory);
        final Dictionary authorDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.AUTHOR_FIELD_NAME);
        authorSpellChecker.indexDictionary(authorDictionary);

        log.info("Merging spell check indices ...");
        // Merge all part spell indexes (content,title etc.) to one common spell index
        final File tempSpellCheckIndexDir = new File(tempSpellCheckIndexPath);
        FileUtils.deleteDirsAndFiles(tempSpellCheckIndexDir, true, true);
        final Directory tempSpellIndexDirectory = FSDirectory.open(tempSpellCheckIndexDir);
        final IndexWriter merger = new IndexWriter(tempSpellIndexDirectory, new StandardAnalyzer(Version.LUCENE_30), true, IndexWriter.MaxFieldLength.UNLIMITED);
        final Directory[] directories = { contentSpellIndexDirectory, titleSpellIndexDirectory, descriptionSpellIndexDirectory, authorSpellIndexDirectory };
        merger.addIndexesNoOptimize(directories);

        log.info("Optimizing spell check index ...");
        merger.optimize();
        merger.close();

        tempSpellIndexDirectory.close();

        contentSpellChecker.close();
        contentSpellIndexDirectory.close();

        titleSpellChecker.close();
        titleSpellIndexDirectory.close();

        descriptionSpellChecker.close();
        descriptionSpellIndexDirectory.close();

        authorSpellChecker.close();
        authorSpellIndexDirectory.close();

        FileUtils.deleteDirsAndFiles(contentSpellIndexPath, true, true);
        FileUtils.deleteDirsAndFiles(titleSpellIndexPath, true, true);
        FileUtils.deleteDirsAndFiles(descriptionSpellIndexPath, true, true);
        FileUtils.deleteDirsAndFiles(authorSpellIndexPath, true, true);

        if (log.isDebugEnabled()) {
            log.debug("Spell check index created in " + (System.currentTimeMillis() - startSpellIndexTime) + " ms.");
        }
    } catch (final IOException ioEx) {
        log.warn("Can not create spell check index.", ioEx);
    } finally {
        if (indexReader != null) {
            try {
                indexReader.close();
            } catch (final IOException e) {
                log.warn("Can not close indexReader properly", e);
            }
        }
    }
}
 
Example 13  Project: SEAL  File: OfflineSearchIndexer.java
public static void main(String[] argv) {

        try {
            GlobalVar gv = GlobalVar.getGlobalVar();

            // get args
            File indexDir = gv.getIndexDir();
            File localDir = gv.getLocalDir();
            File root = gv.getLocalRoot();
            boolean hasWrappers = false;
            String usage = OfflineSearchIndexer.class.getName() + " [-wrappers]";
            for (int i = 0; i < argv.length; i++) {
                if (argv[i].equals("-wrappers")) { // parse -wrappers option
                    log.info("wrappers set true");
                    hasWrappers = true;
                } else {
                    log.error("Incorrect arguments in the command line");
                    System.err.println(usage);
                    System.err.println(" -wrappers means the directory contains wrappers saved in earlier run of seal");
                    return;
                }
            }

            // check args
            if (root!=null && !System.getenv("PWD").equals(root.getPath())) {
                log.error("to build an index relative to "+root+" run OfflineSearchIndexer from that directory, and make localDir a relative path");
                System.exit(-1);
            }
            if (root==null && !localDir.isAbsolute()) {
                log.warn("to build an absolute index make localDir an absolute path - this index will be relative to "+System.getenv("PWD"));
            }
            if (indexDir.exists()) {
                log.error("Cannot save index to '" +indexDir+ "' directory, please delete it first");
                System.exit(-1);
            }
            if (!localDir.exists() || !localDir.canRead()) {
                System.out.println("Document directory '" +localDir.getAbsolutePath()+ "' does not exist or is not readable, please check the path");
                System.exit(-1);
            }
            Date start = new Date();
            IndexWriter writer = new IndexWriter(FSDirectory.open(indexDir), new StandardAnalyzer(Version.LUCENE_30), true, IndexWriter.MaxFieldLength.LIMITED);
            System.out.println("Indexing to directory '" +indexDir+ "'...");
            indexDocs(writer, localDir, hasWrappers);
            System.out.println("Optimizing...");
            writer.optimize();
            writer.close();

            Date end = new Date();
            log.info("indexed "+numIndexed+" of "+numFiles+" files");
            log.info((end.getTime() - start.getTime())+" total milliseconds");

        } catch (Exception e) {
            log.error(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
            e.printStackTrace();
        }
    }
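
A closing note on API evolution: optimize() was deprecated in Lucene 3.5 and removed in Lucene 4.0 in favor of IndexWriter.forceMerge(int maxNumSegments). A rough equivalent of the calls above against the modern API might look like the following sketch, assuming Lucene 5+ (the index path is hypothetical):

import java.nio.file.Paths;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class ForceMergeDemo {
    public static void main(String[] args) throws Exception {
        // Hypothetical on-disk index location
        Directory dir = FSDirectory.open(Paths.get("/tmp/demo-index"));
        IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()));
        try {
            // forceMerge(1) is the modern equivalent of optimize():
            // it merges the index down to a single segment.
            writer.forceMerge(1);
        } finally {
            writer.close();
        }
    }
}

Note that current Lucene documentation discourages routine force-merging; it is a very costly operation and is normally only worthwhile on indexes that will no longer be updated.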