The following examples show how to use the org.apache.lucene.index.SegmentInfos API, with typical instance code and usage patterns; you can also follow the links to view the full source code on GitHub.
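For instance, here is a minimal self-contained sketch (the index path below is a placeholder) that opens an existing index, reads the SegmentInfos of the latest commit, and prints each segment's name and document counts:
import java.nio.file.Paths;
import org.apache.lucene.index.SegmentCommitInfo;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class SegmentInfosExample {
  public static void main(String[] args) throws Exception {
    // The path is a placeholder; point it at an existing Lucene index.
    try (Directory dir = FSDirectory.open(Paths.get("/path/to/index"))) {
      // Reads the segments_N file of the most recent commit.
      SegmentInfos infos = SegmentInfos.readLatestCommit(dir);
      System.out.println("segments file: " + infos.getSegmentsFileName()
          + ", generation: " + infos.getGeneration());
      for (SegmentCommitInfo sci : infos) {
        System.out.println(sci.info.name + ": " + sci.info.maxDoc()
            + " docs, " + sci.getDelCount() + " deleted");
      }
    }
  }
}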
private static LocalCheckpointTracker createLocalCheckpointTracker(EngineConfig engineConfig, SegmentInfos lastCommittedSegmentInfos,
Logger logger, Supplier<Searcher> searcherSupplier, BiFunction<Long, Long, LocalCheckpointTracker> localCheckpointTrackerSupplier) {
try {
final SequenceNumbers.CommitInfo seqNoStats =
SequenceNumbers.loadSeqNoInfoFromLuceneCommit(lastCommittedSegmentInfos.userData.entrySet());
final long maxSeqNo = seqNoStats.maxSeqNo;
final long localCheckpoint = seqNoStats.localCheckpoint;
logger.trace("recovered maximum sequence number [{}] and local checkpoint [{}]", maxSeqNo, localCheckpoint);
final LocalCheckpointTracker tracker = localCheckpointTrackerSupplier.apply(maxSeqNo, localCheckpoint);
// Operations that are optimized using max_seq_no_of_updates optimization must not be processed twice; otherwise, they will
// create duplicates in Lucene. To avoid this we check the LocalCheckpointTracker to see if an operation was already processed.
// Thus, we need to restore the LocalCheckpointTracker bit by bit to ensure the consistency between LocalCheckpointTracker and
// Lucene index. This is not the only solution since we can bootstrap max_seq_no_of_updates with max_seq_no of the commit to
// disable the MSU optimization during recovery. Here we prefer to maintain the consistency of LocalCheckpointTracker.
if (localCheckpoint < maxSeqNo && engineConfig.getIndexSettings().isSoftDeleteEnabled()) {
try (Searcher searcher = searcherSupplier.get()) {
Lucene.scanSeqNosInReader(searcher.getDirectoryReader(), localCheckpoint + 1, maxSeqNo,
tracker::markSeqNoAsCompleted);
}
}
return tracker;
} catch (IOException ex) {
throw new EngineCreationFailureException(engineConfig.getShardId(), "failed to create local checkpoint tracker", ex);
}
}
/**
* Read the last segments info from the commit pointed to by the searcher manager
*/
protected static SegmentInfos readLastCommittedSegmentInfos(final SearcherManager sm, final Store store) throws IOException {
IndexSearcher searcher = sm.acquire();
try {
IndexCommit latestCommit = ((DirectoryReader) searcher.getIndexReader()).getIndexCommit();
return Lucene.readSegmentInfos(latestCommit);
} catch (IOException e) {
// Fall back to reading from the store if reading from the commit fails
try {
return store.readLastCommittedSegmentsInfo();
} catch (IOException e2) {
e2.addSuppressed(e);
throw e2;
}
} finally {
sm.release(searcher);
}
}
@Override
public List<Segment> getSegments(long commitGen) throws LukeException {
try {
SegmentInfos infos = findSegmentInfos(commitGen);
if (infos == null) {
return Collections.emptyList();
}
return infos.asList().stream()
.map(Segment::of)
.sorted(Comparator.comparing(Segment::getName))
.collect(Collectors.toList());
} catch (IOException e) {
throw new LukeException(String.format(Locale.ENGLISH, "Failed to load segment infos for commit generation %d", commitGen), e);
}
}
@Override
public Map<String, String> getSegmentAttributes(long commitGen, String name) throws LukeException {
try {
SegmentInfos infos = findSegmentInfos(commitGen);
if (infos == null) {
return Collections.emptyMap();
}
return infos.asList().stream()
.filter(seg -> seg.info.name.equals(name))
.findAny()
.map(seg -> seg.info.getAttributes())
.orElse(Collections.emptyMap());
} catch (IOException e) {
throw new LukeException(String.format(Locale.ENGLISH, "Failed to load segment infos for commit generation %d", commitGen), e);
}
}
@Override
public Map<String, String> getSegmentDiagnostics(long commitGen, String name) throws LukeException {
try {
SegmentInfos infos = findSegmentInfos(commitGen);
if (infos == null) {
return Collections.emptyMap();
}
return infos.asList().stream()
.filter(seg -> seg.info.name.equals(name))
.findAny()
.map(seg -> seg.info.getDiagnostics())
.orElse(Collections.emptyMap());
} catch (IOException e) {
throw new LukeException(String.format(Locale.ENGLISH, "Failed to load segment infos for commit generation %d", commitGen), e);
}
}
@Override
public Optional<Codec> getSegmentCodec(long commitGen, String name) throws LukeException {
try {
SegmentInfos infos = findSegmentInfos(commitGen);
if (infos == null) {
return Optional.empty();
}
return infos.asList().stream()
.filter(seg -> seg.info.name.equals(name))
.findAny()
.map(seg -> seg.info.getCodec());
} catch (IOException e) {
throw new LukeException(String.format(Locale.ENGLISH, "Failed to load segment infos for commit generation %d", commitGen), e);
}
}
@Test
public void testCommitNoEmptyCommits() throws Exception {
// LUCENE-4972: DTW used to create empty commits even if no changes were made
Directory dir = newDirectory();
DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(dir);
taxoWriter.addCategory(new FacetLabel("a"));
taxoWriter.commit();
long gen1 = SegmentInfos.getLastCommitGeneration(dir);
taxoWriter.commit();
long gen2 = SegmentInfos.getLastCommitGeneration(dir);
assertEquals("empty commit should not have changed the index", gen1, gen2);
taxoWriter.close();
dir.close();
}
@Test
public void testCloseNoEmptyCommits() throws Exception {
// LUCENE-4972: DTW used to create empty commits even if no changes were made
Directory dir = newDirectory();
DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(dir);
taxoWriter.addCategory(new FacetLabel("a"));
taxoWriter.commit();
long gen1 = SegmentInfos.getLastCommitGeneration(dir);
taxoWriter.close();
long gen2 = SegmentInfos.getLastCommitGeneration(dir);
assertEquals("empty commit should not have changed the index", gen1, gen2);
taxoWriter.close();
dir.close();
}
@Test
public void testPrepareCommitNoEmptyCommits() throws Exception {
// LUCENE-4972: DTW used to create empty commits even if no changes were made
Directory dir = newDirectory();
DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(dir);
taxoWriter.addCategory(new FacetLabel("a"));
taxoWriter.prepareCommit();
taxoWriter.commit();
long gen1 = SegmentInfos.getLastCommitGeneration(dir);
taxoWriter.prepareCommit();
taxoWriter.commit();
long gen2 = SegmentInfos.getLastCommitGeneration(dir);
assertEquals("empty commit should not have changed the index", gen1, gen2);
taxoWriter.close();
dir.close();
}
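The three tests above all rely on SegmentInfos.getLastCommitGeneration(dir) to detect whether an operation actually wrote a new segments_N file. A minimal sketch of the same check with a plain IndexWriter, assuming the standard org.apache.lucene.* imports:
// The generation advances only when a commit writes changes to the directory.
Directory dir = new ByteBuffersDirectory();
IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()));
writer.addDocument(new Document());
writer.commit();
long gen1 = SegmentInfos.getLastCommitGeneration(dir);
writer.addDocument(new Document());
writer.commit();
long gen2 = SegmentInfos.getLastCommitGeneration(dir);
// gen2 > gen1 here, because the second commit contained new changes.
writer.close();
dir.close();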
public void testExceptionDuringRefresh() throws Exception {
Directory indexDir = newDirectory();
Directory taxoDir = newDirectory();
IndexWriter w = new IndexWriter(indexDir, newIndexWriterConfig(new MockAnalyzer(random())));
DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(taxoDir);
w.commit();
tw.commit();
SearcherTaxonomyManager mgr = new SearcherTaxonomyManager(indexDir, taxoDir, null);
tw.addCategory(new FacetLabel("a", "b"));
w.addDocument(new Document());
tw.commit();
w.commit();
// intentionally corrupt the taxo index:
SegmentInfos infos = SegmentInfos.readLatestCommit(taxoDir);
taxoDir.deleteFile(infos.getSegmentsFileName());
expectThrows(IndexNotFoundException.class, mgr::maybeRefreshBlocking);
IOUtils.close(w, tw, mgr, indexDir, taxoDir);
}
private SimpleOrderedMap<Object> getMergeInformation(SolrQueryRequest req, SegmentInfos infos, List<String> mergeCandidates) throws IOException {
SimpleOrderedMap<Object> result = new SimpleOrderedMap<>();
RefCounted<IndexWriter> refCounted = req.getCore().getSolrCoreState().getIndexWriter(req.getCore());
try {
IndexWriter indexWriter = refCounted.get();
if (indexWriter instanceof SolrIndexWriter) {
result.addAll(((SolrIndexWriter)indexWriter).getRunningMerges());
}
//get chosen merge policy
MergePolicy mp = indexWriter.getConfig().getMergePolicy();
//Find merges
MergeSpecification findMerges = mp.findMerges(MergeTrigger.EXPLICIT, infos, indexWriter);
if (findMerges != null && findMerges.merges != null && findMerges.merges.size() > 0) {
for (OneMerge merge : findMerges.merges) {
//TODO: add merge grouping
for (SegmentCommitInfo mergeSegmentInfo : merge.segments) {
mergeCandidates.add(mergeSegmentInfo.info.name);
}
}
}
return result;
} finally {
refCounted.decref();
}
}
@Test
public void testSegmentNames() throws IOException {
String[] segmentNamePatterns = new String[NUM_SEGMENTS];
h.getCore().withSearcher((searcher) -> {
int i = 0;
for (SegmentCommitInfo sInfo : SegmentInfos.readLatestCommit(searcher.getIndexReader().directory())) {
assertTrue("Unexpected number of segment in the index: " + i, i < NUM_SEGMENTS);
segmentNamePatterns[i] = "//lst[@name='segments']/lst/str[@name='name'][.='" + sInfo.info.name + "']";
i++;
}
return null;
});
assertQ("Unexpected segment names returned",
req("qt","/admin/segments"),
segmentNamePatterns);
}
@Test
public void testFieldInfo() throws Exception {
String[] segmentNamePatterns = new String[NUM_SEGMENTS];
h.getCore().withSearcher((searcher) -> {
int i = 0;
for (SegmentCommitInfo sInfo : SegmentInfos.readLatestCommit(searcher.getIndexReader().directory())) {
assertTrue("Unexpected number of segment in the index: " + i, i < NUM_SEGMENTS);
segmentNamePatterns[i] = "boolean(//lst[@name='segments']/lst[@name='" + sInfo.info.name + "']/lst[@name='fields']/lst[@name='id']/str[@name='flags'])";
i++;
}
return null;
});
assertQ("Unexpected field infos returned",
req("qt","/admin/segments", "fieldInfo", "true"),
segmentNamePatterns);
}
public CommitStats(SegmentInfos segmentInfos) {
// clone the map to protect against concurrent changes
userData = MapBuilder.<String, String>newMapBuilder().putAll(segmentInfos.getUserData()).immutableMap();
// Lucene calls the current generation the last generation.
generation = segmentInfos.getLastGeneration();
if (segmentInfos.getId() != null) { // id is only written starting with Lucene 5.0
id = Base64.encodeBytes(segmentInfos.getId());
}
numDocs = Lucene.getNumDocs(segmentInfos);
}
public void testCommitData() throws Exception {
PerfRunData runData = createPerfRunData();
new CreateIndexTask(runData).doLogic();
CommitIndexTask task = new CommitIndexTask(runData);
task.setParams("params");
task.doLogic();
SegmentInfos infos = SegmentInfos.readLatestCommit(runData.getDirectory());
assertEquals("params", infos.getUserData().get(OpenReaderTask.USER_DATA));
new CloseIndexTask(runData).doLogic();
}
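The task above stores its params string in the commit's user data, which SegmentInfos exposes through getUserData(). Outside the benchmark framework the same round trip can be sketched directly with IndexWriter.setLiveCommitData; the key and value below are chosen purely for illustration:
// Attach user data to the next commit, then read it back from the commit point.
writer.setLiveCommitData(Collections.singletonMap("my_key", "my_value").entrySet());
writer.commit();
SegmentInfos infos = SegmentInfos.readLatestCommit(dir);
assertEquals("my_value", infos.getUserData().get("my_key"));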
/**
* Test that we can call forceMerge(maxNumSegments).
*/
public void testForceMerge() throws Exception {
// 1. alg definition (required in every "logic" test)
String algLines[] = {
"# ----- properties ",
"content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
"docs.file=" + getReuters20LinesFile(),
"content.source.log.step=3",
"ram.flush.mb=-1",
"max.buffered=3",
"doc.term.vector=false",
"content.source.forever=false",
"directory=ByteBuffersDirectory",
"merge.policy=org.apache.lucene.index.LogDocMergePolicy",
"doc.stored=false",
"doc.tokenized=false",
"debug.level=1",
"# ----- alg ",
"{ \"Rounds\"",
" ResetSystemErase",
" CreateIndex",
" { \"AddDocs\" AddDoc > : * ",
" ForceMerge(3)",
" CloseIndex()",
"} : 2",
};
// 2. execute the algorithm (required in every "logic" test)
Benchmark benchmark = execBenchmark(algLines);
// 3. test number of docs in the index
IndexReader ir = DirectoryReader.open(benchmark.getRunData().getDirectory());
int ndocsExpected = 20; // first 20 reuters docs.
assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs());
ir.close();
// Make sure we have 3 segments:
SegmentInfos infos = SegmentInfos.readLatestCommit(benchmark.getRunData().getDirectory());
assertEquals(3, infos.size());
}
public CopyState(Map<String,FileMetaData> files, long version, long gen, byte[] infosBytes,
Set<String> completedMergeFiles, long primaryGen, SegmentInfos infos) {
assert completedMergeFiles != null;
this.files = Collections.unmodifiableMap(files);
this.version = version;
this.gen = gen;
this.infosBytes = infosBytes;
this.completedMergeFiles = Collections.unmodifiableSet(completedMergeFiles);
this.primaryGen = primaryGen;
this.infos = infos;
}
/** Switch to new segments, refreshing if necessary. Note that it's the caller's job to ensure there's a held refCount for the
* incoming infos, so all files exist. */
public void setCurrentInfos(SegmentInfos infos) throws IOException {
if (currentInfos != null) {
// So that if we commit, we will go to the next
// (unwritten so far) generation:
infos.updateGeneration(currentInfos);
node.message("mgr.setCurrentInfos: carry over infos gen=" + infos.getSegmentsFileName());
}
currentInfos = infos;
maybeRefresh();
}
private SegmentInfos findSegmentInfos(long commitGen) throws LukeException, IOException {
IndexCommit ic = getCommitMap().get(commitGen);
if (ic == null) {
return null;
}
String segmentFile = ic.getSegmentsFileName();
return SegmentInfos.readCommit(dir, segmentFile);
}
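getCommitMap() is internal to Luke, but the underlying Lucene calls are plain: DirectoryReader.listCommits(dir) enumerates the commit points still present in a directory, and SegmentInfos.readCommit(dir, segmentsFileName) loads each one. A short sketch (older commits are only available if the index was written with a deletion policy that retains them):
// Read the SegmentInfos of every commit point remaining in the directory.
for (IndexCommit commit : DirectoryReader.listCommits(dir)) {
  SegmentInfos infos = SegmentInfos.readCommit(dir, commit.getSegmentsFileName());
  System.out.println("generation " + commit.getGeneration() + ": " + infos.size() + " segments");
}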
private boolean hasUnusedFiles(Directory indexDir, IndexCommit commit) throws IOException {
String segmentsFileName = commit.getSegmentsFileName();
SegmentInfos infos = SegmentInfos.readCommit(indexDir, segmentsFileName);
Set<String> currentFiles = new HashSet<>(infos.files(true));
String[] allFiles = indexDir.listAll();
for (String file : allFiles) {
if (!file.equals(segmentsFileName) && !currentFiles.contains(file) && !file.endsWith(".lock")) {
log.info("Found unused file: {}", file);
return true;
}
}
return false;
}
@BeforeClass
public static void beforeClass() throws Exception {
// we need a consistent segmentation to ensure we don't get a random
// merge that reduces the total num docs in all segments, or the number of deletes
//
systemSetPropertySolrTestsMergePolicyFactory(NoMergePolicyFactory.class.getName());
// Also prevent flushes
System.setProperty("solr.tests.maxBufferedDocs", "1000");
System.setProperty("solr.tests.ramBufferSizeMB", "5000");
System.setProperty("enable.update.log", "false"); // no _version_ in our schema
initCore("solrconfig.xml", "schema12.xml"); // segments API shouldn't depend on _version_ or ulog
// build up an index with at least 2 segments and some deletes
for (int i = 0; i < DOC_COUNT; i++) {
assertU(adoc("id","SOLR100" + i, "name","Apache Solr:" + i));
}
for (int i = 0; i < DEL_COUNT; i++) {
assertU(delI("SOLR100" + i));
}
assertU(commit());
for (int i = 0; i < DOC_COUNT; i++) {
assertU(adoc("id","SOLR200" + i, "name","Apache Solr:" + i));
}
assertU(commit());
h.getCore().withSearcher((searcher) -> {
int numSegments = SegmentInfos.readLatestCommit(searcher.getIndexReader().directory()).size();
// if this is not NUM_SEGMENTS, there was some unexpected flush or merge
assertEquals("Unexpected number of segment in the index: " + numSegments,
NUM_SEGMENTS, numSegments);
return null;
});
// see SOLR-14431
RefCounted<IndexWriter> iwRef = h.getCore().getSolrCoreState().getIndexWriter(h.getCore());
initialRefCount = iwRef.getRefcount();
iwRef.decref();
}
public void test() throws Exception {
SolrConfig config = h.getCore().getSolrConfig();
String codecFactory = config.get("codecFactory/@class");
assertEquals("Unexpected solrconfig codec factory", "solr.SimpleTextCodecFactory", codecFactory);
assertEquals("Unexpected core codec", "SimpleText", h.getCore().getCodec().getName());
RefCounted<IndexWriter> writerRef = h.getCore().getSolrCoreState().getIndexWriter(h.getCore());
try {
IndexWriter writer = writerRef.get();
assertEquals("Unexpected codec in IndexWriter config",
"SimpleText", writer.getConfig().getCodec().getName());
} finally {
writerRef.decref();
}
assertU(add(doc("id","1", "text","textual content goes here")));
assertU(commit());
h.getCore().withSearcher(searcher -> {
SegmentInfos infos = SegmentInfos.readLatestCommit(searcher.getIndexReader().directory());
SegmentInfo info = infos.info(infos.size() - 1).info;
assertEquals("Unexpected segment codec", "SimpleText", info.getCodec().getName());
return null;
});
assertQ(req("q", "id:1"),
"*[count(//doc)=1]");
}
protected void assertCompressionMode(String expectedModeString, SolrCore core) throws IOException {
h.getCore().withSearcher(searcher -> {
SegmentInfos infos = SegmentInfos.readLatestCommit(searcher.getIndexReader().directory());
SegmentInfo info = infos.info(infos.size() - 1).info;
assertEquals("Expecting compression mode string to be " + expectedModeString +
" but got: " + info.getAttribute(Lucene50StoredFieldsFormat.MODE_KEY) +
"\n SegmentInfo: " + info +
"\n SegmentInfos: " + infos +
"\n Codec: " + core.getCodec(),
expectedModeString, info.getAttribute(Lucene50StoredFieldsFormat.MODE_KEY));
return null;
});
}
private Collection<SegmentKey> getSegmentKeys() throws IOException {
List<SegmentKey> keys = new ArrayList<SegmentKey>();
SegmentInfos segmentInfos = new SegmentInfos();
segmentInfos.read(_directory, _indexCommit.getSegmentsFileName());
for (SegmentInfoPerCommit segmentInfoPerCommit : segmentInfos) {
String name = segmentInfoPerCommit.info.name;
String id = getId(segmentInfoPerCommit.info);
keys.add(new SegmentKey(name, id));
}
return keys;
}
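Note that this snippet targets the old Lucene 4.x API, where segment entries were typed SegmentInfoPerCommit and the segments file was loaded with the instance method read(Directory, String). On Lucene 5.0 and later the equivalent load is the static call:
// Modern equivalent of segmentInfos.read(_directory, ...):
SegmentInfos segmentInfos = SegmentInfos.readCommit(_directory, _indexCommit.getSegmentsFileName());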
public CommitStats(SegmentInfos segmentInfos) {
// clone the map to protect against concurrent changes
userData = Map.copyOf(segmentInfos.getUserData());
// Lucene calls the current generation the last generation.
generation = segmentInfos.getLastGeneration();
id = Base64.getEncoder().encodeToString(segmentInfos.getId());
numDocs = Lucene.getNumDocs(segmentInfos);
}
private void addRecoveredFileDetails(SegmentInfos si, Store store, RecoveryState.Index index) throws IOException {
final Directory directory = store.directory();
for (String name : Lucene.files(si)) {
long length = directory.fileLength(name);
index.addFileDetail(name, length, true);
}
}
/**
* Returns the last committed segments info for this store
*
* @throws IOException if the index is corrupted or the segments file is not present
*/
public SegmentInfos readLastCommittedSegmentsInfo() throws IOException {
failIfCorrupted();
try {
return readSegmentsInfo(null, directory());
} catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException ex) {
markStoreCorrupted(ex);
throw ex;
}
}
/**
* Returns the segments info for the given commit or for the latest commit if the given commit is <code>null</code>
*
* @throws IOException if the index is corrupted or the segments file is not present
*/
private static SegmentInfos readSegmentsInfo(IndexCommit commit, Directory directory) throws IOException {
assert commit == null || commit.getDirectory() == directory;
try {
return commit == null ? Lucene.readSegmentInfos(directory) : Lucene.readSegmentInfos(commit);
} catch (EOFException eof) {
// TODO this should be caught by lucene - EOF is almost certainly an index corruption
throw new CorruptIndexException("Read past EOF while reading segment infos", "commit(" + commit + ")", eof);
} catch (IOException exception) {
throw exception; // IOExceptions like too many open files are not necessarily a corruption - just bubble it up
} catch (Exception ex) {
throw new CorruptIndexException("Hit unexpected exception while reading segment infos", "commit(" + commit + ")", ex);
}
}
/**
* Tries to open an index for the given location. This includes reading the
* segment infos and possible corruption markers. If the index cannot
* be opened, an exception is thrown
*/
public static void tryOpenIndex(Path indexLocation, ShardId shardId, NodeEnvironment.ShardLocker shardLocker, Logger logger) throws IOException, ShardLockObtainFailedException {
try (ShardLock lock = shardLocker.lock(shardId, "open index", TimeUnit.SECONDS.toMillis(5));
Directory dir = new SimpleFSDirectory(indexLocation)) {
failIfCorrupted(dir, shardId);
SegmentInfos segInfo = Lucene.readSegmentInfos(dir);
logger.trace("{} loaded segment info [{}]", shardId, segInfo);
}
}
/**
* Returns an iterable that allows iterating over all files in this segments info
*/
public static Iterable<String> files(SegmentInfos infos) throws IOException {
final List<Collection<String>> list = new ArrayList<>();
list.add(Collections.singleton(infos.getSegmentsFileName()));
for (SegmentCommitInfo info : infos) {
list.add(info.files());
}
return Iterables.flatten(list);
}
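SegmentInfos also has a built-in equivalent of this helper: infos.files(true) returns every file referenced by the commit, including the segments_N file itself, which is what hasUnusedFiles above relies on. A one-line sketch:
// true = include the segments_N file itself in the returned collection.
Collection<String> allFiles = SegmentInfos.readLatestCommit(dir).files(true);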