The following lists example code showing how to use the org.apache.lucene.index.SegmentReader API class, or follow the links to view the source code on GitHub.
/**
 * Computes global memory statistics across all segments of the engine.
 * A searcher is acquired for the duration of the iteration (and released
 * by try-with-resources) so the underlying reader stays open.
 */
public final SegmentsStats segmentsStats() {
    ensureOpen();
    try (final Searcher searcher = acquireSearcher("segments_stats", false)) {
        final SegmentsStats stats = new SegmentsStats();
        for (LeafReaderContext leafContext : searcher.reader().leaves()) {
            // Unwrap down to the concrete SegmentReader to read per-segment memory numbers.
            final SegmentReader segmentReader = segmentReader(leafContext.reader());
            stats.add(1, segmentReader.ramBytesUsed());
            stats.addTermsMemoryInBytes(guardedRamBytesUsed(segmentReader.getPostingsReader()));
            stats.addStoredFieldsMemoryInBytes(guardedRamBytesUsed(segmentReader.getFieldsReader()));
            stats.addTermVectorsMemoryInBytes(guardedRamBytesUsed(segmentReader.getTermVectorsReader()));
            stats.addNormsMemoryInBytes(guardedRamBytesUsed(segmentReader.getNormsReader()));
            stats.addDocValuesMemoryInBytes(guardedRamBytesUsed(segmentReader.getDocValuesReader()));
        }
        writerSegmentStats(stats);
        return stats;
    }
}
@Override
public synchronized Collection<Accountable> getChildResources() {
    // Sum the heap usage of every segment of the wrapped index reader.
    long readerBytes = 0;
    for (LeafReaderContext leafContext : indexReader.leaves()) {
        readerBytes += ((SegmentReader) leafContext.reader()).ramBytesUsed();
    }
    final List<Accountable> resources = new ArrayList<>();
    resources.add(Accountables.namedAccountable("indexReader", readerBytes));
    if (taxoArrays != null) {
        resources.add(Accountables.namedAccountable("taxoArrays", taxoArrays));
    }
    // Each cache is sized under its own lock so the reported count is consistent.
    synchronized (categoryCache) {
        resources.add(Accountables.namedAccountable("categoryCache", BYTES_PER_CACHE_ENTRY * categoryCache.size()));
    }
    synchronized (ordinalCache) {
        resources.add(Accountables.namedAccountable("ordinalCache", BYTES_PER_CACHE_ENTRY * ordinalCache.size()));
    }
    return Collections.unmodifiableList(resources);
}
/**
 * Given an IndexReader, asserts that there is at least one LeafReader leaf,
 * and that all leaves are SegmentReaders whose compound-file status matches
 * the expected input.
 *
 * @param reader   the index reader whose leaves are checked
 * @param compound the expected compound-file status of every segment
 */
private static void assertCompoundSegments(IndexReader reader,
boolean compound) {
assertNotNull("Null leaves", reader.leaves());
assertTrue("no leaves", 0 < reader.leaves().size());
for (LeafReaderContext atomic : reader.leaves()) {
assertTrue("not a segment reader: " + atomic.reader().toString(),
atomic.reader() instanceof SegmentReader);
assertEquals("Compound status incorrect for: " +
atomic.reader().toString(),
compound,
((SegmentReader)atomic.reader()).getSegmentInfo().info.getUseCompoundFile());
}
}
/**
 * Collects the global (docBase-adjusted) ids of all live (non-deleted)
 * documents in the given index reader.
 *
 * @param indexReader the index reader; every leaf must be a SegmentReader
 * @return the ids of all live documents
 */
private static ArrayList<Integer> getLiveDocs(IndexReader indexReader) {
    final ArrayList<Integer> liveDocIds = new ArrayList<>();
    for (LeafReaderContext leaf : indexReader.leaves()) {
        final SegmentReader segmentReader = (SegmentReader) leaf.reader();
        final int maxDoc = segmentReader.maxDoc();
        // numDocs == maxDoc means the segment has no deletions at all.
        final boolean noDeletions = segmentReader.numDocs() == maxDoc;
        for (int docId = 0; docId < maxDoc; docId++) {
            if (noDeletions || segmentReader.getLiveDocs().get(docId)) {
                liveDocIds.add(leaf.docBase + docId);
            }
        }
    }
    return liveDocIds;
}
/**
 * Writes every row id (term of the ROW_ID field) of the given segment to the
 * writer, reporting progress periodically so the task is not killed as idle.
 *
 * @param writer        the destination writer
 * @param segmentReader the segment whose ROW_ID terms are dumped
 * @throws IOException if reading terms or appending to the writer fails
 */
private void writeRowIds(Writer writer, SegmentReader segmentReader) throws IOException {
    Terms terms = segmentReader.terms(BlurConstants.ROW_ID);
    if (terms == null) {
        // Segment has no ROW_ID field; nothing to write.
        return;
    }
    TermsEnum termsEnum = terms.iterator(null);
    BytesRef rowId;
    long lastProgress = System.nanoTime();
    while ((rowId = termsEnum.next()) != null) {
        long now = System.nanoTime();
        // BUG FIX: the original check "now + _10_SECONDS > lastProgress" is always
        // true (now >= lastProgress), so progress was reported on every single term.
        // Report only when at least _10_SECONDS have elapsed since the last report.
        if (now - lastProgress > _10_SECONDS) {
            _progressable.progress();
            lastProgress = now;
        }
        writer.append(new Text(rowId.utf8ToString()), NullWritable.get());
    }
}
/**
 * Aggregates document counts and on-disk size across all segments of the
 * given reader. This is a stats call: it does not wait for pending refreshes,
 * so the next scheduled refresh will bring the numbers up to date.
 */
protected final DocsStats docsStats(IndexReader indexReader) {
    long docCount = 0;
    long deletedCount = 0;
    long totalSizeInBytes = 0;
    for (LeafReaderContext leafContext : indexReader.leaves()) {
        // Go down to the segment level to get accurate numbers.
        final SegmentReader segmentReader = Lucene.segmentReader(leafContext.reader());
        final SegmentCommitInfo info = segmentReader.getSegmentInfo();
        docCount += leafContext.reader().numDocs();
        deletedCount += leafContext.reader().numDeletedDocs();
        try {
            totalSizeInBytes += info.sizeInBytes();
        } catch (IOException e) {
            // Best effort: a segment whose size cannot be read contributes 0 bytes.
            logger.trace(() -> new ParameterizedMessage("failed to get size for [{}]", info.info.name), e);
        }
    }
    return new DocsStats(docCount, deletedCount, totalSizeInBytes);
}
/**
 * Populates a Segment descriptor from the given SegmentReader and stores it
 * in the segments map, keyed by segment name. The segment must not already
 * be present in the map.
 */
private void fillSegmentInfo(SegmentReader segmentReader, boolean verbose, boolean search, Map<String, Segment> segments) {
    final SegmentCommitInfo info = segmentReader.getSegmentInfo();
    assert segments.containsKey(info.info.name) == false;
    final Segment segment = new Segment(info.info.name);
    segment.search = search;
    segment.docCount = segmentReader.numDocs();
    segment.delDocCount = segmentReader.numDeletedDocs();
    segment.version = info.info.getVersion();
    segment.compound = info.info.getUseCompoundFile();
    try {
        segment.sizeInBytes = info.sizeInBytes();
    } catch (IOException e) {
        // Best effort: size stays unset when it cannot be read.
        logger.trace(() -> new ParameterizedMessage("failed to get size for [{}]", info.info.name), e);
    }
    segment.memoryInBytes = segmentReader.ramBytesUsed();
    segment.segmentSort = info.info.getIndexSort();
    if (verbose) {
        // Verbose mode attaches the full accountable tree of the reader.
        segment.ramTree = Accountables.namedAccountable("root", segmentReader);
    }
    segment.attributes = info.info.getAttributes();
    // TODO: add more fine grained mem stats values to per segment info here
    segments.put(info.info.name, segment);
}
DirectoryReaderWithAllLiveDocs(DirectoryReader in) throws IOException {
    super(in, new SubReaderWrapper() {
        @Override
        public LeafReader wrap(LeafReader leaf) {
            final SegmentReader segmentReader = segmentReader(leaf);
            final Bits hardLiveDocs = segmentReader.getHardLiveDocs();
            if (hardLiveDocs == null) {
                // No hard deletes: every document of the leaf is live.
                return new LeafReaderWithLiveDocs(leaf, null, leaf.maxDoc());
            }
            // TODO: Can we avoid calculate numDocs by using SegmentReader#getSegmentInfo with LUCENE-8458?
            int liveCount = 0;
            final int length = hardLiveDocs.length();
            for (int docId = 0; docId < length; docId++) {
                if (hardLiveDocs.get(docId)) {
                    liveCount++;
                }
            }
            return new LeafReaderWithLiveDocs(segmentReader, hardLiveDocs, liveCount);
        }
    });
}
/**
 * Tries to extract a segment reader from the given index reader, unwrapping
 * any FilterLeafReader layers along the way.
 * If no SegmentReader can be extracted an {@link IllegalStateException} is thrown.
 */
protected static SegmentReader segmentReader(LeafReader reader) {
    LeafReader current = reader;
    while (true) {
        if (current instanceof SegmentReader) {
            return (SegmentReader) current;
        }
        if (!(current instanceof FilterLeafReader)) {
            // hard fail - we can't get a SegmentReader
            throw new IllegalStateException("Can not extract segment reader from given index reader [" + current + "]");
        }
        current = FilterLeafReader.unwrap((FilterLeafReader) current);
    }
}
/**
 * Returns the sum of RAM bytes used by each segment, or -1 when any leaf is
 * not a SegmentReader (heap usage is only supported for segment readers).
 */
private static long getIndexHeapUsed(DirectoryReader reader) {
    long totalHeapBytes = 0;
    for (LeafReaderContext leafContext : reader.leaves()) {
        final LeafReader leafReader = leafContext.reader();
        if (!(leafReader instanceof SegmentReader)) {
            // Not supported for any reader that is not a SegmentReader.
            return -1;
        }
        totalHeapBytes += ((SegmentReader) leafReader).ramBytesUsed();
    }
    return totalHeapBytes;
}
/**
 * Registers a purge listener on the reader so cached entries are evicted
 * when the reader (or its core) is closed, instead of waiting for GC.
 */
private void initReader(AtomicReader reader) {
    if (reader instanceof SegmentReader) {
        reader.addCoreClosedListener(purgeCore);
        return;
    }
    // We have a slow (non-segment) reader of some sort; try to register a
    // purge event rather than relying on GC.
    final Object coreKey = reader.getCoreCacheKey();
    if (coreKey instanceof AtomicReader) {
        ((AtomicReader) coreKey).addReaderClosedListener(purgeReader);
    } else {
        // Last chance: listen on the reader itself.
        reader.addReaderClosedListener(purgeReader);
    }
}
/**
 * Estimates the heap memory used by this suggester: a shallow estimate of
 * this object plus the RAM usage of every SegmentReader leaf of the current
 * searcher's index reader.
 *
 * @return estimated RAM bytes used
 * @throws RuntimeException wrapping any IOException from acquiring the searcher
 */
@Override
public long ramBytesUsed() {
    long mem = RamUsageEstimator.shallowSizeOf(this);
    try {
        if (searcherMgr != null) {
            SearcherManager mgr;
            IndexSearcher searcher;
            synchronized (searcherMgrLock) {
                mgr = searcherMgr; // acquire & release on same SearcherManager, via local reference
                searcher = mgr.acquire();
            }
            try {
                for (LeafReaderContext context : searcher.getIndexReader().leaves()) {
                    LeafReader reader = FilterLeafReader.unwrap(context.reader());
                    if (reader instanceof SegmentReader) {
                        // BUG FIX: cast the unwrapped reader, not context.reader().
                        // The old code cast context.reader(), which throws
                        // ClassCastException whenever the leaf is wrapped in a
                        // FilterLeafReader (the instanceof check was done on the
                        // unwrapped reader). This also matches getChildResources().
                        mem += ((SegmentReader) reader).ramBytesUsed();
                    }
                }
            } finally {
                mgr.release(searcher);
            }
        }
        return mem;
    } catch (IOException ioe) {
        throw new RuntimeException(ioe);
    }
}
/**
 * Reports each SegmentReader leaf of the current searcher's index reader as
 * a named child resource for memory accounting.
 */
@Override
public Collection<Accountable> getChildResources() {
    final List<Accountable> childResources = new ArrayList<>();
    try {
        if (searcherMgr != null) {
            final SearcherManager manager;
            final IndexSearcher indexSearcher;
            synchronized (searcherMgrLock) {
                // Acquire & release on the same SearcherManager, via a local reference.
                manager = searcherMgr;
                indexSearcher = manager.acquire();
            }
            try {
                for (LeafReaderContext leafContext : indexSearcher.getIndexReader().leaves()) {
                    final LeafReader unwrapped = FilterLeafReader.unwrap(leafContext.reader());
                    if (unwrapped instanceof SegmentReader) {
                        childResources.add(Accountables.namedAccountable("segment", (SegmentReader) unwrapped));
                    }
                }
            } finally {
                manager.release(indexSearcher);
            }
        }
        return Collections.unmodifiableList(childResources);
    } catch (IOException ioe) {
        throw new RuntimeException(ioe);
    }
}
/**
 * Builds the row-id cache file for the segment identified by the given key:
 * writes to a temporary path first, then commits it to the final file.
 */
private void createCacheFile(Path file, SegmentKey segmentKey) throws IOException {
    LOG.info("Building cache for segment [{0}] to [{1}]", segmentKey, file);
    final Path tmpPath = getTmpWriterPath(file.getParent());
    try (Writer writer = createWriter(_configuration, tmpPath)) {
        final DirectoryReader reader = getReader();
        for (AtomicReaderContext context : reader.leaves()) {
            final SegmentReader candidate = AtomicReaderUtil.getSegmentReader(context.reader());
            // Only the segment matching the requested key is dumped.
            if (candidate.getSegmentName().equals(segmentKey.getSegmentName())) {
                writeRowIds(writer, candidate);
                break;
            }
        }
    }
    commitWriter(_configuration, file, tmpPath);
}
/**
 * Decides whether the given (composite-level) doc id is filtered out by the
 * supplied filter.
 *
 * @param notAdjustedDocId composite doc id, before subtracting the leaf's docBase
 * @param reader           must be a BaseCompositeReader
 * @param filter           the filter to test; null means "nothing is filtered"
 * @return true when the document does NOT match the filter
 * @throws IOException      if the filter fails to produce its doc id set
 * @throws RuntimeException if the reader types are not as expected
 */
@SuppressWarnings("unchecked")
private static boolean isFiltered(int notAdjustedDocId, IndexReader reader, Filter filter) throws IOException {
    if (filter == null) {
        return false;
    }
    if (!(reader instanceof BaseCompositeReader)) {
        throw new RuntimeException("Reader has to be a BaseCompositeReader [" + reader + "]");
    }
    BaseCompositeReader<IndexReader> indexReader = (BaseCompositeReader<IndexReader>) reader;
    List<? extends IndexReader> sequentialSubReaders = BaseCompositeReaderUtil.getSequentialSubReaders(indexReader);
    int readerIndex = BaseCompositeReaderUtil.readerIndex(indexReader, notAdjustedDocId);
    int readerBase = BaseCompositeReaderUtil.readerBase(indexReader, readerIndex);
    // Translate the composite doc id into the leaf's local doc id space.
    int docId = notAdjustedDocId - readerBase;
    IndexReader orgReader = sequentialSubReaders.get(readerIndex);
    SegmentReader segmentReader = AtomicReaderUtil.getSegmentReader(orgReader);
    if (segmentReader == null) {
        throw new RuntimeException("Reader has to be a SegmentReader [" + orgReader + "]");
    }
    DocIdSet docIdSet = filter.getDocIdSet(segmentReader.getContext(), segmentReader.getLiveDocs());
    // BUG FIX: Filter.getDocIdSet is documented to possibly return null (meaning
    // "no documents match"); the old code dereferenced it unconditionally and
    // could throw a NullPointerException. Treat null like an empty set.
    if (docIdSet == null) {
        return true;
    }
    DocIdSetIterator iterator = docIdSet.iterator();
    if (iterator == null) {
        // An empty iterator also means no document matches the filter.
        return true;
    }
    // The doc is kept only when the iterator lands exactly on it.
    return iterator.advance(docId) != docId;
}
/**
 * Returns a cached DocIdSet for this reader's segment, computing and caching it
 * on a miss. Entries are keyed by the reader's core cache key and computation is
 * serialized by a per-key lock so only one DocIdSet is built per reader per filter.
 * NOTE(review): the cache is not re-checked after the lock is taken, so a thread
 * that waited on the lock recomputes and overwrites the entry - confirm whether
 * a double-check is intended here.
 */
@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
AtomicReader reader = context.reader();
Object key = reader.getCoreCacheKey();
DocIdSet docIdSet = _cache.get(key);
if (docIdSet != null) {
_hits.incrementAndGet();
// Cached sets are built without acceptDocs (see below), so re-apply them here.
return BitsFilteredDocIdSet.wrap(docIdSet, acceptDocs);
}
// This will only allow a single instance be created per reader per filter
Object lock = getLock(key);
synchronized (lock) {
SegmentReader segmentReader = getSegmentReader(reader);
if (segmentReader == null) {
// Not a SegmentReader: fall back to the wrapped filter without caching.
LOG.warn("Could not find SegmentReader from [{0}]", reader);
return _filter.getDocIdSet(context, acceptDocs);
}
Directory directory = getDirectory(segmentReader);
if (directory == null) {
// No backing Directory available: also fall back without caching.
LOG.warn("Could not find Directory from [{0}]", segmentReader);
return _filter.getDocIdSet(context, acceptDocs);
}
_misses.incrementAndGet();
String segmentName = segmentReader.getSegmentName();
// Build with null acceptDocs so the cached set is reusable across callers;
// the caller-specific acceptDocs are applied by the wrap below.
docIdSet = docIdSetToCache(_filter.getDocIdSet(context, null), reader, segmentName, directory);
_cache.put(key, docIdSet);
return BitsFilteredDocIdSet.wrap(docIdSet, acceptDocs);
}
}
/**
 * Counts the number of span-query match positions across all leaves of the
 * searcher, honoring per-document visibility rules: deleted documents are
 * skipped, results may be limited to one source document, source documents
 * with a corresponding annotation document are excluded, and annotation
 * documents of other users are excluded.
 *
 * @param searcher the searcher over the index to query
 * @param aRequest carries the project, user and optional document restriction
 * @param q        the span query to count
 * @return the total number of matching span positions, or -1 when processing
 *         a leaf failed (but see the review note at the catch block)
 * @throws IOException if rewriting the query or creating its weight fails
 */
private long doCountResults(IndexSearcher searcher,
SearchQueryRequest aRequest, MtasSpanQuery q) throws IOException
{
ListIterator<LeafReaderContext> leafReaderContextIterator = searcher.getIndexReader()
.leaves().listIterator();
Map<Long, Long> annotatableDocuments = listAnnotatableDocuments(aRequest.getProject(),
aRequest.getUser());
// Scores are not needed for counting, so a zero boost is used.
final float boost = 0;
SpanWeight spanweight = q.rewrite(searcher.getIndexReader()).createWeight(searcher, false,
boost);
long numResults = 0;
while (leafReaderContextIterator.hasNext()) {
LeafReaderContext leafReaderContext = leafReaderContextIterator.next();
try {
Spans spans = spanweight.getSpans(leafReaderContext, SpanWeight.Postings.POSITIONS);
// NOTE(review): assumes every leaf is a SegmentReader; a wrapped leaf would
// throw ClassCastException here - confirm for this code path.
SegmentReader segmentReader = (SegmentReader) leafReaderContext.reader();
if (spans != null) {
while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
// Skip deleted documents: numDocs == maxDoc means no deletions,
// otherwise consult the live-docs bitset.
if (segmentReader.numDocs() == segmentReader.maxDoc()
|| segmentReader.getLiveDocs().get(spans.docID())) {
Document document = segmentReader.document(spans.docID());
// Retrieve user
String user = document.get(FIELD_USER);
// Retrieve source and annotation document ids
String rawSourceDocumentId = document.get(FIELD_SOURCE_DOCUMENT_ID);
String rawAnnotationDocumentId = document
.get(FIELD_ANNOTATION_DOCUMENT_ID);
if (rawSourceDocumentId == null || rawAnnotationDocumentId == null) {
log.trace("Indexed document lacks source/annotation document IDs"
+ " - source: {}, annotation: {}", rawSourceDocumentId,
rawAnnotationDocumentId);
continue;
}
long sourceDocumentId = Long.valueOf(rawSourceDocumentId);
long annotationDocumentId = Long.valueOf(rawAnnotationDocumentId);
// If the query is limited to a given document, skip any results
// which are not in the given document
Optional<SourceDocument> limitedToDocument = aRequest
.getLimitedToDocument();
if (limitedToDocument.isPresent() && !Objects
.equals(limitedToDocument.get().getId(), sourceDocumentId)) {
log.trace("Query limited to document {}, skipping results for "
+ "document {}", limitedToDocument.get().getId(),
sourceDocumentId);
continue;
}
if (annotatableDocuments.containsKey(sourceDocumentId)
&& annotationDocumentId == -1) {
// Exclude result if the retrieved document is a sourcedocument
// (that is, has annotationDocument = -1) AND it has a
// corresponding annotation document for this user
log.trace("Skipping results from indexed source document {} in"
+ "favor of results from the corresponding annotation "
+ "document", sourceDocumentId);
continue;
}
else if (annotationDocumentId != -1 && !aRequest.getUser().getUsername()
.equals(user)) {
// Exclude result if the retrieved document is an annotation
// document (that is, annotationDocument != -1 and its username
// is different from the quering user
log.trace("Skipping results from annotation document for user {} "
+ "which does not match the requested user {}", user,
aRequest.getUser().getUsername());
continue;
}
// Count every matching span position within this document.
while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
numResults++;
}
}
}
}
}
catch (Exception e) {
// NOTE(review): numResults is set to -1 here but the loop keeps iterating,
// so a later successful leaf can increment it again and mask the failure -
// confirm whether an early return is intended.
log.error("Unable to process query results", e);
numResults = -1;
}
}
return numResults;
}
/**
 * Debugging utility: runs the given span query against every leaf of the
 * index and prints, for each matching span, the covered Mtas tokens
 * (filtered by the given prefixes) and the positioned term hits to stdout.
 *
 * @param indexReader the index to query
 * @param field       the field whose Mtas codec info and terms are used
 * @param q           the span query to execute
 * @param prefixes    token/term prefixes used to filter the printed output
 * @throws IOException if rewriting the query, creating its weight, or
 *                     reading terms fails
 */
private static void doQuery(IndexReader indexReader, String field, MtasSpanQuery q,
List<String> prefixes)
throws IOException
{
ListIterator<LeafReaderContext> iterator = indexReader.leaves().listIterator();
IndexSearcher searcher = new IndexSearcher(indexReader);
// Scoring is irrelevant for this diagnostic output.
final float boost = 0;
SpanWeight spanweight = q.rewrite(indexReader).createWeight(searcher, false, boost);
while (iterator.hasNext()) {
System.out.println("#### new iteration ####");
LeafReaderContext lrc = iterator.next();
Spans spans = spanweight.getSpans(lrc, SpanWeight.Postings.POSITIONS);
// NOTE(review): assumes every leaf is a SegmentReader; a wrapped leaf would
// throw ClassCastException here - confirm for this code path.
SegmentReader segmentReader = (SegmentReader) lrc.reader();
Terms terms = segmentReader.terms(field);
CodecInfo mtasCodecInfo = CodecInfo.getCodecInfoFromTerms(terms);
if (spans != null) {
while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
// Skip deleted documents (numDocs == maxDoc means no deletions).
if (segmentReader.numDocs() == segmentReader.maxDoc()
|| segmentReader.getLiveDocs().get(spans.docID())) {
String idValue = segmentReader.document(spans.docID()).getField(FIELD_ID)
.stringValue();
System.out.println("******** New doc " + spans.docID() + "-" + idValue);
while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
System.out.println("------");
// Print the prefix-filtered tokens covered by this span match.
List<MtasTokenString> tokens = mtasCodecInfo
.getPrefixFilteredObjectsByPositions(field, spans.docID(),
prefixes, spans.startPosition(),
(spans.endPosition() - 1));
for (MtasTokenString token : tokens) {
System.out.print("docId: " + (lrc.docBase + spans.docID()) + ", ");
System.out.print(" position: " + token.getPositionStart()
+ (!Objects.equals(token.getPositionEnd(),
token.getPositionStart())
? "-" + token.getPositionEnd()
: ""));
System.out.print(" offset: " + token.getOffsetStart() + "-"
+ token.getOffsetEnd());
System.out.print(" mtasId: " + token.getId());
System.out.println(" " + token.getPrefix()
+ (token.getPostfix() != null ? ":" + token.getPostfix()
: "")
+ ", ");
}
System.out.println("------");
// Print positioned term hits for the same span range.
List<MtasTreeHit<String>> hits = mtasCodecInfo
.getPositionedTermsByPrefixesAndPositionRange(field,
spans.docID(), prefixes, spans.startPosition(),
(spans.endPosition() - 1));
for (MtasTreeHit<String> hit : hits) {
System.out.print("docId: " + (lrc.docBase + spans.docID()) + ", ");
System.out.print("position: " + hit.startPosition
+ (hit.endPosition != hit.startPosition
? "-" + hit.endPosition
: ""));
System.out.println(" " + CodecUtil.termPrefix(hit.data)
+ (CodecUtil.termValue(hit.data) != null
? ":" + CodecUtil.termValue(hit.data)
: "")
+ ", ");
}
}
// if (prefixes != null && !prefixes.isEmpty()) {
// }
}
}
}
}
}
/**
 * Builds an ordered map describing one segment: commit-level stats (deletes,
 * size, diagnostics, attributes) plus, when the segment's open SegmentReader
 * can be located among the given leaf contexts, reader-level metadata, RAM
 * usage breakdown and optional per-field details.
 *
 * @param segmentCommitInfo the segment commit to describe
 * @param withSizeInfo      when true, include the largest files and RAM usage
 * @param withFieldInfos    when true, include per-field info (needs a SegmentReader)
 * @param leafContexts      open leaves searched for this segment's reader
 * @param schema            schema used when describing fields
 * @throws IOException if reading segment sizes or files fails
 */
private SimpleOrderedMap<Object> getSegmentInfo(
SegmentCommitInfo segmentCommitInfo, boolean withSizeInfo, boolean withFieldInfos,
List<LeafReaderContext> leafContexts, IndexSchema schema) throws IOException {
SimpleOrderedMap<Object> segmentInfoMap = new SimpleOrderedMap<>();
segmentInfoMap.add(NAME, segmentCommitInfo.info.name);
segmentInfoMap.add("delCount", segmentCommitInfo.getDelCount());
segmentInfoMap.add("softDelCount", segmentCommitInfo.getSoftDelCount());
segmentInfoMap.add("hasFieldUpdates", segmentCommitInfo.hasFieldUpdates());
segmentInfoMap.add("sizeInBytes", segmentCommitInfo.sizeInBytes());
segmentInfoMap.add("size", segmentCommitInfo.info.maxDoc());
// NOTE(review): throws if the diagnostics map has no "timestamp" entry -
// confirm every segment writer records one.
Long timestamp = Long.parseLong(segmentCommitInfo.info.getDiagnostics()
.get("timestamp"));
segmentInfoMap.add("age", new Date(timestamp));
segmentInfoMap.add("source",
segmentCommitInfo.info.getDiagnostics().get("source"));
segmentInfoMap.add("version", segmentCommitInfo.info.getVersion().toString());
// don't open a new SegmentReader - try to find the right one from the leaf contexts
SegmentReader seg = null;
for (LeafReaderContext lrc : leafContexts) {
LeafReader leafReader = lrc.reader();
// Unwrap any FilterLeafReader so the instanceof check below can succeed.
leafReader = FilterLeafReader.unwrap(leafReader);
if (leafReader instanceof SegmentReader) {
SegmentReader sr = (SegmentReader)leafReader;
if (sr.getSegmentInfo().info.equals(segmentCommitInfo.info)) {
seg = sr;
break;
}
}
}
if (seg != null) {
// Reader-level metadata is only available when the segment is open.
LeafMetaData metaData = seg.getMetaData();
if (metaData != null) {
segmentInfoMap.add("createdVersionMajor", metaData.getCreatedVersionMajor());
segmentInfoMap.add("minVersion", metaData.getMinVersion().toString());
if (metaData.getSort() != null) {
segmentInfoMap.add("sort", metaData.getSort().toString());
}
}
}
if (!segmentCommitInfo.info.getDiagnostics().isEmpty()) {
segmentInfoMap.add("diagnostics", segmentCommitInfo.info.getDiagnostics());
}
if (!segmentCommitInfo.info.getAttributes().isEmpty()) {
segmentInfoMap.add("attributes", segmentCommitInfo.info.getAttributes());
}
if (withSizeInfo) {
Directory dir = segmentCommitInfo.info.dir;
List<Pair<String, Long>> files = segmentCommitInfo.files().stream()
.map(f -> {
long size = -1;
try {
size = dir.fileLength(f);
} catch (IOException e) {
// Best effort: a missing/unreadable file is reported with size -1.
}
return new Pair<String, Long>(f, size);
}).sorted((p1, p2) -> {
// Sort by file size, descending.
if (p1.second() > p2.second()) {
return -1;
} else if (p1.second() < p2.second()) {
return 1;
} else {
return 0;
}
}).collect(Collectors.toList());
if (!files.isEmpty()) {
SimpleOrderedMap<Object> topFiles = new SimpleOrderedMap<>();
// Report only the five largest files.
for (int i = 0; i < Math.min(files.size(), 5); i++) {
Pair<String, Long> p = files.get(i);
topFiles.add(p.first(), RamUsageEstimator.humanReadableUnits(p.second()));
}
segmentInfoMap.add("largestFiles", topFiles);
}
}
if (seg != null && withSizeInfo) {
SimpleOrderedMap<Object> ram = new SimpleOrderedMap<>();
ram.add("total", seg.ramBytesUsed());
for (Accountable ac : seg.getChildResources()) {
accountableToMap(ac, ram::add);
}
segmentInfoMap.add("ramBytesUsed", ram);
}
if (withFieldInfos) {
if (seg == null) {
log.debug("Skipping segment info - not available as a SegmentReader: {}", segmentCommitInfo);
} else {
FieldInfos fis = seg.getFieldInfos();
SimpleOrderedMap<Object> fields = new SimpleOrderedMap<>();
for (FieldInfo fi : fis) {
fields.add(fi.name, getFieldInfo(seg, fi, schema));
}
segmentInfoMap.add("fields", fields);
}
}
return segmentInfoMap;
}
/** Returns the Directory backing the given SegmentReader. */
private Directory getDirectory(SegmentReader reader) {
return reader.directory();
}
/**
 * Returns the given reader cast to SegmentReader, or null when it is not one.
 */
private SegmentReader getSegmentReader(AtomicReader reader) {
    return (reader instanceof SegmentReader) ? (SegmentReader) reader : null;
}