org.apache.lucene.index.SegmentWriteState#org.apache.lucene.util.packed.PackedInts Source Code Examples

Listed below are example usages of org.apache.lucene.index.SegmentWriteState#org.apache.lucene.util.packed.PackedInts collected from open-source projects; the full source of each example can be found on GitHub in the project cited with it.

Example 1

@Override
public LeafBucketCollector getLeafCollector(LeafReaderContext ctx) throws IOException {
    finishLeaf();

    context = ctx;
    docDeltas = PackedLongValues.packedBuilder(PackedInts.DEFAULT);
    buckets = PackedLongValues.packedBuilder(PackedInts.DEFAULT);

    return new LeafBucketCollector() {
        int lastDoc = 0;

        @Override
        public void collect(int doc, long bucket) throws IOException {
            docDeltas.add(doc - lastDoc);
            buckets.add(bucket);
            lastDoc = doc;
            maxBucket = Math.max(maxBucket, bucket);
        }
    };
}
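
The two PackedLongValues builders above buffer the collected doc ids (as deltas from the previous doc) and bucket ordinals in packed form until the deferred collector replays them. As a minimal, standalone sketch of that builder API, with made-up doc ids rather than anything from the project above:

// Standalone sketch: buffer doc-id gaps in a PackedLongValues and replay them.
// The sample doc ids are hypothetical.
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;

public class PackedDeltaSketch {
  public static void main(String[] args) {
    int[] docs = {3, 7, 42, 43, 99};             // hypothetical collected doc ids
    PackedLongValues.Builder deltas = PackedLongValues.packedBuilder(PackedInts.DEFAULT);
    int lastDoc = 0;
    for (int doc : docs) {
      deltas.add(doc - lastDoc);                 // store small gaps instead of absolute ids
      lastDoc = doc;
    }
    PackedLongValues packed = deltas.build();    // freeze into a read-only structure
    PackedLongValues.Iterator it = packed.iterator();
    long doc = 0;
    while (it.hasNext()) {
      doc += it.next();                          // re-accumulate gaps back into doc ids
      System.out.println(doc);
    }
  }
}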
 
Example 2  Project: Elasticsearch  File: PackedArrayIndexFieldData.java
private long getPageMemoryUsage(PackedLongValues values, float acceptableOverheadRatio, int pageSize, long pageMinOrdinal, long pageMaxOrdinal) {
    int bitsRequired;
    long pageMemorySize = 0;
    PackedInts.FormatAndBits formatAndBits;
    if (pageMaxOrdinal == Long.MIN_VALUE) {
        // empty page - will use the null reader which just stores size
        pageMemorySize += RamUsageEstimator.alignObjectSize(RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + RamUsageEstimator.NUM_BYTES_INT);

    } else {
        long pageMinValue = values.get(pageMinOrdinal);
        long pageMaxValue = values.get(pageMaxOrdinal);
        long pageDelta = pageMaxValue - pageMinValue;
        if (pageDelta != 0) {
            bitsRequired = pageDelta < 0 ? 64 : PackedInts.bitsRequired(pageDelta);
            formatAndBits = PackedInts.fastestFormatAndBits(pageSize, bitsRequired, acceptableOverheadRatio);
            pageMemorySize += formatAndBits.format.longCount(PackedInts.VERSION_CURRENT, pageSize, formatAndBits.bitsPerValue) * RamUsageEstimator.NUM_BYTES_LONG;
            pageMemorySize += RamUsageEstimator.NUM_BYTES_LONG; // min value per page storage
        } else {
            // constant page: max == min, so it is stored like an empty page (only the size is kept)
            pageMemorySize += RamUsageEstimator.alignObjectSize(RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + RamUsageEstimator.NUM_BYTES_INT);
        }
    }
    return pageMemorySize;
}
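
The estimate above charges 8 bytes for every backing long reported by format.longCount(...) plus one extra long for the per-page minimum. A hedged, standalone sketch of the same arithmetic for a single hypothetical page (the page size and delta are made-up numbers, not taken from the method above):

// Standalone sketch: estimate the packed size of one page of 1024 values whose
// max - min delta is 1000 (both numbers are hypothetical).
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.packed.PackedInts;

public class PageEstimateSketch {
  public static void main(String[] args) {
    int pageSize = 1024;                       // hypothetical page size
    long pageDelta = 1000;                     // hypothetical max - min on the page
    int bitsRequired = PackedInts.bitsRequired(pageDelta);
    PackedInts.FormatAndBits fab =
        PackedInts.fastestFormatAndBits(pageSize, bitsRequired, PackedInts.DEFAULT);
    long bytes = fab.format.longCount(PackedInts.VERSION_CURRENT, pageSize, fab.bitsPerValue)
        * (long) RamUsageEstimator.NUM_BYTES_LONG    // packed backing array
        + RamUsageEstimator.NUM_BYTES_LONG;          // min value stored per page
    System.out.println(bytes + " bytes estimated for the page");
  }
}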
 
Example 3  Project: lucene-solr  File: FixedGapTermsIndexWriter.java
public FixedGapTermsIndexWriter(SegmentWriteState state, int termIndexInterval) throws IOException {
  if (termIndexInterval <= 0) {
    throw new IllegalArgumentException("invalid termIndexInterval: " + termIndexInterval);
  }
  this.termIndexInterval = termIndexInterval;
  final String indexFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, TERMS_INDEX_EXTENSION);
  out = state.directory.createOutput(indexFileName, state.context);
  boolean success = false;
  try {
    CodecUtil.writeIndexHeader(out, CODEC_NAME, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
    out.writeVInt(termIndexInterval);
    out.writeVInt(PackedInts.VERSION_CURRENT);
    out.writeVInt(BLOCKSIZE);
    success = true;
  } finally {
    if (!success) {
      IOUtils.closeWhileHandlingException(out);
    }
  }
}
 
Example 4  Project: lucene-solr  File: LZ4.java
@Override
void reset(byte[] bytes, int off, int len) {
  Objects.checkFromIndexSize(off, len, bytes.length);
  this.bytes = bytes;
  this.base = off;
  this.lastOff = off - 1;
  this.end = off + len;
  final int bitsPerOffset = PackedInts.bitsRequired(len - LAST_LITERALS);
  final int bitsPerOffsetLog = 32 - Integer.numberOfLeadingZeros(bitsPerOffset - 1);
  hashLog = MEMORY_USAGE + 3 - bitsPerOffsetLog;
  if (hashTable == null || hashTable.size() < 1 << hashLog || hashTable.getBitsPerValue() < bitsPerOffset) {
    hashTable = PackedInts.getMutable(1 << hashLog, bitsPerOffset, PackedInts.DEFAULT);
  } else {
    // Avoid calling hashTable.clear(), this makes it costly to compress many short sequences otherwise.
    // Instead, get() checks that references are less than the current offset.
    get(off); // this sets the hashTable for the first 4 bytes as a side-effect
  }
}
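
hashTable here is a PackedInts.Mutable: a flat array whose slots are exactly bitsPerOffset bits wide, so short inputs get a proportionally small table. A minimal standalone sketch of the getMutable call used above, with made-up sizes:

// Standalone sketch: a packed mutable array of 1 << 14 slots, each wide enough
// to hold offsets up to 65535. Sizes and values are hypothetical.
import org.apache.lucene.util.packed.PackedInts;

public class MutableSketch {
  public static void main(String[] args) {
    int valueCount = 1 << 14;                              // hypothetical table size
    int bitsPerValue = PackedInts.bitsRequired(65535);     // 16 bits per slot
    PackedInts.Mutable table =
        PackedInts.getMutable(valueCount, bitsPerValue, PackedInts.DEFAULT);
    table.set(3, 42);                                      // store an offset
    System.out.println(table.get(3));                      // prints 42
    System.out.println(table.ramBytesUsed() + " bytes used");
  }
}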
 
Example 5  Project: lucene-solr  File: TestLSBRadixSorter.java
public void test(LSBRadixSorter sorter, int[] arr, int len) {
  final int[] expected = ArrayUtil.copyOfSubArray(arr, 0, len);
  Arrays.sort(expected);

  int numBits = 0;
  for (int i = 0; i < len; ++i) {
    numBits = Math.max(numBits, PackedInts.bitsRequired(arr[i]));
  }

  if (random().nextBoolean()) {
    numBits = TestUtil.nextInt(random(), numBits, 32);
  }

  sorter.sort(numBits, arr, len);
  final int[] actual = ArrayUtil.copyOfSubArray(arr, 0, len);
  assertArrayEquals(expected, actual);
}
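
The numBits handed to the sorter comes from PackedInts.bitsRequired, which reports how many bits are needed to represent a non-negative value. A tiny standalone sketch of that helper:

// Standalone sketch: bit widths reported by PackedInts.bitsRequired.
import org.apache.lucene.util.packed.PackedInts;

public class BitsRequiredSketch {
  public static void main(String[] args) {
    System.out.println(PackedInts.bitsRequired(255));       // 8
    System.out.println(PackedInts.bitsRequired(256));       // 9
    System.out.println(PackedInts.bitsRequired(1_000_000)); // 20
  }
}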
 
Example 6

@Override
public void addSortedSetField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrdCount, Iterable<Number> ords) throws IOException {
  meta.writeVInt(field.number);
  meta.writeByte(DiskDocValuesFormat.SORTED_SET);
  // write the ord -> byte[] as a binary field
  addBinaryField(field, values);
  // write the stream of ords as a numeric field
  // NOTE: we could return an iterator that delta-encodes these within a doc
  addNumericField(field, ords);
  
  // write the doc -> ord count as an absolute index into the stream
  meta.writeVInt(field.number);
  meta.writeByte(DiskDocValuesFormat.NUMERIC);
  meta.writeVInt(PackedInts.VERSION_CURRENT);
  meta.writeLong(data.getFilePointer());
  meta.writeVLong(maxDoc);
  meta.writeVInt(BLOCK_SIZE);

  final MonotonicBlockPackedWriter writer = new MonotonicBlockPackedWriter(data, BLOCK_SIZE);
  long addr = 0;
  for (Number v : docToOrdCount) {
    addr += v.longValue();
    writer.add(addr);
  }
  writer.finish();
}
 
Example 7  Project: Elasticsearch  File: HyperLogLogPlusPlus.java
/**
 * Compute the required precision so that <code>count</code> distinct entries
 * would be counted with linear counting.
 */
public static int precisionFromThreshold(long count) {
    final long hashTableEntries = (long) Math.ceil(count / MAX_LOAD_FACTOR);
    int precision = PackedInts.bitsRequired(hashTableEntries * RamUsageEstimator.NUM_BYTES_INT);
    precision = Math.max(precision, MIN_PRECISION);
    precision = Math.min(precision, MAX_PRECISION);
    return precision;
}
 
Example 8  Project: Elasticsearch  File: OrdinalsBuilder.java
public OrdinalsBuilder(long numTerms, int maxDoc, float acceptableOverheadRatio) throws IOException {
    this.maxDoc = maxDoc;
    int startBitsPerValue = 8;
    if (numTerms >= 0) {
        startBitsPerValue = PackedInts.bitsRequired(numTerms);
    }
    ordinals = new OrdinalsStore(maxDoc, startBitsPerValue, acceptableOverheadRatio);
    spare = new LongsRef();
}
 
Example 9  Project: Elasticsearch  File: OrdinalsBuilder.java
/**
 * Builds an {@link Ordinals} instance from the builders current state.
 */
public Ordinals build(Settings settings) {
    final float acceptableOverheadRatio = settings.getAsFloat("acceptable_overhead_ratio", PackedInts.FASTEST);
    final boolean forceMultiOrdinals = settings.getAsBoolean(FORCE_MULTI_ORDINALS, false);
    if (forceMultiOrdinals || numMultiValuedDocs > 0 || MultiOrdinals.significantlySmallerThanSinglePackedOrdinals(maxDoc, numDocsWithValue, getValueCount(), acceptableOverheadRatio)) {
        // MultiOrdinals can be smaller than SinglePackedOrdinals for sparse fields
        return new MultiOrdinals(this, acceptableOverheadRatio);
    } else {
        return new SinglePackedOrdinals(this, acceptableOverheadRatio);
    }
}
 
Example 10  Project: Elasticsearch  File: MultiOrdinals.java
/**
 * Return true if this impl is going to be smaller than {@link SinglePackedOrdinals} by at least 20%.
 */
public static boolean significantlySmallerThanSinglePackedOrdinals(int maxDoc, int numDocsWithValue, long numOrds, float acceptableOverheadRatio) {
    int bitsPerOrd = PackedInts.bitsRequired(numOrds);
    bitsPerOrd = PackedInts.fastestFormatAndBits(numDocsWithValue, bitsPerOrd, acceptableOverheadRatio).bitsPerValue;
    // Compute the worst-case number of bits per value for offsets, e.g. if no docs have a value at the
    // beginning of the block and all docs have one at the end of the block
    final float avgValuesPerDoc = (float) numDocsWithValue / maxDoc;
    final int maxDelta = (int) Math.ceil(OFFSETS_PAGE_SIZE * (1 - avgValuesPerDoc) * avgValuesPerDoc);
    int bitsPerOffset = PackedInts.bitsRequired(maxDelta) + 1; // +1 because of the sign
    bitsPerOffset = PackedInts.fastestFormatAndBits(maxDoc, bitsPerOffset, acceptableOverheadRatio).bitsPerValue;

    final long expectedMultiSizeInBytes = (long) numDocsWithValue * bitsPerOrd + (long) maxDoc * bitsPerOffset;
    final long expectedSingleSizeInBytes = (long) maxDoc * bitsPerOrd;
    return expectedMultiSizeInBytes < 0.8f * expectedSingleSizeInBytes;
}
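
To make the 20% rule concrete, here is a hedged, standalone sketch that runs the same estimate on made-up inputs (a sparse field where 10% of one million docs have a value and there are 50,000 distinct ordinals); the OFFSETS_PAGE_SIZE constant is an assumption standing in for the builder's real page size:

// Standalone sketch of the multi- vs. single-ordinals size comparison above.
// All inputs, and the OFFSETS_PAGE_SIZE constant, are assumptions for illustration.
import org.apache.lucene.util.packed.PackedInts;

public class MultiVsSingleSketch {
  static final int OFFSETS_PAGE_SIZE = 4096;   // assumption, stands in for the real page size

  public static void main(String[] args) {
    int maxDoc = 1_000_000, numDocsWithValue = 100_000;
    long numOrds = 50_000;
    float overhead = PackedInts.DEFAULT;

    int bitsPerOrd = PackedInts.fastestFormatAndBits(
        numDocsWithValue, PackedInts.bitsRequired(numOrds), overhead).bitsPerValue;
    float avgValuesPerDoc = (float) numDocsWithValue / maxDoc;
    int maxDelta = (int) Math.ceil(OFFSETS_PAGE_SIZE * (1 - avgValuesPerDoc) * avgValuesPerDoc);
    int bitsPerOffset = PackedInts.fastestFormatAndBits(
        maxDoc, PackedInts.bitsRequired(maxDelta) + 1, overhead).bitsPerValue;

    long multiBits = (long) numDocsWithValue * bitsPerOrd + (long) maxDoc * bitsPerOffset;
    long singleBits = (long) maxDoc * bitsPerOrd;
    System.out.println("multi=" + multiBits + " bits, single=" + singleBits
        + " bits, multi wins: " + (multiBits < 0.8f * singleBits));
  }
}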
 
Example 11  Project: Elasticsearch  File: ParentChildIndexFieldData.java
private static OrdinalMap buildOrdinalMap(AtomicParentChildFieldData[] atomicFD, String parentType) throws IOException {
    final SortedDocValues[] ordinals = new SortedDocValues[atomicFD.length];
    for (int i = 0; i < ordinals.length; ++i) {
        ordinals[i] = atomicFD[i].getOrdinalsValues(parentType);
    }
    return OrdinalMap.build(null, ordinals, PackedInts.DEFAULT);
}
 
Example 12

public void testDateCompression() throws IOException {
  try (final Directory dir = new ByteBuffersDirectory()) {
    final IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
    final IndexWriter iwriter = new IndexWriter(dir, iwc);

    final long base = 13; // prime
    final long day = 1000L * 60 * 60 * 24;

    final Document doc = new Document();
    final NumericDocValuesField dvf = new NumericDocValuesField("dv", 0);
    doc.add(dvf);
    for (int i = 0; i < 300; ++i) {
      dvf.setLongValue(base + random().nextInt(1000) * day);
      iwriter.addDocument(doc);
    }
    iwriter.forceMerge(1);
    final long size1 = dirSize(dir);
    for (int i = 0; i < 50; ++i) {
      dvf.setLongValue(base + random().nextInt(1000) * day);
      iwriter.addDocument(doc);
    }
    iwriter.forceMerge(1);
    final long size2 = dirSize(dir);
    // make sure the new longs cost less than if they had simply been packed
    assertTrue(size2 < size1 + (PackedInts.bitsRequired(day) * 50) / 8);
  }
}
 
Example 13  Project: lucene-solr  File: BinaryDocValuesWriter.java
public BinaryDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
  this.fieldInfo = fieldInfo;
  this.bytes = new PagedBytes(BLOCK_BITS);
  this.bytesOut = bytes.getDataOutput();
  this.lengths = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
  this.iwBytesUsed = iwBytesUsed;
  this.docsWithField = new DocsWithFieldSet();
  this.bytesUsed = lengths.ramBytesUsed() + docsWithField.ramBytesUsed();
  iwBytesUsed.addAndGet(bytesUsed);
}
 
Example 14  Project: lucene-solr  File: SortedSetDocValuesWriter.java
public SortedSetDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
  this.fieldInfo = fieldInfo;
  this.iwBytesUsed = iwBytesUsed;
  hash = new BytesRefHash(
      new ByteBlockPool(
          new ByteBlockPool.DirectTrackingAllocator(iwBytesUsed)),
          BytesRefHash.DEFAULT_CAPACITY,
          new DirectBytesStartArray(BytesRefHash.DEFAULT_CAPACITY, iwBytesUsed));
  pending = PackedLongValues.packedBuilder(PackedInts.COMPACT);
  pendingCounts = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
  docsWithField = new DocsWithFieldSet();
  bytesUsed = pending.ramBytesUsed() + pendingCounts.ramBytesUsed();
  iwBytesUsed.addAndGet(bytesUsed);
}
 
Example 15  Project: lucene-solr  File: SortedDocValuesWriter.java
public SortedDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
  this.fieldInfo = fieldInfo;
  this.iwBytesUsed = iwBytesUsed;
  hash = new BytesRefHash(
      new ByteBlockPool(
          new ByteBlockPool.DirectTrackingAllocator(iwBytesUsed)),
          BytesRefHash.DEFAULT_CAPACITY,
          new DirectBytesStartArray(BytesRefHash.DEFAULT_CAPACITY, iwBytesUsed));
  pending = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
  docsWithField = new DocsWithFieldSet();
  bytesUsed = pending.ramBytesUsed() + docsWithField.ramBytesUsed();
  iwBytesUsed.addAndGet(bytesUsed);
}
 
Example 16  Project: lucene-solr  File: MergeState.java
static PackedLongValues removeDeletes(final int maxDoc, final Bits liveDocs) {
  final PackedLongValues.Builder docMapBuilder = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
  int del = 0;
  for (int i = 0; i < maxDoc; ++i) {
    docMapBuilder.add(i - del);
    if (liveDocs.get(i) == false) {
      ++del;
    }
  }
  return docMapBuilder.build();
}
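
As a standalone usage sketch of such a doc map (the live-doc pattern is made up): the monotonic builder assigns every surviving doc a new, gapless id that can be read back with get().

// Standalone sketch: docs 0..4 with doc 2 deleted map onto gapless new ids.
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;

public class DocMapSketch {
  public static void main(String[] args) {
    int maxDoc = 5;
    FixedBitSet liveDocs = new FixedBitSet(maxDoc);
    liveDocs.set(0); liveDocs.set(1); liveDocs.set(3); liveDocs.set(4); // doc 2 is deleted

    PackedLongValues.Builder builder = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
    int del = 0;
    for (int i = 0; i < maxDoc; ++i) {
      builder.add(i - del);
      if (liveDocs.get(i) == false) {
        ++del;
      }
    }
    PackedLongValues docMap = builder.build();
    for (int i = 0; i < maxDoc; ++i) {
      System.out.println("old doc " + i + " -> new doc " + docMap.get(i)); // deleted docs' entries are unused
    }
  }
}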
 
Example 17  Project: lucene-solr  File: MultiDocValues.java
/** Returns a SortedDocValues for a reader's docvalues (potentially doing extremely slow things).
 * <p>
 * This is an extremely slow way to access sorted values. Instead, access them per-segment
 * with {@link LeafReader#getSortedDocValues(String)}
 * </p>  
 */
public static SortedDocValues getSortedValues(final IndexReader r, final String field) throws IOException {
  final List<LeafReaderContext> leaves = r.leaves();
  final int size = leaves.size();
  
  if (size == 0) {
    return null;
  } else if (size == 1) {
    return leaves.get(0).reader().getSortedDocValues(field);
  }
  
  boolean anyReal = false;
  final SortedDocValues[] values = new SortedDocValues[size];
  final int[] starts = new int[size+1];
  long totalCost = 0;
  for (int i = 0; i < size; i++) {
    LeafReaderContext context = leaves.get(i);
    SortedDocValues v = context.reader().getSortedDocValues(field);
    if (v == null) {
      v = DocValues.emptySorted();
    } else {
      anyReal = true;
      totalCost += v.cost();
    }
    values[i] = v;
    starts[i] = context.docBase;
  }
  starts[size] = r.maxDoc();
  
  if (anyReal == false) {
    return null;
  } else {
    IndexReader.CacheHelper cacheHelper = r.getReaderCacheHelper();
    IndexReader.CacheKey owner = cacheHelper == null ? null : cacheHelper.getKey();
    OrdinalMap mapping = OrdinalMap.build(owner, values, PackedInts.DEFAULT);
    return new MultiSortedDocValues(values, starts, mapping, totalCost);
  }
}
 
Example 18  Project: lucene-solr  File: MultiDocValues.java
/** Returns a SortedSetDocValues for a reader's docvalues (potentially doing extremely slow things).
 * <p>
 * This is an extremely slow way to access sorted values. Instead, access them per-segment
 * with {@link LeafReader#getSortedSetDocValues(String)}
 * </p>  
 */
public static SortedSetDocValues getSortedSetValues(final IndexReader r, final String field) throws IOException {
  final List<LeafReaderContext> leaves = r.leaves();
  final int size = leaves.size();
  
  if (size == 0) {
    return null;
  } else if (size == 1) {
    return leaves.get(0).reader().getSortedSetDocValues(field);
  }
  
  boolean anyReal = false;
  final SortedSetDocValues[] values = new SortedSetDocValues[size];
  final int[] starts = new int[size+1];
  long totalCost = 0;
  for (int i = 0; i < size; i++) {
    LeafReaderContext context = leaves.get(i);
    SortedSetDocValues v = context.reader().getSortedSetDocValues(field);
    if (v == null) {
      v = DocValues.emptySortedSet();
    } else {
      anyReal = true;
      totalCost += v.cost();
    }
    values[i] = v;
    starts[i] = context.docBase;
  }
  starts[size] = r.maxDoc();
  
  if (anyReal == false) {
    return null;
  } else {
    IndexReader.CacheHelper cacheHelper = r.getReaderCacheHelper();
    IndexReader.CacheKey owner = cacheHelper == null ? null : cacheHelper.getKey();
    OrdinalMap mapping = OrdinalMap.build(owner, values, PackedInts.DEFAULT);
    return new MultiSortedSetDocValues(values, starts, mapping, totalCost);
  }
}
 
Example 19  Project: lucene-solr  File: NumericDocValuesWriter.java
public NumericDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
  pending = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
  docsWithField = new DocsWithFieldSet();
  bytesUsed = pending.ramBytesUsed() + docsWithField.ramBytesUsed();
  this.fieldInfo = fieldInfo;
  this.iwBytesUsed = iwBytesUsed;
  iwBytesUsed.addAndGet(bytesUsed);
}
 
Example 20  Project: lucene-solr  File: IndexSorter.java
@Override
public ComparableProvider[] getComparableProviders(List<? extends LeafReader> readers) throws IOException {
  final ComparableProvider[] providers = new ComparableProvider[readers.size()];
  final SortedDocValues[] values = new SortedDocValues[readers.size()];
  for(int i=0;i<readers.size();i++) {
    final SortedDocValues sorted = valuesProvider.get(readers.get(i));
    values[i] = sorted;
  }
  OrdinalMap ordinalMap = OrdinalMap.build(null, values, PackedInts.DEFAULT);
  final int missingOrd;
  if (missingValue == SortField.STRING_LAST) {
    missingOrd = Integer.MAX_VALUE;
  } else {
    missingOrd = Integer.MIN_VALUE;
  }

  for(int readerIndex=0;readerIndex<readers.size();readerIndex++) {
    final SortedDocValues readerValues = values[readerIndex];
    final LongValues globalOrds = ordinalMap.getGlobalOrds(readerIndex);
    providers[readerIndex] = docID -> {
      if (readerValues.advanceExact(docID)) {
        // translate segment's ord to global ord space:
        return globalOrds.get(readerValues.ordValue());
      } else {
        return missingOrd;
      }
    };
  }
  return providers;
}
 
Example 21  Project: lucene-solr  File: NormValuesWriter.java
public NormValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
  docsWithField = new DocsWithFieldSet();
  pending = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
  bytesUsed = pending.ramBytesUsed() + docsWithField.ramBytesUsed();
  this.fieldInfo = fieldInfo;
  this.iwBytesUsed = iwBytesUsed;
  iwBytesUsed.addAndGet(bytesUsed);
}
 
Example 22  Project: lucene-solr  File: SortedNumericDocValuesWriter.java
public SortedNumericDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
  this.fieldInfo = fieldInfo;
  this.iwBytesUsed = iwBytesUsed;
  pending = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
  pendingCounts = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
  docsWithField = new DocsWithFieldSet();
  bytesUsed = pending.ramBytesUsed() + pendingCounts.ramBytesUsed() + docsWithField.ramBytesUsed() + RamUsageEstimator.sizeOf(currentValues);
  iwBytesUsed.addAndGet(bytesUsed);
}
 
Example 23  Project: lucene-solr  File: DocValuesFieldUpdates.java
protected DocValuesFieldUpdates(int maxDoc, long delGen, String field, DocValuesType type) {
  this.maxDoc = maxDoc;
  this.delGen = delGen;
  this.field = field;
  if (type == null) {
    throw new NullPointerException("DocValuesType must not be null");
  }
  this.type = type;
  bitsPerValue = PackedInts.bitsRequired(maxDoc - 1) + SHIFT;
  docs = new PagedMutable(1, PAGE_SIZE, bitsPerValue, PackedInts.DEFAULT);
}
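
docs above is a PagedMutable: a packed array split into fixed-size pages so it can be addressed with long indices and grown as updates arrive. A minimal standalone sketch with made-up sizes:

// Standalone sketch: a growable packed array addressed by long indices.
// Page size, capacity and values are hypothetical.
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PagedMutable;

public class PagedMutableSketch {
  public static void main(String[] args) {
    int pageSize = 1024;                               // hypothetical page size (power of two)
    int bitsPerValue = PackedInts.bitsRequired(999);   // enough for values up to 999
    PagedMutable docs = new PagedMutable(1, pageSize, bitsPerValue, PackedInts.DEFAULT);

    docs = docs.grow(10);                              // grow() returns a resized copy
    docs.set(7, 123);
    System.out.println(docs.get(7) + " stored in " + docs.size() + " slots");
  }
}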
 
Example 24  Project: lucene-solr  File: NodeHash.java
private void rehash() throws IOException {
  final PagedGrowableWriter oldTable = table;

  table = new PagedGrowableWriter(2*oldTable.size(), 1<<30, PackedInts.bitsRequired(count), PackedInts.COMPACT);
  mask = table.size()-1;
  for(long idx=0;idx<oldTable.size();idx++) {
    final long address = oldTable.get(idx);
    if (address != 0) {
      addNew(address);
    }
  }
}
 
Example 25  Project: lucene-solr  File: TestTimSorterWorstCase.java
/** Create an array for the given list of runs. */
private static PackedInts.Mutable createArray(int length, List<Integer> runs) {
  PackedInts.Mutable array = PackedInts.getMutable(length, 1, 0);
  int endRun = -1;
  for (long len : runs) {
    array.set(endRun += len, 1);
  }
  array.set(length - 1, 0);
  return array;
}
 
Example 26  Project: Elasticsearch  File: OrdinalsBuilder.java
/**
 * Return a {@link org.apache.lucene.util.packed.PackedInts.Reader} instance mapping every doc ID to its first ordinal + 1 if it exists and 0 otherwise.
 */
public PackedInts.Reader getFirstOrdinals() {
    return ordinals.firstOrdinals;
}
 
Example 27  Project: Elasticsearch  File: ParentChildIndexFieldData.java
TypeBuilder(float acceptableTransientOverheadRatio, LeafReader reader) throws IOException {
    bytes = new PagedBytes(15);
    termOrdToBytesOffset = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
    builder = new OrdinalsBuilder(-1, reader.maxDoc(), acceptableTransientOverheadRatio);
}
 
Example 28  Project: Elasticsearch  File: PackedArrayIndexFieldData.java
protected CommonSettings.MemoryStorageFormat chooseStorageFormat(LeafReader reader, PackedLongValues values, Ordinals build, RandomAccessOrds ordinals,
                                                                 long minValue, long maxValue, float acceptableOverheadRatio, int pageSize) {

    CommonSettings.MemoryStorageFormat format;

    // estimate memory usage for a single packed array
    long packedDelta = maxValue - minValue + 1; // allow for a missing value
    // packedDelta can be negative if the difference between max and min values overflows the positive range of a long.
    int bitsRequired = packedDelta < 0 ? 64 : PackedInts.bitsRequired(packedDelta);
    PackedInts.FormatAndBits formatAndBits = PackedInts.fastestFormatAndBits(reader.maxDoc(), bitsRequired, acceptableOverheadRatio);
    final long singleValuesSize = formatAndBits.format.longCount(PackedInts.VERSION_CURRENT, reader.maxDoc(), formatAndBits.bitsPerValue) * 8L;

    // ordinal memory usage
    final long ordinalsSize = build.ramBytesUsed() + values.ramBytesUsed();

    // estimate the memory signature of paged packing
    long pagedSingleValuesSize = (reader.maxDoc() / pageSize + 1) * RamUsageEstimator.NUM_BYTES_OBJECT_REF; // array of pages
    int pageIndex = 0;
    long pageMinOrdinal = Long.MAX_VALUE;
    long pageMaxOrdinal = Long.MIN_VALUE;
    for (int i = 0; i < reader.maxDoc(); ++i, pageIndex = (pageIndex + 1) % pageSize) {
        ordinals.setDocument(i);
        if (ordinals.cardinality() > 0) {
            long ordinal = ordinals.ordAt(0);
            pageMaxOrdinal = Math.max(ordinal, pageMaxOrdinal);
            pageMinOrdinal = Math.min(ordinal, pageMinOrdinal);
        }
        if (pageIndex == pageSize - 1) {
            // end of page, we now know enough to estimate memory usage
            pagedSingleValuesSize += getPageMemoryUsage(values, acceptableOverheadRatio, pageSize, pageMinOrdinal, pageMaxOrdinal);

            pageMinOrdinal = Long.MAX_VALUE;
            pageMaxOrdinal = Long.MIN_VALUE;
        }
    }

    if (pageIndex > 0) {
        // last page estimation
        pageIndex++;
        pagedSingleValuesSize += getPageMemoryUsage(values, acceptableOverheadRatio, pageSize, pageMinOrdinal, pageMaxOrdinal);
    }

    if (ordinalsSize < singleValuesSize) {
        if (ordinalsSize < pagedSingleValuesSize) {
            format = CommonSettings.MemoryStorageFormat.ORDINALS;
        } else {
            format = CommonSettings.MemoryStorageFormat.PAGED;
        }
    } else {
        if (pagedSingleValuesSize < singleValuesSize) {
            format = CommonSettings.MemoryStorageFormat.PAGED;
        } else {
            format = CommonSettings.MemoryStorageFormat.PACKED;
        }
    }
    return format;
}
 
Example 29  Project: lucene-solr  File: TestJoinUtil.java
public void testMinMaxDocs() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter iw = new RandomIndexWriter(
      random(),
      dir,
      newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false))
  );

  int minChildDocsPerParent = 2;
  int maxChildDocsPerParent = 16;
  int numParents = RandomNumbers.randomIntBetween(random(), 16, 64);
  int[] childDocsPerParent = new int[numParents];
  for (int p = 0; p < numParents; p++) {
    String parentId = Integer.toString(p);
    Document parentDoc = new Document();
    parentDoc.add(new StringField("id", parentId, Field.Store.YES));
    parentDoc.add(new StringField("type", "to", Field.Store.NO));
    parentDoc.add(new SortedDocValuesField("join_field", new BytesRef(parentId)));
    iw.addDocument(parentDoc);
    int numChildren = RandomNumbers.randomIntBetween(random(), minChildDocsPerParent, maxChildDocsPerParent);
    childDocsPerParent[p] = numChildren;
    for (int c = 0; c < numChildren; c++) {
      String childId = Integer.toString(p + c);
      Document childDoc = new Document();
      childDoc.add(new StringField("id", childId, Field.Store.YES));
      childDoc.add(new StringField("type", "from", Field.Store.NO));
      childDoc.add(new SortedDocValuesField("join_field", new BytesRef(parentId)));
      iw.addDocument(childDoc);
    }
  }
  iw.close();

  IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(dir));
  SortedDocValues[] values = new SortedDocValues[searcher.getIndexReader().leaves().size()];
  for (LeafReaderContext leadContext : searcher.getIndexReader().leaves()) {
    values[leadContext.ord] = DocValues.getSorted(leadContext.reader(), "join_field");
  }
  OrdinalMap ordinalMap = OrdinalMap.build(
      null, values, PackedInts.DEFAULT
  );
  Query fromQuery = new TermQuery(new Term("type", "from"));
  Query toQuery = new TermQuery(new Term("type", "to"));

  int iters = RandomNumbers.randomIntBetween(random(), 3, 9);
  for (int i = 1; i <= iters; i++) {
    final ScoreMode scoreMode = ScoreMode.values()[random().nextInt(ScoreMode.values().length)];
    int min = RandomNumbers.randomIntBetween(random(), minChildDocsPerParent, maxChildDocsPerParent - 1);
    int max = RandomNumbers.randomIntBetween(random(), min, maxChildDocsPerParent);
    if (VERBOSE) {
      System.out.println("iter=" + i);
      System.out.println("scoreMode=" + scoreMode);
      System.out.println("min=" + min);
      System.out.println("max=" + max);
    }
    Query joinQuery = JoinUtil.createJoinQuery("join_field", fromQuery, toQuery, searcher, scoreMode, ordinalMap, min, max);
    TotalHitCountCollector collector = new TotalHitCountCollector();
    searcher.search(joinQuery, collector);
    int expectedCount = 0;
    for (int numChildDocs : childDocsPerParent) {
      if (numChildDocs >= min && numChildDocs <= max) {
        expectedCount++;
      }
    }
    assertEquals(expectedCount, collector.getTotalHits());
  }

  searcher.getIndexReader().close();
  dir.close();
}
 
Example 30  Project: lucene-solr  File: LegacyFieldsIndexReader.java
LegacyFieldsIndexReader(IndexInput fieldsIndexIn, SegmentInfo si) throws IOException {
  maxDoc = si.maxDoc();
  int[] docBases = new int[16];
  long[] startPointers = new long[16];
  int[] avgChunkDocs = new int[16];
  long[] avgChunkSizes = new long[16];
  PackedInts.Reader[] docBasesDeltas = new PackedInts.Reader[16];
  PackedInts.Reader[] startPointersDeltas = new PackedInts.Reader[16];

  final int packedIntsVersion = fieldsIndexIn.readVInt();

  int blockCount = 0;

  for (;;) {
    final int numChunks = fieldsIndexIn.readVInt();
    if (numChunks == 0) {
      break;
    }
    if (blockCount == docBases.length) {
      final int newSize = ArrayUtil.oversize(blockCount + 1, 8);
      docBases = ArrayUtil.growExact(docBases, newSize);
      startPointers = ArrayUtil.growExact(startPointers, newSize);
      avgChunkDocs = ArrayUtil.growExact(avgChunkDocs, newSize);
      avgChunkSizes = ArrayUtil.growExact(avgChunkSizes, newSize);
      docBasesDeltas = ArrayUtil.growExact(docBasesDeltas, newSize);
      startPointersDeltas = ArrayUtil.growExact(startPointersDeltas, newSize);
    }

    // doc bases
    docBases[blockCount] = fieldsIndexIn.readVInt();
    avgChunkDocs[blockCount] = fieldsIndexIn.readVInt();
    final int bitsPerDocBase = fieldsIndexIn.readVInt();
    if (bitsPerDocBase > 32) {
      throw new CorruptIndexException("Corrupted bitsPerDocBase: " + bitsPerDocBase, fieldsIndexIn);
    }
    docBasesDeltas[blockCount] = PackedInts.getReaderNoHeader(fieldsIndexIn, PackedInts.Format.PACKED, packedIntsVersion, numChunks, bitsPerDocBase);

    // start pointers
    startPointers[blockCount] = fieldsIndexIn.readVLong();
    avgChunkSizes[blockCount] = fieldsIndexIn.readVLong();
    final int bitsPerStartPointer = fieldsIndexIn.readVInt();
    if (bitsPerStartPointer > 64) {
      throw new CorruptIndexException("Corrupted bitsPerStartPointer: " + bitsPerStartPointer, fieldsIndexIn);
    }
    startPointersDeltas[blockCount] = PackedInts.getReaderNoHeader(fieldsIndexIn, PackedInts.Format.PACKED, packedIntsVersion, numChunks, bitsPerStartPointer);

    ++blockCount;
  }

  this.docBases = ArrayUtil.copyOfSubArray(docBases, 0, blockCount);
  this.startPointers = ArrayUtil.copyOfSubArray(startPointers, 0, blockCount);
  this.avgChunkDocs = ArrayUtil.copyOfSubArray(avgChunkDocs, 0, blockCount);
  this.avgChunkSizes = ArrayUtil.copyOfSubArray(avgChunkSizes, 0, blockCount);
  this.docBasesDeltas = ArrayUtil.copyOfSubArray(docBasesDeltas, 0, blockCount);
  this.startPointersDeltas = ArrayUtil.copyOfSubArray(startPointersDeltas, 0, blockCount);
}
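
The readers above are created with getReaderNoHeader, which means the format, packed-ints version, value count and bits per value must be carried out of band (here they come from the surrounding index file). A hedged, standalone sketch of that no-header write/read round trip with made-up values:

// Standalone sketch: write a small packed block without a header and read it back.
// The buffer size and values are hypothetical.
import java.io.IOException;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.util.packed.PackedInts;

public class NoHeaderRoundTripSketch {
  public static void main(String[] args) throws IOException {
    long[] values = {3, 1, 4, 1, 5, 9, 2, 6};
    int bitsPerValue = PackedInts.bitsRequired(9);     // widest value in the block

    byte[] buffer = new byte[64];                      // generously sized scratch buffer
    ByteArrayDataOutput out = new ByteArrayDataOutput(buffer);
    PackedInts.Writer writer = PackedInts.getWriterNoHeader(
        out, PackedInts.Format.PACKED, values.length, bitsPerValue, 1024 /* buffer hint */);
    for (long v : values) {
      writer.add(v);
    }
    writer.finish();

    // The reader must be told the exact same format, version, count and width.
    PackedInts.Reader reader = PackedInts.getReaderNoHeader(
        new ByteArrayDataInput(buffer), PackedInts.Format.PACKED,
        PackedInts.VERSION_CURRENT, values.length, bitsPerValue);
    System.out.println(reader.get(5)); // prints 9
  }
}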