下面列出了org.apache.lucene.index.SegmentReadState#org.apache.lucene.store.DataInput 实例代码,或者点击链接到github查看源代码,也可以在右侧发表评论。
/**
 * Restores a {@link ForUtil} from a {@link DataInput}.
 * <p>
 * The stream is read as one VInt holding the packed-ints version, followed by 32 VInts
 * (one per table slot 1..32). Each of those carries a format id in its upper bits and
 * {@code bitsPerValue - 1} in its five lowest bits.
 *
 * @param in the input to read the serialized state from
 * @throws IOException if reading from {@code in} fails
 */
ForUtil(DataInput in) throws IOException {
  final int version = in.readVInt();
  PackedInts.checkVersion(version);

  // Index 0 is not filled here; the tables are addressed by slot numbers 1..32.
  encodedSizes = new int[33];
  encoders = new PackedInts.Encoder[33];
  decoders = new PackedInts.Decoder[33];
  iterations = new int[33];

  int slot = 1;
  while (slot <= 32) {
    final int packed = in.readVInt();
    final int valueBits = 1 + (packed & 31);
    final PackedInts.Format fmt = PackedInts.Format.byId(packed >>> 5);
    assert fmt.isSupported(valueBits);

    encodedSizes[slot] = encodedSize(fmt, version, valueBits);
    encoders[slot] = PackedInts.getEncoder(fmt, version, valueBits);
    final PackedInts.Decoder decoder = PackedInts.getDecoder(fmt, version, valueBits);
    decoders[slot] = decoder;
    iterations[slot] = computeIterations(decoder);
    ++slot;
  }
}
/**
 * Decodes one block: pulls 26 longs from {@code in} into {@code tmp} through
 * {@code readLELongs}, passes them to {@code shiftLongs}, and then assembles
 * {@code longs[26..31]} from masked fragments of {@code tmp}.
 *
 * @param in    input to read the encoded block from
 * @param tmp   scratch buffer that receives the raw longs
 * @param longs destination buffer
 * @throws IOException if reading from {@code in} fails
 */
private static void decode13(DataInput in, long[] tmp, long[] longs) throws IOException {
  in.readLELongs(tmp, 0, 26);
  shiftLongs(tmp, 26, longs, 0, 3, MASK16_13);
  // Two groups of 13 scratch longs; each group produces 3 output longs.
  int dst = 26;
  for (int src = 0; src < 26; src += 13) {
    longs[dst] = ((tmp[src] & MASK16_3) << 10)
        | ((tmp[src + 1] & MASK16_3) << 7)
        | ((tmp[src + 2] & MASK16_3) << 4)
        | ((tmp[src + 3] & MASK16_3) << 1)
        | ((tmp[src + 4] >>> 2) & MASK16_1);
    longs[dst + 1] = ((tmp[src + 4] & MASK16_2) << 11)
        | ((tmp[src + 5] & MASK16_3) << 8)
        | ((tmp[src + 6] & MASK16_3) << 5)
        | ((tmp[src + 7] & MASK16_3) << 2)
        | ((tmp[src + 8] >>> 1) & MASK16_2);
    longs[dst + 2] = ((tmp[src + 8] & MASK16_1) << 12)
        | ((tmp[src + 9] & MASK16_3) << 9)
        | ((tmp[src + 10] & MASK16_3) << 6)
        | ((tmp[src + 11] & MASK16_3) << 3)
        | (tmp[src + 12] & MASK16_3);
    dst += 3;
  }
}
/**
 * Rebuilds a document from a logged add operation and hands it to the primary node.
 * <p>
 * The record is read as a mandatory doc id followed by three nullable strings
 * (title, body, marker), in that order.
 *
 * @param c       connection forwarded to {@code primary.addOrUpdateDocument}
 * @param primary node that receives the rebuilt document
 * @param in      input positioned at the start of the logged record
 * @throws IOException if reading the record fails
 */
private void replayAddDocument(Connection c, NodeProcess primary, DataInput in) throws IOException {
  final String docId = in.readString();
  final Document replayed = new Document();
  replayed.add(new StringField("docid", docId, Field.Store.YES));

  // A title, when present, is added under two field names.
  final String titleText = readNullableString(in);
  if (titleText != null) {
    replayed.add(new StringField("title", titleText, Field.Store.NO));
    replayed.add(new TextField("titleTokenized", titleText, Field.Store.NO));
  }

  final String bodyText = readNullableString(in);
  if (bodyText != null) {
    replayed.add(new TextField("body", bodyText, Field.Store.NO));
  }

  final String markerText = readNullableString(in);
  if (markerText != null) {
    replayed.add(new StringField("marker", markerText, Field.Store.YES));
  }

  // Adds and updates are replayed through the same call because the document
  // may already be present in the index.
  // Open question carried over from the original author: what if this call fails?
  primary.addOrUpdateDocument(c, replayed, false);
}
/**
 * Reads a document as a VInt field count followed by that many (name, value) string
 * pairs, then adds it through {@code writer}.
 * <p>
 * Only the field names {@code docid}, {@code marker}, {@code title} and {@code body}
 * are understood; this is deliberately not a general-purpose decoder.
 *
 * @param in  input holding the serialized fields
 * @param out not used by this method
 * @throws IOException if reading or indexing fails
 * @throws IllegalArgumentException if an unknown field name is encountered
 */
private void handleAddDocument(DataInput in, DataOutput out) throws IOException {
  final int remainingFields = in.readVInt();
  final Document incoming = new Document();
  for (int n = 0; n < remainingFields; n++) {
    final String name = in.readString();
    final String value = in.readString();
    switch (name) {
      case "docid":
      case "marker":
        incoming.add(new StringField(name, value, Field.Store.YES));
        break;
      case "title":
        // Titles are stored verbatim and additionally indexed under a second name.
        incoming.add(new StringField("title", value, Field.Store.YES));
        incoming.add(new Field("titleTokenized", value, tokenizedWithTermVectors));
        break;
      case "body":
        incoming.add(new Field("body", value, tokenizedWithTermVectors));
        break;
      default:
        throw new IllegalArgumentException("unhandled field name " + name);
    }
  }
  writer.addDocument(incoming);
}
/**
 * Reads a {@link FuzzySet} from the given input.
 * <p>
 * Layout consumed: an int version, an extra string when the version equals
 * {@code VERSION_SPI}, an int bloom size, an int word count, and that many longs.
 *
 * @param in input positioned at the start of a serialized set
 * @return the reconstructed set
 * @throws IOException if reading from {@code in} fails
 */
public static FuzzySet deserialize(DataInput in) throws IOException {
  final int formatVersion = in.readInt();
  if (formatVersion == VERSION_SPI) {
    // This version carries an extra string; it is consumed and discarded.
    in.readString();
  }
  final HashFunction hasher = hashFunctionForVersion(formatVersion);

  final int maxBit = in.readInt();
  final int wordCount = in.readInt();
  final long[] words = new long[wordCount];
  int w = 0;
  while (w < wordCount) {
    words[w] = in.readLong();
    w++;
  }

  // The bit set is created with one more bit than the stored bloom size.
  return new FuzzySet(new FixedBitSet(words, maxBit + 1), maxBit, hasher);
}
/**
 * Pulls {@code numBytes} bytes from the provided {@link DataInput} into a list of blocks.
 * <p>
 * The block size starts at 2 and is doubled until it reaches {@code numBytes} or
 * {@code maxBlockSize}, whichever comes first.
 *
 * @param in           input to copy the bytes from
 * @param numBytes     number of bytes to read
 * @param maxBlockSize bound at which doubling of the block size stops
 * @throws IOException if reading from {@code in} fails
 */
public BytesStore(DataInput in, long numBytes, int maxBlockSize) throws IOException {
  int bits = 1;
  int size = 2;
  while (size < numBytes && size < maxBlockSize) {
    size <<= 1;
    bits++;
  }
  this.blockBits = bits;
  this.blockSize = size;
  this.blockMask = size - 1;

  // Every block is full-sized except possibly the last one.
  for (long remaining = numBytes; remaining > 0; ) {
    final byte[] block = new byte[(int) Math.min(size, remaining)];
    in.readBytes(block, 0, block.length);
    blocks.add(block);
    remaining -= block.length;
  }

  // Keeps .getPosition working: the write cursor sits at the end of the last block.
  // NOTE(review): with numBytes <= 0 no block is added and this lookup would fail;
  // callers presumably always pass a positive count -- confirm.
  nextWrite = blocks.get(blocks.size() - 1).length;
}
/**
 * Reads a block header: line count, three base file pointers, the term-states base
 * offset and the middle-term offset, in that order.
 *
 * @param input input positioned at the start of a block header
 * @param reuse instance to reset and return, or {@code null} to allocate a new one
 * @return the populated header
 * @throws CorruptIndexException if the line count or one of the offsets is out of range
 * @throws IOException if reading from {@code input} fails
 */
public BlockHeader read(DataInput input, BlockHeader reuse) throws IOException {
  final int numLines = input.readVInt();
  final boolean validLineCount = numLines > 0 && numLines <= UniformSplitTermsWriter.MAX_NUM_BLOCK_LINES;
  if (!validLineCount) {
    throw new CorruptIndexException("Illegal number of lines in block: " + numLines, input);
  }

  final long docsFP = input.readVLong();
  final long positionsFP = input.readVLong();
  final long payloadsFP = input.readVLong();

  final int statesOffset = input.readVInt();
  if (statesOffset < 0) {
    throw new CorruptIndexException("Illegal termStatesBaseOffset= " + statesOffset, input);
  }

  final int middleOffset = input.readVInt();
  if (middleOffset < 0) {
    throw new CorruptIndexException("Illegal middleTermOffset= " + middleOffset, input);
  }

  final BlockHeader target = reuse != null ? reuse : new BlockHeader();
  return target.reset(numLines, docsFP, positionsFP, payloadsFP, statesOffset, middleOffset);
}
/**
 * Decodes one block: pulls 40 longs from {@code in} into {@code tmp} through
 * {@code readLELongs}, passes them to {@code shiftLongs}, and then assembles
 * {@code longs[40..63]} from masked fragments of {@code tmp}.
 *
 * @param in    input to read the encoded block from
 * @param tmp   scratch buffer that receives the raw longs
 * @param longs destination buffer
 * @throws IOException if reading from {@code in} fails
 */
private static void decode20(DataInput in, long[] tmp, long[] longs) throws IOException {
  in.readLELongs(tmp, 0, 40);
  shiftLongs(tmp, 40, longs, 0, 12, MASK32_20);
  // Eight groups of 5 scratch longs; each group produces 3 output longs.
  int dst = 40;
  for (int src = 0; src < 40; src += 5) {
    longs[dst] = ((tmp[src] & MASK32_12) << 8)
        | ((tmp[src + 1] >>> 4) & MASK32_8);
    longs[dst + 1] = ((tmp[src + 1] & MASK32_4) << 16)
        | ((tmp[src + 2] & MASK32_12) << 4)
        | ((tmp[src + 3] >>> 8) & MASK32_4);
    longs[dst + 2] = ((tmp[src + 3] & MASK32_8) << 12)
        | (tmp[src + 4] & MASK32_12);
    dst += 3;
  }
}
/**
 * Reads an output that is either a single long or a pair of longs.
 * <p>
 * The lowest bit of the first VLong selects the shape: 0 means a single value is stored
 * in the remaining bits, 1 means the remaining bits hold the first value and a second
 * VLong follows.
 *
 * @param in input to read from
 * @return {@code NO_OUTPUT} for a single value of zero, a {@link Long} for any other
 *         single value, or a {@code TwoLongs} for a pair
 * @throws IOException if reading from {@code in} fails
 */
@Override
public Object read(DataInput in) throws IOException {
  final long header = in.readVLong();
  final long leading = header >>> 1;

  if ((header & 1) != 0) {
    // Pair: the second value is stored as its own VLong.
    final long trailing = in.readVLong();
    return new TwoLongs(leading, trailing);
  }

  if (leading == 0) {
    return NO_OUTPUT;
  }
  return Long.valueOf(leading);
}
/**
 * Creates an array whose content is retrieved from the given DataInput.
 *
 * @param packedIntsVersion the version used to compute how many bytes are read from {@code in}
 * @param in                a DataInput, positioned at the start of Packed64-content.
 * @param valueCount        the number of elements.
 * @param bitsPerValue      the number of bits available for any given value.
 * @throws java.io.IOException if the values for the backing array could not
 *                             be retrieved.
 */
public Packed64(int packedIntsVersion, DataInput in, int valueCount, int bitsPerValue)
    throws IOException {
  super(valueCount, bitsPerValue);
  final PackedInts.Format packedFormat = PackedInts.Format.PACKED;
  // How much to read depends on the version the data was written with ...
  final long bytesToRead = packedFormat.byteCount(packedIntsVersion, valueCount, bitsPerValue);
  // ... whereas the backing array is sized for the current version.
  final int arrayLength = packedFormat.longCount(PackedInts.VERSION_CURRENT, valueCount, bitsPerValue);
  blocks = new long[arrayLength];

  // Whole longs first.
  final long wholeLongs = bytesToRead / 8;
  for (int i = 0; i < wholeLongs; ++i) {
    blocks[i] = in.readLong();
  }

  // Any leftover bytes fill the last block starting from its most significant byte.
  final int tailBytes = (int) (bytesToRead % 8);
  if (tailBytes != 0) {
    long tail = 0;
    int shift = 56;
    for (int consumed = 0; consumed < tailBytes; ++consumed) {
      tail |= (in.readByte() & 0xFFL) << shift;
      shift -= 8;
    }
    blocks[blocks.length - 1] = tail;
  }

  maskRight = ~0L << (BLOCK_SIZE - bitsPerValue) >>> (BLOCK_SIZE - bitsPerValue);
  bpvMinusBlockSize = bitsPerValue - BLOCK_SIZE;
}
/**
 * Loads an FST from a file, enumerates it, and prints how many visited positions fall
 * into each of three categories: list arcs ({@code bytesPerArc() == 0}), direct
 * addressing arcs ({@code nodeFlags() == FST.ARCS_FOR_DIRECT_ADDRESSING}) and, for
 * everything else, binary search arcs.
 *
 * @param fstFilePath path of the file holding the serialized FST
 * @throws IOException if the file cannot be read or the FST cannot be loaded
 */
private static void countFSTArcs(String fstFilePath) throws IOException {
  final byte[] fstBytes = Files.readAllBytes(Paths.get(fstFilePath));
  final DataInput fstInput = new ByteArrayDataInput(fstBytes);
  // The same input is passed for both input arguments of the FST constructor.
  final FST<BytesRef> fst = new FST<>(fstInput, fstInput, ByteSequenceOutputs.getSingleton());
  final BytesRefFSTEnum<BytesRef> walker = new BytesRefFSTEnum<>(fst);

  int listArcs = 0;
  int directArcs = 0;
  int binaryArcs = 0;
  while (walker.next() != null) {
    if (walker.arcs[walker.upto].bytesPerArc() != 0) {
      if (walker.arcs[walker.upto].nodeFlags() == FST.ARCS_FOR_DIRECT_ADDRESSING) {
        directArcs++;
      } else {
        binaryArcs++;
      }
    } else {
      listArcs++;
    }
  }

  final String report = "direct addressing arcs = " + directArcs
      + ", binary search arcs = " + binaryArcs
      + " list arcs = " + listArcs;
  System.out.println(report);
}
/**
 * Reads a double stored in a variable-length format, consuming between one and nine bytes.
 * <p>
 * The first byte selects the encoding:
 * <ul>
 *   <li>{@code 0xFF}: eight more bytes hold the raw long bits of the double
 *       (the form used for negative values);</li>
 *   <li>{@code 0xFE}: four more bytes hold the raw int bits of a float;</li>
 *   <li>high bit set otherwise: the low seven bits minus one are the value, an integer
 *       in [-1..124];</li>
 *   <li>high bit clear: this byte is the top byte of the long bits, followed by seven
 *       more bytes (a positive double).</li>
 * </ul>
 *
 * @param in input to read from
 * @return the decoded value
 * @throws IOException if reading from {@code in} fails
 */
static double readZDouble(DataInput in) throws IOException {
  final int head = in.readByte() & 0xFF;

  if (head == 0xFF) {
    return Double.longBitsToDouble(in.readLong());
  }
  if (head == 0xFE) {
    return Float.intBitsToFloat(in.readInt());
  }
  if ((head & 0x80) != 0) {
    return (head & 0x7f) - 1;
  }

  // Remaining seven bytes arrive as int, short, byte, from most to least significant.
  final long top = ((long) head) << 56;
  final long upper = (in.readInt() & 0xFFFFFFFFL) << 24;
  final long middle = (in.readShort() & 0xFFFFL) << 8;
  final long lowest = in.readByte() & 0xFFL;
  return Double.longBitsToDouble(top | upper | middle | lowest);
}
/**
 * Decodes one block: pulls 10 longs from {@code in} into {@code tmp} through
 * {@code readLELongs}, passes them to {@code shiftLongs}, and then assembles
 * {@code longs[10..15]} from masked fragments of {@code tmp}.
 *
 * @param in    input to read the encoded block from
 * @param tmp   scratch buffer that receives the raw longs
 * @param longs destination buffer
 * @throws IOException if reading from {@code in} fails
 */
private static void decode5(DataInput in, long[] tmp, long[] longs) throws IOException {
  in.readLELongs(tmp, 0, 10);
  shiftLongs(tmp, 10, longs, 0, 3, MASK8_5);
  // Two groups of 5 scratch longs; each group produces 3 output longs.
  int dst = 10;
  for (int src = 0; src < 10; src += 5) {
    longs[dst] = ((tmp[src] & MASK8_3) << 2)
        | ((tmp[src + 1] >>> 1) & MASK8_2);
    longs[dst + 1] = ((tmp[src + 1] & MASK8_1) << 4)
        | ((tmp[src + 2] & MASK8_3) << 1)
        | ((tmp[src + 3] >>> 2) & MASK8_1);
    longs[dst + 2] = ((tmp[src + 3] & MASK8_2) << 3)
        | (tmp[src + 4] & MASK8_3);
    dst += 3;
  }
}
/**
 * Decodes one block: pulls 20 longs from {@code in} into {@code tmp} through
 * {@code readLELongs}, passes them to {@code shiftLongs}, and then assembles
 * {@code longs[20..31]} from masked fragments of {@code tmp}.
 *
 * @param in    input to read the encoded block from
 * @param tmp   scratch buffer that receives the raw longs
 * @param longs destination buffer
 * @throws IOException if reading from {@code in} fails
 */
private static void decode10(DataInput in, long[] tmp, long[] longs) throws IOException {
  in.readLELongs(tmp, 0, 20);
  shiftLongs(tmp, 20, longs, 0, 6, MASK16_10);
  // Four groups of 5 scratch longs; each group produces 3 output longs.
  int dst = 20;
  for (int src = 0; src < 20; src += 5) {
    longs[dst] = ((tmp[src] & MASK16_6) << 4)
        | ((tmp[src + 1] >>> 2) & MASK16_4);
    longs[dst + 1] = ((tmp[src + 1] & MASK16_2) << 8)
        | ((tmp[src + 2] & MASK16_6) << 2)
        | ((tmp[src + 3] >>> 4) & MASK16_2);
    longs[dst + 2] = ((tmp[src + 3] & MASK16_4) << 6)
        | (tmp[src + 4] & MASK16_6);
    dst += 3;
  }
}
/**
 * Decodes one block: pulls 22 longs from {@code in} into {@code tmp} through
 * {@code readLELongs}, passes them to {@code shiftLongs}, and then assembles
 * {@code longs[22..31]} from masked fragments of {@code tmp}.
 *
 * @param in    input to read the encoded block from
 * @param tmp   scratch buffer that receives the raw longs
 * @param longs destination buffer
 * @throws IOException if reading from {@code in} fails
 */
private static void decode11(DataInput in, long[] tmp, long[] longs) throws IOException {
  in.readLELongs(tmp, 0, 22);
  shiftLongs(tmp, 22, longs, 0, 5, MASK16_11);
  // Two groups of 11 scratch longs; each group produces 5 output longs.
  int dst = 22;
  for (int src = 0; src < 22; src += 11) {
    longs[dst] = ((tmp[src] & MASK16_5) << 6)
        | ((tmp[src + 1] & MASK16_5) << 1)
        | ((tmp[src + 2] >>> 4) & MASK16_1);
    longs[dst + 1] = ((tmp[src + 2] & MASK16_4) << 7)
        | ((tmp[src + 3] & MASK16_5) << 2)
        | ((tmp[src + 4] >>> 3) & MASK16_2);
    longs[dst + 2] = ((tmp[src + 4] & MASK16_3) << 8)
        | ((tmp[src + 5] & MASK16_5) << 3)
        | ((tmp[src + 6] >>> 2) & MASK16_3);
    longs[dst + 3] = ((tmp[src + 6] & MASK16_2) << 9)
        | ((tmp[src + 7] & MASK16_5) << 4)
        | ((tmp[src + 8] >>> 1) & MASK16_4);
    longs[dst + 4] = ((tmp[src + 8] & MASK16_1) << 10)
        | ((tmp[src + 9] & MASK16_5) << 5)
        | (tmp[src + 10] & MASK16_5);
    dst += 5;
  }
}
/**
 * Retrieves PackedInts as a {@link ReaderIterator}.
 * <p>
 * After the codec header, the stream is read as three VInts: bits per value, value
 * count and format id.
 *
 * @param in  positioned at the beginning of a stored packed int structure.
 * @param mem how much memory the iterator is allowed to use to read-ahead (likely to speed up iteration)
 * @return an iterator to access the values
 * @throws IOException if the structure could not be retrieved.
 * @lucene.internal
 */
public static ReaderIterator getReaderIterator(DataInput in, int mem) throws IOException {
  final int headerVersion = CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_CURRENT);

  final int bitsPerValue = in.readVInt();
  assert bitsPerValue > 0 && bitsPerValue <= 64: "bitsPerValue=" + bitsPerValue;

  final int numValues = in.readVInt();
  final int formatId = in.readVInt();
  final Format storedFormat = Format.byId(formatId);

  return getReaderIteratorNoHeader(in, storedFormat, headerVersion, numValues, bitsPerValue, mem);
}
/**
 * Points this reader at a new stream of <code>valueCount</code> values contained in
 * <code>in</code>. The block size is left as it is.
 *
 * @param in         the input to read subsequent values from
 * @param valueCount number of values available in {@code in}; asserted to be non-negative
 */
public void reset(DataInput in, long valueCount) {
  this.in = in;
  assert valueCount >= 0;
  this.valueCount = valueCount;
  // Start over: no values consumed yet, and the block offset set to the block size.
  this.ord = 0;
  this.off = this.blockSize;
}
/**
 * Reads a checkpoint in the layout handled by this method: a long offset, an int
 * operation count and a long generation. The remaining {@code Checkpoint} fields are
 * not read from the input and are filled with fixed defaults.
 *
 * @param in input positioned at the start of the checkpoint
 * @return the checkpoint built from the three stored values plus defaults
 * @throws IOException if reading from {@code in} fails
 */
static Checkpoint readCheckpointV5_0_0(final DataInput in) throws IOException {
  final long storedOffset = in.readLong();
  final int storedNumOps = in.readInt();
  final long storedGeneration = in.readLong();
  return new Checkpoint(
      storedOffset,
      storedNumOps,
      storedGeneration,
      SequenceNumbers.NO_OPS_PERFORMED,   // minSeqNo
      SequenceNumbers.NO_OPS_PERFORMED,   // maxSeqNo
      SequenceNumbers.UNASSIGNED_SEQ_NO,  // globalCheckpoint
      -1L,                                // minTranslogGeneration
      SequenceNumbers.UNASSIGNED_SEQ_NO); // trimmedAboveSeqNo
}
/**
 * Decodes one block: pulls 24 longs from {@code in} into {@code tmp} through
 * {@code readLELongs}, passes them to {@code shiftLongs}, and then assembles
 * {@code longs[24..31]} from masked fragments of {@code tmp}.
 *
 * @param in    input to read the encoded block from
 * @param tmp   scratch buffer that receives the raw longs
 * @param longs destination buffer
 * @throws IOException if reading from {@code in} fails
 */
private static void decode12(DataInput in, long[] tmp, long[] longs) throws IOException {
  in.readLELongs(tmp, 0, 24);
  shiftLongs(tmp, 24, longs, 0, 4, MASK16_12);
  // Eight groups of 3 scratch longs; each group produces 1 output long.
  int dst = 24;
  for (int src = 0; src < 24; src += 3) {
    longs[dst] = ((tmp[src] & MASK16_4) << 8)
        | ((tmp[src + 1] & MASK16_4) << 4)
        | (tmp[src + 2] & MASK16_4);
    dst += 1;
  }
}
/**
 * Reads a string that may be absent. A lead byte of 0 means {@code null}, a lead byte
 * of 1 means a string follows; any other lead byte is rejected.
 *
 * @param in input to read from
 * @return the string, or {@code null} if the lead byte was 0
 * @throws CorruptIndexException if the lead byte is neither 0 nor 1
 * @throws IOException if reading from {@code in} fails
 */
private String readNullableString(DataInput in) throws IOException {
  final byte lead = in.readByte();
  switch (lead) {
    case 0:
      return null;
    case 1:
      return in.readString();
    default:
      throw new CorruptIndexException("invalid string lead byte " + lead, in);
  }
}
/**
 * Verifies that {@code is} yields, for each index below {@code COUNT}, the expected
 * int as a VInt and as a fixed int, then the expected long as a VLong and as a fixed
 * long.
 *
 * @param is input to check
 * @throws IOException if reading from {@code is} fails
 */
private void checkRandomReads(DataInput is) throws IOException {
  int idx = 0;
  while (idx < COUNT) {
    // Each value was written twice: variable-length first, fixed-width second.
    assertEquals(INTS[idx], is.readVInt());
    assertEquals(INTS[idx], is.readInt());
    assertEquals(LONGS[idx], is.readVLong());
    assertEquals(LONGS[idx], is.readLong());
    idx++;
  }
}
/**
 * Skips over one output: reads its VInt length and, when that is non-zero, skips that
 * many bytes.
 *
 * @param in input positioned at the start of an output
 * @throws IOException if reading or skipping fails
 */
@Override
public void skipOutput(DataInput in) throws IOException {
  final int payloadLength = in.readVInt();
  if (payloadLength == 0) {
    // Nothing follows the length prefix.
    return;
  }
  in.skipBytes(payloadLength);
}
/**
 * Reads a {@link SortedSetSortField}: field name, an int reverse flag (1 means reversed),
 * the selector type, and an int missing-value code.
 * <p>
 * Missing-value code 1 selects {@code SortField.STRING_FIRST}, 2 selects
 * {@code SortField.STRING_LAST}; any other code leaves the missing value unset.
 *
 * @param in input to read from
 * @return the deserialized sort field
 * @throws IOException if reading from {@code in} fails
 */
@Override
public SortField readSortField(DataInput in) throws IOException {
  final String fieldName = in.readString();
  final boolean reversed = in.readInt() == 1;
  final SortField result = new SortedSetSortField(fieldName, reversed, readSelectorType(in));

  switch (in.readInt()) {
    case 1:
      result.setMissingValue(SortField.STRING_FIRST);
      break;
    case 2:
      result.setMissingValue(SortField.STRING_LAST);
      break;
    default:
      break;
  }
  return result;
}
/**
 * Reads a {@link SortField}: field name, type, an int reverse flag (1 means reversed),
 * and an int flag telling whether a missing value follows (1 means it does).
 * <p>
 * When present, the missing value is decoded according to the sort type: an int code
 * for strings (1 selects {@code STRING_FIRST}, anything else {@code STRING_LAST}), a
 * raw int or long for INT and LONG, and a sortable int or long for FLOAT and DOUBLE.
 *
 * @param in input to read from
 * @return the deserialized sort field
 * @throws IllegalArgumentException if a missing value is flagged for an unsupported type
 * @throws IOException if reading from {@code in} fails
 */
@Override
public SortField readSortField(DataInput in) throws IOException {
  final String fieldName = in.readString();
  final SortField result = new SortField(fieldName, readType(in), in.readInt() == 1);

  final boolean hasMissingValue = in.readInt() == 1;
  if (!hasMissingValue) {
    return result;
  }

  switch (result.type) {
    case STRING:
      result.setMissingValue(in.readInt() == 1 ? STRING_FIRST : STRING_LAST);
      break;
    case INT:
      result.setMissingValue(in.readInt());
      break;
    case LONG:
      result.setMissingValue(in.readLong());
      break;
    case FLOAT:
      result.setMissingValue(NumericUtils.sortableIntToFloat(in.readInt()));
      break;
    case DOUBLE:
      result.setMissingValue(NumericUtils.sortableLongToDouble(in.readLong()));
      break;
    default:
      throw new IllegalArgumentException("Cannot deserialize sort of type " + result.type);
  }
  return result;
}
/**
 * Reads a variable-length long of one to nine bytes.
 * <p>
 * Each of the first eight bytes contributes its low seven bits, least significant group
 * first, and a clear high bit marks the last byte. If all eight have the high bit set,
 * a ninth byte supplies all eight of its bits as bits 56..63, so values with the top
 * bit set (negative longs) can be represented.
 *
 * @param in input to read from
 * @return the decoded value
 * @throws IOException if reading from {@code in} fails
 */
static long readVLong(DataInput in) throws IOException {
  long value = 0;
  for (int shift = 0; shift < 56; shift += 7) {
    final byte next = in.readByte();
    value |= (next & 0x7FL) << shift;
    if (next >= 0) {
      // High bit clear: this was the final byte.
      return value;
    }
  }
  // Ninth byte: taken whole, no continuation bit.
  value |= (in.readByte() & 0xFFL) << 56;
  return value;
}
BufferedBinaryDocValues(PackedLongValues lengths, int maxLength, DataInput bytesIterator, DocIdSetIterator docsWithFields) {
this.value = new BytesRefBuilder();
this.value.grow(maxLength);
this.lengthsIterator = lengths.iterator();
this.bytesIterator = bytesIterator;
this.docsWithField = docsWithFields;
}
/**
 * Reads a {@link BlockTermState} from the provided {@link DataInput}.
 * <p>
 * Simpler variant of {@link Lucene84PostingsReader#decodeTerm(DataInput, FieldInfo, BlockTermState, boolean)}.
 * <p>
 * File pointers are stored as deltas and are added to the matching base pointer
 * passed in. Which values are present depends on the field's index options, on
 * whether the field has payloads, and on how {@code docFreq} and {@code totalTermFreq}
 * compare with {@code BLOCK_SIZE}.
 *
 * @param baseDocStartFP  base added to the stored doc start pointer
 * @param basePosStartFP  base added to the stored position start pointer
 * @param basePayStartFP  base added to the stored payload start pointer
 * @param termStatesInput input positioned at the start of a term state
 * @param fieldInfo       field whose index options and payload flag drive the decoding
 * @param reuse {@link BlockTermState} to reuse; or null to create a new one.
 * @return the populated term state
 * @throws IOException if reading from {@code termStatesInput} fails
 */
public BlockTermState readTermState(long baseDocStartFP, long basePosStartFP, long basePayStartFP,
                                    DataInput termStatesInput, FieldInfo fieldInfo, BlockTermState reuse) throws IOException {
  final IndexOptions options = fieldInfo.getIndexOptions();
  final boolean withFreqs = options != IndexOptions.DOCS;
  final boolean withPositions = options.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;

  final IntBlockTermState state;
  if (reuse == null) {
    state = new IntBlockTermState();
  } else {
    state = reset((IntBlockTermState) reuse);
  }

  state.docFreq = termStatesInput.readVInt();
  if (withFreqs) {
    // Stored as the excess of totalTermFreq over docFreq.
    state.totalTermFreq = state.docFreq + termStatesInput.readVLong();
  } else {
    state.totalTermFreq = state.docFreq;
  }
  assert state.totalTermFreq >= state.docFreq;

  if (state.docFreq != 1) {
    state.docStartFP = baseDocStartFP + termStatesInput.readVLong();
  } else {
    // A single-document term stores the doc id in place of a file pointer.
    state.singletonDocID = termStatesInput.readVInt();
  }

  if (withPositions) {
    state.posStartFP = basePosStartFP + termStatesInput.readVLong();
    final boolean withOffsets = options.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
    if (withOffsets || fieldInfo.hasPayloads()) {
      state.payStartFP = basePayStartFP + termStatesInput.readVLong();
    }
    if (state.totalTermFreq > BLOCK_SIZE) {
      state.lastPosBlockOffset = termStatesInput.readVLong();
    }
  }

  if (state.docFreq > BLOCK_SIZE) {
    state.skipOffset = termStatesInput.readVLong();
  }
  return state;
}
/**
 * Skips one encoded block (described by the original documentation as 128 integers).
 * <p>
 * The first byte is a token: its low five bits are the bits per value and its upper
 * three bits the number of exceptions, each of which occupies two bytes. When bits per
 * value is zero, a single VLong is consumed in place of packed data.
 *
 * @param in input positioned at the start of a block
 * @throws IOException if reading or skipping fails
 */
void skip(DataInput in) throws IOException {
  final int token = Byte.toUnsignedInt(in.readByte());
  final int valueBits = token & 0x1f;
  final int exceptionCount = token >>> 5;

  int bytesToSkip = exceptionCount << 1;
  if (valueBits != 0) {
    bytesToSkip = forUtil.numBytes(valueBits) + bytesToSkip;
  } else {
    in.readVLong();
  }
  in.skipBytes(bytesToSkip);
}
/**
 * Reads {@code length} bytes from the given {@link DataInput} and stores
 * them at {@code offset} in {@code bytes.bytes}.
 * <p>
 * {@code bytes.length} becomes {@code offset + length}, and the backing array is grown
 * through {@code ArrayUtil.grow} before the read.
 *
 * @param input  source of the bytes
 * @param bytes  destination; its {@code offset} is asserted to be 0
 * @param offset position in {@code bytes.bytes} at which the new bytes start
 * @param length number of bytes to read
 * @throws IOException if reading from {@code input} fails
 */
protected void readBytes(DataInput input, BytesRef bytes, int offset, int length) throws IOException {
  assert bytes.offset == 0;
  final int newLength = offset + length;
  bytes.length = newLength;
  bytes.bytes = ArrayUtil.grow(bytes.bytes, newLength);
  input.readBytes(bytes.bytes, offset, length);
}
/**
 * Reads fields metadata that was stored in encoded form.
 * <p>
 * A VLong length is read first, that many bytes are handed to {@code blockDecoder},
 * and the decoded bytes are then parsed by {@code readUnencodedFieldsMetadata}.
 *
 * @param numFields           forwarded to {@code readUnencodedFieldsMetadata}
 * @param metadataInput       input positioned at the encoded length
 * @param blockDecoder        decoder applied to the encoded bytes
 * @param fieldInfos          forwarded to {@code readUnencodedFieldsMetadata}
 * @param fieldMetadataReader forwarded to {@code readUnencodedFieldsMetadata}
 * @param maxNumDocs          forwarded to {@code readUnencodedFieldsMetadata}
 * @return the metadata parsed from the decoded bytes
 * @throws CorruptIndexException if the stored length is negative
 * @throws IOException if reading or decoding fails
 */
protected Collection<FieldMetadata> readEncodedFieldsMetadata(int numFields, DataInput metadataInput, BlockDecoder blockDecoder,
                                                              FieldInfos fieldInfos, FieldMetadata.Serializer fieldMetadataReader,
                                                              int maxNumDocs) throws IOException {
  final long storedLength = metadataInput.readVLong();
  if (storedLength < 0) {
    throw new CorruptIndexException("Illegal encoded length: " + storedLength, metadataInput);
  }

  final BytesRef plain = blockDecoder.decode(metadataInput, storedLength);
  // Only the first plain.length bytes of the decoded array are exposed to the parser.
  final DataInput plainInput = new ByteArrayDataInput(plain.bytes, 0, plain.length);
  return readUnencodedFieldsMetadata(numFields, plainInput, fieldInfos, fieldMetadataReader, maxNumDocs);
}