下面列出了org.apache.lucene.search.SimpleCollector#org.apache.lucene.index.BinaryDocValues 实例代码,或者点击链接到github查看源代码,也可以在右侧发表评论。
/**
 * Indexes one field that carries both a point value and BINARY doc values into a
 * {@code MemoryIndex}, then checks that the point statistics and the doc value can
 * both be read back from the single leaf reader.
 */
public void testIndexingPointsAndDocValues() throws Exception {
  // A single field type enabling points and BINARY doc values together.
  FieldType pointAndDocValuesType = new FieldType();
  pointAndDocValuesType.setDimensions(1, 4);
  pointAndDocValuesType.setDocValuesType(DocValuesType.BINARY);
  pointAndDocValuesType.freeze();

  // "term" encodes to four bytes in UTF-8, matching the 4 passed to setDimensions above.
  byte[] termBytes = "term".getBytes(StandardCharsets.UTF_8);
  Document document = new Document();
  document.add(new BinaryPoint("field", termBytes, pointAndDocValuesType));

  MemoryIndex memoryIndex = MemoryIndex.fromDocument(document, analyzer);
  LeafReader reader = memoryIndex.createSearcher().getIndexReader().leaves().get(0).reader();

  // Point side: exactly one value, which is therefore both the minimum and the maximum.
  assertEquals(1, reader.getPointValues("field").size());
  assertArrayEquals(termBytes, reader.getPointValues("field").getMinPackedValue());
  assertArrayEquals(termBytes, reader.getPointValues("field").getMaxPackedValue());

  // Doc-values side: the only document (id 0) exposes the same bytes.
  BinaryDocValues binaryValues = reader.getBinaryDocValues("field");
  assertEquals(0, binaryValues.nextDoc());
  assertEquals("term", binaryValues.binaryValue().utf8ToString());
}
/**
 * Returns a {@code ShapeValues} view over the binary doc values of {@code fieldName}
 * in the given segment; each value is decoded on demand through {@code binaryCodec}.
 *
 * @param readerContext the segment whose doc values are read
 * @return shape values backed by the segment's binary doc values
 * @throws IOException if the doc values cannot be obtained
 */
@Override
public ShapeValues getValues(LeafReaderContext readerContext) throws IOException {
  final BinaryDocValues encodedShapes = DocValues.getBinary(readerContext.reader(), fieldName);
  return new ShapeValues() {
    @Override
    public boolean advanceExact(int doc) throws IOException {
      return encodedShapes.advanceExact(doc);
    }

    @Override
    public Shape value() throws IOException {
      final BytesRef encoded = encodedShapes.binaryValue();
      // Wrap only the slice of the backing array that belongs to this value.
      final ByteArrayInputStream rawBytes =
          new ByteArrayInputStream(encoded.bytes, encoded.offset, encoded.length);
      return binaryCodec.readShape(new DataInputStream(rawBytes));
    }
  };
}
/**
 * Returns binary values for {@code field}, preferring doc values stored in the segment
 * and falling back to the field cache otherwise.
 *
 * @param reader the segment reader to read from
 * @param field the field name
 * @param acceptableOverheadRatio forwarded to the cache key used for the cached entry
 * @return the field's binary (or sorted) doc values, an empty instance when the field is
 *         unknown or not indexed, or an iterator over the cached values
 * @throws IllegalStateException if the field has doc values of some other type
 * @throws IOException if reading from the reader fails
 */
public BinaryDocValues getTerms(LeafReader reader, String field, float acceptableOverheadRatio) throws IOException {
  BinaryDocValues docValues = reader.getBinaryDocValues(field);
  if (docValues == null) {
    docValues = reader.getSortedDocValues(field);
  }
  if (docValues != null) {
    // Not cached here by FieldCacheImpl (cached instead
    // per-thread by SegmentReader):
    return docValues;
  }

  final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
  if (fieldInfo == null) {
    return DocValues.emptyBinary();
  }
  if (fieldInfo.getDocValuesType() != DocValuesType.NONE) {
    throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + fieldInfo.getDocValuesType());
  }
  if (fieldInfo.getIndexOptions() == IndexOptions.NONE) {
    return DocValues.emptyBinary();
  }

  final CacheKey cacheKey = new CacheKey(field, acceptableOverheadRatio);
  final BinaryDocValuesImpl cached = (BinaryDocValuesImpl) caches.get(BinaryDocValues.class).get(reader, cacheKey);
  return cached.iterator();
}
/**
 * Returns the binary doc values for {@code field}, reusing a previously created instance
 * from {@code _binaryDocValuesCache} when one exists.
 *
 * @param field the field whose binary doc values are requested
 * @return the cached or newly created doc values (whatever {@code newBinary} returned)
 * @throws IOException if creating the doc values fails
 */
@Override
public BinaryDocValues getBinary(FieldInfo field) throws IOException {
  // First lookup happens without holding the lock.
  BinaryDocValues cached = _binaryDocValuesCache.get(field.number);
  if (cached == null) {
    synchronized (_binaryDocValuesCache) {
      // Look again under the lock: another thread may have populated the entry meanwhile.
      cached = _binaryDocValuesCache.get(field.number);
      if (cached == null) {
        cached = newBinary(field);
        // Only remember the instance when caching is enabled and there is something to store.
        if (_cache && cached != null) {
          _binaryDocValuesCache.put(field.number, cached);
        }
      }
    }
  }
  return cached;
}
/**
 * Wraps the delegate reader's binary doc values so that each lookup is first checked
 * against {@code _accessControl}.
 *
 * @param field the field name
 * @return {@code null} when the wrapped reader has no binary doc values for the field,
 *         otherwise a filtering view over them
 * @throws IOException if the wrapped reader fails
 */
@Override
public BinaryDocValues getBinaryDocValues(String field) throws IOException {
  final BinaryDocValues delegate = in.getBinaryDocValues(field);
  if (delegate == null) {
    return null;
  }
  return new BinaryDocValues() {
    @Override
    public void get(int docID, BytesRef result) {
      try {
        if (!_accessControl.hasAccess(ReadType.BINARY_DOC_VALUE, docID)) {
          // Default missing value.
          result.bytes = MISSING;
          result.length = 0;
          result.offset = 0;
          return;
        }
        delegate.get(docID, result);
      } catch (IOException e) {
        // get(int, BytesRef) declares no checked exception, so rethrow unchecked.
        throw new RuntimeException(e);
      }
    }
  };
}
/**
 * Reads the "bin" binary doc values of documents 0..3 through the secure reader and
 * expects values for documents 0 and 2 and an empty {@code BytesRef} for documents 1 and 3
 * (presumably the documents the test's access control denies; confirm against the fixture).
 */
@Test
public void testBinaryDocValues() throws IOException {
  final SecureAtomicReader reader = getSecureReader();
  final BinaryDocValues values = reader.getBinaryDocValues("bin");
  final BytesRef actual = new BytesRef();

  // NOTE(review): getBytes() without a charset uses the platform default; consider making it explicit.
  values.get(0, actual);
  assertEquals(new BytesRef("0".getBytes()), actual);

  values.get(1, actual);
  assertEquals(new BytesRef(), actual);

  values.get(2, actual);
  assertEquals(new BytesRef("2".getBytes()), actual);

  values.get(3, actual);
  assertEquals(new BytesRef(), actual);
}
/**
 * Exposes the field's binary doc values, together with its docs-with-field bits, as a
 * single-valued {@code SortedBinaryDocValues}.
 *
 * @return the wrapped doc values
 * @throws IllegalStateException if the doc values cannot be loaded
 */
@Override
public SortedBinaryDocValues getBytesValues() {
  try {
    return FieldData.singleton(
        DocValues.getBinary(reader, field),
        DocValues.getDocsWithField(reader, field));
  } catch (IOException e) {
    throw new IllegalStateException("Cannot load doc values", e);
  }
}
/**
 * Builds one {@code int[]} per document for {@code key.field} by parsing each document's
 * binary doc value as a JSON array. Documents with an empty value, and every document when
 * the field has no binary doc values, get an empty array.
 *
 * @param reader the segment reader
 * @param key identifies the field to load
 * @param setDocsWithField not used by this implementation
 * @return an {@code IntList} wrapping the per-document arrays
 * @throws IOException if reading the doc values fails
 */
@Override
protected Accountable createValue(final AtomicReader reader, CacheKey key, boolean setDocsWithField)
    throws IOException {
  final int docCount = reader.maxDoc();
  final int[][] rows = new int[docCount][];
  final BinaryDocValues source = reader.getBinaryDocValues(key.field);

  for (int docId = 0; docId < docCount; ++docId) {
    final int[] row;
    if (source == null) {
      row = new int[0];
    } else {
      final String json = source.get(docId).utf8ToString();
      if (StringUtils.isEmpty(json)) {
        row = new int[0];
      } else {
        final JSONArray parsed = JSON.parseArray(json);
        row = new int[parsed.size()];
        for (int k = 0; k < row.length; ++k) {
          row[k] = parsed.getInteger(k);
        }
      }
    }
    rows[docId] = row;
  }
  return new IntList(rows);
}
/**
 * Builds one {@code long[]} per document for {@code key.field} by parsing each document's
 * binary doc value as a JSON array. Documents with an empty value, and every document when
 * the field has no binary doc values, get an empty array.
 *
 * @param reader the segment reader
 * @param key identifies the field to load
 * @param setDocsWithField not used by this implementation
 * @return a {@code LongList} wrapping the per-document arrays
 * @throws IOException if reading the doc values fails
 */
@Override
protected Accountable createValue(final AtomicReader reader, CacheKey key, boolean setDocsWithField)
    throws IOException {
  int maxDoc = reader.maxDoc();
  final long[][] matrix = new long[maxDoc][];
  BinaryDocValues valuesIn = reader.getBinaryDocValues(key.field);
  if (valuesIn == null) {
    for (int i = 0; i < maxDoc; ++i) {
      matrix[i] = new long[0];
    }
    return new LongList(matrix);
  }
  for (int i = 0; i < maxDoc; ++i) {
    String str = valuesIn.get(i).utf8ToString();
    if (StringUtils.isEmpty(str)) {
      matrix[i] = new long[0];
      continue;
    }
    JSONArray array = JSON.parseArray(str);
    matrix[i] = new long[array.size()];
    for (int j = 0; j < array.size(); ++j) {
      // Read as long: getInteger would narrow values outside the int range before storing them.
      matrix[i][j] = array.getLong(j);
    }
  }
  return new LongList(matrix);
}
/**
 * Builds one {@code float[]} per document for {@code key.field} by parsing each document's
 * binary doc value as a JSON array. Documents with an empty value, and every document when
 * the field has no binary doc values, get an empty array.
 *
 * @param reader the segment reader
 * @param key identifies the field to load
 * @param setDocsWithField not used by this implementation
 * @return a {@code FloatList} wrapping the per-document arrays
 * @throws IOException if reading the doc values fails
 */
@Override
protected Accountable createValue(final AtomicReader reader, CacheKey key, boolean setDocsWithField)
    throws IOException {
  final int docCount = reader.maxDoc();
  final float[][] rows = new float[docCount][];
  final BinaryDocValues source = reader.getBinaryDocValues(key.field);

  for (int docId = 0; docId < docCount; ++docId) {
    final float[] row;
    if (source == null) {
      row = new float[0];
    } else {
      final String json = source.get(docId).utf8ToString();
      if (StringUtils.isEmpty(json)) {
        row = new float[0];
      } else {
        final JSONArray parsed = JSON.parseArray(json);
        row = new float[parsed.size()];
        for (int k = 0; k < row.length; ++k) {
          row[k] = parsed.getFloat(k);
        }
      }
    }
    rows[docId] = row;
  }
  return new FloatList(rows);
}
/**
 * Builds one {@code double[]} per document for {@code key.field} by parsing each document's
 * binary doc value as a JSON array. Documents with an empty value, and every document when
 * the field has no binary doc values, get an empty array.
 *
 * @param reader the segment reader
 * @param key identifies the field to load
 * @param setDocsWithField not used by this implementation
 * @return a {@code DoubleList} wrapping the per-document arrays
 * @throws IOException if reading the doc values fails
 */
@Override
protected Accountable createValue(final AtomicReader reader, CacheKey key, boolean setDocsWithField)
    throws IOException {
  int maxDoc = reader.maxDoc();
  final double[][] matrix = new double[maxDoc][];
  BinaryDocValues valuesIn = reader.getBinaryDocValues(key.field);
  if (valuesIn == null) {
    for (int i = 0; i < maxDoc; ++i) {
      matrix[i] = new double[0];
    }
    return new DoubleList(matrix);
  }
  for (int i = 0; i < maxDoc; ++i) {
    String str = valuesIn.get(i).utf8ToString();
    if (StringUtils.isEmpty(str)) {
      matrix[i] = new double[0];
      continue;
    }
    JSONArray array = JSON.parseArray(str);
    matrix[i] = new double[array.size()];
    for (int j = 0; j < array.size(); ++j) {
      // Read as double: getFloat would round every element to float precision first.
      matrix[i][j] = array.getDouble(j);
    }
  }
  return new DoubleList(matrix);
}
/**
 * Builds one {@code String[]} per document for {@code key.field} by parsing each document's
 * binary doc value as a JSON array. Documents with an empty value, and every document when
 * the field has no binary doc values, get an empty array.
 *
 * @param reader the segment reader
 * @param key identifies the field to load
 * @param setDocsWithField not used by this implementation
 * @return a {@code StringList} wrapping the per-document arrays
 * @throws IOException if reading the doc values fails
 */
@Override
protected Accountable createValue(final AtomicReader reader, CacheKey key, boolean setDocsWithField)
    throws IOException {
  final int docCount = reader.maxDoc();
  final String[][] rows = new String[docCount][];
  final BinaryDocValues source = reader.getBinaryDocValues(key.field);

  for (int docId = 0; docId < docCount; ++docId) {
    final String[] row;
    if (source == null) {
      row = new String[0];
    } else {
      final String json = source.get(docId).utf8ToString();
      if (StringUtils.isEmpty(json)) {
        row = new String[0];
      } else {
        final JSONArray parsed = JSON.parseArray(json);
        row = new String[parsed.size()];
        for (int k = 0; k < row.length; ++k) {
          row[k] = parsed.getString(k);
        }
      }
    }
    rows[docId] = row;
  }
  return new StringList(rows);
}
/**
 * Adds a binary doc-values field and a text field under the same name and checks that
 * the postings (frequency, position, offsets) of the text field are what the text alone
 * would produce, and that the doc value is still readable afterwards.
 */
public void testDocValuesDoNotAffectBoostPositionsOrOffset() throws Exception {
  Document document = new Document();
  document.add(new BinaryDocValuesField("text", new BytesRef("quick brown fox")));
  document.add(new TextField("text", "quick brown fox", Field.Store.NO));

  MemoryIndex memoryIndex = MemoryIndex.fromDocument(document, analyzer, true, true);
  LeafReader reader = memoryIndex.createSearcher().getIndexReader().leaves().get(0).reader();
  TermsEnum termIterator = reader.terms("text").iterator();

  // Terms come back in sorted order: brown, fox, quick.
  assertEquals("brown", termIterator.next().utf8ToString());
  PostingsEnum postings = termIterator.postings(null, PostingsEnum.OFFSETS);
  assertEquals(0, postings.nextDoc());
  assertEquals(1, postings.freq());
  assertEquals(1, postings.nextPosition());
  assertEquals(6, postings.startOffset());
  assertEquals(11, postings.endOffset());

  assertEquals("fox", termIterator.next().utf8ToString());
  postings = termIterator.postings(postings, PostingsEnum.OFFSETS);
  assertEquals(0, postings.nextDoc());
  assertEquals(1, postings.freq());
  assertEquals(2, postings.nextPosition());
  assertEquals(12, postings.startOffset());
  assertEquals(15, postings.endOffset());

  assertEquals("quick", termIterator.next().utf8ToString());
  postings = termIterator.postings(postings, PostingsEnum.OFFSETS);
  assertEquals(0, postings.nextDoc());
  assertEquals(1, postings.freq());
  assertEquals(0, postings.nextPosition());
  assertEquals(0, postings.startOffset());
  assertEquals(5, postings.endOffset());

  // The doc value stored under the same field name is untouched.
  BinaryDocValues storedValue = reader.getBinaryDocValues("text");
  assertEquals(0, storedValue.nextDoc());
  assertEquals("quick brown fox", storedValue.binaryValue().utf8ToString());
}
/**
 * Reads the binary doc value of {@code field} for one document.
 *
 * @param docid the document to look up
 * @param field the doc-values field name
 * @param dvType the doc-values type recorded on the result
 * @return a {@code DocValues} holding a deep copy of the document's value, or empty when
 *         the document has no value for the field
 * @throws IOException if reading the doc values fails
 */
private Optional<DocValues> createBinaryDocValues(int docid, String field, DocValuesType dvType)
    throws IOException {
  final BinaryDocValues values = IndexUtils.getBinaryDocValues(reader, field);
  if (!values.advanceExact(docid)) {
    return Optional.empty();
  }
  // Deep-copy the bytes so the result does not alias the iterator's buffer.
  final DocValues wrapped = DocValues.of(
      dvType,
      Collections.singletonList(BytesRef.deepCopyOf(values.binaryValue())),
      Collections.emptyList());
  return Optional.of(wrapped);
}
/**
 * Returns function values that, for each document, take the document's binary doc value
 * from {@code field} and report the document frequency of that term in {@code qfield}
 * across the top-level reader. Documents without a value, or whose value is not a term
 * of {@code qfield}, yield 0.
 *
 * @param context the function-query context map (not read here)
 * @param readerContext the segment being scored
 * @return per-document doc-frequency values
 * @throws IOException if doc values or terms cannot be obtained
 */
@Override
public FunctionValues getValues(Map<Object, Object> context, LeafReaderContext readerContext) throws IOException
{
  final BinaryDocValues terms = DocValues.getBinary(readerContext.reader(), field);
  final IndexReader top = ReaderUtil.getTopLevelContext(readerContext).reader();
  final Terms queryFieldTerms = MultiTerms.getTerms(top, qfield);
  final TermsEnum termsEnum = queryFieldTerms == null ? TermsEnum.EMPTY : queryFieldTerms.iterator();

  return new IntDocValues(this) {
    int lastDocID = -1;

    @Override
    public int intVal(int doc) throws IOException {
      // The doc-values iterator only moves forward, so going backwards is rejected.
      if (doc < lastDocID) {
        throw new IllegalArgumentException("docs were sent out-of-order: lastDocID=" + lastDocID + " vs docID=" + doc);
      }
      lastDocID = doc;

      int positionedOn = terms.docID();
      if (positionedOn < doc) {
        positionedOn = terms.advance(doc);
      }
      if (positionedOn != doc) {
        // The iterator is not on this document, so it has no value here.
        return 0;
      }
      final BytesRef value = terms.binaryValue();
      return termsEnum.seekExact(value) ? termsEnum.docFreq() : 0;
    }
  };
}
/**
 * Returns the wrapped reader's binary doc values, wrapping them in an
 * {@code OrdinalMappingBinaryDocValues} when {@code field} is one of the facet fields.
 *
 * @param field the field name
 * @return the plain or ordinal-mapping doc values
 * @throws IOException if the wrapped reader fails
 */
@Override
public BinaryDocValues getBinaryDocValues(String field) throws IOException {
  if (!facetFields.contains(field)) {
    return in.getBinaryDocValues(field);
  }
  final OrdinalsReader ordinals = getOrdinalsReader(field);
  return new OrdinalMappingBinaryDocValues(ordinals.getReader(in.getContext()), in.getBinaryDocValues(field));
}
/**
 * Opens the index and taxonomy, then checks the "tag" and "id" facet counts and the
 * relationship between the "bdv" and "cbdv" binary doc values of every document.
 *
 * <p>Both readers are opened in a try-with-resources block so they are closed even when
 * an assertion or a search call fails part-way through.
 *
 * @param indexDir directory holding the search index
 * @param taxoDir directory holding the taxonomy index
 * @throws IOException if opening, searching or closing the readers fails
 */
private void verifyResults(Directory indexDir, Directory taxoDir) throws IOException {
  try (DirectoryReader indexReader = DirectoryReader.open(indexDir);
       DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir)) {
    IndexSearcher searcher = newSearcher(indexReader);

    FacetsCollector collector = new FacetsCollector();
    FacetsCollector.search(searcher, new MatchAllDocsQuery(), 10, collector);

    // tag facets
    Facets tagFacets = new FastTaxonomyFacetCounts("$tags", taxoReader, facetConfig, collector);
    FacetResult result = tagFacets.getTopChildren(10, "tag");
    for (LabelAndValue lv : result.labelValues) {
      if (VERBOSE) {
        System.out.println(lv);
      }
      assertEquals(NUM_DOCS, lv.value.intValue());
    }

    // id facets
    Facets idFacets = new FastTaxonomyFacetCounts(taxoReader, facetConfig, collector);
    FacetResult idResult = idFacets.getTopChildren(10, "id");
    assertEquals(NUM_DOCS, idResult.childCount);
    assertEquals(NUM_DOCS * 2, idResult.value); // each "id" appears twice

    // Every document must have both values, with cbdv equal to twice bdv.
    BinaryDocValues bdv = MultiDocValues.getBinaryValues(indexReader, "bdv");
    BinaryDocValues cbdv = MultiDocValues.getBinaryValues(indexReader, "cbdv");
    for (int i = 0; i < indexReader.maxDoc(); i++) {
      assertEquals(i, bdv.nextDoc());
      assertEquals(i, cbdv.nextDoc());
      assertEquals(Integer.parseInt(cbdv.binaryValue().utf8ToString()), Integer.parseInt(bdv.binaryValue().utf8ToString())*2);
    }
  }
}
/**
 * Picks the single-valued terms collector implementation for the given score mode.
 *
 * @param svFunction supplies the binary doc values to collect from
 * @param mode the score mode; {@code None} and {@code Avg} get dedicated implementations
 * @return the collector matching {@code mode}
 */
static GenericTermsCollector createCollectorSV(Function<BinaryDocValues> svFunction,
    ScoreMode mode) {
  final GenericTermsCollector collector;
  switch (mode) {
    case None:
      // No scores are needed, so a plain terms collector is wrapped.
      collector = wrap(new TermsCollector.SV(svFunction));
      break;
    case Avg:
      collector = new SV.Avg(svFunction);
      break;
    default:
      collector = new SV(svFunction, mode);
      break;
  }
  return collector;
}
/**
 * Fetches the delegate's binary doc values for {@code field} and wraps them in an
 * asserting implementation.
 *
 * @param field the field; asserted to have BINARY doc values
 * @return the asserting wrapper around the delegate's doc values
 * @throws IOException if the delegate fails
 */
@Override
public BinaryDocValues getBinary(FieldInfo field) throws IOException {
  if (merging) {
    // While merging, calls are checked against the thread recorded in creationThread.
    AssertingCodec.assertThread("DocValuesProducer", creationThread);
  }
  assert field.getDocValuesType() == DocValuesType.BINARY;

  final BinaryDocValues delegateValues = in.getBinary(field);
  assert delegateValues != null;

  return new AssertingLeafReader.AssertingBinaryDocValues(delegateValues, maxDoc);
}
/**
 * Replaces {@code caches} with a fresh map and registers one cache instance per
 * supported value type, each constructed with this object.
 */
private synchronized void init() {
// NOTE(review): the map is assigned to the field before it is populated; presumably every
// reader of `caches` synchronizes on this object as well - confirm.
caches = new HashMap<>(6);
caches.put(Long.TYPE, new LongCache(this));
caches.put(BinaryDocValues.class, new BinaryDocValuesCache(this));
caches.put(SortedDocValues.class, new SortedDocValuesCache(this));
caches.put(DocTermOrds.class, new DocTermOrdsCache(this));
caches.put(DocsWithFieldCache.class, new DocsWithFieldCache(this));
}
/**
 * Returns the wrapped reader's binary doc values when present; otherwise falls back to
 * the field cache, but only for fields whose configured type is {@code Type.BINARY}.
 *
 * @param field the field name
 * @return stored doc values, field-cache terms, or {@code null} when neither applies
 * @throws IOException if reading fails
 */
@Override
public BinaryDocValues getBinaryDocValues(String field) throws IOException {
  final BinaryDocValues stored = in.getBinaryDocValues(field);
  if (stored != null) {
    return stored;
  }
  final Type configuredType = getType(field);
  return configuredType == Type.BINARY ? FieldCache.DEFAULT.getTerms(in, field) : null;
}
/**
 * Hides the binary doc values of {@code insaneField}; every other field is delegated
 * to the wrapped reader unchanged.
 *
 * @param field the field name
 * @return {@code null} for {@code insaneField}, otherwise the wrapped reader's result
 * @throws IOException if the wrapped reader fails
 */
@Override
public BinaryDocValues getBinaryDocValues(String field) throws IOException {
  return insaneField.equals(field) ? null : in.getBinaryDocValues(field);
}
/**
 * Returns the binary value stored for {@code docId}, or an empty {@code BytesRef} when
 * {@code bdv} is {@code null} or the iterator does not land exactly on {@code docId}.
 *
 * @param bdv the doc-values iterator, may be {@code null}
 * @param docId the document to read
 * @return the document's value or an empty {@code BytesRef}
 * @throws IOException if advancing the iterator fails
 */
private BytesRef getBytesRef(BinaryDocValues bdv, int docId)
    throws IOException {
  // NOTE(review): advance() is presumably only well-defined for targets beyond the
  // iterator's current document; confirm that callers pass ascending doc ids.
  final boolean positioned = bdv != null && bdv.advance(docId) == docId;
  return positioned ? bdv.binaryValue() : new BytesRef(BytesRef.EMPTY_BYTES);
}
/**
 * Re-ranks candidate documents by their distance to {@code queryFeature}, keeping at most
 * {@code maximumHits} results in the returned sorted set.
 *
 * @param docIterator ids of the candidate documents
 * @param binaryValues doc values holding each candidate's serialized feature
 * @param queryFeature the feature to compare against
 * @param tmpFeature scratch feature that is overwritten for every candidate
 * @param maximumHits upper bound on the number of results kept
 * @param searcher used to load the stored document for each kept result
 * @return the kept results, ordered by the natural ordering of {@code CachingSimpleResult}
 * @throws IOException if doc values or stored documents cannot be read
 */
private TreeSet<CachingSimpleResult> getReRankedResults(
    Iterator<Integer> docIterator, BinaryDocValues binaryValues,
    GlobalFeature queryFeature, GlobalFeature tmpFeature,
    int maximumHits, IndexSearcher searcher) throws IOException {
  final TreeSet<CachingSimpleResult> kept = new TreeSet<>();
  double worstKeptDistance = -1f;

  while (docIterator.hasNext()) {
    final int docId = docIterator.next();

    // Read the feature bytes from doc values rather than from stored fields.
    final BytesRef encoded = getBytesRef(binaryValues, docId);
    tmpFeature.setByteArrayRepresentation(encoded.bytes, encoded.offset, encoded.length);
    final double distance = queryFeature.getDistance(tmpFeature);

    if (kept.size() < maximumHits) {
      // Still room: keep the candidate unconditionally.
      kept.add(new CachingSimpleResult(distance, searcher.doc(docId), docId));
      worstKeptDistance = kept.last().getDistance();
      continue;
    }
    if (distance < worstKeptDistance) {
      // Closer than the current worst: recycle that entry with the new values.
      final CachingSimpleResult recycled = kept.last();
      kept.remove(recycled);
      recycled.set(distance, searcher.doc(docId), docId);
      kept.add(recycled);
      worstKeptDistance = kept.last().getDistance();
    }
  }
  return kept;
}
/**
 * Looks up the binary value of one document.
 *
 * @param bdv the doc-values iterator, may be {@code null}
 * @param docId the document to read
 * @return the value when the iterator lands exactly on {@code docId}; otherwise an empty
 *         {@code BytesRef}
 * @throws IOException if advancing the iterator fails
 */
private BytesRef getBytesRef(BinaryDocValues bdv, int docId)
    throws IOException {
  // NOTE(review): advance() is presumably only well-defined for targets beyond the
  // iterator's current document; confirm that callers pass ascending doc ids.
  if (bdv == null || bdv.advance(docId) != docId) {
    return new BytesRef(BytesRef.EMPTY_BYTES);
  }
  return bdv.binaryValue();
}
/**
 * Reads the doc-values metadata file for the segment, initializes the entry maps and the
 * per-type caches, then opens the data file and validates its header.
 *
 * <p>Both inputs are closed if their setup fails, so a bad header does not leave an open
 * file handle behind.
 *
 * @param state the segment read state (directory, segment info, field infos, context)
 * @param dataCodec codec name expected in the data file header
 * @param dataExtension file extension of the data file
 * @param metaCodec codec name expected in the metadata file header
 * @param metaExtension file extension of the metadata file
 * @throws IOException if a file cannot be opened or a header check fails
 */
DiskDocValuesProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec,
    String metaExtension) throws IOException {
  String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
  // read in the entries from the metadata file.
  IndexInput in = state.directory.openInput(metaName, state.context);
  boolean success = false;
  try {
    CodecUtil.checkHeader(in, metaCodec, DiskDocValuesFormat.VERSION_START, DiskDocValuesFormat.VERSION_START);
    numerics = new ConcurrentHashMap<Integer, NumericEntry>();
    ords = new ConcurrentHashMap<Integer, NumericEntry>();
    ordIndexes = new ConcurrentHashMap<Integer, NumericEntry>();
    binaries = new ConcurrentHashMap<Integer, BinaryEntry>();
    _binaryDocValuesCache = new ConcurrentHashMap<Integer, BinaryDocValues>();
    _numericDocValuesCache = new ConcurrentHashMap<Integer, NumericDocValues>();
    _sortedDocValuesCache = new ConcurrentHashMap<Integer, SortedDocValues>();
    _sortedSetDocValuesCache = new ConcurrentHashMap<Integer, SortedSetDocValues>();
    readFields(in, state.fieldInfos);
    success = true;
  } finally {
    if (success) {
      IOUtils.close(in);
    } else {
      IOUtils.closeWhileHandlingException(in);
    }
  }

  String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension);
  // Open into a local first so the input can be closed if the header check throws;
  // the field is only assigned once the file is known to be valid.
  IndexInput dataIn = state.directory.openInput(dataName, state.context);
  success = false;
  try {
    CodecUtil.checkHeader(dataIn, dataCodec, DiskDocValuesFormat.VERSION_START, DiskDocValuesFormat.VERSION_START);
    success = true;
  } finally {
    if (!success) {
      IOUtils.closeWhileHandlingException(dataIn);
    }
  }
  data = dataIn;
}
/**
 * Creates the binary doc values for {@code field}, choosing the fixed-length reader when
 * every value has the same length and the variable-length reader otherwise.
 *
 * @param field the field whose entry is looked up in {@code binaries}
 * @return a new doc-values instance for the field
 * @throws IOException if the underlying reader cannot be created
 */
private BinaryDocValues newBinary(FieldInfo field) throws IOException {
  final BinaryEntry entry = binaries.get(field.number);
  final boolean fixedLength = entry.minLength == entry.maxLength;
  return fixedLength ? getFixedBinary(field, entry) : getVariableBinary(field, entry);
}
/**
 * Creates doc values for a field whose values all have length {@code bytes.maxLength};
 * value {@code id} is read from {@code bytes.offset + id * bytes.maxLength}.
 *
 * @param field the field being read (not used in the body)
 * @param bytes the metadata entry describing where the values are stored
 * @return doc values that read each value from a clone of the data input
 */
private BinaryDocValues getFixedBinary(FieldInfo field, final BinaryEntry bytes) {
  final IndexInput baseInput = this.data.clone();
  return new LongBinaryDocValues() {
    // Hands out a clone of the base input (presumably one per thread - see ThreadValue).
    private final ThreadValue<IndexInput> clonedInput = new ThreadValue<IndexInput>() {
      @Override
      protected IndexInput initialValue() {
        return baseInput.clone();
      }
    };

    @Override
    public void get(long id, BytesRef result) {
      final long start = bytes.offset + id * bytes.maxLength;
      try {
        final IndexInput input = clonedInput.get();
        input.seek(start);
        // NOTE: we could have one buffer, but various consumers (e.g.
        // FieldComparatorSource)
        // assume "they" own the bytes after calling this!
        final byte[] value = new byte[bytes.maxLength];
        input.readBytes(value, 0, value.length);
        result.bytes = value;
        result.offset = 0;
        result.length = value.length;
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    }
  };
}
/**
 * Creates sorted doc values for {@code field}: ordinals come from a block-packed reader
 * positioned at the field's ordinal entry, and ordinals are resolved to bytes through
 * the field's binary doc values.
 *
 * @param field the field to read
 * @return a new sorted doc-values instance
 * @throws IOException if the ordinals or binary values cannot be read
 */
private SortedDocValues newSortedDocValues(FieldInfo field) throws IOException {
  final int distinctValues = (int) binaries.get(field.number).count;
  final BinaryDocValues valuesByOrd = getBinary(field);

  // The trace spans only the construction of the packed ordinals reader.
  final Tracer tracer = Trace.trace("getSorted - BlockPackedReader - create");
  final BlockPackedReader docToOrd;
  try {
    final NumericEntry ordEntry = ords.get(field.number);
    final IndexInput ordInput = this.data.clone();
    ordInput.seek(ordEntry.offset);
    docToOrd = new BlockPackedReader(ordInput, ordEntry.packedIntsVersion, ordEntry.blockSize, ordEntry.count, true);
  } finally {
    tracer.done();
  }

  return new SortedDocValues() {
    @Override
    public int getOrd(int docID) {
      return (int) docToOrd.get(docID);
    }

    @Override
    public void lookupOrd(int ord, BytesRef result) {
      valuesByOrd.get(ord, result);
    }

    @Override
    public int getValueCount() {
      return distinctValues;
    }
  };
}
/**
 * Returns function values that look up each document's {@code joinField} term in
 * {@code results} and convert the match to a double. A missing match yields
 * {@code defaultValue}; an {@code Iterable} match yields the largest converted
 * non-null element, or {@code defaultValue} when it has none.
 *
 * @param context the function-query context map (not read here)
 * @param readerContext the segment being scored
 * @return per-document double values
 * @throws IOException if the join terms cannot be loaded
 */
@Override
@SuppressWarnings("rawtypes")
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
  final BinaryDocValues joinValues = FieldCache.DEFAULT.getTerms(readerContext.reader(), joinField, false);
  return new DoubleDocValues(this) {
    @Override
    public double doubleVal(int doc) {
      final BytesRef joinKey = joinValues.get(doc);
      if (joinKey == null) {
        throw new RuntimeException("No such doc: " + doc);
      }

      final Object match = results.getResult(joinKey.utf8ToString());
      if (match == null) {
        return defaultValue;
      }
      if (!(match instanceof Iterable)) {
        return getValue(match);
      }

      // Several matches: keep the largest value among the non-null elements.
      boolean found = false;
      double largest = 0;
      for (Object element : (Iterable) match) {
        if (element == null) {
          continue;
        }
        final double candidate = getValue(element);
        if (!found || candidate > largest) {
          largest = candidate;
          found = true;
        }
      }
      return found ? largest : defaultValue;
    }
  };
}