下面列出了 org.apache.lucene.index.IndexOptions#DOCS 的实例代码。您可以点击链接到 GitHub 查看源代码，也可以在右侧发表评论。
/**
 * Writes the per-field summary directory followed by the trailer and codec footer, then
 * closes the terms output together with the postings writer and the terms index writer.
 *
 * <p>If {@code out} has already been released (it is {@code null}) this method does nothing.
 *
 * @throws IOException if writing the summary or closing any of the resources fails
 */
@Override
public void close() throws IOException {
  if (out == null) {
    return; // nothing left to flush or close
  }
  try {
    // Position of the directory; handed to writeTrailer once the directory is written.
    final long directoryStart = out.getFilePointer();
    out.writeVInt(fields.size());
    for (FieldMetaData meta : fields) {
      out.writeVInt(meta.fieldInfo.number);
      out.writeVLong(meta.numTerms);
      out.writeVLong(meta.termsStartPointer);
      // sumTotalTermFreq is only written for fields whose index options are not DOCS.
      final boolean docsOnly = meta.fieldInfo.getIndexOptions() == IndexOptions.DOCS;
      if (!docsOnly) {
        out.writeVLong(meta.sumTotalTermFreq);
      }
      out.writeVLong(meta.sumDocFreq);
      out.writeVInt(meta.docCount);
    }
    writeTrailer(directoryStart);
    CodecUtil.writeFooter(out);
  } finally {
    // Always release every writer, and clear the reference so a second close() is a no-op.
    IOUtils.close(out, postingsWriter, termsIndexWriter);
    out = null;
  }
}
/**
 * Maps a serialized byte code to its {@link IndexOptions} constant.
 *
 * @param input the input the byte was read from; passed to the exception for context
 * @param b the encoded value, expected to be in the range 0..4
 * @return the matching {@link IndexOptions}
 * @throws IOException a {@code CorruptIndexException} when {@code b} is not a known code
 */
private static IndexOptions getIndexOptions(IndexInput input, byte b) throws IOException {
  if (b == 0) {
    return IndexOptions.NONE;
  }
  if (b == 1) {
    return IndexOptions.DOCS;
  }
  if (b == 2) {
    return IndexOptions.DOCS_AND_FREQS;
  }
  if (b == 3) {
    return IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
  }
  if (b == 4) {
    return IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
  }
  // Any other value is not a code this method knows how to decode.
  throw new CorruptIndexException("invalid IndexOptions byte: " + b, input);
}
/**
 * Decodes the byte representation of an {@link IndexOptions} value.
 *
 * @param input source of the byte, used only to give the corruption error its context
 * @param b encoded index options (0 through 4 are recognized)
 * @return the decoded {@link IndexOptions}
 * @throws IOException a {@code CorruptIndexException} if {@code b} is outside 0..4
 */
private static IndexOptions getIndexOptions(IndexInput input, byte b) throws IOException {
  final IndexOptions decoded;
  switch (b) {
    case 0:
      decoded = IndexOptions.NONE;
      break;
    case 1:
      decoded = IndexOptions.DOCS;
      break;
    case 2:
      decoded = IndexOptions.DOCS_AND_FREQS;
      break;
    case 3:
      decoded = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
      break;
    case 4:
      decoded = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
      break;
    default:
      // Unrecognized code: report it as index corruption rather than guessing.
      throw new CorruptIndexException("invalid IndexOptions byte: " + b, input);
  }
  return decoded;
}
/**
 * Derives the {@link IndexOptions} from this object's indexing flags.
 *
 * <p>The flags are consulted in a fixed order and the first match wins: not indexed,
 * omit term frequencies and positions, omit positions, store offsets with positions.
 * When none applies the result is {@link IndexOptions#DOCS_AND_FREQS_AND_POSITIONS}.
 *
 * @return the index options implied by the flags
 */
@Override
public IndexOptions indexOptions() {
  if (!indexed()) {
    return IndexOptions.NONE;
  }
  if (omitTermFreqAndPositions()) {
    return IndexOptions.DOCS;
  }
  if (omitPositions()) {
    return IndexOptions.DOCS_AND_FREQS;
  }
  if (storeOffsetsWithPositions()) {
    return IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
  }
  return IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
}
/**
 * Builds a {@link org.apache.lucene.document.FieldType} that mirrors the settings of the
 * given {@link SchemaField}.
 *
 * @param field the schema field whose flags are copied
 * @return the populated field type, or {@code null} when the field is neither indexed
 *     nor stored
 */
public static org.apache.lucene.document.FieldType createFieldType(SchemaField field) {
  final boolean unused = !field.indexed() && !field.stored();
  if (unused) {
    log.trace("Ignoring unindexed/unstored field: {}", field);
    return null;
  }

  final org.apache.lucene.document.FieldType luceneType =
      new org.apache.lucene.document.FieldType();
  luceneType.setTokenized(field.isTokenized());
  luceneType.setStored(field.stored());
  luceneType.setOmitNorms(field.omitNorms());

  // First matching flag wins; with no flag set, positions (without offsets) are indexed.
  final IndexOptions options =
      field.omitTermFreqAndPositions()
          ? IndexOptions.DOCS
          : field.omitPositions()
              ? IndexOptions.DOCS_AND_FREQS
              : field.storeOffsetsWithPositions()
                  ? IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS
                  : IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
  luceneType.setIndexOptions(options);

  luceneType.setStoreTermVectors(field.storeTermVector());
  luceneType.setStoreTermVectorOffsets(field.storeTermOffsets());
  luceneType.setStoreTermVectorPositions(field.storeTermPositions());
  luceneType.setStoreTermVectorPayloads(field.storeTermPayloads());
  return luceneType;
}
/**
 * Converts a mapping property node into an {@link IndexOptions} value.
 *
 * <p>The node's {@code toString()} is compared case-insensitively against the
 * {@code INDEX_OPTIONS_*} constants, checked in the order offsets, positions, freqs, docs.
 *
 * @param propNode the raw property value
 * @return the index options named by the node
 * @throws ElasticsearchParseException if the text matches none of the known option names
 */
private static IndexOptions nodeIndexOptionValue(final Object propNode) {
  final String text = propNode.toString();
  if (INDEX_OPTIONS_OFFSETS.equalsIgnoreCase(text)) {
    return IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
  }
  if (INDEX_OPTIONS_POSITIONS.equalsIgnoreCase(text)) {
    return IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
  }
  if (INDEX_OPTIONS_FREQS.equalsIgnoreCase(text)) {
    return IndexOptions.DOCS_AND_FREQS;
  }
  if (INDEX_OPTIONS_DOCS.equalsIgnoreCase(text)) {
    return IndexOptions.DOCS;
  }
  throw new ElasticsearchParseException("failed to parse index option [{}]", text);
}
/**
 * Creates a reader exposing a single field backed by the supplied {@link Terms}.
 *
 * <p>The {@link IndexOptions} recorded in the synthesized {@link FieldInfo} are derived
 * from what {@code terms} reports: frequencies, then positions, then offsets.
 *
 * @param field name of the only field this reader exposes
 * @param terms the terms returned for that field
 */
public TermVectorLeafReader(String field, Terms terms) {
  fields = new Fields() {
    @Override
    public int size() {
      return 1;
    }

    @Override
    public Iterator<String> iterator() {
      return Collections.singletonList(field).iterator();
    }

    @Override
    public Terms terms(String fld) throws IOException {
      // Only the one wrapped field is known; every other name yields null.
      return field.equals(fld) ? terms : null;
    }
  };

  // Each level is only probed when the previous one is present.
  final IndexOptions indexOptions;
  if (terms.hasFreqs()) {
    if (terms.hasPositions()) {
      indexOptions = terms.hasOffsets()
          ? IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS
          : IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
    } else {
      indexOptions = IndexOptions.DOCS_AND_FREQS;
    }
  } else {
    indexOptions = IndexOptions.DOCS;
  }

  final FieldInfo fieldInfo = new FieldInfo(field, 0,
      true, true, terms.hasPayloads(),
      indexOptions, DocValuesType.NONE, -1, Collections.emptyMap(), 0, 0, 0, false);
  fieldInfos = new FieldInfos(new FieldInfo[]{fieldInfo});
}
/**
 * Writes the field summary (per-field statistics followed by each field's dictionary),
 * the trailer and the codec footer, then closes the output and the postings writer.
 *
 * <p>Does nothing when {@code out} is already {@code null}. If writing fails, the resources
 * are closed through {@code IOUtils.closeWhileHandlingException} instead of
 * {@code IOUtils.close}.
 *
 * @throws IOException if writing the summary or closing the resources fails
 */
@Override
public void close() throws IOException {
  if (out == null) {
    return; // already closed
  }
  boolean success = false;
  try {
    // write field summary
    final long summaryStart = out.getFilePointer();
    out.writeVInt(fields.size());
    for (FieldMetaData meta : fields) {
      out.writeVInt(meta.fieldInfo.number);
      out.writeVLong(meta.numTerms);
      // sumTotalTermFreq is skipped for DOCS-only fields.
      final boolean docsOnly = meta.fieldInfo.getIndexOptions() == IndexOptions.DOCS;
      if (!docsOnly) {
        out.writeVLong(meta.sumTotalTermFreq);
      }
      out.writeVLong(meta.sumDocFreq);
      out.writeVInt(meta.docCount);
      meta.dict.save(out, out);
    }
    writeTrailer(out, summaryStart);
    CodecUtil.writeFooter(out);
    success = true;
  } finally {
    if (!success) {
      IOUtils.closeWhileHandlingException(out, postingsWriter);
    } else {
      IOUtils.close(out, postingsWriter);
    }
    out = null;
  }
}
/**
 * Opens the terms file of the given segment, validates it, and registers one
 * {@code TermsReader} per field listed in the file's field summary.
 *
 * <p>The terms input is closed before the constructor returns, both on success and on
 * failure; on failure it is closed through {@code IOUtils.closeWhileHandlingException}.
 *
 * @param state segment read state supplying the directory, segment info and field infos
 * @param postingsReader postings reader that is initialized from the same terms input
 * @throws IOException if the file cannot be opened, fails validation, or cannot be read
 */
public FSTTermsReader(SegmentReadState state, PostingsReaderBase postingsReader) throws IOException {
final String termsFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, FSTTermsWriter.TERMS_EXTENSION);
this.postingsReader = postingsReader;
final IndexInput in = state.directory.openInput(termsFileName, state.context);
boolean success = false;
try {
// Validate the header and the whole-file checksum before reading any content.
CodecUtil.checkIndexHeader(in, FSTTermsWriter.TERMS_CODEC_NAME,
FSTTermsWriter.TERMS_VERSION_START,
FSTTermsWriter.TERMS_VERSION_CURRENT,
state.segmentInfo.getId(), state.segmentSuffix);
CodecUtil.checksumEntireFile(in);
this.postingsReader.init(in, state);
// Reposition the input before the field summary is read (see seekDir).
seekDir(in);
final FieldInfos fieldInfos = state.fieldInfos;
final int numFields = in.readVInt();
for (int i = 0; i < numFields; i++) {
int fieldNumber = in.readVInt();
FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldNumber);
long numTerms = in.readVLong();
long sumTotalTermFreq = in.readVLong();
// if frequencies are omitted, sumTotalTermFreq=sumDocFreq and we only write one value
long sumDocFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? sumTotalTermFreq : in.readVLong();
int docCount = in.readVInt();
// The TermsReader constructor receives the same input, so the read order here matters.
TermsReader current = new TermsReader(fieldInfo, in, numTerms, sumTotalTermFreq, sumDocFreq, docCount);
// 'previous' is whatever reader was already registered under this field name, if any.
TermsReader previous = fields.put(fieldInfo.name, current);
checkFieldSummary(state.segmentInfo, in, current, previous);
}
success = true;
} finally {
if (success) {
IOUtils.close(in);
} else {
IOUtils.closeWhileHandlingException(in);
}
}
}
/**
 * Wraps {@code src} while tracking the subset of fields named in {@code filterFields}.
 *
 * <p>All source {@link FieldInfo} objects are passed to the superclass; the aggregate
 * {@code filteredHas*} flags are computed only from the fields whose names are in
 * {@code filterFields}.
 *
 * @param src the complete field infos
 * @param filterFields names of the fields that make up the filtered view
 */
FilterFieldInfos(FieldInfos src, Collection<String> filterFields) {
  // Copy all the input FieldInfo objects since the field numbering must be kept consistent
  super(toArray(src));
  this.filteredNames = new HashSet<>(filterFields);
  this.filtered = new ArrayList<>(filterFields.size());

  boolean anyVectors = false;
  boolean anyProx = false;
  boolean anyPayloads = false;
  boolean anyOffsets = false;
  boolean anyFreq = false;
  boolean anyNorms = false;
  boolean anyDocValues = false;
  boolean anyPointValues = false;

  for (FieldInfo info : src) {
    if (!this.filteredNames.contains(info.name)) {
      continue; // not part of the filtered view
    }
    this.filtered.add(info);
    final IndexOptions opts = info.getIndexOptions();
    anyVectors |= info.hasVectors();
    anyProx |= opts.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
    anyFreq |= opts != IndexOptions.DOCS;
    anyOffsets |= opts.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
    anyNorms |= info.hasNorms();
    anyDocValues |= info.getDocValuesType() != DocValuesType.NONE;
    anyPayloads |= info.hasPayloads();
    anyPointValues |= (info.getPointDimensionCount() != 0);
  }

  this.filteredHasVectors = anyVectors;
  this.filteredHasProx = anyProx;
  this.filteredHasPayloads = anyPayloads;
  this.filteredHasOffsets = anyOffsets;
  this.filteredHasFreq = anyFreq;
  this.filteredHasNorms = anyNorms;
  this.filteredHasDocValues = anyDocValues;
  this.filteredHasPointValues = anyPointValues;
}
/**
 * Encodes the document length in the same way as {@link BM25Similarity}.
 *
 * <p>The length is the unique term count for {@link IndexOptions#DOCS} fields of indexes
 * created with major version 8 or later; otherwise it is the field length, minus the
 * overlap count when {@code discountOverlaps} is set.
 *
 * @param state inversion statistics of the field being indexed
 * @return the length encoded with {@code SmallFloat.intToByte4}
 */
@Override
public final long computeNorm(FieldInvertState state) {
  final boolean countUniqueTerms =
      state.getIndexOptions() == IndexOptions.DOCS && state.getIndexCreatedVersionMajor() >= 8;
  int length;
  if (countUniqueTerms) {
    length = state.getUniqueTermCount();
  } else {
    length = state.getLength();
    if (discountOverlaps) {
      length -= state.getNumOverlap();
    }
  }
  return SmallFloat.intToByte4(length);
}
/**
 * Computes the encoded length norm for a field.
 *
 * <p>For {@link IndexOptions#DOCS} fields in indexes whose created major version is at
 * least 8 the unique term count is used. In all other cases the field length is used,
 * with the overlap count subtracted when {@code discountOverlaps} is enabled.
 *
 * @param state inversion statistics of the field
 * @return the term count passed through {@code SmallFloat.intToByte4}
 */
@Override
public final long computeNorm(FieldInvertState state) {
  final int termCount =
      (state.getIndexOptions() == IndexOptions.DOCS && state.getIndexCreatedVersionMajor() >= 8)
          ? state.getUniqueTermCount()
          : (discountOverlaps
              ? state.getLength() - state.getNumOverlap()
              : state.getLength());
  return SmallFloat.intToByte4(termCount);
}
/**
 * Returns the norm for a field as an encoded term count.
 *
 * <p>Three cases are distinguished, in this order: DOCS-only field on an index created
 * with major version 8 or newer (unique term count), {@code discountOverlaps} enabled
 * (length minus overlaps), and everything else (plain length).
 *
 * @param state inversion statistics of the field
 * @return the selected count encoded by {@code SmallFloat.intToByte4}
 */
@Override
public final long computeNorm(FieldInvertState state) {
  if (state.getIndexOptions() == IndexOptions.DOCS && state.getIndexCreatedVersionMajor() >= 8) {
    return SmallFloat.intToByte4(state.getUniqueTermCount());
  }
  if (discountOverlaps) {
    return SmallFloat.intToByte4(state.getLength() - state.getNumOverlap());
  }
  return SmallFloat.intToByte4(state.getLength());
}
/**
 * Parses the textual form of an index option.
 *
 * <p>Matching against the {@code INDEX_OPTIONS_*} constants ignores case and is tried in
 * the order offsets, positions, freqs, docs.
 *
 * @param propNode property value whose {@code toString()} names the option
 * @return the corresponding {@link IndexOptions}
 * @throws ElasticsearchParseException when the value is not one of the known names
 */
private static IndexOptions nodeIndexOptionValue(final Object propNode) {
  final String optionName = propNode.toString();
  final IndexOptions parsed;
  if (INDEX_OPTIONS_OFFSETS.equalsIgnoreCase(optionName)) {
    parsed = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
  } else if (INDEX_OPTIONS_POSITIONS.equalsIgnoreCase(optionName)) {
    parsed = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
  } else if (INDEX_OPTIONS_FREQS.equalsIgnoreCase(optionName)) {
    parsed = IndexOptions.DOCS_AND_FREQS;
  } else if (INDEX_OPTIONS_DOCS.equalsIgnoreCase(optionName)) {
    parsed = IndexOptions.DOCS;
  } else {
    throw new ElasticsearchParseException("failed to parse index option [{}]", optionName);
  }
  return parsed;
}
/**
 * Returns the index options used by default for this type.
 *
 * @return always {@link IndexOptions#DOCS}
 */
@Override
protected IndexOptions getDefaultIndexOption() {
return IndexOptions.DOCS;
}
/**
 * Parse common field attributes such as {@code doc_values} or {@code store}.
 *
 * <p>Every entry of {@code fieldNode} whose (underscore-cased) name is recognized is applied
 * to {@code builder} and then removed from the map through the iterator, so entries still
 * present afterwards were not handled here. Several properties are only recognized for
 * indexes created before a given version.
 *
 * @param builder the field mapper builder that receives the parsed settings
 * @param name the field name, used in error and warning messages
 * @param fieldNode the mutable mapping properties of the field; handled entries are removed
 * @param parserContext supplies the index creation version and lookup services
 */
public static void parseField(FieldMapper.Builder builder, String name, Map<String, Object> fieldNode, Mapper.TypeParser.ParserContext parserContext) {
Version indexVersionCreated = parserContext.indexVersionCreated();
for (Iterator<Map.Entry<String, Object>> iterator = fieldNode.entrySet().iterator(); iterator.hasNext();) {
Map.Entry<String, Object> entry = iterator.next();
final String propName = Strings.toUnderscoreCase(entry.getKey());
final Object propNode = entry.getValue();
if (propName.equals("index_name") && indexVersionCreated.before(Version.V_2_0_0_beta1)) {
builder.indexName(propNode.toString());
iterator.remove();
} else if (propName.equals("store")) {
builder.store(parseStore(name, propNode.toString()));
iterator.remove();
} else if (propName.equals("index")) {
parseIndex(name, propNode.toString(), builder);
iterator.remove();
} else if (propName.equals(DOC_VALUES)) {
builder.docValues(nodeBooleanValue(propNode));
iterator.remove();
} else if (propName.equals("boost")) {
builder.boost(nodeFloatValue(propNode));
iterator.remove();
} else if (propName.equals("omit_norms")) {
builder.omitNorms(nodeBooleanValue(propNode));
iterator.remove();
} else if (propName.equals("norms")) {
// "norms" is a nested object; its own entries are consumed the same way as the outer ones.
final Map<String, Object> properties = nodeMapValue(propNode, "norms");
for (Iterator<Entry<String, Object>> propsIterator = properties.entrySet().iterator(); propsIterator.hasNext();) {
Entry<String, Object> entry2 = propsIterator.next();
final String propName2 = Strings.toUnderscoreCase(entry2.getKey());
final Object propNode2 = entry2.getValue();
if (propName2.equals("enabled")) {
// norms.enabled is the inverse of omit_norms
builder.omitNorms(!nodeBooleanValue(propNode2));
propsIterator.remove();
} else if (propName2.equals(Loading.KEY)) {
builder.normsLoading(Loading.parse(nodeStringValue(propNode2, null), null));
propsIterator.remove();
}
}
// Whatever is still left inside "norms" was not recognized above.
DocumentMapperParser.checkNoRemainingFields(propName, properties, parserContext.indexVersionCreated());
iterator.remove();
} else if (propName.equals("omit_term_freq_and_positions")) {
final IndexOptions op = nodeBooleanValue(propNode) ? IndexOptions.DOCS : IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
if (indexVersionCreated.onOrAfter(Version.V_1_0_0_RC2)) {
throw new ElasticsearchParseException("'omit_term_freq_and_positions' is not supported anymore - use ['index_options' : 'docs'] instead");
}
// deprecated option for BW compat
builder.indexOptions(op);
iterator.remove();
} else if (propName.equals("index_options")) {
builder.indexOptions(nodeIndexOptionValue(propNode));
iterator.remove();
} else if (propName.equals("include_in_all")) {
builder.includeInAll(nodeBooleanValue(propNode));
iterator.remove();
} else if (propName.equals("postings_format") && indexVersionCreated.before(Version.V_2_0_0_beta1)) {
// ignore for old indexes
iterator.remove();
} else if (propName.equals("doc_values_format") && indexVersionCreated.before(Version.V_2_0_0_beta1)) {
// ignore for old indexes
iterator.remove();
} else if (propName.equals("similarity")) {
builder.similarity(parserContext.similarityLookupService().similarity(propNode.toString()));
iterator.remove();
} else if (propName.equals("fielddata")) {
final Settings settings = Settings.builder().put(SettingsLoader.Helper.loadNestedFromMap(nodeMapValue(propNode, "fielddata"))).build();
builder.fieldDataSettings(settings);
iterator.remove();
} else if (propName.equals("copy_to")) {
if (parserContext.isWithinMultiField()) {
// Rejected for index versions after 2.1.0, or strictly between 2.0.1 and 2.1.0;
// all other versions only log a warning.
if (indexVersionCreated.after(Version.V_2_1_0) ||
(indexVersionCreated.after(Version.V_2_0_1) && indexVersionCreated.before(Version.V_2_1_0))) {
throw new MapperParsingException("copy_to in multi fields is not allowed. Found the copy_to in field [" + name + "] which is within a multi field.");
} else {
ESLoggerFactory.getLogger("mapping [" + parserContext.type() + "]").warn("Found a copy_to in field [" + name + "] which is within a multi field. This feature has been removed and the copy_to will be ignored.");
// we still parse this, otherwise the message will only appear once and the copy_to removed. After that it will appear again. Better to have it always.
}
}
parseCopyFields(propNode, builder);
iterator.remove();
}
}
if (indexVersionCreated.before(Version.V_2_2_0)) {
// analyzer, search_analyzer, term_vectors were accepted on all fields
// before 2.2, even though it made little sense
parseAnalyzersAndTermVectors(builder, name, fieldNode, parserContext);
}
}
protected FSTTermOutputs(FieldInfo fieldInfo) {
this.hasPos = fieldInfo.getIndexOptions() != IndexOptions.DOCS;
}
/**
 * Lazily decodes term metadata until {@code metaDataUpto} reaches the current
 * {@code state.termBlockOrd}.
 *
 * <p>For every term in between, the doc freq and (unless the field is DOCS-only) the total
 * term freq are read from {@code freqReader}, and the postings metadata is decoded through
 * {@code postingsReader}. Nothing is decoded while a seek is pending.
 *
 * @throws IOException if reading the metadata fails
 */
private void decodeMetaData() throws IOException {
if (!seekPending) {
// TODO: cutover to random-access API
// here.... really stupid that we have to decode N
// wasted term metadata just to get to the N+1th
// that we really need...
// lazily catch up on metadata decode:
final int limit = state.termBlockOrd;
// Only the first decoded entry (metaDataUpto == 0) is passed as absolute; the flag is
// cleared after each iteration.
boolean absolute = metaDataUpto == 0;
// TODO: better API would be "jump straight to term=N"???
while (metaDataUpto < limit) {
// TODO: we could make "tiers" of metadata, ie,
// decode docFreq/totalTF but don't decode postings
// metadata; this way caller could get
// docFreq/totalTF w/o paying decode cost for
// postings
// TODO: if docFreq were bulk decoded we could
// just skipN here:
// docFreq, totalTermFreq
state.docFreq = freqReader.readVInt();
if (fieldInfo.getIndexOptions() == IndexOptions.DOCS) {
state.totalTermFreq = state.docFreq; // all postings have tf=1
} else {
// the stored value is added to docFreq to obtain totalTermFreq
state.totalTermFreq = state.docFreq + freqReader.readVLong();
}
// metadata
postingsReader.decodeTerm(bytesReader, fieldInfo, state, absolute);
metaDataUpto++;
absolute = false;
}
} else {
// a seek is pending: skip decoding entirely
}
}
/**
 * Returns the total term frequency.
 *
 * @return {@code docFreq} when the index options are {@link IndexOptions#DOCS}, otherwise
 *     the stored {@code totalTermFreq}
 */
@Override
public long totalTermFreq() {
  if (indexOptions == IndexOptions.DOCS) {
    return docFreq;
  }
  return totalTermFreq;
}