Below are example usages of org.apache.lucene.index.IndexableField#tokenStream(); the original sources are available on GitHub.
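The method's contract: given an Analyzer (and an optional TokenStream to reuse), it returns a TokenStream over the field's value, or null if the field produces no tokens. The canonical way to consume the stream is reset(), then incrementToken() in a loop, then end() and close(). A minimal, self-contained sketch of that loop (the field name, text, and analyzer choice are illustrative, not taken from the examples below):

import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexableField;

public class TokenStreamDemo {
    public static void main(String[] args) throws IOException {
        // TextField is tokenized, so tokenStream() returns a non-null stream.
        IndexableField field = new TextField("body", "Hello token stream world", Field.Store.NO);
        Analyzer analyzer = new StandardAnalyzer();
        // Passing null as the second argument asks for a fresh stream (no reuse).
        try (TokenStream stream = field.tokenStream(analyzer, null)) {
            CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
            stream.reset();                       // mandatory before the first incrementToken()
            while (stream.incrementToken()) {
                System.out.println(termAtt.toString());
            }
            stream.end();                         // consume end-of-stream state
        }                                         // try-with-resources closes the stream
    }
}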
@Override
public void prepare(PercolateContext context, ParsedDocument parsedDocument) {
    MemoryIndex memoryIndex = cache.get();
    for (IndexableField field : parsedDocument.rootDoc().getFields()) {
        // skip the UID field when it is not indexed
        if (field.fieldType().indexOptions() == IndexOptions.NONE && field.name().equals(UidFieldMapper.NAME)) {
            continue;
        }
        try {
            Analyzer analyzer = context.mapperService().documentMapper(parsedDocument.type()).mappers().indexAnalyzer();
            // TODO: instead of passing null here, we can have a CTL<Map<String,TokenStream>> and pass previous,
            // like the indexer does
            try (TokenStream tokenStream = field.tokenStream(analyzer, null)) {
                if (tokenStream != null) {
                    memoryIndex.addField(field.name(), tokenStream, field.boost());
                }
            }
        } catch (Exception e) {
            throw new ElasticsearchException("Failed to create token stream for [" + field.name() + "]", e);
        }
    }
    context.initialize(new DocEngineSearcher(memoryIndex), parsedDocument);
}
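For context on what prepare() above is feeding: MemoryIndex is Lucene's single-document, in-memory index, and its addField() accepts a TokenStream directly. A stripped-down sketch of the same idea outside the percolator (the class name, field content, and query are invented for illustration; it uses the two-argument addField(), leaving the boost at its default):

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.search.TermQuery;

public class PercolateSketch {
    public static void main(String[] args) throws Exception {
        Analyzer analyzer = new StandardAnalyzer();
        MemoryIndex memoryIndex = new MemoryIndex();
        IndexableField field = new TextField("body", "quick brown fox", Field.Store.NO);
        // Tokenize the field and add the resulting stream to the in-memory index.
        try (TokenStream stream = field.tokenStream(analyzer, null)) {
            if (stream != null) {
                memoryIndex.addField(field.name(), stream);
            }
        }
        // A score > 0 means the single in-memory document matches the query.
        float score = memoryIndex.search(new TermQuery(new Term("body", "fox")));
        System.out.println("match score: " + score);
    }
}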
@Override
public int doLogic() throws Exception {
    List<IndexableField> fields = doc.getFields();
    Analyzer analyzer = getRunData().getAnalyzer();
    int tokenCount = 0;
    for (final IndexableField field : fields) {
        if (field.fieldType().indexOptions() == IndexOptions.NONE ||
            field.fieldType().tokenized() == false) {
            continue;
        }
        final TokenStream stream = field.tokenStream(analyzer, null);
        // reset the TokenStream to the first token
        stream.reset();
        TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
        while (stream.incrementToken()) {
            termAtt.getBytesRef();
            tokenCount++;
        }
        stream.end();
        stream.close();
    }
    totalTokenCount += tokenCount;
    return tokenCount;
}
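The null second argument to tokenStream() in doLogic() is the reuse parameter: a caller that tokenizes many fields may pass back the stream it received for the previous field so the implementation can recycle it, which is what the TODO in the first example alludes to ("like the indexer does"). A hedged sketch of that threading pattern; whether reuse actually occurs depends on the field and analyzer implementation, and the document setup is assumed:

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexableField;

public class ReuseSketch {
    // Counts tokens across all fields, threading the previous stream back in as 'reuse'.
    static int countTokens(Document doc, Analyzer analyzer) throws Exception {
        int tokenCount = 0;
        TokenStream reuse = null;
        for (IndexableField field : doc.getFields()) {
            TokenStream stream = field.tokenStream(analyzer, reuse);
            if (stream == null) {
                continue; // field produced no tokens
            }
            stream.reset();
            while (stream.incrementToken()) {
                tokenCount++;
            }
            stream.end();
            stream.close();  // must be closed before the analyzer can hand it out again
            reuse = stream;  // offer the exhausted stream back for the next field
        }
        return tokenCount;
    }
}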
/**
 * This method performs the analysis for the seed document and extracts the boosts if present.
 * This is done only once for the seed document.
 *
 * @param inputDocument the unseen seed document
 * @param fieldName2tokensArray a map that associates a field name with the list of token arrays for all its values
 * @param fieldName2boost a map that associates the boost with the field
 * @throws IOException if there is a low-level I/O error
 */
private void analyzeSeedDocument(Document inputDocument, Map<String, List<String[]>> fieldName2tokensArray, Map<String, Float> fieldName2boost) throws IOException {
    for (int i = 0; i < textFieldNames.length; i++) {
        String fieldName = textFieldNames[i];
        float boost = 1;
        List<String[]> tokenizedValues = new LinkedList<>();
        // a field name may carry a boost suffix, e.g. "title^2.0"
        if (fieldName.contains("^")) {
            String[] field2boost = fieldName.split("\\^");
            fieldName = field2boost[0];
            boost = Float.parseFloat(field2boost[1]);
        }
        IndexableField[] fieldValues = inputDocument.getFields(fieldName);
        for (IndexableField fieldValue : fieldValues) {
            TokenStream fieldTokens = fieldValue.tokenStream(field2analyzer.get(fieldName), null);
            String[] fieldTokensArray = getTokenArray(fieldTokens);
            tokenizedValues.add(fieldTokensArray);
        }
        fieldName2tokensArray.put(fieldName, tokenizedValues);
        fieldName2boost.put(fieldName, boost);
        textFieldNames[i] = fieldName; // store the field name with the boost suffix stripped
    }
}
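getTokenArray() is a private helper of the same class and is not part of the listing above. A plausible implementation, consistent with how the streams are produced here (a sketch, not the verbatim source; it needs java.io.IOException, java.util.LinkedList, java.util.List, org.apache.lucene.analysis.TokenStream, and org.apache.lucene.analysis.tokenattributes.CharTermAttribute):

private String[] getTokenArray(TokenStream tokenizedText) throws IOException {
    List<String> tokens = new LinkedList<>();
    CharTermAttribute charTermAttribute = tokenizedText.addAttribute(CharTermAttribute.class);
    tokenizedText.reset();
    while (tokenizedText.incrementToken()) {
        tokens.add(charTermAttribute.toString()); // copy the term; the attribute is mutated in place
    }
    tokenizedText.end();
    tokenizedText.close();
    return tokens.toArray(new String[0]);
}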