The following examples show how to use org.apache.lucene.index.FieldInfos#getIndexedFields().
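Before the examples, here is a minimal, self-contained sketch of the call itself, assuming an existing index on disk (the class name and index path are placeholders): given an open IndexReader, FieldInfos.getIndexedFields returns the names of all fields that have indexed terms.

import java.nio.file.Paths;
import java.util.Collection;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.store.FSDirectory;

public class ListIndexedFields {
  public static void main(String[] args) throws Exception {
    // "/path/to/index" is a placeholder for a real index directory.
    try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get("/path/to/index")))) {
      // Collect the names of all fields that carry indexed terms.
      Collection<String> indexedFields = FieldInfos.getIndexedFields(reader);
      indexedFields.forEach(System.out::println);
    }
  }
}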
/**
 * Return a query that will return docs like the passed lucene document ID.
 *
 * @param docNum the documentID of the lucene doc to generate the "More Like This" query for.
 * @return a query that will return docs like the passed lucene document ID.
 */
public Query like(int docNum) throws IOException {
  if (fieldNames == null) {
    // gather list of valid fields from lucene
    Collection<String> fields = FieldInfos.getIndexedFields(ir);
    fieldNames = fields.toArray(new String[fields.size()]);
  }
  return createQuery(retrieveTerms(docNum));
}
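For context, a hedged usage sketch of this overload. The helper below is hypothetical, not Lucene API: it wires a MoreLikeThis instance to a reader and searches for the top-n documents similar to docNum. Field names are deliberately left unset, which is what triggers the FieldInfos.getIndexedFields fallback shown above.

import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queries.mlt.MoreLikeThis;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;

// Hypothetical helper: return the top-n documents similar to docNum.
static TopDocs findSimilar(IndexReader reader, Analyzer analyzer, int docNum, int n)
    throws IOException {
  MoreLikeThis mlt = new MoreLikeThis(reader);
  mlt.setAnalyzer(analyzer);
  // Field names are not set, so like(docNum) falls back to
  // FieldInfos.getIndexedFields(reader), as in the snippet above.
  Query query = mlt.like(docNum);
  return new IndexSearcher(reader).search(query, n);
}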
/**
 *
 * @param filteredDocument Document with field values extracted for selected fields.
 * @return More Like This query for the passed document.
 */
public Query like(Map<String, Collection<Object>> filteredDocument) throws IOException {
  if (fieldNames == null) {
    // gather list of valid fields from lucene
    Collection<String> fields = FieldInfos.getIndexedFields(ir);
    fieldNames = fields.toArray(new String[fields.size()]);
  }
  return createQuery(retrieveTerms(filteredDocument));
}
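The Map overload lets callers describe a document that is not in the index. A hedged usage sketch follows; the field names and values are invented, and the helper assumes an already-configured MoreLikeThis instance as in the previous sketch.

import java.io.IOException;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.lucene.queries.mlt.MoreLikeThis;
import org.apache.lucene.search.Query;

// Hypothetical helper: query for documents similar to hand-built field values.
static Query likeValues(MoreLikeThis mlt) throws IOException {
  Map<String, Collection<Object>> filteredDocument = new HashMap<>();
  filteredDocument.put("title", List.of("lucene in action"));        // invented values
  filteredDocument.put("body", List.of("scoring", "term frequency"));
  return mlt.like(filteredDocument);
}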
// Lazily open the directory, then list all indexed fields; an
// unreadable index yields an empty array.
public String[] listFields() {
  if (dir == null) {
    open();
  }
  try (IndexReader indexReader = DirectoryReader.open(dir)) {
    Collection<String> fields = FieldInfos.getIndexedFields(indexReader);
    return fields.toArray(new String[fields.size()]);
  } catch (IOException e) {
    return new String[0];
  }
}
/**
 * Test ReadTokensTask
 */
public void testReadTokens() throws Exception {

  // We will call ReadTokens on this many docs
  final int NUM_DOCS = 20;

  // Read tokens from first NUM_DOCS docs from Reuters and
  // then build index from the same docs
  String[] algLines1 = {
    "# ----- properties ",
    "analyzer=org.apache.lucene.analysis.core.WhitespaceAnalyzer",
    "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
    "docs.file=" + getReuters20LinesFile(),
    "# ----- alg ",
    "{ReadTokens}: " + NUM_DOCS,
    "ResetSystemErase",
    "CreateIndex",
    "{AddDoc}: " + NUM_DOCS,
    "CloseIndex",
  };

  // Run algo
  Benchmark benchmark = execBenchmark(algLines1);

  List<TaskStats> stats = benchmark.getRunData().getPoints().taskStats();

  // Count how many tokens all ReadTokens saw
  int totalTokenCount1 = 0;
  for (final TaskStats stat : stats) {
    if (stat.getTask().getName().equals("ReadTokens")) {
      totalTokenCount1 += stat.getCount();
    }
  }

  // Separately count how many tokens are actually in the index:
  IndexReader reader = DirectoryReader.open(benchmark.getRunData().getDirectory());
  assertEquals(NUM_DOCS, reader.numDocs());

  int totalTokenCount2 = 0;
  Collection<String> fields = FieldInfos.getIndexedFields(reader);
  for (String fieldName : fields) {
    if (fieldName.equals(DocMaker.ID_FIELD)
        || fieldName.equals(DocMaker.DATE_MSEC_FIELD)
        || fieldName.equals(DocMaker.TIME_SEC_FIELD)) {
      continue;
    }
    Terms terms = MultiTerms.getTerms(reader, fieldName);
    if (terms == null) {
      continue;
    }
    TermsEnum termsEnum = terms.iterator();
    PostingsEnum docs = null;
    while (termsEnum.next() != null) {
      docs = TestUtil.docs(random(), termsEnum, docs, PostingsEnum.FREQS);
      while (docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        totalTokenCount2 += docs.freq();
      }
    }
  }
  reader.close();

  // Make sure they are the same
  assertEquals(totalTokenCount1, totalTokenCount2);
}
/**
 * Creates a new QueryAutoStopWordAnalyzer with stopwords calculated for all
 * indexed fields from terms with a document frequency greater than the given
 * maxDocFreq
 *
 * @param delegate Analyzer whose TokenStream will be filtered
 * @param indexReader IndexReader to identify the stopwords from
 * @param maxDocFreq Document frequency terms should be above in order to be stopwords
 * @throws IOException Can be thrown while reading from the IndexReader
 */
public QueryAutoStopWordAnalyzer(Analyzer delegate, IndexReader indexReader, int maxDocFreq)
    throws IOException {
  this(delegate, indexReader, FieldInfos.getIndexedFields(indexReader), maxDocFreq);
}
/**
 * Creates a new QueryAutoStopWordAnalyzer with stopwords calculated for all
 * indexed fields from terms with a document frequency percentage greater than
 * the given maxPercentDocs
 *
 * @param delegate Analyzer whose TokenStream will be filtered
 * @param indexReader IndexReader to identify the stopwords from
 * @param maxPercentDocs The maximum percentage (between 0.0 and 1.0) of index documents which
 *        contain a term, after which the word is considered to be a stop word
 * @throws IOException Can be thrown while reading from the IndexReader
 */
public QueryAutoStopWordAnalyzer(Analyzer delegate, IndexReader indexReader, float maxPercentDocs)
    throws IOException {
  this(delegate, indexReader, FieldInfos.getIndexedFields(indexReader), maxPercentDocs);
}
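To put these constructors in context, here is a hedged usage sketch of the percentage overload. The helper is hypothetical: reader is an assumed open IndexReader, and the 0.4f threshold is an arbitrary example value.

import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.query.QueryAutoStopWordAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexReader;

// Hypothetical helper: reader is an already-open IndexReader.
static Analyzer queryTimeAnalyzer(IndexReader reader) throws IOException {
  // Treat any term occurring in more than 40% of the documents of any
  // indexed field as a stop word; 0.4f is an arbitrary example threshold.
  return new QueryAutoStopWordAnalyzer(new StandardAnalyzer(), reader, 0.4f);
}

The resulting analyzer is intended for query-time use: it wraps the delegate so that very common terms are dropped from queries, while indexing continues to use the delegate analyzer directly.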