org.apache.lucene.document.FieldType code examples

The examples below show how org.apache.lucene.document.FieldType is used in a number of open-source projects; the project and source file for each snippet are noted above it.
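
Most of the snippets follow the same basic pattern: start from an existing type (for example TextField.TYPE_NOT_STORED) or a blank FieldType, switch on the desired index and term-vector options, optionally freeze() the type, and pass it to a Field. The following minimal sketch shows that pattern for orientation only; the field name "contents" and the sample text are placeholders and are not taken from any of the projects below.

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexOptions;

public class FieldTypeSketch {
  static Document sketch() {
    // Copy a base type and customize it.
    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
    ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
    ft.setStoreTermVectors(true);
    ft.setStoreTermVectorPositions(true);
    ft.freeze(); // make the configuration immutable before reusing it across fields

    Document doc = new Document();
    doc.add(new Field("contents", "some sample text", ft));
    return doc;
  }
}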

Example 1  Project: lucene-solr   File: TestBlockPostingsFormat2.java
private Document newDocument() {
  Document doc = new Document();
  for (IndexOptions option : IndexOptions.values()) {
    if (option == IndexOptions.NONE) {
      continue;
    }
    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
    // turn on tvs for a cross-check, since we rely upon checkindex in this test (for now)
    ft.setStoreTermVectors(true);
    ft.setStoreTermVectorOffsets(true);
    ft.setStoreTermVectorPositions(true);
    ft.setStoreTermVectorPayloads(true);
    ft.setIndexOptions(option);
    doc.add(new Field(option.toString(), "", ft));
  }
  return doc;
}
 
Example 2  Project: Elasticsearch   File: FieldMapper.java
public static String termVectorOptionsToString(FieldType fieldType) {
    if (!fieldType.storeTermVectors()) {
        return "no";
    } else if (!fieldType.storeTermVectorOffsets() && !fieldType.storeTermVectorPositions()) {
        return "yes";
    } else if (fieldType.storeTermVectorOffsets() && !fieldType.storeTermVectorPositions()) {
        return "with_offsets";
    } else {
        StringBuilder builder = new StringBuilder("with");
        if (fieldType.storeTermVectorPositions()) {
            builder.append("_positions");
        }
        if (fieldType.storeTermVectorOffsets()) {
            builder.append("_offsets");
        }
        if (fieldType.storeTermVectorPayloads()) {
            builder.append("_payloads");
        }
        return builder.toString();
    }
}
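
For reference, the strings produced by the branches above can be read off from a plain Lucene FieldType, since only the three term-vector flags matter to this method. The sketch below is illustrative and is not part of the Elasticsearch sources; the expected values in the comments follow directly from the code above.

static void termVectorOptionExamples() {
  FieldType none = new FieldType();       // termVectorOptionsToString(none)  -> "no"

  FieldType plain = new FieldType();
  plain.setStoreTermVectors(true);        // termVectorOptionsToString(plain) -> "yes"

  FieldType full = new FieldType();
  full.setStoreTermVectors(true);
  full.setStoreTermVectorPositions(true);
  full.setStoreTermVectorOffsets(true);   // termVectorOptionsToString(full)  -> "with_positions_offsets"
}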
 
Example 3  Project: lucene-solr   File: TestTermVectorsWriter.java
public void testNoAbortOnBadTVSettings() throws Exception {
  Directory dir = newDirectory();
  // Don't use RandomIndexWriter because we want to be sure both docs go to 1 seg:
  IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
  IndexWriter iw = new IndexWriter(dir, iwc);

  Document doc = new Document();
  iw.addDocument(doc);
  FieldType ft = new FieldType(StoredField.TYPE);
  ft.setStoreTermVectors(true);
  ft.freeze();
  doc.add(new Field("field", "value", ft));

  expectThrows(IllegalArgumentException.class, () -> {
    iw.addDocument(doc);
  });

  IndexReader r = DirectoryReader.open(iw);

  // Make sure the exc didn't lose our first document:
  assertEquals(1, r.numDocs());
  iw.close();
  r.close();
  dir.close();
}
 
Example 4  Project: semanticvectors   File: FilePositionDoc.java
public static Document Document(File f)
     throws java.io.FileNotFoundException {
  Document doc = new Document();
  doc.add(new StoredField("path", f.getPath()));
  doc.add(new StoredField("modified",
                    DateTools.timeToString(f.lastModified(), DateTools.Resolution.MINUTE)));
  
  //create new FieldType to store term positions (TextField is not sufficiently configurable)
  FieldType ft = new FieldType();
  ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
  ft.setTokenized(true);
  ft.setStoreTermVectors(true);
  ft.setStoreTermVectorPositions(true);
  Field contentsField = new Field("contents", new FileReader(f), ft);

  doc.add(contentsField);
  return doc;
}
 
Example 5  Project: lucene-solr   File: TestDocValuesIndexing.java
public void testExcIndexingDocBeforeDocValues() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
  IndexWriter w = new IndexWriter(dir, iwc);
  Document doc = new Document();
  FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
  ft.setDocValuesType(DocValuesType.SORTED);
  ft.freeze();
  Field field = new Field("test", "value", ft);
  field.setTokenStream(new TokenStream() {
      @Override
      public boolean incrementToken() {
        throw new RuntimeException("no");
      }
    });
  doc.add(field);
  expectThrows(RuntimeException.class, () -> {
    w.addDocument(doc);
  });

  w.addDocument(new Document());
  w.close();
  dir.close();
}
 
Example 6  Project: lucene-solr   File: TestSloppyPhraseQuery.java
public void testInfiniteFreq1() throws Exception {
  String document = "drug druggy drug drug drug";
  
  Directory dir = newDirectory();
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
  Document doc = new Document();
  doc.add(newField("lyrics", document, new FieldType(TextField.TYPE_NOT_STORED)));
  iw.addDocument(doc);
  IndexReader ir = iw.getReader();
  iw.close();
  
  IndexSearcher is = newSearcher(ir);
  PhraseQuery.Builder builder = new PhraseQuery.Builder();
  builder.add(new Term("lyrics", "drug"), 1);
  builder.add(new Term("lyrics", "drug"), 3);
  builder.setSlop(1);
  PhraseQuery pq = builder.build();
  // "drug the drug"~1
  assertSaneScoring(pq, is);
  ir.close();
  dir.close();
}
 
Example 7  Project: lucene-solr   File: TestCustomTermFreq.java
public void testFieldInvertState() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
  iwc.setSimilarity(NeverForgetsSimilarity.INSTANCE);
  IndexWriter w = new IndexWriter(dir, iwc);

  Document doc = new Document();
  FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
  fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
  Field field = new Field("field",
                          new CannedTermFreqs(new String[] {"foo", "bar", "foo", "bar"},
                                              new int[] {42, 128, 17, 100}),
                          fieldType);
  doc.add(field);
  w.addDocument(doc);
  FieldInvertState fis = NeverForgetsSimilarity.INSTANCE.lastState;
  assertEquals(228, fis.getMaxTermFrequency());
  assertEquals(2, fis.getUniqueTermCount());
  assertEquals(0, fis.getNumOverlap());
  assertEquals(287, fis.getLength());

  IOUtils.close(w, dir);
}
 
Example 8  Project: lucene-solr   File: TestPostingsOffsets.java
public void testLegalbutVeryLargeOffsets() throws Exception {
  Directory dir = newDirectory();
  IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
  Document doc = new Document();
  Token t1 = new Token("foo", 0, Integer.MAX_VALUE-500);
  if (random().nextBoolean()) {
    t1.setPayload(new BytesRef("test"));
  }
  Token t2 = new Token("foo", Integer.MAX_VALUE-500, Integer.MAX_VALUE);
  TokenStream tokenStream = new CannedTokenStream(
      new Token[] { t1, t2 }
  );
  FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
  ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
  // store some term vectors for the checkindex cross-check
  ft.setStoreTermVectors(true);
  ft.setStoreTermVectorPositions(true);
  ft.setStoreTermVectorOffsets(true);
  Field field = new Field("foo", tokenStream, ft);
  doc.add(field);
  iw.addDocument(doc);
  iw.close();
  dir.close();
}
 
Example 10  Project: lucene-solr   File: TestUnifiedHighlighter.java
private IndexReader indexSomeFields() throws IOException {
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
  FieldType ft = new FieldType();
  ft.setIndexOptions(IndexOptions.NONE);
  ft.setTokenized(false);
  ft.setStored(true);
  ft.freeze();

  // Note: 'fieldType' is a member of the enclosing test class (configured in the test's
  // setup), not the local 'ft' built and frozen above.
  Field title = new Field("title", "", fieldType);
  Field text = new Field("text", "", fieldType);
  Field category = new Field("category", "", fieldType);

  Document doc = new Document();
  doc.add(title);
  doc.add(text);
  doc.add(category);
  title.setStringValue("This is the title field.");
  text.setStringValue("This is the text field. You can put some text if you want.");
  category.setStringValue("This is the category field.");
  iw.addDocument(doc);

  IndexReader ir = iw.getReader();
  iw.close();
  return ir;
}
 
Example 11  Project: lucene-solr   File: DocMaker.java
public DocState(boolean reuseFields, FieldType ft, FieldType bodyFt) {

      this.reuseFields = reuseFields;
      
      if (reuseFields) {
        fields =  new HashMap<>();
        numericFields = new HashMap<>();
        
        // Initialize the map with the default fields.
        fields.put(BODY_FIELD, new Field(BODY_FIELD, "", bodyFt));
        fields.put(TITLE_FIELD, new Field(TITLE_FIELD, "", ft));
        fields.put(DATE_FIELD, new Field(DATE_FIELD, "", ft));
        fields.put(ID_FIELD, new StringField(ID_FIELD, "", Field.Store.YES));
        fields.put(NAME_FIELD, new Field(NAME_FIELD, "", ft));

        numericFields.put(DATE_MSEC_FIELD, new LongPoint(DATE_MSEC_FIELD, 0L));
        numericFields.put(TIME_SEC_FIELD, new IntPoint(TIME_SEC_FIELD, 0));
        
        doc = new Document();
      } else {
        numericFields = null;
        fields = null;
        doc = null;
      }
    }
 
private Document newGeoDocument(OIdentifiable oIdentifiable, Shape shape) {

    FieldType ft = new FieldType();
    ft.setIndexed(true);
    ft.setStored(true);

    Document doc = new Document();

    doc.add(OLuceneIndexType.createField(RID, oIdentifiable.getIdentity().toString(), Field.Store.YES,
        Field.Index.NOT_ANALYZED_NO_NORMS));
    for (IndexableField f : strategy.createIndexableFields(shape)) {
      doc.add(f);
    }

    doc.add(new StoredField(strategy.getFieldName(), ctx.toString(shape)));

    return doc;
  }
 
Example 13  Project: lucene-solr   File: TestMemoryIndex.java
public void testIndexingPointsAndDocValues() throws Exception {
  FieldType type = new FieldType();
  type.setDimensions(1, 4);
  type.setDocValuesType(DocValuesType.BINARY);
  type.freeze();
  Document doc = new Document();
  byte[] packedPoint = "term".getBytes(StandardCharsets.UTF_8);
  doc.add(new BinaryPoint("field", packedPoint, type));
  MemoryIndex mi = MemoryIndex.fromDocument(doc, analyzer);
  LeafReader leafReader = mi.createSearcher().getIndexReader().leaves().get(0).reader();

  assertEquals(1, leafReader.getPointValues("field").size());
  assertArrayEquals(packedPoint, leafReader.getPointValues("field").getMinPackedValue());
  assertArrayEquals(packedPoint, leafReader.getPointValues("field").getMaxPackedValue());

  BinaryDocValues dvs = leafReader.getBinaryDocValues("field");
  assertEquals(0, dvs.nextDoc());
  assertEquals("term", dvs.binaryValue().utf8ToString());
}
 
Example 14  Project: lucene-solr   File: AnalyzingInfixSuggester.java
private Document buildDocument(BytesRef text, Set<BytesRef> contexts, long weight, BytesRef payload) throws IOException {
  String textString = text.utf8ToString();
  Document doc = new Document();
  FieldType ft = getTextFieldType();
  doc.add(new Field(TEXT_FIELD_NAME, textString, ft));
  if (minPrefixChars>0) {
    doc.add(new Field(TEXTGRAMS_FIELD_NAME, textString, ft));
  }
  doc.add(new StringField(EXACT_TEXT_FIELD_NAME, textString, Field.Store.NO));
  doc.add(new BinaryDocValuesField(TEXT_FIELD_NAME, text));
  doc.add(new NumericDocValuesField("weight", weight));
  if (payload != null) {
    doc.add(new BinaryDocValuesField("payloads", payload));
  }
  if (contexts != null) {
    for(BytesRef context : contexts) {
      doc.add(new StringField(CONTEXTS_FIELD_NAME, context, Field.Store.NO));
      doc.add(new SortedSetDocValuesField(CONTEXTS_FIELD_NAME, context));
    }
  }
  return doc;
}
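
The FieldType used here comes from getTextFieldType(), which subclasses of AnalyzingInfixSuggester can override. In recent Lucene releases it appears to build a tokenized, unstored text type indexed with documents only and with norms omitted, roughly as in the following sketch; treat this as an approximation and check the release you are using for the exact settings.

protected FieldType getTextFieldType() {
  FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
  ft.setIndexOptions(IndexOptions.DOCS);
  ft.setOmitNorms(true);
  return ft;
}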
 
Example 15  Project: lucene-solr   File: TestTermVectorsReader.java
public void testIllegalVectorPositionsWithoutIndexed() throws Exception {
  Directory dir = newDirectory();
  MockAnalyzer a = new MockAnalyzer(random());
  a.setEnableChecks(false);
  RandomIndexWriter w = new RandomIndexWriter(random(), dir, a);
  FieldType ft = new FieldType(StoredField.TYPE);
  ft.setStoreTermVectorPositions(true);
  Document doc = new Document();
  doc.add(new Field("field", "value", ft));
  
  IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
    w.addDocument(doc);
  });
  assertEquals("cannot store term vector positions for a field that is not indexed (field=\"field\")", expected.getMessage());
  
  w.close();
  dir.close();
}
 
Example 16  Project: lucene-solr   File: DocHelper.java
public static Document createDocument(int n, String indexName, int numFields) {
  StringBuilder sb = new StringBuilder();
  FieldType customType = new FieldType(TextField.TYPE_STORED);
  customType.setStoreTermVectors(true);
  customType.setStoreTermVectorPositions(true);
  customType.setStoreTermVectorOffsets(true);

  FieldType customType1 = new FieldType(StringField.TYPE_STORED);
  customType1.setStoreTermVectors(true);
  customType1.setStoreTermVectorPositions(true);
  customType1.setStoreTermVectorOffsets(true);

  final Document doc = new Document();
  doc.add(new Field("id", Integer.toString(n), customType1));
  doc.add(new Field("indexname", indexName, customType1));
  sb.append("a");
  sb.append(n);
  doc.add(new Field("field1", sb.toString(), customType));
  sb.append(" b");
  sb.append(n);
  for (int i = 1; i < numFields; i++) {
    doc.add(new Field("field" + (i + 1), sb.toString(), customType));
  }
  return doc;
}
 
private void addDocumentBlock(int id, int count, IndexWriter writer) throws IOException {
  FieldType fieldType = new FieldType();
  fieldType.setIndexed(true);
  fieldType.setOmitNorms(true);
  fieldType.setTokenized(false);
  fieldType.setStored(true);

  FieldType fieldTypeNoIndex = new FieldType();
  fieldTypeNoIndex.setStored(true);
  fieldTypeNoIndex.setIndexed(false);

  for (int i = 0; i < count; i++) {
    Document document = new Document();
    document.add(new Field("id", Integer.toString(id), fieldType));
    document.add(new Field("field", Integer.toString(i), fieldType));
    for (int j = 0; j < 100; j++) {
      document.add(new Field("field" + j, "testing here testing here testing here testing here testing here testing here testing here", fieldTypeNoIndex));
    }
    writer.addDocument(document);
  }
}
 
@Override
public void configure(String fieldNameForThisInstance, Map<String, String> properties, Configuration configuration) {
  String precisionStepStr = properties.get(NUMERIC_PRECISION_STEP);
  if (precisionStepStr != null) {
    _precisionStep = Integer.parseInt(precisionStepStr);
    _typeStored = new FieldType(LongField.TYPE_STORED);
    _typeStored.setNumericPrecisionStep(_precisionStep);
    _typeStored.freeze();
    _typeNotStored = new FieldType(LongField.TYPE_NOT_STORED);
    _typeNotStored.setNumericPrecisionStep(_precisionStep);
    _typeNotStored.freeze();
  } else {
    _typeStored = LongField.TYPE_STORED;
    _typeNotStored = LongField.TYPE_NOT_STORED;
  }
}
 
Example 19  Project: lucene-solr   File: PresearcherTestBase.java
public void testNonStringTermHandling() throws IOException {

    FieldType ft = new FieldType();
    ft.setTokenized(true);
    ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS);

    try (Monitor monitor = newMonitor()) {
      monitor.register(new MonitorQuery("1", new TermQuery(new Term("f", NON_STRING_TERM))));

      Document doc = new Document();
      doc.add(new Field("f", new NonStringTokenStream(), ft));
      MatchingQueries<QueryMatch> m = monitor.match(doc, QueryMatch.SIMPLE_MATCHER);
      assertEquals(1, m.getMatchCount());
      assertEquals(1, m.getQueriesRun());
    }

  }
 
Example 20  Project: jstarcraft-core   File: StringIndexConverter.java
@Override
public Iterable<IndexableField> convert(LuceneContext context, String path, Field field, LuceneIndex annotation, Type type, Object data) {
    Collection<IndexableField> indexables = new LinkedList<>();
    FieldType configuration = new FieldType();
    configuration.setIndexOptions(IndexOptions.DOCS);
    if (annotation.analyze()) {
        configuration.setTokenized(true);

        LuceneTerm negative = annotation.negative();
        if (negative.offset()) {
            configuration.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
        } else if (negative.position()) {
            configuration.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
        } else if (negative.frequency()) {
            configuration.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
        }

        LuceneTerm positive = annotation.positive();
        if (positive.offset()) {
            configuration.setStoreTermVectorOffsets(true);
        }
        if (positive.position()) {
            configuration.setStoreTermVectorPositions(true);
        }
        if (positive.frequency()) {
            configuration.setStoreTermVectors(true);
        }
    }
    indexables.add(new org.apache.lucene.document.Field(path, (String) data, configuration));
    return indexables;
}
 
Example 21  Project: Elasticsearch   File: GeoPointFieldMapper.java
@Override
public GeoPointFieldMapper build(BuilderContext context, String simpleName, MappedFieldType fieldType,
                                 MappedFieldType defaultFieldType, Settings indexSettings, ContentPath.Type pathType, DoubleFieldMapper latMapper,
                                 DoubleFieldMapper lonMapper, StringFieldMapper geoHashMapper, MultiFields multiFields, Explicit<Boolean> ignoreMalformed,
                                 CopyTo copyTo) {
    fieldType.setTokenized(false);
    if (context.indexCreatedVersion().before(Version.V_2_3_0)) {
        fieldType.setNumericPrecisionStep(GeoPointField.PRECISION_STEP);
        fieldType.setNumericType(FieldType.NumericType.LONG);
    }
    setupFieldType(context);
    return new GeoPointFieldMapper(simpleName, fieldType, defaultFieldType, indexSettings, pathType, latMapper, lonMapper,
            geoHashMapper, multiFields, ignoreMalformed, copyTo);
}
 
Example 22  Project: lucene-solr   File: TestCustomTermFreq.java
public void testInvalidProx() throws Exception {
  Directory dir = newDirectory();
  IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())));

  Document doc = new Document();
  FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
  Field field = new Field("field",
                          new CannedTermFreqs(new String[] {"foo", "bar", "foo", "bar"},
                                              new int[] {42, 128, 17, 100}),
                          fieldType);
  doc.add(field);
  Exception e = expectThrows(IllegalStateException.class, () -> {w.addDocument(doc);});
  assertEquals("field \"field\": cannot index positions while using custom TermFrequencyAttribute", e.getMessage());
  IOUtils.close(w, dir);
}
 
Example 23  Project: lucene-solr   File: TestSloppyPhraseQuery.java
public void testSlopWithHoles() throws Exception {  
  Directory dir = newDirectory();
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
  FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
  customType.setOmitNorms(true);
  Field f = new Field("lyrics", "", customType);
  Document doc = new Document();
  doc.add(f);
  f.setStringValue("drug drug");
  iw.addDocument(doc);
  f.setStringValue("drug druggy drug");
  iw.addDocument(doc);
  f.setStringValue("drug druggy druggy drug");
  iw.addDocument(doc);
  f.setStringValue("drug druggy drug druggy drug");
  iw.addDocument(doc);
  IndexReader ir = iw.getReader();
  iw.close();
  IndexSearcher is = newSearcher(ir);

  PhraseQuery.Builder builder = new PhraseQuery.Builder();
  builder.add(new Term("lyrics", "drug"), 1);
  builder.add(new Term("lyrics", "drug"), 4);
  PhraseQuery pq = builder.build();
  // "drug the drug"~1
  assertEquals(1, is.search(pq, 4).totalHits.value);
  builder.setSlop(1);
  pq = builder.build();
  assertEquals(3, is.search(pq, 4).totalHits.value);
  builder.setSlop(2);
  pq = builder.build();
  assertEquals(4, is.search(pq, 4).totalHits.value);
  ir.close();
  dir.close();
}
 
Example 24  Project: lucene-solr   File: FastVectorHighlighterTest.java
public void testSimpleHighlightTest() throws IOException {
  Directory dir = newDirectory();
  IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
  Document doc = new Document();
  FieldType type = new FieldType(TextField.TYPE_STORED);
  type.setStoreTermVectorOffsets(true);
  type.setStoreTermVectorPositions(true);
  type.setStoreTermVectors(true);
  type.freeze();
  Field field = new Field("field", "This is a test where foo is highlighed and should be highlighted", type);
  
  doc.add(field);
  writer.addDocument(doc);
  FastVectorHighlighter highlighter = new FastVectorHighlighter();
  
  IndexReader reader = DirectoryReader.open(writer);
  int docId = 0;
  FieldQuery fieldQuery  = highlighter.getFieldQuery( new TermQuery(new Term("field", "foo")), reader );
  String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 54, 1);
  // highlighted results are centered 
  assertEquals("This is a test where <b>foo</b> is highlighed and should be highlighted", bestFragments[0]);
  bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 52, 1);
  assertEquals("This is a test where <b>foo</b> is highlighed and should be", bestFragments[0]);
  bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 30, 1);
  assertEquals("a test where <b>foo</b> is highlighed", bestFragments[0]);
  reader.close();
  writer.close();
  dir.close();
}
 
Example 25  Project: lucene-solr   File: FastVectorHighlighterTest.java
public void testFunctionScoreQueryHighlight() throws IOException {
  Directory dir = newDirectory();
  IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
  Document doc = new Document();
  FieldType type = new FieldType(TextField.TYPE_STORED);
  type.setStoreTermVectorOffsets(true);
  type.setStoreTermVectorPositions(true);
  type.setStoreTermVectors(true);
  type.freeze();
  Field field = new Field("field", "This is a test where foo is highlighed and should be highlighted", type);

  doc.add(field);
  writer.addDocument(doc);
  FastVectorHighlighter highlighter = new FastVectorHighlighter();

  IndexReader reader = DirectoryReader.open(writer);
  int docId = 0;
  FieldQuery fieldQuery  = highlighter.getFieldQuery( new FunctionScoreQuery(new TermQuery(new Term("field", "foo")), DoubleValuesSource.constant(1)), reader );
  String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 54, 1);
  // highlighted results are centered
  assertEquals("This is a test where <b>foo</b> is highlighed and should be highlighted", bestFragments[0]);
  bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 52, 1);
  assertEquals("This is a test where <b>foo</b> is highlighed and should be", bestFragments[0]);
  bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 30, 1);
  assertEquals("a test where <b>foo</b> is highlighed", bestFragments[0]);
  reader.close();
  writer.close();
  dir.close();
}
 
Example 26  Project: lucene-solr   File: TestTermVectorsWriter.java
public void testEndOffsetPositionCharAnalyzer() throws Exception {
  Directory dir = newDirectory();
  IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
  Document doc = new Document();
  FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
  customType.setStoreTermVectors(true);
  customType.setStoreTermVectorPositions(true);
  customType.setStoreTermVectorOffsets(true);
  Field f = newField("field", "abcd   ", customType);
  doc.add(f);
  doc.add(f);
  w.addDocument(doc);
  w.close();

  IndexReader r = DirectoryReader.open(dir);
  TermsEnum termsEnum = r.getTermVectors(0).terms("field").iterator();
  assertNotNull(termsEnum.next());
  PostingsEnum dpEnum = termsEnum.postings(null, PostingsEnum.ALL);
  assertEquals(2, termsEnum.totalTermFreq());

  assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
  dpEnum.nextPosition();
  assertEquals(0, dpEnum.startOffset());
  assertEquals(4, dpEnum.endOffset());

  dpEnum.nextPosition();
  assertEquals(8, dpEnum.startOffset());
  assertEquals(12, dpEnum.endOffset());
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, dpEnum.nextDoc());

  r.close();
  dir.close();
}
 
Example 27  Project: lucene-solr   File: HighlighterPhraseTest.java
public void testConcurrentPhrase() throws IOException, InvalidTokenOffsetsException {
  final String TEXT = "the fox jumped";
  final Directory directory = newDirectory();
  final IndexWriter indexWriter = new IndexWriter(directory,
      newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)));
  try {
    final Document document = new Document();
    FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
    customType.setStoreTermVectorOffsets(true);
    customType.setStoreTermVectorPositions(true);
    customType.setStoreTermVectors(true);
    document.add(new Field(FIELD, new TokenStreamConcurrent(), customType));
    indexWriter.addDocument(document);
  } finally {
    indexWriter.close();
  }
  final IndexReader indexReader = DirectoryReader.open(directory);
  try {
    assertEquals(1, indexReader.numDocs());
    final IndexSearcher indexSearcher = newSearcher(indexReader);
    final PhraseQuery phraseQuery = new PhraseQuery(FIELD, "fox", "jumped");
    TopDocs hits = indexSearcher.search(phraseQuery, 1);
    assertEquals(1, hits.totalHits.value);
    final Highlighter highlighter = new Highlighter(
        new SimpleHTMLFormatter(), new SimpleHTMLEncoder(),
        new QueryScorer(phraseQuery));

    final TokenStream tokenStream =
        TokenSources.getTermVectorTokenStreamOrNull(FIELD, indexReader.getTermVectors(0), -1);
    assertEquals(highlighter.getBestFragment(new TokenStreamConcurrent(),
        TEXT), highlighter.getBestFragment(tokenStream, TEXT));
  } finally {
    indexReader.close();
    directory.close();
  }
}
 
Example 28  Project: lucene-solr   File: HighlighterPhraseTest.java
public void testSparsePhraseWithNoPositions() throws IOException, InvalidTokenOffsetsException {
  final String TEXT = "the fox did not jump";
  final Directory directory = newDirectory();
  final IndexWriter indexWriter = new IndexWriter(directory,
      newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)));
  try {
    final Document document = new Document();

    FieldType customType = new FieldType(TextField.TYPE_STORED);
    customType.setStoreTermVectorOffsets(true);
    customType.setStoreTermVectors(true);
    document.add(new Field(FIELD, TEXT, customType));
    indexWriter.addDocument(document);
  } finally {
    indexWriter.close();
  }
  final IndexReader indexReader = DirectoryReader.open(directory);
  try {
    assertEquals(1, indexReader.numDocs());
    final IndexSearcher indexSearcher = newSearcher(indexReader);
    final PhraseQuery phraseQuery = new PhraseQuery(1, FIELD, "did", "jump");
    TopDocs hits = indexSearcher.search(phraseQuery, 1);
    assertEquals(1, hits.totalHits.value);
    final Highlighter highlighter = new Highlighter(
        new SimpleHTMLFormatter(), new SimpleHTMLEncoder(),
        new QueryScorer(phraseQuery));
    final TokenStream tokenStream =
        TokenSources.getTermVectorTokenStreamOrNull(FIELD, indexReader.getTermVectors(0), -1);
    assertEquals("the fox <B>did</B> not <B>jump</B>", highlighter
        .getBestFragment(tokenStream, TEXT));
  } finally {
    indexReader.close();
    directory.close();
  }
}
 
Example 29  Project: lucene-solr   File: SolrDocumentFetcher.java
@Override
public void stringField(FieldInfo fieldInfo, String value) throws IOException {
  Predicate<String> readAsBytes = ResultContext.READASBYTES.get();
  if (readAsBytes != null && readAsBytes.test(fieldInfo.name)) {
    final FieldType ft = new FieldType(TextField.TYPE_STORED);
    ft.setStoreTermVectors(fieldInfo.hasVectors());
    ft.setOmitNorms(fieldInfo.omitsNorms());
    ft.setIndexOptions(fieldInfo.getIndexOptions());
    Objects.requireNonNull(value, "String value should not be null");
    doc.add(new StoredField(fieldInfo.name, value, ft));
  } else {
    super.stringField(fieldInfo, value);
  }

}
 
Example 30  Project: mmseg4j-solr   File: UseLucene.java
private Document createDoc(int id) {
	Document doc = new Document();
	FieldType ft = new FieldType();
	ft.setTokenized(true);
	ft.setStored(true);
	ft.setIndexOptions(IndexOptions.DOCS);
	doc.add(new Field("id", "" + id, ft));

	FieldType ft2 = new FieldType();
	ft2.setTokenized(true);
	ft2.setStored(true);
	ft2.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
	doc.add(new Field("name", "echo ensh id " + id, ft2));
	return doc;
}