org.apache.lucene.search.AutomatonQuery#org.apache.lucene.util.automaton.Automata源码实例Demo

下面列出了org.apache.lucene.search.AutomatonQuery#org.apache.lucene.util.automaton.Automata 实例代码，或者点击链接到github查看源代码，也可以在右侧发表评论。

源代码1 项目： Elasticsearch 文件： IncludeExclude.java

/**
 * Builds the automaton of accepted values from the include/exclude settings.
 *
 * <p>The accepted side is taken from {@code include}, else from {@code includeValues}, else it
 * is "any string". If {@code exclude} or, failing that, {@code excludeValues} is set, its
 * automaton is subtracted from the accepted side.
 *
 * @return the resulting automaton
 */
private Automaton toAutomaton() {
    final Automaton accepted;
    if (include != null) {
        accepted = include.toAutomaton();
    } else if (includeValues != null) {
        accepted = Automata.makeStringUnion(includeValues);
    } else {
        accepted = Automata.makeAnyString();
    }

    final Automaton rejected;
    if (exclude != null) {
        rejected = exclude.toAutomaton();
    } else if (excludeValues != null) {
        rejected = Automata.makeStringUnion(excludeValues);
    } else {
        // nothing to subtract
        return accepted;
    }
    return Operations.minus(accepted, rejected, Operations.DEFAULT_MAX_DETERMINIZED_STATES);
}

源代码2 项目： Elasticsearch 文件： ContextMapping.java

/**
 * Builds the automaton for a list of context queries; this automaton will be used
 * to find the matching paths with the fst.
 *
 * @param preserveSep if set, an additional char (<code>XAnalyzingSuggester.SEP_LABEL</code>) follows the separator of each context query
 * @param queries list of {@link ContextQuery} defining the lookup context
 *
 * @return determinized Automaton matching the given queries
 */
public static Automaton toAutomaton(boolean preserveSep, Iterable<ContextQuery> queries) {
    final Automaton separator = Automata.makeChar(ContextMapping.SEPARATOR);
    // When separators are preserved the fst contains a SEP_LABEL behind each gap,
    // so the query automaton has to include that label as well in order to match.
    final Automaton gap = preserveSep
        ? Operations.concatenate(separator, Automata.makeChar(XAnalyzingSuggester.SEP_LABEL))
        : separator;

    Automaton result = Automata.makeEmptyString();
    for (ContextQuery query : queries) {
        // every query is put in front of what has been built so far: query, gap, rest
        result = Operations.concatenate(Arrays.asList(query.toAutomaton(), gap, result));
    }

    // TODO: should we limit this?  Do any of our ContextQuery impls really create exponential regexps?  GeoQuery looks safe (union
    // of strings).
    return Operations.determinize(result, Integer.MAX_VALUE);
}

源代码3 项目： lucene-solr 文件： ContextQuery.java

/**
 * Builds the automaton for the context part of a suggestion, including the trailing
 * context separator.
 *
 * @param contexts context values mapped to their metadata
 * @param matchAllContexts whether every context should be accepted
 * @return "anything followed by the separator" when {@code matchAllContexts} is set or
 *         {@code contexts} is empty; otherwise the union of one automaton per context
 */
private static Automaton toContextAutomaton(final Map<IntsRef, ContextMetaData> contexts, final boolean matchAllContexts) {
  final Automaton anyContext = Operations.repeat(Automata.makeAnyString());
  final Automaton separator = Automata.makeChar(ContextSuggestField.CONTEXT_SEPARATOR);
  if (matchAllContexts || contexts.isEmpty()) {
    return Operations.concatenate(anyContext, separator);
  }

  Automaton combined = null;
  for (Map.Entry<IntsRef, ContextMetaData> entry : contexts.entrySet()) {
    final IntsRef context = entry.getKey();
    Automaton single = Automata.makeString(context.ints, context.offset, context.length);
    if (entry.getValue().exact == false) {
      // non-exact contexts also accept anything that follows the given value
      single = Operations.concatenate(single, anyContext);
    }
    single = Operations.concatenate(single, separator);
    combined = (combined == null) ? single : Operations.union(combined, single);
  }
  return combined;
}

源代码4 项目： lucene-solr 文件： TestPrecedenceQueryParser.java

/**
 * Parses boosted terms and phrases with an analyzer whose stop set is the word "on" and checks
 * that every parse returns a query and that a boost of 2.0 is carried by the parsed query.
 * Finally parses a boosted word with the English stop set and checks a query is returned.
 */
public void testBoost() throws Exception {
  CharacterRunAutomaton stopSet = new CharacterRunAutomaton(Automata.makeString("on"));
  Analyzer oneStopAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopSet);

  PrecedenceQueryParser qp = new PrecedenceQueryParser();
  qp.setAnalyzer(oneStopAnalyzer);
  Query q = qp.parse("on^1.0", "field");
  assertNotNull(q);
  q = qp.parse("\"hello\"^2.0", "field");
  assertNotNull(q);
  // expected value goes first so that a failure reports "expected"/"actual" the right way round
  assertEquals(2.0f, ((BoostQuery) q).getBoost(), 0.5f);
  q = qp.parse("hello^2.0", "field");
  assertNotNull(q);
  assertEquals(2.0f, ((BoostQuery) q).getBoost(), 0.5f);
  q = qp.parse("\"on\"^1.0", "field");
  assertNotNull(q);

  q = getParser(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET)).parse("the^3",
      "field");
  assertNotNull(q);
}

源代码5 项目： lucene-solr 文件： TestQPHelper.java

/**
 * Parses boosted terms and phrases with an analyzer whose stop set is the word "on" and checks
 * that a boost of 2.0 is carried by the parsed query. Also checks that a boosted stop word
 * yields a match-no-docs query that is not wrapped in a {@code BoostQuery}.
 */
public void testBoost() throws Exception {
  CharacterRunAutomaton stopSet = new CharacterRunAutomaton(Automata.makeString("on"));
  Analyzer oneStopAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopSet);
  StandardQueryParser qp = new StandardQueryParser();
  qp.setAnalyzer(oneStopAnalyzer);

  Query q = qp.parse("on^1.0", "field");
  assertNotNull(q);
  q = qp.parse("\"hello\"^2.0", "field");
  assertNotNull(q);
  // expected value goes first so that a failure reports "expected"/"actual" the right way round
  assertEquals(2.0f, ((BoostQuery) q).getBoost(), 0.5f);
  q = qp.parse("hello^2.0", "field");
  assertNotNull(q);
  assertEquals(2.0f, ((BoostQuery) q).getBoost(), 0.5f);
  q = qp.parse("\"on\"^1.0", "field");
  assertNotNull(q);

  StandardQueryParser qp2 = new StandardQueryParser();
  qp2.setAnalyzer(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET));

  q = qp2.parse("the^3", "field");
  // "the" is a stop word so the result is an empty query:
  assertNotNull(q);
  assertMatchNoDocsQuery(q);
  assertFalse(q instanceof BoostQuery);
}

源代码6 项目： lucene-solr 文件： QueryParserTestBase.java

/**
 * Parses boosted terms and phrases with an analyzer whose stop set is the word "on" and checks
 * that a boost of 2.0 is carried by the parsed query. Also checks that a boosted stop word
 * yields a match-no-docs query that is not wrapped in a {@code BoostQuery}.
 */
public void testBoost()
  throws Exception {
  CharacterRunAutomaton stopWords = new CharacterRunAutomaton(Automata.makeString("on"));
  Analyzer oneStopAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopWords);
  CommonQueryParserConfiguration qp = getParserConfig(oneStopAnalyzer);
  Query q = getQuery("on^1.0",qp);
  assertNotNull(q);
  q = getQuery("\"hello\"^2.0",qp);
  assertNotNull(q);
  // expected value goes first so that a failure reports "expected"/"actual" the right way round
  assertEquals(2.0f, ((BoostQuery) q).getBoost(), 0.5f);
  q = getQuery("hello^2.0",qp);
  assertNotNull(q);
  assertEquals(2.0f, ((BoostQuery) q).getBoost(), 0.5f);
  q = getQuery("\"on\"^1.0",qp);
  assertNotNull(q);

  Analyzer a2 = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET);
  CommonQueryParserConfiguration qp2 = getParserConfig(a2);
  q = getQuery("the^3", qp2);
  // "the" is a stop word so the result is an empty query:
  assertNotNull(q);
  assertMatchNoDocsQuery(q);
  assertFalse(q instanceof BoostQuery);
}

源代码7 项目： lucene-solr 文件： TestRegexpQuery.java

/** Runs a regexp query whose named automaton {@code <quickBrown>} is supplied by a custom provider. */
public void testCustomProvider() throws IOException {
  AutomatonProvider myProvider = new AutomatonProvider() {
    // automaton that matches "quick", "brown" or "bob"
    private Automaton quickBrownAutomaton = Operations.union(
        Arrays.asList(
            Automata.makeString("quick"),
            Automata.makeString("brown"),
            Automata.makeString("bob")));

    @Override
    public Automaton getAutomaton(String name) {
      // only the one known name is resolved, everything else is unknown
      return name.equals("quickBrown") ? quickBrownAutomaton : null;
    }
  };
  RegexpQuery query = new RegexpQuery(
      newTerm("<quickBrown>"), RegExp.ALL, myProvider, DEFAULT_MAX_DETERMINIZED_STATES);
  long hits = searcher.search(query, 5).totalHits.value;
  assertEquals(1, hits);
}

源代码8 项目： lucene-solr 文件： TestAutomatonQuery.java

/**
 * Checks the hit counts of a handful of very simple automata.
 */
public void testAutomata() throws IOException {
  // automata straight from the factory methods
  assertAutomatonHits(0, Automata.makeEmpty());
  assertAutomatonHits(0, Automata.makeEmptyString());
  assertAutomatonHits(2, Automata.makeAnyChar());
  assertAutomatonHits(3, Automata.makeAnyString());
  assertAutomatonHits(2, Automata.makeString("doc"));
  assertAutomatonHits(1, Automata.makeChar('a'));
  assertAutomatonHits(2, Automata.makeCharRange('a', 'b'));
  assertAutomatonHits(2, Automata.makeDecimalInterval(1233, 2346, 0));
  assertAutomatonHits(1, Automata.makeDecimalInterval(0, 2000, 0));

  // set operations on single-character automata
  Automaton aOrB = Operations.union(Automata.makeChar('a'), Automata.makeChar('b'));
  assertAutomatonHits(2, aOrB);

  Automaton aAndB = Operations.intersection(Automata.makeChar('a'), Automata.makeChar('b'));
  assertAutomatonHits(0, aAndB);

  Automaton rangeWithoutA = Operations.minus(
      Automata.makeCharRange('a', 'b'), Automata.makeChar('a'), DEFAULT_MAX_DETERMINIZED_STATES);
  assertAutomatonHits(1, rangeWithoutA);
}

源代码9 项目： Elasticsearch 文件： CategoryContextMapping.java

/**
 * Builds an automaton accepting exactly the strings in {@code values}.
 *
 * @return the union of one single-string automaton per value
 */
@Override
public Automaton toAutomaton() {
    final List<Automaton> perValue = new ArrayList<>();
    for (CharSequence category : values) {
        final String text = category.toString();
        perValue.add(Automata.makeString(text));
    }
    return Operations.union(perValue);
}

源代码10 项目： Elasticsearch 文件： GeolocationContextMapping.java

/**
 * Builds the automaton for {@code location}.
 *
 * <p>Without precisions the automaton accepts the full location string. Otherwise it accepts,
 * for every precision, the prefix of the location of that length; the length is clamped to
 * the range from 1 to the location length.
 *
 * @return automaton accepting the location or its prefixes
 */
@Override
public Automaton toAutomaton() {
    if (precisions == null || precisions.length == 0) {
        return Automata.makeString(location);
    }

    Automaton cells = null;
    for (int i = 0; i < precisions.length; i++) {
        final int cellLength = Math.max(1, Math.min(location.length(), precisions[i]));
        final Automaton cell = Automata.makeString(location.substring(0, cellLength));
        cells = (cells == null) ? cell : Operations.union(cells, cell);
    }
    return cells;
}

源代码11 项目： lucene-solr 文件： TokenStreamOffsetStrategy.java

/**
 * Returns a new array holding one labelled matcher per term, followed by the given matchers.
 *
 * @param terms terms to turn into matchers; each is decoded with {@code utf8ToString()}
 * @param matchers existing matchers, copied unchanged behind the term matchers
 * @return array of length {@code terms.length + matchers.length}
 */
private static CharArrayMatcher[] convertTermsToMatchers(BytesRef[] terms, CharArrayMatcher[] matchers) {
  final int termCount = terms.length;
  final CharArrayMatcher[] combined = new CharArrayMatcher[termCount + matchers.length];
  int slot = 0;
  for (BytesRef term : terms) {
    final String text = term.utf8ToString();
    final CharacterRunAutomaton exactTerm = new CharacterRunAutomaton(Automata.makeString(text));
    combined[slot++] = LabelledCharArrayMatcher.wrap(text, exactTerm::run);
  }
  // the existing matchers (those used for MTQs) go after the term matchers
  System.arraycopy(matchers, 0, combined, termCount, matchers.length);
  return combined;
}

源代码12 项目： lucene-solr 文件： RegexCompletionQuery.java

/**
 * Creates the completion weight for this query's regular expression.
 *
 * <p>An empty regex yields an automaton that matches nothing. This keeps the behavior
 * consistent with PrefixCompletionQuery, which returns no results for an empty term.
 */
@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
  final String regex = getTerm().text();
  final Automaton automaton;
  if (regex.isEmpty()) {
    automaton = Automata.makeEmpty();
  } else {
    automaton = new RegExp(regex, flags).toAutomaton(maxDeterminizedStates);
  }
  return new CompletionWeight(this, automaton);
}

源代码13 项目： lucene-solr 文件： ContextQuery.java

/**
 * Creates the weight for this context query by putting the context automaton in front of the
 * inner query's automaton.
 *
 * <p>Besides the combined automaton, the weight receives the boost of every context and the
 * distinct context lengths in descending order.
 */
@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
  final CompletionWeight innerWeight = (CompletionWeight) innerQuery.createWeight(searcher, scoreMode, boost);
  final Automaton innerAutomaton = innerWeight.getAutomaton();

  // An inner automaton without states matches nothing: return an empty weight right away
  // to avoid traversing all contexts during scoring.
  if (innerAutomaton.getNumStates() == 0) {
    return new CompletionWeight(this, innerAutomaton);
  }

  // When separators are preserved the fst contains a SEP_LABEL behind each gap, so the
  // query automaton has to (optionally) accept that label as well in order to match.
  final Automaton optionalSepLabel = Operations.optional(Automata.makeChar(ConcatenateGraphFilter.SEP_LABEL));
  final Automaton prefixAutomaton = Operations.concatenate(optionalSepLabel, innerAutomaton);
  final Automaton contextsAutomaton = Operations.determinize(
      Operations.concatenate(toContextAutomaton(contexts, matchAllContexts), prefixAutomaton),
      Operations.DEFAULT_MAX_DETERMINIZED_STATES);

  final Map<IntsRef, Float> contextMap = new HashMap<>(contexts.size());
  final TreeSet<Integer> contextLengths = new TreeSet<>();
  for (Map.Entry<IntsRef, ContextMetaData> entry : contexts.entrySet()) {
    final IntsRef context = entry.getKey();
    contextMap.put(context, entry.getValue().boost);
    contextLengths.add(context.length);
  }

  // distinct lengths, longest first
  final int[] contextLengthArray = new int[contextLengths.size()];
  int next = 0;
  for (Integer length : contextLengths.descendingSet()) {
    contextLengthArray[next++] = length;
  }
  return new ContextCompletionWeight(this, contextsAutomaton, innerWeight, contextMap, contextLengthArray);
}

源代码14 项目： lucene-solr 文件： SearchEquivalenceTestBase.java

/**
 * Builds the index used by this class: chooses a stopword from {@code randomChar()}, indexes
 * documents with an "id" and a random "field" value, issues some deletes by random id and
 * opens two searchers ({@code s1}, {@code s2}) over the same reader.
 */
@BeforeClass
public static void beforeClass() throws Exception {
  Random random = random();
  directory = newDirectory();
  stopword = "" + randomChar();
  // the analyzer receives an automaton accepting exactly the stopword
  CharacterRunAutomaton stopset = new CharacterRunAutomaton(Automata.makeString(stopword));
  analyzer = new MockAnalyzer(random, MockTokenizer.WHITESPACE, false, stopset);
  RandomIndexWriter iw = new RandomIndexWriter(random, directory, analyzer);
  // one Document instance with two fields is reused; only the field values change per doc
  Document doc = new Document();
  Field id = new StringField("id", "", Field.Store.NO);
  Field field = new TextField("field", "", Field.Store.NO);
  doc.add(id);
  doc.add(field);
  
  // index some docs
  int numDocs = TEST_NIGHTLY ? atLeast(1000) : atLeast(100);
  for (int i = 0; i < numDocs; i++) {
    id.setStringValue(Integer.toString(i));
    field.setStringValue(randomFieldContents());
    iw.addDocument(doc);
  }
  
  // delete some docs
  // numDocs/20 delete attempts; ids are drawn at random, so the same id may be picked twice
  int numDeletes = numDocs/20;
  for (int i = 0; i < numDeletes; i++) {
    Term toDelete = new Term("id", Integer.toString(random.nextInt(numDocs)));
    // randomly exercise both delete-by-term and delete-by-query
    if (random.nextBoolean()) {
      iw.deleteDocuments(toDelete);
    } else {
      iw.deleteDocuments(new TermQuery(toDelete));
    }
  }
  
  // both searchers are opened on the same reader
  reader = iw.getReader();
  s1 = newSearcher(reader);
  s2 = newSearcher(reader);
  iw.close();
}

源代码15 项目： lucene-solr 文件： TestMockAnalyzer.java

/** Tests a configuration that behaves a lot like KeepWordFilter: only "foo" and "bar" come through. */
public void testKeep() throws Exception {
  // the analyzer is handed the complement of {"foo", "bar"}
  CharacterRunAutomaton keepWords = new CharacterRunAutomaton(
      Operations.complement(
          Operations.union(Arrays.asList(Automata.makeString("foo"), Automata.makeString("bar"))),
          DEFAULT_MAX_DETERMINIZED_STATES));
  Analyzer a = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, keepWords);

  String[] expectedTerms = { "foo", "bar", "bar", "foo" };
  // presumably the expected position increments of the surviving tokens
  int[] expectedIncrements = { 2, 2, 1, 2 };
  assertAnalyzesTo(a, "quick foo brown bar bar fox foo", expectedTerms, expectedIncrements);
}

源代码16 项目： lucene-solr 文件： TermRangeQuery.java

/**
 * Builds the binary-interval automaton for a term range.
 *
 * @param lowerTerm lower bound, or {@code null}
 * @param upperTerm upper bound, or {@code null}
 * @param includeLower whether the lower bound is part of the range; treated as {@code true}
 *        when {@code lowerTerm} is {@code null}
 * @param includeUpper whether the upper bound is part of the range; treated as {@code true}
 *        when {@code upperTerm} is {@code null}
 * @return the automaton returned by {@code Automata.makeBinaryInterval}
 */
public static Automaton toAutomaton(BytesRef lowerTerm, BytesRef upperTerm, boolean includeLower, boolean includeUpper) {
    // makeBinaryInterval is more picky than we are: a null bound is always passed as inclusive
    final boolean lowerInclusive = lowerTerm == null || includeLower;
    final boolean upperInclusive = upperTerm == null || includeUpper;

    return Automata.makeBinaryInterval(lowerTerm, lowerInclusive, upperTerm, upperInclusive);
  }

源代码17 项目： lucene-solr 文件： TermInSetQuery.java

/**
 * Compiles the terms of {@code termData} into a run automaton.
 *
 * @return the {@code runAutomaton} of the compiled union of one binary automaton per term
 */
private ByteRunAutomaton asByteRunAutomaton() {
  final List<Automaton> perTerm = new ArrayList<>();
  final TermIterator terms = termData.iterator();
  BytesRef term;
  // the iterator signals its end by returning null
  while ((term = terms.next()) != null) {
    perTerm.add(Automata.makeBinary(term));
  }
  final CompiledAutomaton compiled = new CompiledAutomaton(Operations.union(perTerm));
  return compiled.runAutomaton;
}

源代码18 项目： lucene-solr 文件： TestAutomatonQuery.java

/**
 * Tests that a nondeterministic automaton works correctly. (It should be
 * determinized.)
 */
public void testNFA() throws IOException {
  // accepts "this" or "three"; the union is an NFA because the initial state
  // has two transitions for 't'
  Automaton thisAutomaton = Automata.makeString("this");
  Automaton threeAutomaton = Automata.makeString("three");
  Automaton nfa = Operations.union(thisAutomaton, threeAutomaton);
  assertAutomatonHits(2, nfa);
}

源代码19 项目： lucene-solr 文件： TestAutomatonQuery.java

/**
 * Checks equals/hashCode of {@code AutomatonQuery}: queries with the same term and the same
 * accepted language are equal; a different term, language or query class makes them unequal.
 */
public void testEquals() {
  AutomatonQuery original = new AutomatonQuery(newTerm("foobar"), Automata.makeString("foobar"));
  // the very same instance
  AutomatonQuery alias = original;
  // same term and same language, but the automaton is built differently
  Automaton fooThenBar = Operations.concatenate(Automata.makeString("foo"), Automata.makeString("bar"));
  AutomatonQuery sameLanguage = new AutomatonQuery(newTerm("foobar"), fooThenBar);
  // same term, different language
  AutomatonQuery otherLanguage = new AutomatonQuery(newTerm("foobar"), Automata.makeString("different"));
  // different term, same language
  AutomatonQuery otherTerm = new AutomatonQuery(newTerm("blah"), Automata.makeString("foobar"));

  assertEquals(original.hashCode(), alias.hashCode());
  assertEquals(original, alias);

  assertEquals(original.hashCode(), sameLanguage.hashCode());
  assertEquals(original, sameLanguage);

  // queries of a different class
  AutomatonQuery wildcard = new WildcardQuery(newTerm("foobar"));
  AutomatonQuery regexp = new RegexpQuery(newTerm("foobar"));

  assertFalse(original.equals(wildcard));
  assertFalse(original.equals(regexp));
  assertFalse(wildcard.equals(regexp));
  assertFalse(original.equals(otherLanguage));
  assertFalse(original.equals(otherTerm));
  assertFalse(original.equals(null));
}

源代码20 项目： lucene-solr 文件： TestAutomatonQuery.java

/**
 * Tests that an automaton accepting a single string is rewritten to a single-term
 * enum and that MultiTermQuery semantics are preserved.
 */
public void testRewriteSingleTerm() throws IOException {
  Automaton singleString = Automata.makeString("piece");
  AutomatonQuery query = new AutomatonQuery(newTerm("bogus"), singleString);
  Terms fieldTerms = MultiTerms.getTerms(searcher.getIndexReader(), FN);
  assertTrue(query.getTermsEnum(fieldTerms) instanceof SingleTermsEnum);
  assertEquals(1, automatonQueryNrHits(query));
}

源代码21 项目： lucene-solr 文件： TestAutomatonQuery.java

/**
 * Tests that an automaton of the form "prefix followed by anything" works as
 * expected and preserves MultiTermQuery semantics.
 */
public void testRewritePrefix() throws IOException {
  // "do" followed by any string
  Automaton prefixAutomaton = Operations.concatenate(Automata.makeString("do"), Automata.makeAnyString());
  AutomatonQuery query = new AutomatonQuery(newTerm("bogus"), prefixAutomaton);
  assertEquals(3, automatonQueryNrHits(query));
}

源代码22 项目： lucene-solr 文件： TestAutomatonQuery.java

/**
 * Tests the handling of the empty language: the query must use the shared
 * empty terms enum and must not match anything.
 */
public void testEmptyOptimization() throws IOException {
  Automaton nothing = Automata.makeEmpty();
  AutomatonQuery query = new AutomatonQuery(newTerm("bogus"), nothing);
  // not yet available: assertTrue(aq.getEnum(searcher.getIndexReader())
  // instanceof EmptyTermEnum);
  Terms fieldTerms = MultiTerms.getTerms(searcher.getIndexReader(), FN);
  assertSame(TermsEnum.EMPTY, query.getTermsEnum(fieldTerms));
  assertEquals(0, automatonQueryNrHits(query));
}

源代码23 项目： lucene-solr 文件： TestAutomatonQuery.java

/**
 * Constructs an {@code AutomatonQuery} from the string union of many random terms
 * (3000 on nightly runs, 500 otherwise); the test passes if construction completes.
 */
public void testBiggishAutomaton() {
  final int numTerms = TEST_NIGHTLY ? 3000 : 500;
  final List<BytesRef> terms = new ArrayList<>();
  for (int i = 0; i < numTerms; i++) {
    terms.add(new BytesRef(TestUtil.randomUnicodeString(random())));
  }
  // the terms are sorted before the union is built
  Collections.sort(terms);
  new AutomatonQuery(new Term("foo", "bar"), Automata.makeStringUnion(terms), Integer.MAX_VALUE);
}

源代码24 项目： lucene-solr 文件： TestIndexWriter.java

/**
 * Indexes two values of the same field through a chain of two stop filters and checks that
 * a phrase query with "just" at position 0 and "test" at position 3 finds the document.
 */
public void testStopwordsPosIncHole2() throws Exception {
  // use two stopfilters for testing here
  Directory directory = newDirectory();
  final Automaton foobarOnly = Automata.makeString("foobar");
  Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer source = new MockTokenizer();
      // first filter: English stop set, second filter: the word "foobar"
      TokenStream filtered = new MockTokenFilter(source, MockTokenFilter.ENGLISH_STOPSET);
      filtered = new MockTokenFilter(filtered, new CharacterRunAutomaton(foobarOnly));
      return new TokenStreamComponents(source, filtered);
    }
  };
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory, analyzer);
  Document document = new Document();
  document.add(new TextField("body", "just a foobar", Field.Store.NO));
  document.add(new TextField("body", "test of gaps", Field.Store.NO));
  writer.addDocument(document);
  IndexReader indexReader = writer.getReader();
  writer.close();
  IndexSearcher indexSearcher = newSearcher(indexReader);

  // body:"just ? ? test"
  PhraseQuery.Builder phrase = new PhraseQuery.Builder();
  phrase.add(new Term("body", "just"), 0);
  phrase.add(new Term("body", "test"), 3);
  PhraseQuery phraseQuery = phrase.build();
  long hits = indexSearcher.search(phraseQuery, 5).totalHits.value;
  assertEquals(1, hits);

  indexReader.close();
  directory.close();
}

源代码25 项目： BioSolr 文件： XJoinQParserPlugin.java

/**
 * Creates a filter on field {@code fname} that accepts the terms produced by the iterator.
 *
 * @param fname name of the field to filter on
 * @param it iterator over the accepted terms
 * @return filter wrapping an {@code AutomatonQuery} over the union of the terms
 */
@Override
@SuppressWarnings("unchecked")
Filter makeFilter(String fname, Iterator<BytesRef> it) {
  // NOTE(review): makeStringUnion is documented to expect sorted input - confirm the iterator's order
  final AutomatonQuery termsQuery =
      new AutomatonQuery(new Term(fname), Automata.makeStringUnion(IteratorUtils.toList(it)));
  // instantiated through an empty anonymous subclass
  return new MultiTermQueryWrapperFilter<AutomatonQuery>(termsQuery) {
  };
}

源代码26 项目： crate 文件： Regex.java

/**
 * Return an {@link Automaton} that matches the given pattern: every {@code '*'} stands
 * for any string, all other characters are matched literally.
 */
public static Automaton simpleMatchToAutomaton(String pattern) {
    final List<Automaton> parts = new ArrayList<>();
    int literalStart = 0;
    int star = pattern.indexOf('*');
    while (star != -1) {
        // literal text in front of the wildcard, then the wildcard itself
        parts.add(Automata.makeString(pattern.substring(literalStart, star)));
        parts.add(Automata.makeAnyString());
        literalStart = star + 1;
        star = pattern.indexOf('*', literalStart);
    }
    // whatever follows the last wildcard (the whole pattern if there is none)
    parts.add(Automata.makeString(pattern.substring(literalStart)));
    return Operations.concatenate(parts);
}

源代码27 项目： crate 文件： XContentMapValues.java

/**
 * Returns a function that filters a document map based on the given include and exclude rules.
 * Missing or empty includes accept everything; missing or empty excludes reject nothing.
 * @see #filter(Map, String[], String[]) for details
 */
public static Function<Map<String, ?>, Map<String, Object>> filter(String[] includes, String[] excludes) {
    final CharacterRunAutomaton matchAllAutomaton = new CharacterRunAutomaton(Automata.makeAnyString());

    final boolean noIncludes = includes == null || includes.length == 0;
    final CharacterRunAutomaton include = noIncludes
        ? matchAllAutomaton
        : new CharacterRunAutomaton(makeMatchDotsInFieldNames(Regex.simpleMatchToAutomaton(includes)));

    final boolean noExcludes = excludes == null || excludes.length == 0;
    final Automaton excludeA = noExcludes
        ? Automata.makeEmpty()
        : makeMatchDotsInFieldNames(Regex.simpleMatchToAutomaton(excludes));
    final CharacterRunAutomaton exclude = new CharacterRunAutomaton(excludeA);

    // NOTE: We cannot use Operations.minus because of the special case that
    // we want all sub properties to match as soon as an object matches

    return map -> filter(map, include, 0, exclude, 0, matchAllAutomaton);
}

源代码28 项目： lucene-solr 文件： CharArrayMatcher.java

/**
 * Creates a matcher backed by a run automaton over the string union of the given terms.
 *
 * @param terms the terms to build the union from
 * @return matcher delegating to the automaton's {@code run} method
 */
static CharArrayMatcher fromTerms(List<BytesRef> terms) {
  final CharacterRunAutomaton anyOfTerms =
      new CharacterRunAutomaton(Automata.makeStringUnion(terms));
  return anyOfTerms::run;
}

源代码29 项目： lucene-solr 文件： HighlighterTest.java

/**
 * Checks that the best fragment stays shorter than the highlighter's
 * {@code maxDocCharsToAnalyze} setting (100 here) for a text made of one matching word
 * followed by 10000 repetitions of a stop word, and again after the matching word is
 * appended once more at the very end of the text.
 */
public void testMaxSizeHighlightTruncates() throws Exception {
  TestHighlightRunner helper = new TestHighlightRunner() {

    @Override
    public void run() throws Exception {
      String goodWord = "goodtoken";
      CharacterRunAutomaton stopWords = new CharacterRunAutomaton(Automata.makeString("stoppedtoken"));
      // we disable MockTokenizer checks because we will forcefully limit the
      // tokenstream and call end() before incrementToken() returns false.
      final MockAnalyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopWords);
      analyzer.setEnableChecks(false);
      TermQuery query = new TermQuery(new Term("data", goodWord));

      String match;
      // text under test: the matching word followed by a long run of the stop word
      StringBuilder sb = new StringBuilder();
      sb.append(goodWord);
      for (int i = 0; i < 10000; i++) {
        sb.append(" ");
        // only one stopword
        sb.append("stoppedtoken");
      }
      SimpleHTMLFormatter fm = new SimpleHTMLFormatter();
      // previously: new Highlighter(fm, new QueryTermScorer(query))
      Highlighter hg = getHighlighter(query, "data", fm);
      hg.setTextFragmenter(new NullFragmenter());
      hg.setMaxDocCharsToAnalyze(100);
      match = hg.getBestFragment(analyzer, "data", sb.toString());
      // note: the check is strictly "less than" the configured limit
      assertTrue("Matched text should be no more than 100 chars in length ", match.length() < hg
          .getMaxDocCharsToAnalyze());

      // add another tokenized word to the overall length - but set way beyond
      // the length of text under consideration (after a large slug of stop
      // words + whitespace)
      sb.append(" ");
      sb.append(goodWord);
      match = hg.getBestFragment(analyzer, "data", sb.toString());
      assertTrue("Matched text should be no more than 100 chars in length ", match.length() < hg
          .getMaxDocCharsToAnalyze());
    }
  };

  helper.start();

}

源代码30 项目： lucene-solr 文件： TestTermsEnum.java

/**
 * Expects an {@code IllegalArgumentException} when an {@code AutomatonTermsEnum} is created
 * over {@code TermsEnum.EMPTY} with the compiled automaton of the single string "foo".
 */
public void testInvalidAutomatonTermsEnum() throws Exception {
  expectThrows(
      IllegalArgumentException.class,
      () -> new AutomatonTermsEnum(TermsEnum.EMPTY, new CompiledAutomaton(Automata.makeString("foo"))));
}

方法所在类

org.apache.lucene.search.AutomatonQuery