java.text.BreakIterator源码实例Demo

类java.text.BreakIterator源码实例Demo

下面列出了java.text.BreakIterator 类实例代码,或者点击链接到github查看源代码,也可以在右侧发表评论。

源代码1 项目: HtmlUnit-Android   文件: V8BreakIterator.java
/**
 * Classifies the text between the previous boundary and the current position.
 * <p>
 * This implementation only ever produces {@code letter}, {@code number} or {@code none};
 * when {@code typeAlwaysNone_} is set the iterator is not touched at all.
 * @return {@code letter} if the token contains an ASCII letter, {@code number} if it
 *         consists solely of ASCII digits, otherwise {@code none}
 */
@JsxFunction
public String breakType() {
    if (typeAlwaysNone_) {
        return "none";
    }

    final int tokenEnd = current();
    final int tokenStart = breakIterator_.previous();
    // previous() moved the underlying iterator; step forward again
    // (or go to the first boundary when there was nothing before us).
    if (tokenStart != BreakIterator.DONE) {
        next();
    }
    else {
        first();
    }

    if (tokenEnd == BreakIterator.DONE || tokenStart == BreakIterator.DONE) {
        return "none";
    }

    final String token = text_.substring(tokenStart, tokenEnd);
    if (token.matches(".*[a-zA-Z]+.*")) {
        return "letter";
    }
    return token.matches("[0-9]+") ? "number" : "none";
}
 
源代码2 项目: openjdk-8   文件: TextComponent.java
/**
 * Shared implementation for forward and backward word searches.
 * The caller must have assigned {@code s} as the text of {@code words}.
 *
 * @param index     offset from which the search starts
 * @param words     word iterator positioned over {@code s}
 * @param direction {@code NEXT} to search forward, anything else to search backward
 * @param s         the text the iterator works on
 * @return the boundary at which the first segment containing a letter begins
 *         (seen from the search direction), or {@code BreakIterator.DONE}
 */
private int findWordLimit(int index, BreakIterator words, boolean direction,
                                 String s) {
    // Fix for 4256660 and 4256661.
    // With a word iterator the end of one word is not necessarily the start
    // of the next one (see the java.text.BreakIterator JavaDoc), so every
    // segment is scanned for a letter before it is accepted as a word.
    // Modelled after the nextWordStartAfter example from BreakIterator.java.
    final boolean forward = (direction == NEXT);

    int last = forward ? words.following(index) : words.preceding(index);
    int current = forward ? words.next() : words.previous();

    while (current != BreakIterator.DONE) {
        final int from = Math.min(last, current);
        final int to = Math.max(last, current);
        for (int pos = from; pos < to; pos++) {
            if (Character.isLetter(s.charAt(pos))) {
                return last;
            }
        }
        last = current;
        current = forward ? words.next() : words.previous();
    }
    return BreakIterator.DONE;
}
 
源代码3 项目: ignite-book-code-samples   文件: SplitSentence.java
/**
 * Splits the sentence carried in field 0 of the tuple into words and emits
 * every non-whitespace word as its own value.
 */
@Override
public void execute(Tuple tuple, BasicOutputCollector collector) {
    final String sentence = tuple.getString(0);

    // Word iterator for the default locale, fed with the sentence
    final BreakIterator wordIterator = BreakIterator.getWordInstance();
    wordIterator.setText(sentence);

    int from = wordIterator.first();
    int to = wordIterator.next();
    while (to != BreakIterator.DONE) {
        // Strip whitespace so that pure-whitespace segments collapse to ""
        final String candidate = sentence.substring(from, to).replaceAll("\\s+", "");
        if (!candidate.isEmpty()) {
            collector.emit(new Values(candidate));
        }
        from = to;
        to = wordIterator.next();
    }
}
 
/**
 * Prints every sentence of {@code paragraph} on its own line in the form
 * {@code begin-end [sentence]}, using sentence boundaries for the en-US locale.
 * <p>
 * The locale is passed to the iterator explicitly (the original created it but
 * never used it), and no dangling {@code "N-"} fragment is printed after the
 * last sentence.
 */
public static void usingBreakIterator() {
    BreakIterator sentenceIterator = BreakIterator.getSentenceInstance(Locale.US);
    sentenceIterator.setText(paragraph);

    int begin = sentenceIterator.first();
    for (int end = sentenceIterator.next();
            end != BreakIterator.DONE;
            begin = end, end = sentenceIterator.next()) {
        System.out.println(begin + "-" + end + " ["
                + paragraph.substring(begin, end) + "]");
    }
}
 
源代码5 项目: RichTextFX   文件: SelectionImpl.java
/**
 * Selects a range delimited by two word-iterator boundaries located around the
 * given position. Does nothing when the area is empty.
 *
 * @param wordPositionInArea position in the area's text around which to select
 */
@Override
public void selectWord(int wordPositionInArea) {
    if(area.getLength() == 0) {
        return;
    }

    BreakIterator breakIterator = BreakIterator.getWordInstance( getArea().getLocale() );
    breakIterator.setText(area.getText());
    // Jump to the last boundary before the position, then advance one boundary;
    // wherever the iterator ends up becomes the selection start.
    breakIterator.preceding(wordPositionInArea);
    breakIterator.next();
    int wordStart = breakIterator.current();

    // Jump to the first boundary after the position, then advance one boundary;
    // wherever the iterator ends up becomes the selection end.
    // NOTE(review): current() is read instead of next()'s return value, so a
    // DONE result from next() is never seen here - confirm this is intended.
    breakIterator.following(wordPositionInArea);
    breakIterator.next();
    int wordEnd = breakIterator.current();

    selectRange(wordStart, wordEnd);
}
 
源代码6 项目: talkback   文件: GranularityIterator.java
/**
 * Finds the range that ends at the nearest boundary at or before {@code offset}.
 *
 * @param offset position to search backwards from; values beyond the text are clamped
 * @return the result of {@code getRange(start, end)}, or {@code null} when the text is
 *         empty, {@code offset} is not positive, or no earlier boundary exists
 */
@Override
public @Nullable int[] preceding(int offset) {
  final int length = getIteratorText().length();
  if (length <= 0 || offset <= 0) {
    return null;
  }

  // Clamp to the text, then walk back until we sit on a real boundary.
  int rangeEnd = Math.min(offset, length);
  while (!breakIterator.isBoundary(rangeEnd)) {
    rangeEnd = breakIterator.preceding(rangeEnd);
    if (rangeEnd == BreakIterator.DONE) {
      return null;
    }
  }

  final int rangeStart = breakIterator.preceding(rangeEnd);
  return (rangeStart == BreakIterator.DONE) ? null : getRange(rangeStart, rangeEnd);
}
 
源代码7 项目: dragonwell8_jdk   文件: Bug4533872.java
/**
 * For every sample text, jumps forward with {@code next(n)} to the start of the
 * last expected word and checks that the segment up to the following boundary
 * equals that word.
 */
void TestNext() {
    iter = BreakIterator.getWordInstance(Locale.US);

    for (int sample = 0; sample < given.length; sample++) {
        iter.setText(given[sample]);
        start = iter.first();

        // Skip directly to the boundary in front of the last expected word.
        final int lastWord = expected[sample].length - 1;
        start = iter.next(lastWord);
        end = iter.next();

        final String got = given[sample].substring(start, end);
        if (!expected[sample][lastWord].equals(got)) {
            errln("Word break failure: printEachForward() expected:<" +
                  expected[sample][lastWord] + ">, got:<" +
                  got +
                  "> start=" + start + "  end=" + end);
        }
    }
}
 
源代码8 项目: lucene-solr   文件: TestSplittingBreakIterator.java
/**
 * Runs the full set of boundary checks against one iterator/text pair.
 *
 * @param bi         iterator under test; its text is set to {@code text} here
 * @param text       the text to iterate over
 * @param boundaries expected boundaries in the string form produced by
 *                   {@code readBoundariesToString}
 */
private void testBreakIterator(BreakIterator bi, String text, String boundaries) {
  bi.setText(text);

  //Test first & last
  testFirstAndLast(bi, text, boundaries);

  //Test if expected boundaries are consistent with reading them from next() in a loop:
  assertEquals(boundaries, readBoundariesToString(bi, text));

  //Test following() and preceding():
  // get each index, randomized in case there is a sequencing bug:
  List<Integer> indexes = randomIntsBetweenInclusive(text.length() + 1);
  testFollowing(bi, text, boundaries, indexes);
  testPreceding(bi, text, boundaries, indexes);

  //Test previous():
  testPrevious(bi, text, boundaries);
}
 
源代码9 项目: dragonwell8_jdk   文件: BreakIteratorTest.java
/**
 * Walks the iterator backwards from {@code last()} with {@code previous()} and
 * collects the text segments in document order, reporting any inconsistency
 * through {@code errln}.
 *
 * @param bi   iterator already set up with {@code text}
 * @param text the text being iterated
 * @return the segments between consecutive boundaries, first segment first
 */
private Vector testLastAndPrevious(BreakIterator bi, String text) {
    final Vector<String> segments = new Vector<String>();
    int pos = bi.last();
    int upper = pos;

    if (pos != text.length())
        errln("last() returned " + pos + " instead of " + text.length());

    while (pos != BreakIterator.DONE) {
        pos = bi.previous();
        if (pos == BreakIterator.DONE) {
            // Running out of boundaries is only legal at the start of the text.
            if (upper != 0)
                errln("previous() returned DONE prematurely: offset was "
                                + upper + " instead of 0");
        }
        else {
            if (pos >= upper)
                errln("previous() failed to move backward: previous() on position "
                                + upper + " yielded " + pos);

            // Prepend so that the result ends up in forward order.
            segments.add(0, text.substring(pos, upper));
        }
        upper = pos;
    }
    return segments;
}
 
源代码10 项目: dragonwell8_jdk   文件: BreakIteratorTest.java
/**
 * Calls {@code preceding()} for every offset from 0 through {@code text.length()}
 * and compares each result with the expected entry of {@code boundaries}.
 *
 * @param bi         iterator already set up with {@code text}
 * @param text       the text being iterated
 * @param boundaries expected boundary positions, indexed as described in the loop
 */
private void testPreceding(BreakIterator bi, String text, int[] boundaries) {
    logln("testPreceding():");
    int offset = 0;
    try {
        int expectedIdx = 0;
        while (offset <= text.length()) {
            final int actual = bi.preceding(offset);
            logln("bi.preceding(" + offset + ") -> " + actual);

            final int wanted = boundaries[expectedIdx];
            if (actual != wanted)
                errln("Wrong result from preceding() for " + offset + ": expected " + wanted
                      + ", got " + actual);

            // Once the offset reaches the next listed boundary, later offsets
            // are compared against that boundary instead.
            if (offset == boundaries[expectedIdx + 1])
                ++expectedIdx;

            offset++;
        }
    } catch (IllegalArgumentException illargExp) {
        errln("IllegalArgumentException caught from preceding() for offset: " + offset);
    }
}
 
源代码11 项目: dragonwell8_jdk   文件: BreakIteratorTest.java
/**
 * Calls {@code isBoundary()} for every offset from 0 through {@code text.length()}
 * and verifies that it answers {@code true} exactly for the offsets listed in
 * {@code boundaries} (starting with index 1).
 *
 * @param bi         iterator already set up with {@code text}
 * @param text       the text being iterated
 * @param boundaries expected boundary positions
 */
private void testIsBoundary(BreakIterator bi, String text, int[] boundaries) {
    logln("testIsBoundary():");
    int nextIdx = 1;
    for (int offset = 0; offset <= text.length(); offset++) {
        final boolean actual = bi.isBoundary(offset);
        logln("bi.isBoundary(" + offset + ") -> " + actual);

        if (offset == boundaries[nextIdx]) {
            ++nextIdx;
            if (!actual)
                errln("Wrong result from isBoundary() for " + offset + ": expected true, got false");
        }
        else if (actual) {
            errln("Wrong result from isBoundary() for " + offset + ": expected false, got true");
        }
    }
}
 
源代码12 项目: openjdk-jdk9   文件: BreakIteratorTest.java
/**
 * Calls {@code preceding()} for every offset from 0 through {@code text.length()}
 * and compares each result with the expected entry of {@code boundaries}.
 *
 * @param bi         iterator already set up with {@code text}
 * @param text       the text being iterated
 * @param boundaries expected boundary positions, indexed as described in the loop
 */
private void testPreceding(BreakIterator bi, String text, int[] boundaries) {
    logln("testPreceding():");
    int offset = 0;
    try {
        int expectedIdx = 0;
        while (offset <= text.length()) {
            final int actual = bi.preceding(offset);
            logln("bi.preceding(" + offset + ") -> " + actual);

            final int wanted = boundaries[expectedIdx];
            if (actual != wanted)
                errln("Wrong result from preceding() for " + offset + ": expected " + wanted
                      + ", got " + actual);

            // Once the offset reaches the next listed boundary, later offsets
            // are compared against that boundary instead.
            if (offset == boundaries[expectedIdx + 1])
                ++expectedIdx;

            offset++;
        }
    } catch (IllegalArgumentException illargExp) {
        errln("IllegalArgumentException caught from preceding() for offset: " + offset);
    }
}
 
源代码13 项目: APICloud-Studio   文件: CommandExecutionUtils.java
/**
 * Computes the region between the word boundaries surrounding the given offset.
 * When the offset itself is a boundary, the shorter of the two neighbouring
 * segments is chosen (the left one on a tie). The result may be zero-length;
 * this method never returns <code>null</code>.
 * 
 * @param line
 *            the line
 * @param offset
 *            the offset
 * @return the region found around the offset
 */
protected static IRegion findWordRegion(String line, int offset)
{
	BreakIterator wordIterator = BreakIterator.getWordInstance();
	wordIterator.setText(line);

	// Missing boundaries fall back to the edges of the line.
	int before = wordIterator.preceding(offset);
	int regionStart = (before == BreakIterator.DONE) ? 0 : before;

	int after = wordIterator.following(offset);
	int regionEnd = (after == BreakIterator.DONE) ? line.length() : after;

	if (wordIterator.isBoundary(offset))
	{
		boolean rightIsLonger = (regionEnd - offset) > (offset - regionStart);
		if (rightIsLonger)
		{
			regionStart = offset;
		}
		else
		{
			regionEnd = offset;
		}
	}

	// Equal start and end naturally produce a zero-length region.
	return new Region(regionStart, regionEnd - regionStart);
}
 
源代码14 项目: lucene-solr   文件: TestUnifiedHighlighter.java
/**
 * Make sure highlighter returns whole text when there
 * are no hits and the highlighter is configured with a
 * {@code WholeBreakIterator}.
 */
public void testEmptyHighlightsWhole() throws Exception {
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);

  Document doc = new Document();

  Field body = new Field("body", "test this is.  another sentence this test has.  far away is that planet.", fieldType);
  doc.add(body);
  iw.addDocument(doc);

  IndexReader ir = iw.getReader();
  iw.close();

  IndexSearcher searcher = newSearcher(ir);
  // Override the break iterator so the highlighter works with a WholeBreakIterator.
  UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer) {
    @Override
    protected BreakIterator getBreakIterator(String field) {
      return new WholeBreakIterator();
    }
  };
  // The term "highlighting" does not occur in the indexed body text.
  Query query = new TermQuery(new Term("body", "highlighting"));
  int[] docIDs = new int[]{0};
  String snippets[] = highlighter.highlightFields(new String[]{"body"}, query, docIDs, new int[]{2}).get("body");
  assertEquals(1, snippets.length);
  // The single snippet must be the complete, unmodified field value.
  assertEquals("test this is.  another sentence this test has.  far away is that planet.", snippets[0]);

  ir.close();
}
 
源代码15 项目: inception   文件: MtasDocumentIndex.java
/**
 * Turns a plain list of words into a Mtas CQP query by quoting every word.
 * <p>
 * A query that already contains any of the characters
 * {@code " [ ] { } < >} is returned unchanged. Otherwise the text is split
 * with the default-locale word iterator, whitespace-only segments are dropped,
 * and the remaining segments are quoted and joined with exactly one space
 * (no doubled, leading or trailing separators).
 *
 * @param aQuery the query as entered; must not be {@code null}
 * @return the original query, or its quoted word-by-word form
 */
private String preprocessQuery(String aQuery)
{
    boolean hasQuerySyntax = aQuery.contains("\"") || aQuery.contains("[")
            || aQuery.contains("]") || aQuery.contains("{") || aQuery.contains("}")
            || aQuery.contains("<") || aQuery.contains(">");
    if (hasQuerySyntax) {
        return aQuery;
    }

    // Convert raw words query to a Mtas CQP query
    StringBuilder result = new StringBuilder();
    BreakIterator words = BreakIterator.getWordInstance();
    words.setText(aQuery);

    int start = words.first();
    for (int end = words.next(); end != BreakIterator.DONE; start = end, end = words.next()) {
        String word = aQuery.substring(start, end);
        if (word.trim().isEmpty()) {
            // Whitespace between words contributes nothing to the query
            continue;
        }
        if (result.length() > 0) {
            result.append(' ');
        }
        result.append('"').append(word).append('"');
    }

    return result.toString();
}
 
/**
 * Implements the "Final_Cased" condition
 *
 * Specification: Within the closest word boundaries containing C, there is a cased
 * letter before C, and there is no cased letter after C.
 *
 * Regular Expression:
 *   Before C: [{cased==true}][{wordBoundary!=true}]*
 *   After C: !([{wordBoundary!=true}]*[{cased}])
 *
 * @param src    the text containing the character C
 * @param index  the index of C within {@code src}
 * @param locale the locale used to obtain the word break iterator
 * @return {@code true} if a cased letter is found before C and none after it
 *         within the surrounding word boundaries, {@code false} otherwise
 */
private static boolean isFinalCased(String src, int index, Locale locale) {
    BreakIterator wordBoundary = BreakIterator.getWordInstance(locale);
    wordBoundary.setText(src);
    int ch;

    // Look for a preceding 'cased' letter
    // Walks backwards one code point at a time and stops at the first word boundary.
    for (int i = index; (i >= 0) && !wordBoundary.isBoundary(i);
            i -= Character.charCount(ch)) {

        ch = src.codePointBefore(i);
        if (isCased(ch)) {

            int len = src.length();
            // Check that there is no 'cased' letter after the index
            // The outer loop variable i is reused here; that is safe because
            // every path through this branch returns.
            for (i = index + Character.charCount(src.codePointAt(index));
                    (i < len) && !wordBoundary.isBoundary(i);
                    i += Character.charCount(ch)) {

                ch = src.codePointAt(i);
                if (isCased(ch)) {
                    return false;
                }
            }

            return true;
        }
    }

    // No cased letter was found before C inside the current word.
    return false;
}
 
源代码17 项目: openjdk-jdk8u   文件: Bug4533872.java
/**
 * For every sample text, looks up the word in front of a set of fixed offsets
 * via {@code preceding(offset)} followed by {@code next()} and compares it with
 * the expected word. Also checks the {@code DONE} results at both text ends.
 */
void TestPrintAt_2() {
    iter = BreakIterator.getWordInstance(Locale.US);

    // One row of probe offsets per sample text.
    int[][] index = {
        {2, 9, 10, 15, 17},
        {1, 9, 10, 13, 16, 18, 20},
        {4, 9, 10, 13, 16, 18, 20},
        {6, 7, 10, 11, 15},
    };

    for (int sample = 0; sample < given.length; sample++) {
        iter.setText(given[sample]);

        // Nothing can precede offset 0, so preceding(0) must yield BreakIterator.DONE.
        if (iter.preceding(0) != BreakIterator.DONE) {
             errln("Word break failure: printAt_2() expected:-1(BreakIterator.DONE), got:" +
                   iter.preceding(0));
        }

        final int[] offsets = index[sample];
        for (int word = 0; word < offsets.length; word++) {
            start = iter.preceding(offsets[word]);
            end = iter.next();

            final String got = given[sample].substring(start, end);
            if (!expected[sample][word].equals(got)) {
                errln("Word break failure: printAt_2() expected:<" +
                      expected[sample][word] + ">, got:<" +
                      got +
                      "> start=" + start + "  end=" + end);
            }
        }

        // Stepping past the last boundary must yield BreakIterator.DONE.
        end = iter.last();
        start = iter.next();
        if (start != BreakIterator.DONE) {
             errln("Word break failure: printAt_2() expected:-1(BreakIterator.DONE), got:" + start);
        }
    }
}
 
源代码18 项目: openjdk-jdk8u-backup   文件: TextMeasurer.java
/**
 * Chooses the character range [compStart, compLimit) to build components for,
 * widened to line-break boundaries, and passes it to {@code ensureComponents}.
 * Sets {@code haveLayoutWindow} to {@code true} afterwards.
 *
 * @param localStart index into {@code fChars} at which layout is about to start
 */
private void makeLayoutWindow(int localStart) {

        int compStart = localStart;
        int compLimit = fChars.length;

        // If we've already gone past the layout window, format to end of paragraph
        // NOTE(review): the code below actually caps compLimit at an estimate of
        // EST_LINES average-length lines past localStart rather than the end of
        // the text - confirm which behaviour the comment above is meant to describe.
        if (layoutCount > 0 && !haveLayoutWindow) {
            float avgLineLength = Math.max(layoutCharCount / layoutCount, 1);
            compLimit = Math.min(localStart + (int)(avgLineLength*EST_LINES), fChars.length);
        }

        // Only consult the line breaker when the window does not already cover all of fChars.
        if (localStart > 0 || compLimit < fChars.length) {
            if (charIter == null) {
                charIter = new CharArrayIterator(fChars);
            }
            else {
                charIter.reset(fChars);
            }
            if (fLineBreak == null) {
                fLineBreak = BreakIterator.getLineInstance();
            }
            fLineBreak.setText(charIter);
            // Move the window start back to a line-break boundary.
            if (localStart > 0) {
                if (!fLineBreak.isBoundary(localStart)) {
                    compStart = fLineBreak.preceding(localStart);
                }
            }
            // Move the window limit forward to a line-break boundary.
            if (compLimit < fChars.length) {
                if (!fLineBreak.isBoundary(compLimit)) {
                    compLimit = fLineBreak.following(compLimit);
                }
            }
        }

        ensureComponents(compStart, compLimit);
        haveLayoutWindow = true;
    }
 
源代码19 项目: openjdk-jdk8u   文件: BreakIteratorTest.java
/**
 * Checks that {@code next(n)} on a clone lands on the same offsets as stepping
 * the original iterator one boundary at a time, first forwards from
 * {@code first()} and then backwards from {@code last()}.
 *
 * @param iterator the iterator to step through; a clone of it is used for {@code next(n)}
 * @param testText not used by this method
 */
private void doMultipleSelectionTest(BreakIterator iterator, String testText)
{
    logln("Multiple selection test...");
    BreakIterator testIterator = (BreakIterator)iterator.clone();
    int offset = iterator.first();
    int testOffset;
    int count = 0;

    // Forward pass: the clone jumps count boundaries from first() in a single
    // call while the original advances one boundary per round.
    do {
        testOffset = testIterator.first();
        testOffset = testIterator.next(count);
        logln("next(" + count + ") -> " + testOffset);
        if (offset != testOffset)
            errln("next(n) and next() not returning consistent results: for step " + count + ", next(n) returned " + testOffset + " and next() had " + offset);

        if (offset != BreakIterator.DONE) {
            count++;
            offset = iterator.next();
        }
    } while (offset != BreakIterator.DONE);

    // now do it backwards...
    offset = iterator.last();
    count = 0;

    // Backward pass: count goes negative, so next(count) moves the clone
    // backwards from last() while the original uses previous().
    do {
        testOffset = testIterator.last();
        testOffset = testIterator.next(count);
        logln("next(" + count + ") -> " + testOffset);
        if (offset != testOffset)
            errln("next(n) and next() not returning consistent results: for step " + count + ", next(n) returned " + testOffset + " and next() had " + offset);

        if (offset != BreakIterator.DONE) {
            count--;
            offset = iterator.previous();
        }
    } while (offset != BreakIterator.DONE);
}
 
/**
 * Sets the iterator to refer to the first boundary position following
 * the specified position.
 * @param offset The position from which to begin searching for a break position.
 * @return The position of the first break after the specified position.
 */
@Override
public int following(int offset) {

    CharacterIterator text = getText();
    checkOffset(offset, text);

    // Set our internal iteration position (temporarily)
    // to the position passed in.  If this is the _beginning_ position,
    // then we can just use next() to get our return value
    text.setIndex(offset);
    if (offset == text.getBeginIndex()) {
        cachedLastKnownBreak = handleNext();
        return cachedLastKnownBreak;
    }

    // otherwise, we have to sync up first.  Use handlePrevious() to back
    // us up to a known break position before the specified position (if
    // we can determine that the specified position is a break position,
    // we don't back up at all).  This may or may not be the last break
    // position at or before our starting position.  Advance forward
    // from here until we've passed the starting position.  The position
    // we stop on will be the first break position after the specified one.
    int result = cachedLastKnownBreak;
    // The cached break is only usable as a starting point when it lies
    // strictly before the offset and is a real position (not DONE).
    if (result >= offset || result <= BreakIterator.DONE) {
        result = handlePrevious();
    } else {
        //it might be better to check if handlePrevious() give us closer
        //safe value but handlePrevious() is slow too
        //So, this has to be done carefully
        text.setIndex(result);
    }
    while (result != BreakIterator.DONE && result <= offset) {
        result = handleNext();
    }
    // Remember the result so that a later call can start from it.
    cachedLastKnownBreak = result;
    return result;
}
 
源代码21 项目: TencentKona-8   文件: BreakIteratorTest.java
/**
 * Creates the character, word, line and sentence break iterators used by the
 * tests; the no-argument factory methods are used for all four.
 */
public BreakIteratorTest()
{
    characterBreak = BreakIterator.getCharacterInstance();
    wordBreak = BreakIterator.getWordInstance();
    lineBreak = BreakIterator.getLineInstance();
    sentenceBreak = BreakIterator.getSentenceInstance();
}
 
/**
 * Feeds the iterator a slice taken from the middle of a larger buffer: the first
 * sentence is wrapped in {@code PADDING} on both sides and only the sentence
 * itself (offset {@code PADDING.length()}, length of the sentence) is exposed.
 */
public void testSliceMiddle() throws Exception {
  NLPSentenceDetectorOp sentenceDetectorOp = OpenNLPOpsFactory.getSentenceDetector(sentenceModelFile);
  BreakIterator bi = new OpenNLPSentenceBreakIterator(sentenceDetectorOp);
  bi.setText(getCharArrayIterator(PADDING + SENTENCES[0] + PADDING, PADDING.length(), SENTENCES[0].length()));

  test1Sentence(bi, SENTENCES[0]);
}
 
源代码23 项目: Pydev   文件: SubWordActions.java
/**
 * Moves the caret to the position computed by {@code findNextPosition}, unless
 * native word navigation is configured, in which case the inherited action runs.
 */
@Override
public void run() {
    // Defer to the inherited behaviour when the native navigation style is selected
    final IPreferenceStore store = getPreferenceStore();
    if (store.getString(SubWordPreferences.WORD_NAVIGATION_STYLE)
            .equals(SubWordPreferences.WORD_NAVIGATION_STYLE_NATIVE)) {
        super.run();
        return;
    }

    final ISourceViewer viewer = getSourceViewer();
    final IDocument document = viewer.getDocument();
    try {
        fIterator.setText((CharacterIterator) new DocumentCharacterIterator(document));
        // Translate the widget caret offset into a document (model) offset.
        int position = widgetOffset2ModelOffset(viewer, viewer.getTextWidget().getCaretOffset());
        if (position == -1) {
            return;
        }

        int next = findNextPosition(position);
        if (isBlockSelectionModeEnabled()
                && document.getLineOfOffset(next) != document.getLineOfOffset(position)) {
            super.run(); // may navigate into virtual white space
        } else if (next != BreakIterator.DONE) {
            setCaretPosition(next);
            getTextWidget().showSelection();
            fireSelectionChanged();
        }
    } catch (BadLocationException x) {
        // ignore
        // NOTE(review): the exception is swallowed and the caret is left
        // untouched - presumably best-effort navigation; confirm.
    }
}
 
源代码24 项目: jdk8u_jdk   文件: Bug4912404.java
/**
 * Regression check: comparing a word break iterator with {@code null} through
 * {@code equals} must yield {@code false}.
 *
 * @param args ignored
 * @throws RuntimeException if {@code equals(null)} returns {@code true}
 */
public static void main(String[] args) {
    final BreakIterator wordIterator = BreakIterator.getWordInstance();
    wordIterator.setText("abc");

    final boolean equalToNull = wordIterator.equals(null);
    if (equalToNull) {
        throw new RuntimeException("BreakIterator.equals(null) should return false.");
    }
}
 
源代码25 项目: openjdk-jdk8u   文件: Bug4533872.java
/**
 * For every sample text, walks the word boundaries from the end to the start
 * and checks each segment against the expected words (compared last word
 * first), as well as the value of {@code current()} after each move.
 */
void TestPrintEachBackward() {
    iter = BreakIterator.getWordInstance(Locale.US);

    for (int i = 0; i < given.length; i++) {
        iter.setText(given[i]);
        end = iter.last();

        // Check current()'s return value - should be same as last()'s.
        current = iter.current();
        if (end != current) {
            errln("Word break failure: printEachBackward() Unexpected current value: current()=" +
                  current + ", expected(=last())=" + end);
        }

        // j indexes the expected words from the last one down, in step with
        // the [start, end) segment moving towards the beginning of the text.
        int j;
        for (start = iter.previous(), j = expected[i].length-1;
             start != BreakIterator.DONE;
             end = start, start = iter.previous(), j--) {

            // Check current()'s return value - should be same as previous()'s.
            current = iter.current();
            if (start != current) {
                errln("Word break failure: printEachBackward() Unexpected current value: current()=" +
                      current + ", expected(=previous())=" + start);
            }

            if (!expected[i][j].equals(given[i].substring(start, end))) {
                errln("Word break failure: printEachBackward() expected:<" +
                      expected[i][j] + ">, got:<" +
                      given[i].substring(start, end) +
                      "> start=" + start + "  end=" + end);
            }
        }
    }
}
 
/**
 * Creates the break iterator configured for the given locale and iterator type.
 *
 * @param locale         the locale whose resources are consulted; must not be null
 * @param type           index into the "BreakIteratorClasses" resource array
 * @param dataName       resource key of the rule data file
 * @param dictionaryName resource key of the dictionary file; only read for
 *                       dictionary-based iterators
 * @return a new {@code RuleBasedBreakIterator} or {@code DictionaryBasedBreakIterator}
 * @throws NullPointerException if {@code locale} is null
 * @throws InternalError if the resources are missing or unreadable, or name an
 *                       unsupported iterator class
 */
private BreakIterator getBreakInstance(Locale locale,
                                              int type,
                                              String dataName,
                                              String dictionaryName) {
    if (locale == null) {
        throw new NullPointerException();
    }

    LocaleResources lr = LocaleProviderAdapter.forJRE().getLocaleResources(locale);
    String[] classNames = (String[]) lr.getBreakIteratorInfo("BreakIteratorClasses");
    String dataFile = (String) lr.getBreakIteratorInfo(dataName);

    try {
        switch (classNames[type]) {
        case "RuleBasedBreakIterator":
            return new RuleBasedBreakIterator(dataFile);
        case "DictionaryBasedBreakIterator":
            String dictionaryFile = (String) lr.getBreakIteratorInfo(dictionaryName);
            return new DictionaryBasedBreakIterator(dataFile, dictionaryFile);
        default:
            throw new IllegalArgumentException("Invalid break iterator class \"" +
                            classNames[type] + "\"");
        }
    } catch (IOException | MissingResourceException | IllegalArgumentException e) {
        // Any of these failures is reported as an InternalError, keeping the cause.
        throw new InternalError(e.toString(), e);
    }
}
 
源代码27 项目: TencentKona-8   文件: DocLocale.java
/**
 * Constructor
 *
 * Resolves the locale via {@code getLocale()}, installs it as the JVM default
 * locale, and creates the collator and sentence break iterator for it.
 *
 * @param docenv           environment used to exit when no locale can be resolved
 * @param localeName       stored in the {@code localeName} field
 * @param useBreakIterator stored in the {@code useBreakIterator} field
 */
DocLocale(DocEnv docenv, String localeName, boolean useBreakIterator) {
    this.docenv = docenv;
    this.localeName = localeName;
    this.useBreakIterator = useBreakIterator;
    locale = getLocale();
    if (locale == null) {
        // NOTE(review): presumably docenv.exit() does not return; if it did,
        // the calls below would receive a null locale - confirm.
        docenv.exit();
    } else {
        Locale.setDefault(locale); // NOTE: updating global state
    }
    collator = Collator.getInstance(locale);
    sentenceBreaker = BreakIterator.getSentenceInstance(locale);
}
 
源代码28 项目: relex   文件: DocSplitterFallbackImpl.java
/**
 * Creates a splitter with an empty buffer and a US-locale sentence iterator
 * that is initialised with empty text and positioned at its first boundary.
 */
public DocSplitterFallbackImpl()
{
	buffer = "";
	bdry = BreakIterator.getSentenceInstance(Locale.US);
	bdry.setText("");
	start = bdry.first();
}
 
源代码29 项目: Elasticsearch   文件: CustomPostingsHighlighter.java
/**
 * Supplies the passages to use when a field produced no highlights.
 *
 * @return {@code EMPTY_PASSAGE} when non-highlighted snippets are disabled;
 *         otherwise the inherited result limited to a single passage
 *         ({@code maxPassages} is ignored)
 */
@Override
protected Passage[] getEmptyHighlight(String fieldName, BreakIterator bi, int maxPassages) {
    if (!returnNonHighlightedSnippets) {
        return EMPTY_PASSAGE;
    }
    // Only the first sentence of the first snippet is wanted, hence the fixed limit of 1.
    return super.getEmptyHighlight(fieldName, bi, 1);
}
 
源代码30 项目: openjdk-8-source   文件: DocLocale.java
/**
 * Constructor
 *
 * Resolves the locale via {@code getLocale()}, installs it as the JVM default
 * locale, and creates the collator and sentence break iterator for it.
 *
 * @param docenv           environment used to exit when no locale can be resolved
 * @param localeName       stored in the {@code localeName} field
 * @param useBreakIterator stored in the {@code useBreakIterator} field
 */
DocLocale(DocEnv docenv, String localeName, boolean useBreakIterator) {
    this.docenv = docenv;
    this.localeName = localeName;
    this.useBreakIterator = useBreakIterator;
    locale = getLocale();
    if (locale == null) {
        // NOTE(review): presumably docenv.exit() does not return; if it did,
        // the calls below would receive a null locale - confirm.
        docenv.exit();
    } else {
        Locale.setDefault(locale); // NOTE: updating global state
    }
    collator = Collator.getInstance(locale);
    sentenceBreaker = BreakIterator.getSentenceInstance(locale);
}