下面列出了java.text.BreakIterator#getSentenceInstance ( ) 实例代码,或者点击链接到github查看源代码,也可以在右侧发表评论。
DocCommentParser(ParserFactory fac, DiagnosticSource diagSource, Comment comment) {
this.fac = fac;
this.diagSource = diagSource;
this.comment = comment;
names = fac.names;
m = fac.docTreeMaker;
Locale locale = (fac.locale == null) ? Locale.getDefault() : fac.locale;
Options options = fac.options;
boolean useBreakIterator = options.isSet("breakIterator");
if (useBreakIterator || !locale.getLanguage().equals(Locale.ENGLISH.getLanguage()))
sentenceBreaker = BreakIterator.getSentenceInstance(locale);
initTagParsers();
}
public static void splitSentences(CAS aCas)
{
BreakIterator bi = BreakIterator.getSentenceInstance(Locale.US);
bi.setText(aCas.getDocumentText());
int last = bi.first();
int cur = bi.next();
while (cur != BreakIterator.DONE) {
int[] span = new int[] { last, cur };
trim(aCas.getDocumentText(), span);
if (!isEmpty(span[0], span[1])) {
aCas.addFsToIndexes(createSentence(aCas, span[0], span[1]));
}
last = cur;
cur = bi.next();
}
}
public static String calculateFirstSentence(String raw) {
// remove all the * from beginning of lines
String text = raw.replaceAll("(?m)^\\s*\\*", "").trim();
// assume a <p> paragraph tag signifies end of sentence
text = text.replaceFirst("(?ms)<p>.*", "").trim();
// assume completely blank line signifies end of sentence
text = text.replaceFirst("(?ms)\\n\\s*\\n.*", "").trim();
// assume @tag signifies end of sentence
text = text.replaceFirst("(?ms)\\n\\s*@(see|param|throws|return|author|since|exception|version|deprecated|todo)\\s.*", "").trim();
// Comment Summary using first sentence (Locale sensitive)
BreakIterator boundary = BreakIterator.getSentenceInstance(Locale.getDefault()); // todo - allow locale to be passed in
boundary.setText(text);
int start = boundary.first();
int end = boundary.next();
if (start > -1 && end > -1) {
// need to abbreviate this comment for the summary
text = text.substring(start, end);
}
return text;
}
/**
* parse a break iterator type for the specified locale
*/
protected BreakIterator parseBreakIterator(String type, Locale locale) {
if (type == null || "SENTENCE".equals(type)) {
return BreakIterator.getSentenceInstance(locale);
} else if ("LINE".equals(type)) {
return BreakIterator.getLineInstance(locale);
} else if ("WORD".equals(type)) {
return BreakIterator.getWordInstance(locale);
} else if ("CHARACTER".equals(type)) {
return BreakIterator.getCharacterInstance(locale);
} else {
throw new IllegalArgumentException("Unknown " + HighlightParams.BS_TYPE + ": " + type);
}
}
/**
* Returns the Segment at <code>index</code> representing either
* the paragraph or sentence as identified by <code>part</code>, or
* null if a valid paragraph/sentence can't be found. The offset
* will point to the start of the word/sentence in the array, and
* the modelOffset will point to the location of the word/sentence
* in the model.
*/
private IndexedSegment getSegmentAt(int part, int index)
throws BadLocationException {
IndexedSegment seg = getParagraphElementText(index);
if (seg == null) {
return null;
}
BreakIterator iterator;
switch (part) {
case AccessibleText.WORD:
iterator = BreakIterator.getWordInstance(getLocale());
break;
case AccessibleText.SENTENCE:
iterator = BreakIterator.getSentenceInstance(getLocale());
break;
default:
return null;
}
seg.first();
iterator.setText(seg);
int end = iterator.following(index - seg.modelOffset + seg.offset);
if (end == BreakIterator.DONE) {
return null;
}
if (end > seg.offset + seg.count) {
return null;
}
int begin = iterator.previous();
if (begin == BreakIterator.DONE ||
begin >= seg.offset + seg.count) {
return null;
}
seg.modelOffset = seg.modelOffset + begin - seg.offset;
seg.offset = begin;
seg.count = end - begin;
return seg;
}
public BreakIteratorTest()
{
characterBreak = BreakIterator.getCharacterInstance();
wordBreak = BreakIterator.getWordInstance();
lineBreak = BreakIterator.getLineInstance();
sentenceBreak = BreakIterator.getSentenceInstance();
}
/**
* Constructor
*/
DocLocale(DocEnv docenv, String localeName, boolean useBreakIterator) {
this.docenv = docenv;
this.localeName = localeName;
this.useBreakIterator = useBreakIterator;
locale = getLocale();
if (locale == null) {
docenv.exit();
} else {
Locale.setDefault(locale); // NOTE: updating global state
}
collator = Collator.getInstance(locale);
sentenceBreaker = BreakIterator.getSentenceInstance(locale);
}
/**
* Creates a new spell check iterator.
*
* @param document the document containing the specified partition
* @param region the region to spell check
* @param locale the locale to use for spell checking
* @param breakIterator the break-iterator
*/
public SpellCheckIterator(IDocument document, IRegion region, Locale locale, BreakIterator breakIterator) {
fOffset= region.getOffset();
fWordIterator= breakIterator;
fDelimiter= TextUtilities.getDefaultLineDelimiter(document);
String content;
try {
content= document.get(region.getOffset(), region.getLength());
} catch (Exception exception) {
content= ""; //$NON-NLS-1$
}
fContent= content;
fWordIterator.setText(content);
fPredecessor= fWordIterator.first();
fSuccessor= fWordIterator.next();
final BreakIterator iterator= BreakIterator.getSentenceInstance(locale);
iterator.setText(content);
int offset= iterator.current();
while (offset != BreakIterator.DONE) {
fSentenceBreaks.add(new Integer(offset));
offset= iterator.next();
}
}
/**
* Returns the Segment at <code>index</code> representing either
* the paragraph or sentence as identified by <code>part</code>, or
* null if a valid paragraph/sentence can't be found. The offset
* will point to the start of the word/sentence in the array, and
* the modelOffset will point to the location of the word/sentence
* in the model.
*/
private IndexedSegment getSegmentAt(int part, int index)
throws BadLocationException {
IndexedSegment seg = getParagraphElementText(index);
if (seg == null) {
return null;
}
BreakIterator iterator;
switch (part) {
case AccessibleText.WORD:
iterator = BreakIterator.getWordInstance(getLocale());
break;
case AccessibleText.SENTENCE:
iterator = BreakIterator.getSentenceInstance(getLocale());
break;
default:
return null;
}
seg.first();
iterator.setText(seg);
int end = iterator.following(index - seg.modelOffset + seg.offset);
if (end == BreakIterator.DONE) {
return null;
}
if (end > seg.offset + seg.count) {
return null;
}
int begin = iterator.previous();
if (begin == BreakIterator.DONE ||
begin >= seg.offset + seg.count) {
return null;
}
seg.modelOffset = seg.modelOffset + begin - seg.offset;
seg.offset = begin;
seg.count = end - begin;
return seg;
}
/**
* Returns the Segment at <code>index</code> representing either
* the paragraph or sentence as identified by <code>part</code>, or
* null if a valid paragraph/sentence can't be found. The offset
* will point to the start of the word/sentence in the array, and
* the modelOffset will point to the location of the word/sentence
* in the model.
*/
private IndexedSegment getSegmentAt(int part, int index)
throws BadLocationException {
IndexedSegment seg = getParagraphElementText(index);
if (seg == null) {
return null;
}
BreakIterator iterator;
switch (part) {
case AccessibleText.WORD:
iterator = BreakIterator.getWordInstance(getLocale());
break;
case AccessibleText.SENTENCE:
iterator = BreakIterator.getSentenceInstance(getLocale());
break;
default:
return null;
}
seg.first();
iterator.setText(seg);
int end = iterator.following(index - seg.modelOffset + seg.offset);
if (end == BreakIterator.DONE) {
return null;
}
if (end > seg.offset + seg.count) {
return null;
}
int begin = iterator.previous();
if (begin == BreakIterator.DONE ||
begin >= seg.offset + seg.count) {
return null;
}
seg.modelOffset = seg.modelOffset + begin - seg.offset;
seg.offset = begin;
seg.count = end - begin;
return seg;
}
/**
* Returns the Segment at <code>index</code> representing either
* the paragraph or sentence as identified by <code>part</code>, or
* null if a valid paragraph/sentence can't be found. The offset
* will point to the start of the word/sentence in the array, and
* the modelOffset will point to the location of the word/sentence
* in the model.
*/
private IndexedSegment getSegmentAt(int part, int index)
throws BadLocationException {
IndexedSegment seg = getParagraphElementText(index);
if (seg == null) {
return null;
}
BreakIterator iterator;
switch (part) {
case AccessibleText.WORD:
iterator = BreakIterator.getWordInstance(getLocale());
break;
case AccessibleText.SENTENCE:
iterator = BreakIterator.getSentenceInstance(getLocale());
break;
default:
return null;
}
seg.first();
iterator.setText(seg);
int end = iterator.following(index - seg.modelOffset + seg.offset);
if (end == BreakIterator.DONE) {
return null;
}
if (end > seg.offset + seg.count) {
return null;
}
int begin = iterator.previous();
if (begin == BreakIterator.DONE ||
begin >= seg.offset + seg.count) {
return null;
}
seg.modelOffset = seg.modelOffset + begin - seg.offset;
seg.offset = begin;
seg.count = end - begin;
return seg;
}
/**
* Returns the Segment at <code>index</code> representing either
* the paragraph or sentence as identified by <code>part</code>, or
* null if a valid paragraph/sentence can't be found. The offset
* will point to the start of the word/sentence in the array, and
* the modelOffset will point to the location of the word/sentence
* in the model.
*/
private IndexedSegment getSegmentAt(int part, int index)
throws BadLocationException {
IndexedSegment seg = getParagraphElementText(index);
if (seg == null) {
return null;
}
BreakIterator iterator;
switch (part) {
case AccessibleText.WORD:
iterator = BreakIterator.getWordInstance(getLocale());
break;
case AccessibleText.SENTENCE:
iterator = BreakIterator.getSentenceInstance(getLocale());
break;
default:
return null;
}
seg.first();
iterator.setText(seg);
int end = iterator.following(index - seg.modelOffset + seg.offset);
if (end == BreakIterator.DONE) {
return null;
}
if (end > seg.offset + seg.count) {
return null;
}
int begin = iterator.previous();
if (begin == BreakIterator.DONE ||
begin >= seg.offset + seg.count) {
return null;
}
seg.modelOffset = seg.modelOffset + begin - seg.offset;
seg.offset = begin;
seg.count = end - begin;
return seg;
}
private static String[] splitBySentence(String text) {
List<String> sentences = new ArrayList<String>();
// Use Locale.US since the customizer is setting the default (US) locale text only:
BreakIterator it = BreakIterator.getSentenceInstance(Locale.US);
it.setText(text);
int start = it.first();
int end;
while ((end = it.next()) != BreakIterator.DONE) {
sentences.add(text.substring(start, end));
start = end;
}
return sentences.toArray(new String[sentences.size()]);
}
public void testSliceEnd() throws Exception {
BreakIterator expected = BreakIterator.getSentenceInstance(Locale.ROOT);
BreakIterator actual = new CustomSeparatorBreakIterator(randomSeparator());
assertSameBreaks("a000", 0, 1, expected, actual);
assertSameBreaks("ab000", 0, 1, expected, actual);
assertSameBreaks("abc000", 0, 1, expected, actual);
assertSameBreaks("000", 0, 0, expected, actual);
}
/**
* Returns the Segment at <code>index</code> representing either
* the paragraph or sentence as identified by <code>part</code>, or
* null if a valid paragraph/sentence can't be found. The offset
* will point to the start of the word/sentence in the array, and
* the modelOffset will point to the location of the word/sentence
* in the model.
*/
private IndexedSegment getSegmentAt(int part, int index)
throws BadLocationException {
IndexedSegment seg = getParagraphElementText(index);
if (seg == null) {
return null;
}
BreakIterator iterator;
switch (part) {
case AccessibleText.WORD:
iterator = BreakIterator.getWordInstance(getLocale());
break;
case AccessibleText.SENTENCE:
iterator = BreakIterator.getSentenceInstance(getLocale());
break;
default:
return null;
}
seg.first();
iterator.setText(seg);
int end = iterator.following(index - seg.modelOffset + seg.offset);
if (end == BreakIterator.DONE) {
return null;
}
if (end > seg.offset + seg.count) {
return null;
}
int begin = iterator.previous();
if (begin == BreakIterator.DONE ||
begin >= seg.offset + seg.count) {
return null;
}
seg.modelOffset = seg.modelOffset + begin - seg.offset;
seg.offset = begin;
seg.count = end - begin;
return seg;
}
/**
* Constructor
*/
DocLocale(DocEnv docenv, String localeName, boolean useBreakIterator) {
this.docenv = docenv;
this.localeName = localeName;
this.useBreakIterator = useBreakIterator;
locale = getLocale();
if (locale == null) {
docenv.exit();
} else {
Locale.setDefault(locale); // NOTE: updating global state
}
collator = Collator.getInstance(locale);
sentenceBreaker = BreakIterator.getSentenceInstance(locale);
}
public void TestSentenceInvariants()
{
BreakIterator e = BreakIterator.getSentenceInstance();
doOtherInvariantTest(e, cannedTestChars + ".,\u3001\u3002\u3041\u3042\u3043\ufeff");
}
public SentenceAndWordTokenizer() {
super(newAttributeFactory(), BreakIterator.getSentenceInstance(Locale.ROOT));
}
public void TestSentenceInvariants()
{
BreakIterator e = BreakIterator.getSentenceInstance();
doOtherInvariantTest(e, cannedTestChars + ".,\u3001\u3002\u3041\u3042\u3043\ufeff");
}
public void writeDefaultMessages()
{
Properties defaultMessages = new Properties();
BreakIterator sentenceBreaks = BreakIterator.getSentenceInstance(Locale.US);
for (CompiledPropertyMetadata prop : properties.getProperties())
{
String descriptionMessage = PropertyMetadataConstants.PROPERTY_DESCRIPTION_PREFIX + prop.getName();
if (propertyMessages == null || !propertyMessages.containsKey(descriptionMessage))
{
Element docNode = propertyDocNodes.get(prop.getName());
if (docNode != null)
{
String docText = docNode.getTextContent();
sentenceBreaks.setText(docText);
int first = sentenceBreaks.first();
int next = sentenceBreaks.next();
String firstSentence = docText.substring(first, next);
firstSentence = PATTERN_LEADING_WHITE_SPACE.matcher(firstSentence).replaceAll("");
firstSentence = PATTERN_TRAILING_WHITE_SPACE.matcher(firstSentence).replaceAll("");
defaultMessages.setProperty(descriptionMessage, firstSentence);
}
}
}
if (!defaultMessages.isEmpty())
{
try
{
FileObject res = environment.getFiler().createResource(StandardLocation.CLASS_OUTPUT,
"", properties.getMessagesName() + PropertyMetadataConstants.MESSAGES_DEFAULTS_SUFFIX,
(javax.lang.model.element.Element[]) null);
try (OutputStream out = res.openOutputStream())
{
//TODO lucianc preserve order
defaultMessages.store(out, null);
}
}
catch (IOException e)
{
throw new RuntimeException(e);
}
}
}