下面列出了 java.text.BreakIterator#first() 的实例代码;您也可以点击链接到 GitHub 查看源代码,或在右侧发表评论。
/**
 * Counts the words in {@code text} using the word-boundary rules of {@code locale}.
 *
 * <p>The text is split at every boundary reported by a word {@link BreakIterator};
 * a segment is counted when its first code point is a letter or digit, so
 * whitespace and punctuation segments are ignored.
 *
 * @param text   the text to scan
 * @param locale the locale whose word-break rules are applied
 * @return the number of segments that begin with a letter or digit
 */
private int countWords(String text, Locale locale) {
    BreakIterator wordIterator = BreakIterator.getWordInstance(locale);
    wordIterator.setText(text);

    int count = 0;
    int start = wordIterator.first();
    for (int end = wordIterator.next();
            end != BreakIterator.DONE;
            start = end, end = wordIterator.next()) {
        // Look at the whole code point: charAt() would only see the high
        // surrogate of a supplementary character, which is never a letter.
        if (Character.isLetterOrDigit(text.codePointAt(start))) {
            count++;
        }
    }
    return count;
}
/**
 * Walks {@code bi} forward from {@code first()} with repeated {@code next()}
 * calls, reporting inconsistencies through {@code errln}, and collects the
 * text between each pair of consecutive boundaries.
 *
 * @param bi   the iterator under test (its text is expected to be {@code text})
 * @param text the string the boundaries index into
 * @return the substrings between consecutive boundaries, in order
 */
private Vector testFirstAndNext(BreakIterator bi, String text) {
    int previous = bi.first();
    Vector<String> pieces = new Vector<String>();

    if (previous != 0)
        errln("first() returned " + previous + " instead of 0");

    while (previous != BreakIterator.DONE) {
        int current = bi.next();

        if (current == BreakIterator.DONE) {
            // Iteration is over: the last real boundary must be the end of the text.
            if (previous != text.length())
                errln("next() returned DONE prematurely: offset was "
                      + previous + " instead of " + text.length());
        }
        else {
            // Boundaries must be strictly increasing.
            if (current <= previous)
                errln("next() failed to move forward: next() on position "
                      + previous + " yielded " + current);
            pieces.addElement(text.substring(previous, current));
        }

        previous = current;
    }
    return pieces;
}
/**
 * Splits {@code source} into lines at line-break opportunities.
 *
 * <p>Segments reported by a line {@link BreakIterator} are appended to the
 * current line while the combined length stays below {@code offset}; otherwise
 * the current line is emitted (even if it is still empty) and the segment
 * starts a new one. The final pending line is always emitted.
 *
 * @param source the text to split
 * @param offset the length a line must stay below before a new line is started
 * @return the resulting lines, in order
 */
private static Collection<String> splitString(String source, int offset) {
    BreakIterator lineBreaks = BreakIterator.getLineInstance();
    lineBreaks.setText(source);

    List<String> lines = Lists.newArrayList();
    StringBuilder pending = new StringBuilder();

    int from = lineBreaks.first();
    for (int to = lineBreaks.next();
            to != BreakIterator.DONE;
            from = to, to = lineBreaks.next()) {
        String piece = source.substring(from, to);
        if (pending.length() + piece.length() >= offset) {
            // No room left: flush what we have and start over with this piece.
            lines.add(pending.toString());
            pending = new StringBuilder(piece);
        } else {
            pending.append(piece);
        }
    }

    lines.add(pending.toString());
    return lines;
}
/**
 * Wraps {@code someText} into an HTML snippet whose lines fit a pixel budget.
 *
 * <p>The text is split at word boundaries of the default locale. Each segment's
 * width is measured with the font metrics of {@code jComp}; when adding a
 * segment would push the running width past {@code lenLimit}, {@code "<br> "}
 * is inserted before it and the running width restarts at that segment's width.
 *
 * @param someText the text to wrap
 * @param jComp    component supplying the font and its metrics
 * @param lenLimit maximum line width, in the units returned by
 *                 {@link FontMetrics#stringWidth(String)}
 * @return the text wrapped in {@code <html>...</html>} with {@code <br>} breaks
 */
public static String wrapText(String someText, JComponent jComp, int lenLimit) {
    BreakIterator iterator = BreakIterator.getWordInstance(Locale.getDefault());
    iterator.setText(someText);
    FontMetrics fm = jComp.getFontMetrics(jComp.getFont());

    // StringBuilder avoids re-copying the whole result for every segment.
    StringBuilder html = new StringBuilder("<html>");
    int len = 0;
    int start = iterator.first();
    for (int end = iterator.next();
            end != BreakIterator.DONE;
            start = end, end = iterator.next()) {
        String word = someText.substring(start, end);
        int wordWidth = fm.stringWidth(word); // measured once per segment
        if (len + wordWidth > lenLimit) {
            html.append("<br> ");
            len = wordWidth;
        } else {
            len += wordWidth;
        }
        html.append(word);
    }
    return html.append("</html>").toString();
}
/**
 * Checks whether a string is a multi-word unit.
 *
 * <p>The string is scanned with a word {@link BreakIterator} for the
 * {@code locale} field. The number of boundaries (including the one at the
 * start) is halved to approximate the token count, because separators between
 * words produce boundaries of their own.
 *
 * @param t the string to examine
 * @return {@code true} if the halved boundary count is greater than one
 */
private boolean isMultiWord(String t) {
    BreakIterator tokenBreaker = BreakIterator.getWordInstance(locale);
    tokenBreaker.setText(t);

    int boundaryCount = 0;
    for (int pos = tokenBreaker.first();
            pos != BreakIterator.DONE;
            pos = tokenBreaker.next()) {
        boundaryCount++;
    }

    return boundaryCount / 2 > 1;
}
/**
 * Checks that {@code next(n)} on a clone lands on the same boundary as
 * {@code n} single steps on {@code iterator}: forward from {@code first()}
 * with {@code next()}, then backward from {@code last()} with
 * {@code previous()} and negative step counts. Mismatches go to {@code errln}.
 *
 * @param iterator the iterator under test
 * @param testText not used by this method
 */
private void doMultipleSelectionTest(BreakIterator iterator, String testText)
{
    logln("Multiple selection test...");
    BreakIterator jumper = (BreakIterator) iterator.clone();

    // Forward pass: step the original one boundary at a time and compare
    // against a fresh first() + next(steps) on the clone.
    int expected = iterator.first();
    int steps = 0;
    int jumped;
    do {
        jumper.first();
        jumped = jumper.next(steps);
        logln("next(" + steps + ") -> " + jumped);

        if (expected != jumped)
            errln("next(n) and next() not returning consistent results: for step " + steps + ", next(n) returned " + jumped + " and next() had " + expected);

        if (expected != BreakIterator.DONE) {
            steps++;
            expected = iterator.next();
        }
    } while (expected != BreakIterator.DONE);

    // Backward pass: same comparison from the end, with negative step counts.
    expected = iterator.last();
    steps = 0;
    do {
        jumper.last();
        jumped = jumper.next(steps);
        logln("next(" + steps + ") -> " + jumped);

        if (expected != jumped)
            errln("next(n) and next() not returning consistent results: for step " + steps + ", next(n) returned " + jumped + " and next() had " + expected);

        if (expected != BreakIterator.DONE) {
            steps--;
            expected = iterator.previous();
        }
    } while (expected != BreakIterator.DONE);
}
/**
 * Breaks the given filter text into words using a {@code BreakIterator}.
 *
 * <p>Only segments whose first code point is a letter or digit are returned;
 * whitespace and punctuation segments are dropped.
 *
 * @param text the text to split
 * @return the words found in {@code text}, in order of appearance
 */
private String[] getWords(String text) {
    List<String> words = new ArrayList<String>();
    // Break the text up into words, separating based on whitespace and
    // common punctuation.
    // String.split and regular expressions are deliberately avoided so the
    // code compiles against JCL Foundation (bug 80053), and the split has to
    // be NL-sensitive; BreakIterator was suggested in bug 90579.
    BreakIterator iter = BreakIterator.getWordInstance();
    iter.setText(text);
    int i = iter.first();
    while (i != java.text.BreakIterator.DONE && i < text.length()) {
        int j = iter.following(i);
        if (j == java.text.BreakIterator.DONE) {
            j = text.length();
        }
        // Test the full code point: charAt() would only see the high surrogate
        // of a supplementary character and wrongly reject the word.
        if (Character.isLetterOrDigit(text.codePointAt(i))) {
            words.add(text.substring(i, j));
        }
        i = j;
    }
    return words.toArray(new String[words.size()]);
}
/**
 * Records every boundary reported by {@code bi}, from {@code first()} until
 * {@code next()} returns {@code DONE}, in an unmodifiable list, and sets the
 * current index to the first boundary.
 *
 * @param bi the iterator whose boundaries are copied
 */
MirroredBreakIterator(BreakIterator bi) {
    final List<Integer> collected = new ArrayList<Integer>();

    int boundary = bi.first();
    charIndex = boundary;
    while (boundary != DONE) {
        collected.add(boundary);
        boundary = bi.next();
    }

    boundaries = Collections.unmodifiableList(collected);
}
/**
 * ORs break flags for {@code text} into {@code breaks}.
 *
 * <p>A character iterator is used when {@code type} is {@code CHARACTER},
 * otherwise a line iterator. Walking forward, the flag from
 * {@code typeMode(type, true)} is set on the element just before each boundary;
 * walking backward from the end, the flag from {@code typeMode(type, false)}
 * is set on the element at each boundary.
 *
 * @param text   the text to analyse
 * @param breaks flag array indexed by character position; presumably at least
 *               {@code text.length()} long (verify against callers)
 * @param type   the kind of break to record
 */
private static void fillBreaks(@NonNull String text, @NonNull byte[] breaks, byte type) {
    final BreakIterator walker = (type == CHARACTER)
            ? BreakIterator.getCharacterInstance()
            : BreakIterator.getLineInstance();
    walker.setText(text);

    // Forward pass: flag the last character of every segment.
    walker.first();
    final byte forwardFlag = typeMode(type, true);
    for (int boundary = walker.next();
            boundary != BreakIterator.DONE;
            boundary = walker.next()) {
        breaks[boundary - 1] |= forwardFlag;
    }

    // Backward pass: flag the first character of every segment.
    walker.last();
    final byte backwardFlag = typeMode(type, false);
    for (int boundary = walker.previous();
            boundary != BreakIterator.DONE;
            boundary = walker.previous()) {
        breaks[boundary] |= backwardFlag;
    }
}
/**
 * Records every boundary reported by {@code bi}, from {@code first()} until
 * {@code next()} returns {@code DONE}, in an unmodifiable list, and sets the
 * current index to the first boundary.
 *
 * @param bi the iterator whose boundaries are copied
 */
MirroredBreakIterator(BreakIterator bi) {
    final List<Integer> collected = new ArrayList<Integer>();

    int boundary = bi.first();
    charIndex = boundary;
    while (boundary != DONE) {
        collected.add(boundary);
        boundary = bi.next();
    }

    boundaries = Collections.unmodifiableList(collected);
}
/**
 * Paragraph-fills the input text: each paragraph produced by
 * {@code NEWLINE_SPLITTER} is indented by {@code indent} spaces and wrapped at
 * line-break opportunities so lines stay within {@code width} where possible.
 * Paragraphs are joined with a single newline.
 *
 * @param in     the text to format
 * @param indent number of spaces placed before every output line
 * @param width  column past which a segment is moved to a new line
 * @return the formatted text
 */
static String paragraphFill(String in, int indent, int width) {
    final String pad = Strings.repeat(" ", indent);
    final StringBuilder filled = new StringBuilder();
    boolean firstParagraph = true;

    for (String paragraph : NEWLINE_SPLITTER.split(in)) {
        // TODO(ccalvarin) break iterators expect hyphenated words to be line-breakable, which looks
        // funny for --flag
        BreakIterator lineBreaks = BreakIterator.getLineInstance();
        lineBreaks.setText(paragraph);

        if (!firstParagraph) {
            filled.append("\n");
        }
        firstParagraph = false;
        filled.append(pad);

        int column = indent;
        int from = lineBreaks.first();
        int to = lineBreaks.next();
        while (to != BreakIterator.DONE) {
            // A segment may carry its trailing space with it.
            String chunk = paragraph.substring(from, to);
            if (chunk.length() + column > width) {
                filled.append('\n').append(pad);
                column = indent;
            }
            filled.append(chunk);
            column += chunk.length();

            from = to;
            to = lineBreaks.next();
        }
    }
    return filled.toString();
}
/**
 * Records every boundary reported by {@code bi}, from {@code first()} until
 * {@code next()} returns {@code DONE}, in an unmodifiable list, and sets the
 * current index to the first boundary.
 *
 * @param bi the iterator whose boundaries are copied
 */
MirroredBreakIterator(BreakIterator bi) {
    final List<Integer> collected = new ArrayList<Integer>();

    int boundary = bi.first();
    charIndex = boundary;
    while (boundary != DONE) {
        collected.add(boundary);
        boundary = bi.next();
    }

    boundaries = Collections.unmodifiableList(collected);
}
/**
 * Records every boundary reported by {@code bi}, from {@code first()} until
 * {@code next()} returns {@code DONE}, in an unmodifiable list, and sets the
 * current index to the first boundary.
 *
 * @param bi the iterator whose boundaries are copied
 */
MirroredBreakIterator(BreakIterator bi) {
    final List<Integer> collected = new ArrayList<Integer>();

    int boundary = bi.first();
    charIndex = boundary;
    while (boundary != DONE) {
        collected.add(boundary);
        boundary = bi.next();
    }

    boundaries = Collections.unmodifiableList(collected);
}
/**
 * Fills the balloon with a caption and message text, selects the icon for
 * {@code messageType}, and passes a task to
 * {@code SunToolkit.executeOnEventHandlerThread} that shows the balloon.
 *
 * @param caption     text set on {@code captionLabel}
 * @param text        message body; when {@code null} the line labels and the
 *                    text panel layout are left untouched
 * @param messageType {@code "ERROR"}, {@code "WARNING"} or {@code "INFO"} select
 *                    the matching image; any other value (including
 *                    {@code null}) clears the icon
 */
private void _display(String caption, String text, String messageType) {
captionLabel.setText(caption);
BreakIterator iter = BreakIterator.getWordInstance();
if (text != null) {
iter.setText(text);
int start = iter.first(), end;
int nLines = 0;
// Advance one word boundary at a time. A line is emitted once the pending
// span [start, end) is at least 50 chars long or the text is exhausted.
// Assumes lineLabels has at least BALLOON_WORD_LINE_MAX_COUNT entries - TODO confirm.
do {
end = iter.next();
if (end == BreakIterator.DONE ||
text.substring(start, end).length() >= 50)
{
// When the text is exhausted, iter.last() supplies the end of the final line.
lineLabels[nLines].setText(text.substring(start, end == BreakIterator.DONE ?
iter.last() : end));
textPanel.add(lineLabels[nLines++]);
start = end;
}
// Line cap reached: mark the last line with " ..." if text remains, then stop.
if (nLines == BALLOON_WORD_LINE_MAX_COUNT) {
if (end != BreakIterator.DONE) {
lineLabels[nLines - 1].setText(
new String(lineLabels[nLines - 1].getText() + " ..."));
}
break;
}
} while (end != BreakIterator.DONE);
// One grid row per emitted line.
textPanel.setLayout(new GridLayout(nLines, 1));
}
// Map the message type to its icon; unknown types get no icon.
if ("ERROR".equals(messageType)) {
iconImage = errorImage;
} else if ("WARNING".equals(messageType)) {
iconImage = warnImage;
} else if ("INFO".equals(messageType)) {
iconImage = infoImage;
} else {
iconImage = null;
}
if (iconImage != null) {
// Make the icon canvas as tall as the larger of the icon and the text panel.
Dimension tpSize = textPanel.getSize();
iconCanvas.setSize(BALLOON_ICON_WIDTH, (BALLOON_ICON_HEIGHT > tpSize.height ?
BALLOON_ICON_HEIGHT : tpSize.height));
iconCanvas.validate();
}
SunToolkit.executeOnEventHandlerThread(target, new Runnable() {
public void run() {
// Nothing to show if the owner was disposed in the meantime.
if (liveArguments.isDisposed()) {
return;
}
// Anchor the balloon at the centre of the parent's on-screen bounds.
Point parLoc = getParent().getLocationOnScreen();
Dimension parSize = getParent().getSize();
show(new Point(parLoc.x + parSize.width/2, parLoc.y + parSize.height/2),
BALLOON_TRAY_ICON_INDENT);
if (iconImage != null) {
iconCanvas.updateImage(iconImage); // call it after the show(..) above
}
}
});
}
/**
 * Fills the balloon with a caption and message text, selects the icon for
 * {@code messageType}, and passes a task to
 * {@code SunToolkit.executeOnEventHandlerThread} that shows the balloon.
 *
 * @param caption     text set on {@code captionLabel}
 * @param text        message body; when {@code null} the line labels and the
 *                    text panel layout are left untouched
 * @param messageType {@code "ERROR"}, {@code "WARNING"} or {@code "INFO"} select
 *                    the matching image; any other value (including
 *                    {@code null}) clears the icon
 */
private void _display(String caption, String text, String messageType) {
captionLabel.setText(caption);
BreakIterator iter = BreakIterator.getWordInstance();
if (text != null) {
iter.setText(text);
int start = iter.first(), end;
int nLines = 0;
// Advance one word boundary at a time. A line is emitted once the pending
// span [start, end) is at least 50 chars long or the text is exhausted.
// Assumes lineLabels has at least BALLOON_WORD_LINE_MAX_COUNT entries - TODO confirm.
do {
end = iter.next();
if (end == BreakIterator.DONE ||
text.substring(start, end).length() >= 50)
{
// When the text is exhausted, iter.last() supplies the end of the final line.
lineLabels[nLines].setText(text.substring(start, end == BreakIterator.DONE ?
iter.last() : end));
textPanel.add(lineLabels[nLines++]);
start = end;
}
// Line cap reached: mark the last line with " ..." if text remains, then stop.
if (nLines == BALLOON_WORD_LINE_MAX_COUNT) {
if (end != BreakIterator.DONE) {
lineLabels[nLines - 1].setText(
new String(lineLabels[nLines - 1].getText() + " ..."));
}
break;
}
} while (end != BreakIterator.DONE);
// One grid row per emitted line.
textPanel.setLayout(new GridLayout(nLines, 1));
}
// Map the message type to its icon; unknown types get no icon.
if ("ERROR".equals(messageType)) {
iconImage = errorImage;
} else if ("WARNING".equals(messageType)) {
iconImage = warnImage;
} else if ("INFO".equals(messageType)) {
iconImage = infoImage;
} else {
iconImage = null;
}
if (iconImage != null) {
// Make the icon canvas as tall as the larger of the icon and the text panel.
Dimension tpSize = textPanel.getSize();
iconCanvas.setSize(BALLOON_ICON_WIDTH, (BALLOON_ICON_HEIGHT > tpSize.height ?
BALLOON_ICON_HEIGHT : tpSize.height));
iconCanvas.validate();
}
SunToolkit.executeOnEventHandlerThread(target, new Runnable() {
public void run() {
// Nothing to show if the owner was disposed in the meantime.
if (liveArguments.isDisposed()) {
return;
}
// Anchor the balloon at the centre of the parent's on-screen bounds.
Point parLoc = getParent().getLocationOnScreen();
Dimension parSize = getParent().getSize();
show(new Point(parLoc.x + parSize.width/2, parLoc.y + parSize.height/2),
BALLOON_TRAY_ICON_INDENT);
if (iconImage != null) {
iconCanvas.updateImage(iconImage); // call it after the show(..) above
}
}
});
}
/**
 * Asserts that two break iterators break the text the same way.
 *
 * <p>After setting {@code one} on {@code expected} and {@code two} on
 * {@code actual}, every navigation call ({@code next}, {@code first},
 * {@code last}, {@code previous}, {@code following}, {@code preceding}) must
 * return the same value on both, and {@code current()} must agree afterwards.
 * {@code following}/{@code preceding} are probed at every index from the begin
 * index to the end index of {@code one}, inclusive.
 */
public static void assertSameBreaks(CharacterIterator one, CharacterIterator two, BreakIterator expected, BreakIterator actual) {
    expected.setText(one);
    actual.setText(two);
    assertEquals(expected.current(), actual.current());

    // Forward sweep with next() until the reference iterator is exhausted.
    int cursor = expected.current();
    while (cursor != BreakIterator.DONE) {
        cursor = expected.next();
        assertEquals(cursor, actual.next());
        assertEquals(expected.current(), actual.current());
    }

    // Jump to the start.
    assertEquals(expected.first(), actual.first());
    assertEquals(expected.current(), actual.current());

    // Jump to the end.
    assertEquals(expected.last(), actual.last());
    assertEquals(expected.current(), actual.current());

    // Backward sweep with previous() until the reference iterator is exhausted.
    cursor = expected.current();
    while (cursor != BreakIterator.DONE) {
        cursor = expected.previous();
        assertEquals(cursor, actual.previous());
        assertEquals(expected.current(), actual.current());
    }

    // following(offset), always starting from the first boundary.
    int offset = one.getBeginIndex();
    while (offset <= one.getEndIndex()) {
        expected.first();
        actual.first();
        assertEquals(expected.following(offset), actual.following(offset));
        assertEquals(expected.current(), actual.current());
        offset++;
    }

    // preceding(offset), always starting from the last boundary.
    offset = one.getBeginIndex();
    while (offset <= one.getEndIndex()) {
        expected.last();
        actual.last();
        assertEquals(expected.preceding(offset), actual.preceding(offset));
        assertEquals(expected.current(), actual.current());
        offset++;
    }
}
/**
 * Fills the balloon with a caption and message text, selects the icon for
 * {@code messageType}, and passes a task to
 * {@code SunToolkit.executeOnEventHandlerThread} that shows the balloon.
 *
 * @param caption     text set on {@code captionLabel}
 * @param text        message body; when {@code null} the line labels and the
 *                    text panel layout are left untouched
 * @param messageType {@code "ERROR"}, {@code "WARNING"} or {@code "INFO"} select
 *                    the matching image; any other value (including
 *                    {@code null}) clears the icon
 */
private void _display(String caption, String text, String messageType) {
captionLabel.setText(caption);
BreakIterator iter = BreakIterator.getWordInstance();
if (text != null) {
iter.setText(text);
int start = iter.first(), end;
int nLines = 0;
// Advance one word boundary at a time. A line is emitted once the pending
// span [start, end) is at least 50 chars long or the text is exhausted.
// Assumes lineLabels has at least BALLOON_WORD_LINE_MAX_COUNT entries - TODO confirm.
do {
end = iter.next();
if (end == BreakIterator.DONE ||
text.substring(start, end).length() >= 50)
{
// When the text is exhausted, iter.last() supplies the end of the final line.
lineLabels[nLines].setText(text.substring(start, end == BreakIterator.DONE ?
iter.last() : end));
textPanel.add(lineLabels[nLines++]);
start = end;
}
// Line cap reached: mark the last line with " ..." if text remains, then stop.
if (nLines == BALLOON_WORD_LINE_MAX_COUNT) {
if (end != BreakIterator.DONE) {
lineLabels[nLines - 1].setText(
new String(lineLabels[nLines - 1].getText() + " ..."));
}
break;
}
} while (end != BreakIterator.DONE);
// One grid row per emitted line.
textPanel.setLayout(new GridLayout(nLines, 1));
}
// Map the message type to its icon; unknown types get no icon.
if ("ERROR".equals(messageType)) {
iconImage = errorImage;
} else if ("WARNING".equals(messageType)) {
iconImage = warnImage;
} else if ("INFO".equals(messageType)) {
iconImage = infoImage;
} else {
iconImage = null;
}
if (iconImage != null) {
// Make the icon canvas as tall as the larger of the icon and the text panel.
Dimension tpSize = textPanel.getSize();
iconCanvas.setSize(BALLOON_ICON_WIDTH, (BALLOON_ICON_HEIGHT > tpSize.height ?
BALLOON_ICON_HEIGHT : tpSize.height));
iconCanvas.validate();
}
SunToolkit.executeOnEventHandlerThread(target, new Runnable() {
public void run() {
// Nothing to show if the owner was disposed in the meantime.
if (liveArguments.isDisposed()) {
return;
}
// Anchor the balloon at the centre of the parent's on-screen bounds.
Point parLoc = getParent().getLocationOnScreen();
Dimension parSize = getParent().getSize();
show(new Point(parLoc.x + parSize.width/2, parLoc.y + parSize.height/2),
BALLOON_TRAY_ICON_INDENT);
if (iconImage != null) {
iconCanvas.updateImage(iconImage); // call it after the show(..) above
}
}
});
}
/**
 * Formats a long string into an indented paragraph whose lines are broken
 * once a segment would pass column 72.
 *
 * <p>The text is split at word boundaries. A segment ending in a newline is
 * written without that newline, and a segment that starts a line has its first
 * character dropped when that character is whitespace. The column counter
 * always advances by the full segment length.
 *
 * <p>Originally written by David Whitlock.
 *
 * @param text
 *        The text to be filled
 * @param indent
 *        The number of spaces to indent
 * @return the filled paragraph
 */
static String fillParagraph(String text, int indent) {
    final StringBuilder pad = new StringBuilder();
    for (int n = 0; n < indent; n++) {
        pad.append(" ");
    }
    final String margin = pad.toString();

    final StringWriter buffer = new StringWriter();
    final PrintWriter out = new PrintWriter(buffer, true);
    out.print(margin);

    int column = margin.length();
    boolean atLineStart = true;

    final BreakIterator words = BreakIterator.getWordInstance();
    words.setText(text);
    int from = words.first();
    int to = words.next();
    while (to != BreakIterator.DONE) {
        final String piece = text.substring(from, to);

        // Start a new indented line when this segment would pass column 72.
        if (column + piece.length() > 72) {
            out.println("");
            out.print(margin);
            column = margin.length();
            atLineStart = true;
        }

        final int lastIndex = piece.length() - 1;
        if (piece.charAt(lastIndex) == '\n') {
            // Drop the trailing newline; line breaks are decided here.
            out.write(piece, 0, lastIndex);
        } else if (atLineStart && Character.isWhitespace(piece.charAt(0))) {
            // Do not begin a line with the separator's leading whitespace.
            out.write(piece, 1, lastIndex);
        } else {
            out.print(piece);
        }

        column += to - from;
        atLineStart = false;

        from = to;
        to = words.next();
    }
    return buffer.toString();
}
/**
 * Prints {@code target} to {@code writer} in chunks that end on whitespace.
 *
 * <p>Line-break boundaries that do not fall after a whitespace character are
 * skipped, so each chunk runs up to the next whitespace-terminated boundary
 * (or to the end of the text). A chunk ending in a newline is printed without
 * its line terminator, followed by a fresh line and a continuation prefix; a
 * chunk ending in a carriage return is printed as is and also followed by a
 * fresh line. A final line terminator is written unless the tracked line
 * length is zero.
 *
 * @param writer        destination for the formatted text
 * @param target        the text to print
 * @param initialLength number of characters already on the current output line
 */
private void formatText(PrintWriter writer, String target, int initialLength) {
    final BreakIterator lineBreaks = BreakIterator.getLineInstance();
    lineBreaks.setText(target);

    int column = initialLength;
    int chunkStart = lineBreaks.first();
    int chunkEnd = lineBreaks.next();

    while (chunkEnd != BreakIterator.DONE) {
        // Extend the chunk until it ends with whitespace or the text runs out.
        char last = target.charAt(chunkEnd - 1);
        while (!Character.isWhitespace(last)) {
            final int candidate = lineBreaks.next();
            if (candidate == BreakIterator.DONE) {
                // End of the string: keep the last real boundary.
                break;
            }
            chunkEnd = candidate;
            last = target.charAt(chunkEnd - 1);
        }

        int printEnd = chunkEnd;
        if (last == '\n') {
            // println supplies the terminator, so strip "\n" (and a preceding "\r").
            printEnd--;
            if (printEnd > 0 && target.charAt(printEnd - 1) == '\r') {
                printEnd--;
            }
        } else if (last == '\t') {
            // A tab is counted as 8 columns: 1 from the chunk length plus these 7.
            column += 7;
        }

        final String chunk = target.substring(chunkStart, printEnd);
        writer.print(chunk);
        column += chunk.length();

        if (last == '\n' || last == '\r') {
            // Forced end of line, then the continuation prefix.
            writer.println();
            writer.print(" ");
            column = 2;
        }

        chunkStart = chunkEnd;
        chunkEnd = lineBreaks.next();
    }

    if (column != 0) {
        writer.println();
    }
}
/**
 * Bug 4638433: checks word and line boundaries for characters that were added
 * or re-categorised since Unicode 3.0.0. Each check resets the iterator to the
 * first boundary and compares the following boundary (or two) with the
 * expected offset, reporting mismatches through {@code errln}.
 */
public void TestLineBreakBasedOnUnicode3_0_0()
{
    int pos;

    // Latin Extended-B characters 0x0218-0x0233, added since Unicode 3.0.0:
    // the whole string must be a single word.
    BreakIterator wordBreaks = BreakIterator.getWordInstance(Locale.US);
    wordBreaks.setText("\u0216\u0217\u0218\u0219\u021A");
    wordBreaks.first();
    pos = wordBreaks.next();
    if (pos != 5) {
        errln("Word break failure: failed to stop at 5 and bounded at " + pos);
    }

    BreakIterator lineBreaks = BreakIterator.getLineInstance(Locale.US);

    // <Three(Nd)><Two(Nd)><Low Double Prime Quotation Mark(Pe)><One(Nd)>
    // U+301F changed its category from Ps to Pe since Unicode 2.1.
    lineBreaks.setText("32\u301f1");
    lineBreaks.first();
    pos = lineBreaks.next();
    if (pos != 3) {
        errln("Line break failure: failed to skip before \\u301F(Pe) at 3 and bounded at " + pos);
    }

    // Mongolian <Letter A(Lo)><Todo Soft Hyphen(Pd)><Letter E(Lo)>,
    // added since Unicode 3.0.0.
    lineBreaks.setText("\u1820\u1806\u1821");
    lineBreaks.first();
    pos = lineBreaks.next();
    if (pos != 2) {
        errln("Mongolian line break failure: failed to skip position before \\u1806(Pd) at 2 and bounded at " + pos);
    }

    // Khmer <ZERO(Nd)><Currency Symbol(Sc)><ONE(Nd)>, added since Unicode 3.0.0.
    lineBreaks.setText("\u17E0\u17DB\u17E1");
    lineBreaks.first();
    pos = lineBreaks.next();
    if (pos != 1) {
        errln("Khmer line break failure: failed to stop before \\u17DB(Sc) at 1 and bounded at " + pos);
    }
    pos = lineBreaks.next();
    if (pos != 3) {
        errln("Khmer line break failure: failed to skip position after \\u17DB(Sc) at 3 and bounded at " + pos);
    }

    // Ogham <Letter UR(Lo)><Space Mark(Zs)><Letter OR(Lo)>, added since Unicode 3.0.0.
    lineBreaks.setText("\u1692\u1680\u1696");
    lineBreaks.first();
    pos = lineBreaks.next();
    if (pos != 2) {
        errln("Ogham line break failure: failed to skip postion before \\u1680(Zs) at 2 and bounded at " + pos);
    }

    // Confirm changes in BreakIteratorRules_th.java have been reflected.
    // Thai <Seven(Nd)><Left Double Quotation Mark(Pi)><Five(Nd)>
    //      <Right Double Quotation Mark(Pf)><Three(Nd)>
    BreakIterator thaiLineBreaks = BreakIterator.getLineInstance(new Locale("th", ""));
    thaiLineBreaks.setText("\u0E57\u201C\u0E55\u201D\u0E53");
    thaiLineBreaks.first();
    pos = thaiLineBreaks.next();
    if (pos != 1) {
        errln("Thai line break failure: failed to stop before \\u201C(Pi) at 1 and bounded at " + pos);
    }
    pos = thaiLineBreaks.next();
    if (pos != 4) {
        errln("Thai line break failure: failed to stop after \\u201D(Pf) at 4 and bounded at " + pos);
    }
}