下面列出了 java.text.CharacterIterator#next() 的实例代码，或者点击链接到 GitHub 查看源代码，也可以在右侧发表评论。
/**
 * Copies the iterator's characters into {@code text} and, in the same
 * pass, records the break positions in {@code breaks}.
 * <p>
 * The recorded positions are, in order: the iterator's begin index, every
 * index at which a non-whitespace character directly follows a whitespace
 * character, and (for non-empty text) the position just past the last
 * character. All positions are expressed in the iterator's index space.
 *
 * @param ci the iterator supplying the text; it is read from its first
 *           character until {@link CharacterIterator#DONE} is returned
 */
public void setText(CharacterIterator ci) {
    final int offset = ci.getBeginIndex();
    text = new char[ci.getEndIndex() - offset];

    // Scratch buffer: text.length + 1 slots is an upper bound on the break count.
    final int[] found = new int[text.length + 1];
    int foundCount = 0;
    found[foundCount++] = offset;

    int pos = 0;
    boolean previousWasSpace = false;
    char ch = ci.first();
    while (ch != CharacterIterator.DONE) {
        text[pos] = ch;
        final boolean isSpace = Character.isWhitespace(ch);
        // A break sits where a word starts right after whitespace.
        if (previousWasSpace && !isSpace) {
            found[foundCount++] = pos + offset;
        }
        previousWasSpace = isSpace;
        pos++;
        ch = ci.next();
    }

    if (text.length > 0) {
        found[foundCount++] = text.length + offset;
    }

    // Trim the scratch buffer down to the positions actually recorded.
    breaks = new int[foundCount];
    System.arraycopy(found, 0, breaks, 0, foundCount);
}
/**
 * Returns a string containing the characters from the given iterator,
 * from its begin index (inclusive) to its end index (exclusive).
 * <p>
 * The number of characters copied is taken from the iterator's index
 * range rather than from the {@link CharacterIterator#DONE} sentinel, so
 * text that legitimately contains U+FFFF is copied in full instead of
 * being cut short and padded with NUL characters.
 *
 * @param iterator  the iterator (<code>null</code> not permitted); it is
 *                  repositioned by this call.
 *
 * @return A string (empty if the iterator's range is empty).
 */
private String characterIteratorToString(CharacterIterator iterator) {
    int endIndex = iterator.getEndIndex();
    int beginIndex = iterator.getBeginIndex();
    int count = endIndex - beginIndex;
    if (count <= 0) {
        return "";
    }
    char[] chars = new char[count];
    char c = iterator.first();
    // Bounded by count: DONE (U+FFFF) cannot be told apart from a real
    // U+FFFF in the text, so it must not be used as the stop condition.
    for (int i = 0; i < count; i++) {
        chars[i] = c;
        c = iterator.next();
    }
    return new String(chars);
}
/**
 * Reports whether this <code>Font</code> can display the characters of
 * <code>iter</code> in the range from <code>start</code> up to, but not
 * including, <code>limit</code>.
 * <p>
 * A character that cannot be displayed on its own is still accepted when
 * it is a high surrogate immediately followed by a low surrogate and the
 * resulting code point can be displayed.
 *
 * @param iter  a {@link CharacterIterator} object
 * @param start the starting offset into <code>iter</code>
 * @param limit the ending offset into <code>iter</code>
 * @return the offset into <code>iter</code> of the first character that
 *         this <code>Font</code> cannot display, or <code>-1</code> if
 *         every character in the range can be displayed
 * @since 1.2
 */
public int canDisplayUpTo(CharacterIterator iter, int start, int limit) {
    final Font2D glyphSource = getFont2D();
    char current = iter.setIndex(start);
    int offset = start;
    while (offset < limit) {
        if (!glyphSource.canDisplay(current)) {
            // Only a surrogate pair can rescue a char that fails on its own.
            if (!Character.isHighSurrogate(current)) {
                return offset;
            }
            final char trail = iter.next();
            // trail may be CharacterIterator.DONE, which is not a low surrogate.
            if (!Character.isLowSurrogate(trail)) {
                return offset;
            }
            if (!glyphSource.canDisplay(Character.toCodePoint(current, trail))) {
                return offset;
            }
            // The pair occupies two offsets; account for the trail here.
            offset++;
        }
        offset++;
        current = iter.next();
    }
    return -1;
}
/**
 * Reports whether this <code>Font</code> can display the characters of
 * <code>iter</code> in the range from <code>start</code> up to, but not
 * including, <code>limit</code>.
 * <p>
 * A character that cannot be displayed on its own is still accepted when
 * it is a high surrogate immediately followed by a low surrogate and the
 * resulting code point can be displayed.
 *
 * @param iter  a {@link CharacterIterator} object
 * @param start the starting offset into <code>iter</code>
 * @param limit the ending offset into <code>iter</code>
 * @return the offset into <code>iter</code> of the first character that
 *         this <code>Font</code> cannot display, or <code>-1</code> if
 *         every character in the range can be displayed
 * @since 1.2
 */
public int canDisplayUpTo(CharacterIterator iter, int start, int limit) {
    final Font2D glyphSource = getFont2D();
    char current = iter.setIndex(start);
    int offset = start;
    while (offset < limit) {
        if (!glyphSource.canDisplay(current)) {
            // Only a surrogate pair can rescue a char that fails on its own.
            if (!Character.isHighSurrogate(current)) {
                return offset;
            }
            final char trail = iter.next();
            // trail may be CharacterIterator.DONE, which is not a low surrogate.
            if (!Character.isLowSurrogate(trail)) {
                return offset;
            }
            if (!glyphSource.canDisplay(Character.toCodePoint(current, trail))) {
                return offset;
            }
            // The pair occupies two offsets; account for the trail here.
            offset++;
        }
        offset++;
        current = iter.next();
    }
    return -1;
}
/**
 * Parses one variable declaration and appends the resulting
 * {@code VariableToken} to {@code tokens}.
 * <p>
 * The iterator is advanced past the opening brace, the variable name is
 * read, and, if the name is followed by {@code ':'}, a regular expression
 * is read as well; otherwise {@code DEFAULT_VARIABLE_REGEXP} is used. The
 * declaration must then be closed by a closing brace, which is consumed.
 *
 * @param iterator the iterator, positioned on the declaration's opening brace
 */
private void parseVariable(final CharacterIterator iterator) {
    // Step over the opening brace.
    iterator.next();
    final String varName = readFragment(iterator, VARIABLE_NAME_TERMINATORS, ESCAPE_CHARS, VARNAME_DISALLOWED_CHARS);

    // The iterator now rests on the character that ended the name.
    if (iterator.current() != ':') {
        // No explicit pattern: fall back to the default one.
        tokens.add(new VariableToken(varName, DEFAULT_VARIABLE_REGEXP));
    } else {
        // Step over the ':' separator and read the explicit pattern.
        iterator.next();
        tokens.add(new VariableToken(varName,
                readFragment(iterator, END_OF_VARIABLE_DECLARATION, ESCAPE_CHARS, NONE)));
    }

    // Whatever was read, the declaration has to be closed properly.
    checkArgument(iterator.current() == '}', "Variable does not end with '}' at position %s", iterator.getIndex());
    // Step over the closing brace.
    iterator.next();
}
/**
 * Returns {@code path} with a backslash inserted before every character
 * contained in {@code REGEX_SPECIAL_CHARS}, except for characters inside
 * a custom-regex region.
 * <p>
 * A region opens at {@code CUSTOM_REGEX_START} and closes at
 * {@code CUSTOM_REGEX_END}. The opening marker is treated as already
 * inside the region (never escaped); the closing marker is treated as
 * outside it (escaped if it is a special character).
 * <p>
 * The string is walked by index, so every character is processed,
 * including U+FFFF, which a {@code CharacterIterator} loop would mistake
 * for its end-of-text sentinel.
 *
 * @param path the path to escape (<code>null</code> not permitted)
 * @return the escaped path
 */
public static String escapeNonCustomRegex(String path) {
    // TODO replace with a regular expression
    final int length = path.length();
    final StringBuilder escaped = new StringBuilder(length);
    boolean inCustomRegion = false;
    for (int i = 0; i < length; i++) {
        final char ch = path.charAt(i);
        // Update the region state first so the markers themselves are
        // classified as described in the Javadoc.
        if (ch == CUSTOM_REGEX_START) {
            inCustomRegion = true;
        } else if (ch == CUSTOM_REGEX_END) {
            inCustomRegion = false;
        }
        if (REGEX_SPECIAL_CHARS.contains(ch) && !inCustomRegion) {
            escaped.append('\\');
        }
        escaped.append(ch);
    }
    return escaped.toString();
}
/**
 * Copies the iterator's characters into {@code text} and, in the same
 * pass, records the break positions in {@code breaks}.
 * <p>
 * The recorded positions are, in order: the iterator's begin index, every
 * index at which a non-whitespace character directly follows a whitespace
 * character, and (for non-empty text) the position just past the last
 * character. All positions are expressed in the iterator's index space.
 *
 * @param ci the iterator supplying the text; it is read from its first
 *           character until {@link CharacterIterator#DONE} is returned
 */
public void setText(CharacterIterator ci) {
    final int offset = ci.getBeginIndex();
    text = new char[ci.getEndIndex() - offset];

    // Scratch buffer: text.length + 1 slots is an upper bound on the break count.
    final int[] found = new int[text.length + 1];
    int foundCount = 0;
    found[foundCount++] = offset;

    int pos = 0;
    boolean previousWasSpace = false;
    char ch = ci.first();
    while (ch != CharacterIterator.DONE) {
        text[pos] = ch;
        final boolean isSpace = Character.isWhitespace(ch);
        // A break sits where a word starts right after whitespace.
        if (previousWasSpace && !isSpace) {
            found[foundCount++] = pos + offset;
        }
        previousWasSpace = isSpace;
        pos++;
        ch = ci.next();
    }

    if (text.length > 0) {
        found[foundCount++] = text.length + offset;
    }

    // Trim the scratch buffer down to the positions actually recorded.
    breaks = new int[foundCount];
    System.arraycopy(found, 0, breaks, 0, foundCount);
}
/**
 * Copies the iterator's characters into {@code text} and, in the same
 * pass, records the break positions in {@code breaks}.
 * <p>
 * The recorded positions are, in order: the iterator's begin index, every
 * index at which a non-whitespace character directly follows a whitespace
 * character, and (for non-empty text) the position just past the last
 * character. All positions are expressed in the iterator's index space.
 *
 * @param ci the iterator supplying the text; it is read from its first
 *           character until {@link CharacterIterator#DONE} is returned
 */
public void setText(CharacterIterator ci) {
    final int offset = ci.getBeginIndex();
    text = new char[ci.getEndIndex() - offset];

    // Scratch buffer: text.length + 1 slots is an upper bound on the break count.
    final int[] found = new int[text.length + 1];
    int foundCount = 0;
    found[foundCount++] = offset;

    int pos = 0;
    boolean previousWasSpace = false;
    char ch = ci.first();
    while (ch != CharacterIterator.DONE) {
        text[pos] = ch;
        final boolean isSpace = Character.isWhitespace(ch);
        // A break sits where a word starts right after whitespace.
        if (previousWasSpace && !isSpace) {
            found[foundCount++] = pos + offset;
        }
        previousWasSpace = isSpace;
        pos++;
        ch = ci.next();
    }

    if (text.length > 0) {
        found[foundCount++] = text.length + offset;
    }

    // Trim the scratch buffer down to the positions actually recorded.
    breaks = new int[foundCount];
    System.arraycopy(found, 0, breaks, 0, foundCount);
}
/**
 * Completes a code point whose first code unit, {@code lead}, has already
 * been read from {@code ci}.
 * <ul>
 * <li>If {@code lead} equals {@link CharacterIterator#DONE} and the
 *     iterator's index is at or beyond its end index, {@code DONE32} is
 *     returned.</li>
 * <li>If {@code lead} is not above {@code UTF16.LEAD_SURROGATE_MAX_VALUE},
 *     the next code unit is read. When it is a trail surrogate the
 *     combined supplementary code point is returned and the iterator stays
 *     on the trail; otherwise the iterator is moved back one step and
 *     {@code lead} is returned.</li>
 * <li>In every other case {@code lead} is returned unchanged and the
 *     iterator is not moved.</li>
 * </ul>
 * Only the upper bound of the lead-surrogate range is tested here.
 *
 * @param ci   the character iterator, positioned on {@code lead}
 * @param lead the code unit most recently read from {@code ci}
 * @return the resulting code point, or {@code DONE32} at the end of the text
 */
public static int nextTrail32(CharacterIterator ci, int lead) {
    final boolean exhausted = lead == CharacterIterator.DONE && ci.getIndex() >= ci.getEndIndex();
    if (exhausted) {
        return DONE32;
    }
    if (lead > UTF16.LEAD_SURROGATE_MAX_VALUE) {
        return lead;
    }

    final char trail = ci.next();
    if (!UTF16.isTrailSurrogate(trail)) {
        // Not a pair after all: undo the look-ahead.
        ci.previous();
        return lead;
    }
    return ((lead - UTF16.LEAD_SURROGATE_MIN_VALUE) << 10)
            + (trail - UTF16.TRAIL_SURROGATE_MIN_VALUE)
            + UTF16.SUPPLEMENTARY_MIN_VALUE;
}
/**
 * Returns the text that was matched by the most recent call to
 * {@link #first}, {@link #next}, {@link #previous}, or {@link #last}.
 * <p>
 * If the current matched length is not positive (for example just after
 * construction, or after {@link #DONE} has been returned),
 * <code>null</code> is returned. Otherwise the matched characters are
 * read from the target text's iterator, which is left positioned at the
 * start of the match.
 *
 * @return the substring in the target text of the most recent match,
 *         or null if there is no match currently.
 * @see #first
 * @see #next
 * @see #previous
 * @see #last
 * @stable ICU 2.0
 */
public String getMatchedText() {
    if (search_.matchedLength() <= 0) {
        return null;
    }

    final int matchEnd = search_.matchedIndex_ + search_.matchedLength();
    final StringBuilder matched = new StringBuilder(search_.matchedLength());
    final CharacterIterator target = search_.text();

    // Collect the code units of the match, one iterator step at a time.
    for (target.setIndex(search_.matchedIndex_); target.getIndex() < matchEnd; target.next()) {
        matched.append(target.current());
    }

    // Put the iterator back on the first matched character.
    target.setIndex(search_.matchedIndex_);
    return matched.toString();
}
/**
 * Returns a string based on the input string, but with every character
 * below {@code ASCII_PRINTABLE_LOW} or at/above {@code ASCII_PRINTABLE_HI}
 * replaced with {@code ASCII_SPACE}.
 * <p>
 * The string is scanned by index, so every character is examined,
 * including U+FFFF, which a {@code CharacterIterator} loop would mistake
 * for its end-of-text sentinel and stop at.
 *
 * @param src - The string to clean (may be <code>null</code>)
 *
 * @return <code>null</code> if <code>src</code> is <code>null</code>;
 *         the original string if it does not contain any characters
 *         outside the allowed range; otherwise a new string of the same
 *         length with each such character replaced
 */
public static String cleanString(String src) {
    if (src == null) {
        return null;
    }
    final int length = src.length();

    // First pass: look for any offending character; most inputs have none
    // and can be returned as-is without allocating.
    boolean foundBad = false;
    for (int i = 0; i < length; i++) {
        final char c = src.charAt(i);
        if (c < ASCII_PRINTABLE_LOW || c >= ASCII_PRINTABLE_HI) {
            foundBad = true;
            break;
        }
    }
    if (!foundBad) {
        return src;
    }

    // Second pass: rebuild the string with replacements.
    final StringBuilder res = new StringBuilder(length);
    for (int i = 0; i < length; i++) {
        final char c = src.charAt(i);
        if (c < ASCII_PRINTABLE_LOW || c >= ASCII_PRINTABLE_HI) {
            res.append(ASCII_SPACE);
        } else {
            res.append(c);
        }
    }
    return res.toString();
}
/**
 * Creates a glyph vector from the text supplied by a character iterator.
 * <p>
 * The iterator is read from its first character until
 * {@link CharacterIterator#DONE} is returned; the characters are copied
 * into an array covering the iterator's begin-to-end range, which is then
 * passed to {@code init} in full.
 *
 * @param font the font passed on to {@code init}
 * @param iter the iterator supplying the text
 * @param frc  the font render context passed on to {@code init}
 */
public StandardGlyphVector(Font font, CharacterIterator iter, FontRenderContext frc) {
    final int base = iter.getBeginIndex();
    final char[] chars = new char[iter.getEndIndex() - base];

    char ch = iter.first();
    while (ch != CharacterIterator.DONE) {
        // Store each character at its position relative to the begin index.
        chars[iter.getIndex() - base] = ch;
        ch = iter.next();
    }

    init(font, chars, 0, chars.length, frc, UNINITIALIZED_FLAGS);
}
/**
* Move the iterator forward to the next code point, and return that code point,
* leaving the iterator positioned at the char returned.
* For supplementary chars, the iterator is left positioned at the lead surrogate.
* <p>
* If the iterator currently rests on the lead surrogate of a pair, the trail
* surrogate is skipped first so that the pair is stepped over as one unit.
* @param ci The character iterator
* @return The next code point, as produced by the iterator's next() and,
* for values at or above the lead-surrogate minimum, by nextTrail32().
*/
public static int next32(CharacterIterator ci) {
// If the current position is at a surrogate pair, move to the trail surrogate
// which leaves it in position for underlying iterator's next() to work.
int c = ci.current();
if (c >= UTF16.LEAD_SURROGATE_MIN_VALUE && c<=UTF16.LEAD_SURROGATE_MAX_VALUE) {
c = ci.next();
// The following char is not a trail surrogate, so the lead was unpaired:
// step back so the next() below advances by exactly one char.
if (c<UTF16.TRAIL_SURROGATE_MIN_VALUE || c>UTF16.TRAIL_SURROGATE_MAX_VALUE) {
ci.previous();
}
}
// For BMP chars, this next() is the real deal.
c = ci.next();
// If we might have a lead surrogate, we need to peek ahead to get the trail
// even though we don't want to really be positioned there.
// NOTE(review): CharacterIterator.DONE (U+FFFF) presumably also satisfies this
// test, in which case nextTrail32 maps it to DONE32 at the end of the text -
// confirm against the UTF16 constants.
if (c >= UTF16.LEAD_SURROGATE_MIN_VALUE) {
c = nextTrail32(ci, c);
}
// DONE32 is excluded explicitly so the end-of-text marker is never treated
// as a supplementary code point.
if (c >= UTF16.SUPPLEMENTARY_MIN_VALUE && c != DONE32) {
// We got a supplementary char. Back the iterator up to the position
// of the lead surrogate.
ci.previous();
}
return c;
}
/**
 * Creates a glyph vector from the text supplied by a character iterator.
 * <p>
 * The iterator is read from its first character until
 * {@link CharacterIterator#DONE} is returned; the characters are copied
 * into an array covering the iterator's begin-to-end range, which is then
 * passed to {@code init} in full.
 *
 * @param font the font passed on to {@code init}
 * @param iter the iterator supplying the text
 * @param frc  the font render context passed on to {@code init}
 */
public StandardGlyphVector(Font font, CharacterIterator iter, FontRenderContext frc) {
    final int base = iter.getBeginIndex();
    final char[] chars = new char[iter.getEndIndex() - base];

    char ch = iter.first();
    while (ch != CharacterIterator.DONE) {
        // Store each character at its position relative to the begin index.
        chars[iter.getIndex() - base] = ch;
        ch = iter.next();
    }

    init(font, chars, 0, chars.length, frc, UNINITIALIZED_FLAGS);
}
/**
* Extract link content from a character iterator. It is assumed that the
* '#' has already been eaten. It leaves the character iterator at the first
* character after the link text.
* <p>
* Characters are collected up to the terminator: a space by default, or a
* single quote when the link starts with one. A space that is directly
* followed by '#' does not end the link; it is kept as a single space and
* the '#' is dropped, so "#abc #def" yields one link. Trailing characters
* for which isWordDelim() is true are then removed from the result, and the
* iterator is moved back one position for each removed character.
*
* @param ci
* The character iterator.
*
* @return Link text, or <code>null</code> if no link text remains.
*/
protected String extractLink(final CharacterIterator ci) {
final StringBuilder sbuf = new StringBuilder();
char ch = ci.current();
char terminator = ' ';
// color quoted compound words like "#'iron sword'"
// NOTE(review): the opening quote is not skipped here, so on the first loop
// pass ch already equals the terminator and the loop breaks with an empty
// buffer (null is returned) - confirm whether the quote should be consumed.
if (ch == '\'') {
terminator = ch;
}
while (ch != CharacterIterator.DONE) {
if (ch == terminator) {
if (terminator == ' ') {
/*
* Continued link (#abc #def)?
*/
ch = ci.next();
if (ch == '#') {
// Replace the "space + '#'" sequence with one space and keep collecting.
ch = ' ';
} else {
// Not a continuation: undo the look-ahead so the iterator rests on the space.
ci.previous();
break;
}
} else {
// Closing quote reached; the iterator stays on it.
break;
}
}
sbuf.append(ch);
ch = ci.next();
}
/*
* Don't treat word delimiter(s) on the end as link text
*/
int len = sbuf.length();
while (len != 0) {
// Inspect the last remaining character; stop at the first non-delimiter.
if (!isWordDelim(sbuf.charAt(--len))) {
// Restore len so it again equals the buffer length.
len++;
break;
}
// Drop the delimiter and hand it back to the caller via the iterator.
sbuf.setLength(len);
ci.previous();
}
/*
* Nothing found?
*/
if (len == 0) {
return null;
}
return sbuf.toString();
}
/**
 * Implements JSON string escaping as specified <a href="http://www.ietf.org/rfc/rfc4627.txt">here</a>.
 * <ul>
 * <li>The following characters are escaped by prefixing them with a '\' :
 * \b,\f,\n,\r,\t,\,"</li>
 * <li>Other control characters in the range 0x0000-0x001F are escaped
 * using the \\uXXXX notation</li>
 * <li>A UTF-16 high surrogate and the code unit that follows it are both
 * encoded using the \\uXXXX\\uXXXX notation</li>
 * <li>any other character is printed as-is</li>
 * </ul>
 * The input is walked by index, so every code unit is processed,
 * including U+FFFF, which a {@code CharacterIterator} loop would mistake
 * for its end-of-text sentinel and silently truncate the output at.
 *
 * @param input the text to escape (<code>null</code> not permitted)
 * @return the escaped text, without surrounding quotes
 * @throws IllegalArgumentException if the input ends with a high surrogate
 */
static String escapeText(String input) {
    final int length = input.length();
    StringBuilder builder = new StringBuilder(length);
    for (int i = 0; i < length; i++) {
        char c = input.charAt(i);
        switch (c) {
            case '\b':
                builder.append("\\b");
                break;
            case '\f':
                builder.append("\\f");
                break;
            case '\n':
                builder.append("\\n");
                break;
            case '\r':
                builder.append("\\r");
                break;
            case '\t':
                builder.append("\\t");
                break;
            case '\\':
                builder.append("\\\\");
                break;
            case '"':
                builder.append("\\\"");
                break;
            default:
                // A char is unsigned, so only the upper bound needs checking.
                if (c <= 0x001F) {
                    appendEscapedUnicode(builder, c);
                } else if (Character.isHighSurrogate(c)) {
                    // Encode the surrogate pair using 2 six-character sequence (\\uXXXX\\uXXXX)
                    appendEscapedUnicode(builder, c);
                    // Consume the second half of the pair here so the loop
                    // does not process it again.
                    i++;
                    if (i >= length) {
                        throw new IllegalArgumentException(
                                "invalid unicode string: unexpected high surrogate pair value "
                                        + "without corresponding low value.");
                    }
                    appendEscapedUnicode(builder, input.charAt(i));
                } else {
                    // Anything else can be printed as-is
                    builder.append(c);
                }
                break;
        }
    }
    return builder.toString();
}
/**
 * Escapes a string to make it usable in JavaScript.
 * <p>
 * Backspace, tab, newline, form feed and carriage return are written as
 * {@code BACKSLASH} followed by <code>b</code>, <code>t</code>,
 * <code>n</code>, <code>f</code> and <code>r</code> respectively; single
 * quote, double quote, slash and backslash are written as
 * {@code BACKSLASH} followed by the character itself. Every other
 * character is copied unchanged.
 * <p>
 * The string is walked by index, so every character is processed,
 * including U+FFFF, which a {@code CharacterIterator} loop would mistake
 * for its end-of-text sentinel and truncate the output at.
 *
 * @param s input string (<code>null</code> not permitted)
 * @return escaped string, without quotes
 */
public static String makeJavaScriptString( String s )
{
    final int length = s.length( );
    // Local, single-threaded buffer: no need for StringBuffer's locking.
    StringBuilder output = new StringBuilder( length );
    for ( int i = 0; i < length; i++ )
    {
        char c = s.charAt( i );
        switch ( c )
        {
            // backspace
            case '\b':
                output.append( BACKSLASH + "b" );
                break;
            // tab
            case '\t':
                output.append( BACKSLASH + "t" );
                break;
            // newline
            case '\n':
                output.append( BACKSLASH + "n" );
                break;
            // form feed
            case '\f':
                output.append( BACKSLASH + "f" );
                break;
            // carriage return
            case '\r':
                output.append( BACKSLASH + "r" );
                break;
            // single quote, double quote, slash, backslash:
            // all escaped by prefixing the character itself
            case '\'':
            case '"':
            case '/':
            case '\\':
                output.append( BACKSLASH + c );
                break;
            // everything else is copied through
            default:
                output.append( c );
        }
    }
    return output.toString( );
}
/**
 * Implements JSON string escaping as specified <a href="http://www.ietf.org/rfc/rfc4627.txt">here</a>.
 * <ul>
 * <li>The following characters are escaped by prefixing them with a '\' : \b,\f,\n,\r,\t,\,"</li>
 * <li>Other control characters in the range 0x0000-0x001F are escaped using the \\uXXXX notation</li>
 * <li>A UTF-16 high surrogate and the code unit that follows it are both encoded using the
 * \\uXXXX\\uXXXX notation</li>
 * <li>any other character is printed as-is</li>
 * </ul>
 * The input is walked by index, so every code unit is processed,
 * including U+FFFF, which a {@code CharacterIterator} loop would mistake
 * for its end-of-text sentinel and silently truncate the output at.
 *
 * @param input the text to escape (<code>null</code> not permitted)
 * @return the escaped text, without surrounding quotes
 * @throws IllegalArgumentException if the input ends with a high surrogate
 */
static String escapeText(String input) {
    final int length = input.length();
    StringBuilder builder = new StringBuilder(length);
    for (int i = 0; i < length; i++) {
        char c = input.charAt(i);
        switch (c) {
            case '\b':
                builder.append("\\b");
                break;
            case '\f':
                builder.append("\\f");
                break;
            case '\n':
                builder.append("\\n");
                break;
            case '\r':
                builder.append("\\r");
                break;
            case '\t':
                builder.append("\\t");
                break;
            case '\\':
                builder.append("\\\\");
                break;
            case '"':
                builder.append("\\\"");
                break;
            default:
                // A char is unsigned, so only the upper bound needs checking.
                if (c <= 0x001F) {
                    appendEscapedUnicode(builder, c);
                } else if (Character.isHighSurrogate(c)) {
                    // Encode the surrogate pair using 2 six-character sequence (\\uXXXX\\uXXXX)
                    appendEscapedUnicode(builder, c);
                    // Consume the second half of the pair here so the loop
                    // does not process it again.
                    i++;
                    if (i >= length) {
                        throw new IllegalArgumentException("invalid unicode string: unexpected high surrogate pair value without corresponding low value.");
                    }
                    appendEscapedUnicode(builder, input.charAt(i));
                } else {
                    // Anything else can be printed as-is
                    builder.append(c);
                }
                break;
        }
    }
    return builder.toString();
}
/**
 * Returns the logical bounds of the characters of the given
 * {@link CharacterIterator} that lie between <code>beginIndex</code>
 * and <code>limit</code>, measured in the specified
 * <code>FontRenderContext</code>. The logical bounds contains the
 * origin, ascent, advance, and height, which includes the leading.
 * The logical bounds does not always enclose all the text: in some
 * languages and fonts, accent marks can be positioned above the ascent
 * or below the descent. To obtain a visual bounding box, which encloses
 * all the text, use the {@link TextLayout#getBounds() getBounds} method
 * of <code>TextLayout</code>.
 * <p>Note: The returned bounds is in baseline-relative coordinates
 * (see {@link java.awt.Font class notes}).
 * <p>The selected characters are copied into an array and measured by
 * the <code>char[]</code> overload of this method.
 *
 * @param ci the specified <code>CharacterIterator</code>
 * @param beginIndex the initial offset in <code>ci</code>
 * @param limit the end offset in <code>ci</code>
 * @param frc the specified <code>FontRenderContext</code>
 * @return a <code>Rectangle2D</code> that is the bounding box of the
 *         selected characters in the specified
 *         <code>FontRenderContext</code>.
 * @see FontRenderContext
 * @see Font#createGlyphVector
 * @since 1.2
 * @throws IndexOutOfBoundsException if <code>beginIndex</code> is
 *         less than the start index of <code>ci</code>, or
 *         <code>limit</code> is greater than the end index of
 *         <code>ci</code>, or <code>beginIndex</code> is greater
 *         than <code>limit</code>
 */
public Rectangle2D getStringBounds(CharacterIterator ci,
                                   int beginIndex, int limit,
                                   FontRenderContext frc) {
    final int lowestIndex = ci.getBeginIndex();
    final int highestIndex = ci.getEndIndex();

    // Reject ranges that fall outside the iterator or are inverted.
    if (beginIndex < lowestIndex) {
        throw new IndexOutOfBoundsException("beginIndex: " + beginIndex);
    }
    if (limit > highestIndex) {
        throw new IndexOutOfBoundsException("limit: " + limit);
    }
    if (beginIndex > limit) {
        throw new IndexOutOfBoundsException("range length: " +
                                            (limit - beginIndex));
    }

    // Copy the requested range out of the iterator.
    final int length = limit - beginIndex;
    final char[] chars = new char[length];
    ci.setIndex(beginIndex);
    int filled = 0;
    while (filled < length) {
        chars[filled] = ci.current();
        ci.next();
        filled++;
    }

    return getStringBounds(chars, 0, length, frc);
}
/**
 * Returns the logical bounds of the characters of the given
 * {@link CharacterIterator} that lie between <code>beginIndex</code>
 * and <code>limit</code>, measured in the specified
 * <code>FontRenderContext</code>. The logical bounds contains the
 * origin, ascent, advance, and height, which includes the leading.
 * The logical bounds does not always enclose all the text: in some
 * languages and fonts, accent marks can be positioned above the ascent
 * or below the descent. To obtain a visual bounding box, which encloses
 * all the text, use the {@link TextLayout#getBounds() getBounds} method
 * of <code>TextLayout</code>.
 * <p>Note: The returned bounds is in baseline-relative coordinates
 * (see {@link java.awt.Font class notes}).
 * <p>The selected characters are copied into an array and measured by
 * the <code>char[]</code> overload of this method.
 *
 * @param ci the specified <code>CharacterIterator</code>
 * @param beginIndex the initial offset in <code>ci</code>
 * @param limit the end offset in <code>ci</code>
 * @param frc the specified <code>FontRenderContext</code>
 * @return a <code>Rectangle2D</code> that is the bounding box of the
 *         selected characters in the specified
 *         <code>FontRenderContext</code>.
 * @see FontRenderContext
 * @see Font#createGlyphVector
 * @since 1.2
 * @throws IndexOutOfBoundsException if <code>beginIndex</code> is
 *         less than the start index of <code>ci</code>, or
 *         <code>limit</code> is greater than the end index of
 *         <code>ci</code>, or <code>beginIndex</code> is greater
 *         than <code>limit</code>
 */
public Rectangle2D getStringBounds(CharacterIterator ci,
                                   int beginIndex, int limit,
                                   FontRenderContext frc) {
    final int lowestIndex = ci.getBeginIndex();
    final int highestIndex = ci.getEndIndex();

    // Reject ranges that fall outside the iterator or are inverted.
    if (beginIndex < lowestIndex) {
        throw new IndexOutOfBoundsException("beginIndex: " + beginIndex);
    }
    if (limit > highestIndex) {
        throw new IndexOutOfBoundsException("limit: " + limit);
    }
    if (beginIndex > limit) {
        throw new IndexOutOfBoundsException("range length: " +
                                            (limit - beginIndex));
    }

    // Copy the requested range out of the iterator.
    final int length = limit - beginIndex;
    final char[] chars = new char[length];
    ci.setIndex(beginIndex);
    int filled = 0;
    while (filled < length) {
        chars[filled] = ci.current();
        ci.next();
        filled++;
    }

    return getStringBounds(chars, 0, length, frc);
}