下面列出了 java.text.CharacterIterator#previous() 的实例代码，或者点击链接到 GitHub 查看源代码，也可以在右侧发表评论。
/**
 * Steps the iterator back by one code point and returns that code point.
 *
 * <p>If the iterator is already at (or before) its begin index, nothing is moved
 * and {@code DONE32} is returned. Otherwise the iterator moves back one code unit.
 * When that unit is a trail surrogate and the unit before it is a lead surrogate,
 * the iterator is left on the lead surrogate and the combined supplementary code
 * point is returned; in every other case the iterator is left on the single unit
 * that was read and that unit's value is returned.
 *
 * @param ci the character iterator to move
 * @return the code point before the starting position, or {@code DONE32}
 */
public static int previous32(CharacterIterator ci) {
    if (ci.getIndex() <= ci.getBeginIndex()) {
        // Nothing precedes the current position.
        return DONE32;
    }

    final char unit = ci.previous();
    if (!UTF16.isTrailSurrogate(unit) || ci.getIndex() <= ci.getBeginIndex()) {
        // Ordinary unit, or a trail surrogate with nothing in front of it.
        return unit;
    }

    final char candidate = ci.previous();
    if (!UTF16.isLeadSurrogate(candidate)) {
        // Unpaired trail surrogate: undo the extra step back.
        ci.next();
        return unit;
    }

    final int highBits = (candidate - UTF16.LEAD_SURROGATE_MIN_VALUE) << 10;
    final int lowBits = unit - UTF16.TRAIL_SURROGATE_MIN_VALUE;
    return highBits + lowBits + UTF16.SUPPLEMENTARY_MIN_VALUE;
}
/**
 * Returns the code point at the iterator's current position without changing
 * the position the iterator ends up at.
 *
 * <p>For a lead surrogate the following unit is peeked with {@code next()} and
 * the iterator is stepped back again with {@code previous()}; if that unit is a
 * trail surrogate the combined supplementary code point is returned. When the
 * current unit equals {@link CharacterIterator#DONE} and the index is at or past
 * the end index, {@code DONE32} is returned.
 *
 * @param ci the character iterator to inspect
 * @return the current code point, the lone code unit, or {@code DONE32}
 */
public static int current32(CharacterIterator ci) {
    final char unit = ci.current();
    if (unit < UTF16.LEAD_SURROGATE_MIN_VALUE) {
        // Fast path: below the surrogate range.
        return unit;
    }

    if (!UTF16.isLeadSurrogate(unit)) {
        // DONE is also a legal char value, so confirm we really are at the end.
        if (unit == CharacterIterator.DONE && ci.getIndex() >= ci.getEndIndex()) {
            return DONE32;
        }
        return unit;
    }

    // Peek at the unit after the lead surrogate, then step back.
    final char following = ci.next();
    ci.previous();
    if (!UTF16.isTrailSurrogate(following)) {
        return unit;
    }
    return ((unit - UTF16.LEAD_SURROGATE_MIN_VALUE) << 10)
            + (following - UTF16.TRAIL_SURROGATE_MIN_VALUE)
            + UTF16.SUPPLEMENTARY_MIN_VALUE;
}
/**
 * Checks whether the iterator's current location directly follows a hard line
 * break, ignoring any non-alphanumeric characters in between.
 *
 * <p>The text is scanned backwards from the current index. The scan stops at the
 * first letter or digit, or when {@code previous()} reports
 * {@link CharacterIterator#DONE}. The iterator index is restored before
 * returning.
 *
 * @param cit iterator
 * @return <code>true</code> if a {@code '\n'} is found before any letter or digit
 */
private boolean isHardLineBreak(final CharacterIterator cit) {
    // Remember where we started so the backwards scan leaves no trace.
    final int savedIndex = cit.getIndex();
    boolean hardBreak = false;

    for (char c = cit.previous(); c != CharacterIterator.DONE; c = cit.previous()) {
        if (Character.isLetterOrDigit(c)) {
            break;
        }
        if (c == '\n') {
            hardBreak = true;
            break;
        }
    }

    cit.setIndex(savedIndex);
    return hardBreak;
}
/**
 * Completes a code point whose first unit, {@code lead}, has already been read
 * from the iterator.
 *
 * <p>If {@code lead} equals {@link CharacterIterator#DONE} and the iterator is at
 * or past its end index, {@code DONE32} is returned. If {@code lead} is not above
 * the lead-surrogate maximum, the next unit is read: a trail surrogate is
 * combined with {@code lead} (leaving the iterator on the trail surrogate),
 * anything else is pushed back with {@code previous()} and {@code lead} is
 * returned unchanged.
 *
 * @param ci   the character iterator, positioned on the unit that produced {@code lead}
 * @param lead the code unit already read
 * @return the combined code point, {@code lead} itself, or {@code DONE32}
 */
public static int nextTrail32(CharacterIterator ci, int lead) {
    if (lead == CharacterIterator.DONE && ci.getIndex() >= ci.getEndIndex()) {
        return DONE32;
    }
    if (lead > UTF16.LEAD_SURROGATE_MAX_VALUE) {
        // Above the lead-surrogate range: cannot start a pair.
        return lead;
    }

    final char following = ci.next();
    if (!UTF16.isTrailSurrogate(following)) {
        // Not a pair after all; undo the look-ahead.
        ci.previous();
        return lead;
    }
    return ((lead - UTF16.LEAD_SURROGATE_MIN_VALUE) << 10)
            + (following - UTF16.TRAIL_SURROGATE_MIN_VALUE)
            + UTF16.SUPPLEMENTARY_MIN_VALUE;
}
/**
 * Steps the iterator back by one code point and returns that code point.
 *
 * <p>If the iterator is already at (or before) its begin index, nothing is moved
 * and {@code DONE32} is returned. Otherwise the iterator moves back one code unit.
 * When that unit is a trail surrogate and the unit before it is a lead surrogate,
 * the iterator is left on the lead surrogate and the combined supplementary code
 * point is returned; in every other case the iterator is left on the single unit
 * that was read and that unit's value is returned.
 *
 * @param ci the character iterator to move
 * @return the code point before the starting position, or {@code DONE32}
 */
public static int previous32(CharacterIterator ci) {
    if (ci.getIndex() <= ci.getBeginIndex()) {
        // Nothing precedes the current position.
        return DONE32;
    }

    final char unit = ci.previous();
    if (!UTF16.isTrailSurrogate(unit) || ci.getIndex() <= ci.getBeginIndex()) {
        // Ordinary unit, or a trail surrogate with nothing in front of it.
        return unit;
    }

    final char candidate = ci.previous();
    if (!UTF16.isLeadSurrogate(candidate)) {
        // Unpaired trail surrogate: undo the extra step back.
        ci.next();
        return unit;
    }

    final int highBits = (candidate - UTF16.LEAD_SURROGATE_MIN_VALUE) << 10;
    final int lowBits = unit - UTF16.TRAIL_SURROGATE_MIN_VALUE;
    return highBits + lowBits + UTF16.SUPPLEMENTARY_MIN_VALUE;
}
/**
 * Returns the code point at the iterator's current position without changing
 * the position the iterator ends up at.
 *
 * <p>For a lead surrogate the following unit is peeked with {@code next()} and
 * the iterator is stepped back again with {@code previous()}; if that unit is a
 * trail surrogate the combined supplementary code point is returned. When the
 * current unit equals {@link CharacterIterator#DONE} and the index is at or past
 * the end index, {@code DONE32} is returned.
 *
 * @param ci the character iterator to inspect
 * @return the current code point, the lone code unit, or {@code DONE32}
 */
public static int current32(CharacterIterator ci) {
    final char unit = ci.current();
    if (unit < UTF16.LEAD_SURROGATE_MIN_VALUE) {
        // Fast path: below the surrogate range.
        return unit;
    }

    if (!UTF16.isLeadSurrogate(unit)) {
        // DONE is also a legal char value, so confirm we really are at the end.
        if (unit == CharacterIterator.DONE && ci.getIndex() >= ci.getEndIndex()) {
            return DONE32;
        }
        return unit;
    }

    // Peek at the unit after the lead surrogate, then step back.
    final char following = ci.next();
    ci.previous();
    if (!UTF16.isTrailSurrogate(following)) {
        return unit;
    }
    return ((unit - UTF16.LEAD_SURROGATE_MIN_VALUE) << 10)
            + (following - UTF16.TRAIL_SURROGATE_MIN_VALUE)
            + UTF16.SUPPLEMENTARY_MIN_VALUE;
}
/**
 * For the given string, returns the number of UTF-8 bytes required to encode the string.
 *
 * <p>A lead surrogate immediately followed by a trail surrogate is counted as one
 * 4-byte sequence; an unpaired surrogate is counted as 3 bytes, like any other
 * char at or above 0x800.
 *
 * @param string
 *            text to encode
 * @return number of UTF-8 bytes required to encode
 * @throws NullPointerException
 *             if {@code string} is {@code null}
 */
public static int utf8Length(String string) {
    // Walk the string by index instead of relying on CharacterIterator.DONE:
    // DONE is the char U+FFFF, so a sentinel-based loop would stop counting at
    // the first U+FFFF that legitimately occurs in the text.
    final int length = string.length();
    int size = 0;
    for (int i = 0; i < length; i++) {
        final char ch = string.charAt(i);
        if (ch < 0x80) {
            size++;
        } else if (ch < 0x800) {
            size += 2;
        } else if (Character.isHighSurrogate(ch)
                && i + 1 < length
                && Character.isLowSurrogate(string.charAt(i + 1))) {
            // valid pair: one supplementary code point, consume the trail too
            size += 4;
            i++;
        } else {
            // remaining BMP chars and unpaired surrogates
            size += 3;
        }
    }
    return size;
}
/**
 * Returns the code point that ends just before {@code index}, leaving the
 * iterator at the index it had on entry.
 *
 * <p>The iterator is moved to {@code index} and stepped back one unit. If that
 * unit is a low surrogate and the unit before it is a high surrogate, the pair is
 * combined; otherwise the single unit's value is returned (this is
 * {@link CharacterIterator#DONE} when {@code previous()} reports it).
 *
 * @param iter  the character iterator to read from
 * @param index the position whose preceding code point is wanted
 * @return the code point before {@code index}
 */
private static int codePointBefore(CharacterIterator iter, int index) {
    final int savedIndex = iter.getIndex();

    iter.setIndex(index);
    final char last = iter.previous();
    int codePoint = last;
    if (Character.isLowSurrogate(last)) {
        final char candidate = iter.previous();
        codePoint = Character.isHighSurrogate(candidate)
                ? Character.toCodePoint(candidate, last)
                : last;
    }

    // Put the iterator back where the caller left it.
    iter.setIndex(savedIndex);
    return codePoint;
}
/**
* Move the iterator forward to the next code point, and return that code point,
* leaving the iterator positioned at the char returned.
* For supplementary chars, the iterator is left positioned at the lead surrogate.
* The trail-surrogate lookup is delegated to nextTrail32, whose result (including
* DONE32) is passed through to the caller.
* @param ci The character iterator
* @return The next code point, or whatever nextTrail32 reports for the unit read.
*/
public static int next32(CharacterIterator ci) {
// If the current position is at a surrogate pair, move to the trail surrogate
// which leaves it in position for the underlying iterator's next() to work.
// If the unit after the lead is not a trail surrogate, step back again.
int c = ci.current();
if (c >= UTF16.LEAD_SURROGATE_MIN_VALUE && c<=UTF16.LEAD_SURROGATE_MAX_VALUE) {
c = ci.next();
if (c<UTF16.TRAIL_SURROGATE_MIN_VALUE || c>UTF16.TRAIL_SURROGATE_MAX_VALUE) {
ci.previous();
}
}
// For BMP chars, this next() is the real deal.
c = ci.next();
// If we might have a lead surrogate, we need to peek ahead to get the trail
// even though we don't want to really be positioned there.
if (c >= UTF16.LEAD_SURROGATE_MIN_VALUE) {
c = nextTrail32(ci, c);
}
// NOTE(review): the explicit DONE32 exclusion suggests DONE32 is not below
// SUPPLEMENTARY_MIN_VALUE; its actual value is declared outside this block.
if (c >= UTF16.SUPPLEMENTARY_MIN_VALUE && c != DONE32) {
// We got a supplementary char. Back the iterator up to the position
// of the lead surrogate.
ci.previous();
}
return c;
}
/**
 * For the given string, returns the number of UTF-8 bytes
 * required to encode the string.
 *
 * <p>A lead surrogate immediately followed by a trail surrogate is counted as one
 * 4-byte sequence; an unpaired surrogate is counted as 3 bytes, like any other
 * char at or above 0x800.
 *
 * @param string text to encode
 * @return number of UTF-8 bytes required to encode
 * @throws NullPointerException if {@code string} is {@code null}
 */
public static int utf8Length(String string) {
    // Walk the string by index instead of relying on CharacterIterator.DONE:
    // DONE is the char U+FFFF, so a sentinel-based loop would stop counting at
    // the first U+FFFF that legitimately occurs in the text.
    final int length = string.length();
    int size = 0;
    for (int i = 0; i < length; i++) {
        final char ch = string.charAt(i);
        if (ch < 0x80) {
            size++;
        } else if (ch < 0x800) {
            size += 2;
        } else if (Character.isHighSurrogate(ch)
                && i + 1 < length
                && Character.isLowSurrogate(string.charAt(i + 1))) {
            // valid pair: one supplementary code point, consume the trail too
            size += 4;
            i++;
        } else {
            // remaining BMP chars and unpaired surrogates
            size += 3;
        }
    }
    return size;
}
/**
 * For the given string, returns the number of UTF-8 bytes
 * required to encode the string.
 *
 * <p>A lead surrogate immediately followed by a trail surrogate is counted as one
 * 4-byte sequence; an unpaired surrogate is counted as 3 bytes, like any other
 * char at or above 0x800.
 *
 * @param string text to encode
 * @return number of UTF-8 bytes required to encode
 * @throws NullPointerException if {@code string} is {@code null}
 */
public static int utf8Length(String string) {
    // Walk the string by index instead of relying on CharacterIterator.DONE:
    // DONE is the char U+FFFF, so a sentinel-based loop would stop counting at
    // the first U+FFFF that legitimately occurs in the text.
    final int length = string.length();
    int size = 0;
    for (int i = 0; i < length; i++) {
        final char ch = string.charAt(i);
        if (ch < 0x80) {
            size++;
        } else if (ch < 0x800) {
            size += 2;
        } else if (Character.isHighSurrogate(ch)
                && i + 1 < length
                && Character.isLowSurrogate(string.charAt(i + 1))) {
            // valid pair: one supplementary code point, consume the trail too
            size += 4;
            i++;
        } else {
            // remaining BMP chars and unpaired surrogates
            size += 3;
        }
    }
    return size;
}
/**
 * Returns the given string with its characters in reverse order.
 *
 * <p>Surrogate pairs are kept intact (a supplementary character is reversed as a
 * single unit), and the whole string is processed even when it contains the
 * char U+FFFF, which a {@code CharacterIterator.DONE}-terminated loop would
 * mistake for the end of the text.
 *
 * @param value the string to reverse; may be {@code null}
 * @return the reversed string, or {@code null} if {@code value} is {@code null}
 */
public static String reverse(final String value) {
    if (value == null) {
        return null;
    }
    // StringBuilder.reverse() treats surrogate pairs as single characters.
    return new StringBuilder(value).reverse().toString();
}
/**
 * For the given string, returns the number of UTF-8 bytes
 * required to encode the string.
 *
 * <p>A lead surrogate immediately followed by a trail surrogate is counted as one
 * 4-byte sequence; an unpaired surrogate is counted as 3 bytes, like any other
 * char at or above 0x800.
 *
 * @param string text to encode
 * @return number of UTF-8 bytes required to encode
 * @throws NullPointerException if {@code string} is {@code null}
 */
public static int utf8Length(String string) {
    // Walk the string by index instead of relying on CharacterIterator.DONE:
    // DONE is the char U+FFFF, so a sentinel-based loop would stop counting at
    // the first U+FFFF that legitimately occurs in the text.
    final int length = string.length();
    int size = 0;
    for (int i = 0; i < length; i++) {
        final char ch = string.charAt(i);
        if (ch < 0x80) {
            size++;
        } else if (ch < 0x800) {
            size += 2;
        } else if (Character.isHighSurrogate(ch)
                && i + 1 < length
                && Character.isLowSurrogate(string.charAt(i + 1))) {
            // valid pair: one supplementary code point, consume the trail too
            size += 4;
            i++;
        } else {
            // remaining BMP chars and unpaired surrogates
            size += 3;
        }
    }
    return size;
}
/**
 * For the given string, returns the number of UTF-8 bytes required to
 * encode the string.
 *
 * <p>A lead surrogate immediately followed by a trail surrogate is counted as one
 * 4-byte sequence; an unpaired surrogate is counted as 3 bytes, like any other
 * char at or above 0x800.
 *
 * @param string
 *            text to encode
 * @return number of UTF-8 bytes required to encode
 * @throws NullPointerException
 *             if {@code string} is {@code null}
 */
public static int utf8Length(final String string) {
    // Walk the string by index instead of relying on CharacterIterator.DONE:
    // DONE is the char U+FFFF, so a sentinel-based loop would stop counting at
    // the first U+FFFF that legitimately occurs in the text.
    final int length = string.length();
    int size = 0;
    for (int i = 0; i < length; i++) {
        final char ch = string.charAt(i);
        if (ch < 0x80) {
            size++;
        } else if (ch < 0x800) {
            size += 2;
        } else if (Character.isHighSurrogate(ch)
                && i + 1 < length
                && Character.isLowSurrogate(string.charAt(i + 1))) {
            // valid pair: one supplementary code point, consume the trail too
            size += 4;
            i++;
        } else {
            // remaining BMP chars and unpaired surrogates
            size += 3;
        }
    }
    return size;
}
/**
 * Returns the given string with its characters in reverse order.
 *
 * <p>Surrogate pairs are kept intact (a supplementary character is reversed as a
 * single unit), and the whole string is processed even when it contains the
 * char U+FFFF, which a {@code CharacterIterator.DONE}-terminated loop would
 * mistake for the end of the text.
 *
 * @param value the string to reverse; may be {@code null}
 * @return the reversed string, or {@code null} if {@code value} is {@code null}
 */
public static String reverse(final String value) {
    if (value == null) {
        return null;
    }
    // StringBuilder.reverse() treats surrogate pairs as single characters.
    return new StringBuilder(value).reverse().toString();
}
/**
 * Returns the code point that ends just before {@code index}, leaving the
 * iterator at the index it had on entry.
 *
 * <p>The iterator is moved to {@code index} and stepped back one unit. If that
 * unit is a low surrogate and the unit before it is a high surrogate, the pair is
 * combined; otherwise the single unit's value is returned (this is
 * {@link CharacterIterator#DONE} when {@code previous()} reports it).
 *
 * @param iter  the character iterator to read from
 * @param index the position whose preceding code point is wanted
 * @return the code point before {@code index}
 */
private static int codePointBefore(CharacterIterator iter, int index) {
    final int savedIndex = iter.getIndex();

    iter.setIndex(index);
    final char last = iter.previous();
    int codePoint = last;
    if (Character.isLowSurrogate(last)) {
        final char candidate = iter.previous();
        codePoint = Character.isHighSurrogate(candidate)
                ? Character.toCodePoint(candidate, last)
                : last;
    }

    // Put the iterator back where the caller left it.
    iter.setIndex(savedIndex);
    return codePoint;
}
/**
* Move the iterator forward to the next code point, and return that code point,
* leaving the iterator positioned at the char returned.
* For supplementary chars, the iterator is left positioned at the lead surrogate.
* The trail-surrogate lookup is delegated to nextTrail32, whose result (including
* DONE32) is passed through to the caller.
* @param ci The character iterator
* @return The next code point, or whatever nextTrail32 reports for the unit read.
*/
public static int next32(CharacterIterator ci) {
// If the current position is at a surrogate pair, move to the trail surrogate
// which leaves it in position for the underlying iterator's next() to work.
// If the unit after the lead is not a trail surrogate, step back again.
int c = ci.current();
if (c >= UTF16.LEAD_SURROGATE_MIN_VALUE && c<=UTF16.LEAD_SURROGATE_MAX_VALUE) {
c = ci.next();
if (c<UTF16.TRAIL_SURROGATE_MIN_VALUE || c>UTF16.TRAIL_SURROGATE_MAX_VALUE) {
ci.previous();
}
}
// For BMP chars, this next() is the real deal.
c = ci.next();
// If we might have a lead surrogate, we need to peek ahead to get the trail
// even though we don't want to really be positioned there.
if (c >= UTF16.LEAD_SURROGATE_MIN_VALUE) {
c = nextTrail32(ci, c);
}
// NOTE(review): the explicit DONE32 exclusion suggests DONE32 is not below
// SUPPLEMENTARY_MIN_VALUE; its actual value is declared outside this block.
if (c >= UTF16.SUPPLEMENTARY_MIN_VALUE && c != DONE32) {
// We got a supplementary char. Back the iterator up to the position
// of the lead surrogate.
ci.previous();
}
return c;
}
/**
 * For the given string, returns the number of UTF-8 bytes
 * required to encode the string.
 *
 * <p>A lead surrogate immediately followed by a trail surrogate is counted as one
 * 4-byte sequence; an unpaired surrogate is counted as 3 bytes, like any other
 * char at or above 0x800.
 *
 * @param string text to encode
 * @return number of UTF-8 bytes required to encode
 * @throws NullPointerException if {@code string} is {@code null}
 */
public static int utf8Length(String string) {
    // Walk the string by index instead of relying on CharacterIterator.DONE:
    // DONE is the char U+FFFF, so a sentinel-based loop would stop counting at
    // the first U+FFFF that legitimately occurs in the text.
    final int length = string.length();
    int size = 0;
    for (int i = 0; i < length; i++) {
        final char ch = string.charAt(i);
        if (ch < 0x80) {
            size++;
        } else if (ch < 0x800) {
            size += 2;
        } else if (Character.isHighSurrogate(ch)
                && i + 1 < length
                && Character.isLowSurrogate(string.charAt(i + 1))) {
            // valid pair: one supplementary code point, consume the trail too
            size += 4;
            i++;
        } else {
            // remaining BMP chars and unpaired surrogates
            size += 3;
        }
    }
    return size;
}
/**
 * For the given string, returns the number of UTF-8 bytes
 * required to encode the string.
 *
 * <p>A lead surrogate immediately followed by a trail surrogate is counted as one
 * 4-byte sequence; an unpaired surrogate is counted as 3 bytes, like any other
 * char at or above 0x800.
 *
 * @param string text to encode
 * @return number of UTF-8 bytes required to encode
 * @throws NullPointerException if {@code string} is {@code null}
 */
public static int utf8Length(String string) {
    // Walk the string by index instead of relying on CharacterIterator.DONE:
    // DONE is the char U+FFFF, so a sentinel-based loop would stop counting at
    // the first U+FFFF that legitimately occurs in the text.
    final int length = string.length();
    int size = 0;
    for (int i = 0; i < length; i++) {
        final char ch = string.charAt(i);
        if (ch < 0x80) {
            size++;
        } else if (ch < 0x800) {
            size += 2;
        } else if (Character.isHighSurrogate(ch)
                && i + 1 < length
                && Character.isLowSurrogate(string.charAt(i + 1))) {
            // valid pair: one supplementary code point, consume the trail too
            size += 4;
            i++;
        } else {
            // remaining BMP chars and unpaired surrogates
            size += 3;
        }
    }
    return size;
}
/**
* Extract link content from a character iterator. It is assumed that the
* '#' has already been eaten. Characters are collected up to the terminator
* (a space by default); trailing word delimiters are then removed from the
* collected text and the iterator is stepped back once for each removed
* character, so that it ends up just after the link text.
*
* A space that is directly followed by '#' does not end the link: the pair
* is collapsed into a single space and collection continues.
*
* @param ci
* The character iterator.
*
* @return Link text, or <code>null</code> if no link text remains.
*/
protected String extractLink(final CharacterIterator ci) {
final StringBuilder sbuf = new StringBuilder();
char ch = ci.current();
char terminator = ' ';
// color quoted compound words like "#'iron sword'"
// NOTE(review): when the current char is the opening quote, it is also the
// terminator, so the loop below breaks on its first iteration and this
// method returns null. Confirm whether the opening quote should be skipped
// here or is consumed by the caller.
if (ch == '\'') {
terminator = ch;
}
while (ch != CharacterIterator.DONE) {
if (ch == terminator) {
if (terminator == ' ') {
/*
* Continued link (#abc #def)? Peek at the char after the space.
*/
ch = ci.next();
if (ch == '#') {
// Replace the " #" pair by a single space in the link text.
ch = ' ';
} else {
// Not a continuation: move back onto the space and stop.
ci.previous();
break;
}
} else {
break;
}
}
sbuf.append(ch);
ch = ci.next();
}
/*
* Don't treat word delimiter(s) on the end as link text. Each delimiter
* dropped from the buffer also moves the iterator back by one.
*/
int len = sbuf.length();
while (len != 0) {
if (!isWordDelim(sbuf.charAt(--len))) {
// Last char is real link text: undo the decrement and stop trimming.
len++;
break;
}
sbuf.setLength(len);
ci.previous();
}
/*
* Nothing found?
*/
if (len == 0) {
return null;
}
return sbuf.toString();
}