下面列出了 java.lang.Character.UnicodeBlock#of() 的实例代码，或者点击链接到 GitHub 查看源代码，也可以在右侧发表评论。
/**
 * Splits {@code str} into typed units, decomposing every precomposed Hangul syllable into its
 * initial (CHOSUNG), medial (JUNGSUNG) and, when present, final (JONGSUNG) jamo.
 *
 * <p>Characters that are not precomposed Hangul syllables are emitted unchanged with
 * {@code UnitType.OTHER}.
 *
 * @param str the text to decompose; must not be {@code null}
 * @return the units in input order
 */
public List<Pair<Character, UnitType>> parseWithType(String str) {
    // Precomposed syllables occupy U+AC00..U+D7A3. The HANGUL_SYLLABLES block itself extends to
    // U+D7AF, and for U+D7A4..U+D7AF the arithmetic below yields an initial-consonant index of
    // 19, one past the 19 initial consonants the formula addresses. Those code points are
    // therefore passed through as OTHER instead of being used as lookup-table indices.
    final int syllableBase = 0xAC00;
    final int lastSyllable = 0xD7A3;
    final int jungCount = 21;
    final int jongCount = 28;

    List<Pair<Character, UnitType>> result = new ArrayList<>();
    for (int i = 0, length = str.length(); i < length; i++) {
        char ch = str.charAt(i);
        boolean composedSyllable =
                UnicodeBlock.of(ch) == UnicodeBlock.HANGUL_SYLLABLES && ch <= lastSyllable;
        if (!composedSyllable) {
            result.add(new Pair<>(ch, UnitType.OTHER));
            continue;
        }

        // offset = (cho * 21 + jung) * 28 + jong
        int offset = ch - syllableBase;
        int cho = offset / (jungCount * jongCount);
        int jung = (offset % (jungCount * jongCount)) / jongCount;
        int jong = offset % jongCount;

        result.add(new Pair<>(ChoSung[cho], UnitType.CHOSUNG));
        result.add(new Pair<>(JungSung[jung], UnitType.JUNGSUNG));
        // Index 0 means the syllable has no final consonant.
        if (jong != 0) {
            result.add(new Pair<>(JongSung[jong], UnitType.JONGSUNG));
        }
    }
    return result;
}
/**
 * Finds the entry of {@code s_list} whose {@code m_block} equals the Unicode block of the
 * given value.
 *
 * <p>{@code initList()} is invoked first on every call. The argument is narrowed to
 * {@code int} before the block lookup.
 *
 * @param a_unicode the code point to look up
 * @return the matching range, or {@code null} when the code point has no block or no entry of
 *     the list matches
 */
static public TTUnicodeRange of(long a_unicode) {
    initList();

    final UnicodeBlock wanted = UnicodeBlock.of((int) a_unicode);
    if (wanted != null) {
        for (TTUnicodeRange candidate : s_list) {
            if (candidate.m_block.equals(wanted)) {
                return candidate;
            }
        }
    }
    return null;
}
/**
 * Builds the query for {@code text} on field {@code f}.
 *
 * <p>A prefix query is returned only when all of the following hold: {@code text} is exactly
 * one {@code char} long, {@code f} equals the configured title or content field name, and the
 * character belongs to the CJK Unified Ideographs, Hiragana, Katakana or Hangul Syllables
 * block. In every other case a match-phrase query is returned.
 *
 * @param f the field name
 * @param text the text to search for; may be {@code null}
 * @return the prefix query or the match-phrase query, as described above
 */
protected QueryBuilder buildMatchPhraseQuery(final String f, final String text) {
    final FessConfig fessConfig = ComponentUtil.getFessConfig();

    final boolean singleCharOnTitleOrContent = text != null
            && text.length() == 1
            && (fessConfig.getIndexFieldTitle().equals(f) || fessConfig.getIndexFieldContent().equals(f));

    if (singleCharOnTitleOrContent) {
        final UnicodeBlock block = UnicodeBlock.of(text.codePointAt(0));
        final boolean prefixBlock = block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
                || block == UnicodeBlock.HIRAGANA
                || block == UnicodeBlock.KATAKANA
                || block == UnicodeBlock.HANGUL_SYLLABLES;
        if (prefixBlock) {
            return QueryBuilders.prefixQuery(f, text);
        }
    }
    return QueryBuilders.matchPhraseQuery(f, text);
}
/**
 * Reports whether {@code checkChar} belongs to one of the CJK blocks listed below.
 *
 * @param checkChar the character to classify
 * @return {@code true} if the character's Unicode block is one of the listed CJK blocks
 */
private static boolean checkCharContainChinese(char checkChar){
    final UnicodeBlock actual = UnicodeBlock.of(checkChar);
    // NOTE(review): Extension B lies in a supplementary plane, so a single char presumably
    // never resolves to it; the entry is retained to keep the check unchanged.
    final UnicodeBlock[] cjkBlocks = {
        UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS,
        UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS,
        UnicodeBlock.CJK_COMPATIBILITY_FORMS,
        UnicodeBlock.CJK_RADICALS_SUPPLEMENT,
        UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
        UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
    };
    for (UnicodeBlock candidate : cjkBlocks) {
        if (candidate == actual) {
            return true;
        }
    }
    return false;
}
/**
 * Walks backwards from {@code start} to the first index of the word that contains it.
 *
 * <p>When the character at {@code start} is in the Basic Latin block, the word extends over
 * preceding apostrophes, letters (upper, lower, title case, modifier) and decimal digits.
 * Otherwise it extends over preceding characters that share the Unicode block of the
 * character at {@code start}.
 *
 * @param text the text to scan
 * @param start the index to start from
 * @return the index of the word's first character, or {@code start} unchanged when it is at or
 *     past the end of {@code text}
 */
private static int findWordStart(CharSequence text, int start) {
    if (start >= text.length()) {
        return start;
    }

    final UnicodeBlock anchorBlock = UnicodeBlock.of(text.charAt(start));
    final boolean latinAnchor = anchorBlock == UnicodeBlock.BASIC_LATIN;

    int pos = start;
    while (pos > 0) {
        final char prev = text.charAt(pos - 1);
        final boolean extendsWord;
        if (latinAnchor) {
            switch (Character.getType(prev)) {
                case Character.UPPERCASE_LETTER:
                case Character.LOWERCASE_LETTER:
                case Character.TITLECASE_LETTER:
                case Character.MODIFIER_LETTER:
                case Character.DECIMAL_DIGIT_NUMBER:
                    extendsWord = true;
                    break;
                default:
                    // The apostrophe is the only non-letter, non-digit allowed inside a word.
                    extendsWord = prev == '\'';
                    break;
            }
        } else {
            extendsWord = UnicodeBlock.of(prev) == anchorBlock;
        }
        if (!extendsWord) {
            break;
        }
        pos--;
    }
    return pos;
}
/**
 * Walks forwards from {@code end} to the index just past the word that contains it.
 *
 * <p>When the character at {@code end} is in the Basic Latin block, the word extends over
 * apostrophes, letters (upper, lower, title case, modifier) and decimal digits. Otherwise it
 * extends over characters that share the Unicode block of the character at {@code end}. The
 * character at {@code end} itself is tested too, so a Basic Latin non-word character yields
 * {@code end} unchanged.
 *
 * @param text the text to scan
 * @param end the index to start from
 * @return the index of the first character after the word, or {@code end} unchanged when it is
 *     at or past the end of {@code text}
 */
private static int findWordEnd(CharSequence text, int end) {
    final int limit = text.length();
    if (end >= limit) {
        return end;
    }

    final UnicodeBlock anchorBlock = UnicodeBlock.of(text.charAt(end));
    final boolean latinAnchor = anchorBlock == UnicodeBlock.BASIC_LATIN;

    int pos = end;
    while (pos < limit) {
        final char current = text.charAt(pos);
        final boolean extendsWord;
        if (latinAnchor) {
            switch (Character.getType(current)) {
                case Character.UPPERCASE_LETTER:
                case Character.LOWERCASE_LETTER:
                case Character.TITLECASE_LETTER:
                case Character.MODIFIER_LETTER:
                case Character.DECIMAL_DIGIT_NUMBER:
                    extendsWord = true;
                    break;
                default:
                    // The apostrophe is the only non-letter, non-digit allowed inside a word.
                    extendsWord = current == '\'';
                    break;
            }
        } else {
            extendsWord = UnicodeBlock.of(current) == anchorBlock;
        }
        if (!extendsWord) {
            break;
        }
        pos++;
    }
    return pos;
}
/**
 * Decides whether the glyph renderer should be used for the given text element.
 *
 * <p>The truncated text is obtained from {@code styledTextUtil}; the answer is {@code true}
 * as soon as one of its {@code char}s falls in a Unicode block contained in
 * {@code glyphRendererBlocks}.
 *
 * @param text the print text element to inspect
 * @return {@code true} if a character from one of the configured blocks is found;
 *     {@code false} when the truncated text is {@code null}, no blocks are configured, or no
 *     character matches
 */
protected boolean toUseGlyphRenderer(JRPrintText text)
{
    final String value = styledTextUtil.getTruncatedText(text);
    if (value == null || glyphRendererBlocks.isEmpty())
    {
        return false;
    }

    for (int idx = 0, count = value.length(); idx < count; idx++)
    {
        final UnicodeBlock block = UnicodeBlock.of(value.charAt(idx));
        if (!glyphRendererBlocks.contains(block))
        {
            continue;
        }

        if (log.isTraceEnabled())
        {
            log.trace("found character in block " + block + ", using the glyph renderer");
        }
        return true;
    }
    return false;
}
/**
 * Tells whether a character counts as a Latin-script letter. Non-spacing marks are accepted
 * as well as letters, on the assumption that a combining mark was attached to a preceding
 * Latin character.
 *
 * @param letter the character to classify
 * @return {@code true} if the character is a letter or non-spacing mark located in one of the
 *     Latin blocks or the Combining Diacritical Marks block
 */
// @VisibleForTesting
static boolean isLatinLetter(char letter) {
    // Combining marks are a subset of the non-spacing-mark category.
    final boolean letterOrMark =
            Character.isLetter(letter) || Character.getType(letter) == Character.NON_SPACING_MARK;
    if (!letterOrMark) {
        return false;
    }

    final UnicodeBlock block = UnicodeBlock.of(letter);
    final UnicodeBlock[] latinBlocks = {
        UnicodeBlock.BASIC_LATIN,
        UnicodeBlock.LATIN_1_SUPPLEMENT,
        UnicodeBlock.LATIN_EXTENDED_A,
        UnicodeBlock.LATIN_EXTENDED_ADDITIONAL,
        UnicodeBlock.LATIN_EXTENDED_B,
        UnicodeBlock.COMBINING_DIACRITICAL_MARKS,
    };
    for (UnicodeBlock latin : latinBlocks) {
        if (block.equals(latin)) {
            return true;
        }
    }
    return false;
}
/**
 * Returns {@code str} with every precomposed Hangul syllable replaced by its initial, medial
 * and, when present, final jamo. All other characters are copied through unchanged.
 *
 * @param str the text to decompose; must not be {@code null}
 * @return the decomposed text
 */
@Override
public String parse(String str) {
    // Precomposed syllables occupy U+AC00..U+D7A3. The HANGUL_SYLLABLES block itself extends to
    // U+D7AF, and for U+D7A4..U+D7AF the arithmetic below yields an initial-consonant index of
    // 19, one past the 19 initial consonants the formula addresses. Those code points are
    // therefore copied through instead of being used as lookup-table indices.
    final int syllableBase = 0xAC00;
    final int lastSyllable = 0xD7A3;
    final int jungCount = 21;
    final int jongCount = 28;

    final int length = str.length();
    // The buffer never leaves this method, so the unsynchronized builder is sufficient.
    StringBuilder result = new StringBuilder(length);
    for (int i = 0; i < length; i++) {
        char ch = str.charAt(i);
        boolean composedSyllable =
                UnicodeBlock.of(ch) == UnicodeBlock.HANGUL_SYLLABLES && ch <= lastSyllable;
        if (!composedSyllable) {
            result.append(ch);
            continue;
        }

        // offset = (cho * 21 + jung) * 28 + jong
        int offset = ch - syllableBase;
        int cho = offset / (jungCount * jongCount);
        int jung = (offset % (jungCount * jongCount)) / jongCount;
        int jong = offset % jongCount;

        result.append(ChoSung[cho]);
        result.append(JungSung[jung]);
        // Index 0 means the syllable has no final consonant.
        if (jong != 0) {
            result.append(JongSung[jong]);
        }
    }
    return result.toString();
}
/**
 * Returns the index at which the word containing position {@code start} begins.
 *
 * <p>For a Basic Latin character at {@code start}, the scan moves left while the preceding
 * character is an apostrophe, a letter (upper, lower, title case or modifier) or a decimal
 * digit. For any other character, the scan moves left while the preceding character lies in
 * the same Unicode block.
 *
 * @param text the text to scan
 * @param start the index to start from
 * @return the word's first index; {@code start} itself when it is not a valid index below
 *     {@code text.length()}
 */
private static int findWordStart(CharSequence text, int start) {
    if (text.length() <= start) {
        return start;
    }

    final UnicodeBlock originBlock = UnicodeBlock.of(text.charAt(start));
    int cursor = start;

    if (originBlock != UnicodeBlock.BASIC_LATIN) {
        // Non-Latin scripts: a word is a run of characters from one Unicode block.
        while (cursor > 0 && UnicodeBlock.of(text.charAt(cursor - 1)) == originBlock) {
            cursor--;
        }
        return cursor;
    }

    for (; cursor > 0; cursor--) {
        final char before = text.charAt(cursor - 1);
        if (before == '\'') {
            continue;
        }
        final int category = Character.getType(before);
        final boolean letterOrDigit = category == Character.UPPERCASE_LETTER
                || category == Character.LOWERCASE_LETTER
                || category == Character.TITLECASE_LETTER
                || category == Character.MODIFIER_LETTER
                || category == Character.DECIMAL_DIGIT_NUMBER;
        if (!letterOrDigit) {
            break;
        }
    }
    return cursor;
}
/**
 * Returns the index just past the word containing position {@code end}.
 *
 * <p>For a Basic Latin character at {@code end}, the scan moves right while the current
 * character is an apostrophe, a letter (upper, lower, title case or modifier) or a decimal
 * digit. For any other character, the scan moves right while the current character lies in
 * the same Unicode block. The scan starts at {@code end} itself.
 *
 * @param text the text to scan
 * @param end the index to start from
 * @return the index of the first character after the word; {@code end} itself when it is not
 *     below {@code text.length()}
 */
private static int findWordEnd(CharSequence text, int end) {
    final int len = text.length();
    if (len <= end) {
        return end;
    }

    final UnicodeBlock originBlock = UnicodeBlock.of(text.charAt(end));
    int cursor = end;

    if (originBlock != UnicodeBlock.BASIC_LATIN) {
        // Non-Latin scripts: a word is a run of characters from one Unicode block.
        while (cursor < len && UnicodeBlock.of(text.charAt(cursor)) == originBlock) {
            cursor++;
        }
        return cursor;
    }

    for (; cursor < len; cursor++) {
        final char here = text.charAt(cursor);
        if (here == '\'') {
            continue;
        }
        final int category = Character.getType(here);
        final boolean letterOrDigit = category == Character.UPPERCASE_LETTER
                || category == Character.LOWERCASE_LETTER
                || category == Character.TITLECASE_LETTER
                || category == Character.MODIFIER_LETTER
                || category == Character.DECIMAL_DIGIT_NUMBER;
        if (!letterOrDigit) {
            break;
        }
    }
    return cursor;
}
/**
 * Reports whether {@code c} falls in one of the blocks this check treats as Chinese text:
 * CJK unified and compatibility ideographs, Extension A, general punctuation, CJK symbols and
 * punctuation, and half-width/full-width forms.
 *
 * @param c the character to classify
 * @return {@code true} if the character's Unicode block is one of those listed
 */
private static boolean isChinese(char c) {
    final UnicodeBlock block = UnicodeBlock.of(c);

    final boolean ideograph = block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
            || block == UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
            || block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A;
    if (ideograph) {
        return true;
    }

    // Punctuation and width-variant blocks are accepted as well.
    return block == UnicodeBlock.GENERAL_PUNCTUATION
            || block == UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
            || block == UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS;
}
/**
 * Validates a label against the Japanese-specific rules checked below.
 *
 * <p>The label is rejected when a KATAKANA_HIRAGANA_PROLONGED_SOUND_MARK does not directly
 * follow a HIRAGANA or KATAKANA codepoint, when it contains KATAKANA_MIDDLE_DOT or
 * IDEOGRAPHIC_CLOSING_MARK without any non-exception Japanese codepoint, or when it contains a
 * Japanese codepoint and is longer than MAX_LENGTH_JAPANESE_STRING codepoints.
 *
 * @param label the label to validate
 * @return {@code true} if none of the rules above is violated
 */
@Override
boolean isValidLabelForLanguage(String label) {
    boolean needsPlainJapanese = false;
    boolean sawJapanese = false;
    boolean sawPlainJapanese = false;
    int codepointCount = 0;
    UnicodeBlock previousBlock = null;

    final int end = label.length();
    int offset = 0;
    while (offset < end) {
        final int cp = label.codePointAt(offset);
        final UnicodeBlock currentBlock = UnicodeBlock.of(cp);
        final boolean exceptionCodepoint = JAPANESE_EXCEPTION_CODEPOINTS.contains(cp);
        final boolean japanese = JAPANESE_UNICODE_BLOCKS.contains(currentBlock);

        // Either of these two marks obliges the label to also carry a non-exception Japanese
        // codepoint somewhere.
        needsPlainJapanese |= (cp == KATAKANA_MIDDLE_DOT || cp == IDEOGRAPHIC_CLOSING_MARK);

        // The prolonged sound mark is only legal directly after a HIRAGANA or KATAKANA
        // codepoint.
        if (cp == KATAKANA_HIRAGANA_PROLONGED_SOUND_MARK) {
            final boolean followsKana = Objects.equals(previousBlock, HIRAGANA)
                    || Objects.equals(previousBlock, KATAKANA);
            if (!followsKana) {
                return false;
            }
        }

        // Track Japanese codepoints twice: once excluding the "exception" codepoints and once
        // including them.
        sawPlainJapanese |= japanese && !exceptionCodepoint;
        sawJapanese |= japanese;

        // A codepoint may occupy two chars (a surrogate pair), so advance by its char count.
        offset += Character.charCount(cp);
        codepointCount++;
        previousBlock = currentBlock;
    }

    // "Exception" codepoints do not satisfy the requirement raised by the middle dot or the
    // closing mark.
    if (needsPlainJapanese && !sawPlainJapanese) {
        return false;
    }

    // The length cap applies only to labels holding a Japanese codepoint (exceptions included).
    return !sawJapanese || codepointCount <= MAX_LENGTH_JAPANESE_STRING;
}
/**
 * Checks whether the given character lies in the Hiragana or the Katakana Unicode block.
 *
 * @param c the character to classify
 * @return {@code true} for a character in either block; {@code false} otherwise, including
 *     when the character has no block
 */
static boolean isJapaneseSyllabary(char c) {
    final UnicodeBlock block = UnicodeBlock.of(c);
    // Calling equals on the constants keeps the check safe when the block is null.
    if (UnicodeBlock.HIRAGANA.equals(block)) {
        return true;
    }
    return UnicodeBlock.KATAKANA.equals(block);
}