下面列出了java.lang.Character.UnicodeBlock# LATIN_EXTENDED_B 实例代码,或者点击链接到github查看源代码,也可以在右侧发表评论。
/**
* Character Normalization
* @param ch
* @return Normalized character
*/
static public char normalize(char ch) {
Character.UnicodeBlock block = Character.UnicodeBlock.of(ch);
if (block == UnicodeBlock.BASIC_LATIN) {
if (ch<'A' || (ch<'a' && ch >'Z') || ch>'z') ch = ' ';
} else if (block == UnicodeBlock.LATIN_1_SUPPLEMENT) {
if (LATIN1_EXCLUDED.indexOf(ch)>=0) ch = ' ';
} else if (block == UnicodeBlock.LATIN_EXTENDED_B) {
// normalization for Romanian
if (ch == '\u0219') ch = '\u015f'; // Small S with comma below => with cedilla
if (ch == '\u021b') ch = '\u0163'; // Small T with comma below => with cedilla
} else if (block == UnicodeBlock.GENERAL_PUNCTUATION) {
ch = ' ';
} else if (block == UnicodeBlock.ARABIC) {
if (ch == '\u06cc') ch = '\u064a'; // Farsi yeh => Arabic yeh
} else if (block == UnicodeBlock.LATIN_EXTENDED_ADDITIONAL) {
if (ch >= '\u1ea0') ch = '\u1ec3';
} else if (block == UnicodeBlock.HIRAGANA) {
ch = '\u3042';
} else if (block == UnicodeBlock.KATAKANA) {
ch = '\u30a2';
} else if (block == UnicodeBlock.BOPOMOFO || block == UnicodeBlock.BOPOMOFO_EXTENDED) {
ch = '\u3105';
} else if (block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS) {
if (cjk_map.containsKey(ch)) ch = cjk_map.get(ch);
} else if (block == UnicodeBlock.HANGUL_SYLLABLES) {
ch = '\uac00';
}
return ch;
}
/**
* Character Normalization
* @param ch
* @return Normalized character
*/
static public char normalize(char ch) {
Character.UnicodeBlock block = Character.UnicodeBlock.of(ch);
if (block == UnicodeBlock.BASIC_LATIN) {
if (ch<'A' || (ch<'a' && ch >'Z') || ch>'z') ch = ' ';
} else if (block == UnicodeBlock.LATIN_1_SUPPLEMENT) {
if (LATIN1_EXCLUDED.indexOf(ch)>=0) ch = ' ';
} else if (block == UnicodeBlock.LATIN_EXTENDED_B) {
// normalization for Romanian
if (ch == '\u0219') ch = '\u015f'; // Small S with comma below => with cedilla
if (ch == '\u021b') ch = '\u0163'; // Small T with comma below => with cedilla
} else if (block == UnicodeBlock.GENERAL_PUNCTUATION) {
ch = ' ';
} else if (block == UnicodeBlock.ARABIC) {
if (ch == '\u06cc') ch = '\u064a'; // Farsi yeh => Arabic yeh
} else if (block == UnicodeBlock.LATIN_EXTENDED_ADDITIONAL) {
if (ch >= '\u1ea0') ch = '\u1ec3';
} else if (block == UnicodeBlock.HIRAGANA) {
ch = '\u3042';
} else if (block == UnicodeBlock.KATAKANA) {
ch = '\u30a2';
} else if (block == UnicodeBlock.BOPOMOFO || block == UnicodeBlock.BOPOMOFO_EXTENDED) {
ch = '\u3105';
} else if (block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS) {
if (cjk_map.containsKey(ch)) ch = cjk_map.get(ch);
} else if (block == UnicodeBlock.HANGUL_SYLLABLES) {
ch = '\uac00';
}
return ch;
}