下面列出了java.lang.Character.Subset#java.lang.Character.UnicodeBlock 实例代码,或者点击链接到github查看源代码,也可以在右侧发表评论。
/**
 * Loads the font description for a group of Unicode blocks, unless any of the
 * blocks has already been registered in {@code loadedAlphabets}.
 *
 * <p>While the description is being read, {@code TeXParser.isLoading} is set to
 * {@code true}. The flag is reset in a {@code finally} clause and the asset
 * stream is closed, so a failed load neither leaves the parser flagged as
 * loading nor leaks the stream.
 *
 * @param base     passed through unchanged to {@code addTeXFontDescription}
 * @param alphabet the Unicode blocks covered by the font description
 * @param language asset path of the font description; also passed on as its name
 * @throws ResourceParseException propagated from {@code addTeXFontDescription}
 * @throws IOException            if the asset cannot be opened or closed
 */
private static void addAlphabet(Object base,
        Character.UnicodeBlock[] alphabet, String language)
        throws ResourceParseException, IOException {
    // A single already-known block means the whole group is treated as loaded.
    for (Character.UnicodeBlock block : alphabet) {
        if (loadedAlphabets.contains(block)) {
            return;
        }
    }

    TeXParser.isLoading = true;
    try (java.io.InputStream in = AjLatexMath.getAssetManager().open(language)) {
        addTeXFontDescription(base, in, language);
        // Register the blocks only after the description loaded successfully.
        for (Character.UnicodeBlock block : alphabet) {
            loadedAlphabets.add(block);
        }
    } finally {
        TeXParser.isLoading = false;
    }
}
/**
 * Loads the font description for a group of Unicode blocks, unless any of the
 * blocks has already been registered in {@code loadedAlphabets}.
 *
 * <p>While the description is being read, {@code TeXParser.isLoading} is set to
 * {@code true}. The flag is reset in a {@code finally} clause and the asset
 * stream is closed, so a failed load neither leaves the parser flagged as
 * loading nor leaks the stream.
 *
 * @param base     passed through unchanged to {@code addTeXFontDescription}
 * @param alphabet the Unicode blocks covered by the font description
 * @param language asset path of the font description; also passed on as its name
 * @throws ResourceParseException propagated from {@code addTeXFontDescription}
 * @throws IOException            if the asset cannot be opened or closed
 */
private static void addAlphabet(Object base,
        Character.UnicodeBlock[] alphabet, String language)
        throws ResourceParseException, IOException {
    // A single already-known block means the whole group is treated as loaded.
    for (Character.UnicodeBlock block : alphabet) {
        if (loadedAlphabets.contains(block)) {
            return;
        }
    }

    TeXParser.isLoading = true;
    try (java.io.InputStream in = AjLatexMath.getAssetManager().open(language)) {
        addTeXFontDescription(base, in, language);
        // Register the blocks only after the description loaded successfully.
        for (Character.UnicodeBlock block : alphabet) {
            loadedAlphabets.add(block);
        }
    } finally {
        TeXParser.isLoading = false;
    }
}
/**
 * Rebuilds {@code glyphRendererBlocks} from the report properties whose names
 * start with {@code PdfReportConfiguration.PROPERTY_PREFIX_GLYPH_RENDERER_BLOCKS}.
 *
 * <p>Each property value is read as a comma-separated list of Unicode block
 * names; names that {@code resolveUnicodeBlock} cannot resolve are skipped.
 */
protected void initGlyphRenderer()
{
    glyphRendererBlocks = new HashSet<Character.UnicodeBlock>();

    List<PropertySuffix> blockProperties = propertiesUtil.getAllProperties(
            getCurrentJasperPrint(),
            PdfReportConfiguration.PROPERTY_PREFIX_GLYPH_RENDERER_BLOCKS);
    for (PropertySuffix blockProperty : blockProperties)
    {
        String[] blockNames = blockProperty.getValue().split(",");
        for (int i = 0; i < blockNames.length; i++)
        {
            UnicodeBlock resolved = resolveUnicodeBlock(blockNames[i]);
            if (resolved == null)
            {
                // blank or unknown block name
                continue;
            }

            if (log.isDebugEnabled())
            {
                log.debug("glyph renderer block " + resolved);
            }
            glyphRendererBlocks.add(resolved);
        }
    }
}
/**
 * Looks up a Unicode block by name, ignoring surrounding whitespace.
 *
 * @param name the block name as accepted by {@link UnicodeBlock#forName(String)}
 * @return the matching block, or {@code null} when the name is blank or unknown
 *         (an unknown name is also logged as a warning)
 */
protected UnicodeBlock resolveUnicodeBlock(String name)
{
    final String blockName = name.trim();
    if (blockName.isEmpty())
    {
        return null;
    }

    UnicodeBlock resolved = null;
    try
    {
        resolved = UnicodeBlock.forName(blockName);
    }
    catch (IllegalArgumentException e)
    {
        // forName rejects names that do not identify a block; report and fall through
        log.warn("Could not resolve \"" + name + "\" to a Unicode block");
    }
    return resolved;
}
/**
 * Splits a string into units, decomposing every character of the
 * {@code HANGUL_SYLLABLES} block into its initial, medial and (when present)
 * final component, each tagged with its {@code UnitType}.
 *
 * <p>Characters outside that block are emitted unchanged with
 * {@code UnitType.OTHER}.
 *
 * @param str the text to decompose
 * @return the units in input order
 */
public List<Pair<Character, UnitType>> parseWithType(String str) {
    // A syllable's offset from 0xAC00 encodes (initial * 21 + medial) * 28 + final.
    final int finalCount = 28;
    final int medialTimesFinal = 21 * finalCount;

    List<Pair<Character, UnitType>> units = new ArrayList<>();
    for (int pos = 0, n = str.length(); pos < n; pos++) {
        final char current = str.charAt(pos);
        if (UnicodeBlock.of(current) != UnicodeBlock.HANGUL_SYLLABLES) {
            units.add(new Pair<>(current, UnitType.OTHER));
            continue;
        }

        final int offset = current - 0xAC00;
        final int initialIdx = offset / medialTimesFinal;
        final int remainder = offset % medialTimesFinal;
        final int medialIdx = remainder / finalCount;
        final int finalIdx = remainder % finalCount;

        units.add(new Pair<>(ChoSung[initialIdx], UnitType.CHOSUNG));
        units.add(new Pair<>(JungSung[medialIdx], UnitType.JUNGSUNG));
        // index 0 means the syllable has no final component
        if (finalIdx != 0) {
            units.add(new Pair<>(JongSung[finalIdx], UnitType.JONGSUNG));
        }
    }
    return units;
}
/**
 * Finds the registered range whose Unicode block contains the given code point.
 *
 * <p>The value is validated before it is narrowed to {@code int}. Without that
 * check, a value outside the {@code int} range could wrap around to an
 * unrelated, valid code point and silently return the wrong range.
 *
 * @param a_unicode the Unicode code point to look up
 * @return the matching range, or {@code null} when the code point belongs to no
 *         block or no registered range uses that block
 * @throws IllegalArgumentException if {@code a_unicode} is not a valid Unicode
 *         code point
 */
public static TTUnicodeRange of(long a_unicode) {
    initList();

    if (a_unicode < Character.MIN_CODE_POINT || a_unicode > Character.MAX_CODE_POINT) {
        // Same exception type UnicodeBlock.of(int) uses for invalid code points.
        throw new IllegalArgumentException(
                "Not a valid Unicode code point: 0x" + Long.toHexString(a_unicode));
    }

    UnicodeBlock block = UnicodeBlock.of((int) a_unicode);
    if (block == null) {
        return null;
    }

    for (int i = 0; i < s_list.size(); i++) {
        TTUnicodeRange range = s_list.get(i);
        if (range.m_block.equals(block)) {
            return range;
        }
    }
    return null;
}
/**
 * Builds the query for a phrase on the given field.
 *
 * <p>A prefix query is used only when the text is exactly one {@code char}
 * long, the field is the configured title or content field, and the character
 * lies in the CJK Unified Ideographs, Hiragana, Katakana or Hangul Syllables
 * block. Every other case yields a match-phrase query.
 *
 * @param f    the index field name
 * @param text the phrase to search for; may be {@code null}
 * @return the query builder for the phrase
 */
protected QueryBuilder buildMatchPhraseQuery(final String f, final String text) {
    final FessConfig fessConfig = ComponentUtil.getFessConfig();

    final boolean singleCharOnTitleOrContent = text != null && text.length() == 1
            && (fessConfig.getIndexFieldTitle().equals(f) || fessConfig.getIndexFieldContent().equals(f));
    if (singleCharOnTitleOrContent) {
        final UnicodeBlock block = UnicodeBlock.of(text.codePointAt(0));
        final boolean cjkLike = block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
                || block == UnicodeBlock.HIRAGANA
                || block == UnicodeBlock.KATAKANA
                || block == UnicodeBlock.HANGUL_SYLLABLES;
        if (cjkLike) {
            return QueryBuilders.prefixQuery(f, text);
        }
    }
    return QueryBuilders.matchPhraseQuery(f, text);
}
/**
 * Tells whether a character lies in one of the CJK blocks this helper accepts:
 * CJK Unified Ideographs, CJK Compatibility Ideographs, CJK Compatibility Forms,
 * CJK Radicals Supplement, or CJK Unified Ideographs Extension A / B.
 *
 * @param checkChar the character to classify
 * @return {@code true} when the character's block is one of the blocks above
 */
private static boolean checkCharContainChinese(char checkChar){
    final UnicodeBlock block = UnicodeBlock.of(checkChar);
    return block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
            || block == UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
            || block == UnicodeBlock.CJK_COMPATIBILITY_FORMS
            || block == UnicodeBlock.CJK_RADICALS_SUPPLEMENT
            || block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
            || block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B;
}
/**
 * Walks backwards from {@code start} to the beginning of the word that contains
 * the character at {@code start}.
 *
 * <p>When that character is in the Basic Latin block, the word extends over
 * preceding apostrophes, decimal digits and upper/lower/title-case or modifier
 * letters. Otherwise it extends over preceding characters that share the same
 * Unicode block.
 *
 * @param text  the text to scan
 * @param start index of a character inside the word
 * @return index of the first character of the word, or {@code start} unchanged
 *         when it is at or past the end of the text
 */
private static int findWordStart(CharSequence text, int start) {
    if (start >= text.length()) {
        return start;
    }

    final UnicodeBlock anchorBlock = UnicodeBlock.of(text.charAt(start));
    final boolean latinAnchor = anchorBlock == UnicodeBlock.BASIC_LATIN;

    int pos = start;
    while (pos > 0) {
        final char prev = text.charAt(pos - 1);
        boolean belongsToWord;
        if (latinAnchor) {
            switch (Character.getType(prev)) {
                case Character.UPPERCASE_LETTER:
                case Character.LOWERCASE_LETTER:
                case Character.TITLECASE_LETTER:
                case Character.MODIFIER_LETTER:
                case Character.DECIMAL_DIGIT_NUMBER:
                    belongsToWord = true;
                    break;
                default:
                    // apostrophes are kept inside words
                    belongsToWord = prev == '\'';
                    break;
            }
        } else {
            belongsToWord = UnicodeBlock.of(prev) == anchorBlock;
        }

        if (!belongsToWord) {
            break;
        }
        pos--;
    }
    return pos;
}
/**
 * Walks forwards from {@code end} to just past the last character of the word
 * that contains the character at {@code end}.
 *
 * <p>When that character is in the Basic Latin block, the word extends over
 * apostrophes, decimal digits and upper/lower/title-case or modifier letters.
 * Otherwise it extends over characters that share the same Unicode block.
 *
 * @param text the text to scan
 * @param end  index of a character inside the word
 * @return index one past the last character of the word, or {@code end}
 *         unchanged when it is at or past the end of the text
 */
private static int findWordEnd(CharSequence text, int end) {
    final int length = text.length();
    if (end >= length) {
        return end;
    }

    final UnicodeBlock anchorBlock = UnicodeBlock.of(text.charAt(end));
    final boolean latinAnchor = anchorBlock == UnicodeBlock.BASIC_LATIN;

    int pos = end;
    while (pos < length) {
        final char current = text.charAt(pos);
        boolean belongsToWord;
        if (latinAnchor) {
            switch (Character.getType(current)) {
                case Character.UPPERCASE_LETTER:
                case Character.LOWERCASE_LETTER:
                case Character.TITLECASE_LETTER:
                case Character.MODIFIER_LETTER:
                case Character.DECIMAL_DIGIT_NUMBER:
                    belongsToWord = true;
                    break;
                default:
                    // apostrophes are kept inside words
                    belongsToWord = current == '\'';
                    break;
            }
        } else {
            belongsToWord = UnicodeBlock.of(current) == anchorBlock;
        }

        if (!belongsToWord) {
            break;
        }
        pos++;
    }
    return pos;
}
/**
 * Registers an alphabet from already-opened streams, unless the block is
 * already present in {@code loadedAlphabets}.
 *
 * <p>The font description, symbols and symbol mappings are loaded in that
 * order. The block is recorded as loaded only after all three succeed.
 *
 * @param alphabet   the Unicode block being registered
 * @param inlanguage stream of the font description
 * @param language   name passed along with {@code inlanguage}
 * @param insymbols  stream of the symbol definitions
 * @param symbols    name passed along with {@code insymbols}
 * @param inmappings stream of the symbol mappings
 * @param mappings   name passed along with {@code inmappings}
 * @throws ResourceParseException propagated from the loaders
 * @throws IOException            propagated from the loaders
 */
public static void addAlphabet(Character.UnicodeBlock alphabet,
        InputStream inlanguage, String language, InputStream insymbols,
        String symbols, InputStream inmappings, String mappings)
        throws ResourceParseException, IOException {
    if (loadedAlphabets.contains(alphabet)) {
        // nothing to do, this block was registered earlier
        return;
    }

    addTeXFontDescription(inlanguage, language);
    SymbolAtom.addSymbolAtom(insymbols, symbols);
    TeXFormula.addSymbolMappings(inmappings, mappings);
    loadedAlphabets.add(alphabet);
}
/**
 * Registers an alphabet by name, loading its resources from
 * {@code fonts/<name>/language_<name>.xml}, {@code symbols_<name>.xml} and
 * {@code mappings_<name>.xml}.
 *
 * <p>All three streams are closed once loading finishes or fails. A
 * {@code FontAlreadyLoadedException} is ignored, so registering the same
 * alphabet again does nothing.
 *
 * @param alphabet the Unicode block being registered
 * @param name     the alphabet name used to build the resource paths
 * @throws ResourceParseException propagated from {@code DefaultTeXFont.addAlphabet}
 * @throws IOException            if a resource cannot be opened, read or closed
 */
public static void addAlphabet(Character.UnicodeBlock alphabet, String name)
        throws ResourceParseException, IOException {
    String lg = "fonts/" + name + "/language_" + name + ".xml";
    String sym = "fonts/" + name + "/symbols_" + name + ".xml";
    String map = "fonts/" + name + "/mappings_" + name + ".xml";

    // getResourceAsStream may return null; try-with-resources skips closing null resources.
    try (java.io.InputStream inLanguage = AjLatexMath.getAssetManager().open(lg);
            java.io.InputStream inSymbols = TeXFormula.class.getResourceAsStream(sym);
            java.io.InputStream inMappings = TeXFormula.class.getResourceAsStream(map)) {
        DefaultTeXFont.addAlphabet(alphabet, inLanguage, lg, inSymbols, sym, inMappings, map);
    } catch (FontAlreadyLoadedException ignored) {
        // Deliberately ignored: the font is already available.
    }
}
/**
 * Returns the external font registered for a Unicode block.
 *
 * <p>When nothing is registered, a default {@code FontInfos("SansSerif", "Serif")}
 * is created, stored in {@code externalFontMap} and returned.
 *
 * @param block the Unicode block to look up
 * @return the registered or newly created font information, never {@code null}
 */
public static FontInfos getExternalFont(Character.UnicodeBlock block) {
    FontInfos registered = externalFontMap.get(block);
    if (registered != null) {
        return registered;
    }

    FontInfos fallback = new FontInfos("SansSerif", "Serif");
    externalFontMap.put(block, fallback);
    return fallback;
}
/**
 * Registers, replaces or removes the external font used for a Unicode block.
 *
 * <p>Passing {@code null} for both font names removes the registration.
 * Otherwise the pair is stored, and when the block is Basic Latin the
 * {@code predefinedTeXFormulas} cache is cleared.
 *
 * @param block     the Unicode block the fonts apply to
 * @param sansserif the sans-serif font name, may be {@code null}
 * @param serif     the serif font name, may be {@code null}
 */
public static void registerExternalFont(Character.UnicodeBlock block,
        String sansserif, String serif) {
    final boolean hasFont = sansserif != null || serif != null;
    if (hasFont) {
        externalFontMap.put(block, new FontInfos(sansserif, serif));
        if (block.equals(Character.UnicodeBlock.BASIC_LATIN)) {
            predefinedTeXFormulas.clear();
        }
    } else {
        externalFontMap.remove(block);
    }
}
/**
 * Registers an alphabet from already-opened streams, unless the block is
 * already present in {@code loadedAlphabets}.
 *
 * <p>The font description, symbols and symbol mappings are loaded in that
 * order. The block is recorded as loaded only after all three succeed.
 *
 * @param alphabet   the Unicode block being registered
 * @param inlanguage stream of the font description
 * @param language   name passed along with {@code inlanguage}
 * @param insymbols  stream of the symbol definitions
 * @param symbols    name passed along with {@code insymbols}
 * @param inmappings stream of the symbol mappings
 * @param mappings   name passed along with {@code inmappings}
 * @throws ResourceParseException propagated from the loaders
 * @throws IOException            propagated from the loaders
 */
public static void addAlphabet(Character.UnicodeBlock alphabet,
        InputStream inlanguage, String language, InputStream insymbols,
        String symbols, InputStream inmappings, String mappings)
        throws ResourceParseException, IOException {
    if (loadedAlphabets.contains(alphabet)) {
        // nothing to do, this block was registered earlier
        return;
    }

    addTeXFontDescription(inlanguage, language);
    SymbolAtom.addSymbolAtom(insymbols, symbols);
    TeXFormula.addSymbolMappings(inmappings, mappings);
    loadedAlphabets.add(alphabet);
}
/**
 * Registers an alphabet by name, loading its resources from
 * {@code fonts/<name>/language_<name>.xml}, {@code symbols_<name>.xml} and
 * {@code mappings_<name>.xml}.
 *
 * <p>All three streams are closed once loading finishes or fails. A
 * {@code FontAlreadyLoadedException} is ignored, so registering the same
 * alphabet again does nothing.
 *
 * @param alphabet the Unicode block being registered
 * @param name     the alphabet name used to build the resource paths
 * @throws ResourceParseException propagated from {@code DefaultTeXFont.addAlphabet}
 * @throws IOException            if a resource cannot be opened, read or closed
 */
public static void addAlphabet(Character.UnicodeBlock alphabet, String name)
        throws ResourceParseException, IOException {
    String lg = "fonts/" + name + "/language_" + name + ".xml";
    String sym = "fonts/" + name + "/symbols_" + name + ".xml";
    String map = "fonts/" + name + "/mappings_" + name + ".xml";

    // getResourceAsStream may return null; try-with-resources skips closing null resources.
    try (java.io.InputStream inLanguage = AjLatexMath.getAssetManager().open(lg);
            java.io.InputStream inSymbols = TeXFormula.class.getResourceAsStream(sym);
            java.io.InputStream inMappings = TeXFormula.class.getResourceAsStream(map)) {
        DefaultTeXFont.addAlphabet(alphabet, inLanguage, lg, inSymbols, sym, inMappings, map);
    } catch (FontAlreadyLoadedException ignored) {
        // Deliberately ignored: the font is already available.
    }
}
/**
 * Returns the external font registered for a Unicode block.
 *
 * <p>When nothing is registered, a default {@code FontInfos("SansSerif", "Serif")}
 * is created, stored in {@code externalFontMap} and returned.
 *
 * @param block the Unicode block to look up
 * @return the registered or newly created font information, never {@code null}
 */
public static FontInfos getExternalFont(Character.UnicodeBlock block) {
    FontInfos registered = externalFontMap.get(block);
    if (registered != null) {
        return registered;
    }

    FontInfos fallback = new FontInfos("SansSerif", "Serif");
    externalFontMap.put(block, fallback);
    return fallback;
}
/**
 * Registers, replaces or removes the external font used for a Unicode block.
 *
 * <p>Passing {@code null} for both font names removes the registration.
 * Otherwise the pair is stored, and when the block is Basic Latin the
 * {@code predefinedTeXFormulas} cache is cleared.
 *
 * @param block     the Unicode block the fonts apply to
 * @param sansserif the sans-serif font name, may be {@code null}
 * @param serif     the serif font name, may be {@code null}
 */
public static void registerExternalFont(Character.UnicodeBlock block,
        String sansserif, String serif) {
    final boolean hasFont = sansserif != null || serif != null;
    if (hasFont) {
        externalFontMap.put(block, new FontInfos(sansserif, serif));
        if (block.equals(Character.UnicodeBlock.BASIC_LATIN)) {
            predefinedTeXFormulas.clear();
        }
    } else {
        externalFontMap.remove(block);
    }
}
/**
 * Decides whether a text element should be drawn with the glyph renderer.
 *
 * <p>The truncated text of the element is scanned {@code char} by {@code char};
 * the first character whose Unicode block is in {@code glyphRendererBlocks}
 * makes the method return {@code true}.
 *
 * @param text the text element to inspect
 * @return {@code true} if at least one character lies in a configured block;
 *         {@code false} when the text is {@code null}, no blocks are configured,
 *         or no character matches
 */
protected boolean toUseGlyphRenderer(JRPrintText text)
{
    final String content = styledTextUtil.getTruncatedText(text);
    if (content == null || glyphRendererBlocks.isEmpty())
    {
        return false;
    }

    for (int i = 0, n = content.length(); i < n; i++)
    {
        final UnicodeBlock charBlock = UnicodeBlock.of(content.charAt(i));
        if (!glyphRendererBlocks.contains(charBlock))
        {
            continue;
        }

        if (log.isTraceEnabled())
        {
            log.trace("found character in block " + charBlock + ", using the glyph renderer");
        }
        return true;
    }
    return false;
}
/**
 * Checks whether any character may need complex text layout.
 *
 * <p>Only characters between {@code COMPEX_LAYOUT_START_CHAR} and
 * {@code COMPEX_LAYOUT_END_CHAR} (inclusive) are examined. Such a character
 * counts as complex when it has no Unicode block or when its block is not in
 * {@code simpleLayoutBlocks}.
 *
 * @param chars the characters to inspect
 * @return {@code true} as soon as one character is considered complex
 */
protected boolean hasComplexLayout(char[] chars)
{
    UnicodeBlock lastCheckedBlock = null;
    for (char current : chars)
    {
        if (current < COMPEX_LAYOUT_START_CHAR || current > COMPEX_LAYOUT_END_CHAR)
        {
            continue;
        }

        //FIXME use icu4j or CharPredicateCache
        final UnicodeBlock currentBlock = Character.UnicodeBlock.of(current);
        if (currentBlock == null)
        {
            // no block information available, so err on the side of complex layout
            return true;
        }

        // a run of characters from one block only needs a single set lookup
        if (currentBlock == lastCheckedBlock)
        {
            continue;
        }
        lastCheckedBlock = currentBlock;
        if (!simpleLayoutBlocks.contains(currentBlock))
        {
            return true;
        }
    }
    return false;
}
/**
 * Tells whether a character is a Latin-script letter. Non-spacing marks from the
 * Combining Diacritical Marks block are accepted as well, on the assumption that
 * they were added to a preceding Latin character.
 *
 * @param letter the character to test
 * @return {@code true} for letters or non-spacing marks in the Basic Latin,
 *         Latin-1 Supplement, Latin Extended-A/B, Latin Extended Additional or
 *         Combining Diacritical Marks blocks
 */
// @VisibleForTesting
static boolean isLatinLetter(char letter) {
    // Combining marks are a subset of the non-spacing-mark category.
    final boolean letterOrMark = Character.isLetter(letter)
            || Character.getType(letter) == Character.NON_SPACING_MARK;
    if (!letterOrMark) {
        return false;
    }

    final UnicodeBlock block = UnicodeBlock.of(letter);
    final boolean inLatinBlock = block.equals(UnicodeBlock.BASIC_LATIN)
            || block.equals(UnicodeBlock.LATIN_1_SUPPLEMENT)
            || block.equals(UnicodeBlock.LATIN_EXTENDED_A)
            || block.equals(UnicodeBlock.LATIN_EXTENDED_ADDITIONAL)
            || block.equals(UnicodeBlock.LATIN_EXTENDED_B);
    return inLatinBlock || block.equals(UnicodeBlock.COMBINING_DIACRITICAL_MARKS);
}
/**
 * Collects every code point that belongs to one of the given Unicode blocks,
 * each converted to its own {@link String}.
 *
 * <p>The whole code point range from {@link Character#MIN_CODE_POINT} to
 * {@link Character#MAX_CODE_POINT} is scanned. Supplementary code points become
 * two-char strings (a surrogate pair).
 *
 * @param blocks
 *            the {@link UnicodeBlock}s to collect code points from
 * @return a set with one string per code point found in {@code blocks}
 */
private static Set<String> getAllStringsFromUnicodeBlocks(final UnicodeBlock... blocks)
{
    final Set<UnicodeBlock> wantedBlocks = new HashSet<>(Arrays.asList(blocks));
    final Set<String> collected = new HashSet<>();

    int codePoint = Character.MIN_CODE_POINT;
    while (codePoint <= Character.MAX_CODE_POINT)
    {
        if (wantedBlocks.contains(UnicodeBlock.of(codePoint)))
        {
            // toChars yields one char for BMP code points and a surrogate pair otherwise
            collected.add(new String(Character.toChars(codePoint)));
        }
        codePoint++;
    }
    return collected;
}
/**
 * Builds the list of test strings: every code point of a fixed selection of
 * Unicode blocks, followed by two-code-point combinations of
 * Halfwidth-and-Fullwidth-Forms and Box-Drawing characters.
 *
 * <p>The list is an ordinary {@link LinkedList}. Double-brace initialisation
 * is avoided because it would define an anonymous subclass just to fill the list.
 *
 * @return the generated test strings
 */
private static List<String> generateTestData()
{
    final List<String> testData = new LinkedList<>();

    // blocks that lie entirely in the Basic Multilingual Plane (single-char code points)
    testData.addAll(getAllStringsFromUnicodeBlocks(UnicodeBlock.BASIC_LATIN,
            UnicodeBlock.LATIN_1_SUPPLEMENT,
            UnicodeBlock.GREEK,
            UnicodeBlock.LETTERLIKE_SYMBOLS));

    // a mix of supplementary-plane blocks (surrogate pairs) and further BMP blocks
    testData.addAll(getAllStringsFromUnicodeBlocks(UnicodeBlock.LINEAR_B_SYLLABARY,
            UnicodeBlock.MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS,
            UnicodeBlock.MUSICAL_SYMBOLS,
            UnicodeBlock.EMOTICONS,
            UnicodeBlock.PLAYING_CARDS,
            UnicodeBlock.BOX_DRAWING,
            UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS,
            UnicodeBlock.PRIVATE_USE_AREA,
            UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_A,
            UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_B));

    // some additional combinations of characters that could cause problems to the encoder
    final String[] boxDrawing =
            getAllStringsFromUnicodeBlocks(UnicodeBlock.BOX_DRAWING).toArray(new String[0]);
    final String[] halfFullWidthForms =
            getAllStringsFromUnicodeBlocks(UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS).toArray(new String[0]);
    for (int i = 0; i < halfFullWidthForms.length; i++)
    {
        // cycle through the (shorter) box-drawing array
        testData.add(halfFullWidthForms[i] + boxDrawing[i % boxDrawing.length]);
    }
    return testData;
}
/**
 * Rewrites a string with every character of the {@code HANGUL_SYLLABLES} block
 * replaced by its initial, medial and (when present) final component. All other
 * characters are copied unchanged.
 *
 * @param str the text to decompose
 * @return the decomposed text
 */
@Override
public String parse(String str) {
    // A syllable's offset from 0xAC00 encodes (initial * 21 + medial) * 28 + final.
    final int finalCount = 28;
    final int medialTimesFinal = 21 * finalCount;

    StringBuilder decomposed = new StringBuilder();
    for (int pos = 0, n = str.length(); pos < n; pos++) {
        final char current = str.charAt(pos);
        if (UnicodeBlock.of(current) != UnicodeBlock.HANGUL_SYLLABLES) {
            decomposed.append(current);
            continue;
        }

        final int offset = current - 0xAC00;
        final int initialIdx = offset / medialTimesFinal;
        final int remainder = offset % medialTimesFinal;
        final int medialIdx = remainder / finalCount;
        final int finalIdx = remainder % finalCount;

        decomposed.append(ChoSung[initialIdx]);
        decomposed.append(JungSung[medialIdx]);
        // index 0 means the syllable has no final component
        if (finalIdx != 0) {
            decomposed.append(JongSung[finalIdx]);
        }
    }
    return decomposed.toString();
}
/**
 * Adds the irregular conjugation rules found in a problem/answer pair to the
 * irregular dictionary.
 *
 * <p>Nothing happens unless {@code isIrregular} accepts the pair. Otherwise
 * both sides are converted to jaso (letter) units, the irregular rules are
 * extracted, and each rule that is not filtered out is re-combined and appended
 * to {@code irrDic}.
 *
 * @param paPair the problem and its list of answers
 */
private void appendIrregularDictionary(ProblemAnswerPair paPair) {
    if (!this.isIrregular(paPair.getProblem(), paPair.getAnswerList())) {
        return;
    }

    // Convert to jaso units and extract the irregular patterns.
    List<Pair<String, String>> irrRuleList = irrParser.parse(
            this.convertJaso(paPair.getProblem()),
            this.convertJaso(paPair.getAnswerList()));

    for (Pair<String, String> rule : irrRuleList) {
        final String problem = rule.getFirst();
        final String answer = rule.getSecond();

        // Skip rules with an empty answer: these come from errors in the
        // training set (Sejong corpus).
        if (answer.trim().length() == 0) {
            continue;
        }

        // Skip rules that are explicitly excluded; the key is the problem plus
        // the answer with everything from its last "/" removed.
        final String exclusionKey = problem + "\t" + answer.substring(0, answer.lastIndexOf("/"));
        if (this.irrExclusiveSet.contains(exclusionKey)) {
            continue;
        }

        // Skip rules whose re-combined problem still contains a bare
        // compatibility jamo.
        final String combinedProblem = this.unitParser.combine(problem);
        boolean hasJamoProblem = false;
        for (int i = 0; i < combinedProblem.length() && !hasJamoProblem; i++) {
            hasJamoProblem = StringUtil.getUnicodeBlock(combinedProblem.charAt(i))
                    == UnicodeBlock.HANGUL_COMPATIBILITY_JAMO;
        }
        if (hasJamoProblem) {
            continue;
        }

        // Skip cases such as 놓으 -> 놓+으시 or 않으 -> 않+으시.
        if (problem.endsWith("ㅇㅡ") && answer.endsWith("ㅇㅡㅅㅣ/EP")) {
            continue;
        }

        irrDic.append(this.unitParser.combine(problem), this.unitParser.combine(answer));
    }
}
/**
 * Adds morpheme / part-of-speech pairs to the word dictionary.
 *
 * <p>Two kinds of pairs are skipped: a morpheme that trims to a single
 * compatibility jamo and has a tag containing {@code "NN"}, and any pair tagged
 * exactly {@code "SH"}, {@code "SN"} or {@code "SL"}.
 *
 * @param answerList the (morpheme, tag) pairs to add
 */
private void appendWordDictionary(List<Pair<String, String>> answerList) {
    for (Pair<String, String> entry : answerList) {
        final String morph = entry.getFirst();
        final String tag = entry.getSecond();

        final String trimmedMorph = morph.trim();
        final boolean loneJamoNoun = trimmedMorph.length() == 1
                && StringUtil.getUnicodeBlock(trimmedMorph.charAt(0)) == UnicodeBlock.HANGUL_COMPATIBILITY_JAMO
                && tag.contains("NN");
        final boolean excludedTag = tag.equals("SH") || tag.equals("SN") || tag.equals("SL");
        if (loneJamoNoun || excludedTag) {
            continue;
        }

        // NOTE: the punctuation tags SF (period, question mark, exclamation
        // mark), SP (comma, middle dot, colon, slash), SS (quotes, brackets,
        // dashes) and SO (tilde-like marks) are NOT filtered here. The original
        // author noted that, because of a dependency on the analyzer, such a
        // filter would have to be mirrored in the rule parser, and questioned
        // whether it would make anything faster.
        wordDic.append(morph, tag);
    }
}
/**
 * Walks backwards from {@code start} to the beginning of the word that contains
 * the character at {@code start}.
 *
 * <p>When that character is in the Basic Latin block, the word extends over
 * preceding apostrophes, decimal digits and upper/lower/title-case or modifier
 * letters. Otherwise it extends over preceding characters that share the same
 * Unicode block.
 *
 * @param text  the text to scan
 * @param start index of a character inside the word
 * @return index of the first character of the word, or {@code start} unchanged
 *         when it is at or past the end of the text
 */
private static int findWordStart(CharSequence text, int start) {
    if (start >= text.length()) {
        return start;
    }

    final UnicodeBlock anchorBlock = UnicodeBlock.of(text.charAt(start));
    final boolean latinAnchor = anchorBlock == UnicodeBlock.BASIC_LATIN;

    int pos = start;
    while (pos > 0) {
        final char prev = text.charAt(pos - 1);
        boolean belongsToWord;
        if (latinAnchor) {
            switch (Character.getType(prev)) {
                case Character.UPPERCASE_LETTER:
                case Character.LOWERCASE_LETTER:
                case Character.TITLECASE_LETTER:
                case Character.MODIFIER_LETTER:
                case Character.DECIMAL_DIGIT_NUMBER:
                    belongsToWord = true;
                    break;
                default:
                    // apostrophes are kept inside words
                    belongsToWord = prev == '\'';
                    break;
            }
        } else {
            belongsToWord = UnicodeBlock.of(prev) == anchorBlock;
        }

        if (!belongsToWord) {
            break;
        }
        pos--;
    }
    return pos;
}
/**
 * Walks forwards from {@code end} to just past the last character of the word
 * that contains the character at {@code end}.
 *
 * <p>When that character is in the Basic Latin block, the word extends over
 * apostrophes, decimal digits and upper/lower/title-case or modifier letters.
 * Otherwise it extends over characters that share the same Unicode block.
 *
 * @param text the text to scan
 * @param end  index of a character inside the word
 * @return index one past the last character of the word, or {@code end}
 *         unchanged when it is at or past the end of the text
 */
private static int findWordEnd(CharSequence text, int end) {
    final int length = text.length();
    if (end >= length) {
        return end;
    }

    final UnicodeBlock anchorBlock = UnicodeBlock.of(text.charAt(end));
    final boolean latinAnchor = anchorBlock == UnicodeBlock.BASIC_LATIN;

    int pos = end;
    while (pos < length) {
        final char current = text.charAt(pos);
        boolean belongsToWord;
        if (latinAnchor) {
            switch (Character.getType(current)) {
                case Character.UPPERCASE_LETTER:
                case Character.LOWERCASE_LETTER:
                case Character.TITLECASE_LETTER:
                case Character.MODIFIER_LETTER:
                case Character.DECIMAL_DIGIT_NUMBER:
                    belongsToWord = true;
                    break;
                default:
                    // apostrophes are kept inside words
                    belongsToWord = current == '\'';
                    break;
            }
        } else {
            belongsToWord = UnicodeBlock.of(current) == anchorBlock;
        }

        if (!belongsToWord) {
            break;
        }
        pos++;
    }
    return pos;
}
/**
 * Tells whether a character lies in one of the blocks this helper treats as
 * Chinese text: CJK Unified Ideographs (and Extension A), CJK Compatibility
 * Ideographs, General Punctuation, CJK Symbols and Punctuation, or Halfwidth and
 * Fullwidth Forms.
 *
 * @param c the character to classify
 * @return {@code true} when the character's block is one of the blocks above
 */
private static boolean isChinese(char c) {
    final UnicodeBlock block = UnicodeBlock.of(c);

    final boolean ideograph = block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
            || block == UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
            || block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A;
    final boolean punctuationOrWidthForm = block == UnicodeBlock.GENERAL_PUNCTUATION
            || block == UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
            || block == UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS;
    return ideograph || punctuationOrWidthForm;
}
/**
 * Creates a range entry that ties a Unicode block to its code point bounds.
 *
 * @param a_block     the Unicode block this entry describes
 * @param a_start     start of the range (presumably the first code point of the
 *                    block; confirm against callers)
 * @param a_end       end of the range (presumably the last code point of the
 *                    block; confirm against callers)
 * @param a_osTwoFlag flag value stored with the range (presumably the matching
 *                    Unicode-range bit of the font's OS/2 table; confirm)
 */
public TTUnicodeRange(UnicodeBlock a_block, long a_start, long a_end, int a_osTwoFlag) {
    this.m_block = a_block;
    this.m_start = a_start;
    this.m_end = a_end;
    this.m_osTwoFlag = a_osTwoFlag;
}