下面列出了java.util.regex.Pattern#UNICODE_CHARACTER_CLASS 实例代码,或者点击链接到github查看源代码,也可以在右侧发表评论。
@VisibleForTesting
/*
 * Compiles patternString into a Pattern according to the given search options.
 *
 * literalParsing - when true, patternString is matched as plain text, not as a regular expression
 * caseSensitive  - when false, Pattern.CASE_INSENSITIVE is set
 * wholeWord      - when true, the whole expression is surrounded by \b word-boundary anchors
 * patternString  - the text or regular expression to search for
 * isUnicode      - when true, Pattern.UNICODE_CHARACTER_CLASS is set
 *
 * Returns the compiled Pattern. Pattern.compile throws PatternSyntaxException when the resulting
 * expression is not valid.
 */
static Pattern buildPattern( boolean literalParsing, boolean caseSensitive, boolean wholeWord,
                             String patternString, boolean isUnicode ) {
  int flags = 0;
  // Pattern.LITERAL would also turn the \b anchors added below into plain text, so it is only
  // used when no anchors are required; the whole-word case quotes the text itself instead.
  if ( literalParsing && !wholeWord ) {
    flags |= Pattern.LITERAL;
  }
  if ( !caseSensitive ) {
    flags |= Pattern.CASE_INSENSITIVE;
  }
  if ( isUnicode ) {
    flags |= Pattern.UNICODE_CHARACTER_CLASS;
  }
  /*
   * XXX: I don't like this parameter. I think it would almost always be better for the user to define either word
   * boundaries or ^/$ anchors explicitly in their pattern.
   */
  if ( wholeWord ) {
    String body;
    if ( literalParsing ) {
      // Pattern.quote copes with a "\E" inside the text; a hand-written \Q...\E wrapper would
      // end the quoted section early and interpret the remainder as a regular expression.
      body = Pattern.quote( patternString );
    } else {
      // Non-capturing group: keeps group numbering intact while making sure a top-level
      // alternation such as "foo|bar" is anchored as a whole instead of branch by branch.
      body = "(?:" + patternString + ")";
    }
    patternString = "\\b" + body + "\\b";
  }
  return Pattern.compile( patternString, flags );
}
/**
 * Builds the {@link Pattern} compile-flag bit mask from boolean properties of the given context.
 * Every property that evaluates to {@code true} contributes the {@code Pattern} constant of the
 * same name.
 *
 * @param context source of the property values
 * @return the bitwise OR of the flags whose properties are enabled; {@code 0} when none is
 */
int getCompileFlags(ProcessContext context) {
    int mask = 0;
    if (context.getProperty(UNIX_LINES).asBoolean()) {
        mask |= Pattern.UNIX_LINES;
    }
    if (context.getProperty(CASE_INSENSITIVE).asBoolean()) {
        mask |= Pattern.CASE_INSENSITIVE;
    }
    if (context.getProperty(COMMENTS).asBoolean()) {
        mask |= Pattern.COMMENTS;
    }
    if (context.getProperty(MULTILINE).asBoolean()) {
        mask |= Pattern.MULTILINE;
    }
    if (context.getProperty(LITERAL).asBoolean()) {
        mask |= Pattern.LITERAL;
    }
    if (context.getProperty(DOTALL).asBoolean()) {
        mask |= Pattern.DOTALL;
    }
    if (context.getProperty(UNICODE_CASE).asBoolean()) {
        mask |= Pattern.UNICODE_CASE;
    }
    if (context.getProperty(CANON_EQ).asBoolean()) {
        mask |= Pattern.CANON_EQ;
    }
    if (context.getProperty(UNICODE_CHARACTER_CLASS).asBoolean()) {
        mask |= Pattern.UNICODE_CHARACTER_CLASS;
    }
    return mask;
}
/**
 * Translates a string of single-character regex flags into a {@link Pattern} flag bit mask.
 * <p>
 * Recognised characters: {@code i} (CASE_INSENSITIVE), {@code u} (UNICODE_CASE),
 * {@code U} (UNICODE_CHARACTER_CLASS), {@code s} (DOTALL), {@code m} (MULTILINE),
 * {@code x} (COMMENTS) and {@code d} (UNIX_LINES). Any other character is ignored.
 *
 * @param flagsString the flag characters, decoded with {@code utf8ToString()}; may be {@code null}
 * @return the combined flags, or {@code 0} when {@code flagsString} is {@code null}
 */
public static int parseFlags(@Nullable BytesRef flagsString) {
    if (flagsString == null) {
        return 0;
    }
    final String text = flagsString.utf8ToString();
    int mask = 0;
    for (int idx = 0; idx < text.length(); idx++) {
        switch (text.charAt(idx)) {
            case 'i':
                mask |= Pattern.CASE_INSENSITIVE;
                break;
            case 'u':
                mask |= Pattern.UNICODE_CASE;
                break;
            case 'U':
                mask |= Pattern.UNICODE_CHARACTER_CLASS;
                break;
            case 's':
                mask |= Pattern.DOTALL;
                break;
            case 'm':
                mask |= Pattern.MULTILINE;
                break;
            case 'x':
                mask |= Pattern.COMMENTS;
                break;
            case 'd':
                mask |= Pattern.UNIX_LINES;
                break;
            default:
                // unrecognised flag characters are skipped without complaint
                break;
        }
    }
    return mask;
}
/**
 * Derives the {@link Pattern} compile flags from boolean properties read from the context.
 * A flag is included when the property with the matching name evaluates to {@code true}.
 *
 * @param context the context the properties are read from
 * @return the enabled flags OR-ed together; {@code 0} when no property is enabled
 */
int getCompileFlags(ProcessContext context) {
    int compileFlags = 0;
    if (context.getProperty(UNIX_LINES).asBoolean()) {
        compileFlags |= Pattern.UNIX_LINES;
    }
    if (context.getProperty(CASE_INSENSITIVE).asBoolean()) {
        compileFlags |= Pattern.CASE_INSENSITIVE;
    }
    if (context.getProperty(COMMENTS).asBoolean()) {
        compileFlags |= Pattern.COMMENTS;
    }
    if (context.getProperty(MULTILINE).asBoolean()) {
        compileFlags |= Pattern.MULTILINE;
    }
    if (context.getProperty(LITERAL).asBoolean()) {
        compileFlags |= Pattern.LITERAL;
    }
    if (context.getProperty(DOTALL).asBoolean()) {
        compileFlags |= Pattern.DOTALL;
    }
    if (context.getProperty(UNICODE_CASE).asBoolean()) {
        compileFlags |= Pattern.UNICODE_CASE;
    }
    if (context.getProperty(CANON_EQ).asBoolean()) {
        compileFlags |= Pattern.CANON_EQ;
    }
    if (context.getProperty(UNICODE_CHARACTER_CLASS).asBoolean()) {
        compileFlags |= Pattern.UNICODE_CHARACTER_CLASS;
    }
    return compileFlags;
}
@VisibleForTesting
/*
 * Builds the Pattern used for searching, from the given search options.
 *
 * literalParsing - when true, patternString is matched as plain text, not as a regular expression
 * caseSensitive  - when false, Pattern.CASE_INSENSITIVE is set
 * wholeWord      - when true, the whole expression is surrounded by \b word-boundary anchors
 * patternString  - the text or regular expression to search for
 * isUnicode      - when true, Pattern.UNICODE_CHARACTER_CLASS is set
 *
 * Returns the compiled Pattern. Pattern.compile throws PatternSyntaxException when the resulting
 * expression is not valid.
 */
static Pattern buildPattern( boolean literalParsing, boolean caseSensitive, boolean wholeWord,
                             String patternString, boolean isUnicode ) {
  int flags = 0;
  // Pattern.LITERAL would also turn the \b anchors added below into plain text, so it is only
  // used when no anchors are required; the whole-word case quotes the text itself instead.
  if ( literalParsing && !wholeWord ) {
    flags |= Pattern.LITERAL;
  }
  if ( !caseSensitive ) {
    flags |= Pattern.CASE_INSENSITIVE;
  }
  if ( isUnicode ) {
    flags |= Pattern.UNICODE_CHARACTER_CLASS;
  }
  /*
   * XXX: I don't like this parameter. I think it would almost always be better for the user to define either word
   * boundaries or ^/$ anchors explicitly in their pattern.
   */
  if ( wholeWord ) {
    // Literal text goes through Pattern.quote, which stays correct even when the text contains
    // "\E" (a hand-written \Q...\E wrapper would terminate early). A regular expression is put
    // in a non-capturing group so that a top-level alternation is anchored as one unit and
    // existing capture-group numbers do not shift.
    String body = literalParsing ? Pattern.quote( patternString ) : "(?:" + patternString + ")";
    patternString = "\\b" + body + "\\b";
  }
  return Pattern.compile( patternString, flags );
}
/**
 * Translates a string of single-character regex flags into a {@link Pattern} flag bit mask.
 * <p>
 * Recognised characters: {@code i} (CASE_INSENSITIVE), {@code u} (UNICODE_CASE),
 * {@code U} (UNICODE_CHARACTER_CLASS), {@code s} (DOTALL), {@code m} (MULTILINE),
 * {@code x} (COMMENTS) and {@code d} (UNIX_LINES). A space and {@code g} are accepted but
 * contribute no flag here.
 *
 * @param flagsString the flag characters; may be {@code null}
 * @return the combined flags, or {@code 0} when {@code flagsString} is {@code null}
 * @throws IllegalArgumentException if the string contains any other character
 */
public static int parseFlags(@Nullable String flagsString) {
    if (flagsString == null) {
        return 0;
    }
    int mask = 0;
    final int length = flagsString.length();
    for (int idx = 0; idx < length; idx++) {
        final char ch = flagsString.charAt(idx);
        switch (ch) {
            case 'i':
                mask |= Pattern.CASE_INSENSITIVE;
                break;
            case 'u':
                mask |= Pattern.UNICODE_CASE;
                break;
            case 'U':
                mask |= Pattern.UNICODE_CHARACTER_CLASS;
                break;
            case 's':
                mask |= Pattern.DOTALL;
                break;
            case 'm':
                mask |= Pattern.MULTILINE;
                break;
            case 'x':
                mask |= Pattern.COMMENTS;
                break;
            case 'd':
                mask |= Pattern.UNIX_LINES;
                break;
            case ' ':
            case 'g':
                // handled in isGlobalFunction
                break;
            default:
                throw new IllegalArgumentException("The regular expression flag is unknown: " + ch);
        }
    }
    return mask;
}
/**
 * Converts a flags value into an integer regex flag mask.
 * <p>
 * When {@code _flags} reports itself as a number, or when {@code StringMemory.toLong} returns a
 * non-null value for its string form, the result is {@code _flags.toInteger()}. Otherwise each
 * character of the string form adds one flag: {@code i} CASE_INSENSITIVE, {@code m} MULTILINE,
 * {@code L} LITERAL, {@code d} UNIX_LINES, {@code u} UNICODE_CASE,
 * {@code U} UNICODE_CHARACTER_CLASS, {@code x} COMMENTS, {@code s} DOTALL. Characters without a
 * mapping are ignored.
 *
 * @param _flags the flags value, either numeric or a string of flag characters
 * @return the resulting flag mask
 */
private static int convertFlags(Memory _flags) {
    if (_flags.isNumber()) {
        return _flags.toInteger();
    }

    final String text = _flags.toString();
    if (StringMemory.toLong(text) != null) {
        return _flags.toInteger();
    }

    int mask = 0;
    for (char letter : text.toCharArray()) {
        switch (letter) {
            case 'i':
                mask |= CASE_INSENSITIVE;
                break;
            case 'm':
                mask |= MULTILINE;
                break;
            case 'L':
                mask |= LITERAL;
                break;
            case 'd':
                mask |= UNIX_LINES;
                break;
            case 'u':
                mask |= UNICODE_CASE;
                break;
            case 'U':
                mask |= Pattern.UNICODE_CHARACTER_CLASS;
                break;
            case 'x':
                mask |= COMMENTS;
                break;
            case 's':
                mask |= DOTALL;
                break;
            default:
                // characters without a mapping contribute nothing
                break;
        }
    }
    return mask;
}