下面列出了 java.util.regex.Pattern#UNICODE_CASE 的实例代码，或者点击链接到 GitHub 查看源代码，也可以在右侧发表评论。
/**
 * Performs a find-and-replace over the string values of the selected attributes.
 * <p>
 * For each attribute in {@code selectedAttributes} and each element of {@code elementType} in
 * the graph, every match of the search pattern in the element's string value is replaced with
 * {@code replaceString}. A value is only written back when the replacement actually changed it.
 * <p>
 * An empty {@code findString} is rewritten to the regular expression {@code ^$} and
 * {@code regex} is switched on, so that empty values are matched. Note that this updates the
 * {@code findString} and {@code regex} fields themselves.
 * <p>
 * When the search was requested as literal text (non-regex), the replacement text is treated
 * literally as well: dollar signs and backslashes in it carry no special meaning.
 *
 * @param graph the graph whose attribute values are read and updated
 * @param interaction not used by this method
 * @param parameters not used by this method
 * @throws InterruptedException declared by the signature; not thrown directly by this body
 */
@Override
protected void edit(final GraphWriteMethods graph, final PluginInteraction interaction, final PluginParameters parameters) throws InterruptedException {
    // Remember the requested mode before the empty-string special case forces regex on:
    // a literal search must also use a literal replacement.
    final boolean literalReplacement = !regex;
    if (findString.isEmpty()) {
        findString = "^$";
        regex = true;
    }
    final int elementCount = elementType.getElementCount(graph);
    final String searchString = regex ? findString : Pattern.quote(findString);
    final int caseSensitivity = ignorecase ? Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE : 0;
    final Pattern searchPattern = Pattern.compile(searchString, caseSensitivity);
    // Matcher.replaceAll interprets '$' and '\' in the replacement; quote them in literal
    // mode so that e.g. "$5" is inserted verbatim instead of being read as a group reference.
    final String replacement = literalReplacement ? Matcher.quoteReplacement(replaceString) : replaceString;
    for (Attribute a : selectedAttributes) {
        for (int i = 0; i < elementCount; i++) {
            final int currElement = elementType.getElement(graph, i);
            final String value = graph.getStringValue(a.getId(), currElement);
            if (value != null) {
                final String newValue = searchPattern.matcher(value).replaceAll(replacement);
                // Avoid touching the graph when nothing changed.
                if (!newValue.equals(value)) {
                    graph.setStringValue(a.getId(), currElement, newValue);
                }
            }
        }
    }
}
/**
 * Builds a {@link Pattern} from a search string that is either a regular expression or plain
 * text (optionally written in our old 'StringMatcher' format).
 *
 * @param pattern The search pattern
 * @param isRegex <code>true</code> if the passed string already is a reg-ex pattern
 * @param isStringMatcher <code>true</code> if the passed string is in the StringMatcher format.
 * @param isCaseSensitive <code>true</code> to create a case sensitive pattern;
 *            <code>false</code> adds the CASE_INSENSITIVE and UNICODE_CASE flags
 * @param isWholeWord <code>true</code> to create a pattern that requires a word boundary at the
 *            beginning and the end. A boundary is only added on a side where the search text
 *            starts/ends with a word character. Must not be combined with <code>isRegex</code>.
 * @return The created pattern, always compiled with the MULTILINE flag
 * @throws PatternSyntaxException if "\R" is at an illegal position or the resulting expression
 *             cannot be compiled
 */
public static Pattern createPattern(String pattern, boolean isRegex, boolean isStringMatcher, boolean isCaseSensitive, boolean isWholeWord) throws PatternSyntaxException {
    final String expression;
    if (!isRegex) {
        final int length = pattern.length();
        final StringBuilder regex = new StringBuilder(length + 10);
        // A word boundary is only useful next to a word character; skipping it otherwise
        // works around a user input error.
        final boolean boundaryAtStart = isWholeWord && length > 0 && isWordChar(pattern.charAt(0));
        if (boundaryAtStart) {
            regex.append("\\b"); //$NON-NLS-1$
        }
        appendAsRegEx(isStringMatcher, pattern, regex);
        final boolean boundaryAtEnd = isWholeWord && length > 0 && isWordChar(pattern.charAt(length - 1));
        if (boundaryAtEnd) {
            regex.append("\\b"); //$NON-NLS-1$
        }
        expression = regex.toString();
    } else {
        expression = substituteLinebreak(pattern);
        Assert.isTrue(!isWholeWord, "isWholeWord unsupported together with isRegex"); //$NON-NLS-1$
    }

    final int regexOptions = isCaseSensitive
            ? Pattern.MULTILINE
            : Pattern.MULTILINE | Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
    return Pattern.compile(expression, regexOptions);
}
/**
 * Builds the pattern used to search for the given string with the provided options.
 *
 * This implementation is drawn from the jface implementation of
 * org.eclipse.jface.text.FindReplaceDocumentAdapter
 *
 * @param findString
 *            The string to find
 * @param options
 *            The search options. Its <code>regExSearch</code> field selects whether
 *            <code>findString</code> is used as a regular expression or converted from
 *            plain text, <code>caseSensitive</code> selects case sensitive matching, and
 *            <code>wholeWord</code> wraps the expression in word boundaries.
 * @return The created pattern
 */
private static @NonNull Pattern getPattern(String findString, SearchOptions options) {
    final String expression = options.regExSearch
            ? substituteLinebreak(findString)
            : asRegPattern(findString);
    final String bounded = options.wholeWord
            ? "\\b" + expression + "\\b" //$NON-NLS-1$ //$NON-NLS-2$
            : expression;
    // Case-insensitive matching is made Unicode-aware rather than ASCII-only.
    final int patternFlags = options.caseSensitive
            ? 0
            : Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
    return Pattern.compile(bounded, patternFlags);
}
/**
 * Creates a filter pattern whose matcher is compiled from the pattern text of the given data.
 *
 * @param data the filter pattern data, passed on to the superclass constructor
 */
public RegexFilterPattern(SmsFilterPatternData data) {
    super(data);

    // Android does not support the CANON_EQ regex flag, so the pattern
    // text is normalized to NFC here instead. This only has an effect
    // when the pattern contains the actual character (e.g. \u3060),
    // NOT the escape sequence (e.g. \\u3060).
    final String normalized = Normalizer.normalize(getPattern(), Normalizer.Form.NFC);

    // Wildcard patterns are translated into regex syntax before compiling.
    final String regexPattern = (getMode() == SmsFilterMode.WILDCARD)
            ? wildcardToRegex(normalized)
            : normalized;

    // UNICODE_CASE is always set; it only takes effect together with CASE_INSENSITIVE.
    final int regexFlags = isCaseSensitive()
            ? Pattern.UNICODE_CASE
            : Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE;

    mMatcher = Pattern.compile(regexPattern, regexFlags).matcher("");
}
/**
 * Combines the {@link Pattern} flags of all currently selected flag checkboxes.
 *
 * @return the bitwise OR of the selected flags, or 0 if no checkbox is selected
 */
private int getRegexFlag() {
    return (unixLinesCkb.isSelected() ? Pattern.UNIX_LINES : 0)
            | (caseInsensitiveCkb.isSelected() ? Pattern.CASE_INSENSITIVE : 0)
            | (commentsCkb.isSelected() ? Pattern.COMMENTS : 0)
            | (multilineCkb.isSelected() ? Pattern.MULTILINE : 0)
            | (literalCkb.isSelected() ? Pattern.LITERAL : 0)
            | (dotallCkb.isSelected() ? Pattern.DOTALL : 0)
            | (unicodeCaseCkb.isSelected() ? Pattern.UNICODE_CASE : 0)
            | (canonEqCkb.isSelected() ? Pattern.CANON_EQ : 0);
}
/**
 * Marks as selected every element of {@code elementType} that has a matching string value in
 * at least one of the selected attributes.
 * <p>
 * An empty {@code findString} is rewritten to the regular expression {@code ^$} and
 * {@code regex} is switched on. Unless {@code addToSelection} is set, the current selection is
 * cleared first. With {@code matchWholeWord} the pattern must match the entire value,
 * otherwise a match anywhere in the value is sufficient.
 *
 * @param graph the graph that is searched and whose selection attribute is updated
 * @param interaction not used by this method
 * @param parameters not used by this method
 * @throws InterruptedException declared by the signature; not thrown directly by this body
 */
@Override
protected void edit(final GraphWriteMethods graph, final PluginInteraction interaction, final PluginParameters parameters) throws InterruptedException {
    if (findString.isEmpty()) {
        findString = "^$";
        regex = true;
    }

    final int selectedAttribute = graph.getAttribute(elementType, VisualConcept.VertexAttribute.SELECTED.getName());
    final int elementCount = elementType.getElementCount(graph);

    // Start from an empty selection unless the results are to be added to the existing one.
    if (!addToSelection) {
        clearSelection(graph);
    }

    final String searchString;
    if (regex) {
        searchString = findString;
    } else {
        searchString = Pattern.quote(findString);
    }
    final int flags = ignorecase ? Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE : 0;
    final Pattern searchPattern = Pattern.compile(searchString, flags);

    for (final Attribute attribute : selectedAttributes) {
        for (int position = 0; position < elementCount; position++) {
            final int element = elementType.getElement(graph, position);
            final String value = graph.getStringValue(attribute.getId(), element);
            if (value == null) {
                continue;
            }
            final Matcher matcher = searchPattern.matcher(value);
            final boolean found = matchWholeWord ? matcher.matches() : matcher.find();
            if (found) {
                graph.setBooleanValue(selectedAttribute, element, true);
            }
        }
    }
}
/**
 * Translates a list of flag names into a {@link Pattern} flag bit mask.
 * <p>
 * Recognized names are "multiline", "insensitive" and "unicode"; any other entry is ignored.
 *
 * @param flagList the flag names to translate
 * @return the bitwise OR of the flags whose names are contained in the list
 */
private static int calculateFlags(List<String> flagList) {
    final int multiline = flagList.contains("multiline") ? Pattern.MULTILINE : 0;
    final int insensitive = flagList.contains("insensitive") ? Pattern.CASE_INSENSITIVE : 0;
    final int unicode = flagList.contains("unicode") ? Pattern.UNICODE_CASE : 0;
    return multiline | insensitive | unicode;
}
/**
 * Computes the {@link Pattern} compile flags from the boolean flag properties of the context.
 *
 * @param context the context whose flag properties are read
 * @return the bitwise OR of every flag whose property evaluates to true
 */
int getCompileFlags(ProcessContext context) {
    int flags = 0;
    if (context.getProperty(UNIX_LINES).asBoolean()) {
        flags |= Pattern.UNIX_LINES;
    }
    if (context.getProperty(CASE_INSENSITIVE).asBoolean()) {
        flags |= Pattern.CASE_INSENSITIVE;
    }
    if (context.getProperty(COMMENTS).asBoolean()) {
        flags |= Pattern.COMMENTS;
    }
    if (context.getProperty(MULTILINE).asBoolean()) {
        flags |= Pattern.MULTILINE;
    }
    if (context.getProperty(LITERAL).asBoolean()) {
        flags |= Pattern.LITERAL;
    }
    if (context.getProperty(DOTALL).asBoolean()) {
        flags |= Pattern.DOTALL;
    }
    if (context.getProperty(UNICODE_CASE).asBoolean()) {
        flags |= Pattern.UNICODE_CASE;
    }
    if (context.getProperty(CANON_EQ).asBoolean()) {
        flags |= Pattern.CANON_EQ;
    }
    if (context.getProperty(UNICODE_CHARACTER_CLASS).asBoolean()) {
        flags |= Pattern.UNICODE_CHARACTER_CLASS;
    }
    return flags;
}
/**
 * Converts a string of single-character regex flags into a {@link Pattern} flag bit mask.
 * <p>
 * Recognized characters: {@code i} (CASE_INSENSITIVE), {@code u} (UNICODE_CASE),
 * {@code U} (UNICODE_CHARACTER_CLASS), {@code s} (DOTALL), {@code m} (MULTILINE),
 * {@code x} (COMMENTS) and {@code d} (UNIX_LINES). Any other character is silently ignored.
 *
 * @param flagsString the flag characters, may be {@code null}
 * @return the combined flags; 0 if {@code flagsString} is {@code null}
 */
public static int parseFlags(@Nullable BytesRef flagsString) {
    if (flagsString == null) {
        return 0;
    }
    final String flagChars = flagsString.utf8ToString();
    int flags = 0;
    for (int i = 0; i < flagChars.length(); i++) {
        switch (flagChars.charAt(i)) {
            case 'i':
                flags |= Pattern.CASE_INSENSITIVE;
                break;
            case 'u':
                flags |= Pattern.UNICODE_CASE;
                break;
            case 'U':
                flags |= Pattern.UNICODE_CHARACTER_CLASS;
                break;
            case 's':
                flags |= Pattern.DOTALL;
                break;
            case 'm':
                flags |= Pattern.MULTILINE;
                break;
            case 'x':
                flags |= Pattern.COMMENTS;
                break;
            case 'd':
                flags |= Pattern.UNIX_LINES;
                break;
            default:
                // unknown flag characters are ignored
                break;
        }
    }
    return flags;
}
/**
 * Parses a {@code |}-delimited list of regex flag names into a {@link Pattern} flag bit mask.
 * <p>
 * Names are matched case-insensitively (they are upper-cased with {@link Locale#ROOT});
 * empty entries are skipped.
 *
 * @param flags the delimited flag names, e.g. {@code "CASE_INSENSITIVE|DOTALL"}
 * @return the bitwise OR of all named flags
 * @throws IllegalArgumentException if an entry is not a known flag name
 */
public static int flagsFromString(String flags) {
    int pFlags = 0;
    for (String token : Strings.delimitedListToStringArray(flags, "|")) {
        if (token.isEmpty()) {
            continue;
        }
        final String name = token.toUpperCase(Locale.ROOT);
        switch (name) {
            case "CASE_INSENSITIVE":
                pFlags |= Pattern.CASE_INSENSITIVE;
                break;
            case "MULTILINE":
                pFlags |= Pattern.MULTILINE;
                break;
            case "DOTALL":
                pFlags |= Pattern.DOTALL;
                break;
            case "UNICODE_CASE":
                pFlags |= Pattern.UNICODE_CASE;
                break;
            case "CANON_EQ":
                pFlags |= Pattern.CANON_EQ;
                break;
            case "UNIX_LINES":
                pFlags |= Pattern.UNIX_LINES;
                break;
            case "LITERAL":
                pFlags |= Pattern.LITERAL;
                break;
            case "COMMENTS":
                pFlags |= Pattern.COMMENTS;
                break;
            case "UNICODE_CHAR_CLASS":
                pFlags |= UNICODE_CHARACTER_CLASS;
                break;
            default:
                throw new IllegalArgumentException("Unknown regex flag [" + name + "]");
        }
    }
    return pFlags;
}
/**
 * Renders a {@link Pattern} flag bit mask as a list of flag names.
 * <p>
 * Every name is followed by a {@code |}, so a non-empty result ends with a trailing
 * delimiter. The names are emitted in a fixed order, independent of the bit values.
 *
 * @param flags the flag bit mask to render
 * @return the names of all set flags, or an empty string if none of the known flags is set
 */
public static String flagsToString(int flags) {
    // Parallel tables: masks[i] is rendered as names[i].
    final int[] masks = {
        Pattern.CASE_INSENSITIVE,
        Pattern.MULTILINE,
        Pattern.DOTALL,
        Pattern.UNICODE_CASE,
        Pattern.CANON_EQ,
        Pattern.UNIX_LINES,
        Pattern.LITERAL,
        Pattern.COMMENTS,
        UNICODE_CHARACTER_CLASS
    };
    final String[] names = {
        "CASE_INSENSITIVE|",
        "MULTILINE|",
        "DOTALL|",
        "UNICODE_CASE|",
        "CANON_EQ|",
        "UNIX_LINES|",
        "LITERAL|",
        "COMMENTS|",
        "UNICODE_CHAR_CLASS|"
    };
    final StringBuilder rendered = new StringBuilder();
    for (int i = 0; i < masks.length; i++) {
        if ((flags & masks[i]) != 0) {
            rendered.append(names[i]);
        }
    }
    return rendered.toString();
}
/**
 * Verifies that all regexp flag names passed as request parameters are resolved to the
 * matching combination of {@link Pattern} flags.
 */
@Test
public void testRegexpFlagParsing() {
    final int expected = Pattern.CASE_INSENSITIVE
            | Pattern.LITERAL
            | Pattern.COMMENTS
            | Pattern.MULTILINE
            | Pattern.UNIX_LINES
            | Pattern.UNICODE_CASE
            | Pattern.DOTALL
            | Pattern.CANON_EQ;

    final ModifiableSolrParams params = new ModifiableSolrParams();
    params.add(
            TermsParams.TERMS_REGEXP_FLAG,
            "case_insensitive", "literal", "comments", "multiline", "unix_lines",
            "unicode_case", "dotall", "canon_eq");

    try (TermsComponent termsComponent = new TermsComponent()) {
        final int actual = termsComponent.resolveRegexpFlags(params);
        assertEquals(expected, actual);
    } catch (IOException e) {
        fail("Error closing TermsComponent");
    }
}
/**
 * Returns a description of the form {@code regex=<regex>, flags=<chars>}, where the flag
 * characters are {@code i} (CASE_INSENSITIVE), {@code m} (MULTILINE) and {@code u}
 * (UNICODE_CASE), in that order, for each flag that is set.
 */
@Override
public String toString() {
    final String insensitive = (flags & Pattern.CASE_INSENSITIVE) != 0 ? "i" : "";
    final String multiline = (flags & Pattern.MULTILINE) != 0 ? "m" : "";
    final String unicodeCase = (flags & Pattern.UNICODE_CASE) != 0 ? "u" : "";
    return String.format("regex=%s, flags=%s", regex, insensitive + multiline + unicodeCase);
}
/**
 * Compiles the text of the search field into a pattern.
 * <p>
 * The text is used as a regular expression. It is wrapped in word boundaries when the
 * whole-word checkbox is selected and matched case-insensitively (Unicode-aware) unless the
 * case checkbox is selected. If the expression is invalid, the field background is set to
 * the warning color.
 *
 * @return the compiled pattern, or an empty Optional if the field is empty or the expression
 *         cannot be compiled
 */
private Optional<Pattern> getPattern() {
    final String text = field.getText();
    if (text == null || text.isEmpty()) {
        return Optional.empty();
    }

    final String boundary = checkWord.isSelected() ? "\\b" : "";
    final String expression = boundary + text + boundary;

    final int flags;
    if (checkCase.isSelected()) {
        flags = 0;
    } else {
        flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
    }

    try {
        final Pattern compiled = Pattern.compile(expression, flags);
        return Optional.of(compiled);
    } catch (PatternSyntaxException ex) {
        field.setBackground(WARNING_COLOR);
        return Optional.empty();
    }
}
/**
 * Converts a string of single-character regex flags into a {@link Pattern} flag bit mask.
 * <p>
 * Recognized characters: {@code i} (CASE_INSENSITIVE), {@code u} (UNICODE_CASE),
 * {@code U} (UNICODE_CHARACTER_CLASS), {@code s} (DOTALL), {@code m} (MULTILINE),
 * {@code x} (COMMENTS) and {@code d} (UNIX_LINES). A space and {@code g} are accepted but do
 * not contribute to the result.
 *
 * @param flagsString the flag characters, may be {@code null}
 * @return the combined flags; 0 if {@code flagsString} is {@code null}
 * @throws IllegalArgumentException if the string contains any other character
 */
public static int parseFlags(@Nullable String flagsString) {
    if (flagsString == null) {
        return 0;
    }
    int flags = 0;
    for (int i = 0; i < flagsString.length(); i++) {
        final char flag = flagsString.charAt(i);
        switch (flag) {
            case 'i':
                flags |= Pattern.CASE_INSENSITIVE;
                break;
            case 'u':
                flags |= Pattern.UNICODE_CASE;
                break;
            case 'U':
                flags |= Pattern.UNICODE_CHARACTER_CLASS;
                break;
            case 's':
                flags |= Pattern.DOTALL;
                break;
            case 'm':
                flags |= Pattern.MULTILINE;
                break;
            case 'x':
                flags |= Pattern.COMMENTS;
                break;
            case 'd':
                flags |= Pattern.UNIX_LINES;
                break;
            case ' ':
            case 'g':
                // handled in isGlobalFunction
                break;
            default:
                throw new IllegalArgumentException("The regular expression flag is unknown: " + flag);
        }
    }
    return flags;
}
/**
 * Evaluates the {@code regex} operator: tests whether the text operand contains a match of
 * the pattern operand, honoring the optional flags operand.
 * <p>
 * Supported flag characters are {@code s}, {@code m}, {@code i}, {@code x}, {@code d},
 * {@code u} and {@code q}; {@code i} also enables Unicode-aware case folding.
 *
 * @param node the regex expression node
 * @param bindings the bindings used to evaluate the operands
 * @return {@code true} if the operands match according to the {@code regex} operator,
 *         {@code false} otherwise.
 * @throws QueryEvaluationException if an operand has an unsupported type or the flags
 *         contain an unknown character
 */
public Value evaluate(Regex node, BindingSet bindings)
        throws QueryEvaluationException {
    final Value arg = evaluate(node.getArg(), bindings);
    final Value parg = evaluate(node.getPatternArg(), bindings);
    final ValueExpr flagsArg = node.getFlagsArg();
    final Value farg = (flagsArg == null) ? null : evaluate(flagsArg, bindings);

    final boolean operandsSupported = QueryEvaluationUtil.isStringLiteral(arg)
            && QueryEvaluationUtil.isSimpleLiteral(parg)
            && (farg == null || QueryEvaluationUtil.isSimpleLiteral(farg));
    if (!operandsSupported) {
        throw new ValueExprEvaluationException();
    }

    final String text = ((Literal) arg).getLabel();
    final String ptn = ((Literal) parg).getLabel();
    final String flags = (farg == null) ? "" : ((Literal) farg).getLabel();

    // TODO should this Pattern be cached?
    int f = 0;
    for (int i = 0; i < flags.length(); i++) {
        switch (flags.charAt(i)) {
            case 's':
                f |= Pattern.DOTALL;
                break;
            case 'm':
                f |= Pattern.MULTILINE;
                break;
            case 'i':
                f |= Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
                break;
            case 'x':
                f |= Pattern.COMMENTS;
                break;
            case 'd':
                f |= Pattern.UNIX_LINES;
                break;
            case 'u':
                f |= Pattern.UNICODE_CASE;
                break;
            case 'q':
                f |= Pattern.LITERAL;
                break;
            default:
                throw new ValueExprEvaluationException(flags);
        }
    }

    final boolean matched = Pattern.compile(ptn, f).matcher(text).find();
    return BooleanLiteral.valueOf(matched);
}
/**
 * Evaluates the {@code regex} operator: tests whether the text operand contains a match of
 * the pattern operand, honoring the optional flags operand.
 * <p>
 * Supported flag characters are {@code s}, {@code m}, {@code i}, {@code x}, {@code d} and
 * {@code u}; {@code i} also enables Unicode-aware case folding.
 *
 * @param node the regex expression node
 * @param bindings the bindings used to evaluate the operands
 * @return {@code true} if the operands match according to the {@code regex} operator,
 *         {@code false} otherwise.
 * @throws ValueExprEvaluationException if an operand has an unsupported type or the flags
 *         contain an unknown character
 * @throws QueryEvaluationException declared by the signature; may come from operand evaluation
 */
private Value evaluate(Regex node, BindingSet bindings) throws ValueExprEvaluationException, QueryEvaluationException {
    final Value arg = evaluate(node.getArg(), bindings);
    final Value parg = evaluate(node.getPatternArg(), bindings);
    final ValueExpr flagsArg = node.getFlagsArg();
    final Value farg = (flagsArg == null) ? null : evaluate(flagsArg, bindings);

    final boolean operandsSupported = QueryEvaluationUtil.isStringLiteral(arg)
            && QueryEvaluationUtil.isSimpleLiteral(parg)
            && (farg == null || QueryEvaluationUtil.isSimpleLiteral(farg));
    if (!operandsSupported) {
        throw new ValueExprEvaluationException();
    }

    final String text = ((Literal) arg).getLabel();
    final String ptn = ((Literal) parg).getLabel();
    final String flags = (farg == null) ? "" : ((Literal) farg).getLabel();

    // TODO should this Pattern be cached?
    int f = 0;
    for (int i = 0; i < flags.length(); i++) {
        switch (flags.charAt(i)) {
            case 's':
                f |= Pattern.DOTALL;
                break;
            case 'm':
                f |= Pattern.MULTILINE;
                break;
            case 'i':
                f |= Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
                break;
            case 'x':
                f |= Pattern.COMMENTS;
                break;
            case 'd':
                f |= Pattern.UNIX_LINES;
                break;
            case 'u':
                f |= Pattern.UNICODE_CASE;
                break;
            default:
                throw new ValueExprEvaluationException(flags);
        }
    }

    final boolean matched = Pattern.compile(ptn, f).matcher(text).find();
    return BooleanLiteral.valueOf(matched);
}
/**
 * Selects the encoding instance for the current flags.
 *
 * @return the UTF-32 encoding if {@link Pattern#UNICODE_CASE} is set in {@code flags},
 *         the UCS-2 encoding otherwise
 */
private UEncoding getEncoding() {
    final boolean unicodeCase = (this.flags & Pattern.UNICODE_CASE) != 0;
    return unicodeCase ? UTF32Encoding.INSTANCE : UCS2Encoding.INSTANCE;
}
/**
 * Compiles the pattern and flags operands of a REGEX expression into a {@link Pattern}.
 * <p>
 * Supported flag characters are {@code s}, {@code m}, {@code i}, {@code x}, {@code d},
 * {@code u} and {@code q}.
 *
 * @param parg the pattern operand; must be a plain literal
 * @param farg the flags operand; {@code null} or a plain literal
 * @return the compiled pattern
 * @throws IllegalArgumentException if an operand is not a plain literal or the flags contain
 *         an unknown character
 */
private static Pattern getPattern(final Value parg, final Value farg)
        throws IllegalArgumentException {

    if (debug) {
        log.debug("regex pattern: " + parg);
        log.debug("regex flags: " + farg);
    }

    // BLZG-1200 Literals with language types are not included in REGEX
    final boolean plainOperands = QueryEvaluationUtil.isPlainLiteral(parg)
            && (farg == null || QueryEvaluationUtil.isPlainLiteral(farg));
    if (!plainOperands) {
        throw new IllegalArgumentException();
    }

    final String ptn = ((Literal) parg).getLabel();
    final String flags = (farg == null) ? "" : ((Literal) farg).getLabel();

    int f = 0;
    for (int i = 0; i < flags.length(); i++) {
        // See https://www.w3.org/TR/xpath-functions/#flags
        switch (flags.charAt(i)) {
            case 's':
                f |= Pattern.DOTALL;
                break;
            case 'm':
                f |= Pattern.MULTILINE;
                break;
            case 'i':
                /*
                 * The SPARQL REGEX operator is based on the XQuery REGEX
                 * operator. That operator should be Unicode clean by
                 * default. Therefore, when case-folding is specified, we
                 * also need to include the UNICODE_CASE option.
                 *
                 * @see <a
                 * href="https://sourceforge.net/apps/trac/bigdata/ticket/655"
                 * > SPARQL REGEX operator does not perform case-folding
                 * correctly for Unicode data </a>
                 */
                f |= Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
                break;
            case 'x':
                f |= Pattern.COMMENTS;
                break;
            case 'd':
                f |= Pattern.UNIX_LINES;
                break;
            case 'u':
                // Accepted but sets nothing: UNICODE_CASE is implicit with the 'i' flag.
                break;
            case 'q':
                f |= Pattern.LITERAL;
                break;
            default:
                throw new IllegalArgumentException();
        }
    }

    return Pattern.compile(ptn, f);
}
/**
 * Compiles the pattern and flags operands of a REPLACE expression into a {@link Pattern}.
 * <p>
 * Supported flag characters are {@code s}, {@code m}, {@code i}, {@code x}, {@code d},
 * {@code u} and {@code q}. As in the REGEX helper, {@code i} also enables
 * {@link Pattern#UNICODE_CASE} so that case folding is not limited to US-ASCII.
 *
 * @param pattern the pattern operand; must be a simple literal
 * @param flags the flags operand; {@code null} or a simple literal
 * @return the compiled pattern
 * @throws IllegalArgumentException if an operand is not a simple literal or the flags
 *         contain an unknown character
 */
private static Pattern getPattern(final Value pattern, final Value flags)
        throws IllegalArgumentException {
    if (!QueryEvaluationUtil.isSimpleLiteral(pattern)) {
        throw new IllegalArgumentException(
                "incompatible operand for REPLACE: " + pattern);
    }
    String flagString = null;
    if (flags != null) {
        if (!QueryEvaluationUtil.isSimpleLiteral(flags)) {
            throw new IllegalArgumentException(
                    "incompatible operand for REPLACE: " + flags);
        }
        flagString = ((Literal) flags).getLabel();
    }
    final String patternString = ((Literal) pattern).getLabel();
    int f = 0;
    if (flagString != null) {
        for (char c : flagString.toCharArray()) {
            // See https://www.w3.org/TR/xpath-functions/#flags
            switch (c) {
                case 's':
                    f |= Pattern.DOTALL;
                    break;
                case 'm':
                    f |= Pattern.MULTILINE;
                    break;
                case 'i':
                    // The XPath/XQuery regex functions are Unicode clean, so case-insensitive
                    // matching must fold non-ASCII characters too. CASE_INSENSITIVE alone
                    // only folds US-ASCII in java.util.regex.
                    f |= Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
                    break;
                case 'x':
                    f |= Pattern.COMMENTS;
                    break;
                case 'd':
                    f |= Pattern.UNIX_LINES;
                    break;
                case 'u':
                    f |= Pattern.UNICODE_CASE;
                    break;
                case 'q':
                    f |= Pattern.LITERAL;
                    break;
                default:
                    throw new IllegalArgumentException(flagString);
            }
        }
    }
    return Pattern.compile(patternString, f);
}