下面列出了java.util.regex.Pattern#LITERAL 实例代码,或者点击链接到github查看源代码,也可以在右侧发表评论。
/**
 * Inspects an expression that is used as a regular expression and reports problems with it.
 * <p>
 * Unless {@link Pattern#LITERAL} is among {@code flags}, a {@code RegexFileSeparator} warning is reported when
 * the expression reads the static field {@code separator} of {@code java/io/File} or is a virtual call to
 * {@code getSeparator} of {@code java/nio/file/FileSystem}. Afterwards, if the expression has a constant
 * {@code String} value, that value is compiled with {@code flags} and a {@code RegexBadSyntax} warning is
 * reported when the compilation throws a {@link PatternSyntaxException}.
 *
 * @param mc        context the warnings are reported to
 * @param regexExpr expression supplying the regular expression
 * @param flags     {@link Pattern} flags the expression is compiled with
 */
private void checkRegexp(MethodContext mc, Expression regexExpr, int flags) {
    boolean treatedAsRegex = (flags & Pattern.LITERAL) == 0;
    if (treatedAsRegex) {
        if (regexExpr.getCode() == AstCode.GetStatic) {
            FieldReference field = (FieldReference) regexExpr.getOperand();
            boolean isFileSeparator = field.getName().equals("separator")
                && field.getDeclaringType().getInternalName().equals("java/io/File");
            if (isFileSeparator) {
                mc.report("RegexFileSeparator", 0, regexExpr);
            }
        } else if (regexExpr.getCode() == AstCode.InvokeVirtual) {
            MethodReference method = (MethodReference) regexExpr.getOperand();
            boolean isGetSeparator = method.getName().equals("getSeparator")
                && method.getDeclaringType().getInternalName().equals("java/nio/file/FileSystem");
            if (isGetSeparator) {
                mc.report("RegexFileSeparator", 0, regexExpr);
            }
        }
    }

    // Only constant strings can be validated here
    Object constant = Nodes.getConstant(regexExpr);
    if (constant instanceof String) {
        String regex = (String) constant;
        try {
            Pattern.compile(regex, flags);
        } catch (PatternSyntaxException e) {
            mc.report("RegexBadSyntax", 0, regexExpr, Roles.REGEXP.create(regex), ERROR_MESSAGE.create(e.getMessage()));
        }
    }
}
/**
 * Updates the finder object with data from our UI.
 * <p>
 * The search text is compiled as a literal unless the regex check box is selected, and case-insensitively when
 * the ignore-case check box is selected. A non-empty search text is compiled, handed to the finder together with
 * the wrap setting, and inserted into the find combo box; otherwise the finder's pattern is reset to
 * {@code null}.
 */
private void updateFinder() {
    int flag = 0;
    if (!jChkRegex.isSelected()) {
        flag |= Pattern.LITERAL;
    }
    // Set exactly once; the flag used to be OR-ed in twice by two equivalent statements
    if (jChkIgnoreCase.isSelected()) {
        flag |= Pattern.CASE_INSENSITIVE;
    }
    String regex = (String) jCmbFind.getSelectedItem();
    if (regex != null && regex.length() > 0) {
        Pattern pattern = Pattern.compile(regex, flag);
        finder.setWrap(jChkWrap.isSelected());
        finder.setPattern(pattern);
        ActionUtils.insertIntoCombo(jCmbFind, regex);
    } else {
        finder.setPattern(null);
    }
}
/**
 * Translates a string of single-letter options into a {@link Pattern} flag bit mask.
 * <p>
 * {@code i}, {@code d}, {@code m}, {@code s}, {@code u} and {@code x} are the standard embedded match-flag
 * letters; {@code l} ({@link Pattern#LITERAL}) and {@code c} ({@link Pattern#CANON_EQ}) are letters picked for
 * the remaining flags.
 *
 * @param opts the option letters, each one standing for one flag
 * @return the bitwise OR of the flags selected by {@code opts}
 * @throws InvalidParameterValueException if {@code opts} contains any other character
 */
private static int parseOptionFlags(String opts) {
    int mask = 0;
    for (char option : opts.toCharArray()) {
        final int bit;
        switch (option) {
            // Standard 'special construct match flags'
            case 'i':
                bit = Pattern.CASE_INSENSITIVE;
                break;
            case 'd':
                bit = Pattern.UNIX_LINES;
                break;
            case 'm':
                bit = Pattern.MULTILINE;
                break;
            case 's':
                bit = Pattern.DOTALL;
                break;
            case 'u':
                bit = Pattern.UNICODE_CASE;
                break;
            case 'x':
                bit = Pattern.COMMENTS;
                break;
            // Letters picked for the remaining flags
            case 'l':
                bit = Pattern.LITERAL;
                break;
            case 'c':
                bit = Pattern.CANON_EQ;
                break;
            default:
                throw new InvalidParameterValueException("Invalid option: " + option);
        }
        mask |= bit;
    }
    return mask;
}
/**
 * Initializes this object from a properties stream.
 * <p>
 * The stream is loaded as {@code UTF-8} properties with delimiter parsing disabled. The pattern string and the
 * boolean pattern-flag properties (each defaulting to {@code false}) are read and compiled into the pattern,
 * its groups are counted, and the name, description and test message are read with their defaults.
 *
 * @param in stream the properties are loaded from
 * @throws ConfigurationException declared for failures while loading the configuration
 */
public void init(InputStream in) throws ConfigurationException {
    propertiesConfiguration = new PropertiesConfiguration();
    propertiesConfiguration.setDelimiterParsingDisabled(true);
    propertiesConfiguration.load(in, "UTF-8");

    configuration = new DataConfiguration(propertiesConfiguration);
    configuration.setDelimiterParsingDisabled(true);

    String pa = configuration.getString(PROP_PATTERN);

    // Every flag is opt-in: it is only set when its property is present and true
    int flags = 0;
    if (configuration.getBoolean(PROP_PATTERN_CANON_EQ, false)) {
        flags |= Pattern.CANON_EQ;
    }
    if (configuration.getBoolean(PROP_PATTERN_CASE_INSENSITIVE, false)) {
        flags |= Pattern.CASE_INSENSITIVE;
    }
    if (configuration.getBoolean(PROP_PATTERN_COMMENTS, false)) {
        flags |= Pattern.COMMENTS;
    }
    if (configuration.getBoolean(PROP_PATTERN_DOTALL, false)) {
        flags |= Pattern.DOTALL;
    }
    if (configuration.getBoolean(PROP_PATTERN_LITERAL, false)) {
        flags |= Pattern.LITERAL;
    }
    if (configuration.getBoolean(PROP_PATTERN_MULTILINE, false)) {
        flags |= Pattern.MULTILINE;
    }
    if (configuration.getBoolean(PROP_PATTERN_UNICODE_CASE, false)) {
        flags |= Pattern.UNICODE_CASE;
    }
    if (configuration.getBoolean(PROP_PATTERN_UNIX_LINES, false)) {
        flags |= Pattern.UNIX_LINES;
    }

    pattern = Pattern.compile(pa, flags);
    groupCount = countGroups(pattern);

    name = configuration.getString(PROP_NAME, "NAME NOT SET!");
    description = configuration.getString(PROP_DESCRIPTION, "DESCRIPTION NOT SET!");
    testMessage = configuration.getString(PROP_TEST_MESSAGE, "");
}
/**
 * Builds the {@link Pattern} flag bit mask from the state of the flag check boxes.
 *
 * @return the bitwise OR of the flags whose check box is selected, {@code 0} if none is
 */
private int getRegexFlag() {
    // The non-short-circuit | evaluates every operand, so each check box is queried in turn
    return (unixLinesCkb.isSelected() ? Pattern.UNIX_LINES : 0)
        | (caseInsensitiveCkb.isSelected() ? Pattern.CASE_INSENSITIVE : 0)
        | (commentsCkb.isSelected() ? Pattern.COMMENTS : 0)
        | (multilineCkb.isSelected() ? Pattern.MULTILINE : 0)
        | (literalCkb.isSelected() ? Pattern.LITERAL : 0)
        | (dotallCkb.isSelected() ? Pattern.DOTALL : 0)
        | (unicodeCaseCkb.isSelected() ? Pattern.UNICODE_CASE : 0)
        | (canonEqCkb.isSelected() ? Pattern.CANON_EQ : 0);
}
@VisibleForTesting
/*
 * Compiles patternString into a Pattern according to the given search options.
 *
 * literalParsing - treat patternString as literal text rather than as a regular expression
 * caseSensitive  - when false, Pattern.CASE_INSENSITIVE is set
 * wholeWord      - when true, the pattern is surrounded by \b word boundaries
 * patternString  - the text or regular expression to search for
 * isUnicode      - when true, Pattern.UNICODE_CHARACTER_CLASS is set
 *
 * Returns the compiled pattern. Pattern.compile throws PatternSyntaxException if the resulting expression
 * is not valid.
 */
static Pattern buildPattern( boolean literalParsing, boolean caseSensitive, boolean wholeWord,
                             String patternString, boolean isUnicode ) {
  int flags = 0;
  // LITERAL would also turn the \b anchors into plain text, so it is only usable without wholeWord
  if ( literalParsing && !wholeWord ) {
    flags |= Pattern.LITERAL;
  }
  if ( !caseSensitive ) {
    flags |= Pattern.CASE_INSENSITIVE;
  }
  if ( isUnicode ) {
    flags |= Pattern.UNICODE_CHARACTER_CLASS;
  }

  /*
   * XXX: I don't like this parameter. I think it would almost always be better for the user to define either word
   * boundaries or ^/$ anchors explicitly in their pattern.
   */
  String expression = patternString;
  if ( wholeWord ) {
    if ( literalParsing ) {
      // Pattern.quote copes with text that itself contains "\E"; wrapping it in \Q...\E by hand would end the
      // quoted section early and let the remainder be parsed as a regular expression
      expression = Pattern.quote( expression );
    }
    expression = "\\b" + expression + "\\b";
  }

  return Pattern.compile( expression, flags );
}
/**
 * Builds the {@link Pattern} compile flags from the boolean flag properties of the given context.
 *
 * @param context context the flag properties are read from
 * @return the bitwise OR of the {@link Pattern} flags whose property evaluates to {@code true}
 */
int getCompileFlags(ProcessContext context) {
    int mask = 0;
    if (context.getProperty(UNIX_LINES).asBoolean()) {
        mask |= Pattern.UNIX_LINES;
    }
    if (context.getProperty(CASE_INSENSITIVE).asBoolean()) {
        mask |= Pattern.CASE_INSENSITIVE;
    }
    if (context.getProperty(COMMENTS).asBoolean()) {
        mask |= Pattern.COMMENTS;
    }
    if (context.getProperty(MULTILINE).asBoolean()) {
        mask |= Pattern.MULTILINE;
    }
    if (context.getProperty(LITERAL).asBoolean()) {
        mask |= Pattern.LITERAL;
    }
    if (context.getProperty(DOTALL).asBoolean()) {
        mask |= Pattern.DOTALL;
    }
    if (context.getProperty(UNICODE_CASE).asBoolean()) {
        mask |= Pattern.UNICODE_CASE;
    }
    if (context.getProperty(CANON_EQ).asBoolean()) {
        mask |= Pattern.CANON_EQ;
    }
    if (context.getProperty(UNICODE_CHARACTER_CLASS).asBoolean()) {
        mask |= Pattern.UNICODE_CHARACTER_CLASS;
    }
    return mask;
}
/**
 * Parses a {@code |}-separated list of regex flag names into a {@link Pattern} flag bit mask.
 * <p>
 * Names are upper-cased with {@link Locale#ROOT} before they are compared, and empty entries are skipped. The
 * Unicode character class flag is accepted both as {@code UNICODE_CHAR_CLASS} and under the name of the
 * {@link Pattern} constant itself, {@code UNICODE_CHARACTER_CLASS}.
 *
 * @param flags the flag names, separated by {@code |}
 * @return the bitwise OR of the {@link Pattern} flags named in {@code flags}
 * @throws IllegalArgumentException if an entry is not a known flag name
 */
public static int flagsFromString(String flags) {
    int pFlags = 0;
    for (String s : Strings.delimitedListToStringArray(flags, "|")) {
        if (s.isEmpty()) {
            continue;
        }
        s = s.toUpperCase(Locale.ROOT);
        switch (s) {
            case "CASE_INSENSITIVE":
                pFlags |= Pattern.CASE_INSENSITIVE;
                break;
            case "MULTILINE":
                pFlags |= Pattern.MULTILINE;
                break;
            case "DOTALL":
                pFlags |= Pattern.DOTALL;
                break;
            case "UNICODE_CASE":
                pFlags |= Pattern.UNICODE_CASE;
                break;
            case "CANON_EQ":
                pFlags |= Pattern.CANON_EQ;
                break;
            case "UNIX_LINES":
                pFlags |= Pattern.UNIX_LINES;
                break;
            case "LITERAL":
                pFlags |= Pattern.LITERAL;
                break;
            case "COMMENTS":
                pFlags |= Pattern.COMMENTS;
                break;
            case "UNICODE_CHAR_CLASS":
            case "UNICODE_CHARACTER_CLASS": // the Pattern constant's own name is accepted as an alias
                pFlags |= UNICODE_CHARACTER_CLASS;
                break;
            default:
                throw new IllegalArgumentException("Unknown regex flag [" + s + "]");
        }
    }
    return pFlags;
}
/**
 * Renders a {@link Pattern} flag bit mask as a list of flag names.
 * <p>
 * Every flag that is set contributes its name followed by {@code |}, so a non-empty result ends with a trailing
 * {@code |}; flags without a known name are ignored.
 *
 * @param flags the {@link Pattern} flag bit mask
 * @return the names of the flags that are set, or an empty string if none is
 */
public static String flagsToString(int flags) {
    // Parallel tables: masks[i] is rendered as labels[i]; the table order is the output order
    final int[] masks = {
        Pattern.CASE_INSENSITIVE,
        Pattern.MULTILINE,
        Pattern.DOTALL,
        Pattern.UNICODE_CASE,
        Pattern.CANON_EQ,
        Pattern.UNIX_LINES,
        Pattern.LITERAL,
        Pattern.COMMENTS,
        UNICODE_CHARACTER_CLASS
    };
    final String[] labels = {
        "CASE_INSENSITIVE|",
        "MULTILINE|",
        "DOTALL|",
        "UNICODE_CASE|",
        "CANON_EQ|",
        "UNIX_LINES|",
        "LITERAL|",
        "COMMENTS|",
        "UNICODE_CHAR_CLASS|"
    };

    StringBuilder rendered = new StringBuilder();
    for (int i = 0; i < masks.length; i++) {
        if ((flags & masks[i]) != 0) {
            rendered.append(labels[i]);
        }
    }
    return rendered.toString();
}
/**
 * Checks that the regexp flag names given as request parameters are resolved to the bitwise OR of the
 * corresponding {@link Pattern} flags.
 */
@Test
public void testRegexpFlagParsing() {
    final int expected = Pattern.CASE_INSENSITIVE
        | Pattern.LITERAL
        | Pattern.COMMENTS
        | Pattern.MULTILINE
        | Pattern.UNIX_LINES
        | Pattern.UNICODE_CASE
        | Pattern.DOTALL
        | Pattern.CANON_EQ;

    ModifiableSolrParams params = new ModifiableSolrParams();
    params.add(TermsParams.TERMS_REGEXP_FLAG,
        "case_insensitive", "literal", "comments", "multiline", "unix_lines", "unicode_case", "dotall", "canon_eq");

    try (TermsComponent termsComponent = new TermsComponent()) {
        assertEquals(expected, termsComponent.resolveRegexpFlags(params));
    } catch (IOException e) {
        // Thrown when closing the component fails
        fail("Error closing TermsComponent");
    }
}
/**
 * Checks the stack item at the given depth, which is used as a regular expression, and reports bugs for it.
 * <p>
 * Nothing happens when the stack holds fewer than {@code stackDepth} items. If the item is marked as a file
 * separator string and {@link Pattern#LITERAL} is not set, {@code RE_CANT_USE_FILE_SEPARATOR_AS_REGULAR_EXPRESSION}
 * is reported. Otherwise, if the item is a constant string, it is compiled with {@code flags} and
 * {@code RE_BAD_SYNTAX_FOR_REGULAR_EXPRESSION} is reported when compilation fails.
 *
 * @param stackDepth position on the opcode stack of the regular expression item
 * @param flags      {@link Pattern} flags the expression is compiled with
 */
private void sawRegExPattern(int stackDepth, int flags) {
    if (stack.getStackDepth() < stackDepth) {
        return;
    }

    OpcodeStack.Item item = stack.getStackItem(stackDepth);
    boolean fileSeparatorAsRegex = item.getSpecialKind() == OpcodeStack.Item.FILE_SEPARATOR_STRING
        && (flags & Pattern.LITERAL) == 0;
    if (fileSeparatorAsRegex) {
        bugReporter.reportBug(new BugInstance(this, "RE_CANT_USE_FILE_SEPARATOR_AS_REGULAR_EXPRESSION", HIGH_PRIORITY)
            .addClassAndMethod(this).addCalledMethod(this).addSourceLine(this));
        return;
    }

    Object constant = item.getConstant();
    if (!(constant instanceof String)) {
        return;
    }

    String regex = (String) constant;
    try {
        Pattern.compile(regex, flags);
    } catch (IllegalArgumentException e) {
        // Keep only the first line of the message
        String firstLine = e.getMessage();
        int lineEnd = firstLine.indexOf('\n');
        if (lineEnd > 0) {
            firstLine = firstLine.substring(0, lineEnd);
        }

        BugInstance bug = new BugInstance(this, "RE_BAD_SYNTAX_FOR_REGULAR_EXPRESSION", HIGH_PRIORITY)
            .addClassAndMethod(this).addCalledMethod(this).addString(firstLine).describe(StringAnnotation.ERROR_MSG_ROLE)
            .addString(regex).describe(StringAnnotation.REGEX_ROLE);

        String options = getOptions(flags);
        if (options.length() > 0) {
            bug.addString("Regex flags: " + options).describe(StringAnnotation.STRING_MESSAGE);
        }

        bug.addSourceLine(this);
        bugReporter.reportBug(bug);
    }
}
/**
 * Sets the pattern from a string and two options.
 * <p>
 * A {@code null} or empty string resets the pattern to {@code null}.
 *
 * @param pat        the pattern string
 * @param regex      {@code true} to compile {@code pat} as a regular expression, {@code false} to compile it
 *                   with {@link Pattern#LITERAL}
 * @param ignoreCase {@code true} to compile with {@link Pattern#CASE_INSENSITIVE}
 * @throws PatternSyntaxException if {@code pat} cannot be compiled
 */
public void setPattern(String pat, boolean regex, boolean ignoreCase)
        throws PatternSyntaxException {
    if (pat == null || pat.length() == 0) {
        setPattern(null);
        return;
    }

    int flag = 0;
    if (!regex) {
        flag |= Pattern.LITERAL;
    }
    if (ignoreCase) {
        flag |= Pattern.CASE_INSENSITIVE;
    }
    setPattern(Pattern.compile(pat, flag));
}
/**
 * Builds the {@link Pattern} compile flags from the boolean flag properties of the given context.
 *
 * @param context context the flag properties are read from
 * @return the bitwise OR of the {@link Pattern} flags whose property evaluates to {@code true}
 */
int getCompileFlags(ProcessContext context) {
    int mask = 0;
    if (context.getProperty(UNIX_LINES).asBoolean()) {
        mask |= Pattern.UNIX_LINES;
    }
    if (context.getProperty(CASE_INSENSITIVE).asBoolean()) {
        mask |= Pattern.CASE_INSENSITIVE;
    }
    if (context.getProperty(COMMENTS).asBoolean()) {
        mask |= Pattern.COMMENTS;
    }
    if (context.getProperty(MULTILINE).asBoolean()) {
        mask |= Pattern.MULTILINE;
    }
    if (context.getProperty(LITERAL).asBoolean()) {
        mask |= Pattern.LITERAL;
    }
    if (context.getProperty(DOTALL).asBoolean()) {
        mask |= Pattern.DOTALL;
    }
    if (context.getProperty(UNICODE_CASE).asBoolean()) {
        mask |= Pattern.UNICODE_CASE;
    }
    if (context.getProperty(CANON_EQ).asBoolean()) {
        mask |= Pattern.CANON_EQ;
    }
    if (context.getProperty(UNICODE_CHARACTER_CLASS).asBoolean()) {
        mask |= Pattern.UNICODE_CHARACTER_CLASS;
    }
    return mask;
}
@VisibleForTesting
/*
 * Compiles patternString into a Pattern according to the given search options.
 *
 * literalParsing - treat patternString as literal text rather than as a regular expression
 * caseSensitive  - when false, Pattern.CASE_INSENSITIVE is set
 * wholeWord      - when true, the pattern is surrounded by \b word boundaries
 * patternString  - the text or regular expression to search for
 * isUnicode      - when true, Pattern.UNICODE_CHARACTER_CLASS is set
 *
 * Returns the compiled pattern. Pattern.compile throws PatternSyntaxException if the resulting expression
 * is not valid.
 */
static Pattern buildPattern( boolean literalParsing, boolean caseSensitive, boolean wholeWord,
                             String patternString, boolean isUnicode ) {
  int flags = 0;
  // LITERAL would also turn the \b anchors into plain text, so it is only usable without wholeWord
  if ( literalParsing && !wholeWord ) {
    flags |= Pattern.LITERAL;
  }
  if ( !caseSensitive ) {
    flags |= Pattern.CASE_INSENSITIVE;
  }
  if ( isUnicode ) {
    flags |= Pattern.UNICODE_CHARACTER_CLASS;
  }

  /*
   * XXX: I don't like this parameter. I think it would almost always be better for the user to define either word
   * boundaries or ^/$ anchors explicitly in their pattern.
   */
  String expression = patternString;
  if ( wholeWord ) {
    if ( literalParsing ) {
      // Pattern.quote copes with text that itself contains "\E"; wrapping it in \Q...\E by hand would end the
      // quoted section early and let the remainder be parsed as a regular expression
      expression = Pattern.quote( expression );
    }
    expression = "\\b" + expression + "\\b";
  }

  return Pattern.compile( expression, flags );
}
/**
 * Parses a {@code |}-separated list of regex flag names into a {@link Pattern} flag bit mask.
 * <p>
 * Names are upper-cased with {@link Locale#ROOT} before they are compared, and empty entries are skipped. The
 * Unicode character class flag is accepted as {@code UNICODE_CHAR_CLASS} or {@code UNICODE_CHARACTER_CLASS}.
 *
 * @param flags the flag names, separated by {@code |}
 * @return the bitwise OR of the {@link Pattern} flags named in {@code flags}
 * @throws IllegalArgumentException if an entry is not a known flag name
 */
public static int flagsFromString(String flags) {
    int pFlags = 0;
    for (String s : Strings.delimitedListToStringArray(flags, "|")) {
        if (s.isEmpty()) {
            continue;
        }
        s = s.toUpperCase(Locale.ROOT);
        switch (s) {
            case "CASE_INSENSITIVE":
                pFlags |= Pattern.CASE_INSENSITIVE;
                break;
            case "MULTILINE":
                pFlags |= Pattern.MULTILINE;
                break;
            case "DOTALL":
                pFlags |= Pattern.DOTALL;
                break;
            case "UNICODE_CASE":
                pFlags |= Pattern.UNICODE_CASE;
                break;
            case "CANON_EQ":
                pFlags |= Pattern.CANON_EQ;
                break;
            case "UNIX_LINES":
                pFlags |= Pattern.UNIX_LINES;
                break;
            case "LITERAL":
                pFlags |= Pattern.LITERAL;
                break;
            case "COMMENTS":
                pFlags |= Pattern.COMMENTS;
                break;
            case "UNICODE_CHAR_CLASS":
            case "UNICODE_CHARACTER_CLASS":
                pFlags |= UNICODE_CHARACTER_CLASS;
                break;
            default:
                throw new IllegalArgumentException("Unknown regex flag [" + s + "]");
        }
    }
    return pFlags;
}
/**
 * Sets up all patterns for the text filter and for the column text and date filters.
 * <p>
 * The text filter is compiled case-insensitively, and as a literal unless the customize manager says it is a
 * regular expression. For each column filter text, a surrounding pair of parentheses is stripped and a leading
 * {@code !} selects the negated form. The remaining text is matched literally and case-insensitively; when
 * nothing remains, the predefined empty / not-empty patterns are used instead.
 */
public void update() {
    parentMatches.clear();

    // Update text filter pattern
    if (!Strings.isValid(xViewer.getCustomizeMgr().getFilterText())) {
        textPattern = null;
    } else {
        int flags = Pattern.CASE_INSENSITIVE;
        if (!xViewer.getCustomizeMgr().isFilterTextRegularExpression()) {
            flags = Pattern.LITERAL | flags;
        }
        textPattern = Pattern.compile(xViewer.getCustomizeMgr().getFilterText(), flags);
    }

    // Update column filter patterns
    colIdToPattern.clear();
    colIdToDateFilter.clear();
    for (String colId : xViewer.getCustomizeMgr().getColumnFilterData().getColIds()) {
        String colFilterText = xViewer.getCustomizeMgr().getColumnFilterText(colId);
        if (colFilterText != null) {
            boolean isWrapped = colFilterText.matches("^\\(.*\\)$");
            if (isWrapped) {
                colFilterText = colFilterText.substring(1, colFilterText.length() - 1);
            }
            boolean isNot = colFilterText.startsWith("!");
            if (isNot) {
                colFilterText = colFilterText.substring(1);
            }

            // Decide emptiness BEFORE quoting: Pattern.quote("") yields "\Q\E", which never equals "", so
            // testing the quoted text made the empty / not-empty patterns unreachable
            boolean isEmpty = colFilterText.isEmpty();
            String quotedText = Pattern.quote(colFilterText);

            // Handle != case ^(.(?<!big))*$
            if (isNot) {
                if (isEmpty) {
                    colIdToPattern.put(colId, NOT_EMPTY_STR_PATTERN);
                } else {
                    colIdToPattern.put(colId,
                        Pattern.compile("^(.(?<!" + quotedText + "))*$", Pattern.CASE_INSENSITIVE));
                }
            }
            // Handle normal case
            else {
                if (isEmpty) {
                    colIdToPattern.put(colId, EMPTY_STR_PATTERN);
                } else {
                    colIdToPattern.put(colId, Pattern.compile(quotedText, Pattern.CASE_INSENSITIVE));
                }
            }
        }

        ColumnDateFilter dateFilter = xViewer.getCustomizeMgr().getColumnDateFilter(colId);
        if (dateFilter != null) {
            colIdToDateFilter.put(colId, dateFilter);
        }
    }
}
/**
 * Evaluates the <code>regex</code> operator: determines whether the text operand contains a match of the
 * pattern operand, compiled with the optional flags operand.
 * <p>
 * Supported flag letters are <tt>s</tt>, <tt>m</tt>, <tt>i</tt> (which also enables Unicode case folding),
 * <tt>x</tt>, <tt>d</tt>, <tt>u</tt> and <tt>q</tt>.
 *
 * @param node     the regex operator node
 * @param bindings the bindings the operands are evaluated against
 * @return the boolean literal for <tt>true</tt> if a match is found in the text, for <tt>false</tt> otherwise
 * @throws ValueExprEvaluationException if the operands are not literals of the expected kind, or the flags
 *         contain an unsupported letter
 */
public Value evaluate(Regex node, BindingSet bindings)
        throws QueryEvaluationException {
    Value arg = evaluate(node.getArg(), bindings);
    Value parg = evaluate(node.getPatternArg(), bindings);
    ValueExpr flagsArg = node.getFlagsArg();
    Value farg = (flagsArg != null) ? evaluate(flagsArg, bindings) : null;

    boolean operandsUsable = QueryEvaluationUtil.isStringLiteral(arg)
        && QueryEvaluationUtil.isSimpleLiteral(parg)
        && (farg == null || QueryEvaluationUtil.isSimpleLiteral(farg));
    if (!operandsUsable) {
        throw new ValueExprEvaluationException();
    }

    String text = ((Literal) arg).getLabel();
    String ptn = ((Literal) parg).getLabel();
    String flags = (farg != null) ? ((Literal) farg).getLabel() : "";

    // TODO should this Pattern be cached?
    int f = 0;
    for (int i = 0; i < flags.length(); i++) {
        switch (flags.charAt(i)) {
            case 's':
                f |= Pattern.DOTALL;
                break;
            case 'm':
                f |= Pattern.MULTILINE;
                break;
            case 'i':
                f |= Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
                break;
            case 'x':
                f |= Pattern.COMMENTS;
                break;
            case 'd':
                f |= Pattern.UNIX_LINES;
                break;
            case 'u':
                f |= Pattern.UNICODE_CASE;
                break;
            case 'q':
                f |= Pattern.LITERAL;
                break;
            default:
                throw new ValueExprEvaluationException(flags);
        }
    }

    boolean found = Pattern.compile(ptn, f).matcher(text).find();
    return BooleanLiteral.valueOf(found);
}
/**
 * Compiles the pattern operand of a REGEX expression with the optional flags operand.
 * <p>
 * Both operands must be plain literals. Supported flag letters are {@code s}, {@code m}, {@code i},
 * {@code x}, {@code d}, {@code u} and {@code q}; {@code u} is accepted but has no effect of its own because
 * {@code i} already enables Unicode case folding.
 *
 * @param parg the pattern operand
 * @param farg the flags operand, or {@code null} if no flags were given
 * @return the compiled pattern
 * @throws IllegalArgumentException if an operand is not a plain literal or the flags contain an unsupported
 *         letter
 */
private static Pattern getPattern(final Value parg, final Value farg)
        throws IllegalArgumentException {
    if (debug) {
        log.debug("regex pattern: " + parg);
        log.debug("regex flags: " + farg);
    }

    // BLZG-1200 Literals with language types are not included in REGEX
    if (!QueryEvaluationUtil.isPlainLiteral(parg)
            || (farg != null && !QueryEvaluationUtil.isPlainLiteral(farg))) {
        throw new IllegalArgumentException();
    }

    final String ptn = ((Literal) parg).getLabel();
    final String flags = (farg == null) ? "" : ((Literal) farg).getLabel();

    int f = 0;
    for (int i = 0; i < flags.length(); i++) {
        // See https://www.w3.org/TR/xpath-functions/#flags
        switch (flags.charAt(i)) {
            case 's':
                f |= Pattern.DOTALL;
                break;
            case 'm':
                f |= Pattern.MULTILINE;
                break;
            case 'i':
                /*
                 * The SPARQL REGEX operator is based on the XQuery REGEX operator. That operator should be
                 * Unicode clean by default. Therefore, when case-folding is specified, we also need to include
                 * the UNICODE_CASE option.
                 *
                 * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/655"> SPARQL REGEX operator
                 * does not perform case-folding correctly for Unicode data </a>
                 */
                f |= Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
                break;
            case 'x':
                f |= Pattern.COMMENTS;
                break;
            case 'd':
                f |= Pattern.UNIX_LINES;
                break;
            case 'u':
                // Implicit with the 'i' flag: accepted, but sets nothing by itself
                break;
            case 'q':
                f |= Pattern.LITERAL;
                break;
            default:
                throw new IllegalArgumentException();
        }
    }

    return Pattern.compile(ptn, f);
}
/**
 * Compiles the pattern operand of a REPLACE expression with the optional flags operand.
 * <p>
 * Both operands must be simple literals. Supported flag letters are {@code s}, {@code m}, {@code i},
 * {@code x}, {@code d}, {@code u} and {@code q}.
 *
 * @param pattern the pattern operand
 * @param flags   the flags operand, or {@code null} if no flags were given
 * @return the compiled pattern
 * @throws IllegalArgumentException if an operand is not a simple literal or the flags contain an unsupported
 *         letter
 */
private static Pattern getPattern(final Value pattern, final Value flags)
        throws IllegalArgumentException {
    if (!QueryEvaluationUtil.isSimpleLiteral(pattern)) {
        throw new IllegalArgumentException(
            "incompatible operand for REPLACE: " + pattern);
    }
    if (flags != null && !QueryEvaluationUtil.isSimpleLiteral(flags)) {
        throw new IllegalArgumentException(
            "incompatible operand for REPLACE: " + flags);
    }

    String flagString = (flags == null) ? null : ((Literal) flags).getLabel();
    String patternString = ((Literal) pattern).getLabel();

    int f = 0;
    int flagCount = (flagString == null) ? 0 : flagString.length();
    for (int i = 0; i < flagCount; i++) {
        // See https://www.w3.org/TR/xpath-functions/#flags
        switch (flagString.charAt(i)) {
            case 's':
                f |= Pattern.DOTALL;
                break;
            case 'm':
                f |= Pattern.MULTILINE;
                break;
            case 'i':
                f |= Pattern.CASE_INSENSITIVE;
                break;
            case 'x':
                f |= Pattern.COMMENTS;
                break;
            case 'd':
                f |= Pattern.UNIX_LINES;
                break;
            case 'u':
                f |= Pattern.UNICODE_CASE;
                break;
            case 'q':
                f |= Pattern.LITERAL;
                break;
            default:
                throw new IllegalArgumentException(flagString);
        }
    }

    return Pattern.compile(patternString, f);
}
/**
 * Finds the location of the given String in the document, searching from position <code>start</code>.
 * The search string is matched literally; the result is whatever the {@link Pattern}-based
 * <code>getIndexOf</code> overload returns for it (documented as -1 if the search string is not found).
 *
 * @param search The String to search for
 * @param start The beginning index of search
 * @return the location reported by the {@link Pattern}-based overload
 * @deprecated use {@code getMatcher} instead
 */
@Deprecated
public int getIndexOf(String search, int start) {
    // LITERAL: no character of the search string is treated as regex syntax
    return getIndexOf(Pattern.compile(search, Pattern.LITERAL), start);
}