下面列出了 java.util.regex.MatchResult#end() 的实例代码，或者点击链接到 GitHub 查看源代码，也可以在右侧发表评论。
/**
 * Consumes a quoted string whose opening quote was matched by {@code q}.
 *
 * <p>The string content is reported through {@code eventHandler.onQuoted}. When no
 * closing quote is found on this line, the remainder of the line is reported and
 * {@code isInsideString} is raised.
 *
 * @param line  the line being scanned
 * @param quote matcher used to look for the closing quote
 * @param q     match result of the opening quote
 * @return the position at which scanning should continue
 */
int skipQuoted(String line, Matcher quote, MatchResult q) {
    // Read the content start before searching again: q may come from the same matcher.
    final int contentStart = q.end();
    final MatchResult closing = find(quote, quoteRestart(q));
    if (closing != null) {
        // Closed on this line: emit the content and continue after the closing quote.
        eventHandler.onQuoted(line.substring(contentStart, quoteStart(closing)), true, true);
        return closing.end();
    }
    // No further quote on this line: the string is left open.
    eventHandler.onQuoted(line.substring(contentStart), true, false);
    isInsideString = true;
    return line.length();
}
/**
 * Consumes a block comment whose start marker was matched by {@code bcs}.
 *
 * <p>The comment text is reported through {@code eventHandler.onBlockComment}. When the
 * end marker is not found on this line, the remainder of the line is reported and
 * {@code isInsideBlockComment} is raised.
 *
 * @param line            the line being scanned
 * @param blockCommentEnd matcher used to look for the end marker
 * @param bcs             match result of the block comment start
 * @return the position at which scanning should continue
 */
int skipBlockComment(String line, Matcher blockCommentEnd, MatchResult bcs) {
    final int bodyStart = bcs.end();
    final MatchResult terminator = find(blockCommentEnd, bodyStart);
    if (terminator != null) {
        // Comment ends on this line: emit its body and continue after the end marker.
        eventHandler.onBlockComment(line.substring(bodyStart, terminator.start()), true, true);
        return terminator.end();
    }
    // No end marker on this line: the comment is left open.
    eventHandler.onBlockComment(line.substring(bodyStart), true, false);
    isInsideBlockComment = true;
    return line.length();
}
/**
 * Checks whether every part-of-speech constraint of a rule is satisfied.
 *
 * <p>Each constraint has the form {@code group(N):REGEX:}. For every constraint the POS
 * obtained for the span of group {@code N} of {@code m} (offset by the sentence begin)
 * must match {@code REGEX}.
 *
 * @param s             sentence the match was found in
 * @param posConstraint constraint string containing zero or more {@code group(N):REGEX:} entries
 * @param m             match result whose groups are constrained
 * @param jcas          CAS passed on to the POS lookup
 * @return {@code false} as soon as one constraint is violated, {@code true} otherwise
 */
public boolean checkPosConstraint(Sentence s, String posConstraint, MatchResult m, JCas jcas) {
    final Pattern constraintPattern = Pattern.compile("group\\(([0-9]+)\\):(.*?):");
    for (MatchResult constraint : Toolbox.findMatches(constraintPattern, posConstraint)) {
        final int group = Integer.parseInt(constraint.group(1));
        final String expectedPos = constraint.group(2);
        final int begin = s.getBegin() + m.start(group);
        final int end = s.getBegin() + m.end(group);
        final String actualPos = getPosFromMatchResult(begin, end, s, jcas);
        if (!actualPos.matches(expectedPos)) {
            return false;
        }
        Logger.printDetail("POS CONSTRAINT IS VALID: pos should be " + expectedPos + " and is " + actualPos);
    }
    return true;
}
/**
 * Replaces every match of this instance's pattern with the text supplied by a callback.
 *
 * @param charSequence the input text
 * @param callback     invoked once per match; returning {@code null} leaves that match untouched
 * @return the input with all non-null replacements applied
 * @throws CallbackMatcherException propagated from the callback
 */
public String replaceMatches(CharSequence charSequence, Callback callback) throws CallbackMatcherException {
    final Matcher matcher = this.pattern.matcher(charSequence);
    final StringBuilder out = new StringBuilder(charSequence.length());
    // Index in the input up to which text has already been copied to the output.
    int copied = 0;
    while (matcher.find()) {
        final MatchResult hit = matcher.toMatchResult();
        final String replacement = callback.foundMatch(hit);
        if (replacement != null) {
            out.append(charSequence, copied, hit.start()).append(replacement);
            copied = hit.end();
        }
    }
    // Copy whatever follows the last replaced match (or everything, if none was replaced).
    out.append(charSequence, copied, charSequence.length());
    return out.toString();
}
/**
 * Replaces the placeholders found by {@code SUBSTITUTION_PATTERN} with entries of an array.
 * The placeholder's text between its first and last character is parsed as the array index;
 * placeholders whose index is outside the array are left as they are.<br>
 * (Note - we use this method and not the NLS.bind() because it does not handle well code blocks existence)
 *
 * @param content
 *            the text containing placeholders
 * @param substitutions
 *            the replacement values, addressed by placeholder index
 * @return A string, substituted with the array's content.
 */
private static String substitute(String content, String[] substitutions)
{
    final StringBuilder out = new StringBuilder(content);
    final Matcher placeholders = SUBSTITUTION_PATTERN.matcher(content);
    // Accumulated length difference between 'out' and 'content' caused by earlier replacements.
    int shift = 0;
    while (placeholders.find())
    {
        final int from = placeholders.start();
        final int to = placeholders.end();
        // Strip the first and last character of the match to get the number.
        final int index = Integer.parseInt(content.substring(from + 1, to - 1));
        if (index < 0 || index >= substitutions.length)
        {
            continue;
        }
        final String value = substitutions[index];
        out.replace(shift + from, shift + to, value);
        shift += value.length() - (to - from);
    }
    return out.toString();
}
/**
 * Parse the given filter string and return transformed filter expression.
 *
 * <p>Automatically calculate <code>LEFT JOIN</code> for association path expressions and the
 * path expressions are replaced with the join variables. Every match of
 * <code>pathPattern</code> is replaced by the alias returned from <code>joinName</code> for
 * its first group, or by <code>"self." + group</code> when no alias is returned.
 *
 * @param filter the filter expression
 * @return the transformed filter expression
 */
private String parse(String filter) {
    // A StringBuilder avoids re-copying the whole result on every match.
    StringBuilder result = new StringBuilder(filter.length());
    Matcher matcher = pathPattern.matcher(filter);
    int last = 0;
    while (matcher.find()) {
        String path = matcher.group(1);
        String alias = joinName(path);
        if (alias == null) {
            alias = "self." + path;
        }
        // Copy the text between the previous match and this one, then the alias.
        result.append(filter, last, matcher.start()).append(alias);
        last = matcher.end();
    }
    // Remainder after the last match (the whole filter when nothing matched).
    result.append(filter, last, filter.length());
    return result.toString();
}
/**
 * Annotates Dust elements: open tags are checked for a matching close tag, and every
 * {@code TODO...} run (up to the end of its line) inside a comment gets an info
 * annotation with the TODO text attributes.
 */
@Override
public void annotate(@NotNull final PsiElement element, @NotNull AnnotationHolder holder) {
    if (element instanceof DustOpenTag) {
        checkMatchingCloseTag((DustOpenTag) element, holder);
    }
    if (element.getNode().getElementType() != DustTypes.COMMENT) {
        return;
    }
    final String rawComment = element.getText();
    if (rawComment.length() < 8) {
        return;
    }
    // Drop the last two characters of the comment before searching it.
    final String searchable = rawComment.substring(0, rawComment.length() - 2);
    final Matcher todo = Pattern.compile("TODO[^\n]*").matcher(searchable);
    final int base = element.getTextRange().getStartOffset();
    while (todo.find()) {
        final MatchResult hit = todo.toMatchResult();
        final TextRange range = new TextRange(base + hit.start(), base + hit.end());
        holder.createInfoAnnotation(range, null).setTextAttributes(DustSyntaxHighlighter.TODO);
    }
}
/**
 * Masks every matched region of a document with {@code 'X'} characters.
 *
 * <p>Each region {@code [start, end)} is replaced by exactly {@code end - start} X's, so
 * offsets of later matches stay valid.
 *
 * @param results  match results whose offsets refer to {@code document}
 * @param document the text to mask
 * @return a copy of {@code document} with all matched regions X-ed out
 */
private String replaceStrings(List<MatchResult> results, String document) {
    // Method-local buffers need no synchronization, so StringBuilder replaces StringBuffer.
    StringBuilder masked = new StringBuilder(document);
    // One mask buffer is reused for all matches.
    StringBuilder mask = new StringBuilder();
    for (MatchResult match : results) {
        int startOffset = match.start();
        int endOffset = match.end();
        mask.setLength(0);
        for (int i = startOffset; i < endOffset; i++) {
            mask.append('X');
        }
        masked.replace(startOffset, endOffset, mask.toString());
    }
    return masked.toString();
}
/**
 * Handles a line that starts inside a string or block comment left open by a previous line.
 *
 * <p>When the terminator (quote or block comment end) is found on this line, the leading
 * part is reported to the event handler, the corresponding flag is cleared and the
 * position after the terminator is returned. Otherwise nothing is emitted and the flags
 * are left untouched.
 *
 * @param line            the line being scanned
 * @param quote           matcher for quotes on this line
 * @param blockCommentEnd matcher for block comment ends on this line
 * @return the position at which regular scanning starts; 0 when no terminator was found
 *         or no construct was open
 */
int skipStart(String line, Matcher quote, Matcher blockCommentEnd) {
    final MatchResult terminator;
    if (isInsideString) {
        terminator = find(quote, 0);
    } else if (isInsideBlockComment) {
        terminator = find(blockCommentEnd, 0);
    } else {
        terminator = null;
    }
    if (terminator == null) {
        return 0;
    }
    if (isInsideBlockComment) {
        // Close the pending block comment, emitting its text on this line if there is any.
        if (terminator.start() > 0) {
            eventHandler.onBlockComment(line.substring(0, terminator.start()), false, true);
        }
        isInsideBlockComment = false;
    } else if (isInsideString) {
        // Close the pending string, emitting its text on this line if there is any.
        if (terminator.start() > 0) {
            eventHandler.onQuoted(line.substring(0, quoteStart(terminator)), false, true);
        }
        isInsideString = false;
    }
    return terminator.end();
}
/**
 * Computes where to resume the search for the next quote after match {@code q}.
 *
 * <p>The quote expression looks for the sequence non-quote + quote, so the search has to
 * restart one character before the end of the current match; this allows empty quoted
 * strings to be matched. The step back is only taken when the match ends after index 1.
 *
 * @param q the current quote match
 * @return {@code q.end() - 1} when {@code q.end() > 1}, otherwise {@code q.end()}
 */
int quoteRestart(MatchResult q) {
    final int matchEnd = q.end();
    if (matchEnd > 1) {
        return matchEnd - 1;
    }
    return matchEnd;
}
/**
 * Check token boundaries of expressions by looking at the characters directly in front of
 * and behind the match within the sentence text.
 *
 * <p>The match is rejected when
 * <ul>
 * <li>it is preceded by a digit and a dot (e.g. "1999" in "53453.1999"),</li>
 * <li>it is directly preceded by a word character, '$' or '+',</li>
 * <li>it is directly followed by a word character or '°', or</li>
 * <li>it is followed by '.' or ',' and a digit.</li>
 * </ul>
 *
 * @param r
 *            MatchResult, with offsets relative to the sentence text
 * @param s
 *            Respective sentence
 * @return whether or not the MatchResult is a clean one
 */
public static Boolean checkInfrontBehind(MatchResult r, Sentence s) {
    // Fetch the text and offsets once instead of on every comparison.
    final String text = s.getCoveredText();
    final int start = r.start();
    final int end = r.end();

    // get rid of expressions such as "1999" in 53453.1999
    if (start > 1 && text.substring(start - 2, start).matches("\\d\\.")) {
        return false;
    }
    // get rid of expressions if there is a character or symbol ($+)
    // directly in front of the expression
    // (the former extra "is not '('" test was always true for these characters)
    if (start > 0 && text.substring(start - 1, start).matches("[\\w\\$\\+]")) {
        return false;
    }
    if (end < text.length()) {
        // a word character or degree sign directly behind the expression
        // (the former extra "is not ')'" test was always true for these characters)
        if (text.substring(end, end + 1).matches("[°\\w]")) {
            return false;
        }
        // a decimal or thousands separator followed by a digit behind the expression
        if (end + 1 < text.length() && text.substring(end, end + 2).matches("[\\.,]\\d")) {
            return false;
        }
    }
    return true;
}
/**
 * Expands the variables found by {@code PATTERN} in a raw string.
 *
 * <p>For every match, the text from the previous match end up to the start of group 1 is
 * copied, followed by the value the supplier returns for the name in group 2. When the
 * supplier returns {@code null}, the literal <code>${name}</code> is emitted instead.
 *
 * @param raw the text to expand; may be {@code null}
 * @return the expanded text, or {@code null} when {@code raw} is {@code null}
 * @throws Exception declared by the original signature
 */
public String replaceVariables(String raw) throws Exception {
    if (raw == null) {
        return null;
    }
    final StringBuilder out = new StringBuilder();
    final Matcher variables = PATTERN.matcher(raw);
    int consumed = 0;
    while (variables.find()) {
        final MatchResult hit = variables.toMatchResult();
        out.append(raw.substring(consumed, hit.start(1)));
        final String name = hit.group(2);
        final Object resolved = this.supplier.valueOf(name);
        // Unknown variables are written back in ${name} form.
        out.append(resolved != null ? resolved : "${" + name + "}");
        consumed = hit.end();
    }
    out.append(raw.substring(consumed));
    return out.toString();
}
/**
 * Rebuilds the text by mapping unmatched stretches and matches through two functions.
 *
 * <p>Iterates the match results of this object in order. Non-empty text between matches
 * (and after the last one) goes through {@code notMatched}; every match goes through
 * {@code matched}. All results are passed through {@code filterNull} before appending.
 *
 * @param notMatched transformation for non-empty text outside of matches
 * @param matched    transformation for each match result
 * @return the concatenation of all transformed pieces
 */
public String replace(Function1<? super CharSequence, ? extends CharSequence> notMatched, Function1<? super MatchResult, ? extends CharSequence> matched) {
    final StringBuilder out = new StringBuilder();
    int cursor = 0;
    for (MatchResult hit : this) {
        final CharSequence gap = text.subSequence(cursor, hit.start());
        if (gap.length() != 0) {
            out.append(filterNull(call(notMatched, gap)));
        }
        out.append(filterNull(call(matched, hit)));
        cursor = hit.end();
    }
    final CharSequence tail = text.subSequence(cursor, text.length());
    if (tail.length() != 0) {
        out.append(filterNull(call(notMatched, tail)));
    }
    return out.toString();
}
/**
 * Selects the found region in the target component and records the search state.
 *
 * @param matchResult the match that was found
 * @param offset      value added to the match offsets to obtain target positions
 * @return the start position of the match in the target
 */
private int updateStateAfterFound(MatchResult matchResult, int offset) {
    int found = matchResult.start() + offset;
    int end = matchResult.end() + offset;
    target.select(found, end);
    target.getCaret().setSelectionVisible(true);
    // update state variables
    lastFoundIndex = found;
    lastMatchResult = matchResult;
    // A MatchResult is not guaranteed to be a Matcher (snapshots returned by
    // Matcher.toMatchResult() on recent JDKs are not), so only read the pattern when it
    // is one instead of failing with a ClassCastException; otherwise lastRegex is kept.
    if (matchResult instanceof Matcher) {
        lastRegex = ((Matcher) matchResult).pattern().pattern();
    }
    return found;
}
/**
* Attempt to match a complete address in content, starting with
* houseNumberMatch.
*
* <p>Words following the house number are consumed one at a time until a state
* (optionally followed by a valid zip code) is matched, or until a word, line or
* location-name limit is exceeded.
*
* @param content The string to search.
* @param houseNumberMatch A matching house number to start extending.
* @return +ve: the end of the match
* -ve: the position to restart searching for house numbers, negated.
*/
private static int attemptMatch(String content, MatchResult houseNumberMatch) {
// Position of the first further house number seen; preferred restart point on failure.
int restartPos = -1;
// End of a state match that was the last word of the content (no zip code follows).
int nonZipMatch = -1;
int it = houseNumberMatch.end();
int numLines = 1;
boolean consecutiveHouseNumbers = true;
boolean foundLocationName = false;
int wordCount = 1;
String lastWord = "";
Matcher matcher = sWordRe.matcher(content);
// Each iteration handles one word; on "continue" the update clause records that word
// as lastWord and moves "it" to its end.
for (; it < content.length(); lastWord = matcher.group(0), it = matcher.end()) {
if (!matcher.find(it)) {
// No more words in the input sequence.
return -content.length();
}
if (matcher.end() - matcher.start() > kMaxAddressNameWordLength) {
// Word is too long to be part of an address. Fail.
return -matcher.end();
}
// Count the number of newlines we just consumed.
// This also advances "it" to the start of the current word.
while (it < matcher.start()) {
if (NL.indexOf(content.charAt(it++)) != -1) ++numLines;
}
// Consumed too many lines. Fail.
if (numLines > MAX_ADDRESS_LINES) break;
// Consumed too many words. Fail.
if (++wordCount > MAX_ADDRESS_WORDS) break;
if (matchHouseNumber(content, it) != null) {
if (consecutiveHouseNumbers && numLines > 1) {
// Last line ended with a number, and this line starts with one.
// Restart at this number.
return -it;
}
// Remember the position of this match as the restart position.
if (restartPos == -1) restartPos = it;
continue;
}
consecutiveHouseNumbers = false;
if (isValidLocationName(matcher.group(0))) {
foundLocationName = true;
continue;
}
if (wordCount == MAX_LOCATION_NAME_DISTANCE && !foundLocationName) {
// Didn't find a location name in time. Fail.
it = matcher.end();
break;
}
if (foundLocationName && wordCount > MIN_ADDRESS_WORDS) {
// We can now attempt to match a state.
MatchResult stateMatch = matchState(content, it);
if (stateMatch != null) {
if (lastWord.equals("et") && stateMatch.group(0).equals("al")) {
// Reject "et al" as a false positive.
it = stateMatch.end();
break;
}
// At this point we've matched a state; try to match a zip code after it.
Matcher zipMatcher = sWordRe.matcher(content);
if (zipMatcher.find(stateMatch.end())) {
if (isValidZipCode(zipMatcher.group(0), stateMatch)) {
return zipMatcher.end();
}
} else {
// The content ends with a state but no zip
// code. This is a legal match according to the
// documentation. N.B. This is equivalent to the
// original c++ implementation, which only allowed
// the zip code to be optional at the end of the
// string, which presumably is a bug. We tried
// relaxing this to work in other places but it
// caused too many false positives.
nonZipMatch = stateMatch.end();
}
}
}
}
// A state at the very end of the content counts as a match even without a zip code.
if (nonZipMatch > 0) return nonZipMatch;
// Failure: report (negated) where the search for house numbers should resume.
return -(restartPos > 0 ? restartPos : it);
}
/**
 * Scans one line and reports its pieces (text, quoted strings, block comments, line
 * comments) to the event handler.
 *
 * <p>Comment characters are allowed inside a constant string and quoted strings inside a
 * comment. Block comments and quoted strings spanning several lines are emitted as one
 * event per line; the open state is carried over in {@code isInsideBlockComment} and
 * {@code isInsideString}.
 *
 * @param line the line to scan
 */
void scan(String line) {
    final Matcher quote = quoteRegex.matcher(line);
    final Matcher lineComment = lineCommentRegex.matcher(line);
    final Matcher blockCommentStart = blockCommentStartRegex.matcher(line);
    final Matcher blockCommentEnd = blockCommentEndRegex.matcher(line);

    // If a string or block comment is still open from a previous line, the line really
    // begins after its terminator (when the terminator occurs on this line).
    int p = skipStart(line, quote, blockCommentEnd);

    // Still completely inside the open construct: the whole line belongs to it.
    if (isInsideBlockComment) {
        eventHandler.onBlockComment(line, false, false);
        return;
    }
    if (isInsideString) {
        eventHandler.onQuoted(line, false, false);
        return;
    }

    // Sentinel start offset for "no such match on the rest of the line".
    final int none = Integer.MAX_VALUE;
    while (p < line.length()) {
        // From the current position find the next string start, block comment start
        // and line comment start.
        final MatchResult q = find(quote, p);
        final MatchResult bcs = find(blockCommentStart, p);
        final MatchResult lc = find(lineComment, p);
        final int quoteAt = q == null ? none : q.start();
        final int blockAt = bcs == null ? none : bcs.start();
        final int lineAt = lc == null ? none : lc.start();

        // Whichever starts strictly before the other two wins.
        // NOTE(review): the guards below test "> 0" rather than "> p", so an empty text
        // event looks possible when a construct starts exactly at p > 0 - confirm intent.
        if (q != null && quoteAt < blockAt && quoteAt < lineAt) {
            // Text from the last pointer up to the quote, then the string itself.
            if (quoteAt > 0) {
                eventHandler.onText(line.substring(p, quoteStart(q)));
            }
            p = skipQuoted(line, quote, q);
        } else if (bcs != null && blockAt < quoteAt && blockAt < lineAt) {
            // Text from the last pointer up to the comment start, then the comment.
            if (blockAt > 0) {
                eventHandler.onText(line.substring(p, blockAt));
            }
            p = skipBlockComment(line, blockCommentEnd, bcs);
        } else if (lc != null && lineAt < quoteAt && lineAt < blockAt) {
            // A line comment takes the rest of the line.
            if (lineAt > 0) {
                eventHandler.onText(line.substring(p, lineAt));
            }
            if (lc.end() < line.length()) {
                eventHandler.onLineComment(line.substring(lc.end()));
            }
            return;
        } else {
            // Nothing special left: the rest of the line is plain text.
            eventHandler.onText(line.substring(p));
            return;
        }
    }
}
/**
 * Check token boundaries using token information.
 *
 * <p>A match is accepted when it spans as many characters as the sentence, when it is
 * surrounded by single spaces on both sides, or when both of its ends either coincide
 * with token boundaries or touch one of the symbols ".", "/", "–", "-".
 *
 * @param r
 *            MatchResult, with offsets relative to the sentence text
 * @param s
 *            respective Sentence
 * @param jcas
 *            current CAS object
 * @return whether or not the MatchResult is a clean one
 */
public static Boolean checkTokenBoundaries(MatchResult r, Sentence s, JCas jcas) {
    final int start = r.start();
    final int end = r.end();

    // whole expression is marked as a sentence
    if (end - start == s.getEnd() - s.getBegin()) {
        return true;
    }

    final String text = s.getCoveredText();

    // White-space directly in front of and behind the match: no token check needed
    final boolean spaceBefore = start > 0 && text.charAt(start - 1) == ' ';
    final boolean spaceAfter = end < text.length() && text.charAt(end) == ' ';
    if (spaceBefore && spaceAfter) {
        return true;
    }

    // Tokenizer does not split number from some symbols (".", "/", "-", "–"),
    // e.g., "...12 August-24 Augsut..." or "... in 1990. New Sentence ..."
    boolean symbolBefore = false;
    if (start > 0) {
        final char previous = text.charAt(start - 1);
        symbolBefore = previous == '.' || previous == '/' || previous == '–' || previous == '-';
    }
    boolean symbolAfter = false;
    if (end < text.length()) {
        final char next = text.charAt(end);
        symbolAfter = next == '.' || next == '/' || next == '–' || next == '-';
    }

    // other token boundaries than white-spaces
    boolean beginOK = false;
    boolean endOK = false;
    FSIterator tokens = jcas.getAnnotationIndex(Token.type).subiterator(s);
    while (tokens.hasNext()) {
        Token t = (Token) tokens.next();
        // Check begin
        if (symbolBefore || start + s.getBegin() == t.getBegin()) {
            beginOK = true;
        }
        // Check end
        if (symbolAfter || end + s.getBegin() == t.getEnd()) {
            endOK = true;
        }
        if (beginOK && endOK) {
            return true;
        }
    }
    return false;
}
/**
 * <p>Create a new {@code PercentConversion} instance.</p>
 *
 * <p>A {@code PercentConversion} is built from one particular specifier match result
 * and is fixed after creation. A format string with several specifiers therefore needs
 * several instances, each representing exactly one specifier.</p>
 *
 * <p>The key is taken from the "Key" group; without one it is the empty string for
 * {@code %%} and the converter's next index otherwise. A width or precision of
 * {@code *} is replaced by a brace reference to the converter's next index.</p>
 *
 * @param aConverter - the enclosing {@code PercentToBraceConverter} instance
 * @param aMatch - a specific {@link java.util.regex.MatchResult MatchResult} that holds
 *            information about the matched specifier token.
 * @throws IllegalArgumentException
 *             if {@code aConverter} or {@code aMatch} is {@code null}
 * @throws IllegalStateException
 *             if {@code aMatch} is passed before a successful match could be made
 */
public PercentConversion(PercentToBraceConverter aConverter, MatchResult aMatch)
        throws IllegalArgumentException, IllegalStateException {
    if (aConverter == null) {
        throw new IllegalArgumentException("Converter can't be null!");
    }
    if (aMatch == null) {
        throw new IllegalArgumentException("Match can't be null!");
    }

    source = aMatch.group(0);
    span = new int[] { aMatch.start(), aMatch.end() };

    final Map<String, String> groups = extractTokenGroups(aMatch);

    final String keySpec = groups.get("Key");
    if (keySpec != null) {
        key = keySpec;
    } else if ("%%".equals(source)) {
        key = "";
    } else {
        key = aConverter.nextIndex();
    }

    // TODO: {} representation is hard-wired, could generalize this if needed
    final String widthSpec = groups.get("Width");
    width = "*".equals(widthSpec) ? String.format("{%s}", aConverter.nextIndex()) : widthSpec;

    final String precisionSpec = groups.get("Precision");
    precision = "*".equals(precisionSpec) ? String.format("{%s}", aConverter.nextIndex()) : precisionSpec;

    flags = groups.get("Flags");
    conversion = groups.get("Conversion");
}