Listed below are example usages of java.io.StreamTokenizer#lowerCaseMode(); follow the link to view the source code on GitHub, or leave a comment on the right.
/**
 * Verifies that {@link StreamTokenizer#lowerCaseMode(boolean)} lower-cases word
 * tokens using the JVM default locale: under {@code Locale.US} the token "aIb"
 * becomes "aib", while under the Turkish locale the capital 'I' lower-cases to
 * the dotless '\u0131'.
 *
 * @throws Exception if tokenizing fails
 */
public void testLowerCase() throws Exception {
    // Capture the true original default BEFORE touching it, so the finally
    // block restores the caller's locale. (The previous version captured the
    // default only after setting Locale.US, leaking US to later tests.)
    Locale oldDefault = Locale.getDefault();
    try {
        Locale.setDefault(Locale.US);
        StreamTokenizer st = new StreamTokenizer(new StringReader("aIb aIb"));
        st.lowerCaseMode(true);
        st.nextToken();
        assertEquals("aib", st.sval);

        // Turkish locale: String.toLowerCase maps 'I' to U+0131 (dotless i).
        Locale.setDefault(new Locale("tr", "TR"));
        st.nextToken();
        assertEquals("a\u0131b", st.sval);
    } finally {
        Locale.setDefault(oldDefault);
    }
}
/**
 * Sets up the stream tokenizer used by this parser.
 *
 * <p>The call order matters: later configuration calls override flags set by
 * earlier ones on the same characters (e.g. {@code commentChar('%')} and the
 * whitespace ranges override the broad {@code wordChars('\u0000', '\u00FF')}).
 */
private void setup() {
    // 'this' is passed as the character source, so the enclosing class
    // presumably implements Reader (or uses the deprecated InputStream
    // constructor) — TODO confirm against the class declaration.
    st = new StreamTokenizer(this);
    st.resetSyntax();
    st.eolIsSignificant(false);
    st.lowerCaseMode(true);
    // Parse numbers as words (resetSyntax() cleared the number table).
    st.wordChars('0', '9');
    // Range '-' (45) to '.' (46): sign and decimal point join words too.
    st.wordChars('-', '.');
    // Every Latin-1 character is a word character by default...
    st.wordChars('\u0000', '\u00FF');
    // ...except '%', which starts a comment that runs to end of line...
    st.commentChar('%');
    // ...and whitespace: space plus the control range TAB..SO (\u0009-\u000e).
    st.whitespaceChars(' ', ' ');
    st.whitespaceChars('\u0009', '\u000e');
}
/**
 * Creates a new lexical analyzer for the specified SQL statement.
 *
 * @param s
 *            the SQL statement
 */
public Lexer(String s) {
    initKeywords();
    tok = new StreamTokenizer(new StringReader(s));
    // Allow underscores inside identifiers.
    tok.wordChars('_', '_');
    // '.' must be returned as its own token (e.g. qualified names like
    // table.column) rather than being folded into a parsed number.
    tok.ordinaryChar('.');
    /*
     * Tokens in TT_WORD type like ids and keywords are converted into lower
     * case.
     */
    tok.lowerCaseMode(true);
    // Prime the tokenizer so the first token is available to callers.
    nextToken();
}
/**
 * Creates a lexer over the given character stream.
 *
 * @param r the character stream to tokenize
 * @param systemId an identifier for the input source — presumably used for
 *            error reporting elsewhere in this class; TODO confirm
 */
public Lexer(Reader r, String systemId) {
    this.systemId = systemId;
    st = new StreamTokenizer(r);
    st.eolIsSignificant(false);
    // Preserve the original case of word tokens.
    st.lowerCaseMode(false);
    // Both Java-style comment forms are skipped by the tokenizer.
    st.slashSlashComments(true);
    st.slashStarComments(true);
    st.wordChars('a', 'z');
    st.wordChars('A', 'Z');
    // ':' and '_' may appear inside names (e.g. namespace-prefixed names).
    st.wordChars(':', ':');
    st.wordChars('_', '_');
    st.quoteChar(SINGLE_QUOTE);
    st.quoteChar(DOUBLE_QUOTE);
    // Grammar delimiters (constants declared elsewhere in this class) are
    // each returned as a single-character token.
    st.ordinaryChar(BEGIN_NODE_TYPE_NAME);
    st.ordinaryChar(END_NODE_TYPE_NAME);
    st.ordinaryChar(EXTENDS);
    st.ordinaryChar(LIST_DELIMITER);
    st.ordinaryChar(PROPERTY_DEFINITION);
    st.ordinaryChar(CHILD_NODE_DEFINITION);
    st.ordinaryChar(BEGIN_TYPE);
    st.ordinaryChar(END_TYPE);
    st.ordinaryChar(DEFAULT);
    st.ordinaryChar(CONSTRAINT);
}
/** Parses the argument as if it were the content of a <code>robots.txt</code> file,
 * and returns a sorted array of prefixes of URLs that the agent should not follow.
 *
 * <p>Records aimed specifically at {@code userAgent} take precedence over records
 * aimed at the generic agent <code>*</code>; if neither kind is present the result
 * is built from the (empty) specific set.
 *
 * @param content the content of the <code>robots.txt</code> file.
 * @param userAgent the string representing the user agent of interest.
 * @return an array of character arrays, which are prefixes of the URLs not to follow, in sorted order.
 */
public static char[][] parseRobotsReader(final Reader content, final String userAgent) throws IOException {
    /* The set of disallowed paths specifically aimed at userAgent. */
    Set<String> set = new ObjectOpenHashSet<>();
    /* The set of disallowed paths aimed at the generic agent "*". */
    Set<String> setStar = new ObjectOpenHashSet<>();
    /* True if the currently examined record is targeted to us. */
    boolean doesMatter = false;
    /* True if we have seen a section targeted to our agent. */
    boolean specific = false;
    /* True if we have seen a section targeted to *. */
    boolean generic = false;
    /* True if we are in a star section. */
    boolean starSection = false;
    StreamTokenizer st = new StreamTokenizer(new FastBufferedReader(content));
    int token;
    st.resetSyntax();
    st.eolIsSignificant(true); // We need EOLs to separate records
    st.wordChars(33, 255); // All characters above space may appear in a word
    st.whitespaceChars(0, 32);
    st.ordinaryChar('#'); // We must manually simulate comments 8^(
    st.lowerCaseMode(false); // Directives are matched case-insensitively by hand below
    while (true) {
        int lineFirstToken = st.nextToken();
        if (lineFirstToken == StreamTokenizer.TT_EOF) break;
        switch (lineFirstToken) {
        // Blank line: a new block is starting
        case StreamTokenizer.TT_EOL:
            doesMatter = false;
            break;
        // Comment or number: ignore until the end of line
        case StreamTokenizer.TT_NUMBER:
        case '#':
            do {
                token = st.nextToken();
            } while (token != StreamTokenizer.TT_EOL && token != StreamTokenizer.TT_EOF);
            break;
        // A string
        case StreamTokenizer.TT_WORD:
            if (st.sval.equalsIgnoreCase("user-agent:")) {
                token = st.nextToken();
                // Note: the braceless "if (token == TT_WORD)" governs the whole
                // if/else chain below; the trailing "else starSection = false"
                // binds to the "*" test, not to the TT_WORD test.
                if (token == StreamTokenizer.TT_WORD)
                if (StringUtils.startsWithIgnoreCase(userAgent, st.sval)) {
                    doesMatter = true;
                    specific = true;
                    starSection = false;
                }
                else if (st.sval.equals("*")) {
                    starSection = true;
                    generic = true;
                } else starSection = false;
                // Ignore the rest of the line
                while (token != StreamTokenizer.TT_EOL && token != StreamTokenizer.TT_EOF)
                    token = st.nextToken();
            } else if (st.sval.equalsIgnoreCase("disallow:")) {
                token = st.nextToken();
                //System.out.println(st.sval + " " + starSection + " " + set + " " + setStar);
                // An empty Disallow: line means "allow everything": clear the
                // relevant set instead of adding to it.
                if (token == StreamTokenizer.TT_EOL) {
                    if (doesMatter) set.clear();
                    else if (starSection) setStar.clear();
                } else if (token == StreamTokenizer.TT_WORD) {
                    String disallowed = st.sval;
                    if (disallowed.endsWith("*")) disallowed = disallowed.substring(0, disallowed.length()-1); // Someone (erroneously) uses * to denote any suffix
                    if (doesMatter) set.add(disallowed);
                    else if (starSection) setStar.add(disallowed);
                }
                // Ignore the rest of the line
                while (token != StreamTokenizer.TT_EOL && token != StreamTokenizer.TT_EOF)
                    token = st.nextToken();
            } else if (LOGGER.isTraceEnabled()) LOGGER.trace("Line first token {} ununderstandable in robots.txt", st.sval);
            break;
        // Something else: a syntax error
        default:
            if (LOGGER.isTraceEnabled()) LOGGER.trace("Found unknown token type {} in robots.txt", Integer.valueOf(lineFirstToken));
        }
    }
    // Specific instructions win over generic ones; with neither, the empty
    // specific set yields an empty result.
    if (specific) return toSortedPrefixFreeCharArrays(set); // Some instructions specific to us
    if (! specific && generic) return toSortedPrefixFreeCharArrays(setStar); // No specific instruction, but some generic ones
    return toSortedPrefixFreeCharArrays(set);
}
/**
 * Parse a filter definition string. The format is as follows:<br>
 * "Exact words" "including:colons" id:100 name:fighter*
 * <p>
 * Unquoted word tokens are lower-cased by the tokenizer; double-quoted tokens
 * keep their case. A token followed by ":" is treated as a field name whose
 * pattern is the token after the colon; any other token becomes a pattern
 * matched against all fields (empty field name).
 *
 * @param filterStr the filter string
 * @return the list of fields and patterns to check
 */
public static List<Pair<String, Pattern>> parseFilter(String filterStr) {
    List<Pair<String, Pattern>> result = new ArrayList<>();
    StreamTokenizer st = new StreamTokenizer(new StringReader(filterStr));
    st.slashSlashComments(false);
    st.slashStarComments(false);
    st.lowerCaseMode(true);
    // Wildcards and common identifier punctuation are part of words.
    st.wordChars('*', '*');
    st.wordChars('?', '?');
    st.wordChars('.', '.');
    st.wordChars('@', '@');
    st.wordChars('-', '-');
    st.wordChars('_', '_');
    st.quoteChar('"');
    List<String> tokens = new ArrayList<>();
    try {
        while (true) {
            int tok = st.nextToken();
            if (tok == StreamTokenizer.TT_EOF) {
                break;
            } else if (tok == StreamTokenizer.TT_WORD || tok == '"') {
                tokens.add(st.sval);
            } else if (tok == StreamTokenizer.TT_NUMBER) {
                // StreamTokenizer parses digit runs as numbers by default, so
                // a value like the "100" in "id:100" arrives as TT_NUMBER.
                // The previous code cast the token type (-2) to a char,
                // producing a bogus U+FFFE token; render the number as text
                // instead, without a trailing ".0" for integral values.
                double nval = st.nval;
                if (nval == Math.floor(nval) && !Double.isInfinite(nval)) {
                    tokens.add(String.valueOf((long) nval));
                } else {
                    tokens.add(String.valueOf(nval));
                }
            } else {
                // Any other ordinary character (e.g. ':') is its own token.
                tokens.add(String.valueOf((char) tok));
            }
        }
    } catch (IOException ignored) {
        // Reading from a StringReader cannot fail; nothing sensible to do.
    }
    // Pair up "key : value" triples; bare tokens match against all fields.
    for (int i = 0; i < tokens.size(); i++) {
        String key = tokens.get(i);
        if (i < tokens.size() - 1 && tokens.get(i + 1).equals(":")) {
            if (i < tokens.size() - 2) {
                result.add(Pair.of(key, wildcardToRegex(tokens.get(i + 2))));
                i += 2;
            } else {
                // Trailing "key:" with no value: match the empty pattern.
                result.add(Pair.of(key, wildcardToRegex("")));
            }
        } else {
            result.add(Pair.of("", wildcardToRegex(key)));
        }
    }
    return result;
}
/**
 * Simple calculator REPL: reads statements of the form
 * {@code name = expression} from standard input, evaluates each expression
 * and stores the result under the given variable name. The bare words
 * dump, clear, quit, exit and help act as commands.
 *
 * @param args ignored
 * @throws IOException if reading from standard input fails
 */
public static void main(String args[]) throws IOException {
    Hashtable<String, Double> variables = new Hashtable<String, Double>();
    @SuppressWarnings("deprecation")
    StreamTokenizer st = new StreamTokenizer(System.in);
    st.eolIsSignificant(true); // EOL terminates a statement
    st.lowerCaseMode(true); // commands and variable names are lower-cased
    st.ordinaryChar('/'); // keep '/' and '-' as operator tokens, not
    st.ordinaryChar('-'); // comment starts / number signs
    while (true) {
        Expression res;
        int c = StreamTokenizer.TT_EOL;
        String varName = null;
        System.out.println("Enter an expression...");
        try {
            // Scan to the first significant token. A word is either a
            // command (handled, then keep scanning) or the variable name of
            // an assignment; for a variable name the next token — expected
            // to be '=' — is fetched before breaking out of this loop.
            while (true) {
                c = st.nextToken();
                if (c == StreamTokenizer.TT_EOF) {
                    System.exit(1);
                } else if (c == StreamTokenizer.TT_EOL) {
                    continue;
                } else if (c == StreamTokenizer.TT_WORD) {
                    if (st.sval.compareTo("dump") == 0) {
                        dumpVariables(variables);
                        continue;
                    } else if (st.sval.compareTo("clear") == 0) {
                        variables = new Hashtable<String, Double>();
                        continue;
                    } else if (st.sval.compareTo("quit") == 0) {
                        System.exit(0);
                    } else if (st.sval.compareTo("exit") == 0) {
                        System.exit(0);
                    } else if (st.sval.compareTo("help") == 0) {
                        help();
                        continue;
                    }
                    varName = st.sval;
                    c = st.nextToken();
                }
                break;
            }
            // Every statement must start with "name =".
            if (c != '=') {
                throw new SyntaxError("missing initial '=' sign.");
            }
            res = ParseExpression.expression(st);
        } catch (SyntaxError se) {
            res = null;
            varName = null;
            System.out.println("\nSyntax Error detected! - "+se.getMsg());
            // Discard the rest of the offending line.
            // NOTE(review): this loop never sees TT_EOL if EOF arrives
            // first, so it would spin on a truncated input — worth fixing.
            while (c != StreamTokenizer.TT_EOL)
                c = st.nextToken();
            continue;
        }
        c = st.nextToken();
        if (c != StreamTokenizer.TT_EOL) {
            // Leftover tokens after a successfully parsed expression.
            if (c == ')')
                System.out.println("\nSyntax Error detected! - To many closing parens.");
            else
                System.out.println("\nBogus token on input - "+c);
            // Flush the rest of the line before prompting again.
            while (c != StreamTokenizer.TT_EOL)
                c = st.nextToken();
        } else {
            try {
                Double z;
                System.out.println("Parsed expression : "+res.unparse());
                // NOTE(review): new Double(...) is deprecated; Double.valueOf
                // would be preferable here.
                z = new Double(res.value(variables));
                System.out.println("Value is : "+z);
                if (varName != null) {
                    variables.put(varName, z);
                    System.out.println("Assigned to : "+varName);
                }
            } catch (ExecError ee) {
                System.out.println("Execution error, "+ee.getMsg()+"!");
            }
        }
    }
}
/**
 * Exercises StreamTokenizer with a default-configured instance (words,
 * numbers, line counting) and a customized one (comment char, significant
 * EOLs, lower-case mode, ordinary char, slash-star comments), checking
 * token types and {@code toString()} renderings, plus two Harmony
 * regression cases for a lone '-' and a double-quoted word.
 */
public void testStreamTokenizer() throws Exception {
    StreamTokenizer plain =
            new StreamTokenizer(new StringReader("Testing 12345 \n alpha \r\n omega"));
    StreamTokenizer tricky =
            new StreamTokenizer(new StringReader("-3.8 'BLIND mice' \r sEe /* how */ they run"));

    // Default configuration: words and numbers, with line tracking.
    assertEquals(1, plain.lineno());
    assertEquals(StreamTokenizer.TT_WORD, plain.nextToken());
    assertEquals("Token[Testing], line 1", plain.toString());
    assertEquals(StreamTokenizer.TT_NUMBER, plain.nextToken());
    assertEquals("Token[n=12345.0], line 1", plain.toString());
    assertEquals(StreamTokenizer.TT_WORD, plain.nextToken());
    assertEquals("Token[alpha], line 2", plain.toString());
    assertEquals(StreamTokenizer.TT_WORD, plain.nextToken());
    assertEquals("Token[omega], line 3", plain.toString());
    assertEquals(StreamTokenizer.TT_EOF, plain.nextToken());
    assertEquals("Token[EOF], line 3", plain.toString());

    // Customized configuration on the second tokenizer.
    tricky.commentChar('u');
    tricky.eolIsSignificant(true);
    tricky.lowerCaseMode(true);
    tricky.ordinaryChar('y');
    tricky.slashStarComments(true);
    assertEquals(StreamTokenizer.TT_NUMBER, tricky.nextToken());
    assertEquals(-3.8, tricky.nval);
    assertEquals("Token[n=-3.8], line 1", tricky.toString());
    assertEquals(39, tricky.nextToken()); // 39 == '\''
    assertEquals("Token[BLIND mice], line 1", tricky.toString());
    assertEquals(10, tricky.nextToken()); // 10 == '\n'
    assertEquals("Token[EOL], line 2", tricky.toString());
    assertEquals(StreamTokenizer.TT_WORD, tricky.nextToken());
    assertEquals("Token[see], line 2", tricky.toString());
    assertEquals(StreamTokenizer.TT_WORD, tricky.nextToken());
    assertEquals("Token[the], line 2", tricky.toString());
    assertEquals(121, tricky.nextToken()); // 121 == 'y'
    assertEquals("Token['y'], line 2", tricky.toString());
    assertEquals(StreamTokenizer.TT_WORD, tricky.nextToken());
    assertEquals("Token[r], line 2", tricky.toString());
    assertEquals(StreamTokenizer.TT_EOF, tricky.nextToken());
    assertEquals("Token[EOF], line 2", tricky.toString());

    // Harmony regression: a lone '-' comes back as an ordinary char token.
    StreamTokenizer dashTokenizer =
            new StreamTokenizer(new ByteArrayInputStream(new byte[] { (byte) '-' }));
    dashTokenizer.nextToken();
    String rendered = dashTokenizer.toString();
    assertEquals("Token['-'], line 1", rendered);

    // Harmony regression: a double-quoted word is rendered without quotes.
    byte[] helloBytes = { '"', 'H', 'e', 'l', 'l', 'o', '"' };
    StreamTokenizer helloTokenizer =
            new StreamTokenizer(new ByteArrayInputStream(helloBytes));
    helloTokenizer.nextToken();
    rendered = helloTokenizer.toString();
    assertEquals("Token[Hello], line 1", rendered);
}
/**
 * Checks basic StreamTokenizer behaviour via Assert.assertTrue: default
 * word/number tokenizing with line counting on one input, then comment
 * chars, significant EOLs, lower-case mode, an ordinary char and
 * slash-star comments on a second input.
 */
public void test_basicStringTokenizerMethods() throws IOException {
    StreamTokenizer simple =
            new StreamTokenizer(new StringReader("Testing 12345 \n alpha \r\n omega"));
    StreamTokenizer configured =
            new StreamTokenizer(new StringReader("-3.8 'BLIND mice' \r sEe /* how */ they run"));

    // Default configuration: words, a number, and line numbers.
    Assert.assertTrue(simple.lineno() == 1);
    Assert.assertTrue(simple.nextToken() == StreamTokenizer.TT_WORD);
    Assert.assertTrue(simple.toString().equals("Token[Testing], line 1"));
    Assert.assertTrue(simple.nextToken() == StreamTokenizer.TT_NUMBER);
    Assert.assertTrue(simple.toString().equals("Token[n=12345.0], line 1"));
    Assert.assertTrue(simple.nextToken() == StreamTokenizer.TT_WORD);
    Assert.assertTrue(simple.toString().equals("Token[alpha], line 2"));
    Assert.assertTrue(simple.nextToken() == StreamTokenizer.TT_WORD);
    Assert.assertTrue(simple.toString().equals("Token[omega], line 3"));
    Assert.assertTrue(simple.nextToken() == StreamTokenizer.TT_EOF);
    Assert.assertTrue(simple.toString().equals("Token[EOF], line 3"));

    // Customized configuration on the second tokenizer.
    configured.commentChar('u');
    configured.eolIsSignificant(true);
    configured.lowerCaseMode(true);
    configured.ordinaryChar('y');
    configured.slashStarComments(true);
    Assert.assertTrue(configured.nextToken() == StreamTokenizer.TT_NUMBER);
    Assert.assertTrue(configured.nval == -3.8);
    Assert.assertTrue(configured.toString().equals("Token[n=-3.8], line 1"));
    Assert.assertTrue(configured.nextToken() == 39); // 39 == '\''
    Assert.assertTrue(configured.toString().equals("Token[BLIND mice], line 1"));
    Assert.assertTrue(configured.nextToken() == 10); // 10 == '\n'
    Assert.assertTrue(configured.toString().equals("Token[EOL], line 2"));
    Assert.assertTrue(configured.nextToken() == StreamTokenizer.TT_WORD);
    Assert.assertTrue(configured.toString().equals("Token[see], line 2"));
    Assert.assertTrue(configured.nextToken() == StreamTokenizer.TT_WORD);
    Assert.assertTrue(configured.toString().equals("Token[the], line 2"));
    Assert.assertTrue(configured.nextToken() == 121); // 121 == 'y'
    Assert.assertTrue(configured.toString().equals("Token['y'], line 2"));
    Assert.assertTrue(configured.nextToken() == StreamTokenizer.TT_WORD);
    Assert.assertTrue(configured.toString().equals("Token[r], line 2"));
    Assert.assertTrue(configured.nextToken() == StreamTokenizer.TT_EOF);
    Assert.assertTrue(configured.toString().equals("Token[EOF], line 2"));
}