Use of java.io.StreamTokenizer in the apache/lucene-solr project — class AnalyzerFactoryTask, method setParams:
/**
 * Sets the params.
 * Analysis component factory names may optionally include the "Factory" suffix.
 *
 * @param params analysis pipeline specification: name, (optional) positionIncrementGap,
 * (optional) offsetGap, 0+ CharFilterFactory's, 1 TokenizerFactory,
 * and 0+ TokenFilterFactory's
 */
@Override
@SuppressWarnings("fallthrough")
public void setParams(String params) {
super.setParams(params);
// Parser state: the kind of argument expected next. Parsing starts in the
// analyzer-level-argument phase (name / positionIncrementGap / offsetGap).
ArgType expectedArgType = ArgType.ANALYZER_ARG;
final StreamTokenizer stok = new StreamTokenizer(new StringReader(params));
// '#' starts a comment; both quote styles are accepted for values.
stok.commentChar('#');
stok.quoteChar('"');
stok.quoteChar('\'');
stok.eolIsSignificant(false);
// Punctuation used by the spec grammar comes back as single-char tokens.
stok.ordinaryChar('(');
stok.ordinaryChar(')');
stok.ordinaryChar(':');
stok.ordinaryChar(',');
try {
while (stok.nextToken() != StreamTokenizer.TT_EOF) {
switch(stok.ttype) {
case ',':
{
// Do nothing -- commas merely separate entries.
break;
}
case StreamTokenizer.TT_WORD:
{
if (expectedArgType.equals(ArgType.ANALYZER_ARG)) {
final String argName = stok.sval;
// Only the three named analyzer-level args are legal in this phase;
// anything else means the required "name" param was never supplied.
if (!argName.equalsIgnoreCase("name") && !argName.equalsIgnoreCase("positionIncrementGap") && !argName.equalsIgnoreCase("offsetGap")) {
throw new RuntimeException("Line #" + lineno(stok) + ": Missing 'name' param to AnalyzerFactory: '" + params + "'");
}
// Consume the ':' separating the arg name from its value.
stok.nextToken();
if (stok.ttype != ':') {
throw new RuntimeException("Line #" + lineno(stok) + ": Missing ':' after '" + argName + "' param to AnalyzerFactory");
}
stok.nextToken();
// sval is null for TT_NUMBER tokens; that case overwrites argValue below.
String argValue = stok.sval;
switch(stok.ttype) {
case StreamTokenizer.TT_NUMBER:
{
argValue = Double.toString(stok.nval);
// Drop the ".0" from numbers, for integer arguments
argValue = TRAILING_DOT_ZERO_PATTERN.matcher(argValue).replaceFirst("");
// Intentional fallthrough
}
// Falls through: once argValue is normalized, a numeric value is
// handled exactly like a quoted or bare-word value.
case '"':
case '\'':
case StreamTokenizer.TT_WORD:
{
if (argName.equalsIgnoreCase("name")) {
factoryName = argValue;
// The factory name is known: pipeline components may follow now.
expectedArgType = ArgType.ANALYZER_ARG_OR_CHARFILTER_OR_TOKENIZER;
} else {
// positionIncrementGap / offsetGap take integer values.
int intArgValue = 0;
try {
intArgValue = Integer.parseInt(argValue);
} catch (NumberFormatException e) {
throw new RuntimeException("Line #" + lineno(stok) + ": Exception parsing " + argName + " value '" + argValue + "'", e);
}
if (argName.equalsIgnoreCase("positionIncrementGap")) {
positionIncrementGap = intArgValue;
} else if (argName.equalsIgnoreCase("offsetGap")) {
offsetGap = intArgValue;
}
}
break;
}
case StreamTokenizer.TT_EOF:
{
throw new RuntimeException("Unexpected EOF: " + stok.toString());
}
default:
{
throw new RuntimeException("Line #" + lineno(stok) + ": Unexpected token: " + stok.toString());
}
}
} else if (expectedArgType.equals(ArgType.ANALYZER_ARG_OR_CHARFILTER_OR_TOKENIZER)) {
final String argName = stok.sval;
// The two gap args may still appear before the first pipeline component.
if (argName.equalsIgnoreCase("positionIncrementGap") || argName.equalsIgnoreCase("offsetGap")) {
stok.nextToken();
if (stok.ttype != ':') {
throw new RuntimeException("Line #" + lineno(stok) + ": Missing ':' after '" + argName + "' param to AnalyzerFactory");
}
stok.nextToken();
// nval is read up front; it is only meaningful for TT_NUMBER tokens and
// is re-parsed from sval in the quoted/word cases below.
int intArgValue = (int) stok.nval;
switch(stok.ttype) {
case '"':
case '\'':
case StreamTokenizer.TT_WORD:
{
intArgValue = 0;
try {
intArgValue = Integer.parseInt(stok.sval.trim());
} catch (NumberFormatException e) {
throw new RuntimeException("Line #" + lineno(stok) + ": Exception parsing " + argName + " value '" + stok.sval + "'", e);
}
// Intentional fall-through
}
case StreamTokenizer.TT_NUMBER:
{
if (argName.equalsIgnoreCase("positionIncrementGap")) {
positionIncrementGap = intArgValue;
} else if (argName.equalsIgnoreCase("offsetGap")) {
offsetGap = intArgValue;
}
break;
}
case StreamTokenizer.TT_EOF:
{
throw new RuntimeException("Unexpected EOF: " + stok.toString());
}
default:
{
throw new RuntimeException("Line #" + lineno(stok) + ": Unexpected token: " + stok.toString());
}
}
// Done with this gap argument; return to the outer token loop.
break;
}
// Not a gap argument: must be a pipeline component. Try CharFilterFactory
// first, then TokenizerFactory; finding the tokenizer switches the parser
// into the token-filter phase.
try {
final Class<? extends CharFilterFactory> clazz;
clazz = lookupAnalysisClass(argName, CharFilterFactory.class);
createAnalysisPipelineComponent(stok, clazz);
} catch (IllegalArgumentException e) {
try {
final Class<? extends TokenizerFactory> clazz;
clazz = lookupAnalysisClass(argName, TokenizerFactory.class);
createAnalysisPipelineComponent(stok, clazz);
expectedArgType = ArgType.TOKENFILTER;
} catch (IllegalArgumentException e2) {
throw new RuntimeException("Line #" + lineno(stok) + ": Can't find class '" + argName + "' as CharFilterFactory or TokenizerFactory");
}
}
} else {
// expectedArgType = ArgType.TOKENFILTER
final String className = stok.sval;
final Class<? extends TokenFilterFactory> clazz;
try {
clazz = lookupAnalysisClass(className, TokenFilterFactory.class);
} catch (IllegalArgumentException e) {
throw new RuntimeException("Line #" + lineno(stok) + ": Can't find class '" + className + "' as TokenFilterFactory");
}
createAnalysisPipelineComponent(stok, clazz);
}
break;
}
default:
{
throw new RuntimeException("Line #" + lineno(stok) + ": Unexpected token: " + stok.toString());
}
}
}
} catch (RuntimeException e) {
// Re-throw errors that already carry a line number; wrap everything else.
// NOTE(review): getMessage() may return null, which would NPE here -- confirm.
if (e.getMessage().startsWith("Line #")) {
throw e;
} else {
throw new RuntimeException("Line #" + lineno(stok) + ": ", e);
}
} catch (Throwable t) {
throw new RuntimeException("Line #" + lineno(stok) + ": ", t);
}
// All tokens consumed: assemble and register the analyzer factory.
final AnalyzerFactory analyzerFactory = new AnalyzerFactory(charFilterFactories, tokenizerFactory, tokenFilterFactories);
analyzerFactory.setPositionIncrementGap(positionIncrementGap);
analyzerFactory.setOffsetGap(offsetGap);
getRunData().getAnalyzerFactories().put(factoryName, analyzerFactory);
}
Use of java.io.StreamTokenizer in the apache/lucene-solr project — class NewAnalyzerTask, method setParams:
/**
 * Set the params (analyzerName only), Comma-separate list of Analyzer class names. If the Analyzer lives in
 * org.apache.lucene.analysis, the name can be shortened by dropping the o.a.l.a part of the Fully Qualified Class Name.
 * <p>
 * Analyzer names may also refer to previously defined AnalyzerFactory's.
 * <p>
 * Example Declaration: {"NewAnalyzer" NewAnalyzer(WhitespaceAnalyzer, SimpleAnalyzer, StopAnalyzer, standard.StandardAnalyzer) >
 * <p>
 * Example AnalyzerFactory usage:
 * <pre>
 * -AnalyzerFactory(name:'whitespace tokenized',WhitespaceTokenizer)
 * -NewAnalyzer('whitespace tokenized')
 * </pre>
 * @param params analyzerClassName, or empty for the StandardAnalyzer
 */
@Override
public void setParams(String params) {
  super.setParams(params);
  final StreamTokenizer stok = new StreamTokenizer(new StringReader(params));
  // Analyzer names may be bare words or quoted with either quote style.
  stok.quoteChar('"');
  stok.quoteChar('\'');
  stok.eolIsSignificant(false);
  stok.ordinaryChar(',');
  try {
    while (stok.nextToken() != StreamTokenizer.TT_EOF) {
      switch (stok.ttype) {
        case ',':
        {
          // Commas merely separate names - skip.
          break;
        }
        case '\'':
        case '\"':
        case StreamTokenizer.TT_WORD:
        {
          // Collect each (possibly quoted) analyzer name in declaration order.
          analyzerNames.add(stok.sval);
          break;
        }
        default:
        {
          throw new RuntimeException("Unexpected token: " + stok.toString());
        }
      }
    }
  } catch (RuntimeException e) {
    // Fix: Throwable.getMessage() may return null, which previously caused a
    // NullPointerException here; such exceptions are now wrapped with a line number.
    if (e.getMessage() != null && e.getMessage().startsWith("Line #")) {
      // Already annotated with a line number - propagate unchanged.
      throw e;
    } else {
      throw new RuntimeException("Line #" + (stok.lineno() + getAlgLineNum()) + ": ", e);
    }
  } catch (Throwable t) {
    throw new RuntimeException("Line #" + (stok.lineno() + getAlgLineNum()) + ": ", t);
  }
}
Use of java.io.StreamTokenizer in the ceylon/ceylon-compiler project — class CommandLine, method loadCmdFile:
/**
 * Loads an argument file: every whitespace-delimited or quoted token in the
 * file named {@code name} is appended to {@code args}. Lines starting with
 * '#' are treated as comments.
 *
 * @param name path of the command file to read
 * @param args sink that receives each token in file order
 * @throws IOException if the file cannot be opened or read
 */
private static void loadCmdFile(String name, ListBuffer<String> args) throws IOException {
    // Fix: try-with-resources closes the reader even when tokenizing throws;
    // the original closed it only on the success path, leaking the file handle.
    // NOTE(review): FileReader uses the platform default charset - kept as-is
    // to preserve behavior; confirm whether UTF-8 should be mandated.
    try (Reader r = new BufferedReader(new FileReader(name))) {
        StreamTokenizer st = new StreamTokenizer(r);
        // Start from a blank syntax table, then treat all printable chars as
        // word characters and everything up to ' ' as whitespace.
        st.resetSyntax();
        st.wordChars(' ', 255);
        st.whitespaceChars(0, ' ');
        st.commentChar('#');
        st.quoteChar('"');
        st.quoteChar('\'');
        while (st.nextToken() != StreamTokenizer.TT_EOF) {
            args.append(st.sval);
        }
    }
}
Use of java.io.StreamTokenizer in the caskdata/cdap project — class FilterParser, method parse:
/**
 * Parses a filter expression into a {@code Filter}. A blank expression
 * (nothing but EOF, or a single empty quoted string) yields
 * {@code Filter.EMPTY_FILTER}; anything else is delegated to
 * {@code parseExpression}.
 *
 * @param expression the filter expression text
 * @return the parsed filter, or {@code Filter.EMPTY_FILTER} when empty
 */
public static Filter parse(String expression) {
  final StreamTokenizer tok = new StreamTokenizer(new StringReader(expression));
  // Let ':' join adjacent word characters into a single token.
  tok.wordChars((int) ':', (int) ':');
  // Line breaks are plain whitespace.
  tok.eolIsSignificant(false);
  // Undo the default numeric parsing, then re-register digits and '.' as
  // word characters so numbers come back as words.
  tok.ordinaryChars((int) '0', (int) '9');
  tok.ordinaryChar((int) '.');
  tok.ordinaryChar((int) '-');
  tok.wordChars((int) '0', (int) '9');
  tok.wordChars((int) '.', (int) '.');
  // Peek at the first token to detect an empty expression.
  final int first;
  try {
    first = tok.nextToken();
  } catch (IOException e) {
    throw Throwables.propagate(e);
  }
  if (first == StreamTokenizer.TT_EOF) {
    return Filter.EMPTY_FILTER;
  }
  if ((first == (int) '\'' || first == (int) '"') && tok.sval.isEmpty()) {
    // A lone '' or "" also counts as empty.
    return Filter.EMPTY_FILTER;
  }
  // Non-empty: un-consume the peeked token and hand off to the real parser.
  tok.pushBack();
  return parseExpression(tok);
}
Use of java.io.StreamTokenizer in the crdroidandroid/android_frameworks_base project — class TypedProperties, method initTokenizer:
/**
 * Creates a {@link java.io.StreamTokenizer} whose syntax tables are
 * configured for the {@code TypedProperties} file format.
 *
 * @param r the {@code Reader} the tokenizer will consume
 * @return a newly-created, fully configured {@code StreamTokenizer}
 */
static StreamTokenizer initTokenizer(Reader r) {
    final StreamTokenizer tokenizer = new StreamTokenizer(r);
    // Start from an empty syntax table and opt characters in explicitly.
    tokenizer.resetSyntax();
    /* Word characters must cover everything that can appear unquoted:
     * - property names: [._$a-zA-Z0-9]
     * - type names: [a-zS]
     * - number literals: [-0-9.eExXA-Za-z] ('x' for 0xNNN hex literals. "NaN", "Infinity")
     * - "true" or "false" (case insensitive): [a-zA-Z]
     */
    tokenizer.wordChars('a', 'z');
    tokenizer.wordChars('A', 'Z');
    tokenizer.wordChars('0', '9');
    tokenizer.wordChars('_', '_');
    tokenizer.wordChars('$', '$');
    tokenizer.wordChars('.', '.');
    tokenizer.wordChars('-', '-');
    tokenizer.wordChars('+', '+');
    // '=' comes back as its own single-character token.
    tokenizer.ordinaryChar('=');
    // Whitespace: space, tab, and both line-terminator characters.
    tokenizer.whitespaceChars(' ', ' ');
    tokenizer.whitespaceChars('\t', '\t');
    tokenizer.whitespaceChars('\n', '\n');
    tokenizer.whitespaceChars('\r', '\r');
    // String values are double-quoted.
    tokenizer.quoteChar('"');
    // Accept both Java comment styles.
    tokenizer.slashStarComments(true);
    tokenizer.slashSlashComments(true);
    return tokenizer;
}
Aggregations