Example 21 with StreamTokenizer

use of java.io.StreamTokenizer in project lucene-solr by apache.

the class AnalyzerFactoryTask method setParams.

/**
   * Sets the params.
   * Analysis component factory names may optionally include the "Factory" suffix.
   *
   * @param params analysis pipeline specification: name, (optional) positionIncrementGap,
   *               (optional) offsetGap, 0+ CharFilterFactory's, 1 TokenizerFactory,
   *               and 0+ TokenFilterFactory's
   */
@Override
@SuppressWarnings("fallthrough")
public void setParams(String params) {
    super.setParams(params);
    ArgType expectedArgType = ArgType.ANALYZER_ARG;
    final StreamTokenizer stok = new StreamTokenizer(new StringReader(params));
    stok.commentChar('#');
    stok.quoteChar('"');
    stok.quoteChar('\'');
    stok.eolIsSignificant(false);
    stok.ordinaryChar('(');
    stok.ordinaryChar(')');
    stok.ordinaryChar(':');
    stok.ordinaryChar(',');
    try {
        while (stok.nextToken() != StreamTokenizer.TT_EOF) {
            switch(stok.ttype) {
                case ',':
                    {
                        // Do nothing
                        break;
                    }
                case StreamTokenizer.TT_WORD:
                    {
                        if (expectedArgType.equals(ArgType.ANALYZER_ARG)) {
                            final String argName = stok.sval;
                            if (!argName.equalsIgnoreCase("name") && !argName.equalsIgnoreCase("positionIncrementGap") && !argName.equalsIgnoreCase("offsetGap")) {
                                throw new RuntimeException("Line #" + lineno(stok) + ": Missing 'name' param to AnalyzerFactory: '" + params + "'");
                            }
                            stok.nextToken();
                            if (stok.ttype != ':') {
                                throw new RuntimeException("Line #" + lineno(stok) + ": Missing ':' after '" + argName + "' param to AnalyzerFactory");
                            }
                            stok.nextToken();
                            String argValue = stok.sval;
                            switch(stok.ttype) {
                                case StreamTokenizer.TT_NUMBER:
                                    {
                                        argValue = Double.toString(stok.nval);
                                        // Drop the ".0" from numbers, for integer arguments
                                        argValue = TRAILING_DOT_ZERO_PATTERN.matcher(argValue).replaceFirst("");
                                    // Intentional fallthrough
                                    }
                                case '"':
                                case '\'':
                                case StreamTokenizer.TT_WORD:
                                    {
                                        if (argName.equalsIgnoreCase("name")) {
                                            factoryName = argValue;
                                            expectedArgType = ArgType.ANALYZER_ARG_OR_CHARFILTER_OR_TOKENIZER;
                                        } else {
                                            int intArgValue = 0;
                                            try {
                                                intArgValue = Integer.parseInt(argValue);
                                            } catch (NumberFormatException e) {
                                                throw new RuntimeException("Line #" + lineno(stok) + ": Exception parsing " + argName + " value '" + argValue + "'", e);
                                            }
                                            if (argName.equalsIgnoreCase("positionIncrementGap")) {
                                                positionIncrementGap = intArgValue;
                                            } else if (argName.equalsIgnoreCase("offsetGap")) {
                                                offsetGap = intArgValue;
                                            }
                                        }
                                        break;
                                    }
                                case StreamTokenizer.TT_EOF:
                                    {
                                        throw new RuntimeException("Unexpected EOF: " + stok.toString());
                                    }
                                default:
                                    {
                                        throw new RuntimeException("Line #" + lineno(stok) + ": Unexpected token: " + stok.toString());
                                    }
                            }
                        } else if (expectedArgType.equals(ArgType.ANALYZER_ARG_OR_CHARFILTER_OR_TOKENIZER)) {
                            final String argName = stok.sval;
                            if (argName.equalsIgnoreCase("positionIncrementGap") || argName.equalsIgnoreCase("offsetGap")) {
                                stok.nextToken();
                                if (stok.ttype != ':') {
                                    throw new RuntimeException("Line #" + lineno(stok) + ": Missing ':' after '" + argName + "' param to AnalyzerFactory");
                                }
                                stok.nextToken();
                                int intArgValue = (int) stok.nval;
                                switch(stok.ttype) {
                                    case '"':
                                    case '\'':
                                    case StreamTokenizer.TT_WORD:
                                        {
                                            intArgValue = 0;
                                            try {
                                                intArgValue = Integer.parseInt(stok.sval.trim());
                                            } catch (NumberFormatException e) {
                                                throw new RuntimeException("Line #" + lineno(stok) + ": Exception parsing " + argName + " value '" + stok.sval + "'", e);
                                            }
                                        // Intentional fall-through
                                        }
                                    case StreamTokenizer.TT_NUMBER:
                                        {
                                            if (argName.equalsIgnoreCase("positionIncrementGap")) {
                                                positionIncrementGap = intArgValue;
                                            } else if (argName.equalsIgnoreCase("offsetGap")) {
                                                offsetGap = intArgValue;
                                            }
                                            break;
                                        }
                                    case StreamTokenizer.TT_EOF:
                                        {
                                            throw new RuntimeException("Unexpected EOF: " + stok.toString());
                                        }
                                    default:
                                        {
                                            throw new RuntimeException("Line #" + lineno(stok) + ": Unexpected token: " + stok.toString());
                                        }
                                }
                                break;
                            }
                            try {
                                final Class<? extends CharFilterFactory> clazz;
                                clazz = lookupAnalysisClass(argName, CharFilterFactory.class);
                                createAnalysisPipelineComponent(stok, clazz);
                            } catch (IllegalArgumentException e) {
                                try {
                                    final Class<? extends TokenizerFactory> clazz;
                                    clazz = lookupAnalysisClass(argName, TokenizerFactory.class);
                                    createAnalysisPipelineComponent(stok, clazz);
                                    expectedArgType = ArgType.TOKENFILTER;
                                } catch (IllegalArgumentException e2) {
                                    throw new RuntimeException("Line #" + lineno(stok) + ": Can't find class '" + argName + "' as CharFilterFactory or TokenizerFactory");
                                }
                            }
                        } else {
                            // expectedArgType = ArgType.TOKENFILTER
                            final String className = stok.sval;
                            final Class<? extends TokenFilterFactory> clazz;
                            try {
                                clazz = lookupAnalysisClass(className, TokenFilterFactory.class);
                            } catch (IllegalArgumentException e) {
                                throw new RuntimeException("Line #" + lineno(stok) + ": Can't find class '" + className + "' as TokenFilterFactory");
                            }
                            createAnalysisPipelineComponent(stok, clazz);
                        }
                        break;
                    }
                default:
                    {
                        throw new RuntimeException("Line #" + lineno(stok) + ": Unexpected token: " + stok.toString());
                    }
            }
        }
    } catch (RuntimeException e) {
        if (e.getMessage().startsWith("Line #")) {
            throw e;
        } else {
            throw new RuntimeException("Line #" + lineno(stok) + ": ", e);
        }
    } catch (Throwable t) {
        throw new RuntimeException("Line #" + lineno(stok) + ": ", t);
    }
    final AnalyzerFactory analyzerFactory = new AnalyzerFactory(charFilterFactories, tokenizerFactory, tokenFilterFactories);
    analyzerFactory.setPositionIncrementGap(positionIncrementGap);
    analyzerFactory.setOffsetGap(offsetGap);
    getRunData().getAnalyzerFactories().put(factoryName, analyzerFactory);
}
Also used : TokenizerFactory(org.apache.lucene.analysis.util.TokenizerFactory) CharFilterFactory(org.apache.lucene.analysis.util.CharFilterFactory) TokenFilterFactory(org.apache.lucene.analysis.util.TokenFilterFactory) StringReader(java.io.StringReader) StreamTokenizer(java.io.StreamTokenizer) AnalyzerFactory(org.apache.lucene.benchmark.byTask.utils.AnalyzerFactory)
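
A minimal sketch of the token stream the parser above consumes, using the same StreamTokenizer configuration on a hypothetical params string (the factory names and values below are illustrative only, not a validated pipeline spec):

import java.io.StreamTokenizer;
import java.io.StringReader;

public class AnalyzerFactoryParamsDemo {
    public static void main(String[] args) throws Exception {
        // Hypothetical params string; shown only to illustrate tokenization.
        String params = "name:'shingled', positionIncrementGap:100, StandardTokenizer, ShingleFilter";
        StreamTokenizer stok = new StreamTokenizer(new StringReader(params));
        stok.commentChar('#');        // same syntax setup as setParams() above
        stok.quoteChar('"');
        stok.quoteChar('\'');
        stok.eolIsSignificant(false);
        stok.ordinaryChar('(');
        stok.ordinaryChar(')');
        stok.ordinaryChar(':');
        stok.ordinaryChar(',');
        while (stok.nextToken() != StreamTokenizer.TT_EOF) {
            switch (stok.ttype) {
                case StreamTokenizer.TT_WORD:   System.out.println("WORD    " + stok.sval); break;
                case StreamTokenizer.TT_NUMBER: System.out.println("NUMBER  " + stok.nval); break;
                case '"':
                case '\'':                      System.out.println("QUOTED  " + stok.sval); break;
                default:                        System.out.println("CHAR    " + (char) stok.ttype); break;
            }
        }
    }
}

Quoted values come back with ttype set to the quote character and the unquoted text in sval, and numeric values come back as TT_NUMBER with a double in nval, which is why the switch in setParams() handles '"', '\'' and TT_WORD together and converts nval back to a string (dropping the trailing ".0") for TT_NUMBER.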

Example 22 with StreamTokenizer

use of java.io.StreamTokenizer in project lucene-solr by apache.

the class NewAnalyzerTask method setParams.

/**
   * Sets the params (analyzer names only): a comma-separated list of Analyzer class names. If the Analyzer lives in
   * org.apache.lucene.analysis, the name can be shortened by dropping the o.a.l.a part of the fully qualified class name.
   * <p>
   * Analyzer names may also refer to previously defined AnalyzerFactory's.
   * <p>
   * Example Declaration: {"NewAnalyzer" NewAnalyzer(WhitespaceAnalyzer, SimpleAnalyzer, StopAnalyzer, standard.StandardAnalyzer) &gt;
   * <p>
   * Example AnalyzerFactory usage:
   * <pre>
   * -AnalyzerFactory(name:'whitespace tokenized',WhitespaceTokenizer)
   * -NewAnalyzer('whitespace tokenized')
   * </pre>
   * @param params analyzerClassName, or empty for the StandardAnalyzer
   */
@Override
public void setParams(String params) {
    super.setParams(params);
    final StreamTokenizer stok = new StreamTokenizer(new StringReader(params));
    stok.quoteChar('"');
    stok.quoteChar('\'');
    stok.eolIsSignificant(false);
    stok.ordinaryChar(',');
    try {
        while (stok.nextToken() != StreamTokenizer.TT_EOF) {
            switch(stok.ttype) {
                case ',':
                    {
                        // Do nothing
                        break;
                    }
                case '\'':
                case '\"':
                case StreamTokenizer.TT_WORD:
                    {
                        analyzerNames.add(stok.sval);
                        break;
                    }
                default:
                    {
                        throw new RuntimeException("Unexpected token: " + stok.toString());
                    }
            }
        }
    } catch (RuntimeException e) {
        if (e.getMessage().startsWith("Line #")) {
            throw e;
        } else {
            throw new RuntimeException("Line #" + (stok.lineno() + getAlgLineNum()) + ": ", e);
        }
    } catch (Throwable t) {
        throw new RuntimeException("Line #" + (stok.lineno() + getAlgLineNum()) + ": ", t);
    }
}
Also used : StringReader(java.io.StringReader) StreamTokenizer(java.io.StreamTokenizer)
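
For reference, a small hypothetical sketch (not taken from lucene-solr) of the same tokenization in isolation: comma-separated analyzer names, with quoting so that a previously defined AnalyzerFactory name containing spaces survives as a single token:

import java.io.StreamTokenizer;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;

public class NewAnalyzerParamsDemo {
    public static void main(String[] args) throws Exception {
        // Hypothetical params string; the quoted entry refers to an AnalyzerFactory name.
        String params = "WhitespaceAnalyzer,'whitespace tokenized',standard.StandardAnalyzer";
        StreamTokenizer stok = new StreamTokenizer(new StringReader(params));
        stok.quoteChar('"');
        stok.quoteChar('\'');
        stok.eolIsSignificant(false);
        stok.ordinaryChar(',');       // commas only separate names and are skipped below
        List<String> analyzerNames = new ArrayList<>();
        while (stok.nextToken() != StreamTokenizer.TT_EOF) {
            if (stok.ttype == StreamTokenizer.TT_WORD || stok.ttype == '"' || stok.ttype == '\'') {
                analyzerNames.add(stok.sval);
            }
        }
        // Prints [WhitespaceAnalyzer, whitespace tokenized, standard.StandardAnalyzer]
        System.out.println(analyzerNames);
    }
}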

Example 23 with StreamTokenizer

use of java.io.StreamTokenizer in project ceylon-compiler by ceylon.

the class CommandLine method loadCmdFile.

private static void loadCmdFile(String name, ListBuffer<String> args) throws IOException {
    Reader r = new BufferedReader(new FileReader(name));
    StreamTokenizer st = new StreamTokenizer(r);
    st.resetSyntax();
    st.wordChars(' ', 255);
    st.whitespaceChars(0, ' ');
    st.commentChar('#');
    st.quoteChar('"');
    st.quoteChar('\'');
    while (st.nextToken() != StreamTokenizer.TT_EOF) {
        args.append(st.sval);
    }
    r.close();
}
Also used : BufferedReader(java.io.BufferedReader) Reader(java.io.Reader) FileReader(java.io.FileReader) BufferedReader(java.io.BufferedReader) FileReader(java.io.FileReader) StreamTokenizer(java.io.StreamTokenizer)
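
A minimal sketch, assuming an in-memory command string instead of a file, of how this syntax setup behaves: '#' comments are discarded and quoted arguments keep their embedded spaces (the option names below are invented):

import java.io.Reader;
import java.io.StreamTokenizer;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;

public class CmdFileDemo {
    public static void main(String[] args) throws Exception {
        String cmdFile = "-verbose 'My Documents/src' # trailing comment\n-d \"out dir\"\n";
        Reader r = new StringReader(cmdFile);
        StreamTokenizer st = new StreamTokenizer(r);
        st.resetSyntax();               // discard the default tables entirely
        st.wordChars(' ', 255);         // every printable char can be part of a word...
        st.whitespaceChars(0, ' ');     // ...except control chars and space, which split tokens
        st.commentChar('#');
        st.quoteChar('"');
        st.quoteChar('\'');
        List<String> parsed = new ArrayList<>();
        while (st.nextToken() != StreamTokenizer.TT_EOF) {
            parsed.add(st.sval);
        }
        // Prints [-verbose, My Documents/src, -d, out dir]
        System.out.println(parsed);
    }
}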

Example 24 with StreamTokenizer

use of java.io.StreamTokenizer in project cdap by caskdata.

the class FilterParser method parse.

public static Filter parse(String expression) {
    StreamTokenizer tokenizer = new StreamTokenizer(new StringReader(expression));
    // Treat : as part of word
    tokenizer.wordChars((int) ':', (int) ':');
    // Treat End of Line as whitespace
    tokenizer.eolIsSignificant(false);
    // Reset special handling of numerals
    tokenizer.ordinaryChars((int) '0', (int) '9');
    tokenizer.ordinaryChar((int) '.');
    tokenizer.ordinaryChar((int) '-');
    tokenizer.wordChars((int) '0', (int) '9');
    tokenizer.wordChars((int) '.', (int) '.');
    // Check if empty expression
    try {
        tokenizer.nextToken();
        if (tokenizer.ttype == StreamTokenizer.TT_EOF) {
            return Filter.EMPTY_FILTER;
        } else if (tokenizer.ttype == (int) '\'' || tokenizer.ttype == (int) '"') {
            // Empty quoted string - '' or ""
            if (tokenizer.sval.isEmpty()) {
                return Filter.EMPTY_FILTER;
            }
        }
    } catch (IOException e) {
        throw Throwables.propagate(e);
    }
    // Not an empty expression
    tokenizer.pushBack();
    return parseExpression(tokenizer);
}
Also used : StringReader(java.io.StringReader) IOException(java.io.IOException) StreamTokenizer(java.io.StreamTokenizer)
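
A small hypothetical sketch (not part of cdap) showing why FilterParser rewires the syntax tables: with ':' and the digits folded into word characters, a term such as namespace:default or runid:123 comes back as a single TT_WORD token instead of being split or parsed as a number (the expression below is invented):

import java.io.StreamTokenizer;
import java.io.StringReader;

public class FilterTokensDemo {
    public static void main(String[] args) throws Exception {
        String expression = "namespace:default AND runid:123 OR version:1.2";
        StreamTokenizer tokenizer = new StreamTokenizer(new StringReader(expression));
        tokenizer.wordChars(':', ':');       // keep key:value together
        tokenizer.eolIsSignificant(false);
        tokenizer.ordinaryChars('0', '9');   // undo the default number parsing...
        tokenizer.ordinaryChar('.');
        tokenizer.ordinaryChar('-');
        tokenizer.wordChars('0', '9');       // ...then fold digits and '.' back into words
        tokenizer.wordChars('.', '.');
        while (tokenizer.nextToken() != StreamTokenizer.TT_EOF) {
            // Each word token prints on its own line:
            // namespace:default, AND, runid:123, OR, version:1.2
            System.out.println(tokenizer.sval);
        }
    }
}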

Example 25 with StreamTokenizer

use of java.io.StreamTokenizer in project android_frameworks_base by crdroidandroid.

the class TypedProperties method initTokenizer.

/**
     * Instantiates a {@link java.io.StreamTokenizer} and sets its syntax tables
     * appropriately for the {@code TypedProperties} file format.
     *
     * @param r The {@code Reader} that the {@code StreamTokenizer} will read from
     * @return a newly-created and initialized {@code StreamTokenizer}
     */
static StreamTokenizer initTokenizer(Reader r) {
    StreamTokenizer st = new StreamTokenizer(r);
    // Treat everything we don't specify as "ordinary".
    st.resetSyntax();
    /* The only non-quoted-string words we'll be reading are:
         * - property names: [._$a-zA-Z0-9]
         * - type names: [a-zS]
         * - number literals: [-0-9.eExXA-Za-z]  ('x' for 0xNNN hex literals. "NaN", "Infinity")
         * - "true" or "false" (case insensitive): [a-zA-Z]
         */
    st.wordChars('0', '9');
    st.wordChars('A', 'Z');
    st.wordChars('a', 'z');
    st.wordChars('_', '_');
    st.wordChars('$', '$');
    st.wordChars('.', '.');
    st.wordChars('-', '-');
    st.wordChars('+', '+');
    // Single-character tokens
    st.ordinaryChar('=');
    // Other special characters
    st.whitespaceChars(' ', ' ');
    st.whitespaceChars('\t', '\t');
    st.whitespaceChars('\n', '\n');
    st.whitespaceChars('\r', '\r');
    st.quoteChar('"');
    // Java-style comments
    st.slashStarComments(true);
    st.slashSlashComments(true);
    return st;
}
Also used : StreamTokenizer(java.io.StreamTokenizer)
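
A minimal sketch of the token stream this configuration yields for a small invented properties-like input (not necessarily valid TypedProperties syntax); the helper below repeats the same syntax setup so the sketch compiles on its own:

import java.io.Reader;
import java.io.StreamTokenizer;
import java.io.StringReader;

public class TypedPropertiesTokensDemo {
    public static void main(String[] args) throws Exception {
        String input = "debug.enabled = true\n"
                     + "threads = 4   // worker pool size\n"
                     + "name = \"my app\"\n";
        StreamTokenizer st = initTokenizer(new StringReader(input));
        while (st.nextToken() != StreamTokenizer.TT_EOF) {
            switch (st.ttype) {
                case StreamTokenizer.TT_WORD: System.out.println("WORD   " + st.sval); break;
                case '"':                     System.out.println("STRING " + st.sval); break;
                default:                      System.out.println("CHAR   " + (char) st.ttype); break;
            }
        }
    }

    // Same syntax setup as TypedProperties.initTokenizer() above, repeated here only
    // so this sketch is self-contained.
    static StreamTokenizer initTokenizer(Reader r) {
        StreamTokenizer st = new StreamTokenizer(r);
        st.resetSyntax();
        st.wordChars('0', '9'); st.wordChars('A', 'Z'); st.wordChars('a', 'z');
        st.wordChars('_', '_'); st.wordChars('$', '$'); st.wordChars('.', '.');
        st.wordChars('-', '-'); st.wordChars('+', '+');
        st.ordinaryChar('=');
        st.whitespaceChars(' ', ' '); st.whitespaceChars('\t', '\t');
        st.whitespaceChars('\n', '\n'); st.whitespaceChars('\r', '\r');
        st.quoteChar('"');
        st.slashStarComments(true);
        st.slashSlashComments(true);
        return st;
    }
}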

Aggregations

StreamTokenizer (java.io.StreamTokenizer): 58 usages
IOException (java.io.IOException): 22 usages
StringReader (java.io.StringReader): 16 usages
BufferedReader (java.io.BufferedReader): 9 usages
FileReader (java.io.FileReader): 8 usages
Reader (java.io.Reader): 7 usages
ArrayList (java.util.ArrayList): 7 usages
Pattern (java.util.regex.Pattern): 7 usages
ByteArrayInputStream (java.io.ByteArrayInputStream): 6 usages
InputStreamReader (java.io.InputStreamReader): 4 usages
Support_StringReader (tests.support.Support_StringReader): 4 usages
HashMap (java.util.HashMap): 3 usages
FileOutputStream (java.io.FileOutputStream): 2 usages
ParseException (java.text.ParseException): 2 usages
Locale (java.util.Locale): 2 usages
Attribute (smile.data.Attribute): 2 usages
DateAttribute (smile.data.DateAttribute): 2 usages
NominalAttribute (smile.data.NominalAttribute): 2 usages
NumericAttribute (smile.data.NumericAttribute): 2 usages
StringAttribute (smile.data.StringAttribute): 2 usages