Search in sources :

Example 1 with AnalyzerFactory

use of org.apache.lucene.benchmark.byTask.utils.AnalyzerFactory in project lucene-solr by apache.

the class AnalyzerFactoryTask method setParams.

/**
   * Sets the params by parsing an analysis pipeline specification and registering the
   * resulting {@link AnalyzerFactory} with the run data under the given name.
   * Analysis component factory names may optionally include the "Factory" suffix.
   *
   * <p>The specification is tokenized with a {@link StreamTokenizer} in which {@code #} starts
   * a comment, both single and double quotes delimit strings, line ends are not significant,
   * and {@code ( ) : ,} are returned as individual tokens. Commas between elements are ignored.
   *
   * @param params analysis pipeline specification: name, (optional) positionIncrementGap,
   *               (optional) offsetGap, 0+ CharFilterFactory's, 1 TokenizerFactory,
   *               and 0+ TokenFilterFactory's
   * @throws RuntimeException if the specification cannot be parsed; the message is prefixed
   *                          with {@code "Line #<n>: "} and any underlying failure is kept
   *                          as the cause
   */
@Override
@SuppressWarnings("fallthrough")
public void setParams(String params) {
    super.setParams(params);
    // Parser state: which kind of argument the next word token is allowed to be.
    ArgType expectedArgType = ArgType.ANALYZER_ARG;
    final StreamTokenizer stok = new StreamTokenizer(new StringReader(params));
    stok.commentChar('#');
    stok.quoteChar('"');
    stok.quoteChar('\'');
    stok.eolIsSignificant(false);
    stok.ordinaryChar('(');
    stok.ordinaryChar(')');
    stok.ordinaryChar(':');
    stok.ordinaryChar(',');
    try {
        while (stok.nextToken() != StreamTokenizer.TT_EOF) {
            switch(stok.ttype) {
                case ',':
                    {
                        // Separators between pipeline elements carry no information
                        break;
                    }
                case StreamTokenizer.TT_WORD:
                    {
                        if (expectedArgType.equals(ArgType.ANALYZER_ARG)) {
                            // Before 'name' has been seen, only the analyzer-level args are accepted.
                            final String argName = stok.sval;
                            if (!argName.equalsIgnoreCase("name") && !argName.equalsIgnoreCase("positionIncrementGap") && !argName.equalsIgnoreCase("offsetGap")) {
                                throw new RuntimeException("Line #" + lineno(stok) + ": Missing 'name' param to AnalyzerFactory: '" + params + "'");
                            }
                            stok.nextToken();
                            if (stok.ttype != ':') {
                                throw new RuntimeException("Line #" + lineno(stok) + ": Missing ':' after '" + argName + "' param to AnalyzerFactory");
                            }
                            stok.nextToken();
                            // For word and quoted tokens the value is in sval; numbers are converted below.
                            String argValue = stok.sval;
                            switch(stok.ttype) {
                                case StreamTokenizer.TT_NUMBER:
                                    {
                                        argValue = Double.toString(stok.nval);
                                        // Drop the ".0" from numbers, for integer arguments
                                        argValue = TRAILING_DOT_ZERO_PATTERN.matcher(argValue).replaceFirst("");
                                    // Intentional fallthrough
                                    }
                                case '"':
                                case '\'':
                                case StreamTokenizer.TT_WORD:
                                    {
                                        if (argName.equalsIgnoreCase("name")) {
                                            factoryName = argValue;
                                            // Once named, the pipeline components may start appearing.
                                            expectedArgType = ArgType.ANALYZER_ARG_OR_CHARFILTER_OR_TOKENIZER;
                                        } else {
                                            int intArgValue = 0;
                                            try {
                                                intArgValue = Integer.parseInt(argValue);
                                            } catch (NumberFormatException e) {
                                                throw new RuntimeException("Line #" + lineno(stok) + ": Exception parsing " + argName + " value '" + argValue + "'", e);
                                            }
                                            if (argName.equalsIgnoreCase("positionIncrementGap")) {
                                                positionIncrementGap = intArgValue;
                                            } else if (argName.equalsIgnoreCase("offsetGap")) {
                                                offsetGap = intArgValue;
                                            }
                                        }
                                        break;
                                    }
                                case StreamTokenizer.TT_EOF:
                                    {
                                        throw new RuntimeException("Unexpected EOF: " + stok.toString());
                                    }
                                default:
                                    {
                                        throw new RuntimeException("Line #" + lineno(stok) + ": Unexpected token: " + stok.toString());
                                    }
                            }
                        } else if (expectedArgType.equals(ArgType.ANALYZER_ARG_OR_CHARFILTER_OR_TOKENIZER)) {
                            // After 'name': either a gap setting, a char filter, or the tokenizer.
                            final String argName = stok.sval;
                            if (argName.equalsIgnoreCase("positionIncrementGap") || argName.equalsIgnoreCase("offsetGap")) {
                                stok.nextToken();
                                if (stok.ttype != ':') {
                                    throw new RuntimeException("Line #" + lineno(stok) + ": Missing ':' after '" + argName + "' param to AnalyzerFactory");
                                }
                                stok.nextToken();
                                // Used as-is when the token is a number; overwritten below for word/quoted tokens.
                                int intArgValue = (int) stok.nval;
                                switch(stok.ttype) {
                                    case '"':
                                    case '\'':
                                    case StreamTokenizer.TT_WORD:
                                        {
                                            intArgValue = 0;
                                            try {
                                                intArgValue = Integer.parseInt(stok.sval.trim());
                                            } catch (NumberFormatException e) {
                                                throw new RuntimeException("Line #" + lineno(stok) + ": Exception parsing " + argName + " value '" + stok.sval + "'", e);
                                            }
                                        // Intentional fall-through
                                        }
                                    case StreamTokenizer.TT_NUMBER:
                                        {
                                            if (argName.equalsIgnoreCase("positionIncrementGap")) {
                                                positionIncrementGap = intArgValue;
                                            } else if (argName.equalsIgnoreCase("offsetGap")) {
                                                offsetGap = intArgValue;
                                            }
                                            break;
                                        }
                                    case StreamTokenizer.TT_EOF:
                                        {
                                            throw new RuntimeException("Unexpected EOF: " + stok.toString());
                                        }
                                    default:
                                        {
                                            throw new RuntimeException("Line #" + lineno(stok) + ": Unexpected token: " + stok.toString());
                                        }
                                }
                                // Gap setting handled; leave the outer TT_WORD case
                                break;
                            }
                            // Not a gap setting: try the name as a char filter first, then as a tokenizer.
                            try {
                                final Class<? extends CharFilterFactory> clazz;
                                clazz = lookupAnalysisClass(argName, CharFilterFactory.class);
                                createAnalysisPipelineComponent(stok, clazz);
                            } catch (IllegalArgumentException e) {
                                try {
                                    final Class<? extends TokenizerFactory> clazz;
                                    clazz = lookupAnalysisClass(argName, TokenizerFactory.class);
                                    createAnalysisPipelineComponent(stok, clazz);
                                    // After the tokenizer, only token filters are accepted.
                                    expectedArgType = ArgType.TOKENFILTER;
                                } catch (IllegalArgumentException e2) {
                                    throw new RuntimeException("Line #" + lineno(stok) + ": Can't find class '" + argName + "' as CharFilterFactory or TokenizerFactory");
                                }
                            }
                        } else {
                            // expectedArgType = ArgType.TOKENFILTER
                            final String className = stok.sval;
                            final Class<? extends TokenFilterFactory> clazz;
                            try {
                                clazz = lookupAnalysisClass(className, TokenFilterFactory.class);
                            } catch (IllegalArgumentException e) {
                                throw new RuntimeException("Line #" + lineno(stok) + ": Can't find class '" + className + "' as TokenFilterFactory");
                            }
                            createAnalysisPipelineComponent(stok, clazz);
                        }
                        break;
                    }
                default:
                    {
                        throw new RuntimeException("Line #" + lineno(stok) + ": Unexpected token: " + stok.toString());
                    }
            }
        }
    } catch (RuntimeException e) {
        // getMessage() may be null (e.g. a NullPointerException without detail text);
        // such exceptions must be wrapped with the line number rather than dereferenced.
        final String message = e.getMessage();
        if (message != null && message.startsWith("Line #")) {
            // Already carries line information; propagate unchanged
            throw e;
        } else {
            throw new RuntimeException("Line #" + lineno(stok) + ": ", e);
        }
    } catch (Throwable t) {
        throw new RuntimeException("Line #" + lineno(stok) + ": ", t);
    }
    final AnalyzerFactory analyzerFactory = new AnalyzerFactory(charFilterFactories, tokenizerFactory, tokenFilterFactories);
    analyzerFactory.setPositionIncrementGap(positionIncrementGap);
    analyzerFactory.setOffsetGap(offsetGap);
    getRunData().getAnalyzerFactories().put(factoryName, analyzerFactory);
}
Also used : TokenizerFactory(org.apache.lucene.analysis.util.TokenizerFactory) CharFilterFactory(org.apache.lucene.analysis.util.CharFilterFactory) TokenFilterFactory(org.apache.lucene.analysis.util.TokenFilterFactory) StringReader(java.io.StringReader) StreamTokenizer(java.io.StreamTokenizer) AnalyzerFactory(org.apache.lucene.benchmark.byTask.utils.AnalyzerFactory)

Example 2 with AnalyzerFactory

use of org.apache.lucene.benchmark.byTask.utils.AnalyzerFactory in project lucene-solr by apache.

the class NewAnalyzerTask method doLogic.

/**
 * Selects the next analyzer name from the configured list (wrapping around at the end),
 * creates the corresponding analyzer and installs it on the run data.
 *
 * <p>An empty or null name selects
 * {@code org.apache.lucene.analysis.standard.StandardAnalyzer}. A name registered as an
 * analyzer factory in the run data takes precedence over class-name resolution.
 *
 * @return always 1
 * @throws RuntimeException if the analyzer cannot be created; the cause is preserved
 */
@Override
public int doLogic() throws IOException {
    String analyzerName = null;
    try {
        // Cycle back to the first configured name once the list is exhausted
        if (analyzerNames.size() <= current) {
            current = 0;
        }
        analyzerName = analyzerNames.get(current++);
        if (analyzerName == null || analyzerName.isEmpty()) {
            analyzerName = "org.apache.lucene.analysis.standard.StandardAnalyzer";
        }

        Analyzer created;
        // A named analyzer factory wins over any class-name interpretation
        final AnalyzerFactory namedFactory = getRunData().getAnalyzerFactories().get(analyzerName);
        if (namedFactory != null) {
            created = namedFactory.create();
        } else if (!analyzerName.contains(".")) {
            // Bare class name: try the core package first, then the base analysis package
            final String simpleName = analyzerName;
            try {
                final String coreClassName = "org.apache.lucene.analysis.core." + simpleName;
                created = createAnalyzer(coreClassName);
                analyzerName = coreClassName;
            } catch (ClassNotFoundException e) {
                analyzerName = "org.apache.lucene.analysis." + simpleName;
                created = createAnalyzer(analyzerName);
            }
        } else {
            // Qualified name; "standard." is shorthand for the standard analysis package
            if (analyzerName.startsWith("standard.")) {
                analyzerName = "org.apache.lucene.analysis." + analyzerName;
            }
            created = createAnalyzer(analyzerName);
        }
        getRunData().setAnalyzer(created);
    } catch (Exception e) {
        throw new RuntimeException("Error creating Analyzer: " + analyzerName, e);
    }
    return 1;
}
Also used : Analyzer(org.apache.lucene.analysis.Analyzer) IOException(java.io.IOException) AnalyzerFactory(org.apache.lucene.benchmark.byTask.utils.AnalyzerFactory)

Aggregations

AnalyzerFactory (org.apache.lucene.benchmark.byTask.utils.AnalyzerFactory): 2, IOException (java.io.IOException): 1, StreamTokenizer (java.io.StreamTokenizer): 1, StringReader (java.io.StringReader): 1, Analyzer (org.apache.lucene.analysis.Analyzer): 1, CharFilterFactory (org.apache.lucene.analysis.util.CharFilterFactory): 1, TokenFilterFactory (org.apache.lucene.analysis.util.TokenFilterFactory): 1, TokenizerFactory (org.apache.lucene.analysis.util.TokenizerFactory): 1