Search in sources :

Example 6 with AbstractAnalysisFactory

use of org.apache.lucene.analysis.util.AbstractAnalysisFactory in project lucene-solr by apache.

the class TestFactories method doTestTokenFilter.

/**
 * Looks up the token filter factory registered under the given name,
 * initializes it, and runs sanity checks against it.
 *
 * <p>Nothing is checked when {@code initialize} returns {@code null}.
 * Otherwise, a factory that is a {@link MultiTermAwareComponent} must
 * expose a non-null multi-term component that is itself a
 * {@link TokenFilterFactory}, and the factory is then exercised with
 * random data through a {@code FactoryAnalyzer}.
 *
 * @param tokenfilter name passed to {@code TokenFilterFactory.lookupClass}
 * @throws IOException declared for the initialization and random-data checks
 */
private void doTestTokenFilter(String tokenfilter) throws IOException {
    Class<? extends TokenFilterFactory> factoryClazz = TokenFilterFactory.lookupClass(tokenfilter);
    TokenFilterFactory factory = (TokenFilterFactory) initialize(factoryClazz);
    if (factory == null) {
        return;
    }
    // if it implements MultiTermAware, sanity check its impl
    if (factory instanceof MultiTermAwareComponent) {
        AbstractAnalysisFactory mtc = ((MultiTermAwareComponent) factory).getMultiTermComponent();
        assertNotNull(mtc);
        // it's not ok to return a charfilter or tokenizer here, this makes no sense
        assertTrue(mtc instanceof TokenFilterFactory);
    }
    // beast it just a little, it shouldn't throw exceptions:
    // (it should have thrown them in initialize)
    // try-with-resources so the analyzer is closed even when the random-data check fails
    try (Analyzer a = new FactoryAnalyzer(assertingTokenizer, factory, null)) {
        checkRandomData(random(), a, 20, 20, false, false);
    }
}
Also used : MultiTermAwareComponent(org.apache.lucene.analysis.util.MultiTermAwareComponent) AbstractAnalysisFactory(org.apache.lucene.analysis.util.AbstractAnalysisFactory) Analyzer(org.apache.lucene.analysis.Analyzer) TokenFilterFactory(org.apache.lucene.analysis.util.TokenFilterFactory)

Example 7 with AbstractAnalysisFactory

use of org.apache.lucene.analysis.util.AbstractAnalysisFactory in project lucene-solr by apache.

the class TestFactories method doTestCharFilter.

/**
 * Looks up the char filter factory registered under the given name,
 * initializes it, and runs sanity checks against it.
 *
 * <p>Nothing is checked when {@code initialize} returns {@code null}.
 * Otherwise, a factory that is a {@link MultiTermAwareComponent} must
 * expose a non-null multi-term component that is itself a
 * {@link CharFilterFactory}, and the factory is then exercised with
 * random data through a {@code FactoryAnalyzer}.
 *
 * @param charfilter name passed to {@code CharFilterFactory.lookupClass}
 * @throws IOException declared for the initialization and random-data checks
 */
private void doTestCharFilter(String charfilter) throws IOException {
    Class<? extends CharFilterFactory> factoryClazz = CharFilterFactory.lookupClass(charfilter);
    CharFilterFactory factory = (CharFilterFactory) initialize(factoryClazz);
    if (factory == null) {
        return;
    }
    // if it implements MultiTermAware, sanity check its impl
    if (factory instanceof MultiTermAwareComponent) {
        AbstractAnalysisFactory mtc = ((MultiTermAwareComponent) factory).getMultiTermComponent();
        assertNotNull(mtc);
        // it's not ok to return a tokenizer or tokenfilter here, this makes no sense
        assertTrue(mtc instanceof CharFilterFactory);
    }
    // beast it just a little, it shouldn't throw exceptions:
    // (it should have thrown them in initialize)
    // try-with-resources so the analyzer is closed even when the random-data check fails
    try (Analyzer a = new FactoryAnalyzer(assertingTokenizer, null, factory)) {
        checkRandomData(random(), a, 20, 20, false, false);
    }
}
Also used : MultiTermAwareComponent(org.apache.lucene.analysis.util.MultiTermAwareComponent) CharFilterFactory(org.apache.lucene.analysis.util.CharFilterFactory) AbstractAnalysisFactory(org.apache.lucene.analysis.util.AbstractAnalysisFactory) Analyzer(org.apache.lucene.analysis.Analyzer)

Example 8 with AbstractAnalysisFactory

use of org.apache.lucene.analysis.util.AbstractAnalysisFactory in project lucene-solr by apache.

the class AnalyzerFactoryTask method createAnalysisPipelineComponent.

/**
 * Instantiates the given analysis factory class after pulling params from
 * the given stream tokenizer, then stores the result in the appropriate
 * pipeline component list.
 *
 * <p>Params are read as {@code name:value} pairs inside a single pair of
 * parentheses. A comma outside the parentheses ends this factory's
 * configuration. When no {@code luceneMatchVersion} param is supplied,
 * {@code Version.LATEST} is used. A factory that is
 * {@link ResourceLoaderAware} is informed with a filesystem loader rooted
 * at the configured {@code work.dir} (falling back to the current
 * directory when that path is not a directory).
 *
 * <p>The instance is stored only if {@code clazz} is a char filter,
 * tokenizer or token filter factory; any other class is instantiated but
 * not retained.
 *
 * @param stok stream tokenizer from which to draw analysis factory params
 * @param clazz analysis factory class to instantiate
 * @throws RuntimeException on any parse or instantiation failure; the
 *         message is prefixed with {@code "Line #"} and the current line
 *         number, except for an unexpected EOF inside a param value
 */
@SuppressWarnings("fallthrough")
private void createAnalysisPipelineComponent(StreamTokenizer stok, Class<? extends AbstractAnalysisFactory> clazz) {
    Map<String, String> argMap = new HashMap<>();
    // true while the tokenizer is positioned between '(' and ')'
    boolean parenthetical = false;
    try {
        WHILE_LOOP: while (stok.nextToken() != StreamTokenizer.TT_EOF) {
            switch(stok.ttype) {
                case ',':
                    {
                        if (parenthetical) {
                            // Separator between params: nothing to do
                            break;
                        } else {
                            // Finished reading this analysis factory configuration
                            break WHILE_LOOP;
                        }
                    }
                case '(':
                    {
                        if (parenthetical) {
                            throw new RuntimeException("Line #" + lineno(stok) + ": Unexpected opening parenthesis.");
                        }
                        parenthetical = true;
                        break;
                    }
                case ')':
                    {
                        if (parenthetical) {
                            parenthetical = false;
                        } else {
                            throw new RuntimeException("Line #" + lineno(stok) + ": Unexpected closing parenthesis.");
                        }
                        break;
                    }
                case StreamTokenizer.TT_WORD:
                    {
                        // A word is only legal as a param name, i.e. inside the parentheses
                        if (!parenthetical) {
                            throw new RuntimeException("Line #" + lineno(stok) + ": Unexpected token '" + stok.sval + "'");
                        }
                        String argName = stok.sval;
                        stok.nextToken();
                        if (stok.ttype != ':') {
                            throw new RuntimeException("Line #" + lineno(stok) + ": Missing ':' after '" + argName + "' param to " + clazz.getSimpleName());
                        }
                        stok.nextToken();
                        String argValue = stok.sval;
                        switch(stok.ttype) {
                            case StreamTokenizer.TT_NUMBER:
                                {
                                    argValue = Double.toString(stok.nval);
                                    // Drop the ".0" from numbers, for integer arguments
                                    argValue = TRAILING_DOT_ZERO_PATTERN.matcher(argValue).replaceFirst("");
                                // Intentional fall-through: numbers are stored like any other value
                                }
                            case '"':
                            case '\'':
                            case StreamTokenizer.TT_WORD:
                                {
                                    argMap.put(argName, argValue);
                                    break;
                                }
                            case StreamTokenizer.TT_EOF:
                                {
                                    throw new RuntimeException("Unexpected EOF: " + stok.toString());
                                }
                            default:
                                {
                                    throw new RuntimeException("Line #" + lineno(stok) + ": Unexpected token: " + stok.toString());
                                }
                        }
                    }
            }
        }
        if (!argMap.containsKey("luceneMatchVersion")) {
            argMap.put("luceneMatchVersion", Version.LATEST.toString());
        }
        final AbstractAnalysisFactory instance;
        try {
            instance = clazz.getConstructor(Map.class).newInstance(argMap);
        } catch (Exception e) {
            throw new RuntimeException("Line #" + lineno(stok) + ": ", e);
        }
        if (instance instanceof ResourceLoaderAware) {
            Path baseDir = Paths.get(getRunData().getConfig().get("work.dir", "work"));
            if (!Files.isDirectory(baseDir)) {
                baseDir = Paths.get(".");
            }
            ((ResourceLoaderAware) instance).inform(new FilesystemResourceLoader(baseDir));
        }
        if (CharFilterFactory.class.isAssignableFrom(clazz)) {
            charFilterFactories.add((CharFilterFactory) instance);
        } else if (TokenizerFactory.class.isAssignableFrom(clazz)) {
            tokenizerFactory = (TokenizerFactory) instance;
        } else if (TokenFilterFactory.class.isAssignableFrom(clazz)) {
            tokenFilterFactories.add((TokenFilterFactory) instance);
        }
    } catch (RuntimeException e) {
        // Exceptions that already carry line info are rethrown untouched.
        // getMessage() may be null (e.g. a bare NullPointerException), so
        // guard against it instead of failing inside this handler and
        // losing the original cause.
        String message = e.getMessage();
        if (message != null && message.startsWith("Line #")) {
            throw e;
        }
        throw new RuntimeException("Line #" + lineno(stok) + ": ", e);
    } catch (Throwable t) {
        throw new RuntimeException("Line #" + lineno(stok) + ": ", t);
    }
}
Also used : Path(java.nio.file.Path) FilesystemResourceLoader(org.apache.lucene.analysis.util.FilesystemResourceLoader) TokenizerFactory(org.apache.lucene.analysis.util.TokenizerFactory) HashMap(java.util.HashMap) ResourceLoaderAware(org.apache.lucene.analysis.util.ResourceLoaderAware) AbstractAnalysisFactory(org.apache.lucene.analysis.util.AbstractAnalysisFactory)

Example 9 with AbstractAnalysisFactory

use of org.apache.lucene.analysis.util.AbstractAnalysisFactory in project lucene-solr by apache.

the class TestFactories method doTestCharFilter.

/**
 * Sanity-checks the char filter factory registered under the given name.
 *
 * <p>The factory class is resolved via {@code CharFilterFactory.lookupClass}
 * and passed to {@code initialize}; a {@code null} result skips all checks.
 * If the factory is a {@link MultiTermAwareComponent}, its multi-term
 * component must be non-null and must also be a {@link CharFilterFactory}.
 * Finally the factory is run over random data inside a
 * {@code FactoryAnalyzer}.
 *
 * @param charfilter name passed to {@code CharFilterFactory.lookupClass}
 * @throws IOException declared for the initialization and random-data checks
 */
private void doTestCharFilter(String charfilter) throws IOException {
    Class<? extends CharFilterFactory> factoryClazz = CharFilterFactory.lookupClass(charfilter);
    CharFilterFactory factory = (CharFilterFactory) initialize(factoryClazz);
    if (factory == null) {
        return;
    }
    // if it implements MultiTermAware, sanity check its impl
    if (factory instanceof MultiTermAwareComponent) {
        AbstractAnalysisFactory mtc = ((MultiTermAwareComponent) factory).getMultiTermComponent();
        assertNotNull(mtc);
        // it's not ok to return a tokenizer or tokenfilter here, this makes no sense
        assertTrue(mtc instanceof CharFilterFactory);
    }
    // beast it just a little, it shouldn't throw exceptions:
    // (it should have thrown them in initialize)
    // the analyzer is a resource: close it even if the random-data check throws
    try (Analyzer a = new FactoryAnalyzer(assertingTokenizer, null, factory)) {
        checkRandomData(random(), a, 20, 20, false, false);
    }
}
Also used : MultiTermAwareComponent(org.apache.lucene.analysis.util.MultiTermAwareComponent) CharFilterFactory(org.apache.lucene.analysis.util.CharFilterFactory) AbstractAnalysisFactory(org.apache.lucene.analysis.util.AbstractAnalysisFactory) Analyzer(org.apache.lucene.analysis.Analyzer)

Aggregations

AbstractAnalysisFactory (org.apache.lucene.analysis.util.AbstractAnalysisFactory): 9, Analyzer (org.apache.lucene.analysis.Analyzer): 6, MultiTermAwareComponent (org.apache.lucene.analysis.util.MultiTermAwareComponent): 6, CharFilterFactory (org.apache.lucene.analysis.util.CharFilterFactory): 4, HashMap (java.util.HashMap): 3, ResourceLoaderAware (org.apache.lucene.analysis.util.ResourceLoaderAware): 3, TokenizerFactory (org.apache.lucene.analysis.util.TokenizerFactory): 3, IOException (java.io.IOException): 2, InvocationTargetException (java.lang.reflect.InvocationTargetException): 2, Map (java.util.Map): 2, TokenFilterFactory (org.apache.lucene.analysis.util.TokenFilterFactory): 2, Path (java.nio.file.Path): 1, FilesystemResourceLoader (org.apache.lucene.analysis.util.FilesystemResourceLoader): 1, StringMockResourceLoader (org.apache.lucene.analysis.util.StringMockResourceLoader): 1