Search in sources :

Example 1 with FilesystemResourceLoader

use of org.apache.lucene.analysis.util.FilesystemResourceLoader in project lucene-solr by apache.

The method createAnalysisPipelineComponent of the class AnalyzerFactoryTask.

/**
 * Instantiates the given analysis factory class after pulling params from
 * the given stream tokenizer, then stores the result in the appropriate
 * pipeline component list.
 * <p>
 * Params are read as a parenthesized sequence of {@code name:value} pairs.
 * Tokens are consumed until either a {@code ','} is seen outside the
 * parentheses or the tokenizer reaches EOF. If no {@code luceneMatchVersion}
 * param was supplied, {@code Version.LATEST} is used. Factories that are
 * {@link ResourceLoaderAware} are informed with a
 * {@link FilesystemResourceLoader} rooted at the configured {@code work.dir}
 * (default {@code "work"}), or at {@code "."} when that is not a directory.
 *
 * @param stok stream tokenizer from which to draw analysis factory params
 * @param clazz analysis factory class to instantiate
 * @throws RuntimeException on any parse or instantiation failure; the message
 *         always starts with {@code "Line #"} followed by the current line number
 */
@SuppressWarnings("fallthrough")
private void createAnalysisPipelineComponent(StreamTokenizer stok, Class<? extends AbstractAnalysisFactory> clazz) {
    Map<String, String> argMap = new HashMap<>();
    // True while we are between '(' and ')' of the factory's param list.
    boolean parenthetical = false;
    try {
        WHILE_LOOP: while (stok.nextToken() != StreamTokenizer.TT_EOF) {
            switch(stok.ttype) {
                case ',':
                    {
                        if (parenthetical) {
                            // Separator between params: nothing to do
                            break;
                        } else {
                            // Finished reading this analysis factory configuration
                            break WHILE_LOOP;
                        }
                    }
                case '(':
                    {
                        if (parenthetical) {
                            throw new RuntimeException("Line #" + lineno(stok) + ": Unexpected opening parenthesis.");
                        }
                        parenthetical = true;
                        break;
                    }
                case ')':
                    {
                        if (parenthetical) {
                            parenthetical = false;
                        } else {
                            throw new RuntimeException("Line #" + lineno(stok) + ": Unexpected closing parenthesis.");
                        }
                        break;
                    }
                case StreamTokenizer.TT_WORD:
                    {
                        // A word is only legal as a param name inside the parentheses.
                        if (!parenthetical) {
                            throw new RuntimeException("Line #" + lineno(stok) + ": Unexpected token '" + stok.sval + "'");
                        }
                        String argName = stok.sval;
                        stok.nextToken();
                        if (stok.ttype != ':') {
                            throw new RuntimeException("Line #" + lineno(stok) + ": Missing ':' after '" + argName + "' param to " + clazz.getSimpleName());
                        }
                        stok.nextToken();
                        String argValue = stok.sval;
                        switch(stok.ttype) {
                            case StreamTokenizer.TT_NUMBER:
                                {
                                    argValue = Double.toString(stok.nval);
                                    // Drop the ".0" from numbers, for integer arguments
                                    argValue = TRAILING_DOT_ZERO_PATTERN.matcher(argValue).replaceFirst("");
                                // Intentional fall-through
                                }
                            case '"':
                            case '\'':
                            case StreamTokenizer.TT_WORD:
                                {
                                    argMap.put(argName, argValue);
                                    break;
                                }
                            case StreamTokenizer.TT_EOF:
                                {
                                    // No "Line #" prefix here: the outer handler wraps it with one.
                                    throw new RuntimeException("Unexpected EOF: " + stok.toString());
                                }
                            default:
                                {
                                    throw new RuntimeException("Line #" + lineno(stok) + ": Unexpected token: " + stok.toString());
                                }
                        }
                        break;
                    }
            }
        }
        if (!argMap.containsKey("luceneMatchVersion")) {
            argMap.put("luceneMatchVersion", Version.LATEST.toString());
        }
        final AbstractAnalysisFactory instance;
        try {
            instance = clazz.getConstructor(Map.class).newInstance(argMap);
        } catch (Exception e) {
            throw new RuntimeException("Line #" + lineno(stok) + ": ", e);
        }
        if (instance instanceof ResourceLoaderAware) {
            Path baseDir = Paths.get(getRunData().getConfig().get("work.dir", "work"));
            if (!Files.isDirectory(baseDir)) {
                baseDir = Paths.get(".");
            }
            ((ResourceLoaderAware) instance).inform(new FilesystemResourceLoader(baseDir));
        }
        if (CharFilterFactory.class.isAssignableFrom(clazz)) {
            charFilterFactories.add((CharFilterFactory) instance);
        } else if (TokenizerFactory.class.isAssignableFrom(clazz)) {
            tokenizerFactory = (TokenizerFactory) instance;
        } else if (TokenFilterFactory.class.isAssignableFrom(clazz)) {
            tokenFilterFactories.add((TokenFilterFactory) instance);
        }
    } catch (RuntimeException e) {
        // getMessage() may be null (e.g. a bare NullPointerException); guard so
        // the handler itself cannot throw and mask the original failure.
        final String message = e.getMessage();
        if (message != null && message.startsWith("Line #")) {
            // Already carries line information: propagate unchanged.
            throw e;
        } else {
            throw new RuntimeException("Line #" + lineno(stok) + ": ", e);
        }
    } catch (Throwable t) {
        throw new RuntimeException("Line #" + lineno(stok) + ": ", t);
    }
}
Also used : Path(java.nio.file.Path) FilesystemResourceLoader(org.apache.lucene.analysis.util.FilesystemResourceLoader) TokenizerFactory(org.apache.lucene.analysis.util.TokenizerFactory) HashMap(java.util.HashMap) ResourceLoaderAware(org.apache.lucene.analysis.util.ResourceLoaderAware) AbstractAnalysisFactory(org.apache.lucene.analysis.util.AbstractAnalysisFactory)

Example 2 with FilesystemResourceLoader

use of org.apache.lucene.analysis.util.FilesystemResourceLoader in project lucene-solr by apache.

The method setupSolrHome of the class TestICUCollationField.

/**
 * Creates a temporary Solr home containing a {@code collection1} core with
 * {@code data} and {@code conf} directories, copies the ICU collation test
 * configuration into it, and generates a custom collation rules file
 * ({@code customrules.dat}) in the {@code conf} directory.
 * <p>
 * Ugly: but what to do? We want to test custom sort, which reads rules in as a resource.
 * These are largish files, and jvm-specific (as our documentation says, you should always
 * look out for jvm differences with collation).
 * So it's preferable to create this file on-the-fly.
 * <p>
 * Before returning, the rules are read back through a randomly chosen
 * resource loader and the resulting collator is asserted equal to the
 * tailored collator that produced them.
 *
 * @return absolute path of the temporary Solr home directory
 * @throws Exception if the directory layout or the rules file cannot be created
 */
public static String setupSolrHome() throws Exception {
    String tmpFile = createTempDir().toFile().getAbsolutePath();
    // make data and conf dirs
    new File(tmpFile + "/collection1", "data").mkdirs();
    File confDir = new File(tmpFile + "/collection1", "conf");
    confDir.mkdirs();
    // copy over configuration files
    FileUtils.copyFile(getFile("analysis-extras/solr/collection1/conf/solrconfig-icucollate.xml"), new File(confDir, "solrconfig.xml"));
    FileUtils.copyFile(getFile("analysis-extras/solr/collection1/conf/schema-icucollate.xml"), new File(confDir, "schema.xml"));
    // generate custom collation rules (DIN 5007-2), saving to customrules.dat
    RuleBasedCollator baseCollator = (RuleBasedCollator) Collator.getInstance(new ULocale("de", "DE"));
    // NOTE(review): the last rule maps "UE" to a lower-case u-umlaut, unlike the
    // AE/OE rules which use upper-case targets -- possibly a typo; confirm before changing.
    String DIN5007_2_tailorings = "& ae , ä & AE , Ä" + "& oe , ö & OE , Ö" + "& ue , ü & UE , ü";
    RuleBasedCollator tailoredCollator = new RuleBasedCollator(baseCollator.getRules() + DIN5007_2_tailorings);
    String tailoredRules = tailoredCollator.getRules();
    final String osFileName = "customrules.dat";
    // try-with-resources so the stream is closed even if the write fails
    try (FileOutputStream os = new FileOutputStream(new File(confDir, osFileName))) {
        IOUtils.write(tailoredRules, os, "UTF-8");
    }
    // Randomly exercise either the in-memory or the filesystem-backed loader.
    final ResourceLoader loader;
    if (random().nextBoolean()) {
        loader = new StringMockResourceLoader(tailoredRules);
    } else {
        loader = new FilesystemResourceLoader(confDir.toPath());
    }
    final Collator readCollator = ICUCollationField.createFromRules(osFileName, loader);
    assertEquals(tailoredCollator, readCollator);
    return tmpFile;
}
Also used : ResourceLoader(org.apache.lucene.analysis.util.ResourceLoader) StringMockResourceLoader(org.apache.lucene.analysis.util.StringMockResourceLoader) FilesystemResourceLoader(org.apache.lucene.analysis.util.FilesystemResourceLoader) RuleBasedCollator(com.ibm.icu.text.RuleBasedCollator) ULocale(com.ibm.icu.util.ULocale) FileOutputStream(java.io.FileOutputStream) File(java.io.File) Collator(com.ibm.icu.text.Collator)

Aggregations

FilesystemResourceLoader (org.apache.lucene.analysis.util.FilesystemResourceLoader)2 Collator (com.ibm.icu.text.Collator)1 RuleBasedCollator (com.ibm.icu.text.RuleBasedCollator)1 ULocale (com.ibm.icu.util.ULocale)1 File (java.io.File)1 FileOutputStream (java.io.FileOutputStream)1 Path (java.nio.file.Path)1 HashMap (java.util.HashMap)1 AbstractAnalysisFactory (org.apache.lucene.analysis.util.AbstractAnalysisFactory)1 ResourceLoader (org.apache.lucene.analysis.util.ResourceLoader)1 ResourceLoaderAware (org.apache.lucene.analysis.util.ResourceLoaderAware)1 StringMockResourceLoader (org.apache.lucene.analysis.util.StringMockResourceLoader)1 TokenizerFactory (org.apache.lucene.analysis.util.TokenizerFactory)1