Search in sources :

Example 6 with ResourceLoader

use of org.apache.lucene.analysis.util.ResourceLoader in project lucene-solr by apache.

the class TestSuggestStopFilterFactory method testInform.

/**
 * Exercises the stop-word sets exposed by factories built via {@code createFactory}:
 * a single word file, a comma-separated pair of word files, a snowball-format file,
 * and finally a factory created with no arguments (the defaults).
 */
public void testInform() throws Exception {
    ResourceLoader resourceLoader = new ClasspathResourceLoader(getClass());
    assertTrue("loader is null and it shouldn't be", resourceLoader != null);

    // one word file, case-insensitive
    SuggestStopFilterFactory stopFactory = createFactory("words", "stop-1.txt", "ignoreCase", "true");
    CharArraySet stopWords = stopFactory.getStopWords();
    assertTrue("words is null and it shouldn't be", stopWords != null);
    int wordCount = stopWords.size();
    assertTrue("words Size: " + wordCount + " is not: " + 2, wordCount == 2);
    boolean ignoresCase = stopFactory.isIgnoreCase();
    assertTrue(ignoresCase + " does not equal: " + true, ignoresCase);

    // two word files given as a comma-separated list
    stopFactory = createFactory("words", "stop-1.txt, stop-2.txt", "ignoreCase", "true");
    stopWords = stopFactory.getStopWords();
    assertTrue("words is null and it shouldn't be", stopWords != null);
    wordCount = stopWords.size();
    assertTrue("words Size: " + wordCount + " is not: " + 4, wordCount == 4);
    ignoresCase = stopFactory.isIgnoreCase();
    assertTrue(ignoresCase + " does not equal: " + true, ignoresCase);

    // snowball-formatted word file
    stopFactory = createFactory("words", "stop-snowball.txt", "format", "snowball", "ignoreCase", "true");
    stopWords = stopFactory.getStopWords();
    assertEquals(8, stopWords.size());
    String[] expectedEntries = { "he", "him", "his", "himself", "she", "her", "hers", "herself" };
    for (String entry : expectedEntries) {
        assertTrue(stopWords.contains(entry));
    }

    // defaults
    stopFactory = createFactory();
    assertEquals(StopAnalyzer.ENGLISH_STOP_WORDS_SET, stopFactory.getStopWords());
    assertEquals(false, stopFactory.isIgnoreCase());
}
Also used : ClasspathResourceLoader(org.apache.lucene.analysis.util.ClasspathResourceLoader) ResourceLoader(org.apache.lucene.analysis.util.ResourceLoader) CharArraySet(org.apache.lucene.analysis.CharArraySet)

Example 7 with ResourceLoader

use of org.apache.lucene.analysis.util.ResourceLoader in project lucene-solr by apache.

the class ManagedSynonymGraphFilterFactory method onManagedResourceInitialized.

/**
 * Builds the delegate {@code SynonymGraphFilterFactory} from the supplied init args and
 * initializes it with the managed resource's loader.
 * <p>
 * Per this callback's contract it is invoked once, during core initialization, so that
 * analysis components depending on the managed data are initialized exactly once and text
 * analysis stays consistent, especially in a distributed environment, where one server
 * must not apply a different set of synonyms than the others.
 * <p>
 * Note that the entries "synonyms", "expand" and "format" are appended to the passed-in
 * {@code initArgs} list itself.
 *
 * @param initArgs init arguments; every value is converted with {@code toString()}
 * @param res      the managed resource, cast to {@code SynonymManager} when synonyms are loaded
 * @throws SolrException with {@code SERVER_ERROR} if informing the delegate raises an
 *                       {@code IOException}
 */
@SuppressWarnings("unchecked")
@Override
public void onManagedResourceInitialized(NamedList<?> initArgs, final ManagedResource res) throws SolrException {
    final NamedList<Object> mutableArgs = (NamedList<Object>) initArgs;
    mutableArgs.add("synonyms", getResourceId());
    mutableArgs.add("expand", "false");
    mutableArgs.add("format", "solr");

    // the filter factory constructor wants plain String -> String arguments
    final Map<String, String> factoryArgs = new HashMap<>();
    for (Map.Entry<String, ?> arg : mutableArgs) {
        final String key = arg.getKey();
        final String value = arg.getValue().toString();
        factoryArgs.put(key, value);
    }

    // the real filter factory; its synonym map comes from the managed resource
    // through a custom parser rather than from a synonyms file
    delegate = new SynonymGraphFilterFactory(factoryArgs) {

        @Override
        protected SynonymMap loadSynonyms(ResourceLoader loader, String cname, boolean dedup, Analyzer analyzer) throws IOException, ParseException {
            final SynonymManager manager = (SynonymManager) res;
            final ManagedSynonymParser managedParser = new ManagedSynonymParser(manager, dedup, analyzer);
            // passing null is fine: nothing is actually parsed from an input Reader
            managedParser.parse(null);
            return managedParser.build();
        }
    };

    try {
        delegate.inform(res.getResourceLoader());
    } catch (IOException ioe) {
        throw new SolrException(ErrorCode.SERVER_ERROR, ioe);
    }
}
Also used : ResourceLoader(org.apache.lucene.analysis.util.ResourceLoader) SolrResourceLoader(org.apache.solr.core.SolrResourceLoader) HashMap(java.util.HashMap) NamedList(org.apache.solr.common.util.NamedList) IOException(java.io.IOException) Analyzer(org.apache.lucene.analysis.Analyzer) SynonymMap(org.apache.lucene.analysis.synonym.SynonymMap) SynonymGraphFilterFactory(org.apache.lucene.analysis.synonym.SynonymGraphFilterFactory) ParseException(java.text.ParseException) Map(java.util.Map) TreeMap(java.util.TreeMap) SolrException(org.apache.solr.common.SolrException)

Example 8 with ResourceLoader

use of org.apache.lucene.analysis.util.ResourceLoader in project lucene-solr by apache.

the class XPathEntityProcessor method initXpathReader.

/**
 * Sets up this processor's reading state from the current entity's attributes:
 * the streaming/queue settings, the optional XSL transformer, the
 * {@code XPathRecordReader} with its fields, the URL placeholder variables and
 * the list of common fields.
 *
 * @param resolver used to list the variables referenced by the entity's URL attribute
 * @throws DataImportHandlerException if the XSL cannot be initialized, the
 *         {@code forEach} attribute is missing, or registering the xpath fields fails
 * @throws NumberFormatException if {@code batchSize} or {@code readTimeOut} is set
 *         but is not a valid integer
 */
private void initXpathReader(VariableResolver resolver) {
    reinitXPathReader = false;
    useSolrAddXml = Boolean.parseBoolean(context.getEntityAttribute(USE_SOLR_ADD_SCHEMA));
    streamRows = Boolean.parseBoolean(context.getEntityAttribute(STREAM));

    // Parse the same (resolved) value that is null-checked; previously the raw,
    // unresolved attribute text was handed to Integer.parseInt.
    String batchSize = context.getResolvedEntityAttribute("batchSize");
    if (batchSize != null) {
        blockingQueueSize = Integer.parseInt(batchSize);
    }
    String readTimeOut = context.getResolvedEntityAttribute("readTimeOut");
    if (readTimeOut != null) {
        blockingQueueTimeOut = Integer.parseInt(readTimeOut);
    }

    String xslt = context.getEntityAttribute(XSL);
    if (xslt != null) {
        xslt = context.replaceTokens(xslt);
        try {
            // create an instance of TransformerFactory
            TransformerFactory transFact = TransformerFactory.newInstance();
            final SolrCore core = context.getSolrCore();
            final StreamSource xsltSource;
            if (core != null) {
                // with a core available, the stylesheet is opened through its resource loader
                final ResourceLoader loader = core.getResourceLoader();
                transFact.setURIResolver(new SystemIdResolver(loader).asURIResolver());
                xsltSource = new StreamSource(loader.openResource(xslt), SystemIdResolver.createSystemIdFromResourceName(xslt));
            } else {
                // fallback for tests
                xsltSource = new StreamSource(xslt);
            }
            transFact.setErrorListener(xmllog);
            try {
                xslTransformer = transFact.newTransformer(xsltSource);
            } finally {
                // some XML parsers are broken and don't close the byte stream (but they should according to spec)
                IOUtils.closeQuietly(xsltSource.getInputStream());
            }
            LOG.info("Using xslTransformer: " + xslTransformer.getClass().getName());
        } catch (Exception e) {
            throw new DataImportHandlerException(SEVERE, "Error initializing XSL ", e);
        }
    }

    if (useSolrAddXml) {
        // Support solr add documents
        xpathReader = new XPathRecordReader("/add/doc");
        xpathReader.addField("name", "/add/doc/field/@name", true);
        xpathReader.addField("value", "/add/doc/field", true);
    } else {
        String forEachXpath = context.getResolvedEntityAttribute(FOR_EACH);
        if (forEachXpath == null) {
            throw new DataImportHandlerException(SEVERE, "Entity : " + context.getEntityAttribute("name") + " must have a 'forEach' attribute");
        }
        // NOTE(review): this requests re-initialization when the resolved and raw values
        // are EQUAL, whereas the per-field check below does so when they DIFFER.
        // Possibly inverted; left unchanged here - confirm the intended semantics.
        if (forEachXpath.equals(context.getEntityAttribute(FOR_EACH))) {
            reinitXPathReader = true;
        }
        try {
            xpathReader = new XPathRecordReader(forEachXpath);
            for (Map<String, String> field : context.getAllEntityFields()) {
                String rawXpath = field.get(XPATH);
                if (rawXpath == null) {
                    continue;
                }
                int flags = 0;
                if ("true".equals(field.get("flatten"))) {
                    flags = XPathRecordReader.FLATTEN;
                }
                String xpath = context.replaceTokens(rawXpath);
                // token replacement changed the xpath of a non-root entity: mark for re-init
                if (!xpath.equals(rawXpath) && !context.isRootEntity()) {
                    reinitXPathReader = true;
                }
                xpathReader.addField(field.get(DataImporter.COLUMN), xpath, Boolean.parseBoolean(field.get(DataImporter.MULTI_VALUED)), flags);
            }
        } catch (RuntimeException e) {
            throw new DataImportHandlerException(SEVERE, "Exception while reading xpaths for fields", e);
        }
    }

    // remember the URL variables that are prefixed with "<entityName>."
    String url = context.getEntityAttribute(URL);
    List<String> urlVariables = url == null ? Collections.<String>emptyList() : resolver.getVariables(url);
    for (String variable : urlVariables) {
        if (variable.startsWith(entityName + ".")) {
            if (placeHolderVariables == null) {
                placeHolderVariables = new ArrayList<>();
            }
            placeHolderVariables.add(variable.substring(entityName.length() + 1));
        }
    }

    // collect the columns of all fields flagged as common fields
    for (Map<String, String> fld : context.getAllEntityFields()) {
        // "true".equals(null) is false, so no separate null check is needed
        if ("true".equals(fld.get(COMMON_FIELD))) {
            if (commonFields == null) {
                commonFields = new ArrayList<>();
            }
            commonFields.add(fld.get(DataImporter.COLUMN));
        }
    }
}
Also used : ResourceLoader(org.apache.lucene.analysis.util.ResourceLoader) TransformerFactory(javax.xml.transform.TransformerFactory) SolrCore(org.apache.solr.core.SolrCore) StreamSource(javax.xml.transform.stream.StreamSource) TransformerException(javax.xml.transform.TransformerException) SystemIdResolver(org.apache.solr.util.SystemIdResolver)

Example 9 with ResourceLoader

use of org.apache.lucene.analysis.util.ResourceLoader in project lucene-solr by apache.

the class TestStopFilterFactory method testInform.

/**
 * Exercises the stop-word sets exposed by "Stop" filter factories obtained from
 * {@code tokenFilterFactory}: a single word file, a comma-separated pair of word
 * files, a snowball-format file, and finally a factory created with no extra
 * arguments (the defaults).
 */
public void testInform() throws Exception {
    ResourceLoader resourceLoader = new ClasspathResourceLoader(getClass());
    assertTrue("loader is null and it shouldn't be", resourceLoader != null);

    // one word file, case-insensitive
    StopFilterFactory stopFactory = (StopFilterFactory) tokenFilterFactory("Stop", "words", "stop-1.txt", "ignoreCase", "true");
    CharArraySet stopWords = stopFactory.getStopWords();
    assertTrue("words is null and it shouldn't be", stopWords != null);
    int wordCount = stopWords.size();
    assertTrue("words Size: " + wordCount + " is not: " + 2, wordCount == 2);
    boolean ignoresCase = stopFactory.isIgnoreCase();
    assertTrue(ignoresCase + " does not equal: " + true, ignoresCase);

    // two word files given as a comma-separated list
    stopFactory = (StopFilterFactory) tokenFilterFactory("Stop", "words", "stop-1.txt, stop-2.txt", "ignoreCase", "true");
    stopWords = stopFactory.getStopWords();
    assertTrue("words is null and it shouldn't be", stopWords != null);
    wordCount = stopWords.size();
    assertTrue("words Size: " + wordCount + " is not: " + 4, wordCount == 4);
    ignoresCase = stopFactory.isIgnoreCase();
    assertTrue(ignoresCase + " does not equal: " + true, ignoresCase);

    // snowball-formatted word file
    stopFactory = (StopFilterFactory) tokenFilterFactory("Stop", "words", "stop-snowball.txt", "format", "snowball", "ignoreCase", "true");
    stopWords = stopFactory.getStopWords();
    assertEquals(8, stopWords.size());
    String[] expectedEntries = { "he", "him", "his", "himself", "she", "her", "hers", "herself" };
    for (String entry : expectedEntries) {
        assertTrue(stopWords.contains(entry));
    }

    // defaults
    stopFactory = (StopFilterFactory) tokenFilterFactory("Stop");
    assertEquals(StopAnalyzer.ENGLISH_STOP_WORDS_SET, stopFactory.getStopWords());
    assertEquals(false, stopFactory.isIgnoreCase());
}
Also used : ClasspathResourceLoader(org.apache.lucene.analysis.util.ClasspathResourceLoader) ResourceLoader(org.apache.lucene.analysis.util.ResourceLoader) CharArraySet(org.apache.lucene.analysis.CharArraySet)

Example 10 with ResourceLoader

use of org.apache.lucene.analysis.util.ResourceLoader in project lucene-solr by apache.

the class TestMorfologikFilterFactory method testMissingDictionary.

/**
 * Expects {@code inform} to throw an {@code IOException} whose message contains
 * "Resource not found" when the configured dictionary resource is
 * "missing-dictionary-resource.dict".
 */
public void testMissingDictionary() throws Exception {
    final ResourceLoader classpathLoader = new ClasspathResourceLoader(TestMorfologikFilterFactory.class);
    IOException thrown = expectThrows(IOException.class, () -> {
        // the argument map and the factory are built inside the lambda, as part of the guarded code
        Map<String, String> factoryArgs = new HashMap<>();
        factoryArgs.put(MorfologikFilterFactory.DICTIONARY_ATTRIBUTE, "missing-dictionary-resource.dict");
        MorfologikFilterFactory filterFactory = new MorfologikFilterFactory(factoryArgs);
        filterFactory.inform(classpathLoader);
    });
    String message = thrown.getMessage();
    assertTrue(message.contains("Resource not found"));
}
Also used : ClasspathResourceLoader(org.apache.lucene.analysis.util.ClasspathResourceLoader) ResourceLoader(org.apache.lucene.analysis.util.ResourceLoader) HashMap(java.util.HashMap) IOException(java.io.IOException)

Aggregations

ResourceLoader (org.apache.lucene.analysis.util.ResourceLoader): 13, ClasspathResourceLoader (org.apache.lucene.analysis.util.ClasspathResourceLoader): 7, HashMap (java.util.HashMap): 5, CharArraySet (org.apache.lucene.analysis.CharArraySet): 5, SolrResourceLoader (org.apache.solr.core.SolrResourceLoader): 4, IOException (java.io.IOException): 3, File (java.io.File): 2, ParseException (java.text.ParseException): 2, Map (java.util.Map): 2, TreeMap (java.util.TreeMap): 2, Analyzer (org.apache.lucene.analysis.Analyzer): 2, TokenStream (org.apache.lucene.analysis.TokenStream): 2, SynonymMap (org.apache.lucene.analysis.synonym.SynonymMap): 2, SolrException (org.apache.solr.common.SolrException): 2, NamedList (org.apache.solr.common.util.NamedList): 2, Collator (com.ibm.icu.text.Collator): 1, RuleBasedCollator (com.ibm.icu.text.RuleBasedCollator): 1, ULocale (com.ibm.icu.util.ULocale): 1, FileOutputStream (java.io.FileOutputStream): 1, StringReader (java.io.StringReader): 1