Search in sources :

Example 11 with ResourceLoader

Use of org.apache.lucene.analysis.util.ResourceLoader in the project lucene-solr by Apache.

The following example shows the method testCustomTypes of the class TestWordDelimiterFilterFactory.

@Test
public void testCustomTypes() throws Exception {
    String testText = "I borrowed $5,400.00 at 25% interest-rate";
    ResourceLoader loader = new SolrResourceLoader(TEST_PATH().resolve("collection1"));
    /* default behavior: no custom type mapping supplied */
    // The factory constructor consumes (removes) entries from the args map,
    // so each factory gets its own freshly built map from the helper.
    WordDelimiterFilterFactory factoryDefault = new WordDelimiterFilterFactory(baseWordDelimiterArgs());
    factoryDefault.inform(loader);
    TokenStream ts = factoryDefault.create(whitespaceMockTokenizer(testText));
    BaseTokenStreamTestCase.assertTokenStreamContents(ts, new String[] { "I", "borrowed", "5", "540000", "400", "00", "at", "25", "interest", "interestrate", "rate" });
    ts = factoryDefault.create(whitespaceMockTokenizer("foo‍bar"));
    BaseTokenStreamTestCase.assertTokenStreamContents(ts, new String[] { "foo", "foobar", "bar" });
    /* custom behavior: same options plus a custom type mapping file */
    Map<String, String> args = baseWordDelimiterArgs();
    args.put("types", "wdftypes.txt");
    WordDelimiterFilterFactory factoryCustom = new WordDelimiterFilterFactory(args);
    factoryCustom.inform(loader);
    ts = factoryCustom.create(whitespaceMockTokenizer(testText));
    BaseTokenStreamTestCase.assertTokenStreamContents(ts, new String[] { "I", "borrowed", "$5,400.00", "at", "25%", "interest", "interestrate", "rate" });
    /* test custom behavior with a char > 0x7F, because we had to make a larger byte[] */
    ts = factoryCustom.create(whitespaceMockTokenizer("foo‍bar"));
    BaseTokenStreamTestCase.assertTokenStreamContents(ts, new String[] { "foo‍bar" });
}

/**
 * Builds a fresh map of the word-delimiter options shared by both factory
 * configurations in {@link #testCustomTypes()}. A new map is returned on every
 * call because {@code WordDelimiterFilterFactory} consumes entries from the
 * map passed to its constructor.
 */
private static Map<String, String> baseWordDelimiterArgs() {
    Map<String, String> args = new HashMap<>();
    args.put("luceneMatchVersion", Version.LATEST.toString());
    args.put("generateWordParts", "1");
    args.put("generateNumberParts", "1");
    args.put("catenateWords", "1");
    args.put("catenateNumbers", "1");
    args.put("catenateAll", "0");
    args.put("splitOnCaseChange", "1");
    return args;
}
Also used : SolrResourceLoader(org.apache.solr.core.SolrResourceLoader) SolrResourceLoader(org.apache.solr.core.SolrResourceLoader) ResourceLoader(org.apache.lucene.analysis.util.ResourceLoader) TokenStream(org.apache.lucene.analysis.TokenStream) HashMap(java.util.HashMap) WordDelimiterFilterFactory(org.apache.lucene.analysis.miscellaneous.WordDelimiterFilterFactory) Test(org.junit.Test)

Example 12 with ResourceLoader

Use of org.apache.lucene.analysis.util.ResourceLoader in the project lucene-solr by Apache.

The following example shows the method setupSolrHome of the class TestICUCollationField.

/**
   * Ugly: but what to do? We want to test custom sort, which reads rules in as a resource.
   * These are largish files, and jvm-specific (as our documentation says, you should always
   * look out for jvm differences with collation).
   * So it's preferable to create this file on-the-fly.
   *
   * @return absolute path of the temporary Solr home directory that was set up
   */
public static String setupSolrHome() throws Exception {
    String tmpFile = createTempDir().toFile().getAbsolutePath();
    // make data and conf dirs
    new File(tmpFile + "/collection1", "data").mkdirs();
    File confDir = new File(tmpFile + "/collection1", "conf");
    confDir.mkdirs();
    // copy over configuration files
    FileUtils.copyFile(getFile("analysis-extras/solr/collection1/conf/solrconfig-icucollate.xml"), new File(confDir, "solrconfig.xml"));
    FileUtils.copyFile(getFile("analysis-extras/solr/collection1/conf/schema-icucollate.xml"), new File(confDir, "schema.xml"));
    // generate custom collation rules (DIN 5007-2), saving to customrules.dat
    RuleBasedCollator baseCollator = (RuleBasedCollator) Collator.getInstance(new ULocale("de", "DE"));
    String DIN5007_2_tailorings = "& ae , ä & AE , Ä" + "& oe , ö & OE , Ö" + "& ue , ü & UE , ü";
    RuleBasedCollator tailoredCollator = new RuleBasedCollator(baseCollator.getRules() + DIN5007_2_tailorings);
    String tailoredRules = tailoredCollator.getRules();
    final String osFileName = "customrules.dat";
    // try-with-resources: the stream is closed even if IOUtils.write throws,
    // avoiding a file-handle leak (the original only closed on the happy path)
    try (FileOutputStream os = new FileOutputStream(new File(confDir, osFileName))) {
        IOUtils.write(tailoredRules, os, "UTF-8");
    }
    // randomly exercise both resource-loader implementations
    final ResourceLoader loader;
    if (random().nextBoolean()) {
        loader = new StringMockResourceLoader(tailoredRules);
    } else {
        loader = new FilesystemResourceLoader(confDir.toPath());
    }
    // sanity check: reading the rules back through the loader reproduces the collator
    final Collator readCollator = ICUCollationField.createFromRules(osFileName, loader);
    assertEquals(tailoredCollator, readCollator);
    return tmpFile;
}
Also used : ResourceLoader(org.apache.lucene.analysis.util.ResourceLoader) StringMockResourceLoader(org.apache.lucene.analysis.util.StringMockResourceLoader) FilesystemResourceLoader(org.apache.lucene.analysis.util.FilesystemResourceLoader) RuleBasedCollator(com.ibm.icu.text.RuleBasedCollator) FilesystemResourceLoader(org.apache.lucene.analysis.util.FilesystemResourceLoader) ULocale(com.ibm.icu.util.ULocale) FileOutputStream(java.io.FileOutputStream) File(java.io.File) StringMockResourceLoader(org.apache.lucene.analysis.util.StringMockResourceLoader) RuleBasedCollator(com.ibm.icu.text.RuleBasedCollator) Collator(com.ibm.icu.text.Collator)

Example 13 with ResourceLoader

Use of org.apache.lucene.analysis.util.ResourceLoader in the project lucene-solr by Apache.

The following example shows the method onManagedResourceInitialized of the class ManagedSynonymFilterFactory.

/**
   * Called once, during core initialization, to initialize any analysis components
   * that depend on the data managed by this resource. It is important that the
   * analysis component is only initialized once during core initialization so that
   * text analysis is consistent, especially in a distributed environment, as we
   * don't want one server applying a different set of stop words than other servers.
   *
   * @param initArgs configuration args for this resource; augmented here with the
   *                 synonyms/expand/format entries the delegate factory expects
   * @param res      the managed resource supplying the synonym mappings
   * @throws SolrException wrapping any IOException raised while informing the delegate
   */
@SuppressWarnings("unchecked")
@Override
public void onManagedResourceInitialized(NamedList<?> initArgs, final ManagedResource res) throws SolrException {
    NamedList<Object> args = (NamedList<Object>) initArgs;
    // point the delegate at this managed resource instead of a synonyms file
    args.add("synonyms", getResourceId());
    args.add("expand", "false");
    args.add("format", "solr");
    // flatten the NamedList into the String->String map the factory ctor takes
    Map<String, String> filtArgs = new HashMap<>();
    for (Map.Entry<String, ?> entry : args) {
        filtArgs.put(entry.getKey(), entry.getValue().toString());
    }
    // create the actual filter factory that pulls the synonym mappings
    // from synonymMappings using a custom parser implementation
    delegate = new SynonymFilterFactory(filtArgs) {

        @Override
        protected SynonymMap loadSynonyms(ResourceLoader loader, String cname, boolean dedup, Analyzer analyzer) throws IOException, ParseException {
            ManagedSynonymParser parser = new ManagedSynonymParser((SynonymManager) res, dedup, analyzer);
            // null is safe here because there's no actual parsing done against an input Reader
            parser.parse(null);
            return parser.build();
        }
    };
    try {
        delegate.inform(res.getResourceLoader());
    } catch (IOException e) {
        throw new SolrException(ErrorCode.SERVER_ERROR, e);
    }
}
Also used : ResourceLoader(org.apache.lucene.analysis.util.ResourceLoader) SolrResourceLoader(org.apache.solr.core.SolrResourceLoader) HashMap(java.util.HashMap) NamedList(org.apache.solr.common.util.NamedList) IOException(java.io.IOException) Analyzer(org.apache.lucene.analysis.Analyzer) SynonymMap(org.apache.lucene.analysis.synonym.SynonymMap) SynonymFilterFactory(org.apache.lucene.analysis.synonym.SynonymFilterFactory) ParseException(java.text.ParseException) SynonymMap(org.apache.lucene.analysis.synonym.SynonymMap) HashMap(java.util.HashMap) Map(java.util.Map) TreeMap(java.util.TreeMap) SolrException(org.apache.solr.common.SolrException)

Aggregations

ResourceLoader (org.apache.lucene.analysis.util.ResourceLoader)13 ClasspathResourceLoader (org.apache.lucene.analysis.util.ClasspathResourceLoader)7 HashMap (java.util.HashMap)5 CharArraySet (org.apache.lucene.analysis.CharArraySet)5 SolrResourceLoader (org.apache.solr.core.SolrResourceLoader)4 IOException (java.io.IOException)3 File (java.io.File)2 ParseException (java.text.ParseException)2 Map (java.util.Map)2 TreeMap (java.util.TreeMap)2 Analyzer (org.apache.lucene.analysis.Analyzer)2 TokenStream (org.apache.lucene.analysis.TokenStream)2 SynonymMap (org.apache.lucene.analysis.synonym.SynonymMap)2 SolrException (org.apache.solr.common.SolrException)2 NamedList (org.apache.solr.common.util.NamedList)2 Collator (com.ibm.icu.text.Collator)1 RuleBasedCollator (com.ibm.icu.text.RuleBasedCollator)1 ULocale (com.ibm.icu.util.ULocale)1 FileOutputStream (java.io.FileOutputStream)1 StringReader (java.io.StringReader)1