Usage example of org.apache.lucene.analysis.util.ResourceLoader in the lucene-solr project (Apache): the testCustomTypes method of the TestWordDelimiterFilterFactory class.
@Test
public void testCustomTypes() throws Exception {
  String testText = "I borrowed $5,400.00 at 25% interest-rate";
  ResourceLoader loader = new SolrResourceLoader(TEST_PATH().resolve("collection1"));

  /* default behavior */
  // The factory constructor consumes (removes) entries from the args map, so
  // each factory instance must get its own freshly built map.
  WordDelimiterFilterFactory factoryDefault = new WordDelimiterFilterFactory(newBaseArgs());
  factoryDefault.inform(loader);
  TokenStream ts = factoryDefault.create(whitespaceMockTokenizer(testText));
  BaseTokenStreamTestCase.assertTokenStreamContents(ts,
      new String[] { "I", "borrowed", "5", "540000", "400", "00", "at", "25", "interest", "interestrate", "rate" });
  // \u200D (ZERO WIDTH JOINER) is a delimiter by default, so the token is split
  // into parts plus the catenated form. (The plain-ASCII "foobar" the scrape
  // showed here could never produce these tokens; the non-ASCII char was lost.)
  ts = factoryDefault.create(whitespaceMockTokenizer("foo\u200Dbar"));
  BaseTokenStreamTestCase.assertTokenStreamContents(ts, new String[] { "foo", "foobar", "bar" });

  /* custom behavior */
  Map<String, String> args = newBaseArgs();
  // use a custom type mapping so e.g. '$', '%' and \u200D are not treated as delimiters
  args.put("types", "wdftypes.txt");
  WordDelimiterFilterFactory factoryCustom = new WordDelimiterFilterFactory(args);
  factoryCustom.inform(loader);
  ts = factoryCustom.create(whitespaceMockTokenizer(testText));
  BaseTokenStreamTestCase.assertTokenStreamContents(ts,
      new String[] { "I", "borrowed", "$5,400.00", "at", "25%", "interest", "interestrate", "rate" });
  /* test custom behavior with a char > 0x7F, because we had to make a larger byte[] */
  // With \u200D re-typed by wdftypes.txt, the token is kept whole.
  ts = factoryCustom.create(whitespaceMockTokenizer("foo\u200Dbar"));
  BaseTokenStreamTestCase.assertTokenStreamContents(ts, new String[] { "foo\u200Dbar" });
}

/**
 * Builds a fresh copy of the word-delimiter arguments shared by both factories
 * under test. A new map is returned on every call because the factory
 * constructor removes the entries it recognizes.
 */
private static Map<String, String> newBaseArgs() {
  Map<String, String> args = new HashMap<>();
  args.put("luceneMatchVersion", Version.LATEST.toString());
  args.put("generateWordParts", "1");
  args.put("generateNumberParts", "1");
  args.put("catenateWords", "1");
  args.put("catenateNumbers", "1");
  args.put("catenateAll", "0");
  args.put("splitOnCaseChange", "1");
  return args;
}
Usage example of org.apache.lucene.analysis.util.ResourceLoader in the lucene-solr project (Apache): the setupSolrHome method of the TestICUCollationField class.
/**
 * Ugly: but what to do? We want to test custom sort, which reads rules in as a resource.
 * These are largish files, and jvm-specific (as our documentation says, you should always
 * look out for jvm differences with collation).
 * So it's preferable to create this file on-the-fly.
 *
 * @return the absolute path of the generated solr home directory
 */
public static String setupSolrHome() throws Exception {
  String tmpFile = createTempDir().toFile().getAbsolutePath();
  // make data and conf dirs
  new File(tmpFile + "/collection1", "data").mkdirs();
  File confDir = new File(tmpFile + "/collection1", "conf");
  confDir.mkdirs();

  // copy over configuration files
  FileUtils.copyFile(getFile("analysis-extras/solr/collection1/conf/solrconfig-icucollate.xml"), new File(confDir, "solrconfig.xml"));
  FileUtils.copyFile(getFile("analysis-extras/solr/collection1/conf/schema-icucollate.xml"), new File(confDir, "schema.xml"));

  // generate custom collation rules (DIN 5007-2), saving to customrules.dat
  RuleBasedCollator baseCollator = (RuleBasedCollator) Collator.getInstance(new ULocale("de", "DE"));
  // NOTE(review): the UE tailoring deliberately maps to lowercase ü upstream;
  // kept byte-identical here, since the test only round-trips these rules.
  String DIN5007_2_tailorings = "& ae , ä & AE , Ä" + "& oe , ö & OE , Ö" + "& ue , ü & UE , ü";
  RuleBasedCollator tailoredCollator = new RuleBasedCollator(baseCollator.getRules() + DIN5007_2_tailorings);
  String tailoredRules = tailoredCollator.getRules();
  final String osFileName = "customrules.dat";
  // try-with-resources: the original leaked the stream if IOUtils.write threw
  try (FileOutputStream os = new FileOutputStream(new File(confDir, osFileName))) {
    IOUtils.write(tailoredRules, os, "UTF-8");
  }
  // randomly read the rules back either from memory or from the file just written
  final ResourceLoader loader;
  if (random().nextBoolean()) {
    loader = new StringMockResourceLoader(tailoredRules);
  } else {
    loader = new FilesystemResourceLoader(confDir.toPath());
  }
  final Collator readCollator = ICUCollationField.createFromRules(osFileName, loader);
  assertEquals(tailoredCollator, readCollator);
  return tmpFile;
}
Usage example of org.apache.lucene.analysis.util.ResourceLoader in the lucene-solr project (Apache): the onManagedResourceInitialized method of the ManagedSynonymFilterFactory class.
/**
 * Called once, during core initialization, to initialize any analysis components
 * that depend on the data managed by this resource. It is important that the
 * analysis component is only initialized once during core initialization so that
 * text analysis is consistent, especially in a distributed environment, as we
 * don't want one server applying a different set of stop words than other servers.
 */
@SuppressWarnings("unchecked")
@Override
public void onManagedResourceInitialized(NamedList<?> initArgs, final ManagedResource res) throws SolrException {
  // Augment the init args with the settings the delegate factory requires.
  NamedList<Object> args = (NamedList<Object>) initArgs;
  args.add("synonyms", getResourceId());
  args.add("expand", "false");
  args.add("format", "solr");

  // Flatten the NamedList into the String->String map the factory ctor expects.
  Map<String, String> factoryArgs = new HashMap<>();
  for (Map.Entry<String, ?> initArg : args) {
    factoryArgs.put(initArg.getKey(), initArg.getValue().toString());
  }

  // The delegate pulls its synonym mappings from the managed resource rather
  // than from a resource file, via a custom parser implementation.
  delegate = new SynonymFilterFactory(factoryArgs) {
    @Override
    protected SynonymMap loadSynonyms(ResourceLoader loader, String cname, boolean dedup, Analyzer analyzer) throws IOException, ParseException {
      ManagedSynonymParser parser = new ManagedSynonymParser((SynonymManager) res, dedup, analyzer);
      // passing null is safe here: this parser reads from the managed
      // resource, so no actual parsing is done against an input Reader
      parser.parse(null);
      return parser.build();
    }
  };

  try {
    delegate.inform(res.getResourceLoader());
  } catch (IOException e) {
    throw new SolrException(ErrorCode.SERVER_ERROR, e);
  }
}
Aggregations