Search in sources :

Example 1 with SynonymMap

use of org.apache.lucene.analysis.synonym.SynonymMap in project lucene-solr by apache.

the class ManagedSynonymGraphFilterFactory method onManagedResourceInitialized.

/**
 * Builds the delegate {@code SynonymGraphFilterFactory} for this managed resource.
 *
 * <p>Invoked once during core initialization so that every analysis component
 * depending on the managed data is set up exactly one time. Initializing only
 * once keeps text analysis consistent, notably in a distributed environment,
 * where servers must not end up applying different synonym sets.
 *
 * <p>The init args are extended in place with the {@code synonyms},
 * {@code expand} and {@code format} entries, converted to a string map, and
 * passed to a delegate whose {@code loadSynonyms} reads its mappings from the
 * managed resource.
 *
 * @param initArgs factory arguments; cast to {@code NamedList<Object>} and appended to
 * @param res the managed resource; cast to {@code SynonymManager} when synonyms are loaded
 * @throws SolrException with {@code SERVER_ERROR} when informing the delegate raises an
 *     {@code IOException}
 */
@SuppressWarnings("unchecked")
@Override
public void onManagedResourceInitialized(NamedList<?> initArgs, final ManagedResource res) throws SolrException {
    final NamedList<Object> mutableArgs = (NamedList<Object>) initArgs;
    mutableArgs.add("synonyms", getResourceId());
    mutableArgs.add("expand", "false");
    mutableArgs.add("format", "solr");

    // The delegate's constructor takes a Map<String, String>, so stringify every value.
    final Map<String, String> factoryArgs = new HashMap<>();
    for (Map.Entry<String, ?> arg : mutableArgs) {
        final String value = arg.getValue().toString();
        factoryArgs.put(arg.getKey(), value);
    }

    // The real filter factory, with synonym loading redirected to a parser
    // that is backed by the managed resource.
    delegate = new SynonymGraphFilterFactory(factoryArgs) {

        @Override
        protected SynonymMap loadSynonyms(ResourceLoader loader, String cname, boolean dedup, Analyzer analyzer) throws IOException, ParseException {
            final SynonymManager manager = (SynonymManager) res;
            final ManagedSynonymParser managedParser = new ManagedSynonymParser(manager, dedup, analyzer);
            // Passing null is safe: nothing is actually parsed from an input Reader.
            managedParser.parse(null);
            return managedParser.build();
        }
    };

    try {
        delegate.inform(res.getResourceLoader());
    } catch (IOException ioe) {
        throw new SolrException(ErrorCode.SERVER_ERROR, ioe);
    }
}
Also used : ResourceLoader(org.apache.lucene.analysis.util.ResourceLoader) SolrResourceLoader(org.apache.solr.core.SolrResourceLoader) HashMap(java.util.HashMap) NamedList(org.apache.solr.common.util.NamedList) IOException(java.io.IOException) Analyzer(org.apache.lucene.analysis.Analyzer) SynonymMap(org.apache.lucene.analysis.synonym.SynonymMap) SynonymGraphFilterFactory(org.apache.lucene.analysis.synonym.SynonymGraphFilterFactory) ParseException(java.text.ParseException) SynonymMap(org.apache.lucene.analysis.synonym.SynonymMap) HashMap(java.util.HashMap) Map(java.util.Map) TreeMap(java.util.TreeMap) SolrException(org.apache.solr.common.SolrException)

Example 2 with SynonymMap

use of org.apache.lucene.analysis.synonym.SynonymMap in project lucene-solr by apache.

the class TestLimitTokenPositionFilter method testMaxPosition3WithSynomyms.

/**
 * Runs a synonym-expanded token stream through {@code LimitTokenPositionFilter}
 * with a maximum position of 3, once with {@code consumeAll} enabled and once
 * without, and verifies the emitted terms and the int array passed alongside them.
 */
public void testMaxPosition3WithSynomyms() throws IOException {
    for (final boolean consumeAll : new boolean[] { true, false }) {
        final MockTokenizer source = whitespaceMockTokenizer("one two three four five");
        // The tokenizer's checks are only usable when every token is consumed.
        source.setEnableChecks(consumeAll);

        final SynonymMap.Builder synonyms = new SynonymMap.Builder(true);

        // Single-word synonyms for "one".
        for (final String single : new String[] { "first", "alpha", "beguine" }) {
            synonyms.add(new CharsRef("one"), new CharsRef(single), true);
        }

        // Multi-word synonyms, joined into one shared scratch builder.
        final CharsRefBuilder scratch = new CharsRefBuilder();
        SynonymMap.Builder.join(new String[] { "and", "indubitably", "single", "only" }, scratch);
        synonyms.add(new CharsRef("one"), scratch.get(), true);
        SynonymMap.Builder.join(new String[] { "dopple", "ganger" }, scratch);
        synonyms.add(new CharsRef("two"), scratch.get(), true);

        final SynonymMap synonymMap = synonyms.build();
        final TokenStream limited = new LimitTokenPositionFilter(new SynonymFilter(source, synonymMap, true), 3, consumeAll);

        // "only", the 4th word of multi-word synonym "and indubitably single only" is not emitted, since its position is greater than 3.
        final String[] expectedTerms = { "one", "first", "alpha", "beguine", "and", "two", "indubitably", "dopple", "three", "single", "ganger" };
        final int[] expectedIncrements = { 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0 };
        assertTokenStreamContents(limited, expectedTerms, expectedIncrements);
    }
}
Also used : MockTokenizer(org.apache.lucene.analysis.MockTokenizer) TokenStream(org.apache.lucene.analysis.TokenStream) CharsRefBuilder(org.apache.lucene.util.CharsRefBuilder) SynonymFilter(org.apache.lucene.analysis.synonym.SynonymFilter) CharsRefBuilder(org.apache.lucene.util.CharsRefBuilder) CharsRef(org.apache.lucene.util.CharsRef) SynonymMap(org.apache.lucene.analysis.synonym.SynonymMap)

Example 3 with SynonymMap

use of org.apache.lucene.analysis.synonym.SynonymMap in project lucene-solr by apache.

the class TestRemoveDuplicatesTokenFilter method testRandomStrings.

/**
 * Blasts some random strings through the analyzer.
 *
 * <p>Each iteration builds a fresh {@code SynonymMap} from random non-empty
 * string pairs, wraps a {@code SynonymFilter} in a
 * {@code RemoveDuplicatesTokenFilter}, and feeds the resulting analyzer to
 * {@code checkRandomData}. The analyzer is closed at the end of every
 * iteration, including when the check throws.
 *
 * @throws Exception if building the map or checking the random data fails
 */
public void testRandomStrings() throws Exception {
    final int numIters = atLeast(10);
    for (int i = 0; i < numIters; i++) {
        SynonymMap.Builder b = new SynonymMap.Builder(random().nextBoolean());
        final int numEntries = atLeast(10);
        for (int j = 0; j < numEntries; j++) {
            add(b, randomNonEmptyString(), randomNonEmptyString(), random().nextBoolean());
        }
        final SynonymMap map = b.build();
        final boolean ignoreCase = random().nextBoolean();
        // try-with-resources: previously a failure in checkRandomData skipped
        // analyzer.close() and left the analyzer open.
        try (Analyzer analyzer = new Analyzer() {

            @Override
            protected TokenStreamComponents createComponents(String fieldName) {
                Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
                TokenStream stream = new SynonymFilter(tokenizer, map, ignoreCase);
                return new TokenStreamComponents(tokenizer, new RemoveDuplicatesTokenFilter(stream));
            }
        }) {
            checkRandomData(random(), analyzer, 200);
        }
    }
}
Also used : TokenStream(org.apache.lucene.analysis.TokenStream) SynonymFilter(org.apache.lucene.analysis.synonym.SynonymFilter) Analyzer(org.apache.lucene.analysis.Analyzer) SynonymMap(org.apache.lucene.analysis.synonym.SynonymMap) MockTokenizer(org.apache.lucene.analysis.MockTokenizer) Tokenizer(org.apache.lucene.analysis.Tokenizer) MockTokenizer(org.apache.lucene.analysis.MockTokenizer) KeywordTokenizer(org.apache.lucene.analysis.core.KeywordTokenizer)

Example 4 with SynonymMap

use of org.apache.lucene.analysis.synonym.SynonymMap in project lucene-solr by apache.

the class ManagedSynonymFilterFactory method onManagedResourceInitialized.

/**
 * Builds the delegate {@code SynonymFilterFactory} for this managed resource.
 *
 * <p>Invoked once during core initialization so that every analysis component
 * depending on the managed data is set up exactly one time. Initializing only
 * once keeps text analysis consistent, notably in a distributed environment,
 * where servers must not end up applying different synonym sets.
 *
 * <p>The init args are extended in place with the {@code synonyms},
 * {@code expand} and {@code format} entries, converted to a string map, and
 * passed to a delegate whose {@code loadSynonyms} reads its mappings from the
 * managed resource.
 *
 * @param initArgs factory arguments; cast to {@code NamedList<Object>} and appended to
 * @param res the managed resource; cast to {@code SynonymManager} when synonyms are loaded
 * @throws SolrException with {@code SERVER_ERROR} when informing the delegate raises an
 *     {@code IOException}
 */
@SuppressWarnings("unchecked")
@Override
public void onManagedResourceInitialized(NamedList<?> initArgs, final ManagedResource res) throws SolrException {
    final NamedList<Object> mutableArgs = (NamedList<Object>) initArgs;
    mutableArgs.add("synonyms", getResourceId());
    mutableArgs.add("expand", "false");
    mutableArgs.add("format", "solr");

    // The delegate's constructor takes a Map<String, String>, so stringify every value.
    final Map<String, String> factoryArgs = new HashMap<>();
    for (Map.Entry<String, ?> arg : mutableArgs) {
        final String value = arg.getValue().toString();
        factoryArgs.put(arg.getKey(), value);
    }

    // The real filter factory, with synonym loading redirected to a parser
    // that is backed by the managed resource.
    delegate = new SynonymFilterFactory(factoryArgs) {

        @Override
        protected SynonymMap loadSynonyms(ResourceLoader loader, String cname, boolean dedup, Analyzer analyzer) throws IOException, ParseException {
            final SynonymManager manager = (SynonymManager) res;
            final ManagedSynonymParser managedParser = new ManagedSynonymParser(manager, dedup, analyzer);
            // Passing null is safe: nothing is actually parsed from an input Reader.
            managedParser.parse(null);
            return managedParser.build();
        }
    };

    try {
        delegate.inform(res.getResourceLoader());
    } catch (IOException ioe) {
        throw new SolrException(ErrorCode.SERVER_ERROR, ioe);
    }
}
Also used : ResourceLoader(org.apache.lucene.analysis.util.ResourceLoader) SolrResourceLoader(org.apache.solr.core.SolrResourceLoader) HashMap(java.util.HashMap) NamedList(org.apache.solr.common.util.NamedList) IOException(java.io.IOException) Analyzer(org.apache.lucene.analysis.Analyzer) SynonymMap(org.apache.lucene.analysis.synonym.SynonymMap) SynonymFilterFactory(org.apache.lucene.analysis.synonym.SynonymFilterFactory) ParseException(java.text.ParseException) SynonymMap(org.apache.lucene.analysis.synonym.SynonymMap) HashMap(java.util.HashMap) Map(java.util.Map) TreeMap(java.util.TreeMap) SolrException(org.apache.solr.common.SolrException)

Aggregations

SynonymMap (org.apache.lucene.analysis.synonym.SynonymMap)4 Analyzer (org.apache.lucene.analysis.Analyzer)3 IOException (java.io.IOException)2 ParseException (java.text.ParseException)2 HashMap (java.util.HashMap)2 Map (java.util.Map)2 TreeMap (java.util.TreeMap)2 MockTokenizer (org.apache.lucene.analysis.MockTokenizer)2 TokenStream (org.apache.lucene.analysis.TokenStream)2 SynonymFilter (org.apache.lucene.analysis.synonym.SynonymFilter)2 ResourceLoader (org.apache.lucene.analysis.util.ResourceLoader)2 SolrException (org.apache.solr.common.SolrException)2 NamedList (org.apache.solr.common.util.NamedList)2 SolrResourceLoader (org.apache.solr.core.SolrResourceLoader)2 Tokenizer (org.apache.lucene.analysis.Tokenizer)1 KeywordTokenizer (org.apache.lucene.analysis.core.KeywordTokenizer)1 SynonymFilterFactory (org.apache.lucene.analysis.synonym.SynonymFilterFactory)1 SynonymGraphFilterFactory (org.apache.lucene.analysis.synonym.SynonymGraphFilterFactory)1 CharsRef (org.apache.lucene.util.CharsRef)1 CharsRefBuilder (org.apache.lucene.util.CharsRefBuilder)1