Search in sources :

Example 71 with Analyzer

Use of org.apache.lucene.analysis.Analyzer in the Apache jackrabbit-oak project.

From the class LucenePropertyIndex, method getLuceneRequest.

/**
     * Builds the Lucene request for the given index plan.
     * <p>
     * Translates the plan's filter into either a regular Lucene {@code Query}
     * (full-text constraint, native function query, property restrictions, or
     * sort-derived existence queries) or, for the native "spellcheck?" /
     * "suggest?" prefixes, a dedicated helper request wrapped in the facade.
     *
     * @param plan index plan containing filter details
     * @param augmentorFactory factory used to look up a FulltextQueryTermsProvider
     *        that may contribute extra full-text query terms
     * @param reader the Lucene reader; may be {@code null}, in which case the
     *        reader-dependent native paths (mlt/spellcheck/suggest) are skipped
     * @return the Lucene query (or native helper request) wrapped in a facade
     * @throws RuntimeException if a native query string fails to parse
     * @throws IllegalStateException if no query could be created for the filter
     */
private static LuceneRequestFacade getLuceneRequest(IndexPlan plan, IndexAugmentorFactory augmentorFactory, IndexReader reader) {
    // NOTE(review): "Agumentor" [sic] is the callee's actual name elsewhere in this class.
    FulltextQueryTermsProvider augmentor = getIndexAgumentor(plan, augmentorFactory);
    List<Query> qs = new ArrayList<Query>();
    Filter filter = plan.getFilter();
    FullTextExpression ft = filter.getFullTextConstraint();
    PlanResult planResult = getPlanResult(plan);
    IndexDefinition defn = planResult.indexDefinition;
    Analyzer analyzer = defn.getAnalyzer();
    if (ft == null) {
    // there might be no full-text constraint
    // when using the LowCostLuceneIndexProvider
    // which is used for testing
    } else {
        qs.add(getFullTextQuery(plan, ft, analyzer, augmentor));
    }
    //Check if native function is supported
    PropertyRestriction pr = null;
    if (defn.hasFunctionDefined()) {
        pr = filter.getPropertyRestriction(defn.getFunctionName());
    }
    if (pr != null) {
        // Native query string, e.g. "mlt?...", "spellcheck?...", "suggest?..."
        // or a raw Lucene query expression.
        String query = String.valueOf(pr.first.getValue(pr.first.getType()));
        QueryParser queryParser = new QueryParser(VERSION, "", analyzer);
        if (query.startsWith("mlt?")) {
            // More-like-this: strip the prefix and delegate to the MLT helper.
            // (String.replace removes every occurrence, but native query
            // strings are expected to carry the prefix only at the start.)
            String mltQueryString = query.replace("mlt?", "");
            if (reader != null) {
                Query moreLikeThis = MoreLikeThisHelper.getMoreLikeThis(reader, analyzer, mltQueryString);
                if (moreLikeThis != null) {
                    qs.add(moreLikeThis);
                }
            }
        } else if (query.startsWith("spellcheck?")) {
            String spellcheckQueryString = query.replace("spellcheck?", "");
            if (reader != null) {
                // Spellcheck short-circuits: the facade carries a helper
                // request instead of a Lucene Query.
                return new LuceneRequestFacade<SpellcheckHelper.SpellcheckQuery>(SpellcheckHelper.getSpellcheckQuery(spellcheckQueryString, reader));
            }
        } else if (query.startsWith("suggest?")) {
            String suggestQueryString = query.replace("suggest?", "");
            if (reader != null) {
                // Suggest also short-circuits with a helper request.
                return new LuceneRequestFacade<SuggestHelper.SuggestQuery>(SuggestHelper.getSuggestQuery(suggestQueryString));
            }
        } else {
            // No recognized native prefix: treat the value as a raw Lucene
            // query and parse it with the index's analyzer.
            try {
                qs.add(queryParser.parse(query));
            } catch (ParseException e) {
                throw new RuntimeException(e);
            }
        }
    } else if (planResult.evaluateNonFullTextConstraints()) {
        addNonFullTextConstraints(qs, plan, reader);
    }
    if (qs.size() == 0 && plan.getSortOrder() != null) {
        //This case indicates that query just had order by and no
        //property restriction defined. In this case property
        //existence queries for each sort entry
        List<OrderEntry> orders = removeNativeSort(plan.getSortOrder());
        for (int i = 0; i < orders.size(); i++) {
            OrderEntry oe = orders.get(i);
            PropertyDefinition pd = planResult.getOrderedProperty(i);
            PropertyRestriction orderRest = new PropertyRestriction();
            orderRest.propertyName = oe.getPropertyName();
            Query q = createQuery(orderRest, pd);
            if (q != null) {
                qs.add(q);
            }
        }
    }
    if (qs.size() == 0) {
        if (reader == null) {
            //just return match all queries
            return new LuceneRequestFacade<Query>(new MatchAllDocsQuery());
        }
        //be returned (if the index definition has a single rule)
        if (planResult.evaluateNodeTypeRestriction()) {
            return new LuceneRequestFacade<Query>(new MatchAllDocsQuery());
        }
        // Nothing matched the filter and no fallback applies — fail loudly
        // rather than silently returning an over-broad query.
        throw new IllegalStateException("No query created for filter " + filter);
    }
    return performAdditionalWraps(qs);
}
Also used : PropertyRestriction(org.apache.jackrabbit.oak.spi.query.Filter.PropertyRestriction) PlanResult(org.apache.jackrabbit.oak.plugins.index.lucene.IndexPlanner.PlanResult) Query(org.apache.lucene.search.Query) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) WildcardQuery(org.apache.lucene.search.WildcardQuery) NumericRangeQuery(org.apache.lucene.search.NumericRangeQuery) CustomScoreQuery(org.apache.lucene.queries.CustomScoreQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) TermRangeQuery(org.apache.lucene.search.TermRangeQuery) SuggestHelper(org.apache.jackrabbit.oak.plugins.index.lucene.util.SuggestHelper) ArrayList(java.util.ArrayList) FulltextQueryTermsProvider(org.apache.jackrabbit.oak.plugins.index.lucene.spi.FulltextQueryTermsProvider) Analyzer(org.apache.lucene.analysis.Analyzer) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) StandardQueryParser(org.apache.lucene.queryparser.flexible.standard.StandardQueryParser) QueryParser(org.apache.lucene.queryparser.classic.QueryParser) Filter(org.apache.jackrabbit.oak.spi.query.Filter) FullTextExpression(org.apache.jackrabbit.oak.query.fulltext.FullTextExpression) ParseException(org.apache.lucene.queryparser.classic.ParseException)

Example 72 with Analyzer

Use of org.apache.lucene.analysis.Analyzer in the Apache lucene-solr project.

From the class SynonymFilterFactory, method inform.

/**
 * Loads the synonym map from the configured resource.
 * <p>
 * Tokenization of the synonym file uses either a named analyzer, a
 * configured tokenizer factory, or a plain whitespace tokenizer
 * (optionally lower-cased). The temporary analyzer is closed after the
 * synonyms are parsed.
 *
 * @param loader resource loader used to resolve the analyzer/tokenizer
 *        and the synonym file
 * @throws IOException if the synonym file cannot be read or parsed
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
    final TokenizerFactory tokFactory;
    if (tokenizerFactory == null) {
        tokFactory = null;
    } else {
        tokFactory = loadTokenizerFactory(loader, tokenizerFactory);
    }
    final Analyzer synonymAnalyzer;
    if (analyzerName == null) {
        // No explicit analyzer configured: build a minimal one around the
        // tokenizer factory (or whitespace tokenization as a fallback).
        synonymAnalyzer = new Analyzer() {

            @Override
            protected TokenStreamComponents createComponents(String fieldName) {
                Tokenizer source;
                if (tokFactory == null) {
                    source = new WhitespaceTokenizer();
                } else {
                    source = tokFactory.create();
                }
                TokenStream sink = source;
                if (ignoreCase) {
                    sink = new LowerCaseFilter(source);
                }
                return new TokenStreamComponents(source, sink);
            }
        };
    } else {
        synonymAnalyzer = loadAnalyzer(loader, analyzerName);
    }
    try (Analyzer a = synonymAnalyzer) {
        // Resolve the parser class: explicit format wins, "solr" (or
        // unset) and "wordnet" map to the built-in parsers.
        String parserClass;
        if (format == null || format.equals("solr")) {
            parserClass = SolrSynonymParser.class.getName();
        } else if (format.equals("wordnet")) {
            parserClass = WordnetSynonymParser.class.getName();
        } else {
            parserClass = format;
        }
        // TODO: expose dedup as a parameter?
        map = loadSynonyms(loader, parserClass, true, a);
    } catch (ParseException e) {
        throw new IOException("Error parsing synonyms file:", e);
    }
}
Also used : WhitespaceTokenizer(org.apache.lucene.analysis.core.WhitespaceTokenizer) TokenStream(org.apache.lucene.analysis.TokenStream) TokenizerFactory(org.apache.lucene.analysis.util.TokenizerFactory) ParseException(java.text.ParseException) IOException(java.io.IOException) Analyzer(org.apache.lucene.analysis.Analyzer) WhitespaceTokenizer(org.apache.lucene.analysis.core.WhitespaceTokenizer) Tokenizer(org.apache.lucene.analysis.Tokenizer) LowerCaseFilter(org.apache.lucene.analysis.LowerCaseFilter)

Example 73 with Analyzer

Use of org.apache.lucene.analysis.Analyzer in the Apache lucene-solr project.

From the class SynonymGraphFilterFactory, method inform.

/**
 * Loads the synonym graph map from the configured resource.
 * <p>
 * The synonym file is tokenized with a named analyzer if one was
 * configured, otherwise with the configured tokenizer factory or a
 * whitespace tokenizer (lower-cased when {@code ignoreCase} is set).
 * The temporary analyzer is closed once parsing completes.
 *
 * @param loader resource loader used to resolve the analyzer/tokenizer
 *        and the synonym file
 * @throws IOException if the synonym file cannot be read or parsed
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
    final TokenizerFactory tokFactory;
    if (tokenizerFactory != null) {
        tokFactory = loadTokenizerFactory(loader, tokenizerFactory);
    } else {
        tokFactory = null;
    }
    final Analyzer parseAnalyzer;
    if (analyzerName != null) {
        parseAnalyzer = loadAnalyzer(loader, analyzerName);
    } else {
        // Fall back to a minimal analyzer around the tokenizer factory,
        // or whitespace tokenization when none is configured.
        parseAnalyzer = new Analyzer() {

            @Override
            protected TokenStreamComponents createComponents(String fieldName) {
                Tokenizer source;
                if (tokFactory != null) {
                    source = tokFactory.create();
                } else {
                    source = new WhitespaceTokenizer();
                }
                TokenStream sink;
                if (ignoreCase) {
                    sink = new LowerCaseFilter(source);
                } else {
                    sink = source;
                }
                return new TokenStreamComponents(source, sink);
            }
        };
    }
    try (Analyzer a = parseAnalyzer) {
        // Pick the parser class: "solr" (or unset) and "wordnet" map to
        // the built-in parsers; anything else is taken as a class name.
        String parserClass;
        if (format == null || format.equals("solr")) {
            parserClass = SolrSynonymParser.class.getName();
        } else if (format.equals("wordnet")) {
            parserClass = WordnetSynonymParser.class.getName();
        } else {
            parserClass = format;
        }
        // TODO: expose dedup as a parameter?
        map = loadSynonyms(loader, parserClass, true, a);
    } catch (ParseException e) {
        throw new IOException("Error parsing synonyms file:", e);
    }
}
Also used : WhitespaceTokenizer(org.apache.lucene.analysis.core.WhitespaceTokenizer) TokenStream(org.apache.lucene.analysis.TokenStream) TokenizerFactory(org.apache.lucene.analysis.util.TokenizerFactory) ParseException(java.text.ParseException) IOException(java.io.IOException) Analyzer(org.apache.lucene.analysis.Analyzer) WhitespaceTokenizer(org.apache.lucene.analysis.core.WhitespaceTokenizer) Tokenizer(org.apache.lucene.analysis.Tokenizer) LowerCaseFilter(org.apache.lucene.analysis.core.LowerCaseFilter)

Example 74 with Analyzer

Use of org.apache.lucene.analysis.Analyzer in the Apache lucene-solr project.

From the class TestCatalanAnalyzer, method testBasics.

/** Tests Catalan stemming and stopword removal with the default analyzer. */
public void testBasics() throws IOException {
    // try-with-resources: the original called a.close() as trailing code,
    // leaking the analyzer whenever an assertion failed.
    try (Analyzer a = new CatalanAnalyzer()) {
        // stemming: plural and singular forms reduce to the same stem
        checkOneTerm(a, "llengües", "llengu");
        checkOneTerm(a, "llengua", "llengu");
        // stopword: "un" is in the default Catalan stop set and is dropped
        assertAnalyzesTo(a, "un", new String[] {});
    }
}
Also used : Analyzer(org.apache.lucene.analysis.Analyzer)

Example 75 with Analyzer

Use of org.apache.lucene.analysis.Analyzer in the Apache lucene-solr project.

From the class TestCatalanAnalyzer, method testExclude.

/** Tests that words in the stem-exclusion set bypass stemming. */
public void testExclude() throws IOException {
    CharArraySet exclusionSet = new CharArraySet(asSet("llengües"), false);
    // try-with-resources: the original's unconditional a.close() leaked the
    // analyzer when an assertion failed mid-test.
    try (Analyzer a = new CatalanAnalyzer(CatalanAnalyzer.getDefaultStopSet(), exclusionSet)) {
        // excluded word passes through unstemmed
        checkOneTerm(a, "llengües", "llengües");
        // non-excluded word is still stemmed
        checkOneTerm(a, "llengua", "llengu");
    }
}
Also used : CharArraySet(org.apache.lucene.analysis.CharArraySet) Analyzer(org.apache.lucene.analysis.Analyzer)

Aggregations

Analyzer (org.apache.lucene.analysis.Analyzer)1020 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)396 Tokenizer (org.apache.lucene.analysis.Tokenizer)265 MockTokenizer (org.apache.lucene.analysis.MockTokenizer)228 Document (org.apache.lucene.document.Document)207 Directory (org.apache.lucene.store.Directory)192 KeywordTokenizer (org.apache.lucene.analysis.core.KeywordTokenizer)176 BytesRef (org.apache.lucene.util.BytesRef)122 Test (org.junit.Test)119 TokenStream (org.apache.lucene.analysis.TokenStream)107 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)92 Term (org.apache.lucene.index.Term)92 IndexReader (org.apache.lucene.index.IndexReader)67 InputArrayIterator (org.apache.lucene.search.suggest.InputArrayIterator)65 StandardAnalyzer (org.apache.lucene.analysis.standard.StandardAnalyzer)64 Input (org.apache.lucene.search.suggest.Input)63 CharArraySet (org.apache.lucene.analysis.CharArraySet)58 ArrayList (java.util.ArrayList)57 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)57 TextField (org.apache.lucene.document.TextField)55