
Example 6 with Analyzer

Use of org.apache.lucene.analysis.Analyzer in project elasticsearch by elastic.

From the class PercolateQueryBuilderTests, method testCreateMultiDocumentSearcher:

public void testCreateMultiDocumentSearcher() throws Exception {
    int numDocs = randomIntBetween(2, 8);
    List<ParseContext.Document> docs = new ArrayList<>(numDocs);
    for (int i = 0; i < numDocs; i++) {
        docs.add(new ParseContext.Document());
    }
    Analyzer analyzer = new WhitespaceAnalyzer();
    ParsedDocument parsedDocument = new ParsedDocument(null, null, "_id", "_type", null, docs, null, null, null);
    IndexSearcher indexSearcher = PercolateQueryBuilder.createMultiDocumentSearcher(analyzer, parsedDocument);
    assertThat(indexSearcher.getIndexReader().numDocs(), equalTo(numDocs));
    // ensure that any query gets modified so that the nested docs are never included as hits:
    Query query = new MatchAllDocsQuery();
    BooleanQuery result = (BooleanQuery) indexSearcher.createNormalizedWeight(query, true).getQuery();
    assertThat(result.clauses().size(), equalTo(2));
    assertThat(result.clauses().get(0).getQuery(), sameInstance(query));
    assertThat(result.clauses().get(0).getOccur(), equalTo(BooleanClause.Occur.MUST));
    assertThat(result.clauses().get(1).getOccur(), equalTo(BooleanClause.Occur.MUST_NOT));
}
Also used: WhitespaceAnalyzer (org.apache.lucene.analysis.core.WhitespaceAnalyzer), IndexSearcher (org.apache.lucene.search.IndexSearcher), BooleanQuery (org.apache.lucene.search.BooleanQuery), Query (org.apache.lucene.search.Query), MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery), ArrayList (java.util.ArrayList), ParsedDocument (org.elasticsearch.index.mapper.ParsedDocument), Analyzer (org.apache.lucene.analysis.Analyzer), ParseContext (org.elasticsearch.index.mapper.ParseContext)
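
The MUST / MUST_NOT shape asserted in this test can be reproduced by hand with Lucene's BooleanQuery.Builder. The sketch below is a minimal illustration, not the percolator's internal code; the marker query and the "_marker" field are hypothetical stand-ins for however the nested documents are actually identified.

import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;

public final class ExcludeNestedDocs {

    // Wrap an arbitrary query so that documents matched by the marker query
    // can never appear as hits (MUST the original, MUST_NOT the marker).
    static Query excludeNestedDocs(Query userQuery, Query nestedDocsMarker) {
        return new BooleanQuery.Builder()
            .add(userQuery, Occur.MUST)            // the original query must still match
            .add(nestedDocsMarker, Occur.MUST_NOT) // nested documents are excluded
            .build();
    }

    public static void main(String[] args) {
        // Hypothetical marker query; the real percolator derives this internally.
        Query marker = new TermQuery(new Term("_marker", "nested"));
        System.out.println(excludeNestedDocs(new MatchAllDocsQuery(), marker));
        // prints: +*:* -_marker:nested
    }
}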

Example 7 with Analyzer

Use of org.apache.lucene.analysis.Analyzer in project elasticsearch by elastic.

From the class SimpleUkrainianAnalyzerTests, method testAnalyzer:

private static void testAnalyzer(String source, String... expected_terms) throws IOException {
    TestAnalysis analysis = createTestAnalysis(new Index("test", "_na_"), Settings.EMPTY, new AnalysisUkrainianPlugin());
    Analyzer analyzer = analysis.indexAnalyzers.get("ukrainian").analyzer();
    TokenStream ts = analyzer.tokenStream("test", source);
    CharTermAttribute term1 = ts.addAttribute(CharTermAttribute.class);
    ts.reset();
    for (String expected : expected_terms) {
        assertThat(ts.incrementToken(), equalTo(true));
        assertThat(term1.toString(), equalTo(expected));
    }
    assertThat(ts.incrementToken(), equalTo(false));
}
Also used: TokenStream (org.apache.lucene.analysis.TokenStream), CharTermAttribute (org.apache.lucene.analysis.tokenattributes.CharTermAttribute), Index (org.elasticsearch.index.Index), Analyzer (org.apache.lucene.analysis.Analyzer), AnalysisUkrainianPlugin (org.elasticsearch.plugin.analysis.ukrainian.AnalysisUkrainianPlugin)
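
Example 7 relies on the surrounding test framework to finish and close the stream; the full TokenStream contract is reset, incrementToken in a loop, end, then close. Below is a minimal, self-contained sketch of that contract, using WhitespaceAnalyzer so it does not depend on the Ukrainian analysis plugin.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public final class TokenStreamContract {

    // Collect all terms produced by an analyzer for the given text,
    // following the full reset -> incrementToken -> end -> close contract.
    static List<String> tokens(Analyzer analyzer, String text) throws IOException {
        List<String> result = new ArrayList<>();
        try (TokenStream ts = analyzer.tokenStream("test", text)) {
            CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
            ts.reset();                  // mandatory before the first incrementToken()
            while (ts.incrementToken()) {
                result.add(term.toString());
            }
            ts.end();                    // record the final offset state
        }                                // try-with-resources closes the stream
        return result;
    }

    public static void main(String[] args) throws IOException {
        System.out.println(tokens(new WhitespaceAnalyzer(), "Quick brown fox"));
        // [Quick, brown, fox]
    }
}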

Example 8 with Analyzer

Use of org.apache.lucene.analysis.Analyzer in project elasticsearch by elastic.

From the class PolishAnalysisTests, method testDefaultsPolishAnalysis:

public void testDefaultsPolishAnalysis() throws IOException {
    final TestAnalysis analysis = createTestAnalysis(new Index("test", "_na_"), Settings.EMPTY, new AnalysisStempelPlugin());
    TokenFilterFactory tokenizerFactory = analysis.tokenFilter.get("polish_stem");
    MatcherAssert.assertThat(tokenizerFactory, instanceOf(PolishStemTokenFilterFactory.class));
    Analyzer analyzer = analysis.indexAnalyzers.get("polish").analyzer();
    MatcherAssert.assertThat(analyzer, instanceOf(PolishAnalyzer.class));
}
Also used: PolishAnalyzer (org.apache.lucene.analysis.pl.PolishAnalyzer), Index (org.elasticsearch.index.Index), PolishStemTokenFilterFactory (org.elasticsearch.index.analysis.pl.PolishStemTokenFilterFactory), AnalysisStempelPlugin (org.elasticsearch.plugin.analysis.stempel.AnalysisStempelPlugin), Analyzer (org.apache.lucene.analysis.Analyzer)

Example 9 with Analyzer

Use of org.apache.lucene.analysis.Analyzer in project che by eclipse.

From the class LuceneSearcher, method makeAnalyzer:

protected Analyzer makeAnalyzer() {
    return new Analyzer() {

        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer tokenizer = new WhitespaceTokenizer();
            TokenStream filter = new LowerCaseFilter(tokenizer);
            return new TokenStreamComponents(tokenizer, filter);
        }
    };
}
Also used: WhitespaceTokenizer (org.apache.lucene.analysis.core.WhitespaceTokenizer), TokenStream (org.apache.lucene.analysis.TokenStream), Analyzer (org.apache.lucene.analysis.Analyzer), Tokenizer (org.apache.lucene.analysis.Tokenizer), LowerCaseFilter (org.apache.lucene.analysis.core.LowerCaseFilter)
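
The same whitespace-plus-lowercase chain can also be assembled declaratively with Lucene's CustomAnalyzer builder. This is a sketch, assuming Lucene's analysis-common module (which registers the "whitespace" and "lowercase" factory names) is on the classpath; it is equivalent to, not taken from, the anonymous Analyzer returned by makeAnalyzer() above.

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.custom.CustomAnalyzer;

public final class LowercaseWhitespaceAnalyzerFactory {

    // Build an analyzer that splits on whitespace and lowercases each token.
    static Analyzer create() throws IOException {
        return CustomAnalyzer.builder()
            .withTokenizer("whitespace")   // split on whitespace only
            .addTokenFilter("lowercase")   // then lowercase every token
            .build();
    }
}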

Example 10 with Analyzer

Use of org.apache.lucene.analysis.Analyzer in project elasticsearch by elastic.

From the class CommonGramsTokenFilterFactoryTests, method testQueryModeCommonGramsAnalysis:

public void testQueryModeCommonGramsAnalysis() throws IOException {
    String json = "/org/elasticsearch/index/analysis/commongrams/commongrams_query_mode.json";
    Settings settings = Settings.builder().loadFromStream(json, getClass().getResourceAsStream(json)).put(Environment.PATH_HOME_SETTING.getKey(), createHome()).build();
    {
        IndexAnalyzers indexAnalyzers = AnalysisTestsHelper.createTestAnalysisFromSettings(settings).indexAnalyzers;
        Analyzer analyzer = indexAnalyzers.get("commongramsAnalyzer").analyzer();
        String source = "the quick brown is a fox or not";
        String[] expected = new String[] { "the", "quick_brown", "brown_is", "is", "a_fox", "fox_or", "or", "not" };
        assertTokenStreamContents(analyzer.tokenStream("test", source), expected);
    }
    {
        IndexAnalyzers indexAnalyzers = AnalysisTestsHelper.createTestAnalysisFromSettings(settings).indexAnalyzers;
        Analyzer analyzer = indexAnalyzers.get("commongramsAnalyzer_file").analyzer();
        String source = "the quick brown is a fox or not";
        String[] expected = new String[] { "the", "quick_brown", "brown_is", "is", "a_fox", "fox_or", "or", "not" };
        assertTokenStreamContents(analyzer.tokenStream("test", source), expected);
    }
}
Also used: IndexAnalyzers (org.elasticsearch.index.analysis.IndexAnalyzers), Analyzer (org.apache.lucene.analysis.Analyzer), Settings (org.elasticsearch.common.settings.Settings)
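
The query-mode behaviour exercised above comes from Lucene's CommonGramsFilter wrapped in a CommonGramsQueryFilter. The sketch below wires those filters into a standalone Analyzer; the common-words list here is an assumption for illustration, since the actual list is loaded from the commongrams_query_mode.json settings file and is not shown in the example.

import java.util.Arrays;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.commongrams.CommonGramsFilter;
import org.apache.lucene.analysis.commongrams.CommonGramsQueryFilter;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;

public final class CommonGramsQueryAnalyzer extends Analyzer {

    // Assumed common-words list for illustration only.
    private final CharArraySet commonWords =
            new CharArraySet(Arrays.asList("the", "is", "a", "or", "not"), true);

    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
        Tokenizer tokenizer = new WhitespaceTokenizer();
        // Emit bigrams that pair common words with their neighbours.
        CommonGramsFilter commonGrams = new CommonGramsFilter(tokenizer, commonWords);
        // Query mode: prefer the bigrams and drop common-word unigrams they cover.
        TokenStream queryMode = new CommonGramsQueryFilter(commonGrams);
        return new TokenStreamComponents(tokenizer, queryMode);
    }
}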

Aggregations

Analyzer (org.apache.lucene.analysis.Analyzer): 1020 usages
MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 396 usages
Tokenizer (org.apache.lucene.analysis.Tokenizer): 265 usages
MockTokenizer (org.apache.lucene.analysis.MockTokenizer): 228 usages
Document (org.apache.lucene.document.Document): 207 usages
Directory (org.apache.lucene.store.Directory): 192 usages
KeywordTokenizer (org.apache.lucene.analysis.core.KeywordTokenizer): 176 usages
BytesRef (org.apache.lucene.util.BytesRef): 122 usages
Test (org.junit.Test): 119 usages
TokenStream (org.apache.lucene.analysis.TokenStream): 107 usages
RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter): 92 usages
Term (org.apache.lucene.index.Term): 92 usages
IndexReader (org.apache.lucene.index.IndexReader): 67 usages
InputArrayIterator (org.apache.lucene.search.suggest.InputArrayIterator): 65 usages
StandardAnalyzer (org.apache.lucene.analysis.standard.StandardAnalyzer): 64 usages
Input (org.apache.lucene.search.suggest.Input): 63 usages
CharArraySet (org.apache.lucene.analysis.CharArraySet): 58 usages
ArrayList (java.util.ArrayList): 57 usages
IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig): 57 usages
TextField (org.apache.lucene.document.TextField): 55 usages