use of org.apache.lucene.analysis.Analyzer in project elasticsearch by elastic.
the class PercolateQueryBuilderTests method testCreateMultiDocumentSearcher.
/**
 * Builds a parsed document holding a random number (2..8) of empty Lucene documents, creates the
 * multi-document searcher for it and checks that:
 * <ul>
 *   <li>the searcher's reader reports exactly that many documents;</li>
 *   <li>a query normalized through the searcher becomes a two-clause boolean query whose first
 *       clause is the original query (MUST) and whose second clause is a MUST_NOT clause.</li>
 * </ul>
 */
public void testCreateMultiDocumentSearcher() throws Exception {
    final int docCount = randomIntBetween(2, 8);
    final List<ParseContext.Document> documents = new ArrayList<>(docCount);
    while (documents.size() < docCount) {
        documents.add(new ParseContext.Document());
    }

    Analyzer whitespace = new WhitespaceAnalyzer();
    ParsedDocument parsed = new ParsedDocument(null, null, "_id", "_type", null, documents, null, null, null);
    IndexSearcher searcher = PercolateQueryBuilder.createMultiDocumentSearcher(whitespace, parsed);
    assertThat(searcher.getIndexReader().numDocs(), equalTo(docCount));

    // Any query run through this searcher must be wrapped so that the nested docs are never returned as hits.
    Query matchAll = new MatchAllDocsQuery();
    BooleanQuery rewritten = (BooleanQuery) searcher.createNormalizedWeight(matchAll, true).getQuery();
    List<BooleanClause> clauses = rewritten.clauses();
    assertThat(clauses.size(), equalTo(2));

    BooleanClause original = clauses.get(0);
    assertThat(original.getQuery(), sameInstance(matchAll));
    assertThat(original.getOccur(), equalTo(BooleanClause.Occur.MUST));

    BooleanClause exclusion = clauses.get(1);
    assertThat(exclusion.getOccur(), equalTo(BooleanClause.Occur.MUST_NOT));
}
use of org.apache.lucene.analysis.Analyzer in project elasticsearch by elastic.
the class SimpleUkrainianAnalyzerTests method testAnalyzer.
/**
 * Runs {@code source} through the "ukrainian" index analyzer and asserts that it produces exactly
 * the given terms, in order, and nothing more.
 *
 * @param source         the text to analyze
 * @param expected_terms the terms the analyzer is expected to emit, in order
 * @throws IOException if creating the test analysis or consuming the token stream fails
 */
private static void testAnalyzer(String source, String... expected_terms) throws IOException {
    TestAnalysis analysis = createTestAnalysis(new Index("test", "_na_"), Settings.EMPTY, new AnalysisUkrainianPlugin());
    Analyzer analyzer = analysis.indexAnalyzers.get("ukrainian").analyzer();
    // Follow the full TokenStream consumer workflow (reset -> incrementToken* -> end -> close);
    // try-with-resources guarantees the stream is closed even when an assertion fails mid-way.
    try (TokenStream ts = analyzer.tokenStream("test", source)) {
        CharTermAttribute term1 = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        for (String expected : expected_terms) {
            assertThat(ts.incrementToken(), equalTo(true));
            assertThat(term1.toString(), equalTo(expected));
        }
        // No further tokens may follow the expected ones.
        assertThat(ts.incrementToken(), equalTo(false));
        ts.end();
    }
}
use of org.apache.lucene.analysis.Analyzer in project elasticsearch by elastic.
the class PolishAnalysisTests method testDefaultsPolishAnalysis.
/**
 * Checks that, with empty settings and the Stempel plugin loaded, the "polish_stem" token filter
 * is a {@code PolishStemTokenFilterFactory} and the "polish" index analyzer is a {@code PolishAnalyzer}.
 */
public void testDefaultsPolishAnalysis() throws IOException {
    TestAnalysis polishAnalysis = createTestAnalysis(
        new Index("test", "_na_"),
        Settings.EMPTY,
        new AnalysisStempelPlugin()
    );

    // The stemming token filter registered by the plugin.
    TokenFilterFactory stemFilterFactory = polishAnalysis.tokenFilter.get("polish_stem");
    MatcherAssert.assertThat(stemFilterFactory, instanceOf(PolishStemTokenFilterFactory.class));

    // The analyzer registered by the plugin.
    Analyzer polishAnalyzer = polishAnalysis.indexAnalyzers.get("polish").analyzer();
    MatcherAssert.assertThat(polishAnalyzer, instanceOf(PolishAnalyzer.class));
}
use of org.apache.lucene.analysis.Analyzer in project che by eclipse.
the class LuceneSearcher method makeAnalyzer.
/**
 * Creates the analyzer used by this searcher: input is split with a {@code WhitespaceTokenizer}
 * and the resulting tokens are passed through a {@code LowerCaseFilter}.
 *
 * @return a new analyzer instance
 */
protected Analyzer makeAnalyzer() {
    return new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            // The same tokenizer instance is both the source of the components and the input of the filter.
            final Tokenizer whitespaceSource = new WhitespaceTokenizer();
            return new TokenStreamComponents(whitespaceSource, new LowerCaseFilter(whitespaceSource));
        }
    };
}
use of org.apache.lucene.analysis.Analyzer in project elasticsearch by elastic.
the class CommonGramsTokenFilterFactoryTests method testQueryModeCommonGramsAnalysis.
/**
 * Loads the query-mode common-grams settings from the classpath and checks that both configured
 * analyzers ("commongramsAnalyzer" and "commongramsAnalyzer_file") produce the same expected
 * token sequence for the same input sentence.
 */
public void testQueryModeCommonGramsAnalysis() throws IOException {
    final String resource = "/org/elasticsearch/index/analysis/commongrams/commongrams_query_mode.json";
    final Settings settings = Settings.builder()
        .loadFromStream(resource, getClass().getResourceAsStream(resource))
        .put(Environment.PATH_HOME_SETTING.getKey(), createHome())
        .build();

    final String[] analyzerNames = { "commongramsAnalyzer", "commongramsAnalyzer_file" };
    for (String analyzerName : analyzerNames) {
        // A fresh analysis is built from the settings for every analyzer under test.
        Analyzer analyzer = AnalysisTestsHelper.createTestAnalysisFromSettings(settings)
            .indexAnalyzers
            .get(analyzerName)
            .analyzer();
        String input = "the quick brown is a fox or not";
        String[] expectedTokens = { "the", "quick_brown", "brown_is", "is", "a_fox", "fox_or", "or", "not" };
        assertTokenStreamContents(analyzer.tokenStream("test", input), expectedTokens);
    }
}
Aggregations