Use of org.elasticsearch.index.analysis.CustomAnalyzer in project elasticsearch by elastic.
From the class FragmentBuilderHelper, the method containsBrokenAnalysis:
private static boolean containsBrokenAnalysis(Analyzer analyzer) {
    // TODO maybe we need a getter on NamedAnalyzer that tells if this uses broken analysis
    if (analyzer instanceof NamedAnalyzer) {
        analyzer = ((NamedAnalyzer) analyzer).analyzer();
    }
    if (analyzer instanceof CustomAnalyzer) {
        final CustomAnalyzer a = (CustomAnalyzer) analyzer;
        TokenFilterFactory[] tokenFilters = a.tokenFilters();
        for (TokenFilterFactory tokenFilterFactory : tokenFilters) {
            // both filters can emit tokens whose offsets overlap or go backwards,
            // which the fast vector highlighter cannot consume without re-sorting
            if (tokenFilterFactory instanceof WordDelimiterTokenFilterFactory
                    || tokenFilterFactory instanceof EdgeNGramTokenFilterFactory) {
                return true;
            }
        }
    }
    return false;
}
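A minimal sketch of how a caller inside FragmentBuilderHelper might branch on this check: re-sort fragment tokens only when the analysis chain is known to produce out-of-order offsets. The method signature and the resortFragments helper are illustrative assumptions, not the verbatim source:

private static WeightedFragInfo fixFragInfoIfBroken(Analyzer analyzer, WeightedFragInfo fragInfo) {
    // only pay the cost of re-sorting when the chain contains a "broken" filter
    if (containsBrokenAnalysis(analyzer)) {
        return resortFragments(fragInfo); // assumed helper, for illustration only
    }
    return fragInfo;
}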
Use of org.elasticsearch.index.analysis.CustomAnalyzer in project elasticsearch by elastic.
From the class PhraseSuggestionBuilder, the method getShingleFilterFactory:
private static ShingleTokenFilterFactory.Factory getShingleFilterFactory(Analyzer analyzer) {
    if (analyzer instanceof NamedAnalyzer) {
        analyzer = ((NamedAnalyzer) analyzer).analyzer();
    }
    if (analyzer instanceof CustomAnalyzer) {
        final CustomAnalyzer a = (CustomAnalyzer) analyzer;
        final TokenFilterFactory[] tokenFilters = a.tokenFilters();
        for (TokenFilterFactory tokenFilterFactory : tokenFilters) {
            if (tokenFilterFactory instanceof ShingleTokenFilterFactory) {
                return ((ShingleTokenFilterFactory) tokenFilterFactory).getInnerFactory();
            } else if (tokenFilterFactory instanceof ShingleTokenFilterFactory.Factory) {
                return (ShingleTokenFilterFactory.Factory) tokenFilterFactory;
            }
        }
    }
    return null;
}
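The phrase suggester uses the returned factory to sanity-check its gram_size against the analyzer's shingle configuration. A hedged sketch of that kind of consumer; the surrounding validation is illustrative, and it assumes the factory exposes getMaxShingleSize() for the configured max_shingle_size:

ShingleTokenFilterFactory.Factory shingleFactory = getShingleFilterFactory(suggestAnalyzer);
if (shingleFactory != null && gramSize > shingleFactory.getMaxShingleSize()) {
    // illustrative only: a gram_size larger than max_shingle_size can never match
    throw new IllegalArgumentException("gram_size must be <= max_shingle_size of the shingle filter");
}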
Use of org.elasticsearch.index.analysis.CustomAnalyzer in project elasticsearch by elastic.
From the class TransportAnalyzeAction, the method detailAnalyze:
private static DetailAnalyzeResponse detailAnalyze(AnalyzeRequest request, Analyzer analyzer, String field) {
    DetailAnalyzeResponse detailResponse;
    final Set<String> includeAttributes = new HashSet<>();
    if (request.attributes() != null) {
        for (String attribute : request.attributes()) {
            includeAttributes.add(attribute.toLowerCase(Locale.ROOT));
        }
    }
    CustomAnalyzer customAnalyzer = null;
    if (analyzer instanceof CustomAnalyzer) {
        customAnalyzer = (CustomAnalyzer) analyzer;
    } else if (analyzer instanceof NamedAnalyzer && ((NamedAnalyzer) analyzer).analyzer() instanceof CustomAnalyzer) {
        customAnalyzer = (CustomAnalyzer) ((NamedAnalyzer) analyzer).analyzer();
    }
    if (customAnalyzer != null) {
        // for a custom analyzer, report each stage separately: char filters, tokenizer, token filters
        CharFilterFactory[] charFilterFactories = customAnalyzer.charFilters();
        TokenizerFactory tokenizerFactory = customAnalyzer.tokenizerFactory();
        TokenFilterFactory[] tokenFilterFactories = customAnalyzer.tokenFilters();
        String[][] charFiltersTexts = new String[charFilterFactories != null ? charFilterFactories.length : 0][request.text().length];
        TokenListCreator[] tokenFiltersTokenListCreator = new TokenListCreator[tokenFilterFactories != null ? tokenFilterFactories.length : 0];
        TokenListCreator tokenizerTokenListCreator = new TokenListCreator();
        for (int textIndex = 0; textIndex < request.text().length; textIndex++) {
            String charFilteredSource = request.text()[textIndex];
            Reader reader = new FastStringReader(charFilteredSource);
            if (charFilterFactories != null) {
                for (int charFilterIndex = 0; charFilterIndex < charFilterFactories.length; charFilterIndex++) {
                    reader = charFilterFactories[charFilterIndex].create(reader);
                    // run the same char filter over a fresh reader so its intermediate
                    // output can be captured for the response without consuming `reader`
                    Reader readerForWriteOut = new FastStringReader(charFilteredSource);
                    readerForWriteOut = charFilterFactories[charFilterIndex].create(readerForWriteOut);
                    charFilteredSource = writeCharStream(readerForWriteOut);
                    charFiltersTexts[charFilterIndex][textIndex] = charFilteredSource;
                }
            }
            // analyzing only tokenizer
            Tokenizer tokenizer = tokenizerFactory.create();
            tokenizer.setReader(reader);
            tokenizerTokenListCreator.analyze(tokenizer, customAnalyzer, field, includeAttributes);
            // analyzing each tokenfilter
            if (tokenFilterFactories != null) {
                for (int tokenFilterIndex = 0; tokenFilterIndex < tokenFilterFactories.length; tokenFilterIndex++) {
                    if (tokenFiltersTokenListCreator[tokenFilterIndex] == null) {
                        tokenFiltersTokenListCreator[tokenFilterIndex] = new TokenListCreator();
                    }
                    // rebuild the analysis chain from scratch up to and including this filter
                    TokenStream stream = createStackedTokenStream(request.text()[textIndex], charFilterFactories, tokenizerFactory, tokenFilterFactories, tokenFilterIndex + 1);
                    tokenFiltersTokenListCreator[tokenFilterIndex].analyze(stream, customAnalyzer, field, includeAttributes);
                }
            }
        }
        DetailAnalyzeResponse.CharFilteredText[] charFilteredLists = new DetailAnalyzeResponse.CharFilteredText[charFiltersTexts.length];
        if (charFilterFactories != null) {
            for (int charFilterIndex = 0; charFilterIndex < charFiltersTexts.length; charFilterIndex++) {
                charFilteredLists[charFilterIndex] = new DetailAnalyzeResponse.CharFilteredText(charFilterFactories[charFilterIndex].name(), charFiltersTexts[charFilterIndex]);
            }
        }
        DetailAnalyzeResponse.AnalyzeTokenList[] tokenFilterLists = new DetailAnalyzeResponse.AnalyzeTokenList[tokenFiltersTokenListCreator.length];
        if (tokenFilterFactories != null) {
            for (int tokenFilterIndex = 0; tokenFilterIndex < tokenFiltersTokenListCreator.length; tokenFilterIndex++) {
                tokenFilterLists[tokenFilterIndex] = new DetailAnalyzeResponse.AnalyzeTokenList(tokenFilterFactories[tokenFilterIndex].name(), tokenFiltersTokenListCreator[tokenFilterIndex].getArrayTokens());
            }
        }
        detailResponse = new DetailAnalyzeResponse(charFilteredLists, new DetailAnalyzeResponse.AnalyzeTokenList(tokenizerFactory.name(), tokenizerTokenListCreator.getArrayTokens()), tokenFilterLists);
    } else {
        String name;
        if (analyzer instanceof NamedAnalyzer) {
            name = ((NamedAnalyzer) analyzer).name();
        } else {
            name = analyzer.getClass().getName();
        }
        TokenListCreator tokenListCreator = new TokenListCreator();
        for (String text : request.text()) {
            tokenListCreator.analyze(analyzer.tokenStream(field, text), analyzer, field, includeAttributes);
        }
        detailResponse = new DetailAnalyzeResponse(new DetailAnalyzeResponse.AnalyzeTokenList(name, tokenListCreator.getArrayTokens()));
    }
    return detailResponse;
}
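This branch is only reached when a request asks for an explained analysis. A sketch of a request that would exercise the per-stage breakdown above, assuming the 5.x AnalyzeRequest builder methods; the index and analyzer names are hypothetical:

AnalyzeRequest request = new AnalyzeRequest("my_index") // hypothetical index name
    .analyzer("my_custom_analyzer")                     // hypothetical custom analyzer
    .text("The Quick Brown Fox")
    .explain(true)          // routes the request through detailAnalyze
    .attributes("keyword"); // restrict which token attributes are reported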
Use of org.elasticsearch.index.analysis.CustomAnalyzer in project elasticsearch by elastic.
From the class AnalysisModuleTests, the method testSimpleConfiguration:
private void testSimpleConfiguration(Settings settings) throws IOException {
    IndexAnalyzers indexAnalyzers = getIndexAnalyzers(settings);
    Analyzer analyzer = indexAnalyzers.get("custom1").analyzer();
    assertThat(analyzer, instanceOf(CustomAnalyzer.class));
    CustomAnalyzer custom1 = (CustomAnalyzer) analyzer;
    assertThat(custom1.tokenizerFactory(), instanceOf(StandardTokenizerFactory.class));
    assertThat(custom1.tokenFilters().length, equalTo(2));
    StopTokenFilterFactory stop1 = (StopTokenFilterFactory) custom1.tokenFilters()[0];
    assertThat(stop1.stopWords().size(), equalTo(1));
    analyzer = indexAnalyzers.get("custom2").analyzer();
    assertThat(analyzer, instanceOf(CustomAnalyzer.class));
    // verify position increment gap
    analyzer = indexAnalyzers.get("custom6").analyzer();
    assertThat(analyzer, instanceOf(CustomAnalyzer.class));
    CustomAnalyzer custom6 = (CustomAnalyzer) analyzer;
    assertThat(custom6.getPositionIncrementGap("any_string"), equalTo(256));
    // verify characters mapping
    analyzer = indexAnalyzers.get("custom5").analyzer();
    assertThat(analyzer, instanceOf(CustomAnalyzer.class));
    CustomAnalyzer custom5 = (CustomAnalyzer) analyzer;
    assertThat(custom5.charFilters()[0], instanceOf(MappingCharFilterFactory.class));
    // check custom pattern replace filter
    analyzer = indexAnalyzers.get("custom3").analyzer();
    assertThat(analyzer, instanceOf(CustomAnalyzer.class));
    CustomAnalyzer custom3 = (CustomAnalyzer) analyzer;
    PatternReplaceCharFilterFactory patternReplaceCharFilterFactory = (PatternReplaceCharFilterFactory) custom3.charFilters()[0];
    assertThat(patternReplaceCharFilterFactory.getPattern().pattern(), equalTo("sample(.*)"));
    assertThat(patternReplaceCharFilterFactory.getReplacement(), equalTo("replacedSample $1"));
    // check custom class name (my)
    analyzer = indexAnalyzers.get("custom4").analyzer();
    assertThat(analyzer, instanceOf(CustomAnalyzer.class));
    CustomAnalyzer custom4 = (CustomAnalyzer) analyzer;
    assertThat(custom4.tokenFilters()[0], instanceOf(MyFilterTokenFilterFactory.class));
    // // verify Czech stemmer
    // analyzer = analysisService.analyzer("czechAnalyzerWithStemmer").analyzer();
    // assertThat(analyzer, instanceOf(CustomAnalyzer.class));
    // CustomAnalyzer czechstemmeranalyzer = (CustomAnalyzer) analyzer;
    // assertThat(czechstemmeranalyzer.tokenizerFactory(), instanceOf(StandardTokenizerFactory.class));
    // assertThat(czechstemmeranalyzer.tokenFilters().length, equalTo(4));
    // assertThat(czechstemmeranalyzer.tokenFilters()[3], instanceOf(CzechStemTokenFilterFactory.class));
    //
    // // check dictionary decompounder
    // analyzer = analysisService.analyzer("decompoundingAnalyzer").analyzer();
    // assertThat(analyzer, instanceOf(CustomAnalyzer.class));
    // CustomAnalyzer dictionaryDecompounderAnalyze = (CustomAnalyzer) analyzer;
    // assertThat(dictionaryDecompounderAnalyze.tokenizerFactory(), instanceOf(StandardTokenizerFactory.class));
    // assertThat(dictionaryDecompounderAnalyze.tokenFilters().length, equalTo(1));
    // assertThat(dictionaryDecompounderAnalyze.tokenFilters()[0], instanceOf(DictionaryCompoundWordTokenFilterFactory.class));
    Set<?> wordList = Analysis.getWordSet(null, Version.CURRENT, settings, "index.analysis.filter.dict_dec.word_list");
    MatcherAssert.assertThat(wordList.size(), equalTo(6));
    // MatcherAssert.assertThat(wordList, hasItems("donau", "dampf", "schiff", "spargel", "creme", "suppe"));
}
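The analyzers asserted on above come from the test's settings source. A hedged reconstruction of the fragment that the custom6 assertion would need, written with Settings.builder(); the exact keys in the real test fixture may differ:

Settings settings = Settings.builder()
    .put("index.analysis.analyzer.custom6.type", "custom")
    .put("index.analysis.analyzer.custom6.tokenizer", "standard")
    // makes getPositionIncrementGap(...) return 256, as asserted above
    .put("index.analysis.analyzer.custom6.position_increment_gap", 256)
    .build();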