Use of org.elasticsearch.index.analysis.TokenFilterFactory in project elasticsearch by elastic.
From the class CommonGramsTokenFilterFactoryTests, the method testWithoutCommonWordsMatch verifies that a common_grams filter leaves the token stream untouched when none of the configured common words occur in the input, both with the default settings and with query_mode explicitly set to false:
public void testWithoutCommonWordsMatch() throws IOException {
    {
        Settings settings = Settings.builder()
                .put("index.analysis.filter.common_grams_default.type", "common_grams")
                .putArray("index.analysis.filter.common_grams_default.common_words", "chromosome", "protein")
                .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
                .build();
        ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings);
        {
            TokenFilterFactory tokenFilter = analysis.tokenFilter.get("common_grams_default");
            String source = "the quick brown is a fox Or noT";
            String[] expected = new String[] { "the", "quick", "brown", "is", "a", "fox", "Or", "noT" };
            Tokenizer tokenizer = new WhitespaceTokenizer();
            tokenizer.setReader(new StringReader(source));
            assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
        }
    }
    {
        Settings settings = Settings.builder()
                .put("index.analysis.filter.common_grams_default.type", "common_grams")
                .put("index.analysis.filter.common_grams_default.query_mode", false)
                .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
                .putArray("index.analysis.filter.common_grams_default.common_words", "chromosome", "protein")
                .build();
        ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings);
        {
            TokenFilterFactory tokenFilter = analysis.tokenFilter.get("common_grams_default");
            String source = "the quick brown is a fox Or noT";
            String[] expected = new String[] { "the", "quick", "brown", "is", "a", "fox", "Or", "noT" };
            Tokenizer tokenizer = new WhitespaceTokenizer();
            tokenizer.setReader(new StringReader(source));
            assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
        }
    }
}
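For contrast, here is a minimal sketch of the matching case, which is not part of the original test: when one of the configured common words does appear in the source, the filter should additionally emit "_"-joined bigram tokens next to the unigrams, following the standard Lucene CommonGramsFilter behavior. The settings and helpers are the same as above; the expected token array is an assumption based on that behavior.

    // Illustrative companion check, not in the project's test suite: the
    // common word "chromosome" should yield a bigram with each neighbour,
    // emitted immediately after the unigram it starts from.
    TokenFilterFactory tokenFilter = analysis.tokenFilter.get("common_grams_default");
    String source = "a chromosome mutation";
    String[] expected = new String[] { "a", "a_chromosome", "chromosome", "chromosome_mutation", "mutation" };
    Tokenizer tokenizer = new WhitespaceTokenizer();
    tokenizer.setReader(new StringReader(source));
    assertTokenStreamContents(tokenFilter.create(tokenizer), expected);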
Use of org.elasticsearch.index.analysis.TokenFilterFactory in project elasticsearch by elastic.
From the class FragmentBuilderHelper, the method containsBrokenAnalysis reports whether an analyzer's chain contains a token filter (word_delimiter or edge n-gram) that can emit tokens out of position order, so that highlight fragment building can compensate:
private static boolean containsBrokenAnalysis(Analyzer analyzer) {
    // TODO maybe we need a getter on NamedAnalyzer that tells if this uses broken analysis
    if (analyzer instanceof NamedAnalyzer) {
        analyzer = ((NamedAnalyzer) analyzer).analyzer();
    }
    if (analyzer instanceof CustomAnalyzer) {
        final CustomAnalyzer a = (CustomAnalyzer) analyzer;
        TokenFilterFactory[] tokenFilters = a.tokenFilters();
        for (TokenFilterFactory tokenFilterFactory : tokenFilters) {
            if (tokenFilterFactory instanceof WordDelimiterTokenFilterFactory
                    || tokenFilterFactory instanceof EdgeNGramTokenFilterFactory) {
                return true;
            }
        }
    }
    return false;
}
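Within the same class, this check gates an offset-based re-sort of highlight fragments. Below is a condensed sketch of that caller, simplified from fixWeightedFragInfo in the same class; fragInfo and mapper stand for parameters of the enclosing method, and the comparator form is an assumption about how the re-sort is expressed.

    // If the analyzer can emit tokens out of position order, the offsets of
    // the fragment's sub-infos cannot be trusted as-is; re-sort them by the
    // start offset of their first term before building the fragment.
    if (!fragInfo.getSubInfos().isEmpty() && containsBrokenAnalysis(mapper.fieldType().indexAnalyzer())) {
        List<FieldFragList.WeightedFragInfo.SubInfo> subInfos = fragInfo.getSubInfos();
        CollectionUtil.introSort(subInfos,
                Comparator.comparingInt(o -> o.getTermsOffsets().get(0).getStartOffset()));
    }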
Use of org.elasticsearch.index.analysis.TokenFilterFactory in project elasticsearch by elastic.
From the class PhraseSuggestionBuilder, the method getShingleFilterFactory unwraps a NamedAnalyzer and walks a CustomAnalyzer's filter chain to find its shingle factory, whether registered directly or wrapped inside the Elasticsearch ShingleTokenFilterFactory:
private static ShingleTokenFilterFactory.Factory getShingleFilterFactory(Analyzer analyzer) {
    if (analyzer instanceof NamedAnalyzer) {
        analyzer = ((NamedAnalyzer) analyzer).analyzer();
    }
    if (analyzer instanceof CustomAnalyzer) {
        final CustomAnalyzer a = (CustomAnalyzer) analyzer;
        final TokenFilterFactory[] tokenFilters = a.tokenFilters();
        for (TokenFilterFactory tokenFilterFactory : tokenFilters) {
            if (tokenFilterFactory instanceof ShingleTokenFilterFactory) {
                return ((ShingleTokenFilterFactory) tokenFilterFactory).getInnerFactory();
            } else if (tokenFilterFactory instanceof ShingleTokenFilterFactory.Factory) {
                return (ShingleTokenFilterFactory.Factory) tokenFilterFactory;
            }
        }
    }
    return null;
}
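The phrase suggester uses the returned factory to sanity-check its gram_size against what the analyzer can actually produce. A hedged, simplified sketch of that consumption follows; the getter names follow ShingleTokenFilterFactory.Factory, while the surrounding variables and the exact validation message are illustrative.

    // Reject a gram_size the analyzer's shingle filter can never produce.
    ShingleTokenFilterFactory.Factory shingleFilterFactory = getShingleFilterFactory(analyzer);
    if (shingleFilterFactory != null) {
        int min = shingleFilterFactory.getMinShingleSize();
        int max = shingleFilterFactory.getMaxShingleSize();
        if (gramSize < min || gramSize > max) {
            throw new IllegalArgumentException(
                    "gram_size must be between " + min + " and " + max + " for this analyzer");
        }
    }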
Use of org.elasticsearch.index.analysis.TokenFilterFactory in project elasticsearch by elastic.
From the class TransportAnalyzeAction, the method getTokenFilterFactories resolves the token filters named in an _analyze request into factories, handling anonymous inline definitions, globally registered filters (when no index is targeted), and index-scoped filters:
private static TokenFilterFactory[] getTokenFilterFactories(AnalyzeRequest request, IndexSettings indexSettings,
        AnalysisRegistry analysisRegistry, Environment environment,
        TokenFilterFactory[] tokenFilterFactories) throws IOException {
    if (request.tokenFilters() != null && request.tokenFilters().size() > 0) {
        tokenFilterFactories = new TokenFilterFactory[request.tokenFilters().size()];
        for (int i = 0; i < request.tokenFilters().size(); i++) {
            final AnalyzeRequest.NameOrDefinition tokenFilter = request.tokenFilters().get(i);
            // parse anonymous settings
            if (tokenFilter.definition != null) {
                Settings settings = getAnonymousSettings(tokenFilter.definition);
                String filterTypeName = settings.get("type");
                if (filterTypeName == null) {
                    throw new IllegalArgumentException("Missing [type] setting for anonymous token filter: " + tokenFilter.definition);
                }
                AnalysisModule.AnalysisProvider<TokenFilterFactory> tokenFilterFactoryFactory =
                        analysisRegistry.getTokenFilterProvider(filterTypeName);
                if (tokenFilterFactoryFactory == null) {
                    throw new IllegalArgumentException("failed to find global token filter under [" + filterTypeName + "]");
                }
                // anonymous token filters need a synthetic name
                tokenFilterFactories[i] = tokenFilterFactoryFactory.get(getNaIndexSettings(settings), environment,
                        "_anonymous_tokenfilter_[" + i + "]", settings);
            } else {
                AnalysisModule.AnalysisProvider<TokenFilterFactory> tokenFilterFactoryFactory;
                if (indexSettings == null) {
                    tokenFilterFactoryFactory = analysisRegistry.getTokenFilterProvider(tokenFilter.name);
                    if (tokenFilterFactoryFactory == null) {
                        throw new IllegalArgumentException("failed to find global token filter under [" + tokenFilter.name + "]");
                    }
                    tokenFilterFactories[i] = tokenFilterFactoryFactory.get(environment, tokenFilter.name);
                } else {
                    tokenFilterFactoryFactory = analysisRegistry.getTokenFilterProvider(tokenFilter.name, indexSettings);
                    if (tokenFilterFactoryFactory == null) {
                        throw new IllegalArgumentException("failed to find token filter under [" + tokenFilter.name + "]");
                    }
                    tokenFilterFactories[i] = tokenFilterFactoryFactory.get(indexSettings, environment, tokenFilter.name,
                            AnalysisRegistry.getSettingsFromIndexSettings(indexSettings,
                                    AnalysisRegistry.INDEX_ANALYSIS_FILTER + "." + tokenFilter.name));
                }
            }
            if (tokenFilterFactories[i] == null) {
                throw new IllegalArgumentException("failed to find or create token filter under [" + tokenFilter.name + "]");
            }
        }
    }
    return tokenFilterFactories;
}
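For context, here is a hedged sketch of the two request shapes this method distinguishes, built with AnalyzeRequest's fluent API as it exists in the 5.x line; treat the builder method names as assumptions.

    // Named filter: resolved against the index settings, or against the
    // global registry when the request targets no index.
    AnalyzeRequest byName = new AnalyzeRequest("my_index")
            .text("Brown Foxes")
            .tokenizer("whitespace")
            .addTokenFilter("lowercase");

    // Anonymous filter: an inline definition, which must carry a "type" key
    // or getTokenFilterFactories rejects it with "Missing [type] setting".
    Map<String, Object> stopDefinition = new HashMap<>();
    stopDefinition.put("type", "stop");
    stopDefinition.put("stopwords", Arrays.asList("a", "the"));
    AnalyzeRequest anonymous = new AnalyzeRequest("my_index")
            .text("a quick fox")
            .tokenizer("whitespace")
            .addTokenFilter(stopDefinition);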
Use of org.elasticsearch.index.analysis.TokenFilterFactory in project elasticsearch by elastic.
From the class TransportAnalyzeAction, the method detailAnalyze builds the explain output of the _analyze API: for a custom analyzer it records the text after each char filter, the tokenizer's tokens, and the token stream after each successive token filter; for any other analyzer it records the analyzer's overall output:
private static DetailAnalyzeResponse detailAnalyze(AnalyzeRequest request, Analyzer analyzer, String field) {
    DetailAnalyzeResponse detailResponse;
    final Set<String> includeAttributes = new HashSet<>();
    if (request.attributes() != null) {
        for (String attribute : request.attributes()) {
            includeAttributes.add(attribute.toLowerCase(Locale.ROOT));
        }
    }
    CustomAnalyzer customAnalyzer = null;
    if (analyzer instanceof CustomAnalyzer) {
        customAnalyzer = (CustomAnalyzer) analyzer;
    } else if (analyzer instanceof NamedAnalyzer && ((NamedAnalyzer) analyzer).analyzer() instanceof CustomAnalyzer) {
        customAnalyzer = (CustomAnalyzer) ((NamedAnalyzer) analyzer).analyzer();
    }
    if (customAnalyzer != null) {
        // split the custom analyzer into its char filters, tokenizer and token filters
        CharFilterFactory[] charFilterFactories = customAnalyzer.charFilters();
        TokenizerFactory tokenizerFactory = customAnalyzer.tokenizerFactory();
        TokenFilterFactory[] tokenFilterFactories = customAnalyzer.tokenFilters();
        String[][] charFiltersTexts = new String[charFilterFactories != null ? charFilterFactories.length : 0][request.text().length];
        TokenListCreator[] tokenFiltersTokenListCreator = new TokenListCreator[tokenFilterFactories != null ? tokenFilterFactories.length : 0];
        TokenListCreator tokenizerTokenListCreator = new TokenListCreator();
        for (int textIndex = 0; textIndex < request.text().length; textIndex++) {
            String charFilteredSource = request.text()[textIndex];
            Reader reader = new FastStringReader(charFilteredSource);
            if (charFilterFactories != null) {
                for (int charFilterIndex = 0; charFilterIndex < charFilterFactories.length; charFilterIndex++) {
                    reader = charFilterFactories[charFilterIndex].create(reader);
                    // run the char filter a second time on a fresh reader so its output can be captured as text
                    Reader readerForWriteOut = new FastStringReader(charFilteredSource);
                    readerForWriteOut = charFilterFactories[charFilterIndex].create(readerForWriteOut);
                    charFilteredSource = writeCharStream(readerForWriteOut);
                    charFiltersTexts[charFilterIndex][textIndex] = charFilteredSource;
                }
            }
            // analyze with the tokenizer only
            Tokenizer tokenizer = tokenizerFactory.create();
            tokenizer.setReader(reader);
            tokenizerTokenListCreator.analyze(tokenizer, customAnalyzer, field, includeAttributes);
            // analyze the stream again after each token filter
            if (tokenFilterFactories != null) {
                for (int tokenFilterIndex = 0; tokenFilterIndex < tokenFilterFactories.length; tokenFilterIndex++) {
                    if (tokenFiltersTokenListCreator[tokenFilterIndex] == null) {
                        tokenFiltersTokenListCreator[tokenFilterIndex] = new TokenListCreator();
                    }
                    TokenStream stream = createStackedTokenStream(request.text()[textIndex], charFilterFactories,
                            tokenizerFactory, tokenFilterFactories, tokenFilterIndex + 1);
                    tokenFiltersTokenListCreator[tokenFilterIndex].analyze(stream, customAnalyzer, field, includeAttributes);
                }
            }
        }
        DetailAnalyzeResponse.CharFilteredText[] charFilteredLists = new DetailAnalyzeResponse.CharFilteredText[charFiltersTexts.length];
        if (charFilterFactories != null) {
            for (int charFilterIndex = 0; charFilterIndex < charFiltersTexts.length; charFilterIndex++) {
                charFilteredLists[charFilterIndex] = new DetailAnalyzeResponse.CharFilteredText(
                        charFilterFactories[charFilterIndex].name(), charFiltersTexts[charFilterIndex]);
            }
        }
        DetailAnalyzeResponse.AnalyzeTokenList[] tokenFilterLists = new DetailAnalyzeResponse.AnalyzeTokenList[tokenFiltersTokenListCreator.length];
        if (tokenFilterFactories != null) {
            for (int tokenFilterIndex = 0; tokenFilterIndex < tokenFiltersTokenListCreator.length; tokenFilterIndex++) {
                tokenFilterLists[tokenFilterIndex] = new DetailAnalyzeResponse.AnalyzeTokenList(
                        tokenFilterFactories[tokenFilterIndex].name(), tokenFiltersTokenListCreator[tokenFilterIndex].getArrayTokens());
            }
        }
        detailResponse = new DetailAnalyzeResponse(charFilteredLists,
                new DetailAnalyzeResponse.AnalyzeTokenList(tokenizerFactory.name(), tokenizerTokenListCreator.getArrayTokens()),
                tokenFilterLists);
    } else {
        String name;
        if (analyzer instanceof NamedAnalyzer) {
            name = ((NamedAnalyzer) analyzer).name();
        } else {
            name = analyzer.getClass().getName();
        }
        TokenListCreator tokenListCreator = new TokenListCreator();
        for (String text : request.text()) {
            tokenListCreator.analyze(analyzer.tokenStream(field, text), analyzer, field, includeAttributes);
        }
        detailResponse = new DetailAnalyzeResponse(new DetailAnalyzeResponse.AnalyzeTokenList(name, tokenListCreator.getArrayTokens()));
    }
    return detailResponse;
}
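detailAnalyze is only reached when the caller asks for an explanation. A minimal sketch of such a request follows, including the attribute filter that populates includeAttributes above; the builder method names follow the 5.x AnalyzeRequest API and are stated as an assumption.

    // explain(true) routes the request into detailAnalyze; attributes()
    // restricts the reported token attributes, compared lower-cased as in
    // the includeAttributes handling above.
    AnalyzeRequest request = new AnalyzeRequest("my_index")
            .text("The Quick Brown Fox")
            .analyzer("standard")
            .explain(true)
            .attributes("keyword");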