Use of org.opensearch.index.analysis.CharFilterFactory in the OpenSearch project (opensearch-project): class ScriptedConditionTokenFilterFactory, method getChainAwareTokenFilterFactory.
@Override
public TokenFilterFactory getChainAwareTokenFilterFactory(TokenizerFactory tokenizer, List<CharFilterFactory> charFilters, List<TokenFilterFactory> previousTokenFilters, Function<String, TokenFilterFactory> allFilters) {
    // Resolve each referenced filter name, letting every resolved filter see
    // the full chain built so far (previous filters plus earlier resolved ones).
    final List<TokenFilterFactory> resolved = new ArrayList<>();
    final List<TokenFilterFactory> chainSoFar = new ArrayList<>(previousTokenFilters);
    for (String filterName : filterNames) {
        TokenFilterFactory candidate = allFilters.apply(filterName);
        if (candidate == null) {
            throw new IllegalArgumentException("ScriptedConditionTokenFilter [" + name() + "] refers to undefined token filter [" + filterName + "]");
        }
        TokenFilterFactory chainAware = candidate.getChainAwareTokenFilterFactory(tokenizer, charFilters, chainSoFar, allFilters);
        resolved.add(chainAware);
        chainSoFar.add(chainAware);
    }
    // Wrap the resolved filters behind the script condition: they are applied
    // as one composed function only when the condition matches a token.
    return new TokenFilterFactory() {
        @Override
        public String name() {
            return ScriptedConditionTokenFilterFactory.this.name();
        }

        @Override
        public TokenStream create(TokenStream tokenStream) {
            Function<TokenStream, TokenStream> applyAll = stream -> {
                TokenStream current = stream;
                for (TokenFilterFactory resolvedFilter : resolved) {
                    current = resolvedFilter.create(current);
                }
                return current;
            };
            return new ScriptedConditionTokenFilter(tokenStream, applyAll, factory.newInstance());
        }
    };
}
Use of org.opensearch.index.analysis.CharFilterFactory in the OpenSearch project (opensearch-project): class AnnotatedTextFieldMapperTests, method createIndexAnalyzers.
@Override
protected IndexAnalyzers createIndexAnalyzers(IndexSettings indexSettings) {
    // Inline "stop" token filter backing the custom my_stop_analyzer below.
    TokenFilterFactory stopFilterFactory = new TokenFilterFactory() {
        @Override
        public String name() {
            return "stop";
        }

        @Override
        public TokenStream create(TokenStream tokenStream) {
            return new StopFilter(tokenStream, EnglishAnalyzer.ENGLISH_STOP_WORDS_SET);
        }
    };
    // Custom analyzer: standard tokenizer, no char filters, English stop filter.
    CustomAnalyzer stopAnalyzer = new CustomAnalyzer(
        new StandardTokenizerFactory(indexSettings, null, "standard", indexSettings.getSettings()),
        new CharFilterFactory[0],
        new TokenFilterFactory[] { stopFilterFactory }
    );
    Map<String, NamedAnalyzer> analyzers = new HashMap<>();
    analyzers.put("default", new NamedAnalyzer("default", AnalyzerScope.INDEX, new StandardAnalyzer(), TextFieldMapper.Defaults.POSITION_INCREMENT_GAP));
    analyzers.put("standard", new NamedAnalyzer("standard", AnalyzerScope.INDEX, new StandardAnalyzer()));
    analyzers.put("keyword", new NamedAnalyzer("keyword", AnalyzerScope.INDEX, new KeywordAnalyzer()));
    analyzers.put("whitespace", new NamedAnalyzer("whitespace", AnalyzerScope.INDEX, new WhitespaceAnalyzer()));
    analyzers.put("my_stop_analyzer", new NamedAnalyzer("my_stop_analyzer", AnalyzerScope.INDEX, stopAnalyzer));
    return new IndexAnalyzers(analyzers, Collections.emptyMap(), Collections.emptyMap());
}
Use of org.opensearch.index.analysis.CharFilterFactory in the OpenSearch project (opensearch-project): class TransportAnalyzeAction, method detailAnalyze.
/**
 * Produces a per-component breakdown of an analysis request: the text after each
 * char filter, the tokens emitted by the tokenizer alone, and the tokens after
 * each successive token filter. Falls back to a single whole-analyzer token list
 * when the analyzer does not expose its components.
 */
private static AnalyzeAction.DetailAnalyzeResponse detailAnalyze(AnalyzeAction.Request request, Analyzer analyzer, int maxTokenCount) {
AnalyzeAction.DetailAnalyzeResponse detailResponse;
// Attribute names are matched case-insensitively; normalize once up front.
final Set<String> includeAttributes = new HashSet<>();
if (request.attributes() != null) {
for (String attribute : request.attributes()) {
includeAttributes.add(attribute.toLowerCase(Locale.ROOT));
}
}
// maybe unwrap analyzer from NamedAnalyzer
Analyzer potentialCustomAnalyzer = analyzer;
if (analyzer instanceof NamedAnalyzer) {
potentialCustomAnalyzer = ((NamedAnalyzer) analyzer).analyzer();
}
if (potentialCustomAnalyzer instanceof AnalyzerComponentsProvider) {
AnalyzerComponentsProvider customAnalyzer = (AnalyzerComponentsProvider) potentialCustomAnalyzer;
// note: this is not field-name dependent in our cases so we can leave out the argument
int positionIncrementGap = potentialCustomAnalyzer.getPositionIncrementGap("");
int offsetGap = potentialCustomAnalyzer.getOffsetGap("");
AnalyzerComponents components = customAnalyzer.getComponents();
// divide charfilter, tokenizer tokenfilters
CharFilterFactory[] charFilterFactories = components.getCharFilters();
TokenizerFactory tokenizerFactory = components.getTokenizerFactory();
TokenFilterFactory[] tokenFilterFactories = components.getTokenFilters();
// charFiltersTexts[i][j] = text j after char filters 0..i have been applied.
String[][] charFiltersTexts = new String[charFilterFactories != null ? charFilterFactories.length : 0][request.text().length];
TokenListCreator[] tokenFiltersTokenListCreator = new TokenListCreator[tokenFilterFactories != null ? tokenFilterFactories.length : 0];
TokenListCreator tokenizerTokenListCreator = new TokenListCreator(maxTokenCount);
for (int textIndex = 0; textIndex < request.text().length; textIndex++) {
String charFilteredSource = request.text()[textIndex];
Reader reader = new StringReader(charFilteredSource);
if (charFilterFactories != null) {
for (int charFilterIndex = 0; charFilterIndex < charFilterFactories.length; charFilterIndex++) {
// Chain the char filter onto the reader fed to the tokenizer below...
reader = charFilterFactories[charFilterIndex].create(reader);
// ...and separately apply it to a fresh reader so the intermediate
// text can be materialized without consuming the tokenizer's input.
Reader readerForWriteOut = new StringReader(charFilteredSource);
readerForWriteOut = charFilterFactories[charFilterIndex].create(readerForWriteOut);
charFilteredSource = writeCharStream(readerForWriteOut);
charFiltersTexts[charFilterIndex][textIndex] = charFilteredSource;
}
}
// analyzing only tokenizer
Tokenizer tokenizer = tokenizerFactory.create();
tokenizer.setReader(reader);
tokenizerTokenListCreator.analyze(tokenizer, includeAttributes, positionIncrementGap, offsetGap);
// analyzing each tokenfilter
if (tokenFilterFactories != null) {
for (int tokenFilterIndex = 0; tokenFilterIndex < tokenFilterFactories.length; tokenFilterIndex++) {
// Lazily create one accumulator per filter; it collects tokens across all texts.
if (tokenFiltersTokenListCreator[tokenFilterIndex] == null) {
tokenFiltersTokenListCreator[tokenFilterIndex] = new TokenListCreator(maxTokenCount);
}
// Rebuild the full stream up to and including this filter (index + 1 filters).
TokenStream stream = createStackedTokenStream(request.text()[textIndex], charFilterFactories, tokenizerFactory, tokenFilterFactories, tokenFilterIndex + 1);
tokenFiltersTokenListCreator[tokenFilterIndex].analyze(stream, includeAttributes, positionIncrementGap, offsetGap);
}
}
}
// Package the collected intermediate texts and token lists into the response.
AnalyzeAction.CharFilteredText[] charFilteredLists = new AnalyzeAction.CharFilteredText[charFiltersTexts.length];
if (charFilterFactories != null) {
for (int charFilterIndex = 0; charFilterIndex < charFiltersTexts.length; charFilterIndex++) {
charFilteredLists[charFilterIndex] = new AnalyzeAction.CharFilteredText(charFilterFactories[charFilterIndex].name(), charFiltersTexts[charFilterIndex]);
}
}
AnalyzeAction.AnalyzeTokenList[] tokenFilterLists = new AnalyzeAction.AnalyzeTokenList[tokenFiltersTokenListCreator.length];
if (tokenFilterFactories != null) {
for (int tokenFilterIndex = 0; tokenFilterIndex < tokenFiltersTokenListCreator.length; tokenFilterIndex++) {
tokenFilterLists[tokenFilterIndex] = new AnalyzeAction.AnalyzeTokenList(tokenFilterFactories[tokenFilterIndex].name(), tokenFiltersTokenListCreator[tokenFilterIndex].getArrayTokens());
}
}
detailResponse = new AnalyzeAction.DetailAnalyzeResponse(charFilteredLists, new AnalyzeAction.AnalyzeTokenList(tokenizerFactory.name(), tokenizerTokenListCreator.getArrayTokens()), tokenFilterLists);
} else {
// Not a components provider: analyze with the whole analyzer and report one token list.
String name;
if (analyzer instanceof NamedAnalyzer) {
name = ((NamedAnalyzer) analyzer).name();
} else {
name = analyzer.getClass().getName();
}
TokenListCreator tokenListCreator = new TokenListCreator(maxTokenCount);
for (String text : request.text()) {
tokenListCreator.analyze(analyzer.tokenStream("", text), includeAttributes, analyzer.getPositionIncrementGap(""), analyzer.getOffsetGap(""));
}
detailResponse = new AnalyzeAction.DetailAnalyzeResponse(new AnalyzeAction.AnalyzeTokenList(name, tokenListCreator.getArrayTokens()));
}
return detailResponse;
}
Use of org.opensearch.index.analysis.CharFilterFactory in the OpenSearch project (opensearch-project): class TransportAnalyzeAction, method createStackedTokenStream.
/**
 * Builds the analysis chain for {@code source}: all char filters, the tokenizer,
 * and only the first {@code current} token filters of the chain.
 */
private static TokenStream createStackedTokenStream(String source, CharFilterFactory[] charFilterFactories, TokenizerFactory tokenizerFactory, TokenFilterFactory[] tokenFilterFactories, int current) {
    // Apply every char filter over the raw source text.
    Reader filtered = new StringReader(source);
    for (CharFilterFactory charFilterFactory : charFilterFactories) {
        filtered = charFilterFactory.create(filtered);
    }
    Tokenizer tokenizer = tokenizerFactory.create();
    tokenizer.setReader(filtered);
    // Stack the leading `current` token filters on top of the tokenizer.
    TokenStream stacked = tokenizer;
    for (int filterIndex = 0; filterIndex < current; filterIndex++) {
        stacked = tokenFilterFactories[filterIndex].create(stacked);
    }
    return stacked;
}
Use of org.opensearch.index.analysis.CharFilterFactory in the OpenSearch project (opensearch-project): class MapperService, method reloadSearchAnalyzers.
/**
 * Rebuilds analysis factories from the current index settings and reloads every
 * {@link ReloadableCustomAnalyzer} registered on this index.
 *
 * @return the names of the analyzers that were reloaded
 * @throws IOException if rebuilding the analysis factories fails
 */
public synchronized List<String> reloadSearchAnalyzers(AnalysisRegistry registry) throws IOException {
    logger.info("reloading search analyzers");
    // refresh indexAnalyzers and search analyzers
    final Map<String, TokenizerFactory> tokenizers = registry.buildTokenizerFactories(indexSettings);
    final Map<String, CharFilterFactory> charFilters = registry.buildCharFilterFactories(indexSettings);
    final Map<String, TokenFilterFactory> tokenFilters = registry.buildTokenFilterFactories(indexSettings);
    final Map<String, Settings> analyzerSettingsByName = indexSettings.getSettings().getGroups("index.analysis.analyzer");
    final List<String> reloaded = new ArrayList<>();
    for (NamedAnalyzer namedAnalyzer : indexAnalyzers.getAnalyzers().values()) {
        if (namedAnalyzer.analyzer() instanceof ReloadableCustomAnalyzer) {
            ReloadableCustomAnalyzer reloadable = (ReloadableCustomAnalyzer) namedAnalyzer.analyzer();
            String analyzerName = namedAnalyzer.name();
            reloadable.reload(analyzerName, analyzerSettingsByName.get(analyzerName), tokenizers, charFilters, tokenFilters);
            reloaded.add(analyzerName);
        }
    }
    return reloaded;
}
Aggregations