use of org.elasticsearch.index.IndexSettings in project elasticsearch by elastic.
the class CodecTests method createCodecService.
private CodecService createCodecService() throws IOException {
Settings nodeSettings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()).build();
IndexSettings settings = IndexSettingsModule.newIndexSettings("_na", nodeSettings);
SimilarityService similarityService = new SimilarityService(settings, Collections.emptyMap());
IndexAnalyzers indexAnalyzers = createTestAnalysis(settings, nodeSettings).indexAnalyzers;
MapperRegistry mapperRegistry = new MapperRegistry(Collections.emptyMap(), Collections.emptyMap());
MapperService service = new MapperService(settings, indexAnalyzers, xContentRegistry(), similarityService, mapperRegistry, () -> null);
return new CodecService(service, ESLoggerFactory.getLogger("test"));
}
use of org.elasticsearch.index.IndexSettings in project elasticsearch by elastic.
the class NGramTokenizerFactoryTests method testBackwardsCompatibilityEdgeNgramTokenFilter.
public void testBackwardsCompatibilityEdgeNgramTokenFilter() throws Exception {
int iters = scaledRandomIntBetween(20, 100);
for (int i = 0; i < iters; i++) {
final Index index = new Index("test", "_na_");
final String name = "ngr";
Version v = randomVersion(random());
Builder builder = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3);
boolean reverse = random().nextBoolean();
if (reverse) {
builder.put("side", "back");
}
Settings settings = builder.build();
Settings indexSettings = newAnalysisSettingsBuilder().put(IndexMetaData.SETTING_VERSION_CREATED, v.id).build();
Tokenizer tokenizer = new MockTokenizer();
tokenizer.setReader(new StringReader("foo bar"));
TokenStream edgeNGramTokenFilter = new EdgeNGramTokenFilterFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings).create(tokenizer);
if (reverse) {
assertThat(edgeNGramTokenFilter, instanceOf(ReverseStringFilter.class));
} else {
assertThat(edgeNGramTokenFilter, instanceOf(EdgeNGramTokenFilter.class));
}
}
}
use of org.elasticsearch.index.IndexSettings in project elasticsearch by elastic.
the class NGramTokenizerFactoryTests method testNoTokenChars.
public void testNoTokenChars() throws IOException {
final Index index = new Index("test", "_na_");
final String name = "ngr";
final Settings indexSettings = newAnalysisSettingsBuilder().build();
final Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 4).putArray("token_chars", new String[0]).build();
Tokenizer tokenizer = new NGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings).create();
tokenizer.setReader(new StringReader("1.34"));
assertTokenStreamContents(tokenizer, new String[] { "1.", "1.3", "1.34", ".3", ".34", "34" });
}
use of org.elasticsearch.index.IndexSettings in project elasticsearch by elastic.
the class NGramTokenizerFactoryTests method testPreTokenizationEdge.
public void testPreTokenizationEdge() throws IOException {
// Make sure that pretokenization works well and that it can be used even with token chars which are supplementary characters
final Index index = new Index("test", "_na_");
final String name = "ngr";
final Settings indexSettings = newAnalysisSettingsBuilder().build();
Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3).put("token_chars", "letter,digit").build();
Tokenizer tokenizer = new EdgeNGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings).create();
tokenizer.setReader(new StringReader("Åbc déf g𐐀f "));
assertTokenStreamContents(tokenizer, new String[] { "Åb", "Åbc", "dé", "déf", "g𐐀", "g𐐀f" });
settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3).put("token_chars", "letter,digit,punctuation,whitespace,symbol").build();
tokenizer = new EdgeNGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings).create();
tokenizer.setReader(new StringReader(" a!$ 9"));
assertTokenStreamContents(tokenizer, new String[] { " a", " a!" });
}
use of org.elasticsearch.index.IndexSettings in project elasticsearch by elastic.
the class NGramTokenizerFactoryTests method testPreTokenization.
public void testPreTokenization() throws IOException {
// Make sure that pretokenization works well and that it can be used even with token chars which are supplementary characters
final Index index = new Index("test", "_na_");
final String name = "ngr";
final Settings indexSettings = newAnalysisSettingsBuilder().build();
Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3).put("token_chars", "letter,digit").build();
Tokenizer tokenizer = new NGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings).create();
tokenizer.setReader(new StringReader("Åbc déf g𐐀f "));
assertTokenStreamContents(tokenizer, new String[] { "Åb", "Åbc", "bc", "dé", "déf", "éf", "g𐐀", "g𐐀f", "𐐀f" });
settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3).put("token_chars", "letter,digit,punctuation,whitespace,symbol").build();
tokenizer = new NGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings).create();
tokenizer.setReader(new StringReader(" a!$ 9"));
assertTokenStreamContents(tokenizer, new String[] { " a", " a!", "a!", "a!$", "!$", "!$ ", "$ ", "$ 9", " 9" });
}
Aggregations