use of org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder in project elasticsearch by elastic.
the class SuggestSearchIT method testPrefixLength.
/**
 * Checks that the candidate generator's {@code prefixLength} changes which correction the phrase
 * suggester returns for the misspelled input "hello word".
 *
 * <p>The index holds "hello world" twice and "hello words" once. The test expects "hello words"
 * with a prefix length of 4 and "hello world" with a prefix length of 2 (presumably because a
 * 4-character prefix rules out "world" as a candidate for "word" - confirm against the candidate
 * generator documentation).
 *
 * @throws IOException if building the JSON mapping fails
 */
public void testPrefixLength() throws IOException {
    final String suggestionName = "simple_phrase";

    CreateIndexRequestBuilder createIndex = prepareCreate("test").setSettings(
        Settings.builder()
            .put(SETTING_NUMBER_OF_SHARDS, 1)
            .put("index.analysis.analyzer.reverse.tokenizer", "standard")
            .putArray("index.analysis.analyzer.reverse.filter", "lowercase", "reverse")
            .put("index.analysis.analyzer.body.tokenizer", "standard")
            .putArray("index.analysis.analyzer.body.filter", "lowercase")
            .put("index.analysis.analyzer.bigram.tokenizer", "standard")
            .putArray("index.analysis.analyzer.bigram.filter", "my_shingle", "lowercase")
            .put("index.analysis.filter.my_shingle.type", "shingle")
            .put("index.analysis.filter.my_shingle.output_unigrams", false)
            .put("index.analysis.filter.my_shingle.min_shingle_size", 2)
            .put("index.analysis.filter.my_shingle.max_shingle_size", 2));

    // One text field per custom analyzer declared above.
    XContentBuilder typeMapping = XContentFactory.jsonBuilder()
        .startObject()
            .startObject("type1")
                .startObject("properties")
                    .startObject("body").field("type", "text").field("analyzer", "body").endObject()
                    .startObject("body_reverse").field("type", "text").field("analyzer", "reverse").endObject()
                    .startObject("bigram").field("type", "text").field("analyzer", "bigram").endObject()
                .endObject()
            .endObject()
        .endObject();
    assertAcked(createIndex.addMapping("type1", typeMapping));
    ensureGreen();

    // Documents get the ids "1", "2", "3" in this order.
    String[] bodies = { "hello world", "hello world", "hello words" };
    for (int doc = 0; doc < bodies.length; doc++) {
        index("test", "type1", String.valueOf(doc + 1), "body", bodies[doc]);
    }
    refresh();

    // Same request twice; only the prefix length (and therefore the expected correction) differs.
    int[] prefixLengths = { 4, 2 };
    String[] expectedCorrections = { "hello words", "hello world" };
    for (int run = 0; run < prefixLengths.length; run++) {
        Suggest response = searchSuggest(
            "hello word",
            suggestionName,
            phraseSuggestion("body")
                .addCandidateGenerator(
                    candidateGenerator("body").prefixLength(prefixLengths[run]).minWordLength(1).suggestMode("always"))
                .size(1)
                .confidence(1.0f));
        assertSuggestion(response, 0, suggestionName, expectedCorrections[run]);
    }
}
use of org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder in project elasticsearch by elastic.
the class SuggestSearchIT method testPhraseBoundaryCases.
/**
 * Exercises the phrase suggester with fields and analyzers that differ in whether they emit
 * unigrams.
 *
 * <p>Index setup: the {@code bigram} analyzer uses a shingle filter with
 * {@code output_unigrams=false}, the {@code ngram} analyzer uses one with
 * {@code output_unigrams=true}, and {@code myDefAnalyzer} uses the stock {@code shingle} filter.
 * The test first expects two requests to fail with a {@link SearchPhaseExecutionException},
 * then expects the remaining requests to succeed, and finally checks the top suggestion for
 * "xor the got-jewel" under two analyzer settings.
 *
 * @throws IOException if building the JSON mapping fails
 * @throws URISyntaxException declared by the original signature; nothing visible here throws it
 */
public void testPhraseBoundaryCases() throws IOException, URISyntaxException {
    final String suggestionName = "simple_phrase";
    final String misspelled = "xor the got-jewel";

    CreateIndexRequestBuilder createIndex = prepareCreate("test").setSettings(
        Settings.builder()
            .put(indexSettings())
            // to get reliable statistics we should put this all into one shard
            .put(SETTING_NUMBER_OF_SHARDS, 1)
            .put("index.analysis.analyzer.body.tokenizer", "standard")
            .putArray("index.analysis.analyzer.body.filter", "lowercase")
            .put("index.analysis.analyzer.bigram.tokenizer", "standard")
            .putArray("index.analysis.analyzer.bigram.filter", "my_shingle", "lowercase")
            .put("index.analysis.analyzer.ngram.tokenizer", "standard")
            .putArray("index.analysis.analyzer.ngram.filter", "my_shingle2", "lowercase")
            .put("index.analysis.analyzer.myDefAnalyzer.tokenizer", "standard")
            .putArray("index.analysis.analyzer.myDefAnalyzer.filter", "shingle", "lowercase")
            .put("index.analysis.filter.my_shingle.type", "shingle")
            .put("index.analysis.filter.my_shingle.output_unigrams", false)
            .put("index.analysis.filter.my_shingle.min_shingle_size", 2)
            .put("index.analysis.filter.my_shingle.max_shingle_size", 2)
            .put("index.analysis.filter.my_shingle2.type", "shingle")
            .put("index.analysis.filter.my_shingle2.output_unigrams", true)
            .put("index.analysis.filter.my_shingle2.min_shingle_size", 2)
            .put("index.analysis.filter.my_shingle2.max_shingle_size", 2));

    XContentBuilder typeMapping = XContentFactory.jsonBuilder()
        .startObject()
            .startObject("type1")
                .startObject("properties")
                    .startObject("body").field("type", "text").field("analyzer", "body").endObject()
                    .startObject("bigram").field("type", "text").field("analyzer", "bigram").endObject()
                    .startObject("ngram").field("type", "text").field("analyzer", "ngram").endObject()
                .endObject()
            .endObject()
        .endObject();
    assertAcked(createIndex.addMapping("type1", typeMapping));
    ensureGreen();

    // Every name is used as the document id and as the value of all three fields.
    String[] names = {
        "Xorr the God-Jewel",
        "Grog the God-Crusher",
        "Xorn",
        "Walter Newell",
        "Wanda Maximoff",
        "Captain America",
        "American Ace",
        "Wundarr the Aquarian",
        "Will o' the Wisp",
        "Xemnu the Titan"
    };
    for (String name : names) {
        index("test", "type1", name, "body", name, "bigram", name, "ngram", name);
    }
    refresh();

    NumShards shards = getNumShards("test");

    // Let's make sure some things throw exceptions.
    PhraseSuggestionBuilder phraseSuggestion = phraseSuggestion("bigram")
        .analyzer("body")
        .addCandidateGenerator(candidateGenerator("does_not_exist").minWordLength(1).suggestMode("always"))
        .realWordErrorLikelihood(0.95f)
        .maxErrors(0.5f)
        .size(1);
    // Drop the generator again and clear the explicit analyzer before the first request.
    phraseSuggestion.clearCandidateGenerators().analyzer(null);
    try {
        searchSuggest(misspelled, shards.numPrimaries, Collections.singletonMap(suggestionName, phraseSuggestion));
        fail("analyzer does only produce ngrams");
    } catch (SearchPhaseExecutionException expected) {
        // Intentionally empty: the request is required to fail.
    }

    phraseSuggestion.analyzer("bigram");
    try {
        searchSuggest(misspelled, shards.numPrimaries, Collections.singletonMap(suggestionName, phraseSuggestion));
        fail("analyzer does only produce ngrams");
    } catch (SearchPhaseExecutionException expected) {
        // Intentionally empty: the request is required to fail.
    }

    // Now we'll make sure some things don't.
    phraseSuggestion.forceUnigrams(false);
    searchSuggest(misspelled, 0, Collections.singletonMap(suggestionName, phraseSuggestion));

    // Field doesn't produce unigrams but the analyzer does.
    phraseSuggestion.forceUnigrams(true).analyzer("ngram");
    searchSuggest(misspelled, 0, Collections.singletonMap(suggestionName, phraseSuggestion));

    phraseSuggestion = phraseSuggestion("ngram")
        .analyzer("myDefAnalyzer")
        .forceUnigrams(true)
        .realWordErrorLikelihood(0.95f)
        .maxErrors(0.5f)
        .size(1)
        .addCandidateGenerator(candidateGenerator("body").minWordLength(1).suggestMode("always"));
    Suggest response = searchSuggest(misspelled, 0, Collections.singletonMap(suggestionName, phraseSuggestion));
    // "xorr the god jewel" and "xorn the god jewel" have identical scores (we are only using
    // unigrams to score), so we tie break by earlier term (xorn):
    assertSuggestion(response, 0, suggestionName, "xorn the god jewel");

    phraseSuggestion.analyzer(null);
    response = searchSuggest(misspelled, 0, Collections.singletonMap(suggestionName, phraseSuggestion));
    // In this case xorr has a better score than xorn because we set the field back to the default
    // (my_shingle2) analyzer, so the probability that the term is not in the dictionary but is NOT
    // a misspelling is relatively high compared to the others that have no n-gram with the other
    // terms in the phrase. This can be tuned through realWordErrorLikelihood.
    assertSuggestion(response, 0, suggestionName, "xorr the god jewel");
}
use of org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder in project elasticsearch by elastic.
the class SuggestSearchIT method testSuggestModes.
/**
 * Checks how the candidate generator's suggest mode affects phrase suggestions (see #3037).
 *
 * <p>With suggest mode "always" the input "ice tea" is expected to be corrected to "iced tea".
 * After the mode is reset to {@code null} on the same generator instance, the same request is
 * expected to return no options (presumably because the default mode only suggests for terms
 * missing from the index - confirm against the suggester documentation).
 *
 * @throws IOException if building the JSON mapping fails
 */
public void testSuggestModes() throws IOException {
    final String suggestionName = "did_you_mean";
    final String query = "ice tea";

    CreateIndexRequestBuilder createIndex = prepareCreate("test").setSettings(
        Settings.builder()
            .put(SETTING_NUMBER_OF_SHARDS, 1)
            .put(SETTING_NUMBER_OF_REPLICAS, 0)
            .put("index.analysis.analyzer.biword.tokenizer", "standard")
            .putArray("index.analysis.analyzer.biword.filter", "shingler", "lowercase")
            .put("index.analysis.filter.shingler.type", "shingle")
            .put("index.analysis.filter.shingler.min_shingle_size", 2)
            .put("index.analysis.filter.shingler.max_shingle_size", 3));

    // "name" is a plain text field with a "shingled" sub-field analyzed by the biword analyzer.
    XContentBuilder typeMapping = XContentFactory.jsonBuilder()
        .startObject()
            .startObject("type1")
                .startObject("properties")
                    .startObject("name")
                        .field("type", "text")
                        .startObject("fields")
                            .startObject("shingled")
                                .field("type", "text")
                                .field("analyzer", "biword")
                                .field("search_analyzer", "standard")
                            .endObject()
                        .endObject()
                    .endObject()
                .endObject()
            .endObject()
        .endObject();
    assertAcked(createIndex.addMapping("type1", typeMapping));
    ensureGreen();

    // Documents get the ids "1", "2", "3" in this order.
    String[] docs = { "I like iced tea", "I like tea.", "I like ice cream." };
    for (int doc = 0; doc < docs.length; doc++) {
        index("test", "type1", String.valueOf(doc + 1), "name", docs[doc]);
    }
    refresh();

    // The generator is kept in a local so it can be reconfigured after the first search.
    DirectCandidateGeneratorBuilder nameGenerator = candidateGenerator("name")
        .prefixLength(0)
        .minWordLength(0)
        .suggestMode("always")
        .maxEdits(2);
    PhraseSuggestionBuilder shingledSuggestion = phraseSuggestion("name.shingled")
        .addCandidateGenerator(nameGenerator)
        .gramSize(3);

    Suggest response = searchSuggest(query, suggestionName, shingledSuggestion);
    assertSuggestion(response, 0, suggestionName, "iced tea");

    // Reset the suggest mode and repeat the request; no option is expected this time.
    nameGenerator.suggestMode(null);
    response = searchSuggest(query, suggestionName, shingledSuggestion);
    assertSuggestionSize(response, 0, 0, suggestionName);
}
use of org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder in project elasticsearch by elastic.
the class SuggestSearchIT method testBasicPhraseSuggest.
/**
 * Walks one phrase suggestion builder through a series of configurations (real-word error
 * likelihood, highlighting, confidence, extra candidate generators with pre/post filters,
 * smoothing models and token limit) and checks the top suggestion after each change.
 *
 * <p>The builder is mutated in place between searches, so the order of the steps matters.
 *
 * @throws IOException if building the JSON mapping fails
 * @throws URISyntaxException declared by the original signature; nothing visible here throws it
 */
public void testBasicPhraseSuggest() throws IOException, URISyntaxException {
    final String suggestionName = "simple_phrase";
    final String correctPhrase = "Arthur, King of the Britons";
    final String misspelledBritons = "Artur, King of the Britns";
    final String expectedArthur = "arthur king of the britons";
    final String misspelledFrank = "Frank's Wise";
    final String expectedFrank = "frank's wife";

    CreateIndexRequestBuilder createIndex = prepareCreate("test").setSettings(
        Settings.builder()
            .put(indexSettings())
            .put("index.analysis.analyzer.reverse.tokenizer", "standard")
            .putArray("index.analysis.analyzer.reverse.filter", "lowercase", "reverse")
            .put("index.analysis.analyzer.body.tokenizer", "standard")
            .putArray("index.analysis.analyzer.body.filter", "lowercase")
            .put("index.analysis.analyzer.bigram.tokenizer", "standard")
            .putArray("index.analysis.analyzer.bigram.filter", "my_shingle", "lowercase")
            .put("index.analysis.filter.my_shingle.type", "shingle")
            .put("index.analysis.filter.my_shingle.output_unigrams", false)
            .put("index.analysis.filter.my_shingle.min_shingle_size", 2)
            .put("index.analysis.filter.my_shingle.max_shingle_size", 2)
            .put("index.number_of_shards", 1));

    XContentBuilder typeMapping = XContentFactory.jsonBuilder()
        .startObject()
            .startObject("type1")
                .startObject("properties")
                    .startObject("body").field("type", "text").field("analyzer", "body").endObject()
                    .startObject("body_reverse").field("type", "text").field("analyzer", "reverse").endObject()
                    .startObject("bigram").field("type", "text").field("analyzer", "bigram").endObject()
                .endObject()
            .endObject()
        .endObject();
    assertAcked(createIndex.addMapping("type1", typeMapping));
    ensureGreen();

    // Every title is used as the document id and as the value of all three fields.
    String[] titles = {
        "Arthur, King of the Britons",
        "Sir Lancelot the Brave",
        "Patsy, Arthur's Servant",
        "Sir Robin the Not-Quite-So-Brave-as-Sir-Lancelot",
        "Sir Bedevere the Wise",
        "Sir Galahad the Pure",
        "Miss Islington, the Witch",
        "Zoot",
        "Leader of Robin's Minstrels",
        "Old Crone",
        "Frank, the Historian",
        "Frank's Wife",
        "Dr. Piglet",
        "Dr. Winston",
        "Sir Robin (Stand-in)",
        "Knight Who Says Ni",
        "Police sergeant who stops the film"
    };
    for (String title : titles) {
        index("test", "type1", title, "body", title, "body_reverse", title, "bigram", title);
    }
    refresh();

    PhraseSuggestionBuilder suggestion = phraseSuggestion("bigram")
        .gramSize(2)
        .analyzer("body")
        .addCandidateGenerator(candidateGenerator("body").minWordLength(1).suggestMode("always"))
        .size(1);
    Suggest response = searchSuggest(misspelledFrank, suggestionName, suggestion);
    assertSuggestion(response, 0, suggestionName, expectedFrank);
    assertThat(
        response.getSuggestion(suggestionName).getEntries().get(0).getText().string(),
        equalTo("Frank's Wise"));

    suggestion.realWordErrorLikelihood(0.95f);
    response = searchSuggest("Artur, Kinh of the Britons", suggestionName, suggestion);
    assertSuggestion(response, 0, suggestionName, expectedArthur);
    // Check the "text" field this one time.
    assertThat(
        response.getSuggestion(suggestionName).getEntries().get(0).getText().string(),
        equalTo("Artur, Kinh of the Britons"));

    // Ask for highlighting
    suggestion.highlight("<em>", "</em>");
    response = searchSuggest(misspelledBritons, suggestionName, suggestion);
    assertSuggestion(response, 0, suggestionName, expectedArthur);
    assertThat(
        response.getSuggestion(suggestionName).getEntries().get(0).getOptions().get(0).getHighlighted().string(),
        equalTo("<em>arthur</em> king of the <em>britons</em>"));

    // pass in a correct phrase
    suggestion.highlight(null, null)
        .confidence(0f)
        .size(1)
        .maxErrors(0.5f);
    response = searchSuggest(correctPhrase, suggestionName, suggestion);
    assertSuggestion(response, 0, suggestionName, expectedArthur);

    // pass in a correct phrase - set confidence to 2
    suggestion.confidence(2f);
    response = searchSuggest(correctPhrase, suggestionName, suggestion);
    assertSuggestionSize(response, 0, 0, suggestionName);

    // pass in a correct phrase - set confidence to 0.99
    suggestion.confidence(0.99f);
    response = searchSuggest(correctPhrase, suggestionName, suggestion);
    assertSuggestion(response, 0, suggestionName, expectedArthur);

    // test reverse suggestions with pre & post filter
    suggestion
        .addCandidateGenerator(candidateGenerator("body").minWordLength(1).suggestMode("always"))
        .addCandidateGenerator(
            candidateGenerator("body_reverse")
                .minWordLength(1)
                .suggestMode("always")
                .preFilter("reverse")
                .postFilter("reverse"));
    response = searchSuggest("Artur, Ging of the Britons", suggestionName, suggestion);
    assertSuggestion(response, 0, suggestionName, expectedArthur);

    // set all mass to trigrams (not indexed)
    suggestion.clearCandidateGenerators()
        .addCandidateGenerator(candidateGenerator("body").minWordLength(1).suggestMode("always"))
        .smoothingModel(new LinearInterpolation(1, 0, 0));
    response = searchSuggest(misspelledBritons, suggestionName, suggestion);
    assertSuggestionSize(response, 0, 0, suggestionName);

    // set all mass to bigrams
    suggestion.smoothingModel(new LinearInterpolation(0, 1, 0));
    response = searchSuggest(misspelledBritons, suggestionName, suggestion);
    assertSuggestion(response, 0, suggestionName, expectedArthur);

    // distribute mass
    suggestion.smoothingModel(new LinearInterpolation(0.4, 0.4, 0.2));
    response = searchSuggest(misspelledBritons, suggestionName, suggestion);
    assertSuggestion(response, 0, suggestionName, expectedArthur);
    response = searchSuggest(misspelledFrank, suggestionName, suggestion);
    assertSuggestion(response, 0, suggestionName, expectedFrank);

    // try all smoothing methods
    suggestion.smoothingModel(new LinearInterpolation(0.4, 0.4, 0.2));
    response = searchSuggest(misspelledBritons, suggestionName, suggestion);
    assertSuggestion(response, 0, suggestionName, expectedArthur);

    suggestion.smoothingModel(new Laplace(0.2));
    response = searchSuggest(misspelledBritons, suggestionName, suggestion);
    assertSuggestion(response, 0, suggestionName, expectedArthur);

    suggestion.smoothingModel(new StupidBackoff(0.1));
    response = searchSuggest(misspelledBritons, suggestionName, suggestion);
    assertSuggestion(response, 0, suggestionName, expectedArthur);

    // check tokenLimit
    suggestion.smoothingModel(null).tokenLimit(4);
    response = searchSuggest(misspelledBritons, suggestionName, suggestion);
    assertSuggestionSize(response, 0, 0, suggestionName);

    suggestion.tokenLimit(15).smoothingModel(new StupidBackoff(0.1));
    response = searchSuggest(
        "Sir Bedever the Wife Sir Bedever the Wife Sir Bedever the Wife",
        suggestionName,
        suggestion);
    assertSuggestion(
        response,
        0,
        suggestionName,
        "sir bedevere the wise sir bedevere the wise sir bedevere the wise");
    // Check the name this time because we're repeating it which is funky
    assertThat(
        response.getSuggestion(suggestionName).getEntries().get(0).getText().string(),
        equalTo("Sir Bedever the Wife Sir Bedever the Wife Sir Bedever the Wife"));
}
use of org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder in project elasticsearch by elastic.
the class SuggestSearchIT method testSearchForRarePhrase.
/**
 * A rare phrase must not yield any suggestion when confidence is greater than 1. That used to be
 * possible before the cutoff score was rechecked during the reduce phase. The failure was not
 * deterministic - roughly two out of five runs - but the test is not repeated, to save time.
 *
 * @throws IOException if building the JSON mapping fails
 */
public void testSearchForRarePhrase() throws IOException {
    final String suggestionName = "simple_phrase";
    // Without enough chaff per shard the shards can become unbalanced, in which case the cutoff
    // recheck under test does more harm than good.
    final int chaffPerShard = 100;

    CreateIndexRequestBuilder createIndex = prepareCreate("test").setSettings(
        Settings.builder()
            .put(indexSettings())
            .put("index.analysis.analyzer.body.tokenizer", "standard")
            .putArray("index.analysis.analyzer.body.filter", "lowercase", "my_shingle")
            .put("index.analysis.filter.my_shingle.type", "shingle")
            .put("index.analysis.filter.my_shingle.output_unigrams", true)
            .put("index.analysis.filter.my_shingle.min_shingle_size", 2)
            .put("index.analysis.filter.my_shingle.max_shingle_size", 2));

    XContentBuilder typeMapping = XContentFactory.jsonBuilder()
        .startObject()
            .startObject("type1")
                .startObject("properties")
                    .startObject("body").field("type", "text").field("analyzer", "body").endObject()
                .endObject()
            .endObject()
        .endObject();
    assertAcked(createIndex.addMapping("type1", typeMapping));
    ensureGreen();

    NumShards shards = getNumShards("test");

    // Corpus: a few hand-picked phrases, eight "noble somethingelseN" variants, then filler
    // documents in proportion to the number of primary shards.
    List<String> corpus = new ArrayList<>();
    Collections.addAll(corpus, "nobel prize", "noble gases", "somethingelse prize", "pride and joy", "notes are fun");
    for (int variant = 0; variant < 8; variant++) {
        corpus.add("noble somethingelse" + variant);
    }
    final int chaffCount = shards.numPrimaries * chaffPerShard;
    for (int filler = 0; filler < chaffCount; filler++) {
        corpus.add("chaff" + filler);
    }
    // Each phrase doubles as the document id.
    for (String entry : corpus) {
        index("test", "type1", entry, "body", entry);
    }
    refresh();

    // "nobel prize" is the phrase as indexed: no suggestion is expected for it.
    Suggest response = searchSuggest(
        "nobel prize",
        suggestionName,
        phraseSuggestion("body")
            .addCandidateGenerator(candidateGenerator("body").minWordLength(1).suggestMode("always").maxTermFreq(.99f))
            .confidence(2f)
            .maxErrors(5f)
            .size(1));
    assertSuggestionSize(response, 0, 0, suggestionName);

    // "noble prize" is expected to be corrected to "nobel prize".
    response = searchSuggest(
        "noble prize",
        suggestionName,
        phraseSuggestion("body")
            .addCandidateGenerator(candidateGenerator("body").minWordLength(1).suggestMode("always").maxTermFreq(.99f))
            .confidence(2f)
            .maxErrors(5f)
            .size(1));
    assertSuggestion(response, 0, 0, suggestionName, "nobel prize");
}
Aggregations