Use of org.apache.lucene.tests.analysis.MockSynonymAnalyzer in project OpenSearch by opensearch-project.
From the class TextFieldMapperTests, method testFastPhraseMapping.
/**
 * Exercises text fields mapped with {@code index_phrases: true}.
 *
 * <p>The test checks, in order: which query a phrase match is turned into for
 * different inputs (two terms, three terms, slop, single term, a phrase with a
 * position gap, synonym/graph token streams), what is indexed into the
 * {@code field._index_phrase} sub-field, and that invalid mapping combinations
 * are rejected with a {@link MapperParsingException}.
 *
 * @throws IOException if building the mapping or parsing the document fails
 */
public void testFastPhraseMapping() throws IOException {
    MapperService service = createMapperService(mapping(builder -> {
        builder.startObject("field")
            .field("type", "text")
            .field("analyzer", "my_stop_analyzer")
            .field("index_phrases", true)
            .endObject();
        // "standard" will be replaced with MockSynonymAnalyzer
        builder.startObject("synfield")
            .field("type", "text")
            .field("analyzer", "standard")
            .field("index_phrases", true)
            .endObject();
    }));
    QueryShardContext context = createQueryShardContext(service);

    // Two terms: expected to hit the phrase sub-field with one shingle.
    Query twoTermPhrase = new MatchPhraseQueryBuilder("field", "two words").toQuery(context);
    assertThat(twoTermPhrase, is(new PhraseQuery("field._index_phrase", "two words")));

    // Three terms: expected to become a phrase of two overlapping shingles.
    Query threeTermPhrase = new MatchPhraseQueryBuilder("field", "three words here").toQuery(context);
    assertThat(threeTermPhrase, is(new PhraseQuery("field._index_phrase", "three words", "words here")));

    // With slop the expected query targets the plain field, term by term.
    Query sloppyPhrase = new MatchPhraseQueryBuilder("field", "two words").slop(1).toQuery(context);
    assertThat(sloppyPhrase, is(new PhraseQuery(1, "field", "two", "words")));

    // A single term is expected to be a plain term query.
    Query singleTerm = new MatchPhraseQueryBuilder("field", "singleton").toQuery(context);
    assertThat(singleTerm, is(new TermQuery(new Term("field", "singleton"))));

    // Expected query keeps "stopword" at explicit position 2 on the plain field.
    Query phraseWithGap = new MatchPhraseQueryBuilder("field", "sparkle a stopword").toQuery(context);
    PhraseQuery.Builder gapped = new PhraseQuery.Builder();
    gapped.add(new Term("field", "sparkle"));
    gapped.add(new Term("field", "stopword"), 2);
    assertThat(phraseWithGap, is(gapped.build()));

    // Synonyms at the last position are expected to yield alternative shingles.
    MatchQuery matchQuery = new MatchQuery(context);
    matchQuery.setAnalyzer(new MockSynonymAnalyzer());
    Query synonymPhrase = matchQuery.parse(MatchQuery.Type.PHRASE, "synfield", "motor dogs");
    Term[] shingleAlternatives = {
        new Term("synfield._index_phrase", "motor dogs"),
        new Term("synfield._index_phrase", "motor dog")
    };
    assertThat(synonymPhrase, is(new MultiPhraseQuery.Builder().add(shingleAlternatives).build()));

    // https://github.com/elastic/elasticsearch/issues/43976
    CannedTokenStream canned = new CannedTokenStream(
        new Token("foo", 1, 0, 2, 2),
        new Token("bar", 0, 0, 2),
        new Token("baz", 1, 0, 2)
    );
    // Analyzer that ignores its input and always replays the canned tokens above.
    Analyzer cannedAnalyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            return new TokenStreamComponents(reader -> {
            }, canned);
        }
    };
    matchQuery.setAnalyzer(cannedAnalyzer);
    Query graphBoolean = matchQuery.parse(MatchQuery.Type.BOOLEAN, "synfield", "foo");
    Query shinglePath = new PhraseQuery.Builder().add(new Term("synfield._index_phrase", "bar baz")).build();
    BooleanQuery eitherPath = new BooleanQuery.Builder()
        .add(new TermQuery(new Term("synfield", "foo")), BooleanClause.Occur.SHOULD)
        .add(shinglePath, BooleanClause.Occur.SHOULD)
        .build();
    BooleanQuery expectedGraph = new BooleanQuery.Builder().add(eitherPath, BooleanClause.Occur.SHOULD).build();
    assertThat(graphBoolean, is(expectedGraph));

    // Indexing side: exactly one phrase sub-field whose first token is a two-word shingle.
    ParsedDocument parsed = service.documentMapper()
        .parse(source(b -> b.field("field", "Some English text that is going to be very useful")));
    IndexableField[] phraseFields = parsed.rootDoc().getFields("field._index_phrase");
    assertEquals(1, phraseFields.length);
    try (TokenStream shingles = phraseFields[0].tokenStream(context.getMapperService().indexAnalyzer(), null)) {
        CharTermAttribute term = shingles.addAttribute(CharTermAttribute.class);
        shingles.reset();
        assertTrue(shingles.incrementToken());
        assertEquals("Some English", term.toString());
    }

    // index_phrases on an unindexed field must be rejected.
    Exception unindexed = expectThrows(
        MapperParsingException.class,
        () -> createMapperService(fieldMapping(b -> b.field("type", "text").field("index", "false").field("index_phrases", true)))
    );
    assertThat(unindexed.getMessage(), containsString("Cannot set index_phrases on unindexed field [field]"));

    // index_phrases without positions must be rejected.
    Exception noPositions = expectThrows(
        MapperParsingException.class,
        () -> createMapperService(fieldMapping(b -> b.field("type", "text").field("index_options", "freqs").field("index_phrases", true)))
    );
    assertThat(noPositions.getMessage(), containsString("Cannot set index_phrases on field [field] if positions are not enabled"));
}
Use of org.apache.lucene.tests.analysis.MockSynonymAnalyzer in project OpenSearch by opensearch-project.
From the class SimpleQueryStringBuilderTests, method testAnalyzerWithGraph.
/**
 * Checks how {@link SimpleQueryStringQueryParser} handles an analyzer that emits
 * multi-word synonyms, for every default {@link Operator}.
 *
 * <p>Three inputs are parsed per operator: a plain two-word query, a quoted
 * phrase, and a quoted phrase with slop next to a plain term. Each result is
 * compared against a hand-built expected query.
 */
public void testAnalyzerWithGraph() {
    SimpleQueryStringQueryParser.Settings parserSettings = new SimpleQueryStringQueryParser.Settings();
    parserSettings.analyzeWildcard(true);
    SimpleQueryStringQueryParser parser = new SimpleQueryStringQueryParser(
        new MockSynonymAnalyzer(),
        Collections.singletonMap(TEXT_FIELD_NAME, 1.0f),
        -1,
        parserSettings,
        createShardContext()
    );

    for (Operator operator : Operator.values()) {
        BooleanClause.Occur occur = operator.toBooleanClauseOccur();
        parser.setDefaultOperator(occur);

        // non-phrase won't detect multi-word synonym because of whitespace splitting
        Query plain = parser.parse("guinea pig");
        BooleanQuery.Builder plainExpected = new BooleanQuery.Builder();
        plainExpected.add(new BooleanClause(new TermQuery(new Term(TEXT_FIELD_NAME, "guinea")), occur));
        plainExpected.add(new BooleanClause(new TermQuery(new Term(TEXT_FIELD_NAME, "pig")), occur));
        Query plainExpectedQuery = plainExpected.build();
        assertThat(plain, equalTo(plainExpectedQuery));

        // phrase will pick it up
        Query quoted = parser.parse("\"guinea pig\"");
        SpanQuery[] originalWords = {
            new SpanTermQuery(new Term(TEXT_FIELD_NAME, "guinea")),
            new SpanTermQuery(new Term(TEXT_FIELD_NAME, "pig"))
        };
        SpanNearQuery adjacentWords = new SpanNearQuery(originalWords, 0, true);
        SpanTermQuery synonym = new SpanTermQuery(new Term(TEXT_FIELD_NAME, "cavy"));
        Query quotedExpected = new SpanOrQuery(adjacentWords, synonym);
        assertThat(quoted, equalTo(quotedExpected));

        // phrase with slop
        Query sloppy = parser.parse("big \"tiny guinea pig\"~2");
        PhraseQuery originalPhrase = new PhraseQuery.Builder()
            .add(new Term(TEXT_FIELD_NAME, "tiny"))
            .add(new Term(TEXT_FIELD_NAME, "guinea"))
            .add(new Term(TEXT_FIELD_NAME, "pig"))
            .setSlop(2)
            .build();
        PhraseQuery synonymPhrase = new PhraseQuery.Builder()
            .add(new Term(TEXT_FIELD_NAME, "tiny"))
            .add(new Term(TEXT_FIELD_NAME, "cavy"))
            .setSlop(2)
            .build();
        Query eitherPhrase = new BooleanQuery.Builder()
            .add(originalPhrase, BooleanClause.Occur.SHOULD)
            .add(synonymPhrase, BooleanClause.Occur.SHOULD)
            .build();
        Query sloppyExpected = new BooleanQuery.Builder()
            .add(new TermQuery(new Term(TEXT_FIELD_NAME, "big")), occur)
            .add(eitherPhrase, occur)
            .build();
        assertThat(sloppy, equalTo(sloppyExpected));
    }
}
Use of org.apache.lucene.tests.analysis.MockSynonymAnalyzer in project OpenSearch by opensearch-project.
From the class MatchQueryBuilderTests, method testMultiWordSynonymsPhrase.
/**
 * Parses the phrase "guinea pig dogs" with a {@link MockSynonymAnalyzer} and
 * checks the resulting span query.
 *
 * <p>The expected query is an ordered span-near of two alternatives groups:
 * ("guinea" followed by "pig") or "cavy", and then "dogs" or "dog".
 *
 * @throws Exception if parsing the query fails
 */
public void testMultiWordSynonymsPhrase() throws Exception {
    final MatchQuery matchQuery = new MatchQuery(createShardContext());
    matchQuery.setAnalyzer(new MockSynonymAnalyzer());
    final Query actual = matchQuery.parse(Type.PHRASE, TEXT_FIELD_NAME, "guinea pig dogs");

    // First position: the original two words in order, or their single-word synonym.
    SpanQuery guineaThenPig = SpanNearQuery.newOrderedNearQuery(TEXT_FIELD_NAME)
        .addClause(new SpanTermQuery(new Term(TEXT_FIELD_NAME, "guinea")))
        .addClause(new SpanTermQuery(new Term(TEXT_FIELD_NAME, "pig")))
        .setSlop(0)
        .build();
    SpanQuery cavy = new SpanTermQuery(new Term(TEXT_FIELD_NAME, "cavy"));
    SpanOrQuery animal = new SpanOrQuery(new SpanQuery[] { guineaThenPig, cavy });

    // Second position: "dogs" or "dog".
    SpanQuery dogs = new SpanTermQuery(new Term(TEXT_FIELD_NAME, "dogs"));
    SpanQuery dog = new SpanTermQuery(new Term(TEXT_FIELD_NAME, "dog"));
    SpanOrQuery dogVariants = new SpanOrQuery(new SpanQuery[] { dogs, dog });

    Query expected = SpanNearQuery.newOrderedNearQuery(TEXT_FIELD_NAME)
        .addClause(animal)
        .addClause(dogVariants)
        .build();
    assertEquals(expected, actual);
}
Use of org.apache.lucene.tests.analysis.MockSynonymAnalyzer in project OpenSearch by opensearch-project.
From the class MultiMatchQueryTests, method testMultiMatchCrossFieldsWithSynonymsPhrase.
/**
 * Parses "guinea pig" as a cross-fields multi-match over {@code name.first} and
 * {@code name.last} using a {@link MockSynonymAnalyzer}.
 *
 * <p>The expected query combines, as optional clauses, a dis-max of one
 * "guinea pig" phrase per field and a blended term query over "cavy" in both
 * fields.
 *
 * @throws IOException if parsing the query fails
 */
public void testMultiMatchCrossFieldsWithSynonymsPhrase() throws IOException {
    QueryShardContext context = indexService.newQueryShardContext(randomInt(20), null, () -> {
        throw new UnsupportedOperationException();
    }, null);
    MultiMatchQuery multiMatch = new MultiMatchQuery(context);
    multiMatch.setAnalyzer(new MockSynonymAnalyzer());

    String[] targetFields = { "name.first", "name.last" };
    Map<String, Float> fieldBoosts = new HashMap<>();
    for (String targetField : targetFields) {
        fieldBoosts.put(targetField, 1.0f);
    }
    Query actual = multiMatch.parse(MultiMatchQueryBuilder.Type.CROSS_FIELDS, fieldBoosts, "guinea pig", null);

    // Build the per-field pieces of the expected query in field order.
    Term[] synonymTerms = new Term[targetFields.length];
    float[] termBoosts = new float[targetFields.length];
    List<Query> phrasePerField = new ArrayList<>();
    for (int i = 0; i < targetFields.length; i++) {
        String targetField = targetFields[i];
        synonymTerms[i] = new Term(targetField, "cavy");
        termBoosts[i] = 1.0f;
        phrasePerField.add(
            new PhraseQuery.Builder().add(new Term(targetField, "guinea")).add(new Term(targetField, "pig")).build()
        );
    }

    Query phraseOrSynonym = new BooleanQuery.Builder()
        .add(new DisjunctionMaxQuery(phrasePerField, 0.0f), BooleanClause.Occur.SHOULD)
        .add(BlendedTermQuery.dismaxBlendedQuery(synonymTerms, termBoosts, 1.0f), BooleanClause.Occur.SHOULD)
        .build();
    BooleanQuery expected = new BooleanQuery.Builder().add(phraseOrSynonym, BooleanClause.Occur.SHOULD).build();
    assertEquals(expected, actual);
}
Use of org.apache.lucene.tests.analysis.MockSynonymAnalyzer in project OpenSearch by opensearch-project.
From the class MultiMatchQueryTests, method testMultiMatchCrossFieldsWithSynonyms.
/**
 * Parses the single term "dogs" as a cross-fields multi-match using a
 * {@link MockSynonymAnalyzer}, first over one field and then over two.
 *
 * <p>With only {@code name.first} the expected result is a synonym query over
 * "dog" and "dogs"; after adding {@code name.last} the expected result is a
 * blended term query over both terms in both fields.
 *
 * @throws IOException if parsing the query fails
 */
public void testMultiMatchCrossFieldsWithSynonyms() throws IOException {
    QueryShardContext context = indexService.newQueryShardContext(randomInt(20), null, () -> {
        throw new UnsupportedOperationException();
    }, null);
    MultiMatchQuery multiMatch = new MultiMatchQuery(context);
    multiMatch.setAnalyzer(new MockSynonymAnalyzer());

    Map<String, Float> fieldBoosts = new HashMap<>();
    fieldBoosts.put("name.first", 1.0f);

    // check that synonym query is used for a single field
    Query singleFieldResult = multiMatch.parse(MultiMatchQueryBuilder.Type.CROSS_FIELDS, fieldBoosts, "dogs", null);
    SynonymQuery.Builder synonyms = new SynonymQuery.Builder("name.first");
    synonyms.addTerm(new Term("name.first", "dog"));
    synonyms.addTerm(new Term("name.first", "dogs"));
    Query singleFieldExpected = synonyms.build();
    assertThat(singleFieldResult, equalTo(singleFieldExpected));

    // check that blended term query is used for multiple fields
    fieldBoosts.put("name.last", 1.0f);
    Query twoFieldResult = multiMatch.parse(MultiMatchQueryBuilder.Type.CROSS_FIELDS, fieldBoosts, "dogs", null);
    Term[] blendedTerms = {
        new Term("name.first", "dog"),
        new Term("name.first", "dogs"),
        new Term("name.last", "dog"),
        new Term("name.last", "dogs")
    };
    float[] termBoosts = { 1.0f, 1.0f, 1.0f, 1.0f };
    Query twoFieldExpected = BlendedTermQuery.dismaxBlendedQuery(blendedTerms, termBoosts, 1.0f);
    assertThat(twoFieldResult, equalTo(twoFieldExpected));
}
Aggregations