use of org.apache.lucene.search.TermQuery in project elasticsearch by elastic.
the class CustomPostingsHighlighterTests method testCustomPostingsHighlighter.
public void testCustomPostingsHighlighter() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
iwc.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
//good position but only one match
final String firstValue = "This is a test. Just a test1 highlighting from postings highlighter.";
Field body = new Field("body", "", offsetsType);
Document doc = new Document();
doc.add(body);
body.setStringValue(firstValue);
//two matches, not the best snippet due to its length though
final String secondValue = "This is the second highlighting value to perform highlighting on a longer text that gets scored lower.";
Field body2 = new Field("body", "", offsetsType);
doc.add(body2);
body2.setStringValue(secondValue);
//two matches and short, will be scored highest
final String thirdValue = "This is highlighting the third short highlighting value.";
Field body3 = new Field("body", "", offsetsType);
doc.add(body3);
body3.setStringValue(thirdValue);
//one match, same as first but at the end, will be scored lower due to its position
final String fourthValue = "Just a test4 highlighting from postings highlighter.";
Field body4 = new Field("body", "", offsetsType);
doc.add(body4);
body4.setStringValue(fourthValue);
iw.addDocument(doc);
IndexReader ir = iw.getReader();
iw.close();
String firstHlValue = "Just a test1 <b>highlighting</b> from postings highlighter.";
String secondHlValue = "This is the second <b>highlighting</b> value to perform <b>highlighting</b> on a longer text that gets scored lower.";
String thirdHlValue = "This is <b>highlighting</b> the third short <b>highlighting</b> value.";
String fourthHlValue = "Just a test4 <b>highlighting</b> from postings highlighter.";
IndexSearcher searcher = newSearcher(ir);
Query query = new TermQuery(new Term("body", "highlighting"));
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
assertThat(topDocs.totalHits, equalTo(1));
int docId = topDocs.scoreDocs[0].doc;
String fieldValue = firstValue + HighlightUtils.PARAGRAPH_SEPARATOR + secondValue + HighlightUtils.PARAGRAPH_SEPARATOR + thirdValue + HighlightUtils.PARAGRAPH_SEPARATOR + fourthValue;
CustomPostingsHighlighter highlighter = new CustomPostingsHighlighter(null, new CustomPassageFormatter("<b>", "</b>", new DefaultEncoder()), fieldValue, false);
Snippet[] snippets = highlighter.highlightField("body", query, searcher, docId, 5);
assertThat(snippets.length, equalTo(4));
assertThat(snippets[0].getText(), equalTo(firstHlValue));
assertThat(snippets[1].getText(), equalTo(secondHlValue));
assertThat(snippets[2].getText(), equalTo(thirdHlValue));
assertThat(snippets[3].getText(), equalTo(fourthHlValue));
ir.close();
dir.close();
}
use of org.apache.lucene.search.TermQuery in project elasticsearch by elastic.
the class CandidateQueryTests method testDuel.
public void testDuel() throws Exception {
List<Function<String, Query>> queryFunctions = new ArrayList<>();
queryFunctions.add((id) -> new PrefixQuery(new Term("field", id)));
queryFunctions.add((id) -> new WildcardQuery(new Term("field", id + "*")));
queryFunctions.add((id) -> new CustomQuery(new Term("field", id)));
queryFunctions.add((id) -> new SpanTermQuery(new Term("field", id)));
queryFunctions.add((id) -> new TermQuery(new Term("field", id)));
queryFunctions.add((id) -> {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
return builder.build();
});
queryFunctions.add((id) -> {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.add(new TermQuery(new Term("field", id)), BooleanClause.Occur.MUST);
if (randomBoolean()) {
builder.add(new MatchNoDocsQuery("no reason"), BooleanClause.Occur.MUST_NOT);
}
if (randomBoolean()) {
builder.add(new CustomQuery(new Term("field", id)), BooleanClause.Occur.MUST);
}
return builder.build();
});
queryFunctions.add((id) -> {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.add(new TermQuery(new Term("field", id)), BooleanClause.Occur.SHOULD);
if (randomBoolean()) {
builder.add(new MatchNoDocsQuery("no reason"), BooleanClause.Occur.MUST_NOT);
}
if (randomBoolean()) {
builder.add(new CustomQuery(new Term("field", id)), BooleanClause.Occur.SHOULD);
}
return builder.build();
});
queryFunctions.add((id) -> {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
if (randomBoolean()) {
builder.add(new MatchNoDocsQuery("no reason"), BooleanClause.Occur.MUST_NOT);
}
return builder.build();
});
queryFunctions.add((id) -> {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
if (randomBoolean()) {
builder.add(new MatchNoDocsQuery("no reason"), BooleanClause.Occur.MUST_NOT);
}
return builder.build();
});
queryFunctions.add((id) -> {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.setMinimumNumberShouldMatch(randomIntBetween(0, 4));
builder.add(new TermQuery(new Term("field", id)), BooleanClause.Occur.SHOULD);
builder.add(new CustomQuery(new Term("field", id)), BooleanClause.Occur.SHOULD);
return builder.build();
});
queryFunctions.add((id) -> new MatchAllDocsQuery());
queryFunctions.add((id) -> new MatchNoDocsQuery("no reason at all"));
int numDocs = randomIntBetween(queryFunctions.size(), queryFunctions.size() * 3);
List<ParseContext.Document> documents = new ArrayList<>();
for (int i = 0; i < numDocs; i++) {
String id = Integer.toString(i);
Query query = queryFunctions.get(i % queryFunctions.size()).apply(id);
addQuery(query, documents);
}
indexWriter.addDocuments(documents);
indexWriter.close();
directoryReader = DirectoryReader.open(directory);
IndexSearcher shardSearcher = newSearcher(directoryReader);
// Disable query cache, because ControlQuery cannot be cached...
shardSearcher.setQueryCache(null);
for (int i = 0; i < numDocs; i++) {
String id = Integer.toString(i);
Iterable<? extends IndexableField> doc = Collections.singleton(new StringField("field", id, Field.Store.NO));
MemoryIndex memoryIndex = MemoryIndex.fromDocument(doc, new WhitespaceAnalyzer());
duelRun(queryStore, memoryIndex, shardSearcher);
}
Iterable<? extends IndexableField> doc = Collections.singleton(new StringField("field", "value", Field.Store.NO));
MemoryIndex memoryIndex = MemoryIndex.fromDocument(doc, new WhitespaceAnalyzer());
duelRun(queryStore, memoryIndex, shardSearcher);
// Empty percolator doc:
memoryIndex = new MemoryIndex();
duelRun(queryStore, memoryIndex, shardSearcher);
}
use of org.apache.lucene.search.TermQuery in project elasticsearch by elastic.
the class PercolateQueryTests method testPercolateQuery.
public void testPercolateQuery() throws Exception {
List<Iterable<? extends IndexableField>> docs = new ArrayList<>();
List<Query> queries = new ArrayList<>();
PercolateQuery.QueryStore queryStore = ctx -> queries::get;
queries.add(new TermQuery(new Term("field", "fox")));
docs.add(Collections.singleton(new StringField("select", "a", Field.Store.NO)));
SpanNearQuery.Builder snp = new SpanNearQuery.Builder("field", true);
snp.addClause(new SpanTermQuery(new Term("field", "jumps")));
snp.addClause(new SpanTermQuery(new Term("field", "lazy")));
snp.addClause(new SpanTermQuery(new Term("field", "dog")));
snp.setSlop(2);
queries.add(snp.build());
docs.add(Collections.singleton(new StringField("select", "b", Field.Store.NO)));
PhraseQuery.Builder pq1 = new PhraseQuery.Builder();
pq1.add(new Term("field", "quick"));
pq1.add(new Term("field", "brown"));
pq1.add(new Term("field", "jumps"));
pq1.setSlop(1);
queries.add(pq1.build());
docs.add(Collections.singleton(new StringField("select", "b", Field.Store.NO)));
BooleanQuery.Builder bq1 = new BooleanQuery.Builder();
bq1.add(new TermQuery(new Term("field", "quick")), BooleanClause.Occur.MUST);
bq1.add(new TermQuery(new Term("field", "brown")), BooleanClause.Occur.MUST);
bq1.add(new TermQuery(new Term("field", "fox")), BooleanClause.Occur.MUST);
queries.add(bq1.build());
docs.add(Collections.singleton(new StringField("select", "b", Field.Store.NO)));
indexWriter.addDocuments(docs);
indexWriter.close();
directoryReader = DirectoryReader.open(directory);
IndexSearcher shardSearcher = newSearcher(directoryReader);
MemoryIndex memoryIndex = new MemoryIndex();
memoryIndex.addField("field", "the quick brown fox jumps over the lazy dog", new WhitespaceAnalyzer());
IndexSearcher percolateSearcher = memoryIndex.createSearcher();
// no scoring, wrapping it in a constant score query:
Query query = new ConstantScoreQuery(new PercolateQuery("type", queryStore, new BytesArray("a"), new TermQuery(new Term("select", "a")), percolateSearcher, new MatchNoDocsQuery("")));
TopDocs topDocs = shardSearcher.search(query, 10);
assertThat(topDocs.totalHits, equalTo(1));
assertThat(topDocs.scoreDocs.length, equalTo(1));
assertThat(topDocs.scoreDocs[0].doc, equalTo(0));
Explanation explanation = shardSearcher.explain(query, 0);
assertThat(explanation.isMatch(), is(true));
assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[0].score));
query = new ConstantScoreQuery(new PercolateQuery("type", queryStore, new BytesArray("b"), new TermQuery(new Term("select", "b")), percolateSearcher, new MatchNoDocsQuery("")));
topDocs = shardSearcher.search(query, 10);
assertThat(topDocs.totalHits, equalTo(3));
assertThat(topDocs.scoreDocs.length, equalTo(3));
assertThat(topDocs.scoreDocs[0].doc, equalTo(1));
explanation = shardSearcher.explain(query, 1);
assertThat(explanation.isMatch(), is(true));
assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[0].score));
assertThat(topDocs.scoreDocs[1].doc, equalTo(2));
explanation = shardSearcher.explain(query, 2);
assertThat(explanation.isMatch(), is(true));
assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[1].score));
assertThat(topDocs.scoreDocs[2].doc, equalTo(3));
explanation = shardSearcher.explain(query, 2);
assertThat(explanation.isMatch(), is(true));
assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[2].score));
query = new ConstantScoreQuery(new PercolateQuery("type", queryStore, new BytesArray("c"), new MatchAllDocsQuery(), percolateSearcher, new MatchAllDocsQuery()));
topDocs = shardSearcher.search(query, 10);
assertThat(topDocs.totalHits, equalTo(4));
query = new PercolateQuery("type", queryStore, new BytesArray("{}"), new TermQuery(new Term("select", "b")), percolateSearcher, new MatchNoDocsQuery(""));
topDocs = shardSearcher.search(query, 10);
assertThat(topDocs.totalHits, equalTo(3));
assertThat(topDocs.scoreDocs.length, equalTo(3));
assertThat(topDocs.scoreDocs[0].doc, equalTo(3));
explanation = shardSearcher.explain(query, 3);
assertThat(explanation.isMatch(), is(true));
assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[0].score));
assertThat(explanation.getDetails(), arrayWithSize(1));
assertThat(topDocs.scoreDocs[1].doc, equalTo(2));
explanation = shardSearcher.explain(query, 2);
assertThat(explanation.isMatch(), is(true));
assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[1].score));
assertThat(explanation.getDetails(), arrayWithSize(1));
assertThat(topDocs.scoreDocs[2].doc, equalTo(1));
explanation = shardSearcher.explain(query, 1);
assertThat(explanation.isMatch(), is(true));
assertThat(explanation.getValue(), equalTo(topDocs.scoreDocs[2].score));
assertThat(explanation.getDetails(), arrayWithSize(1));
}
use of org.apache.lucene.search.TermQuery in project elasticsearch by elastic.
the class PercolatorFieldMapperTests method testCreateCandidateQuery.
public void testCreateCandidateQuery() throws Exception {
addQueryMapping();
MemoryIndex memoryIndex = new MemoryIndex(false);
memoryIndex.addField("field1", "the quick brown fox jumps over the lazy dog", new WhitespaceAnalyzer());
memoryIndex.addField("field2", "some more text", new WhitespaceAnalyzer());
memoryIndex.addField("_field3", "unhide me", new WhitespaceAnalyzer());
memoryIndex.addField("field4", "123", new WhitespaceAnalyzer());
memoryIndex.addField(new LongPoint("number_field", 10L), new WhitespaceAnalyzer());
IndexReader indexReader = memoryIndex.createSearcher().getIndexReader();
BooleanQuery candidateQuery = (BooleanQuery) fieldType.createCandidateQuery(indexReader);
assertEquals(2, candidateQuery.clauses().size());
assertEquals(Occur.SHOULD, candidateQuery.clauses().get(0).getOccur());
TermInSetQuery termsQuery = (TermInSetQuery) candidateQuery.clauses().get(0).getQuery();
PrefixCodedTerms terms = termsQuery.getTermData();
assertThat(terms.size(), equalTo(14L));
PrefixCodedTerms.TermIterator termIterator = terms.iterator();
assertTermIterator(termIterator, "_field3 me", fieldType.queryTermsField.name());
assertTermIterator(termIterator, "_field3 unhide", fieldType.queryTermsField.name());
assertTermIterator(termIterator, "field1 brown", fieldType.queryTermsField.name());
assertTermIterator(termIterator, "field1 dog", fieldType.queryTermsField.name());
assertTermIterator(termIterator, "field1 fox", fieldType.queryTermsField.name());
assertTermIterator(termIterator, "field1 jumps", fieldType.queryTermsField.name());
assertTermIterator(termIterator, "field1 lazy", fieldType.queryTermsField.name());
assertTermIterator(termIterator, "field1 over", fieldType.queryTermsField.name());
assertTermIterator(termIterator, "field1 quick", fieldType.queryTermsField.name());
assertTermIterator(termIterator, "field1 the", fieldType.queryTermsField.name());
assertTermIterator(termIterator, "field2 more", fieldType.queryTermsField.name());
assertTermIterator(termIterator, "field2 some", fieldType.queryTermsField.name());
assertTermIterator(termIterator, "field2 text", fieldType.queryTermsField.name());
assertTermIterator(termIterator, "field4 123", fieldType.queryTermsField.name());
assertEquals(Occur.SHOULD, candidateQuery.clauses().get(1).getOccur());
assertEquals(new TermQuery(new Term(fieldType.extractionResultField.name(), EXTRACTION_FAILED)), candidateQuery.clauses().get(1).getQuery());
}
use of org.apache.lucene.search.TermQuery in project elasticsearch by elastic.
the class PercolatorFieldMapperTests method testExtractTerms.
public void testExtractTerms() throws Exception {
addQueryMapping();
BooleanQuery.Builder bq = new BooleanQuery.Builder();
TermQuery termQuery1 = new TermQuery(new Term("field", "term1"));
bq.add(termQuery1, BooleanClause.Occur.SHOULD);
TermQuery termQuery2 = new TermQuery(new Term("field", "term2"));
bq.add(termQuery2, BooleanClause.Occur.SHOULD);
DocumentMapper documentMapper = mapperService.documentMapper(typeName);
PercolatorFieldMapper fieldMapper = (PercolatorFieldMapper) documentMapper.mappers().getMapper(fieldName);
ParseContext.InternalParseContext parseContext = new ParseContext.InternalParseContext(Settings.EMPTY, mapperService.documentMapperParser(), documentMapper, null, null);
fieldMapper.processQuery(bq.build(), parseContext);
ParseContext.Document document = parseContext.doc();
PercolatorFieldMapper.FieldType fieldType = (PercolatorFieldMapper.FieldType) fieldMapper.fieldType();
assertThat(document.getField(fieldType.extractionResultField.name()).stringValue(), equalTo(EXTRACTION_COMPLETE));
List<IndexableField> fields = new ArrayList<>(Arrays.asList(document.getFields(fieldType.queryTermsField.name())));
Collections.sort(fields, (field1, field2) -> field1.binaryValue().compareTo(field2.binaryValue()));
assertThat(fields.size(), equalTo(2));
assertThat(fields.get(0).binaryValue().utf8ToString(), equalTo("field term1"));
assertThat(fields.get(1).binaryValue().utf8ToString(), equalTo("field term2"));
}
Aggregations