use of org.apache.lucene.search.spans.SpanTermQuery in project lucene-solr by apache.
the class PayloadUtils method createSpanQuery.
/**
* The generated SpanQuery will be either a SpanTermQuery or an ordered, zero slop SpanNearQuery, depending
* on how many tokens are emitted.
*/
public static SpanQuery createSpanQuery(String field, String value, Analyzer analyzer) throws IOException {
// adapted this from QueryBuilder.createSpanQuery (which isn't currently public) and added reset(), end(), and close() calls
List<SpanTermQuery> terms = new ArrayList<>();
try (TokenStream in = analyzer.tokenStream(field, value)) {
in.reset();
TermToBytesRefAttribute termAtt = in.getAttribute(TermToBytesRefAttribute.class);
while (in.incrementToken()) {
terms.add(new SpanTermQuery(new Term(field, termAtt.getBytesRef())));
}
in.end();
}
SpanQuery query;
if (terms.isEmpty()) {
query = null;
} else if (terms.size() == 1) {
query = terms.get(0);
} else {
query = new SpanNearQuery(terms.toArray(new SpanTermQuery[terms.size()]), 0, true);
}
return query;
}
use of org.apache.lucene.search.spans.SpanTermQuery in project lucene-solr by apache.
the class WeightedSpanTermExtractor method extract.
/**
* Fills a <code>Map</code> with {@link WeightedSpanTerm}s using the terms from the supplied <code>Query</code>.
*
* @param query
* Query to extract Terms from
* @param terms
* Map to place created WeightedSpanTerms in
* @throws IOException If there is a low-level I/O error
*/
protected void extract(Query query, float boost, Map<String, WeightedSpanTerm> terms) throws IOException {
if (query instanceof BoostQuery) {
BoostQuery boostQuery = (BoostQuery) query;
extract(boostQuery.getQuery(), boost * boostQuery.getBoost(), terms);
} else if (query instanceof BooleanQuery) {
for (BooleanClause clause : (BooleanQuery) query) {
if (!clause.isProhibited()) {
extract(clause.getQuery(), boost, terms);
}
}
} else if (query instanceof PhraseQuery) {
PhraseQuery phraseQuery = ((PhraseQuery) query);
Term[] phraseQueryTerms = phraseQuery.getTerms();
if (phraseQueryTerms.length == 1) {
extractWeightedSpanTerms(terms, new SpanTermQuery(phraseQueryTerms[0]), boost);
} else {
SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.length];
for (int i = 0; i < phraseQueryTerms.length; i++) {
clauses[i] = new SpanTermQuery(phraseQueryTerms[i]);
}
// sum position increments beyond 1
int positionGaps = 0;
int[] positions = phraseQuery.getPositions();
if (positions.length >= 2) {
// positions are in increasing order. max(0,...) is just a safeguard.
positionGaps = Math.max(0, positions[positions.length - 1] - positions[0] - positions.length + 1);
}
//if original slop is 0 then require inOrder
boolean inorder = (phraseQuery.getSlop() == 0);
SpanNearQuery sp = new SpanNearQuery(clauses, phraseQuery.getSlop() + positionGaps, inorder);
extractWeightedSpanTerms(terms, sp, boost);
}
} else if (query instanceof TermQuery || query instanceof SynonymQuery) {
extractWeightedTerms(terms, query, boost);
} else if (query instanceof SpanQuery) {
extractWeightedSpanTerms(terms, (SpanQuery) query, boost);
} else if (query instanceof ConstantScoreQuery) {
final Query q = ((ConstantScoreQuery) query).getQuery();
if (q != null) {
extract(q, boost, terms);
}
} else if (query instanceof CommonTermsQuery) {
// specialized since rewriting would change the result query
// this query is TermContext sensitive.
extractWeightedTerms(terms, query, boost);
} else if (query instanceof DisjunctionMaxQuery) {
for (Query clause : ((DisjunctionMaxQuery) query)) {
extract(clause, boost, terms);
}
} else if (query instanceof ToParentBlockJoinQuery) {
extract(((ToParentBlockJoinQuery) query).getChildQuery(), boost, terms);
} else if (query instanceof ToChildBlockJoinQuery) {
extract(((ToChildBlockJoinQuery) query).getParentQuery(), boost, terms);
} else if (query instanceof MultiPhraseQuery) {
final MultiPhraseQuery mpq = (MultiPhraseQuery) query;
final Term[][] termArrays = mpq.getTermArrays();
final int[] positions = mpq.getPositions();
if (positions.length > 0) {
int maxPosition = positions[positions.length - 1];
for (int i = 0; i < positions.length - 1; ++i) {
if (positions[i] > maxPosition) {
maxPosition = positions[i];
}
}
@SuppressWarnings({ "unchecked", "rawtypes" }) final List<SpanQuery>[] disjunctLists = new List[maxPosition + 1];
int distinctPositions = 0;
for (int i = 0; i < termArrays.length; ++i) {
final Term[] termArray = termArrays[i];
List<SpanQuery> disjuncts = disjunctLists[positions[i]];
if (disjuncts == null) {
disjuncts = (disjunctLists[positions[i]] = new ArrayList<>(termArray.length));
++distinctPositions;
}
for (Term aTermArray : termArray) {
disjuncts.add(new SpanTermQuery(aTermArray));
}
}
int positionGaps = 0;
int position = 0;
final SpanQuery[] clauses = new SpanQuery[distinctPositions];
for (List<SpanQuery> disjuncts : disjunctLists) {
if (disjuncts != null) {
clauses[position++] = new SpanOrQuery(disjuncts.toArray(new SpanQuery[disjuncts.size()]));
} else {
++positionGaps;
}
}
if (clauses.length == 1) {
extractWeightedSpanTerms(terms, clauses[0], boost);
} else {
final int slop = mpq.getSlop();
final boolean inorder = (slop == 0);
SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder);
extractWeightedSpanTerms(terms, sp, boost);
}
}
} else if (query instanceof MatchAllDocsQuery) {
//nothing
} else if (query instanceof CustomScoreQuery) {
extract(((CustomScoreQuery) query).getSubQuery(), boost, terms);
} else if (isQueryUnsupported(query.getClass())) {
// nothing
} else {
if (query instanceof MultiTermQuery && (!expandMultiTermQuery || !fieldNameComparator(((MultiTermQuery) query).getField()))) {
return;
}
Query origQuery = query;
final IndexReader reader = getLeafContext().reader();
Query rewritten;
if (query instanceof MultiTermQuery) {
rewritten = MultiTermQuery.SCORING_BOOLEAN_REWRITE.rewrite(reader, (MultiTermQuery) query);
} else {
rewritten = origQuery.rewrite(reader);
}
if (rewritten != origQuery) {
// only rewrite once and then flatten again - the rewritten query could have a special treatment
// if this method is overwritten in a subclass or above in the next recursion
extract(rewritten, boost, terms);
} else {
extractUnknownQuery(query, terms);
}
}
}
use of org.apache.lucene.search.spans.SpanTermQuery in project lucene-solr by apache.
the class TestPayloadCheckQuery method testComplexSpanChecks.
public void testComplexSpanChecks() throws Exception {
SpanTermQuery one = new SpanTermQuery(new Term("field", "one"));
SpanTermQuery thous = new SpanTermQuery(new Term("field", "thousand"));
//should be one position in between
SpanTermQuery hundred = new SpanTermQuery(new Term("field", "hundred"));
SpanTermQuery three = new SpanTermQuery(new Term("field", "three"));
SpanNearQuery oneThous = new SpanNearQuery(new SpanQuery[] { one, thous }, 0, true);
SpanNearQuery hundredThree = new SpanNearQuery(new SpanQuery[] { hundred, three }, 0, true);
SpanNearQuery oneThousHunThree = new SpanNearQuery(new SpanQuery[] { oneThous, hundredThree }, 1, true);
SpanQuery query;
//this one's too small
query = new SpanPositionRangeQuery(oneThousHunThree, 1, 2);
checkHits(query, new int[] {});
//this one's just right
query = new SpanPositionRangeQuery(oneThousHunThree, 0, 6);
checkHits(query, new int[] { 1103, 1203, 1303, 1403, 1503, 1603, 1703, 1803, 1903 });
List<BytesRef> payloads = new ArrayList<>();
BytesRef pay = new BytesRef(("pos: " + 0).getBytes(StandardCharsets.UTF_8));
BytesRef pay2 = new BytesRef(("pos: " + 1).getBytes(StandardCharsets.UTF_8));
BytesRef pay3 = new BytesRef(("pos: " + 3).getBytes(StandardCharsets.UTF_8));
BytesRef pay4 = new BytesRef(("pos: " + 4).getBytes(StandardCharsets.UTF_8));
payloads.add(pay);
payloads.add(pay2);
payloads.add(pay3);
payloads.add(pay4);
query = new SpanPayloadCheckQuery(oneThousHunThree, payloads);
checkHits(query, new int[] { 1103, 1203, 1303, 1403, 1503, 1603, 1703, 1803, 1903 });
}
use of org.apache.lucene.search.spans.SpanTermQuery in project lucene-solr by apache.
the class TestPayloadScoreQuery method testSpanContainingQuery.
@Test
public void testSpanContainingQuery() throws Exception {
// twenty WITHIN ((one OR hundred) NEAR two)~2
SpanContainingQuery q = new SpanContainingQuery(new SpanNearQuery(new SpanQuery[] { new SpanOrQuery(new SpanTermQuery(new Term("field", "one")), new SpanTermQuery(new Term("field", "hundred"))), new SpanTermQuery(new Term("field", "two")) }, 2, true), new SpanTermQuery(new Term("field", "twenty")));
checkQuery(q, new AveragePayloadFunction(), new int[] { 222, 122 }, new float[] { 4.0f, 3.666666f });
checkQuery(q, new MaxPayloadFunction(), new int[] { 122, 222 }, new float[] { 4.0f, 4.0f });
checkQuery(q, new MinPayloadFunction(), new int[] { 222, 122 }, new float[] { 4.0f, 2.0f });
}
use of org.apache.lucene.search.spans.SpanTermQuery in project lucene-solr by apache.
the class TestPayloadSpans method testFirstClauseWithoutPayload.
public void testFirstClauseWithoutPayload() throws Exception {
Spans spans;
IndexSearcher searcher = getSearcher();
SpanQuery[] clauses = new SpanQuery[3];
clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "nopayload"));
clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "qq"));
clauses[2] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "ss"));
SpanNearQuery spanNearQuery = new SpanNearQuery(clauses, 6, true);
SpanQuery[] clauses2 = new SpanQuery[2];
clauses2[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "pp"));
clauses2[1] = spanNearQuery;
SpanNearQuery snq = new SpanNearQuery(clauses2, 6, false);
SpanQuery[] clauses3 = new SpanQuery[2];
clauses3[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "np"));
clauses3[1] = snq;
SpanNearQuery nestedSpanNearQuery = new SpanNearQuery(clauses3, 6, false);
spans = nestedSpanNearQuery.createWeight(searcher, false, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.PAYLOADS);
assertTrue("spans is null and it shouldn't be", spans != null);
checkSpans(spans, 1, new int[] { 3 });
closeIndexReader.close();
directory.close();
}
Aggregations