Use of org.apache.lucene.search.spans.SpanWeight in project lucene-solr by apache.
Class PhraseHelper, method getTermToSpans.
// code extracted & refactored from WSTE.extractWeightedSpanTerms()
private void getTermToSpans(SpanQuery spanQuery, LeafReaderContext readerContext,
                            int doc, Map<BytesRef, Spans> result) throws IOException {
  // note: in WSTE there was some field-specific looping that seemed pointless, so that isn't here.
  final IndexSearcher searcher = new IndexSearcher(readerContext.reader());
  searcher.setQueryCache(null);
  if (willRewrite) {
    spanQuery = (SpanQuery) searcher.rewrite(spanQuery); // searcher.rewrite loops till done
  }

  // Get the underlying query terms, sorted so we can loop over results in order shortly...
  TreeSet<Term> termSet = new FieldFilteringTermSet();
  searcher.createWeight(spanQuery, false, 1.0f).extractTerms(termSet); // needsScores==false

  // Get Spans by running the query against the reader.
  // TODO it might make sense to re-use/cache the Spans instance, to advance forward between docs
  SpanWeight spanWeight = (SpanWeight) searcher.createNormalizedWeight(spanQuery, false);
  Spans spans = spanWeight.getSpans(readerContext, SpanWeight.Postings.POSITIONS);
  if (spans == null) {
    return;
  }
  TwoPhaseIterator twoPhaseIterator = spans.asTwoPhaseIterator();
  if (twoPhaseIterator != null) {
    if (twoPhaseIterator.approximation().advance(doc) != doc || !twoPhaseIterator.matches()) {
      return;
    }
  } else if (spans.advance(doc) != doc) { // pre-position on the doc, and return doing nothing if none found
    return;
  }

  // Consume the Spans into a cache. This instance is used as a source for multiple cloned copies.
  // It's important we do this and not re-use the same original Spans instance since these will be iterated
  // independently later on; sometimes in ways that prevent sharing the original Spans.
  CachedSpans cachedSpansSource = new CachedSpans(spans); // consumes spans for this doc only and caches
  spans = null; // we don't use it below

  // Map terms to a Spans instance (aggregate if necessary)
  for (final Term queryTerm : termSet) {
    // the spanQuery's terms were already filtered by these conditions
    if (positionInsensitiveTerms.contains(queryTerm)) {
      continue;
    }
    // copy-constructor refers to the same data (shallow) but has iteration state from the beginning
    CachedSpans cachedSpans = new CachedSpans(cachedSpansSource);
    // Add the span to whatever Spans may or may not already exist for this term
    Spans existingSpans = result.get(queryTerm.bytes());
    if (existingSpans != null) {
      if (existingSpans instanceof MultiSpans) {
        ((MultiSpans) existingSpans).addSpans(cachedSpans);
      } else { // upgrade to MultiSpans
        MultiSpans multiSpans = new MultiSpans();
        multiSpans.addSpans(existingSpans);
        multiSpans.addSpans(cachedSpans);
        result.put(queryTerm.bytes(), multiSpans);
      }
    } else {
      result.put(queryTerm.bytes(), cachedSpans);
    }
  }
}
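
For context, this snippet (and the two below) share one access pattern: wrap the reader in an IndexSearcher, disable the query cache, turn the SpanQuery into a SpanWeight, and ask the weight for Spans per leaf. Below is a minimal, self-contained sketch of that pattern against the same Lucene 6.x API used in these snippets. It is not part of the lucene-solr sources; the Directory argument, the field name "body", and the search term are illustrative assumptions.

import java.io.IOException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.spans.SpanWeight;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.store.Directory;

public class SpanWeightDemo {
  /** Prints every span position of the term "lucene" in the (assumed) "body" field. */
  public static void printSpanPositions(Directory dir) throws IOException {
    try (DirectoryReader reader = DirectoryReader.open(dir)) {
      IndexSearcher searcher = new IndexSearcher(reader);
      // disable caching, as the snippets do: a cache-wrapped Weight may not be a SpanWeight
      searcher.setQueryCache(null);
      SpanQuery query = new SpanTermQuery(new Term("body", "lucene"));
      // createNormalizedWeight rewrites the query first (Lucene 6.x API, as above)
      SpanWeight weight = (SpanWeight) searcher.createNormalizedWeight(query, false);
      for (LeafReaderContext leaf : reader.leaves()) {
        Spans spans = weight.getSpans(leaf, SpanWeight.Postings.POSITIONS);
        if (spans == null) {
          continue; // no matches in this segment
        }
        while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
          while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
            System.out.printf("doc=%d start=%d end=%d%n",
                spans.docID(), spans.startPosition(), spans.endPosition());
          }
        }
      }
    }
  }
}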
Use of org.apache.lucene.search.spans.SpanWeight in project lucene-solr by apache.
Class WeightedSpanTermExtractor, method extractWeightedSpanTerms.
/**
 * Fills a <code>Map</code> with {@link WeightedSpanTerm}s using the terms from the supplied <code>SpanQuery</code>.
 *
 * @param terms
 *          Map to place created WeightedSpanTerms in
 * @param spanQuery
 *          SpanQuery to extract Terms from
 * @throws IOException If there is a low-level I/O error
 */
protected void extractWeightedSpanTerms(Map<String, WeightedSpanTerm> terms, SpanQuery spanQuery, float boost)
    throws IOException {
  Set<String> fieldNames;
  if (fieldName == null) {
    fieldNames = new HashSet<>();
    collectSpanQueryFields(spanQuery, fieldNames);
  } else {
    fieldNames = new HashSet<>(1);
    fieldNames.add(fieldName);
  }
  // To support the use of the default field name
  if (defaultField != null) {
    fieldNames.add(defaultField);
  }

  Map<String, SpanQuery> queries = new HashMap<>();
  Set<Term> nonWeightedTerms = new HashSet<>();
  final boolean mustRewriteQuery = mustRewriteQuery(spanQuery);
  final IndexSearcher searcher = new IndexSearcher(getLeafContext());
  searcher.setQueryCache(null);
  if (mustRewriteQuery) {
    for (final String field : fieldNames) {
      final SpanQuery rewrittenQuery = (SpanQuery) spanQuery.rewrite(getLeafContext().reader());
      queries.put(field, rewrittenQuery);
      rewrittenQuery.createWeight(searcher, false, boost).extractTerms(nonWeightedTerms);
    }
  } else {
    spanQuery.createWeight(searcher, false, boost).extractTerms(nonWeightedTerms);
  }

  List<PositionSpan> spanPositions = new ArrayList<>();
  for (final String field : fieldNames) {
    final SpanQuery q;
    if (mustRewriteQuery) {
      q = queries.get(field);
    } else {
      q = spanQuery;
    }
    LeafReaderContext context = getLeafContext();
    SpanWeight w = (SpanWeight) searcher.createNormalizedWeight(q, false);
    Bits acceptDocs = context.reader().getLiveDocs();
    final Spans spans = w.getSpans(context, SpanWeight.Postings.POSITIONS);
    if (spans == null) {
      return;
    }

    // collect span positions
    while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
      if (acceptDocs != null && acceptDocs.get(spans.docID()) == false) {
        continue;
      }
      while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
        spanPositions.add(new PositionSpan(spans.startPosition(), spans.endPosition() - 1));
      }
    }
  }

  if (spanPositions.size() == 0) {
    // no spans found
    return;
  }

  for (final Term queryTerm : nonWeightedTerms) {
    if (fieldNameComparator(queryTerm.field())) {
      WeightedSpanTerm weightedSpanTerm = terms.get(queryTerm.text());
      if (weightedSpanTerm == null) {
        weightedSpanTerm = new WeightedSpanTerm(boost, queryTerm.text());
        weightedSpanTerm.addPositionSpans(spanPositions);
        weightedSpanTerm.positionSensitive = true;
        terms.put(queryTerm.text(), weightedSpanTerm);
      } else {
        if (spanPositions.size() > 0) {
          weightedSpanTerm.addPositionSpans(spanPositions);
        }
      }
    }
  }
}
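
extractWeightedSpanTerms is protected; in normal use it is reached indirectly through QueryScorer, which creates a WeightedSpanTermExtractor internally and feeds its map of WeightedSpanTerms to the Highlighter. A hedged usage sketch follows; the field name "body", the query terms, and the analyzer choice are illustrative assumptions, not taken from the code above.

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;

public class SpanHighlightDemo {
  /** Returns the best fragment of text with "apache ... lucene" matches highlighted. */
  public static String highlight(String text) throws Exception {
    SpanQuery[] clauses = {
        new SpanTermQuery(new Term("body", "apache")),
        new SpanTermQuery(new Term("body", "lucene"))
    };
    Query query = new SpanNearQuery(clauses, 1, true); // slop 1, in order
    // QueryScorer drives WeightedSpanTermExtractor.extractWeightedSpanTerms() under the hood
    QueryScorer scorer = new QueryScorer(query, "body");
    Highlighter highlighter = new Highlighter(scorer);
    Analyzer analyzer = new StandardAnalyzer();
    return highlighter.getBestFragment(analyzer, "body", text);
  }
}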
Use of org.apache.lucene.search.spans.SpanWeight in project lucene-solr by apache.
Class PayloadSpanUtil, method getPayloads.
private void getPayloads(Collection<byte[]> payloads, SpanQuery query) throws IOException {
  final IndexSearcher searcher = new IndexSearcher(context);
  searcher.setQueryCache(null);
  SpanWeight w = (SpanWeight) searcher.createNormalizedWeight(query, false);
  PayloadSpanCollector collector = new PayloadSpanCollector();
  for (LeafReaderContext leafReaderContext : context.leaves()) {
    final Spans spans = w.getSpans(leafReaderContext, SpanWeight.Postings.PAYLOADS);
    if (spans != null) {
      while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
        while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
          collector.reset();
          spans.collect(collector);
          payloads.addAll(collector.getPayloads());
        }
      }
    }
  }
}
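
getPayloads is private; the public entry point is PayloadSpanUtil.getPayloadsForQuery(Query), which converts the query to a SpanQuery where possible and then delegates to the method above. A hedged sketch of calling it; the Directory, field name, and term are illustrative assumptions, and the import shown assumes the Lucene 6.x package location of PayloadSpanUtil.

import java.util.Collection;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.payloads.PayloadSpanUtil; // package location as of Lucene 6.x (assumption)
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.store.Directory;

public class PayloadDemo {
  /** Collects the payloads attached to matches of "lucene" in the (assumed) "body" field. */
  public static void dumpPayloads(Directory dir) throws Exception {
    try (DirectoryReader reader = DirectoryReader.open(dir)) {
      PayloadSpanUtil util = new PayloadSpanUtil(reader.getContext());
      Collection<byte[]> payloads =
          util.getPayloadsForQuery(new SpanTermQuery(new Term("body", "lucene")));
      for (byte[] payload : payloads) {
        System.out.println(payload.length + " payload bytes");
      }
    }
  }
}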