
Example 1 with SpanWeight

Use of org.apache.lucene.search.spans.SpanWeight in the lucene-solr project by Apache.

From the class PhraseHelper, the method getTermToSpans:

// code extracted & refactored from WSTE.extractWeightedSpanTerms()
private void getTermToSpans(SpanQuery spanQuery, LeafReaderContext readerContext, int doc, Map<BytesRef, Spans> result) throws IOException {
    // note: in WSTE there was some field specific looping that seemed pointless so that isn't here.
    final IndexSearcher searcher = new IndexSearcher(readerContext.reader());
    searcher.setQueryCache(null);
    if (willRewrite) {
        // searcher.rewrite loops till done
        spanQuery = (SpanQuery) searcher.rewrite(spanQuery);
    }
    // Get the underlying query terms
    // sorted so we can loop over results in order shortly...
    TreeSet<Term> termSet = new FieldFilteringTermSet();
    // needsScores == false
    searcher.createWeight(spanQuery, false, 1.0f).extractTerms(termSet);
    // Get Spans by running the query against the reader
    // TODO it might make sense to re-use/cache the Spans instance, to advance forward between docs
    SpanWeight spanWeight = (SpanWeight) searcher.createNormalizedWeight(spanQuery, false);
    Spans spans = spanWeight.getSpans(readerContext, SpanWeight.Postings.POSITIONS);
    if (spans == null) {
        return;
    }
    TwoPhaseIterator twoPhaseIterator = spans.asTwoPhaseIterator();
    if (twoPhaseIterator != null) {
        if (twoPhaseIterator.approximation().advance(doc) != doc || !twoPhaseIterator.matches()) {
            return;
        }
    } else if (spans.advance(doc) != doc) {
        // pre-position to the doc, and return doing nothing if it is not found
        return;
    }
    // Consume the Spans into a cache.  This instance is used as a source for multiple cloned copies.
    // It's important we do this and not re-use the same original Spans instance since these will be iterated
    // independently later on; sometimes in ways that prevents sharing the original Spans.
    // consumes spans for this doc only and caches
    CachedSpans cachedSpansSource = new CachedSpans(spans);
    // we don't use it below
    spans = null;
    // Map terms to a Spans instance (aggregate if necessary)
    for (final Term queryTerm : termSet) {
        // note: we expect at least one query term to pass; the collected spanQuery list was already filtered by these conditions.
        if (positionInsensitiveTerms.contains(queryTerm)) {
            continue;
        }
        // copy-constructor refers to same data (shallow) but has iteration state from the beginning
        CachedSpans cachedSpans = new CachedSpans(cachedSpansSource);
        // Add the span to whatever span may or may not exist
        Spans existingSpans = result.get(queryTerm.bytes());
        if (existingSpans != null) {
            if (existingSpans instanceof MultiSpans) {
                ((MultiSpans) existingSpans).addSpans(cachedSpans);
            } else {
                // upgrade to MultiSpans
                MultiSpans multiSpans = new MultiSpans();
                multiSpans.addSpans(existingSpans);
                multiSpans.addSpans(cachedSpans);
                result.put(queryTerm.bytes(), multiSpans);
            }
        } else {
            result.put(queryTerm.bytes(), cachedSpans);
        }
    }
}
Also used: IndexSearcher (org.apache.lucene.search.IndexSearcher), TwoPhaseIterator (org.apache.lucene.search.TwoPhaseIterator), SpanWeight (org.apache.lucene.search.spans.SpanWeight), Term (org.apache.lucene.index.Term), WeightedSpanTerm (org.apache.lucene.search.highlight.WeightedSpanTerm), Spans (org.apache.lucene.search.spans.Spans)
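
The pattern above (build a SpanWeight through the searcher, ask it for Spans per leaf, then iterate positions) can be exercised on its own. The following is a minimal standalone sketch, not taken from lucene-solr: it assumes the same Lucene 6.x APIs used above (createNormalizedWeight, SpanWeight.getSpans) and a hypothetical field name "body" and term "lucene".

import java.io.IOException;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.spans.SpanWeight;
import org.apache.lucene.search.spans.Spans;

public class SpanWeightSketch {

    // Prints every span position of the term "lucene" in field "body", segment by segment.
    static void printSpanPositions(IndexReader reader) throws IOException {
        IndexSearcher searcher = new IndexSearcher(reader);
        // per-call searchers should not populate the query cache
        searcher.setQueryCache(null);
        SpanTermQuery query = new SpanTermQuery(new Term("body", "lucene"));
        // needsScores == false: only positions are needed, no similarity scoring
        SpanWeight weight = (SpanWeight) searcher.createNormalizedWeight(query, false);
        for (LeafReaderContext leafContext : reader.leaves()) {
            Spans spans = weight.getSpans(leafContext, SpanWeight.Postings.POSITIONS);
            if (spans == null) {
                // the query matches nothing in this segment
                continue;
            }
            while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
                while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
                    System.out.println("doc=" + (leafContext.docBase + spans.docID())
                            + " start=" + spans.startPosition()
                            + " end=" + spans.endPosition());
                }
            }
        }
    }
}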

Example 2 with SpanWeight

Use of org.apache.lucene.search.spans.SpanWeight in the lucene-solr project by Apache.

From the class WeightedSpanTermExtractor, the method extractWeightedSpanTerms:

/**
   * Fills a <code>Map</code> with {@link WeightedSpanTerm}s using the terms from the supplied <code>SpanQuery</code>.
   * 
   * @param terms
   *          Map to place created WeightedSpanTerms in
   * @param spanQuery
   *          SpanQuery to extract Terms from
   * @throws IOException If there is a low-level I/O error
   */
protected void extractWeightedSpanTerms(Map<String, WeightedSpanTerm> terms, SpanQuery spanQuery, float boost) throws IOException {
    Set<String> fieldNames;
    if (fieldName == null) {
        fieldNames = new HashSet<>();
        collectSpanQueryFields(spanQuery, fieldNames);
    } else {
        fieldNames = new HashSet<>(1);
        fieldNames.add(fieldName);
    }
    // To support the use of the default field name
    if (defaultField != null) {
        fieldNames.add(defaultField);
    }
    Map<String, SpanQuery> queries = new HashMap<>();
    Set<Term> nonWeightedTerms = new HashSet<>();
    final boolean mustRewriteQuery = mustRewriteQuery(spanQuery);
    final IndexSearcher searcher = new IndexSearcher(getLeafContext());
    searcher.setQueryCache(null);
    if (mustRewriteQuery) {
        for (final String field : fieldNames) {
            final SpanQuery rewrittenQuery = (SpanQuery) spanQuery.rewrite(getLeafContext().reader());
            queries.put(field, rewrittenQuery);
            rewrittenQuery.createWeight(searcher, false, boost).extractTerms(nonWeightedTerms);
        }
    } else {
        spanQuery.createWeight(searcher, false, boost).extractTerms(nonWeightedTerms);
    }
    List<PositionSpan> spanPositions = new ArrayList<>();
    for (final String field : fieldNames) {
        final SpanQuery q;
        if (mustRewriteQuery) {
            q = queries.get(field);
        } else {
            q = spanQuery;
        }
        LeafReaderContext context = getLeafContext();
        SpanWeight w = (SpanWeight) searcher.createNormalizedWeight(q, false);
        Bits acceptDocs = context.reader().getLiveDocs();
        final Spans spans = w.getSpans(context, SpanWeight.Postings.POSITIONS);
        if (spans == null) {
            return;
        }
        // collect span positions
        while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
            if (acceptDocs != null && acceptDocs.get(spans.docID()) == false) {
                continue;
            }
            while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
                spanPositions.add(new PositionSpan(spans.startPosition(), spans.endPosition() - 1));
            }
        }
    }
    if (spanPositions.size() == 0) {
        // no spans found
        return;
    }
    for (final Term queryTerm : nonWeightedTerms) {
        if (fieldNameComparator(queryTerm.field())) {
            WeightedSpanTerm weightedSpanTerm = terms.get(queryTerm.text());
            if (weightedSpanTerm == null) {
                weightedSpanTerm = new WeightedSpanTerm(boost, queryTerm.text());
                weightedSpanTerm.addPositionSpans(spanPositions);
                weightedSpanTerm.positionSensitive = true;
                terms.put(queryTerm.text(), weightedSpanTerm);
            } else {
                if (spanPositions.size() > 0) {
                    weightedSpanTerm.addPositionSpans(spanPositions);
                }
            }
        }
    }
}
Also used: IndexSearcher (org.apache.lucene.search.IndexSearcher), HashMap (java.util.HashMap), ArrayList (java.util.ArrayList), Term (org.apache.lucene.index.Term), FieldMaskingSpanQuery (org.apache.lucene.search.spans.FieldMaskingSpanQuery), SpanQuery (org.apache.lucene.search.spans.SpanQuery), Spans (org.apache.lucene.search.spans.Spans), SpanWeight (org.apache.lucene.search.spans.SpanWeight), LeafReaderContext (org.apache.lucene.index.LeafReaderContext), Bits (org.apache.lucene.util.Bits), HashSet (java.util.HashSet)
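
In practice this method is usually not called directly; it is driven by QueryScorer when the Highlighter scores candidate fragments. Below is a minimal sketch of that entry point, assuming a Lucene 6.x setup; the field name "body", the analyzer, the span query and the stored text are placeholders for illustration, not part of the source above.

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;

public class SpanHighlightSketch {

    // Highlights a span-near match; QueryScorer delegates to WeightedSpanTermExtractor
    // (and thus to extractWeightedSpanTerms) to weight the span terms it finds.
    static String highlight(String storedText) throws IOException, InvalidTokenOffsetsException {
        Query query = new SpanNearQuery(new SpanQuery[] {
                new SpanTermQuery(new Term("body", "apache")),
                new SpanTermQuery(new Term("body", "lucene"))
        }, 1, true); // slop of 1, clauses must appear in order
        Analyzer analyzer = new StandardAnalyzer();
        Highlighter highlighter = new Highlighter(new QueryScorer(query, "body"));
        return highlighter.getBestFragment(analyzer, "body", storedText);
    }
}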

Example 3 with SpanWeight

Use of org.apache.lucene.search.spans.SpanWeight in the lucene-solr project by Apache.

From the class PayloadSpanUtil, the method getPayloads:

private void getPayloads(Collection<byte[]> payloads, SpanQuery query) throws IOException {
    final IndexSearcher searcher = new IndexSearcher(context);
    searcher.setQueryCache(null);
    SpanWeight w = (SpanWeight) searcher.createNormalizedWeight(query, false);
    PayloadSpanCollector collector = new PayloadSpanCollector();
    for (LeafReaderContext leafReaderContext : context.leaves()) {
        final Spans spans = w.getSpans(leafReaderContext, SpanWeight.Postings.PAYLOADS);
        if (spans != null) {
            while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
                while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
                    collector.reset();
                    spans.collect(collector);
                    payloads.addAll(collector.getPayloads());
                }
            }
        }
    }
}
Also used: IndexSearcher (org.apache.lucene.search.IndexSearcher), SpanWeight (org.apache.lucene.search.spans.SpanWeight), LeafReaderContext (org.apache.lucene.index.LeafReaderContext), Spans (org.apache.lucene.search.spans.Spans)
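
getPayloads is private; callers reach it through the public getPayloadsForQuery method. A minimal usage sketch follows; the package location shown for PayloadSpanUtil (org.apache.lucene.queries.payloads in Lucene 6.x), the field name "body" and the reader are assumptions for illustration.

import java.io.IOException;
import java.util.Collection;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.payloads.PayloadSpanUtil;
import org.apache.lucene.search.spans.SpanTermQuery;

public class PayloadSketch {

    // Collects the raw payload bytes stored at every position matching the span query.
    static Collection<byte[]> payloadsFor(IndexReader reader) throws IOException {
        // PayloadSpanUtil walks every leaf of the reader context, as getPayloads does above
        PayloadSpanUtil util = new PayloadSpanUtil(reader.getContext());
        return util.getPayloadsForQuery(new SpanTermQuery(new Term("body", "lucene")));
    }
}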

Aggregations

IndexSearcher (org.apache.lucene.search.IndexSearcher): 3 examples
SpanWeight (org.apache.lucene.search.spans.SpanWeight): 3 examples
Spans (org.apache.lucene.search.spans.Spans): 3 examples
LeafReaderContext (org.apache.lucene.index.LeafReaderContext): 2 examples
Term (org.apache.lucene.index.Term): 2 examples
ArrayList (java.util.ArrayList): 1 example
HashMap (java.util.HashMap): 1 example
HashSet (java.util.HashSet): 1 example
TwoPhaseIterator (org.apache.lucene.search.TwoPhaseIterator): 1 example
WeightedSpanTerm (org.apache.lucene.search.highlight.WeightedSpanTerm): 1 example
FieldMaskingSpanQuery (org.apache.lucene.search.spans.FieldMaskingSpanQuery): 1 example
SpanQuery (org.apache.lucene.search.spans.SpanQuery): 1 example
Bits (org.apache.lucene.util.Bits): 1 example