Search in sources :

Example 21 with SpanQuery

use of org.apache.lucene.search.spans.SpanQuery in project lucene-solr by apache.

the class PayloadCheckQParserPlugin method createParser.

@Override
public QParser createParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) {
    return new QParser(qstr, localParams, params, req) {

        @Override
        public Query parse() throws SyntaxError {
            String field = localParams.get(QueryParsing.F);
            String value = localParams.get(QueryParsing.V);
            String p = localParams.get("payloads");
            if (field == null) {
                throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "'f' not specified");
            }
            if (value == null) {
                throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "query string missing");
            }
            if (p == null) {
                throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "'payloads' not specified");
            }
            FieldType ft = req.getCore().getLatestSchema().getFieldType(field);
            Analyzer analyzer = ft.getQueryAnalyzer();
            SpanQuery query = null;
            try {
                query = PayloadUtils.createSpanQuery(field, value, analyzer);
            } catch (IOException e) {
                throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
            }
            if (query == null) {
                throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "SpanQuery is null");
            }
            PayloadEncoder encoder = null;
            String e = PayloadUtils.getPayloadEncoder(ft);
            if ("float".equals(e)) {
                // TODO: centralize this string->PayloadEncoder logic (see DelimitedPayloadTokenFilterFactory)
                encoder = new FloatEncoder();
            } else if ("integer".equals(e)) {
                encoder = new IntegerEncoder();
            } else if ("identity".equals(e)) {
                encoder = new IdentityEncoder();
            }
            if (encoder == null) {
                throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "invalid encoder: " + e + " for field: " + field);
            }
            List<BytesRef> payloads = new ArrayList<>();
            // since payloads (most likely) came in whitespace delimited, just split
            String[] rawPayloads = p.split(" ");
            for (String rawPayload : rawPayloads) {
                if (rawPayload.length() > 0)
                    payloads.add(encoder.encode(rawPayload.toCharArray()));
            }
            return new SpanPayloadCheckQuery(query, payloads);
        }
    };
}
Also used : PayloadEncoder(org.apache.lucene.analysis.payloads.PayloadEncoder) SpanPayloadCheckQuery(org.apache.lucene.queries.payloads.SpanPayloadCheckQuery) FloatEncoder(org.apache.lucene.analysis.payloads.FloatEncoder) ArrayList(java.util.ArrayList) IOException(java.io.IOException) IdentityEncoder(org.apache.lucene.analysis.payloads.IdentityEncoder) Analyzer(org.apache.lucene.analysis.Analyzer) FieldType(org.apache.solr.schema.FieldType) SpanQuery(org.apache.lucene.search.spans.SpanQuery) SolrException(org.apache.solr.common.SolrException) BytesRef(org.apache.lucene.util.BytesRef) IntegerEncoder(org.apache.lucene.analysis.payloads.IntegerEncoder)

Example 22 with SpanQuery

use of org.apache.lucene.search.spans.SpanQuery in project lucene-solr by apache.

the class PayloadUtils method createSpanQuery.

/**
   * The generated SpanQuery will be either a SpanTermQuery or an ordered, zero slop SpanNearQuery, depending
   * on how many tokens are emitted.
   */
public static SpanQuery createSpanQuery(String field, String value, Analyzer analyzer) throws IOException {
    // adapted this from QueryBuilder.createSpanQuery (which isn't currently public) and added reset(), end(), and close() calls
    List<SpanTermQuery> terms = new ArrayList<>();
    try (TokenStream in = analyzer.tokenStream(field, value)) {
        in.reset();
        TermToBytesRefAttribute termAtt = in.getAttribute(TermToBytesRefAttribute.class);
        while (in.incrementToken()) {
            terms.add(new SpanTermQuery(new Term(field, termAtt.getBytesRef())));
        }
        in.end();
    }
    SpanQuery query;
    if (terms.isEmpty()) {
        query = null;
    } else if (terms.size() == 1) {
        query = terms.get(0);
    } else {
        query = new SpanNearQuery(terms.toArray(new SpanTermQuery[terms.size()]), 0, true);
    }
    return query;
}
Also used : TokenStream(org.apache.lucene.analysis.TokenStream) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) TermToBytesRefAttribute(org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute) ArrayList(java.util.ArrayList) Term(org.apache.lucene.index.Term) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery)

Example 23 with SpanQuery

use of org.apache.lucene.search.spans.SpanQuery in project lucene-solr by apache.

the class WeightedSpanTermExtractor method extract.

/**
   * Fills a <code>Map</code> with {@link WeightedSpanTerm}s using the terms from the supplied <code>Query</code>.
   * 
   * @param query
   *          Query to extract Terms from
   * @param terms
   *          Map to place created WeightedSpanTerms in
   * @throws IOException If there is a low-level I/O error
   */
protected void extract(Query query, float boost, Map<String, WeightedSpanTerm> terms) throws IOException {
    if (query instanceof BoostQuery) {
        BoostQuery boostQuery = (BoostQuery) query;
        extract(boostQuery.getQuery(), boost * boostQuery.getBoost(), terms);
    } else if (query instanceof BooleanQuery) {
        for (BooleanClause clause : (BooleanQuery) query) {
            if (!clause.isProhibited()) {
                extract(clause.getQuery(), boost, terms);
            }
        }
    } else if (query instanceof PhraseQuery) {
        PhraseQuery phraseQuery = ((PhraseQuery) query);
        Term[] phraseQueryTerms = phraseQuery.getTerms();
        if (phraseQueryTerms.length == 1) {
            extractWeightedSpanTerms(terms, new SpanTermQuery(phraseQueryTerms[0]), boost);
        } else {
            SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.length];
            for (int i = 0; i < phraseQueryTerms.length; i++) {
                clauses[i] = new SpanTermQuery(phraseQueryTerms[i]);
            }
            // sum position increments beyond 1
            int positionGaps = 0;
            int[] positions = phraseQuery.getPositions();
            if (positions.length >= 2) {
                // positions are in increasing order.   max(0,...) is just a safeguard.
                positionGaps = Math.max(0, positions[positions.length - 1] - positions[0] - positions.length + 1);
            }
            //if original slop is 0 then require inOrder
            boolean inorder = (phraseQuery.getSlop() == 0);
            SpanNearQuery sp = new SpanNearQuery(clauses, phraseQuery.getSlop() + positionGaps, inorder);
            extractWeightedSpanTerms(terms, sp, boost);
        }
    } else if (query instanceof TermQuery || query instanceof SynonymQuery) {
        extractWeightedTerms(terms, query, boost);
    } else if (query instanceof SpanQuery) {
        extractWeightedSpanTerms(terms, (SpanQuery) query, boost);
    } else if (query instanceof ConstantScoreQuery) {
        final Query q = ((ConstantScoreQuery) query).getQuery();
        if (q != null) {
            extract(q, boost, terms);
        }
    } else if (query instanceof CommonTermsQuery) {
        // specialized since rewriting would change the result query 
        // this query is TermContext sensitive.
        extractWeightedTerms(terms, query, boost);
    } else if (query instanceof DisjunctionMaxQuery) {
        for (Query clause : ((DisjunctionMaxQuery) query)) {
            extract(clause, boost, terms);
        }
    } else if (query instanceof ToParentBlockJoinQuery) {
        extract(((ToParentBlockJoinQuery) query).getChildQuery(), boost, terms);
    } else if (query instanceof ToChildBlockJoinQuery) {
        extract(((ToChildBlockJoinQuery) query).getParentQuery(), boost, terms);
    } else if (query instanceof MultiPhraseQuery) {
        final MultiPhraseQuery mpq = (MultiPhraseQuery) query;
        final Term[][] termArrays = mpq.getTermArrays();
        final int[] positions = mpq.getPositions();
        if (positions.length > 0) {
            int maxPosition = positions[positions.length - 1];
            for (int i = 0; i < positions.length - 1; ++i) {
                if (positions[i] > maxPosition) {
                    maxPosition = positions[i];
                }
            }
            @SuppressWarnings({ "unchecked", "rawtypes" }) final List<SpanQuery>[] disjunctLists = new List[maxPosition + 1];
            int distinctPositions = 0;
            for (int i = 0; i < termArrays.length; ++i) {
                final Term[] termArray = termArrays[i];
                List<SpanQuery> disjuncts = disjunctLists[positions[i]];
                if (disjuncts == null) {
                    disjuncts = (disjunctLists[positions[i]] = new ArrayList<>(termArray.length));
                    ++distinctPositions;
                }
                for (Term aTermArray : termArray) {
                    disjuncts.add(new SpanTermQuery(aTermArray));
                }
            }
            int positionGaps = 0;
            int position = 0;
            final SpanQuery[] clauses = new SpanQuery[distinctPositions];
            for (List<SpanQuery> disjuncts : disjunctLists) {
                if (disjuncts != null) {
                    clauses[position++] = new SpanOrQuery(disjuncts.toArray(new SpanQuery[disjuncts.size()]));
                } else {
                    ++positionGaps;
                }
            }
            if (clauses.length == 1) {
                extractWeightedSpanTerms(terms, clauses[0], boost);
            } else {
                final int slop = mpq.getSlop();
                final boolean inorder = (slop == 0);
                SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder);
                extractWeightedSpanTerms(terms, sp, boost);
            }
        }
    } else if (query instanceof MatchAllDocsQuery) {
    //nothing
    } else if (query instanceof CustomScoreQuery) {
        extract(((CustomScoreQuery) query).getSubQuery(), boost, terms);
    } else if (isQueryUnsupported(query.getClass())) {
    // nothing
    } else {
        if (query instanceof MultiTermQuery && (!expandMultiTermQuery || !fieldNameComparator(((MultiTermQuery) query).getField()))) {
            return;
        }
        Query origQuery = query;
        final IndexReader reader = getLeafContext().reader();
        Query rewritten;
        if (query instanceof MultiTermQuery) {
            rewritten = MultiTermQuery.SCORING_BOOLEAN_REWRITE.rewrite(reader, (MultiTermQuery) query);
        } else {
            rewritten = origQuery.rewrite(reader);
        }
        if (rewritten != origQuery) {
            // only rewrite once and then flatten again - the rewritten query could have a special treatment
            // if this method is overwritten in a subclass or above in the next recursion
            extract(rewritten, boost, terms);
        } else {
            extractUnknownQuery(query, terms);
        }
    }
}
Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) Query(org.apache.lucene.search.Query) CommonTermsQuery(org.apache.lucene.queries.CommonTermsQuery) PhraseQuery(org.apache.lucene.search.PhraseQuery) MultiTermQuery(org.apache.lucene.search.MultiTermQuery) ToChildBlockJoinQuery(org.apache.lucene.search.join.ToChildBlockJoinQuery) SpanFirstQuery(org.apache.lucene.search.spans.SpanFirstQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) FieldMaskingSpanQuery(org.apache.lucene.search.spans.FieldMaskingSpanQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) SpanNotQuery(org.apache.lucene.search.spans.SpanNotQuery) CustomScoreQuery(org.apache.lucene.queries.CustomScoreQuery) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) TermQuery(org.apache.lucene.search.TermQuery) SynonymQuery(org.apache.lucene.search.SynonymQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) ToParentBlockJoinQuery(org.apache.lucene.search.join.ToParentBlockJoinQuery) BoostQuery(org.apache.lucene.search.BoostQuery) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) SynonymQuery(org.apache.lucene.search.SynonymQuery) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery) BoostQuery(org.apache.lucene.search.BoostQuery) CommonTermsQuery(org.apache.lucene.queries.CommonTermsQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) CustomScoreQuery(org.apache.lucene.queries.CustomScoreQuery) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) List(java.util.List) ArrayList(java.util.ArrayList) MultiTermQuery(org.apache.lucene.search.MultiTermQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) TermQuery(org.apache.lucene.search.TermQuery) PhraseQuery(org.apache.lucene.search.PhraseQuery) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery) MultiTermQuery(org.apache.lucene.search.MultiTermQuery) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) Term(org.apache.lucene.index.Term) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) ToChildBlockJoinQuery(org.apache.lucene.search.join.ToChildBlockJoinQuery) FieldMaskingSpanQuery(org.apache.lucene.search.spans.FieldMaskingSpanQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) BooleanClause(org.apache.lucene.search.BooleanClause) ToParentBlockJoinQuery(org.apache.lucene.search.join.ToParentBlockJoinQuery) IndexReader(org.apache.lucene.index.IndexReader) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery)

Example 24 with SpanQuery

use of org.apache.lucene.search.spans.SpanQuery in project lucene-solr by apache.

the class PhraseHelper method getTermToSpans.

/**
   * Collect a list of pre-positioned {@link Spans} for each term, given a reader that has just one document.
   * It returns no mapping for query terms that occurs in a position insensitive way which therefore don't
   * need to be filtered.
   */
Map<BytesRef, Spans> getTermToSpans(LeafReader leafReader, int doc) throws IOException {
    if (spanQueries.isEmpty()) {
        return Collections.emptyMap();
    }
    final LeafReader filteredReader = new SingleFieldFilterLeafReader(leafReader, fieldName);
    // for each SpanQuery, collect the member spans into a map.
    Map<BytesRef, Spans> result = new HashMap<>();
    for (SpanQuery spanQuery : spanQueries) {
        getTermToSpans(spanQuery, filteredReader.getContext(), doc, result);
    }
    return result;
}
Also used : FilterLeafReader(org.apache.lucene.index.FilterLeafReader) LeafReader(org.apache.lucene.index.LeafReader) HashMap(java.util.HashMap) BytesRef(org.apache.lucene.util.BytesRef) Spans(org.apache.lucene.search.spans.Spans) SpanQuery(org.apache.lucene.search.spans.SpanQuery)

Example 25 with SpanQuery

use of org.apache.lucene.search.spans.SpanQuery in project lucene-solr by apache.

the class TestMemoryIndexAgainstRAMDir method testNullPointerException.

// LUCENE-3831
public void testNullPointerException() throws IOException {
    RegexpQuery regex = new RegexpQuery(new Term("field", "worl."));
    SpanQuery wrappedquery = new SpanMultiTermQueryWrapper<>(regex);
    MemoryIndex mindex = randomMemoryIndex();
    mindex.addField("field", new MockAnalyzer(random()).tokenStream("field", "hello there"));
    // This throws an NPE
    assertEquals(0, mindex.search(wrappedquery), 0.00001f);
    TestUtil.checkReader(mindex.createSearcher().getIndexReader());
}
Also used : SpanMultiTermQueryWrapper(org.apache.lucene.search.spans.SpanMultiTermQueryWrapper) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) RegexpQuery(org.apache.lucene.search.RegexpQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery)

Aggregations

SpanQuery (org.apache.lucene.search.spans.SpanQuery)316 Test (org.junit.Test)217 Term (org.apache.lucene.index.Term)127 SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery)126 KrillIndex (de.ids_mannheim.korap.KrillIndex)110 SpanQueryWrapper (de.ids_mannheim.korap.query.wrap.SpanQueryWrapper)65 SpanNearQuery (org.apache.lucene.search.spans.SpanNearQuery)52 Result (de.ids_mannheim.korap.response.Result)48 QueryBuilder (de.ids_mannheim.korap.query.QueryBuilder)39 SpanOrQuery (org.apache.lucene.search.spans.SpanOrQuery)35 ArrayList (java.util.ArrayList)34 SpanElementQuery (de.ids_mannheim.korap.query.SpanElementQuery)32 SpanNextQuery (de.ids_mannheim.korap.query.SpanNextQuery)31 Query (org.apache.lucene.search.Query)30 IndexReader (org.apache.lucene.index.IndexReader)19 BooleanQuery (org.apache.lucene.search.BooleanQuery)19 IndexSearcher (org.apache.lucene.search.IndexSearcher)18 SpanBoostQuery (org.apache.lucene.search.spans.SpanBoostQuery)18 SpanMultiTermQueryWrapper (org.apache.lucene.search.spans.SpanMultiTermQueryWrapper)18 Document (org.apache.lucene.document.Document)17