Search in sources :

Example 21 with SpanMultiTermQueryWrapper

use of org.apache.lucene.search.spans.SpanMultiTermQueryWrapper in project lucene-solr by apache.

the class TestUnifiedHighlighterStrictPhrases method testFilteredOutSpan.

/**
 * Highlights a document with an unordered span-near query (slop 3) made of
 * the wildcard {@code free*} and the term {@code speech}, and expects only
 * "freedom" and "speech" to be marked while the leading "freezing" is left
 * untouched.
 */
public void testFilteredOutSpan() throws IOException {
    indexWriter.addDocument(newDoc("freezing cold stuff like stuff freedom of speech"));
    initReaderSearcherHighlighter();

    // Clauses of the span-near query: a wildcard wrapped as a span query, and a plain term.
    SpanMultiTermQueryWrapper<WildcardQuery> freePrefixSpan =
            new SpanMultiTermQueryWrapper<>(new WildcardQuery(new Term("body", "free*")));
    SpanTermQuery speechSpan = new SpanTermQuery(new Term("body", "speech"));
    SpanQuery nearSpan = new SpanNearQuery(new SpanQuery[] { freePrefixSpan, speechSpan }, 3, false);

    BooleanQuery.Builder builder = new BooleanQuery.Builder();
    builder.add(nearSpan, BooleanClause.Occur.MUST);
    BooleanQuery query = builder.build();

    TopDocs hits = searcher.search(query, 10, Sort.INDEXORDER);
    String[] actualSnippets = highlighter.highlight("body", query, hits);
    String[] expectedSnippets = { "freezing cold stuff like stuff <b>freedom</b> of <b>speech</b>" };
    assertArrayEquals(expectedSnippets, actualSnippets);
}
Also used : TopDocs(org.apache.lucene.search.TopDocs) WildcardQuery(org.apache.lucene.search.WildcardQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) SpanMultiTermQueryWrapper(org.apache.lucene.search.spans.SpanMultiTermQueryWrapper) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) QueryBuilder(org.apache.lucene.util.QueryBuilder) Term(org.apache.lucene.index.Term) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery)

Example 22 with SpanMultiTermQueryWrapper

use of org.apache.lucene.search.spans.SpanMultiTermQueryWrapper in project SearchServices by Alfresco.

the class Solr4QueryParser method generateSpanOrQuery.

/**
 * Builds one span query per token sequence and combines them.
 *
 * <p>For each sequence:
 * <ul>
 * <li>If position increments are enabled and
 * {@code isAllTokensSequentiallyShifted(tokenSequence)} holds, the tokens are
 * wrapped with {@code wrapWildcardTerms} and joined into a single ordered
 * {@link SpanNearQuery} with slop 0 (or used directly when there is only one
 * token).</li>
 * <li>Otherwise, if position increments are enabled, tokens are chained
 * pairwise with {@link SpanNearQuery}; tokens seen while {@code gap == 0}
 * are collected and OR'ed together before being chained.</li>
 * <li>If position increments are disabled, every token is OR'ed into a
 * growing {@link SpanOrQuery}.</li>
 * </ul>
 *
 * @param field
 *            the field the generated terms are created for
 * @param fixedTokenSequences
 *            {@code LinkedList<LinkedList<PackedTokenAttributeImpl>>} - the
 *            token sequences to convert, one query part per sequence
 * @return the single part when exactly one sequence was supplied, otherwise
 *         a {@link SpanOrQuery} over all parts
 */
protected SpanQuery generateSpanOrQuery(String field, LinkedList<LinkedList<PackedTokenAttributeImpl>> fixedTokenSequences) {
    List<SpanQuery> spanOrQueryParts = new ArrayList<SpanQuery>();
    for (LinkedList<PackedTokenAttributeImpl> tokenSequence : fixedTokenSequences) {
        // Position increment of the previously processed token; starts at 1.
        int gap = 1;
        SpanQuery spanQuery = null;
        // Queries collected while gap == 0; they are OR'ed together when flushed.
        List<SpanQuery> atSamePositionSpanOrQueryParts = new ArrayList<SpanQuery>();
        // create flat nearQuery
        if (getEnablePositionIncrements() && isAllTokensSequentiallyShifted(tokenSequence)) {
            // there will be no tokens at same position
            List<SpanQuery> wildWrappedList = new ArrayList<SpanQuery>(tokenSequence.size());
            for (PackedTokenAttributeImpl token : tokenSequence) {
                wildWrappedList.add(wrapWildcardTerms(new Term(field, token.toString())));
            }
            if (wildWrappedList.size() == 1) {
                spanQuery = wildWrappedList.get(0);
            } else {
                spanQuery = new SpanNearQuery(wildWrappedList.toArray(new SpanQuery[wildWrappedList.size()]), 0, true);
            }
        } else {
            // Iterate rather than index: get(i) on a LinkedList is linear per call.
            for (PackedTokenAttributeImpl nextToken : tokenSequence) {
                Term term = new Term(field, nextToken.toString());
                if (getEnablePositionIncrements()) {
                    SpanQuery nextSpanQuery = wrapWildcardTerms(term);
                    if (gap == 0) {
                        atSamePositionSpanOrQueryParts.add(nextSpanQuery);
                    } else if (atSamePositionSpanOrQueryParts.isEmpty()) {
                        // Nothing pending: chain the new token straight onto the query so far.
                        spanQuery = joinWithGap(spanQuery, nextSpanQuery, gap);
                    } else {
                        // Flush the pending same-position group, then start a new group with this token.
                        spanQuery = joinWithGap(spanQuery, collapseSamePositionParts(atSamePositionSpanOrQueryParts), gap);
                        atSamePositionSpanOrQueryParts = new ArrayList<SpanQuery>();
                        atSamePositionSpanOrQueryParts.add(nextSpanQuery);
                    }
                    gap = nextToken.getPositionIncrement();
                } else {
                    // Same wildcard handling as the position-aware branches, via the shared helper.
                    SpanQuery nextSpanQuery = wrapWildcardTerms(term);
                    if (spanQuery == null) {
                        spanQuery = new SpanOrQuery(nextSpanQuery);
                    } else {
                        spanQuery = new SpanOrQuery(spanQuery, nextSpanQuery);
                    }
                }
            }
        }
        // Flush whatever is still pending at the end of the sequence.
        if (!atSamePositionSpanOrQueryParts.isEmpty()) {
            spanQuery = joinWithGap(spanQuery, collapseSamePositionParts(atSamePositionSpanOrQueryParts), gap);
        }
        // NOTE(review): spanQuery is still null here if no token contributed a query
        // (e.g. an empty sequence reaching the else branch) - confirm callers never pass one.
        spanOrQueryParts.add(spanQuery);
    }
    if (spanOrQueryParts.size() == 1) {
        return spanOrQueryParts.get(0);
    } else {
        return new SpanOrQuery(spanOrQueryParts.toArray(new SpanQuery[] {}));
    }
}

/**
 * Reduces a non-empty group of same-position queries to one query: the sole
 * element when there is exactly one, otherwise a {@link SpanOrQuery} over all
 * of them.
 */
private SpanQuery collapseSamePositionParts(List<SpanQuery> samePositionParts) {
    if (samePositionParts.size() == 1) {
        return samePositionParts.get(0);
    }
    return new SpanOrQuery(samePositionParts.toArray(new SpanQuery[] {}));
}

/**
 * Chains {@code right} after {@code left} with a {@link SpanNearQuery} whose
 * slop is {@code (gap - 1) + internalSlop} and which is ordered when
 * {@code internalSlop < 2}; returns {@code right} unchanged when there is no
 * {@code left} yet.
 */
private SpanQuery joinWithGap(SpanQuery left, SpanQuery right, int gap) {
    if (left == null) {
        return right;
    }
    return new SpanNearQuery(new SpanQuery[] { left, right }, (gap - 1) + internalSlop, internalSlop < 2);
}
Also used : SpanMultiTermQueryWrapper(org.apache.lucene.search.spans.SpanMultiTermQueryWrapper) ArrayList(java.util.ArrayList) Term(org.apache.lucene.index.Term) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) PackedTokenAttributeImpl(org.apache.lucene.analysis.tokenattributes.PackedTokenAttributeImpl) TopTermsSpanBooleanQueryRewrite(org.apache.lucene.search.spans.SpanMultiTermQueryWrapper.TopTermsSpanBooleanQueryRewrite) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery)

Example 23 with SpanMultiTermQueryWrapper

use of org.apache.lucene.search.spans.SpanMultiTermQueryWrapper in project SearchServices by Alfresco.

the class Solr4QueryParser method wrapWildcardTerms.

/**
 * Turns a term into a span query. Terms whose text contains {@code *} or
 * {@code ?} become a wildcard query wrapped in a
 * {@link SpanMultiTermQueryWrapper} that rewrites with
 * {@link TopTermsSpanBooleanQueryRewrite} limited by
 * {@code topTermSpanRewriteLimit}; all other terms become a plain
 * {@link SpanTermQuery}.
 *
 * @param term the term to wrap
 * @return the span query for the term
 */
private SpanQuery wrapWildcardTerms(org.apache.lucene.index.Term term) {
    String text = term.text();
    boolean hasWildcard = (text != null) && (text.contains("*") || text.contains("?"));
    if (!hasWildcard) {
        return new SpanTermQuery(term);
    }
    SpanMultiTermQueryWrapper<org.apache.lucene.search.WildcardQuery> wildcardSpan =
            new SpanMultiTermQueryWrapper<org.apache.lucene.search.WildcardQuery>(new org.apache.lucene.search.WildcardQuery(term));
    wildcardSpan.setRewriteMethod(new TopTermsSpanBooleanQueryRewrite(topTermSpanRewriteLimit));
    return wildcardSpan;
}
Also used : TopTermsSpanBooleanQueryRewrite(org.apache.lucene.search.spans.SpanMultiTermQueryWrapper.TopTermsSpanBooleanQueryRewrite) SpanMultiTermQueryWrapper(org.apache.lucene.search.spans.SpanMultiTermQueryWrapper) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery)

Example 24 with SpanMultiTermQueryWrapper

use of org.apache.lucene.search.spans.SpanMultiTermQueryWrapper in project Krill by KorAP.

the class TestMultipleDistanceIndex method testQueryWithWildCard.

/**
 * Checks that a wildcard query wrapped in a {@link SpanMultiTermQueryWrapper}
 * can be searched on its own and as the first operand of a
 * {@link SpanMultipleDistanceQuery}, first against a single document and then
 * after three more documents have been added.
 */
@Test
public void testQueryWithWildCard() throws IOException {
    // meine* /+w1:2,s0 &Erfahrung
    ki = new KrillIndex();
    ki.addDoc(createFieldDoc5());
    ki.commit();
    // Check simple rewriting
    WildcardQuery wcquery = new WildcardQuery(new Term("tokens", "s:Meine*"));
    SpanMultiTermQueryWrapper<WildcardQuery> mtq = new SpanMultiTermQueryWrapper<WildcardQuery>(wcquery);
    // JUnit's assertEquals takes (expected, actual); keeping that order makes failure messages accurate.
    assertEquals("tokens:s:Meine*", wcquery.toString());
    kr = ki.search(mtq, (short) 10);
    assertEquals(4, kr.getMatches().size());
    assertEquals(0, kr.getMatch(0).getStartPos());
    assertEquals(1, kr.getMatch(0).getEndPos());
    // Check rewriting in multidistance query
    SpanQuery sq = new SpanTermQuery(new Term("tokens", "l:Erfahrung"));
    kr = ki.search(sq, (short) 10);
    assertEquals(4, kr.getMatches().size());
    List<DistanceConstraint> constraints = new ArrayList<DistanceConstraint>();
    constraints.add(createConstraint("w", 1, 2, true, false));
    constraints.add(createConstraint("tokens", "s", 0, 0, true, false));
    SpanQuery mdsq = new SpanMultipleDistanceQuery(mtq, sq, constraints, true, true);
    assertEquals("spanMultipleDistance(SpanMultiTermQueryWrapper(tokens:s:Meine*), " + "tokens:l:Erfahrung, [(w[1:2], ordered, notExcluded), (s[0:0], " + "ordered, notExcluded)])", mdsq.toString());
    kr = ki.search(mdsq, (short) 10);
    assertEquals(3, kr.getMatches().size());
    assertEquals(0, kr.getMatch(0).getStartPos());
    assertEquals(2, kr.getMatch(0).getEndPos());
    // Check skipping with multiple documents
    ki.addDoc(createFieldDoc6());
    ki.addDoc(createFieldDoc7());
    ki.addDoc(createFieldDoc8());
    ki.commit();
    kr = ki.search(mdsq, (short) 10);
    assertEquals(6, kr.getMatches().size());
}
Also used : WildcardQuery(org.apache.lucene.search.WildcardQuery) SpanMultiTermQueryWrapper(org.apache.lucene.search.spans.SpanMultiTermQueryWrapper) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) ArrayList(java.util.ArrayList) DistanceConstraint(de.ids_mannheim.korap.query.DistanceConstraint) Term(org.apache.lucene.index.Term) KrillIndex(de.ids_mannheim.korap.KrillIndex) SpanMultipleDistanceQuery(de.ids_mannheim.korap.query.SpanMultipleDistanceQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) Test(org.junit.Test)

Example 25 with SpanMultiTermQueryWrapper

use of org.apache.lucene.search.spans.SpanMultiTermQueryWrapper in project Krill by KorAP.

the class TestRegexIndex method testWildcardPlusRewritten.

/**
 * Searches a multiple-distance query whose first operand is the regular
 * expression {@code s:meine.?} wrapped as a span query, and expects four
 * matches in the single indexed document.
 */
@Test
public void testWildcardPlusRewritten() throws IOException {
    ki = new KrillIndex();
    ki.addDoc(createFieldDoc1());
    ki.commit();
    // C2 meine+ /+w1:2,s0 &Erfahrung
    // meine+ rewritten into meine.?
    RegexpQuery regexQuery = new RegexpQuery(new Term("tokens", "s:meine.?"));
    SpanMultiTermQueryWrapper<RegexpQuery> regexSpan = new SpanMultiTermQueryWrapper<RegexpQuery>(regexQuery);
    // Both operands are wrapped in a SpanClassQuery with the same class number.
    SpanClassQuery firstOperand = new SpanClassQuery(regexSpan, (byte) 129);
    // NOTE(review): sq and constraints are not declared in this method - presumably
    // members defined elsewhere in the test class; verify.
    SpanClassQuery secondOperand = new SpanClassQuery(sq, (byte) 129);
    SpanMultipleDistanceQuery distanceQuery = new SpanMultipleDistanceQuery(firstOperand, secondOperand, constraints, true, true);
    kr = ki.search(distanceQuery, (short) 10);
    assertEquals(4, kr.getMatches().size());
}
Also used : SpanMultiTermQueryWrapper(org.apache.lucene.search.spans.SpanMultiTermQueryWrapper) SpanClassQuery(de.ids_mannheim.korap.query.SpanClassQuery) Term(org.apache.lucene.index.Term) KrillIndex(de.ids_mannheim.korap.KrillIndex) SpanMultipleDistanceQuery(de.ids_mannheim.korap.query.SpanMultipleDistanceQuery) RegexpQuery(org.apache.lucene.search.RegexpQuery) Test(org.junit.Test)

Aggregations

SpanMultiTermQueryWrapper (org.apache.lucene.search.spans.SpanMultiTermQueryWrapper) 31, Term (org.apache.lucene.index.Term) 27, SpanQuery (org.apache.lucene.search.spans.SpanQuery) 19, WildcardQuery (org.apache.lucene.search.WildcardQuery) 15, RegexpQuery (org.apache.lucene.search.RegexpQuery) 14, Test (org.junit.Test) 14, SpanNearQuery (org.apache.lucene.search.spans.SpanNearQuery) 12, SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery) 12, KrillIndex (de.ids_mannheim.korap.KrillIndex) 11, SpanMultipleDistanceQuery (de.ids_mannheim.korap.query.SpanMultipleDistanceQuery) 11, SpanClassQuery (de.ids_mannheim.korap.query.SpanClassQuery) 10, SpanOrQuery (org.apache.lucene.search.spans.SpanOrQuery) 10, Query (org.apache.lucene.search.Query) 9, BooleanQuery (org.apache.lucene.search.BooleanQuery) 8, BoostQuery (org.apache.lucene.search.BoostQuery) 8, TermQuery (org.apache.lucene.search.TermQuery) 8, SpanBoostQuery (org.apache.lucene.search.spans.SpanBoostQuery) 8, ConstantScoreQuery (org.apache.lucene.search.ConstantScoreQuery) 7, PrefixQuery (org.apache.lucene.search.PrefixQuery) 7, TermRangeQuery (org.apache.lucene.search.TermRangeQuery) 7