Search in sources:

Example 1 with SpanMultipleDistanceQuery

use of de.ids_mannheim.korap.query.SpanMultipleDistanceQuery in project Krill by KorAP.

the class SpanSequenceQueryWrapper method toFragmentQuery.

/**
 * Serialize the wrapped sequence to a {@link SpanQuery} object.
 *
 * <p>The first segment that serializes to a non-null query becomes the
 * left operand. Every later segment that serializes to a non-null query
 * is attached to it: with {@link SpanNextQuery} when the sequence is in
 * order and has no constraints, with {@link SpanDistanceQuery} when there
 * is exactly one constraint, and with {@link SpanMultipleDistanceQuery}
 * otherwise. Segments that serialize to {@code null} are skipped.
 *
 * @return A {@link SpanQuery} object, or {@code null} if the sequence has
 *         no segments, is null, or no segment serializes to a query.
 * @throws QueryException
 *             the pending constraint exception if one was recorded, or
 *             an exception with code 613 if the sequence consists of a
 *             single empty, optional or negative segment, contains
 *             unresolvable segments, or combines extended or problematic
 *             segments with constraints or unordered matching.
 */
public SpanQuery toFragmentQuery() throws QueryException {
    // There was a serialization failure not yet reported
    if (this.constraintException != null) {
        throw constraintException;
    }

    int size = this.segments.size();

    // Nothing to do
    if (size == 0 || this.isNull()) {
        return null;
    }

    // No real sequence - only one element
    if (size == 1) {
        // But the element may be expanded
        if (this.segments.get(0).isExtended() && (this.hasConstraints() || !this.isInOrder())) {
            throw new QueryException(613, limitationError);
        }

        // Unproblematic single query
        if (this.segments.get(0).maybeAnchor()) {
            return this.segments.get(0).retrieveNode(this.retrieveNode).toFragmentQuery();
        }
        if (this.segments.get(0).isEmpty()) {
            throw new QueryException(613, "Sequence is not allowed to be empty");
        }
        if (this.segments.get(0).isOptional()) {
            throw new QueryException(613, "Sequence is not allowed to be optional");
        }
        if (this.segments.get(0).isNegative()) {
            throw new QueryException(613, "Sequence is not allowed to be negative");
        }
    }

    // Try to solve the sequence once; short-circuiting keeps the original
    // nesting: the solver only runs when the sequence is not yet solved,
    // and the leading segment is only inspected when solving failed.
    if (!this.isSolved && !_solveProblematicSequence() && this.segments.get(0).maybeExtension()) {
        throw new QueryException(613, "Sequence contains unresolvable empty, optional, or negative segments");
    }

    // The element may be expanded. This is re-checked against the current
    // segment list rather than the size captured above.
    // NOTE(review): presumably because solving can change the segments - confirm.
    if (this.segments.size() == 1 && this.segments.get(0).isExtended() && (this.hasConstraints() || !this.isInOrder())) {
        throw new QueryException(613, limitationError);
    }

    // Create the initial query from the first valid segment.
    // After this loop, i is the index right behind that segment.
    SpanQuery query = null;
    int i = 0;
    while (query == null && i < this.segments.size()) {
        query = this.segments.get(i).retrieveNode(this.retrieveNode).toFragmentQuery();
        i++;
    }

    // No valid segment found
    if (query == null) {
        return null;
    }

    // NextQueries
    if (!this.hasConstraints() && this.isInOrder()) {
        for (; i < this.segments.size(); i++) {
            SpanQuery second = this.segments.get(i).retrieveNode(this.retrieveNode).toFragmentQuery();
            if (second == null) {
                continue;
            }
            query = new SpanNextQuery(query, second);
        }
        return query;
    }

    // DistanceQueries with problems
    if (this.hasConstraints() && this.isProblematic) {
        throw new QueryException(613, "Distance constraints not supported with empty, optional or negative operands");
    }

    // DistanceQueries (exactly one constraint) and MultipleDistanceQueries.
    // Word-unit and element-unit constraints are built with the very same
    // SpanDistanceQuery constructor call, so no branching on the unit is
    // needed here.
    boolean singleConstraint = this.constraints.size() == 1;
    DistanceConstraint constraint = singleConstraint ? this.constraints.get(0) : null;

    // Continue behind the first valid segment instead of restarting at
    // index 1; otherwise a leading segment that serialized to null would
    // cause the first valid segment to be combined with itself.
    // NOTE(review): optional segments are not rejected here; the original
    // carried a disabled check for them ("maybe important").
    for (; i < this.segments.size(); i++) {
        // No support for extended spans in constraints
        if (this.segments.get(i).isExtended()) {
            throw new QueryException(613, limitationError);
        }

        SpanQuery next = this.segments.get(i).retrieveNode(this.retrieveNode).toFragmentQuery();
        if (next == null) {
            continue;
        }

        if (singleConstraint) {
            query = new SpanDistanceQuery(query, next, constraint, true);
        } else {
            query = new SpanMultipleDistanceQuery(query, next, this.constraints, isInOrder, true);
        }
    }

    return query;
}
Also used : QueryException(de.ids_mannheim.korap.util.QueryException) SpanDistanceQuery(de.ids_mannheim.korap.query.SpanDistanceQuery) DistanceConstraint(de.ids_mannheim.korap.query.DistanceConstraint) DistanceConstraint(de.ids_mannheim.korap.query.DistanceConstraint) SpanMultipleDistanceQuery(de.ids_mannheim.korap.query.SpanMultipleDistanceQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) SpanNextQuery(de.ids_mannheim.korap.query.SpanNextQuery)

Example 2 with SpanMultipleDistanceQuery

use of de.ids_mannheim.korap.query.SpanMultipleDistanceQuery in project Krill by KorAP.

the class TestMultipleDistanceIndex method createQuery.

/**
 * Builds a {@link SpanMultipleDistanceQuery} between two term queries on
 * the "base" field.
 *
 * @param x           term text of the first operand
 * @param y           term text of the second operand
 * @param constraints distance constraints handed to the query unchanged
 * @param isOrdered   handed to the query unchanged as its ordering flag
 * @return the multiple distance query over both terms
 */
public SpanQuery createQuery(String x, String y, List<DistanceConstraint> constraints, boolean isOrdered) {
    Term firstTerm = new Term("base", x);
    Term secondTerm = new Term("base", y);
    // The trailing `true` is passed through exactly as before.
    // NOTE(review): presumably the payload-collection flag - confirm.
    return new SpanMultipleDistanceQuery(
            new SpanTermQuery(firstTerm),
            new SpanTermQuery(secondTerm),
            constraints,
            isOrdered,
            true);
}
Also used : SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) Term(org.apache.lucene.index.Term) SpanMultipleDistanceQuery(de.ids_mannheim.korap.query.SpanMultipleDistanceQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery)

Example 3 with SpanMultipleDistanceQuery

use of de.ids_mannheim.korap.query.SpanMultipleDistanceQuery in project Krill by KorAP.

the class TestRegexWildcardIndex method indexRegexWithinRewrite.

/**
 * Indexes a single ten-token document and checks how regex queries behave
 * when nested in other span queries (sequence, segment, or, within,
 * class, multiple distance). Each case asserts both the serialized query
 * string and the number of hits.
 */
@Test
public void indexRegexWithinRewrite() throws Exception {
    KrillIndex index = new KrillIndex();

    // One document with the ten surface tokens listed in the primary text
    FieldDocument doc = new FieldDocument();
    doc.addTV(
            "base",
            "affe afffe baum baumgarten steingarten franz hans haus efeu effe",
            "[(0-4)s:affe|_0$<i>0<i>4|-:t$<i>10]"
                    + "[(5-10)s:afffe|_1$<i>5<i>10]"
                    + "[(11-15)s:baum|_2$<i>11<i>15]"
                    + "[(16-26)s:baumgarten|_3$<i>16<i>26]"
                    + "[(27-38)s:steingarten|_4$<i>27<i>38]"
                    + "[(39-44)s:franz|_5$<i>39<i>44]"
                    + "[(45-49)s:hans|_6$<i>45<i>49]"
                    + "[(50-54)s:haus|_7$<i>50<i>54]"
                    + "[(55-59)s:efeu|_8$<i>55<i>59]"
                    + "[(60-64)s:effe|_9$<i>60<i>64]");
    index.addDoc(doc);
    index.commit();

    QueryBuilder builder = new QueryBuilder("base");

    // A sequence of two regexes containing a plain segment: one hit
    SpanQuery query = builder
            .contains(
                    builder.seq(builder.re("s:a.*e")).append(builder.re("s:af*e")),
                    builder.seg("s:affe"))
            .toQuery();
    assertEquals(
            "spanContain(spanNext(SpanMultiTermQueryWrapper(base:/s:a.*e/), SpanMultiTermQueryWrapper(base:/s:af*e/)), base:s:affe)",
            query.toString());

    Krill krill = new Krill(query);
    krill.getMeta().getContext().left.setToken(true).setLength(1);
    krill.getMeta().getContext().right.setToken(true).setLength(1);

    Result result = index.search(krill);
    assertEquals(1L, result.getTotalResults());
    assertEquals("[[affe afffe]] baum ...", result.getMatch(0).getSnippetBrackets());

    // Regex without matches as part of a sequence
    query = builder.seq(builder.re("s:z.*e")).append(builder.seg("s:affe")).toQuery();
    assertEquals(
            "spanNext(SpanMultiTermQueryWrapper(base:/s:z.*e/), base:s:affe)",
            query.toString());
    result = index.search(new Krill(query));
    assertEquals(0L, result.getTotalResults());

    // Regex without matches as part of a segment
    query = builder.seg().with(builder.re("s:z.*e")).with("s:affe").toQuery();
    assertEquals(
            "spanSegment(SpanMultiTermQueryWrapper(base:/s:z.*e/), base:s:affe)",
            query.toString());
    result = index.search(new Krill(query));
    assertEquals(0L, result.getTotalResults());

    // Regex without matches as one alternative: the other alternative still hits
    query = builder.or(builder.re("s:z.*e"), builder.seg("s:affe")).toQuery();
    assertEquals(
            "spanOr([SpanMultiTermQueryWrapper(base:/s:z.*e/), base:s:affe])",
            query.toString());
    result = index.search(new Krill(query));
    assertEquals(1L, result.getTotalResults());

    // Regex without matches as the first operand of within
    query = builder.within(builder.re("s:z.*e"), builder.seg("s:affe")).toQuery();
    assertEquals(
            "spanContain(SpanMultiTermQueryWrapper(base:/s:z.*e/), base:s:affe)",
            query.toString());
    result = index.search(new Krill(query));
    assertEquals(0L, result.getTotalResults());

    // Regex without matches as the second operand of within
    query = builder.within(builder.seg("s:affe"), builder.re("s:z.*e")).toQuery();
    assertEquals(
            "spanContain(base:s:affe, SpanMultiTermQueryWrapper(base:/s:z.*e/))",
            query.toString());
    result = index.search(new Krill(query));
    assertEquals(0L, result.getTotalResults());

    // Regex without matches wrapped in a class
    query = builder.nr(builder.re("s:z.*e")).toQuery();
    assertEquals("{1: SpanMultiTermQueryWrapper(base:/s:z.*e/)}", query.toString());
    result = index.search(new Krill(query));
    assertEquals(0L, result.getTotalResults());

    // Class-wrapped regex without matches nested in within
    query = builder.within(builder.nr(builder.re("s:z.*e")), builder.seg("s:affe")).toQuery();
    assertEquals(
            "spanContain({1: SpanMultiTermQueryWrapper(base:/s:z.*e/)}, base:s:affe)",
            query.toString());
    result = index.search(new Krill(query));
    assertEquals(0L, result.getTotalResults());

    // Regex without matches as operand of a multiple distance query
    List<DistanceConstraint> distances = new ArrayList<DistanceConstraint>();
    distances.add(TestMultipleDistanceIndex.createConstraint("w", 1, 2, true, false));
    distances.add(TestMultipleDistanceIndex.createConstraint("tokens", "base/s:s", 0, 0, true, false));

    query = new SpanMultipleDistanceQuery(
            builder.re("s:z.*e").toQuery(),
            builder.seg("s:affe").toQuery(),
            distances,
            true,
            true);
    assertEquals(
            "spanMultipleDistance(SpanMultiTermQueryWrapper(base:/s:z.*e/), " + "base:s:affe, [(w[1:2], ordered, notExcluded), (" + "base/s:s[0:0], ordered, notExcluded)])",
            query.toString());
    result = index.search(new Krill(query));
    assertEquals(0L, result.getTotalResults());

    // Same multiple distance query, with the regex wrapped in a class
    query = new SpanMultipleDistanceQuery(
            builder.nr(builder.re("s:z.*e")).toQuery(),
            builder.seg("s:affe").toQuery(),
            distances,
            true,
            true);
    assertEquals(
            "spanMultipleDistance({1: SpanMultiTermQueryWrapper(base:/s:z.*e/)}, " + "base:s:affe, [(w[1:2], ordered, notExcluded), (base/s:s[0:0], ordered, notExcluded)])",
            query.toString());
    result = index.search(new Krill(query));
    assertEquals(0L, result.getTotalResults());
}
Also used : Krill(de.ids_mannheim.korap.Krill) ArrayList(java.util.ArrayList) DistanceConstraint(de.ids_mannheim.korap.query.DistanceConstraint) QueryBuilder(de.ids_mannheim.korap.query.QueryBuilder) KrillIndex(de.ids_mannheim.korap.KrillIndex) SpanMultipleDistanceQuery(de.ids_mannheim.korap.query.SpanMultipleDistanceQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test)

Example 4 with SpanMultipleDistanceQuery

use of de.ids_mannheim.korap.query.SpanMultipleDistanceQuery in project Krill by KorAP.

the class TestWildcardIndex method testWildcardPlusWithCollection.

/**
 * Checks that a wildcard pattern containing '+' yields no matches, both
 * in a Lucene {@link SpanNearQuery} and in {@link SpanMultipleDistanceQuery}
 * (with and without class wrappers).
 */
@Test
public void testWildcardPlusWithCollection() throws IOException {
    ki = new KrillIndex();
    ki.addDoc(createFieldDoc1());
    ki.commit();

    // Corresponds to: mein+ /+w1:2,s0 &Erfahrung
    WildcardQuery plusPattern = new WildcardQuery(new Term("tokens", "s:mein+"));
    SpanMultiTermQueryWrapper<WildcardQuery> wildcardSpan =
            new SpanMultiTermQueryWrapper<WildcardQuery>(plusPattern);

    // Make sure Lucene's own span queries treat SpanOr([]) correctly
    SpanQuery[] nearClauses = new SpanQuery[] { wildcardSpan, sq };
    SpanQuery nearQuery = new SpanNearQuery(nearClauses, 1, true);
    kr = ki.search(nearQuery, (short) 10);
    // As described in http://korap.github.io/Koral/, '+' is not a valid wildcard
    assertEquals(0, kr.getMatches().size());

    // Multiple distance query with both operands wrapped in class 129
    SpanMultipleDistanceQuery classedQuery = new SpanMultipleDistanceQuery(
            new SpanClassQuery(wildcardSpan, (byte) 129),
            new SpanClassQuery(sq, (byte) 129),
            constraints,
            true,
            true);
    kr = ki.search(classedQuery, (short) 10);
    assertEquals(0, kr.getMatches().size());

    // Multiple distance query on the bare operands
    SpanMultipleDistanceQuery plainQuery =
            new SpanMultipleDistanceQuery(wildcardSpan, sq, constraints, true, true);
    kr = ki.search(plainQuery, (short) 10);
    assertEquals(0, kr.getMatches().size());
}
Also used : WildcardQuery(org.apache.lucene.search.WildcardQuery) SpanMultiTermQueryWrapper(org.apache.lucene.search.spans.SpanMultiTermQueryWrapper) SpanClassQuery(de.ids_mannheim.korap.query.SpanClassQuery) Term(org.apache.lucene.index.Term) KrillIndex(de.ids_mannheim.korap.KrillIndex) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) SpanMultipleDistanceQuery(de.ids_mannheim.korap.query.SpanMultipleDistanceQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) Test(org.junit.Test)

Example 5 with SpanMultipleDistanceQuery

use of de.ids_mannheim.korap.query.SpanMultipleDistanceQuery in project Krill by KorAP.

the class TestWildcardIndex method testWildcardQuestionMark1.

/**
 * Checks that the wildcard '?' in "s:meine?" produces three matches when
 * used as an operand of a class-wrapped {@link SpanMultipleDistanceQuery}.
 */
@Test
public void testWildcardQuestionMark1() throws IOException {
    ki = new KrillIndex();
    ki.addDoc(createFieldDoc1());
    ki.commit();

    // Wildcard ? means regex . (expects exactly one character)
    WildcardQuery questionMarkPattern = new WildcardQuery(new Term("tokens", "s:meine?"));
    SpanMultiTermQueryWrapper<WildcardQuery> wildcardSpan =
            new SpanMultiTermQueryWrapper<WildcardQuery>(questionMarkPattern);

    // Both operands are wrapped in class 129
    SpanClassQuery classedWildcard = new SpanClassQuery(wildcardSpan, (byte) 129);
    SpanClassQuery classedOther = new SpanClassQuery(sq, (byte) 129);
    SpanMultipleDistanceQuery distanceQuery =
            new SpanMultipleDistanceQuery(classedWildcard, classedOther, constraints, true, true);

    kr = ki.search(distanceQuery, (short) 10);
    assertEquals(3, kr.getMatches().size());
}
Also used : WildcardQuery(org.apache.lucene.search.WildcardQuery) SpanMultiTermQueryWrapper(org.apache.lucene.search.spans.SpanMultiTermQueryWrapper) SpanClassQuery(de.ids_mannheim.korap.query.SpanClassQuery) Term(org.apache.lucene.index.Term) KrillIndex(de.ids_mannheim.korap.KrillIndex) SpanMultipleDistanceQuery(de.ids_mannheim.korap.query.SpanMultipleDistanceQuery) Test(org.junit.Test)

Aggregations

SpanMultipleDistanceQuery (de.ids_mannheim.korap.query.SpanMultipleDistanceQuery) 14; Term (org.apache.lucene.index.Term) 12; Test (org.junit.Test) 12; SpanMultiTermQueryWrapper (org.apache.lucene.search.spans.SpanMultiTermQueryWrapper) 11; KrillIndex (de.ids_mannheim.korap.KrillIndex) 10; SpanClassQuery (de.ids_mannheim.korap.query.SpanClassQuery) 10; SpanQuery (org.apache.lucene.search.spans.SpanQuery) 8; WildcardQuery (org.apache.lucene.search.WildcardQuery) 7; RegexpQuery (org.apache.lucene.search.RegexpQuery) 4; DistanceConstraint (de.ids_mannheim.korap.query.DistanceConstraint) 3; SpanDistanceQuery (de.ids_mannheim.korap.query.SpanDistanceQuery) 2; ArrayList (java.util.ArrayList) 2; SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery) 2; Krill (de.ids_mannheim.korap.Krill) 1; QueryBuilder (de.ids_mannheim.korap.query.QueryBuilder) 1; SpanNextQuery (de.ids_mannheim.korap.query.SpanNextQuery) 1; SpanQueryWrapper (de.ids_mannheim.korap.query.wrap.SpanQueryWrapper) 1; Result (de.ids_mannheim.korap.response.Result) 1; QueryException (de.ids_mannheim.korap.util.QueryException) 1; SpanNearQuery (org.apache.lucene.search.spans.SpanNearQuery) 1