Search in sources :

Example 1 with SpanExpansionQuery

use of de.ids_mannheim.korap.query.SpanExpansionQuery in project Krill by KorAP.

the class SpanSequenceQueryWrapper method _merge.

/**
 * Merges a "problematic" segment (empty, negative or optional) into the
 * neighbouring anchor segment and returns the combined wrapper.
 *
 * <p>Three cases are handled, in this order:
 * <ol>
 * <li><b>Empty problem</b>: if neither segment carries a class and the
 * anchor is already extended in the requested direction, the anchor's
 * min/max are increased by the problem's min/max and the anchor itself is
 * returned (the anchor is mutated). Otherwise the anchor is wrapped in a
 * new {@code SpanExpansionQueryWrapper}.</li>
 * <li><b>Negative problem</b>: a {@code SpanExpansionQuery} is built from
 * the fragment queries of anchor and problem and wrapped in a
 * {@code SpanSimpleQueryWrapper}.</li>
 * <li><b>Anything else</b> is treated as optional: the result is an
 * alternation of the anchor alone and a sequence of anchor and problem,
 * e.g. {@code [base=der][base=baum]?} becomes
 * {@code [base=der] | [base=der][base=baum]}. {@code isOptional(false)}
 * is called on the problem for that purpose.</li>
 * </ol>
 *
 * <p>TODO: Deal with negative, empty and optional at once,
 * e.g. {@code [base=der][base!=Baum]?}
 *
 * @param anchor
 *            the segment the problem is merged into
 * @param problem
 *            the empty, negative or optional segment
 * @param mergeLeft
 *            if {@code true} the problem is merged to the left, i.e. the
 *            anchor is extended to the right (direction {@code 1});
 *            otherwise the anchor is extended to the left (direction
 *            {@code -1})
 * @return the wrapper representing anchor and problem combined
 * @throws QueryException
 *             declared for the query construction calls used here
 */
private SpanQueryWrapper _merge(SpanQueryWrapper anchor, SpanQueryWrapper problem, boolean mergeLeft) throws QueryException {
    // Extend to the right - merge to the left
    final int direction = mergeLeft ? 1 : -1;
    final boolean extendsRight = direction >= 0;
    if (DEBUG) {
        log.trace("Will merge two spans to {}", mergeLeft ? "left" : "right");
    }

    // Case 1: make empty extension to anchor
    if (problem.isEmpty()) {
        if (DEBUG) {
            log.trace("Problem is empty with class {}", problem.getClassNumber());
        }

        // Merge extensions!
        if (!problem.hasClass() && !anchor.hasClass() && anchor.isExtended()) {
            if (DEBUG) {
                log.trace("It may be possible to extend anchor with problem");
            }

            // Only an extension pointing in the same direction as the
            // existing one can be folded into the anchor.
            if (extendsRight == anchor.isExtendedToTheRight) {
                if (DEBUG) {
                    log.trace("Readjust min and max");
                }

                // Readjust the anchor.
                // Note: the anchor's optionality is deliberately left
                // untouched here - min is only relevant for extensions.
                anchor.setMin(anchor.getMin() + problem.getMin());
                anchor.setMax(anchor.getMax() + problem.getMax());
                return anchor;
            }
        }

        // Can't merge extensions - wrap the anchor in a new expansion
        SpanQueryWrapper expanded = new SpanExpansionQueryWrapper(anchor, problem.isOptional() ? 0 : problem.getMin(), problem.getMax(), direction, problem.hasClass() ? problem.getClassNumber() : (byte) 0).isExtended(true);

        // Set right extension
        if (extendsRight) {
            expanded.isExtendedToTheRight(true);
        }
        return expanded;
    }

    // Case 2: make negative extension to anchor
    if (problem.isNegative()) {
        if (DEBUG) {
            log.trace("Problem is negative with class {}", problem.getClassNumber());
        }

        // TODO: Should probably wrapped as well!
        // A sequence of negative tokens may expand jointly!
        SpanQuery query = new SpanExpansionQuery(anchor.retrieveNode(this.retrieveNode).toFragmentQuery(), problem.retrieveNode(this.retrieveNode).toFragmentQuery(), problem.getMin(), problem.getMax(), direction, problem.hasClass() ? problem.getClassNumber() : (byte) 0, true);
        SpanQueryWrapper expanded = new SpanSimpleQueryWrapper(query).isExtended(true);

        // Set right extension
        if (extendsRight) {
            expanded.isExtendedToTheRight(true);
        }
        return expanded;
    }

    // Case 3: problem is optional
    if (DEBUG) {
        log.trace("Problem is optional");
    }

    // [base=der][][base=Baum]?
    // [base=der][base=baum]?

    // First alternative: [base=der]
    SpanAlterQueryWrapper saqw = new SpanAlterQueryWrapper(this.field, anchor);

    // Second alternative starts with [base=der] ...
    SpanSequenceQueryWrapper ssqw = new SpanSequenceQueryWrapper(this.field, anchor);
    if (mergeLeft) {
        // ... and becomes [base=der][base=baum]
        ssqw.append(problem.isOptional(false));
    } else {
        // ... and becomes [base=baum][base=der]
        ssqw.prepend(problem.isOptional(false));
    }
    saqw.or(ssqw);
    return (SpanQueryWrapper) saqw;
}
Also used : SpanExpansionQuery(de.ids_mannheim.korap.query.SpanExpansionQuery) DistanceConstraint(de.ids_mannheim.korap.query.DistanceConstraint) SpanQuery(org.apache.lucene.search.spans.SpanQuery)

Example 2 with SpanExpansionQuery

use of de.ids_mannheim.korap.query.SpanExpansionQuery in project Krill by KorAP.

the class TestSpanExpansionIndex method testCase5.

/**
 * Expansion across the boundaries of a document.
 * An expansion reaching before the document start is cut to 0.
 * TODO: an expansion reaching past the document end is to be handled in
 * the rendering process.
 *
 * @throws IOException
 *             declared by the test signature
 */
@Test
public void testCase5() throws IOException {
    KrillIndex index = new KrillIndex();
    index.addDoc(createFieldDoc0());
    index.commit();

    SpanTermQuery anchor = new SpanTermQuery(new Term("base", "s:e"));

    // left expansion precedes 0
    SpanExpansionQuery leftExpansion = new SpanExpansionQuery(anchor, 2, 2, -1, true);
    kr = index.search(leftExpansion, (short) 10);
    assertEquals(4L, kr.getTotalResults());
    assertEquals(0, kr.getMatch(0).getStartPos());
    assertEquals(2, kr.getMatch(0).getEndPos());

    // right expansion exceeds end position
    SpanExpansionQuery rightExpansion = new SpanExpansionQuery(anchor, 3, 3, 0, true);
    kr = index.search(rightExpansion, (short) 10);
    assertEquals(4L, kr.getTotalResults());
    assertEquals(7, kr.getMatch(2).getStartPos());
    assertEquals(11, kr.getMatch(2).getEndPos());
    assertEquals(8, kr.getMatch(3).getStartPos());
    assertEquals(12, kr.getMatch(3).getEndPos());

    // For debugging, iterate over kr.getMatches() and print
    // getStartPos() / getEndPos() of every match.
}
Also used : SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) SpanExpansionQuery(de.ids_mannheim.korap.query.SpanExpansionQuery) Term(org.apache.lucene.index.Term) KrillIndex(de.ids_mannheim.korap.KrillIndex) Test(org.junit.Test)

Example 3 with SpanExpansionQuery

use of de.ids_mannheim.korap.query.SpanExpansionQuery in project Krill by KorAP.

the class TestSpanExpansionIndex method testCase2.

/**
 * Expansion with a class number: checks the positions of the matches and
 * the expansion offsets stored under class 1.
 */
@Test
public void testCase2() {
    final byte classNumber = 1;

    // create new payload for the expansion offsets
    SpanTermQuery termQuery = new SpanTermQuery(new Term("tokens", "s:des"));
    SpanExpansionQuery leftExpansion = new SpanExpansionQuery(termQuery, 0, 2, -1, classNumber, true);
    kr = ki.search(leftExpansion, (short) 10);

    assertEquals(5, kr.getMatch(0).getStartPos());
    assertEquals(8, kr.getMatch(0).getEndPos());

    // expansion 5,7
    assertEquals(5, kr.getMatch(0).getStartPos(1));
    assertEquals(7, kr.getMatch(0).getEndPos(1));

    // expansion offsets
    assertEquals(6, kr.getMatch(1).getStartPos(1));
    assertEquals(7, kr.getMatch(1).getEndPos(1));
    assertEquals(7, kr.getMatch(2).getStartPos(1));
    assertEquals(7, kr.getMatch(2).getEndPos(1));
    assertEquals(154, kr.getMatch(3).getStartPos(1));
    assertEquals(156, kr.getMatch(3).getEndPos(1));

    // add expansion offsets to the existing payload
    SpanElementQuery elementQuery = new SpanElementQuery("tokens", "base/s:s");
    SpanExpansionQuery rightExpansion = new SpanExpansionQuery(elementQuery, 1, 2, 0, classNumber, true);
    kr = ki.search(rightExpansion, (short) 10);

    assertEquals(13, kr.getMatch(0).getStartPos());
    assertEquals(26, kr.getMatch(0).getEndPos());

    assertEquals(13, kr.getMatch(1).getStartPos());
    assertEquals(27, kr.getMatch(1).getEndPos());

    assertEquals(25, kr.getMatch(2).getStartPos());
    assertEquals(35, kr.getMatch(2).getEndPos());
    assertEquals(34, kr.getMatch(2).getStartPos(1));
    assertEquals(35, kr.getMatch(2).getEndPos(1));

    assertEquals(25, kr.getMatch(3).getStartPos());
    assertEquals(36, kr.getMatch(3).getEndPos());
    assertEquals(34, kr.getMatch(3).getStartPos(1));
    assertEquals(36, kr.getMatch(3).getEndPos(1));

    // For debugging, iterate over kr.getMatches() and print
    // getStartPos() / getEndPos() / getSnippetBrackets() of every match.
}
Also used : SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) SpanExpansionQuery(de.ids_mannheim.korap.query.SpanExpansionQuery) Term(org.apache.lucene.index.Term) SpanElementQuery(de.ids_mannheim.korap.query.SpanElementQuery) Test(org.junit.Test)

Example 4 with SpanExpansionQuery

use of de.ids_mannheim.korap.query.SpanExpansionQuery in project Krill by KorAP.

the class TestSpanExpansionIndex method testCase6.

/**
 * Expansion with exclusion over an index holding multiple documents.
 *
 * @throws IOException
 *             declared by the test signature
 */
@Test
public void testCase6() throws IOException {
    KrillIndex index = new KrillIndex();
    // same doc
    index.addDoc(createFieldDoc0());
    // only not clause
    index.addDoc(createFieldDoc1());
    // only main clause
    index.addDoc(createFieldDoc2());
    index.commit();

    SpanTermQuery mainClause = new SpanTermQuery(new Term("base", "s:e"));
    SpanTermQuery notClause = new SpanTermQuery(new Term("base", "s:d"));
    SpanExpansionQuery exclusion = new SpanExpansionQuery(mainClause, notClause, 2, 3, 0, true);
    kr = index.search(exclusion, (short) 20);

    // notClause.doc() > firstSpans.doc()
    assertEquals(7, kr.getMatch(0).getStartPos());
    assertEquals(10, kr.getMatch(0).getEndPos());
    assertEquals(7, kr.getMatch(1).getStartPos());
    assertEquals(11, kr.getMatch(1).getEndPos());

    // !hasMoreNotClause
    assertEquals(2, kr.getMatch(4).getLocalDocID());
    assertEquals(1, kr.getMatch(4).getStartPos());
    assertEquals(4, kr.getMatch(4).getEndPos());
}
Also used : SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) SpanExpansionQuery(de.ids_mannheim.korap.query.SpanExpansionQuery) Term(org.apache.lucene.index.Term) KrillIndex(de.ids_mannheim.korap.KrillIndex) Test(org.junit.Test)

Example 5 with SpanExpansionQuery

use of de.ids_mannheim.korap.query.SpanExpansionQuery in project Krill by KorAP.

the class TestSpanExpansionIndex method testCase3.

/**
 * Right expansion with exclusion; the expansion offsets are stored under
 * class 1.
 */
@Test
public void testCase3() throws IOException {
    final byte classNumber = 1;

    SpanTermQuery mainClause = new SpanTermQuery(new Term("tokens", "tt/p:NN"));
    SpanTermQuery notClause = new SpanTermQuery(new Term("tokens", "s:Buchstabe"));
    SpanExpansionQuery exclusion = new SpanExpansionQuery(mainClause, notClause, 2, 3, 0, classNumber, true);
    kr = ki.search(exclusion, (short) 20);

    assertEquals(6, kr.getMatch(0).getStartPos());
    assertEquals(9, kr.getMatch(0).getEndPos());
    assertEquals(7, kr.getMatch(0).getStartPos(1));
    assertEquals(9, kr.getMatch(0).getEndPos(1));

    assertEquals(9, kr.getMatch(2).getStartPos());
    assertEquals(12, kr.getMatch(2).getEndPos());

    assertEquals(9, kr.getMatch(3).getStartPos());
    assertEquals(13, kr.getMatch(3).getEndPos());
    assertEquals(10, kr.getMatch(3).getStartPos(1));
    assertEquals(13, kr.getMatch(3).getEndPos(1));

    // For debugging, iterate over kr.getMatches() and print
    // getStartPos() / getEndPos() / getSnippetBrackets() of every match.
}
Also used : SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) SpanExpansionQuery(de.ids_mannheim.korap.query.SpanExpansionQuery) Term(org.apache.lucene.index.Term) Test(org.junit.Test)

Aggregations

SpanExpansionQuery (de.ids_mannheim.korap.query.SpanExpansionQuery): 9, Term (org.apache.lucene.index.Term): 8, Test (org.junit.Test): 8, SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery): 7, KrillIndex (de.ids_mannheim.korap.KrillIndex): 4, RegexpQuery (org.apache.lucene.search.RegexpQuery): 2, SpanMultiTermQueryWrapper (org.apache.lucene.search.spans.SpanMultiTermQueryWrapper): 2, DistanceConstraint (de.ids_mannheim.korap.query.DistanceConstraint): 1, SpanElementQuery (de.ids_mannheim.korap.query.SpanElementQuery): 1, SpanRepetitionQuery (de.ids_mannheim.korap.query.SpanRepetitionQuery): 1, SpanQuery (org.apache.lucene.search.spans.SpanQuery): 1