Search in sources:

Example 36 with SpanElementQuery

use of de.ids_mannheim.korap.query.SpanElementQuery in project Krill by KorAP.

the class TestElementIndex method indexExample3.

/**
 * Indexes six documents (four with nested or repeated {@code <a>} elements,
 * two without any elements) and checks that a {@link SpanElementQuery} for
 * {@code a} on field {@code base} reports twelve matches with the expected
 * start and end positions.
 */
@Test
public void indexExample3() throws IOException {
    KrillIndex ki = new KrillIndex();
    // Single token "xyz" carrying three <a> elements and one <b> element
    FieldDocument fd = new FieldDocument();
    fd.addTV("base", "xyz", "[(0-3)s:xyz|<>:a$<b>64<i>0<i>3<i>0<b>0|" + "<>:a$<b>64<i>0<i>3<i>0<b>0|" + "<>:a$<b>64<i>0<i>3<i>0<b>0|<>:b$<b>64<i>0<i>3<i>0<b>0]");
    ki.addDoc(fd);
    // <a><b>x<a>y<a>zcde</a>cde</a>cde</b></a>
    fd = new FieldDocument();
    fd.addTV("base", "x  y  z  c  d  e  c  d  e  c  d  e  ", "[(0-3)s:x|<>:a$<b>64<i>0<i>36<i>12<b>0|<>:b$<b>64<i>0<i>36<i>12<b>0]" + "[(3-6)s:y|<>:a$<b>64<i>3<i>27<i>9<b>0]" + "[(6-9)s:z|<>:a$<b>64<i>6<i>18<i>6]" + "[(9-12)s:c<b>0]" + "[(12-15)s:d]" + "[(15-18)s:e]" + "[(18-21)s:c]" + "[(21-24)s:d]" + "[(24-27)s:e]" + "[(27-30)s:c]" + "[(30-33)s:d]" + "[(33-36)s:e]");
    ki.addDoc(fd);
    // xyz (no elements)
    fd = new FieldDocument();
    fd.addTV("base", "x  y  z  ", "[(0-3)s:x]" + "[(3-6)s:y]" + "[(6-9)s:z]");
    ki.addDoc(fd);
    // <a>x<a><b>y<a>zcde</a>cde</b></a>cde</a>
    fd = new FieldDocument();
    fd.addTV("base", "x  y  z  k  l  m  k  l  m  k  l  m  ", "[(0-3)s:x|<>:a$<b>64<i>0<i>3<i>12<b>0]" + "[(3-6)s:y|<>:a$<b>64<i>3<i>6<i>9<b>0|<>:b$<b>64<i>3<i>6<i>9<b>0]" + "[(6-9)s:z|<>:a$<b>64<i>6<i>9<i>6<b>0]" + "[(9-12)s:k<b>0]" + "[(12-15)s:l]" + "[(15-18)s:m]" + "[(18-21)s:k]" + "[(21-24)s:l]" + "[(24-27)s:m]" + "[(27-30)s:k]" + "[(30-33)s:l]" + "[(33-36)s:m]");
    ki.addDoc(fd);
    // Three <a> elements all starting at the first token "h"
    fd = new FieldDocument();
    fd.addTV("base", "h  h        i  j  h  i  j  h  i  j  ", "[(0-3)s:h|" + "<>:a$<b>64<i>0<i>18<i>3<b>0|" + "<>:a$<b>64<i>0<i>27<i>6<b>0|" + "<>:a$<b>64<i>0<i>36<i>9<b>0]" + "[(3-6)s:h]" + "[(12-15)s:i]" + "[(15-18)s:j]" + "[(18-21)s:h]" + "[(21-24)s:i]" + "[(24-27)s:j]" + "[(27-30)s:h]" + "[(30-33)s:i]" + "[(33-36)s:j]");
    ki.addDoc(fd);
    // abc (no elements)
    fd = new FieldDocument();
    fd.addTV("base", "a  b  c  ", "[(0-3)s:a]" + "[(3-6)s:b]" + "[(6-9)s:c]");
    ki.addDoc(fd);
    // Save documents
    ki.commit();
    assertEquals(6, ki.numberOf("documents"));
    SpanQuery sq = new SpanElementQuery("base", "a");
    Result kr = ki.search(sq, (short) 15);
    // System.err.println(kr.toJSON());
    // Expected value first, so a failure reports expected/actual the right way round
    assertEquals("totalResults", 12, kr.getTotalResults());
    assertEquals("StartPos (0)", 0, kr.getMatch(0).startPos);
    assertEquals("EndPos (0)", 0, kr.getMatch(0).endPos);
    assertEquals("StartPos (1)", 0, kr.getMatch(1).startPos);
    assertEquals("EndPos (1)", 0, kr.getMatch(1).endPos);
    assertEquals("StartPos (2)", 0, kr.getMatch(2).startPos);
    assertEquals("EndPos (2)", 0, kr.getMatch(2).endPos);
    assertEquals("StartPos (3)", 0, kr.getMatch(3).startPos);
    assertEquals("EndPos (3)", 12, kr.getMatch(3).endPos);
    assertEquals("StartPos (4)", 1, kr.getMatch(4).startPos);
    assertEquals("EndPos (4)", 9, kr.getMatch(4).endPos);
    assertEquals("StartPos (5)", 2, kr.getMatch(5).startPos);
    assertEquals("EndPos (5)", 6, kr.getMatch(5).endPos);
    assertEquals("StartPos (6)", 0, kr.getMatch(6).startPos);
    assertEquals("EndPos (6)", 12, kr.getMatch(6).endPos);
    assertEquals("StartPos (7)", 1, kr.getMatch(7).startPos);
    assertEquals("EndPos (7)", 9, kr.getMatch(7).endPos);
    assertEquals("StartPos (8)", 2, kr.getMatch(8).startPos);
    assertEquals("EndPos (8)", 6, kr.getMatch(8).endPos);
    assertEquals("StartPos (9)", 0, kr.getMatch(9).startPos);
    assertEquals("EndPos (9)", 3, kr.getMatch(9).endPos);
    assertEquals("StartPos (10)", 0, kr.getMatch(10).startPos);
    assertEquals("EndPos (10)", 6, kr.getMatch(10).endPos);
    assertEquals("StartPos (11)", 0, kr.getMatch(11).startPos);
    assertEquals("EndPos (11)", 9, kr.getMatch(11).endPos);
}
Also used : KrillIndex(de.ids_mannheim.korap.KrillIndex) SpanElementQuery(de.ids_mannheim.korap.query.SpanElementQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test)

Example 37 with SpanElementQuery

use of de.ids_mannheim.korap.query.SpanElementQuery in project Krill by KorAP.

the class TestElementIndex method indexExample2.

/**
 * Indexes one document whose first token opens three {@code <a>} elements
 * of different lengths and checks that a {@link SpanElementQuery} for
 * {@code a} on field {@code base} reports all three, each starting at
 * position 0 and ending at positions 3, 6 and 9.
 */
@Test
public void indexExample2() throws IOException {
    KrillIndex ki = new KrillIndex();
    // <a><a><a>h</a>hhij</a>hij</a>hij</a>
    FieldDocument fd = new FieldDocument();
    fd.addTV("base", "h  h        i  j   h  i  j  h  i  j  ", "[(0-3)s:h|" + "<>:a$<b>64<i>0<i>18<i>3<b>0|" + "<>:a$<b>64<i>0<i>27<i>6<b>0|" + "<>:a$<b>64<i>0<i>36<i>9]" + "[(3-6)s:h]" + "[(12-15)s:i<b>0]" + "[(15-18)s:j]" + "[(18-21)s:h]" + "[(21-24)s:i]" + "[(24-27)s:j]" + "[(27-30)s:h]" + "[(30-33)s:i]" + "[(33-36)s:j]");
    ki.addDoc(fd);
    // Save documents
    ki.commit();
    assertEquals(1, ki.numberOf("documents"));
    SpanQuery sq = new SpanElementQuery("base", "a");
    Result kr = ki.search(sq, (short) 10);
    // Expected value first, so a failure reports expected/actual the right way round
    assertEquals("totalResults", 3, kr.getTotalResults());
    assertEquals("StartPos (0)", 0, kr.getMatch(0).startPos);
    assertEquals("EndPos (0)", 3, kr.getMatch(0).endPos);
    assertEquals("StartPos (1)", 0, kr.getMatch(1).startPos);
    assertEquals("EndPos (1)", 6, kr.getMatch(1).endPos);
    assertEquals("StartPos (2)", 0, kr.getMatch(2).startPos);
    assertEquals("EndPos (2)", 9, kr.getMatch(2).endPos);
}
Also used : KrillIndex(de.ids_mannheim.korap.KrillIndex) SpanElementQuery(de.ids_mannheim.korap.query.SpanElementQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test)

Example 38 with SpanElementQuery

use of de.ids_mannheim.korap.query.SpanElementQuery in project Krill by KorAP.

the class TestElementIndex method indexExample1.

// Todo: primary data as a non-indexed field separated.
/**
 * Indexes two documents that share the same nesting of three {@code <a>}
 * elements and checks that a {@link SpanElementQuery} for {@code a} on
 * field {@code base} reports six matches: the same three start/end
 * position pairs once per document.
 */
@Test
public void indexExample1() throws IOException {
    KrillIndex ki = new KrillIndex();
    // <a>x<a>y<a>zhij</a>hij</a>hij</a>hij</a>
    FieldDocument fd = new FieldDocument();
    fd.addTV("base", "x  y  z  h  i  j  h  i  j  h  i  j  ", "[(0-3)s:x|<>:a$<b>64<i>0<i>3<i>12<b>0]" + "[(3-6)s:y|<>:a$<b>64<i>3<i>6<i>9<b>0]" + "[(6-9)s:z|<>:a$<b>64<i>6<i>9<i>6]" + "[(9-12)s:h<b>0]" + "[(12-15)s:i]" + "[(15-18)s:j]" + "[(18-21)s:h]" + "[(21-24)s:i]" + "[(24-27)s:j]" + "[(27-30)s:h]" + "[(30-33)s:i]" + "[(33-36)s:j]");
    ki.addDoc(fd);
    // <a>x<a>y<a>zcde</a>cde</a>cde</a>cde</a>
    fd = new FieldDocument();
    fd.addTV("base", "x  y  z  c  d  e  c  d  e  c  d  e  ", "[(0-3)s:x|<>:a$<b>64<i>0<i>3<i>12<b>0]" + "[(3-6)s:y|<>:a$<b>64<i>3<i>6<i>9<b>0]" + "[(6-9)s:z|<>:a$<b>64<i>6<i>9<i>6]" + "[(9-12)s:c<b>0]" + "[(12-15)s:d]" + "[(15-18)s:e]" + "[(18-21)s:c]" + "[(21-24)s:d]" + "[(24-27)s:e]" + "[(27-30)s:c]" + "[(30-33)s:d]" + "[(33-36)s:e]");
    ki.addDoc(fd);
    // Save documents
    ki.commit();
    assertEquals(2, ki.numberOf("documents"));
    SpanQuery sq = new SpanElementQuery("base", "a");
    Result kr = ki.search(sq, (short) 10);
    // Expected value first, so a failure reports expected/actual the right way round
    assertEquals("totalResults", 6, kr.getTotalResults());
    // Matches 0-2: first document
    assertEquals("StartPos (0)", 0, kr.getMatch(0).startPos);
    assertEquals("EndPos (0)", 12, kr.getMatch(0).endPos);
    assertEquals("StartPos (1)", 1, kr.getMatch(1).startPos);
    assertEquals("EndPos (1)", 9, kr.getMatch(1).endPos);
    assertEquals("StartPos (2)", 2, kr.getMatch(2).startPos);
    assertEquals("EndPos (2)", 6, kr.getMatch(2).endPos);
    // Matches 3-5: second document; labels carry the real match index so
    // a failure here cannot be mistaken for one in the first document
    assertEquals("StartPos (3)", 0, kr.getMatch(3).startPos);
    assertEquals("EndPos (3)", 12, kr.getMatch(3).endPos);
    assertEquals("StartPos (4)", 1, kr.getMatch(4).startPos);
    assertEquals("EndPos (4)", 9, kr.getMatch(4).endPos);
    assertEquals("StartPos (5)", 2, kr.getMatch(5).startPos);
    assertEquals("EndPos (5)", 6, kr.getMatch(5).endPos);
}
Also used : KrillIndex(de.ids_mannheim.korap.KrillIndex) SpanElementQuery(de.ids_mannheim.korap.query.SpanElementQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test)

Example 39 with SpanElementQuery

use of de.ids_mannheim.korap.query.SpanElementQuery in project Krill by KorAP.

the class Match method expandContextToSpan.

// Expand the context to a span
// THIS IS NOT VERY CLEVER - MAKE IT MORE CLEVER!
/**
 * Expands the boundaries of this match to surrounding {@code element}
 * spans in {@code field}.
 *
 * <p>Iterates over the element spans of the match's document. The new
 * start is the largest start position of a span that begins at or before
 * the match start and ends at or after it; the new end is the end
 * position of the first span that ends at or after the match end.
 * Character offsets are taken from the span payload whose first byte is
 * {@code 64}: the int at byte 1 is read as the start character offset and
 * the int at byte 5 as the end character offset.
 *
 * @param atomic  the index segment to read spans from
 * @param bitset  passed through to {@code SpanElementQuery#getSpans}
 * @param field   the field holding the element annotations
 * @param element the element name to expand to
 * @return {@code {newStart, newEnd, newStartChar, newEndChar}}; a value
 *         stays {@code -1} if no suitable span was found, and all four
 *         are {@code -1} if reading the spans failed with an
 *         {@link IOException}
 */
public int[] expandContextToSpan(LeafReaderContext atomic, Bits bitset, String field, String element) {
    try {
        SpanElementQuery cquery = new SpanElementQuery(field, element);
        Spans contextSpans = cquery.getSpans(atomic, bitset, new HashMap<Term, TermContext>());
        int newStart = -1, newEnd = -1;
        int newStartChar = -1, newEndChar = -1;
        if (DEBUG)
            log.trace("Extend match to context boundary with {} in docID {}", cquery.toString(), this.localDocID);
        while (contextSpans.next()) {
            // Move on to the document of this match; stop if it has no spans
            if (contextSpans.doc() != this.localDocID) {
                contextSpans.skipTo(this.localDocID);
                if (contextSpans.doc() != this.localDocID)
                    break;
            }
            // The span covers the start of the match
            if (contextSpans.start() <= this.getStartPos() && contextSpans.end() >= this.getStartPos()) {
                // Keep the start closest to the match
                newStart = Math.max(newStart, contextSpans.start());
                if (DEBUG)
                    log.trace("NewStart is at {}", newStart);
                // Get character offsets (start and end)
                if (contextSpans.isPayloadAvailable()) {
                    try {
                        for (byte[] b : contextSpans.getPayload()) {
                            // Not an element span
                            if (b[0] != (byte) 64)
                                continue;
                            // Wrap the payload itself instead of copying it into a
                            // fixed-size buffer: payloads of any length are readable,
                            // and a too-short payload fails instead of yielding
                            // stale bytes from an earlier payload.
                            ByteBuffer bb = ByteBuffer.wrap(b);
                            int startChar = bb.getInt(1);
                            int endChar = bb.getInt(5);
                            // Assign only after both reads succeeded
                            newStartChar = startChar;
                            newEndChar = endChar;
                            break;
                        }
                    } catch (Exception e) {
                        // Best effort: offsets stay as they were
                        log.warn("Some problems with ByteBuffer: {}", e.getMessage());
                    }
                }
            } else {
                // Has to be reset so the end offset is read from this span's payload below
                newEndChar = 0;
            }
            // There's a span found that ends after the match
            if (contextSpans.end() >= this.getEndPos()) {
                newEnd = contextSpans.end();
                // Get character offset (end)
                if (newEndChar == 0 && contextSpans.isPayloadAvailable()) {
                    try {
                        for (byte[] b : contextSpans.getPayload()) {
                            // Not an element span
                            if (b[0] != (byte) 64)
                                continue;
                            // The end offset is the second int of the payload (byte 5);
                            // byte 1 holds the start offset.
                            newEndChar = ByteBuffer.wrap(b).getInt(5);
                            break;
                        }
                    } catch (Exception e) {
                        // Best effort: offset stays as it was
                        log.warn("Some problems with ByteBuffer: {}", e.getMessage());
                    }
                }
                break;
            }
        }
        // We have a new match surrounding
        if (DEBUG)
            log.trace("New match spans from {}-{}/{}-{}", newStart, newEnd, newStartChar, newEndChar);
        return new int[] { newStart, newEnd, newStartChar, newEndChar };
    } catch (IOException e) {
        // Keep the stack trace; the caller only sees the -1 sentinel values
        log.error("Unable to expand context to span", e);
    }
    return new int[] { -1, -1, -1, -1 };
}
Also used : Term(org.apache.lucene.index.Term) IOException(java.io.IOException) ByteBuffer(java.nio.ByteBuffer) SpanElementQuery(de.ids_mannheim.korap.query.SpanElementQuery) TermContext(org.apache.lucene.index.TermContext) IOException(java.io.IOException) Spans(org.apache.lucene.search.spans.Spans)

Example 40 with SpanElementQuery

use of de.ids_mannheim.korap.query.SpanElementQuery in project Krill by KorAP.

the class TestNextIndex method indexExample3.

/**
 * Indexes the single document "abcabcabac", which carries an {@code <x>}
 * element starting at token 3 with end position 7, and checks that an
 * {@code <x>} element directly followed by the token {@code s:b} is
 * highlighted as {@code abc[[abcab]]ac}.
 */
@Test
public void indexExample3() throws IOException {
    // abcabcabac -- one bracketed entry per token
    String termVector = "[(0-1)s:a|i:a|_0$<i>0<i>1|-:t$<i>10]"
        + "[(1-2)s:b|i:b|_1$<i>1<i>2]"
        + "[(2-3)s:c|i:c|_2$<i>2<i>3]"
        + "[(3-4)s:a|i:a|_3$<i>3<i>4|<>:x$<b>64<i>3<i>7<i>7<b>0]"
        + "[(4-5)s:b|i:b|_4$<i>4<i>5]"
        + "[(5-6)s:c|i:c|_5$<i>5<i>6]"
        + "[(6-7)s:a|i:a|_6$<i>6<i>7]"
        + "[(7-8)s:b|i:b|_7$<i>7<i>8]"
        + "[(8-9)s:a|i:a|_8$<i>8<i>9]"
        + "[(9-10)s:c|i:c|_9$<i>9<i>10]";
    FieldDocument doc = new FieldDocument();
    doc.addTV("base", "abcabcabac", termVector);

    KrillIndex index = new KrillIndex();
    index.addDoc(doc);
    index.commit();

    // <x> element immediately followed by the token "b"
    SpanElementQuery element = new SpanElementQuery("base", "x");
    SpanTermQuery following = new SpanTermQuery(new Term("base", "s:b"));
    SpanQuery query = new SpanNextQuery(element, following);

    Result result = index.search(query, (short) 10);
    assertEquals("abc[[abcab]]ac", result.getMatch(0).getSnippetBrackets());
}
Also used : SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) Term(org.apache.lucene.index.Term) KrillIndex(de.ids_mannheim.korap.KrillIndex) SpanElementQuery(de.ids_mannheim.korap.query.SpanElementQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) Result(de.ids_mannheim.korap.response.Result) SpanNextQuery(de.ids_mannheim.korap.query.SpanNextQuery) Test(org.junit.Test)

Aggregations

SpanElementQuery (de.ids_mannheim.korap.query.SpanElementQuery): 57, Test (org.junit.Test): 53, KrillIndex (de.ids_mannheim.korap.KrillIndex): 37, Term (org.apache.lucene.index.Term): 36, SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery): 35, SpanQuery (org.apache.lucene.search.spans.SpanQuery): 32, Result (de.ids_mannheim.korap.response.Result): 26, SpanClassQuery (de.ids_mannheim.korap.query.SpanClassQuery): 16, SpanWithinQuery (de.ids_mannheim.korap.query.SpanWithinQuery): 16, DistanceConstraint (de.ids_mannheim.korap.query.DistanceConstraint): 11, SpanDistanceQuery (de.ids_mannheim.korap.query.SpanDistanceQuery): 11, SpanFocusQuery (de.ids_mannheim.korap.query.SpanFocusQuery): 10, SpanNextQuery (de.ids_mannheim.korap.query.SpanNextQuery): 9, SpanRelationQuery (de.ids_mannheim.korap.query.SpanRelationQuery): 8, SpanAttributeQuery (de.ids_mannheim.korap.query.SpanAttributeQuery): 6, SpanClassFilterQuery (de.ids_mannheim.korap.query.SpanClassFilterQuery): 6, SpanRelationMatchQuery (de.ids_mannheim.korap.query.SpanRelationMatchQuery): 6, SpanWithAttributeQuery (de.ids_mannheim.korap.query.SpanWithAttributeQuery): 6, SpanSegmentQuery (de.ids_mannheim.korap.query.SpanSegmentQuery): 4, ArrayList (java.util.ArrayList): 4