Search in sources:

Example 6 with SpanClassQuery

use of de.ids_mannheim.korap.query.SpanClassQuery in project Krill by KorAP.

the class TestMatchIndex method indexExampleFocusWithSkip.

/**
 * Indexes four documents and runs
 * {@code within(<p>, focus(3: within({2:<s>}, {3:a})))} against them,
 * then checks the serialized query, the total number of results and the
 * bracket snippets of the first three matches.
 *
 * <p>The test is disabled via {@code @Ignore}; the commented-out
 * {@code fail(...)} call below points to a known issue with skipping.
 *
 * @throws IOException declared for the index operations used here
 */
@Ignore
public void indexExampleFocusWithSkip() throws IOException {
    KrillIndex ki = new KrillIndex();

    // abcabcabac
    FieldDocument fd = new FieldDocument();
    fd.addTV("base", "abcabcabac",
            // The payload should be ignored
            // |<>:p#0-10<i>9]" +
            "[(0-1)s:a|i:a|_0$<i>0<i>1|-:t$<i>10]"
            + "[(1-2)s:b|i:b|_1$<i>1<i>2|<>:s$<b>64<i>1<i>5<i>5]"
            + "[(2-3)s:c|i:c|_2$<i>2<i>3|<>:s$<b>64<i>2<i>7<i>7]"
            + "[(3-4)s:a|i:a|_3$<i>3<i>4]"
            + "[(4-5)s:b|i:b|_4$<i>4<i>5]"
            + "[(5-6)s:c|i:c|_5$<i>5<i>6]"
            + "[(6-7)s:a|i:a|_6$<i>6<i>7]"
            + "[(7-8)s:b|i:b|_7$<i>7<i>8]"
            + "[(8-9)s:a|i:a|_8$<i>8<i>9]"
            + "[(9-10)s:c|i:c|_9$<i>9<i>10]");
    ki.addDoc(fd);

    fd = new FieldDocument();
    fd.addTV("base", "gbcgbcgbgc",
            "[(0-1)s:g|i:g|_0$<i>0<i>1|-:t$<i>10|<>:p$<b>64<i>0<i>10<i>9]"
            + "[(1-2)s:b|i:b|_1$<i>1<i>2|<>:s$<b>64<i>1<i>5<i>5]"
            + "[(2-3)s:c|i:c|_2$<i>2<i>3|<>:s$<b>64<i>2<i>7<i>7]"
            + "[(3-4)s:g|i:g|_3$<i>3<i>4]"
            + "[(4-5)s:b|i:b|_4$<i>4<i>5]"
            + "[(5-6)s:c|i:c|_5$<i>5<i>6]"
            + "[(6-7)s:g|i:g|_6$<i>6<i>7]"
            + "[(7-8)s:b|i:b|_7$<i>7<i>8]"
            + "[(8-9)s:g|i:g|_8$<i>8<i>9]"
            + "[(9-10)s:c|i:c|_9$<i>9<i>10]");
    ki.addDoc(fd);

    // Same text as the previous document, but without any <s>/<p> elements
    fd = new FieldDocument();
    fd.addTV("base", "gbcgbcgbgc",
            "[(0-1)s:g|i:g|_0$<i>0<i>1|-:t$<i>10]"
            + "[(1-2)s:b|i:b|_1$<i>1<i>2]"
            + "[(2-3)s:c|i:c|_2$<i>2<i>3]"
            + "[(3-4)s:g|i:g|_3$<i>3<i>4]"
            + "[(4-5)s:b|i:b|_4$<i>4<i>5]"
            + "[(5-6)s:c|i:c|_5$<i>5<i>6]"
            + "[(6-7)s:g|i:g|_6$<i>6<i>7]"
            + "[(7-8)s:b|i:b|_7$<i>7<i>8]"
            + "[(8-9)s:g|i:g|_8$<i>8<i>9]"
            + "[(9-10)s:c|i:c|_9$<i>9<i>10]");
    ki.addDoc(fd);

    fd = new FieldDocument();
    // contains(<p>, focus(3: contains({2:<s>}, {3:a})))
    fd.addTV("base", "acabcabac",
            "[(0-1)s:a|i:a|_0$<i>0<i>1|-:t$<i>10|<>:p$<b>64<i>0<i>9<i>8]"
            + "[(1-2)s:b|i:b|_1$<i>1<i>2|<>:s$<b>64<i>1<i>5<i>5]"
            + "[(2-3)s:a|i:a|_2$<i>2<i>3|<>:s$<b>64<i>2<i>7<i>7]"
            + "[(3-4)s:b|i:b|_3$<i>3<i>4]"
            + "[(4-5)s:c|i:c|_4$<i>4<i>5]"
            + "[(5-6)s:a|i:a|_5$<i>5<i>6]"
            + "[(6-7)s:b|i:b|_6$<i>6<i>7]"
            + "[(7-8)s:a|i:a|_7$<i>7<i>8]"
            + "[(8-9)s:c|i:c|_8$<i>8<i>9]");
    ki.addDoc(fd);
    ki.commit();

    KrillCollection kc = new KrillCollection(ki);
    assertEquals("Documents", 4, kc.numberOf("documents"));

    // within(<p>, focus(3:within({2:<s>}, {3:a})))
    SpanQuery sq = new SpanWithinQuery(
            new SpanElementQuery("base", "p"),
            new SpanFocusQuery(
                    new SpanWithinQuery(
                            new SpanClassQuery(new SpanElementQuery("base", "s"), (byte) 2),
                            new SpanClassQuery(new SpanTermQuery(new Term("base", "s:a")), (byte) 3)),
                    (byte) 3));

    // fail("Skipping may go horribly wrong! (Known issue)");
    Krill ks = new Krill(sq);
    ks.getMeta().setStartIndex(0).setCount((short) 20)
            .setContext(new SearchContext(true, (short) 5, true, (short) 5));
    Result kr = ks.apply(ki);
    // kr = ki.search(kc, sq, 0, (short) 20, true, (short) 5, true, (short) 5);

    // JUnit convention: expected value first, actual value second
    assertEquals("spanContain(<base:p />, focus(3: spanContain({2: <base:s />}, {3: base:s:a})))", kr.getSerialQuery());
    assertEquals(12, kr.getTotalResults());
    assertEquals("[a{2:bc{3:a}b}cabac]", kr.getMatch(0).getSnippetBrackets());
    assertEquals("[ab{2:c{3:a}bcab}ac]", kr.getMatch(1).getSnippetBrackets());
    assertEquals("[ab{2:cabc{3:a}}bac]", kr.getMatch(2).getSnippetBrackets());
}
Also used : SearchContext(de.ids_mannheim.korap.response.SearchContext) Term(org.apache.lucene.index.Term) KrillIndex(de.ids_mannheim.korap.KrillIndex) SpanElementQuery(de.ids_mannheim.korap.query.SpanElementQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) Result(de.ids_mannheim.korap.response.Result) Krill(de.ids_mannheim.korap.Krill) SpanClassQuery(de.ids_mannheim.korap.query.SpanClassQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) SpanWithinQuery(de.ids_mannheim.korap.query.SpanWithinQuery) SpanFocusQuery(de.ids_mannheim.korap.query.SpanFocusQuery) KrillCollection(de.ids_mannheim.korap.KrillCollection) Ignore(org.junit.Ignore)

Example 7 with SpanClassQuery

use of de.ids_mannheim.korap.query.SpanClassQuery in project Krill by KorAP.

the class TestMatchIndex method testEmbeddedClassQuery.

/**
 * Checks a focus query on class 3, where class 3 wraps a next query made of
 * class 1 ({@code s:b}) immediately followed by class 2 ({@code s:c}).
 *
 * <p>Both occurrences of "bc" in "abcabcabac" are expected to match, with
 * the nested classes visible in the bracket snippets.
 *
 * @throws IOException declared for the index operations used here
 */
@Test
public void testEmbeddedClassQuery() throws IOException {
    KrillIndex ki = new KrillIndex();

    // abcabcabac
    FieldDocument fd = new FieldDocument();
    fd.addTV("base", "abcabcabac",
            "[(0-1)s:a|i:a|_0$<i>0<i>1|-:t$<i>10]"
            + "[(1-2)s:b|i:b|_1$<i>1<i>2]"
            + "[(2-3)s:c|i:c|_2$<i>2<i>3]"
            + "[(3-4)s:a|i:a|_3$<i>3<i>4]"
            + "[(4-5)s:b|i:b|_4$<i>4<i>5]"
            + "[(5-6)s:c|i:c|_5$<i>5<i>6]"
            + "[(6-7)s:a|i:a|_6$<i>6<i>7]"
            + "[(7-8)s:b|i:b|_7$<i>7<i>8]"
            + "[(8-9)s:a|i:a|_8$<i>8<i>9]"
            + "[(9-10)s:c|i:c|_9$<i>9<i>10]");
    ki.addDoc(fd);
    ki.commit();

    // focus(3: {3: next({1: s:b}, {2: s:c})})
    SpanQuery sq = new SpanFocusQuery(
            new SpanClassQuery(
                    new SpanNextQuery(
                            new SpanClassQuery(new SpanTermQuery(new Term("base", "s:b")), (byte) 1),
                            new SpanClassQuery(new SpanTermQuery(new Term("base", "s:c")), (byte) 2)),
                    (byte) 3),
            (byte) 3);
    Result kr = ki.search(sq, (short) 10);

    // JUnit convention: message, expected value, actual value
    assertEquals("totalResults", 2, kr.getTotalResults());

    assertEquals("StartPos (0)", 1, kr.getMatch(0).startPos);
    assertEquals("EndPos (0)", 3, kr.getMatch(0).endPos);
    assertEquals("SnippetBrackets (0)", "a[[{3:{1:b}{2:c}}]]abcaba ...", kr.getMatch(0).getSnippetBrackets());

    assertEquals("StartPos (1)", 4, kr.getMatch(1).startPos);
    assertEquals("EndPos (1)", 6, kr.getMatch(1).endPos);
    assertEquals("SnippetBrackets (1)", "abca[[{3:{1:b}{2:c}}]]abac", kr.getMatch(1).getSnippetBrackets());

    assertEquals("Document count", 1, ki.numberOf("base", "documents"));
    assertEquals("Token count", 10, ki.numberOf("base", "t"));
}
Also used : SpanClassQuery(de.ids_mannheim.korap.query.SpanClassQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) Term(org.apache.lucene.index.Term) KrillIndex(de.ids_mannheim.korap.KrillIndex) SpanFocusQuery(de.ids_mannheim.korap.query.SpanFocusQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) Result(de.ids_mannheim.korap.response.Result) SpanNextQuery(de.ids_mannheim.korap.query.SpanNextQuery) Test(org.junit.Test)

Example 8 with SpanClassQuery

use of de.ids_mannheim.korap.query.SpanClassQuery in project Krill by KorAP.

the class SpanQueryWrapper method toQuery.

/**
 * Serialize the wrapped query and return a SpanQuery.
 * This will be the final query and may be rewritten.
 *
 * <p>If the query is extended to the right, the fragment query is marked
 * with class 254, embedded in a within-query on {@code base/s:t}, and the
 * result is focused on class 254 again.
 *
 * @return A {@link SpanQuery} object, or {@code null} if this wrapper
 *         reports itself as null or empty.
 * @throws QueryException
 *             as declared; presumably raised while the fragment query is
 *             built (NOTE(review): confirm against toFragmentQuery()).
 */
public SpanQuery toQuery() throws QueryException {
    if (this.isNull() || this.isEmpty()) {
        return null;
    }

    // Wrap the query in a <base/s=t>, if it's extended to the right
    if (this.isExtendedToTheRight()) {
        // The same class number marks the fragment and is focused on afterwards
        final byte wrapClass = (byte) 254;
        return new SpanFocusQuery(
                new SpanWithinQuery("base/s:t", new SpanClassQuery(this.toFragmentQuery(), wrapClass)),
                wrapClass);
    }

    SpanQuery sq = this.toFragmentQuery();
    log.info(sq.toString());
    return sq;
}
Also used : SpanClassQuery(de.ids_mannheim.korap.query.SpanClassQuery) SpanWithinQuery(de.ids_mannheim.korap.query.SpanWithinQuery) SpanFocusQuery(de.ids_mannheim.korap.query.SpanFocusQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery)

Example 9 with SpanClassQuery

use of de.ids_mannheim.korap.query.SpanClassQuery in project Krill by KorAP.

the class TestNextIndex method indexExample6.

/**
 * Skip to NextSpan.
 *
 * <p>Runs a nested next query ({@code s:c}, then {@code s:d} followed by
 * {@code s:b}) over three documents: first with plain term queries, then
 * with each inner term wrapped in a class query and a focus query. In both
 * cases the first match is expected in local document 2 at positions 0-3.
 *
 * @throws IOException declared for the index operations used here
 */
@Test
public void indexExample6() throws IOException {
    KrillIndex ki = new KrillIndex();
    ki.addDoc(createFieldDoc1());
    ki.addDoc(createFieldDoc2());
    ki.addDoc(createFieldDoc3());
    ki.commit();

    // next(s:c, next(s:d, s:b)) on plain terms
    SpanQuery sq = new SpanNextQuery(
            new SpanTermQuery(new Term("base", "s:c")),
            new SpanNextQuery(
                    new SpanTermQuery(new Term("base", "s:d")),
                    new SpanTermQuery(new Term("base", "s:b"))));
    Result kr = ki.search(sq, (short) 10);

    // JUnit convention: message, expected value, actual value
    assertEquals("totalResults", 1, kr.getTotalResults());
    assertEquals("doc-number", 2, kr.getMatch(0).getLocalDocID());
    assertEquals("StartPos", 0, kr.getMatch(0).startPos);
    assertEquals("EndPos", 3, kr.getMatch(0).endPos);

    // Same query, but the inner terms are wrapped as focus(1: {1: s:d}) and focus(2: {2: s:b})
    sq = new SpanNextQuery(
            new SpanTermQuery(new Term("base", "s:c")),
            new SpanNextQuery(
                    new SpanFocusQuery(new SpanClassQuery(new SpanTermQuery(new Term("base", "s:d")), (byte) 1), (byte) 1),
                    new SpanFocusQuery(new SpanClassQuery(new SpanTermQuery(new Term("base", "s:b")), (byte) 2), (byte) 2)));
    kr = ki.search(sq, (short) 10);

    assertEquals("doc-number", 2, kr.getMatch(0).getLocalDocID());
    assertEquals("StartPos", 0, kr.getMatch(0).startPos);
    assertEquals("EndPos", 3, kr.getMatch(0).endPos);

    // Debugging aid (disabled):
    // for (Match km : kr.getMatches()) {
    //     System.out.println(km.getStartPos() + "," + km.getEndPos()
    //             + " "
    //             + km.getSnippetBrackets());
    // }
}
Also used : SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) SpanClassQuery(de.ids_mannheim.korap.query.SpanClassQuery) Term(org.apache.lucene.index.Term) KrillIndex(de.ids_mannheim.korap.KrillIndex) SpanFocusQuery(de.ids_mannheim.korap.query.SpanFocusQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) SpanNextQuery(de.ids_mannheim.korap.query.SpanNextQuery) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test)

Example 10 with SpanClassQuery

use of de.ids_mannheim.korap.query.SpanClassQuery in project Krill by KorAP.

the class TestWithinIndex method indexExample3Offsets.

/**
 * Checks snippet generation for a {@code sentence} element whose character
 * offsets (11-25) are wider than the tokens it spans, so that the quotation
 * marks and the exclamation mark end up inside the match but outside the
 * highlighted class.
 *
 * <p>The first part varies the left/right context size; the second part
 * nests a classed term inside the classed sentence and checks that terms
 * outside the sentence ("und", "schrie") yield no result.
 *
 * @throws IOException declared for the index operations used here
 */
@Test
public void indexExample3Offsets() throws IOException {
    KrillIndex ki = new KrillIndex();

    // Er schrie: <s>"Das war ich!"</s>
    FieldDocument fd = new FieldDocument();
    fd.addTV("base", "Er schrie: \"Das war ich!\" und ging.",
            "[(0-2)s:Er|_0$<i>0<i>3]"
            + "[(3-9)s:schrie|_1$<i>3<i>9]"
            + "[(12-15)s:Das|_2$<i>12<i>15|<>:sentence$<b>64<i>11<i>25<i>5<b>0]"
            + "[(16-19)s:war|_3$<i>16<i>19]"
            + "[(20-23)s:ich|_4$<i>20<i>23]"
            + "[(26-29)s:und|_5$<i>26<i>29]"
            + "[(30-34)s:ging|_6$<i>30<i>34]");
    ki.addDoc(fd);

    // Save documents
    ki.commit();

    SpanQuery sq = new SpanClassQuery(new SpanElementQuery("base", "sentence"), (byte) 3);
    Result kr;

    // Context of 1 on each side
    kr = ki.search(sq, 0, (short) 15, true, (short) 1, true, (short) 1);
    assertEquals("totalResults", 1, kr.getTotalResults());
    assertEquals("... schrie: [[\"{3:Das war ich}!\"]] und ...", kr.getMatch(0).getSnippetBrackets());
    assertEquals("<span class=\"context-left\"><span class=\"more\"></span>schrie: </span><span class=\"match\"><mark>&quot;<mark class=\"class-3 level-0\">Das war ich</mark>!&quot;</mark></span><span class=\"context-right\"> und<span class=\"more\"></span></span>", kr.getMatch(0).getSnippetHTML());

    // No context
    kr = ki.search(sq, 0, (short) 15, true, (short) 0, true, (short) 0);
    assertEquals("... [[\"{3:Das war ich}!\"]] ...", kr.getMatch(0).getSnippetBrackets());
    assertEquals("totalResults", 1, kr.getTotalResults());

    // Context of 6 on each side
    kr = ki.search(sq, 0, (short) 15, true, (short) 6, true, (short) 6);
    assertEquals("Er schrie: [[\"{3:Das war ich}!\"]] und ging.", kr.getMatch(0).getSnippetBrackets());
    assertEquals("totalResults", 1, kr.getTotalResults());

    // Context of 2 on each side
    kr = ki.search(sq, 0, (short) 15, true, (short) 2, true, (short) 2);
    assertEquals("Er schrie: [[\"{3:Das war ich}!\"]] und ging ...", kr.getMatch(0).getSnippetBrackets());
    assertEquals("totalResults", 1, kr.getTotalResults());

    // Terms inside the sentence: nested class highlighting
    kr = ki.search(sentenceContainingTerm("s:Das"), (short) 15);
    assertEquals("Er schrie: [[\"{1:{2:Das} war ich}!\"]] und ging.", kr.getMatch(0).getSnippetBrackets());
    assertEquals("totalResults", 1, kr.getTotalResults());

    kr = ki.search(sentenceContainingTerm("s:war"), (short) 15);
    assertEquals("Er schrie: [[\"{1:Das {2:war} ich}!\"]] und ging.", kr.getMatch(0).getSnippetBrackets());
    assertEquals("totalResults", 1, kr.getTotalResults());

    kr = ki.search(sentenceContainingTerm("s:ich"), (short) 15);
    assertEquals("Er schrie: [[\"{1:Das war {2:ich}}!\"]] und ging.", kr.getMatch(0).getSnippetBrackets());
    assertEquals("totalResults", 1, kr.getTotalResults());

    // Terms outside the sentence: no match expected
    kr = ki.search(sentenceContainingTerm("s:und"), (short) 15);
    assertEquals("totalResults", 0, kr.getTotalResults());

    kr = ki.search(sentenceContainingTerm("s:schrie"), (short) 15);
    assertEquals("totalResults", 0, kr.getTotalResults());
}

/**
 * Builds {@code {1: within(<sentence>, {2: term})}} on the "base" field.
 *
 * @param term the term to look for inside the sentence element, e.g. "s:Das"
 * @return the classed within-query
 * @throws IOException never thrown by the visible code; declared only to
 *             mirror the calling test's signature
 */
private SpanQuery sentenceContainingTerm(String term) throws IOException {
    return new SpanClassQuery(
            new SpanWithinQuery(
                    new SpanElementQuery("base", "sentence"),
                    new SpanClassQuery(new SpanTermQuery(new Term("base", term)), (byte) 2)),
            (byte) 1);
}
Also used : SpanClassQuery(de.ids_mannheim.korap.query.SpanClassQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) SpanWithinQuery(de.ids_mannheim.korap.query.SpanWithinQuery) Term(org.apache.lucene.index.Term) KrillIndex(de.ids_mannheim.korap.KrillIndex) SpanElementQuery(de.ids_mannheim.korap.query.SpanElementQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test)

Aggregations

SpanClassQuery (de.ids_mannheim.korap.query.SpanClassQuery): 32; Test (org.junit.Test): 30; Term (org.apache.lucene.index.Term): 25; KrillIndex (de.ids_mannheim.korap.KrillIndex): 24; SpanElementQuery (de.ids_mannheim.korap.query.SpanElementQuery): 16; SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery): 15; SpanFocusQuery (de.ids_mannheim.korap.query.SpanFocusQuery): 14; SpanQuery (org.apache.lucene.search.spans.SpanQuery): 14; SpanMultipleDistanceQuery (de.ids_mannheim.korap.query.SpanMultipleDistanceQuery): 10; SpanMultiTermQueryWrapper (org.apache.lucene.search.spans.SpanMultiTermQueryWrapper): 10; SpanDistanceQuery (de.ids_mannheim.korap.query.SpanDistanceQuery): 9; DistanceConstraint (de.ids_mannheim.korap.query.DistanceConstraint): 8; Result (de.ids_mannheim.korap.response.Result): 8; SpanRelationQuery (de.ids_mannheim.korap.query.SpanRelationQuery): 7; SpanClassFilterQuery (de.ids_mannheim.korap.query.SpanClassFilterQuery): 6; SpanNextQuery (de.ids_mannheim.korap.query.SpanNextQuery): 6; SpanRelationMatchQuery (de.ids_mannheim.korap.query.SpanRelationMatchQuery): 6; WildcardQuery (org.apache.lucene.search.WildcardQuery): 6; SpanWithinQuery (de.ids_mannheim.korap.query.SpanWithinQuery): 4; RegexpQuery (org.apache.lucene.search.RegexpQuery): 4