Search in sources :

Example 6 with SpanElementQuery

use of de.ids_mannheim.korap.query.SpanElementQuery in project Krill by KorAP.

the class TestElementIndex method indexExample4.

/**
 * Indexes one ten-token document whose token stream carries two
 * {@code <>:a} annotations (at tokens 3 and 6) and verifies the bracketed
 * snippets of the first two matches of a {@link SpanElementQuery} for
 * {@code a} on field {@code base}.
 *
 * @throws IOException declared by the test; raised by the index calls if they fail
 */
@Test
public void indexExample4() throws IOException {
    // Token stream for the primary text; one bracketed group per token.
    final String tokenStream =
        "[(0-3)s:a|_0$<i>0<i>3]"
        + "[(3-6)s:b|_1$<i>3<i>6]"
        + "[(6-9)s:c|_2$<i>6<i>9]"
        + "[(9-12)s:d|_3$<i>9<i>12|<>:a$<b>64<i>9<i>15<i>4<b>0]"
        + "[(12-15)s:e|_4$<i>12<i>15]"
        + "[(15-18)s:f|_5$<i>15<i>18]"
        + "[(18-21)s:g|_6$<i>18<i>21|<>:a$<b>64<i>18<i>24<i>8<b>0]"
        + "[(21-24)s:h|_7$<i>21<i>24]"
        + "[(24-27)s:i|_8$<i>24<i>27]"
        + "[(27-30)s:j|_9$<i>27<i>30]";

    final KrillIndex index = new KrillIndex();
    final FieldDocument document = new FieldDocument();
    document.addTV("base", "111111ccc222222fff333333iiijjj", tokenStream);
    index.addDoc(document);

    // Persist the document before querying.
    index.commit();
    assertEquals(1, index.numberOf("documents"));

    final SpanQuery elementQuery = new SpanElementQuery("base", "a");
    final Result result = index.search(
        elementQuery, 0, (short) 15, false, (short) 3, false, (short) 3);

    final String firstSnippet = result.getMatch(0).getSnippetBrackets();
    assertEquals("... ccc[[222222]]fff ...", firstSnippet);

    final String secondSnippet = result.getMatch(1).getSnippetBrackets();
    assertEquals("... fff[[333333]]iii ...", secondSnippet);
}
Also used : KrillIndex(de.ids_mannheim.korap.KrillIndex) SpanElementQuery(de.ids_mannheim.korap.query.SpanElementQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test)

Example 7 with SpanElementQuery

use of de.ids_mannheim.korap.query.SpanElementQuery in project Krill by KorAP.

the class TestElementIndex method indexExample6.

/**
 * Indexes five documents and verifies that a {@link SpanElementQuery} for
 * {@code a} on field {@code base} reports seven results in total.
 *
 * <p>The token streams carry 3 + 0 + 1 + 3 + 0 = 7 {@code <>:a}
 * annotations across the five documents, which is the expected total.
 * The third document's single annotation declares end offsets (36 / token
 * 12) beyond its four tokens.
 *
 * @throws IOException declared by the test; raised by the index calls if they fail
 */
@Test
public void indexExample6() throws IOException {
    KrillIndex ki = new KrillIndex();

    // <a>x<a>y<a>zhij</a>hij</a>hij</a>
    FieldDocument fd = new FieldDocument();
    fd.addTV("base", "x  y  z  h  i  j  h  i  j  h  i  j  ",
        "[(0-3)s:x|_0$<i>0<i>3|<>:a$<b>64<i>0<i>36<i>12<b>0]"
        + "[(3-6)s:y|_1$<i>3<i>6|<>:a$<b>64<i>3<i>27<i>9<b>0]"
        + "[(6-9)s:z|_2$<i>6<i>9|<>:a$<b>64<i>6<i>18<i>6<b>0]"
        + "[(9-12)s:h|_3$<i>9<i>12]"
        + "[(12-15)s:i|_4$<i>12<i>15]"
        + "[(15-18)s:j|_5$<i>15<i>18]"
        + "[(18-21)s:h|_6$<i>18<i>21]"
        + "[(21-24)s:i|_7$<i>21<i>24]"
        + "[(24-27)s:j|_8$<i>24<i>27]"
        + "[(27-30)s:h|_9$<i>27<i>30]"
        + "[(30-33)s:i|_10$<i>30<i>33]"
        + "[(33-36)s:j|_11$<i>33<i>36]");
    ki.addDoc(fd);

    // No <>:a annotation in this document.
    fd = new FieldDocument();
    fd.addTV("base", "x  y  z  h  ",
        "[(0-3)s:x|_0$<i>0<i>3]"
        + "[(3-6)s:y|_1$<i>3<i>6]"
        + "[(6-9)s:z|_2$<i>6<i>9]"
        + "[(9-12)s:h|_3$<i>9<i>12]");
    ki.addDoc(fd);

    // Here is a larger offset than expected
    fd = new FieldDocument();
    fd.addTV("base", "x  y  z  h  ",
        "[(0-3)s:x|_0$<i>0<i>3|<>:a$<b>64<i>0<i>36<i>12<b>0]"
        + "[(3-6)s:y|_1$<i>3<i>6]"
        + "[(6-9)s:z|_2$<i>6<i>9]"
        + "[(9-12)s:h|_3$<i>9<i>12]");
    ki.addDoc(fd);

    // <a>x<a>y<a>zabc</a>abc</a>abc</a>
    fd = new FieldDocument();
    fd.addTV("base", "x  y  z  a  b  c  a  b  c  a  b  c  ",
        "[(0-3)s:x|_0$<i>0<i>3|<>:a$<b>64<i>0<i>36<i>12<b>0]"
        + "[(3-6)s:y|_1$<i>3<i>6|<>:a$<b>64<i>3<i>27<i>9<b>0]"
        + "[(6-9)s:z|_2$<i>6<i>9|<>:a$<b>64<i>6<i>18<i>6<b>0]"
        + "[(9-12)s:a|_3$<i>9<i>12]"
        + "[(12-15)s:b|_4$<i>12<i>15]"
        + "[(15-18)s:c|_5$<i>15<i>18]"
        + "[(18-21)s:a|_6$<i>18<i>21]"
        + "[(21-24)s:b|_7$<i>21<i>24]"
        + "[(24-27)s:c|_8$<i>24<i>27]"
        + "[(27-30)s:a|_9$<i>27<i>30]"
        + "[(30-33)s:b|_10$<i>30<i>33]"
        + "[(33-36)s:c|_11$<i>33<i>36]");
    ki.addDoc(fd);

    // No <>:a annotation in this document.
    fd = new FieldDocument();
    fd.addTV("base", "x  y  z  h  ",
        "[(0-3)s:x|_0$<i>0<i>3]"
        + "[(3-6)s:y|_1$<i>3<i>6]"
        + "[(6-9)s:z|_2$<i>6<i>9]"
        + "[(9-12)s:h|_3$<i>9<i>12]");
    ki.addDoc(fd);

    // Save documents
    ki.commit();

    SpanQuery sq = new SpanElementQuery("base", "a");
    Result kr = ki.search(sq, (short) 15);

    assertEquals(5, ki.numberOf("documents"));
    // JUnit order is (message, expected, actual); the expected count goes first
    // so that a failure reports "expected 7 but was <actual>".
    assertEquals("totalResults", 7, kr.getTotalResults());
}
Also used : KrillIndex(de.ids_mannheim.korap.KrillIndex) SpanElementQuery(de.ids_mannheim.korap.query.SpanElementQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test)

Example 8 with SpanElementQuery

use of de.ids_mannheim.korap.query.SpanElementQuery in project Krill by KorAP.

the class TestMatchIndex method indexExampleFocusWithSpan.

/**
 * Indexes one document ("abcabcabac") with two {@code <>:s} annotations and
 * verifies a {@link SpanFocusQuery} on class 3 wrapped around a
 * {@link SpanWithinQuery} of a class-2 {@code s} element and a class-3
 * {@code s:b} term: both the serialized query and the first match's
 * bracketed snippet are checked.
 *
 * @throws IOException declared by the test; raised by the index calls if they fail
 */
@Test
public void indexExampleFocusWithSpan() throws IOException {
    KrillIndex ki = new KrillIndex();

    // abcabcabac
    FieldDocument fd = new FieldDocument();
    fd.addTV("base", "abcabcabac",
        "[(0-1)s:a|i:a|_0$<i>0<i>1|-:t$<i>10]"
        + "[(1-2)s:b|i:b|_1$<i>1<i>2|<>:s$<b>64<i>1<i>5<i>5]"
        + "[(2-3)s:c|i:c|_2$<i>2<i>3|<>:s$<b>64<i>2<i>7<i>7]"
        + "[(3-4)s:a|i:a|_3$<i>3<i>4]"
        + "[(4-5)s:b|i:b|_4$<i>4<i>5]"
        + "[(5-6)s:c|i:c|_5$<i>5<i>6]"
        + "[(6-7)s:a|i:a|_6$<i>6<i>7]"
        + "[(7-8)s:b|i:b|_7$<i>7<i>8]"
        + "[(8-9)s:a|i:a|_8$<i>8<i>9]"
        + "[(9-10)s:c|i:c|_9$<i>9<i>10]");
    ki.addDoc(fd);
    ki.commit();

    SpanQuery sq;
    Result kr;

    // Unfocused variant of the same query, left disabled in the original test:
    // sq = new SpanWithinQuery(new SpanClassQuery(new SpanElementQuery(
    // "base", "s"), (byte) 2), new SpanClassQuery(new SpanTermQuery(
    // new Term("base", "s:b")), (byte) 3));
    //
    // kr = ki.search(sq, (short) 10);
    // assertEquals(kr.getSerialQuery(),
    // "spanContain({2: <base:s />}, {3: base:s:b})");
    // assertEquals(kr.getMatch(0).getSnippetBrackets(),
    // "a[{2:{3:b}cab}]cabac");

    sq = new SpanFocusQuery(
        new SpanWithinQuery(
            new SpanClassQuery(new SpanElementQuery("base", "s"), (byte) 2),
            new SpanClassQuery(new SpanTermQuery(new Term("base", "s:b")), (byte) 3)),
        (byte) 3);
    kr = ki.search(sq, (short) 10);

    // JUnit order is (expected, actual); the literals go first so failure
    // messages label the values correctly.
    assertEquals("focus(3: spanContain({2: <base:s />}, {3: base:s:b}))", kr.getSerialQuery());
    assertEquals("a[[{3:b}]]cabcab ...", kr.getMatch(0).getSnippetBrackets());
}
Also used : SpanClassQuery(de.ids_mannheim.korap.query.SpanClassQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) SpanWithinQuery(de.ids_mannheim.korap.query.SpanWithinQuery) Term(org.apache.lucene.index.Term) KrillIndex(de.ids_mannheim.korap.KrillIndex) SpanFocusQuery(de.ids_mannheim.korap.query.SpanFocusQuery) SpanElementQuery(de.ids_mannheim.korap.query.SpanElementQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test)

Example 9 with SpanElementQuery

use of de.ids_mannheim.korap.query.SpanElementQuery in project Krill by KorAP.

the class TestMatchIndex method indexExampleFocusWithSkip.

/**
 * Indexes four documents and checks a {@link SpanWithinQuery} of a
 * {@code p} element containing a class-3 focus over
 * {@code contains({2:<s>}, {3:a})}, run through {@link Krill} with an
 * explicit {@link SearchContext}. Asserts the serialized query, a total of
 * 12 results, and the bracketed snippets of the first three matches.
 *
 * <p>The method carries {@code @Ignore}; a disabled {@code fail(...)} line in
 * the body mentions a known issue with skipping.
 * NOTE(review): only {@code @Ignore} is present, with no {@code @Test} —
 * confirm whether that is intentional.
 *
 * @throws IOException declared by the test; raised by the index calls if they fail
 */
@Ignore
public void indexExampleFocusWithSkip() throws IOException {
    KrillIndex ki = new KrillIndex();

    // abcabcabac
    FieldDocument fd = new FieldDocument();
    fd.addTV("base", "abcabcabac",
        // The payload should be ignored
        // |<>:p#0-10<i>9]" +
        "[(0-1)s:a|i:a|_0$<i>0<i>1|-:t$<i>10]"
        + "[(1-2)s:b|i:b|_1$<i>1<i>2|<>:s$<b>64<i>1<i>5<i>5]"
        + "[(2-3)s:c|i:c|_2$<i>2<i>3|<>:s$<b>64<i>2<i>7<i>7]"
        + "[(3-4)s:a|i:a|_3$<i>3<i>4]"
        + "[(4-5)s:b|i:b|_4$<i>4<i>5]"
        + "[(5-6)s:c|i:c|_5$<i>5<i>6]"
        + "[(6-7)s:a|i:a|_6$<i>6<i>7]"
        + "[(7-8)s:b|i:b|_7$<i>7<i>8]"
        + "[(8-9)s:a|i:a|_8$<i>8<i>9]"
        + "[(9-10)s:c|i:c|_9$<i>9<i>10]");
    ki.addDoc(fd);

    fd = new FieldDocument();
    fd.addTV("base", "gbcgbcgbgc",
        "[(0-1)s:g|i:g|_0$<i>0<i>1|-:t$<i>10|<>:p$<b>64<i>0<i>10<i>9]"
        + "[(1-2)s:b|i:b|_1$<i>1<i>2|<>:s$<b>64<i>1<i>5<i>5]"
        + "[(2-3)s:c|i:c|_2$<i>2<i>3|<>:s$<b>64<i>2<i>7<i>7]"
        + "[(3-4)s:g|i:g|_3$<i>3<i>4]"
        + "[(4-5)s:b|i:b|_4$<i>4<i>5]"
        + "[(5-6)s:c|i:c|_5$<i>5<i>6]"
        + "[(6-7)s:g|i:g|_6$<i>6<i>7]"
        + "[(7-8)s:b|i:b|_7$<i>7<i>8]"
        + "[(8-9)s:g|i:g|_8$<i>8<i>9]"
        + "[(9-10)s:c|i:c|_9$<i>9<i>10]");
    ki.addDoc(fd);

    fd = new FieldDocument();
    fd.addTV("base", "gbcgbcgbgc",
        "[(0-1)s:g|i:g|_0$<i>0<i>1|-:t$<i>10]"
        + "[(1-2)s:b|i:b|_1$<i>1<i>2]"
        + "[(2-3)s:c|i:c|_2$<i>2<i>3]"
        + "[(3-4)s:g|i:g|_3$<i>3<i>4]"
        + "[(4-5)s:b|i:b|_4$<i>4<i>5]"
        + "[(5-6)s:c|i:c|_5$<i>5<i>6]"
        + "[(6-7)s:g|i:g|_6$<i>6<i>7]"
        + "[(7-8)s:b|i:b|_7$<i>7<i>8]"
        + "[(8-9)s:g|i:g|_8$<i>8<i>9]"
        + "[(9-10)s:c|i:c|_9$<i>9<i>10]");
    ki.addDoc(fd);

    fd = new FieldDocument();
    // contains(<p>, focus(3: contains({2:<s>}, {3:a})))
    fd.addTV("base", "acabcabac",
        "[(0-1)s:a|i:a|_0$<i>0<i>1|-:t$<i>10|<>:p$<b>64<i>0<i>9<i>8]"
        + "[(1-2)s:b|i:b|_1$<i>1<i>2|<>:s$<b>64<i>1<i>5<i>5]"
        + "[(2-3)s:a|i:a|_2$<i>2<i>3|<>:s$<b>64<i>2<i>7<i>7]"
        + "[(3-4)s:b|i:b|_3$<i>3<i>4]"
        + "[(4-5)s:c|i:c|_4$<i>4<i>5]"
        + "[(5-6)s:a|i:a|_5$<i>5<i>6]"
        + "[(6-7)s:b|i:b|_6$<i>6<i>7]"
        + "[(7-8)s:a|i:a|_7$<i>7<i>8]"
        + "[(8-9)s:c|i:c|_8$<i>8<i>9]");
    ki.addDoc(fd);
    ki.commit();

    SpanQuery sq;
    Result kr;
    KrillCollection kc = new KrillCollection(ki);
    assertEquals("Documents", 4, kc.numberOf("documents"));

    // within(<p>, focus(3:within({2:<s>}, {3:a})))
    sq = new SpanWithinQuery(
        new SpanElementQuery("base", "p"),
        new SpanFocusQuery(
            new SpanWithinQuery(
                new SpanClassQuery(new SpanElementQuery("base", "s"), (byte) 2),
                new SpanClassQuery(new SpanTermQuery(new Term("base", "s:a")), (byte) 3)),
            (byte) 3));

    // fail("Skipping may go horribly wrong! (Known issue)");
    Krill ks = new Krill(sq);
    ks.getMeta()
        .setStartIndex(0)
        .setCount((short) 20)
        .setContext(new SearchContext(true, (short) 5, true, (short) 5));
    kr = ks.apply(ki);
    // kr = ki.search(kc, sq, 0, (short) 20, true, (short) 5, true, (short) 5);

    // JUnit order is (expected, actual); the literal goes first so a failure
    // message labels the values correctly.
    assertEquals(
        "spanContain(<base:p />, focus(3: spanContain({2: <base:s />}, {3: base:s:a})))",
        kr.getSerialQuery());
    assertEquals(12, kr.getTotalResults());
    assertEquals("[a{2:bc{3:a}b}cabac]", kr.getMatch(0).getSnippetBrackets());
    assertEquals("[ab{2:c{3:a}bcab}ac]", kr.getMatch(1).getSnippetBrackets());
    assertEquals("[ab{2:cabc{3:a}}bac]", kr.getMatch(2).getSnippetBrackets());
}
Also used : SearchContext(de.ids_mannheim.korap.response.SearchContext) Term(org.apache.lucene.index.Term) KrillIndex(de.ids_mannheim.korap.KrillIndex) SpanElementQuery(de.ids_mannheim.korap.query.SpanElementQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) Result(de.ids_mannheim.korap.response.Result) Krill(de.ids_mannheim.korap.Krill) SpanClassQuery(de.ids_mannheim.korap.query.SpanClassQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) SpanWithinQuery(de.ids_mannheim.korap.query.SpanWithinQuery) SpanFocusQuery(de.ids_mannheim.korap.query.SpanFocusQuery) KrillCollection(de.ids_mannheim.korap.KrillCollection) Ignore(org.junit.Ignore)

Example 10 with SpanElementQuery

use of de.ids_mannheim.korap.query.SpanElementQuery in project Krill by KorAP.

the class TestNextIndex method indexExample4.

/**
 * Indexes two ten-token documents containing {@code <>:x} annotations and
 * verifies three {@link SpanNextQuery} combinations: an {@code x} element
 * followed by the term {@code s:b}, the term {@code s:c} followed by an
 * {@code x} element, and the term {@code s:z} followed by an {@code x}
 * element. Result counts and bracketed snippets are asserted for each.
 *
 * @throws IOException declared by the test; raised by the index calls if they fail
 */
@Test
public void indexExample4() throws IOException {
    KrillIndex ki = new KrillIndex();

    // abcabcabac
    // abc<x>abc<x>a</x>b</x>ac
    FieldDocument fd = new FieldDocument();
    fd.addString("ID", "doc-1");
    // NOTE(review): the token-6 group below closes with "]" before "<>:x"
    // where every other annotation is separated by "|". The string is kept
    // byte-identical because the expected counts below may depend on how it
    // is parsed today - confirm against the token stream parser before changing.
    fd.addTV("base", "abcabcabac",
        "[(0-1)s:a|i:a|_0$<i>0<i>1|-:t$<i>10]"
        + "[(1-2)s:b|i:b|_1$<i>1<i>2]"
        + "[(2-3)s:c|i:c|_2$<i>2<i>3]"
        + "[(3-4)s:a|i:a|_3$<i>3<i>4|<>:x$<b>64<i>3<i>7<i>7<b>0]"
        + "[(4-5)s:b|i:b|_4$<i>4<i>5]"
        + "[(5-6)s:c|i:c|_5$<i>5<i>6]"
        + "[(6-7)s:a|i:a|_6$<i>6<i>7]<>:x$<b>64<i>6<i>8<i>8<b>0]"
        + "[(7-8)s:b|i:b|_7$<i>7<i>8]"
        + "[(8-9)s:a|i:a|_8$<i>8<i>9]"
        + "[(9-10)s:c|i:c|_9$<i>9<i>10]");
    ki.addDoc(fd);

    // xbz<x>xbzx</x>bxz
    fd = new FieldDocument();
    fd.addString("ID", "doc-2");
    fd.addTV("base", "xbzxbzxbxz",
        "[(0-1)s:x|i:x|_0$<i>0<i>1|-:t$<i>10]"
        + "[(1-2)s:b|i:b|_1$<i>1<i>2]"
        + "[(2-3)s:z|i:z|_2$<i>2<i>3]"
        + "[(3-4)s:x|i:x|_3$<i>3<i>4|<>:x$<b>64<i>3<i>7<i>7<b>0]"
        + "[(4-5)s:b|i:b|_4$<i>4<i>5]"
        + "[(5-6)s:z|i:z|_5$<i>5<i>6]"
        + "[(6-7)s:x|i:x|_6$<i>6<i>7]"
        + "[(7-8)s:b|i:b|_7$<i>7<i>8]"
        + "[(8-9)s:x|i:x|_8$<i>8<i>9]"
        + "[(9-10)s:z|i:z|_9$<i>9<i>10]");
    ki.addDoc(fd);
    ki.commit();

    SpanQuery sq;
    Result kr;

    // <x> element immediately followed by the term "b"
    sq = new SpanNextQuery(
        new SpanElementQuery("base", "x"),
        new SpanTermQuery(new Term("base", "s:b")));
    kr = ki.search(sq, (short) 10);
    // JUnit order is (message, expected, actual); expected value goes first.
    assertEquals("TotalResults", 2, kr.getTotalResults());
    assertEquals("abc[[abcab]]ac", kr.getMatch(0).getSnippetBrackets());
    assertEquals("xbz[[xbzxb]]xz", kr.getMatch(1).getSnippetBrackets());

    // term "c" immediately followed by an <x> element
    sq = new SpanNextQuery(
        new SpanTermQuery(new Term("base", "s:c")),
        new SpanElementQuery("base", "x"));
    kr = ki.search(sq, (short) 10);
    assertEquals(1, kr.getTotalResults());
    assertEquals("ab[[cabca]]bac", kr.getMatch(0).getSnippetBrackets());

    // term "z" immediately followed by an <x> element
    sq = new SpanNextQuery(
        new SpanTermQuery(new Term("base", "s:z")),
        new SpanElementQuery("base", "x"));
    kr = ki.search(sq, (short) 10);
    assertEquals(1, kr.getTotalResults());
    assertEquals("xb[[zxbzx]]bxz", kr.getMatch(0).getSnippetBrackets());
}
Also used : SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) Term(org.apache.lucene.index.Term) KrillIndex(de.ids_mannheim.korap.KrillIndex) SpanElementQuery(de.ids_mannheim.korap.query.SpanElementQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) Result(de.ids_mannheim.korap.response.Result) SpanNextQuery(de.ids_mannheim.korap.query.SpanNextQuery) Test(org.junit.Test)

Aggregations

SpanElementQuery (de.ids_mannheim.korap.query.SpanElementQuery): 57; Test (org.junit.Test): 53; KrillIndex (de.ids_mannheim.korap.KrillIndex): 37; Term (org.apache.lucene.index.Term): 36; SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery): 35; SpanQuery (org.apache.lucene.search.spans.SpanQuery): 32; Result (de.ids_mannheim.korap.response.Result): 26; SpanClassQuery (de.ids_mannheim.korap.query.SpanClassQuery): 16; SpanWithinQuery (de.ids_mannheim.korap.query.SpanWithinQuery): 16; DistanceConstraint (de.ids_mannheim.korap.query.DistanceConstraint): 11; SpanDistanceQuery (de.ids_mannheim.korap.query.SpanDistanceQuery): 11; SpanFocusQuery (de.ids_mannheim.korap.query.SpanFocusQuery): 10; SpanNextQuery (de.ids_mannheim.korap.query.SpanNextQuery): 9; SpanRelationQuery (de.ids_mannheim.korap.query.SpanRelationQuery): 8; SpanAttributeQuery (de.ids_mannheim.korap.query.SpanAttributeQuery): 6; SpanClassFilterQuery (de.ids_mannheim.korap.query.SpanClassFilterQuery): 6; SpanRelationMatchQuery (de.ids_mannheim.korap.query.SpanRelationMatchQuery): 6; SpanWithAttributeQuery (de.ids_mannheim.korap.query.SpanWithAttributeQuery): 6; SpanSegmentQuery (de.ids_mannheim.korap.query.SpanSegmentQuery): 4; ArrayList (java.util.ArrayList): 4