Search in sources:

Example 41 with SpanTermQuery

use of org.apache.lucene.search.spans.SpanTermQuery in project Krill by KorAP.

the class TestDistanceExclusionIndex method testCase6.

/**
 * SkipTo test: indexes two documents, checks an ordered distance query
 * and a plain term query on their own, then checks the two combined in a
 * {@link SpanNextQuery} (term span first, distance span second).
 */
@Test
public void testCase6() throws IOException {
    ki = new KrillIndex();
    ki.addDoc(createFieldDoc1());
    ki.addDoc(createFieldDoc2());
    ki.commit();

    // Ordered distance 0 to 1 between "s:d" and "s:b".
    final SpanQuery distanceQuery = createQuery("s:d", "s:b", 0, 1, true);
    kr = ki.search(distanceQuery, (short) 10);
    assertEquals(4L, kr.getTotalResults());

    // The bare term "s:c" in field "base".
    final SpanTermQuery termQuery = new SpanTermQuery(new Term("base", "s:c"));
    kr = ki.search(termQuery, (short) 10);
    assertEquals(6L, kr.getTotalResults());

    // Term span combined with the distance span.
    final SpanNextQuery nextQuery = new SpanNextQuery(termQuery, distanceQuery);
    kr = ki.search(nextQuery, (short) 10);
    assertEquals(2L, kr.getTotalResults());

    // First match.
    assertEquals(3, kr.getMatch(0).getStartPos());
    assertEquals(5, kr.getMatch(0).getEndPos());

    // Second match.
    assertEquals(8, kr.getMatch(1).getStartPos());
    assertEquals(10, kr.getMatch(1).getEndPos());
}
Also used : SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) Term(org.apache.lucene.index.Term) KrillIndex(de.ids_mannheim.korap.KrillIndex) SpanQuery(org.apache.lucene.search.spans.SpanQuery) SpanNextQuery(de.ids_mannheim.korap.query.SpanNextQuery) Test(org.junit.Test)

Example 42 with SpanTermQuery

use of org.apache.lucene.search.spans.SpanTermQuery in project Krill by KorAP.

the class TestElementDistanceExclusionIndex method testCase5.

/**
 * Skip to: indexes four documents and checks the query built by
 * {@code createQuery("s", "s:c", "s:d", 1, 1, false, true)}, first on its
 * own and then as the first clause of a {@link SpanNextQuery} whose
 * second clause is the term "s:a".
 *
 * @throws IOException
 *             if indexing or searching fails
 */
@Test
public void testCase5() throws IOException {
    ki = new KrillIndex();
    ki.addDoc(createFieldDoc0());
    ki.addDoc(createFieldDoc1());
    ki.addDoc(createFieldDoc0());
    ki.addDoc(createFieldDoc2());
    ki.commit();
    SpanQuery sq = createQuery("s", "s:c", "s:d", 1, 1, false, true);
    kr = ki.search(sq, (short) 10);
    // assertEquals takes (expected, actual); keeping that order makes a
    // failure message report the right value as "expected".
    assertEquals(3, kr.getTotalResults());
    assertEquals(3, kr.getMatch(2).getLocalDocID());
    assertEquals(3, kr.getMatch(2).startPos);
    assertEquals(4, kr.getMatch(2).endPos);
    sq = new SpanNextQuery(createQuery("s", "s:c", "s:d", 1, 1, false, true), new SpanTermQuery(new Term("base", "s:a")));
    kr = ki.search(sq, (short) 10);
    assertEquals(1, kr.getTotalResults());
    assertEquals(3, kr.getMatch(0).getLocalDocID());
    assertEquals(3, kr.getMatch(0).startPos);
    assertEquals(5, kr.getMatch(0).endPos);
}
Also used : SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) Term(org.apache.lucene.index.Term) KrillIndex(de.ids_mannheim.korap.KrillIndex) SpanQuery(org.apache.lucene.search.spans.SpanQuery) SpanNextQuery(de.ids_mannheim.korap.query.SpanNextQuery) Test(org.junit.Test)

Example 43 with SpanTermQuery

use of org.apache.lucene.search.spans.SpanTermQuery in project Krill by KorAP.

the class TestAttributeIndex method testCase7.

/**
 * Arbitrary elements selected by one attribute and one "not" attribute,
 * combined in a {@link SpanWithAttributeQuery}.
 */
@Test
public void testCase7() throws IOException {
    ki.addDoc(createFieldDoc2());
    ki.commit();

    final SpanTermQuery headerTerm = new SpanTermQuery(new Term("base", "@:class=header"));
    final SpanTermQuery bookTerm = new SpanTermQuery(new Term("base", "@:class=book"));

    // NOTE(review): the three-argument constructor is presumably the
    // negated ("not") attribute - confirm against SpanAttributeQuery.
    final List<SpanQuery> attributeQueries = new ArrayList<>();
    attributeQueries.add(new SpanAttributeQuery(headerTerm, true, true));
    attributeQueries.add(new SpanAttributeQuery(bookTerm, true));

    final SpanWithAttributeQuery query = new SpanWithAttributeQuery(attributeQueries, true);
    kr = ki.search(query, (short) 10);

    // Expected {start, end} positions of the matches, in result order.
    final int[][] expectedSpans = { { 0, 5 }, { 1, 2 }, { 2, 5 }, { 6, 7 } };
    assertEquals(expectedSpans.length, kr.getTotalResults());
    for (int i = 0; i < expectedSpans.length; i++) {
        assertEquals(expectedSpans[i][0], kr.getMatch(i).getStartPos());
        assertEquals(expectedSpans[i][1], kr.getMatch(i).getEndPos());
    }
}
Also used : SpanAttributeQuery(de.ids_mannheim.korap.query.SpanAttributeQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) ArrayList(java.util.ArrayList) SpanWithAttributeQuery(de.ids_mannheim.korap.query.SpanWithAttributeQuery) Term(org.apache.lucene.index.Term) SpanQuery(org.apache.lucene.search.spans.SpanQuery) Test(org.junit.Test)

Example 44 with SpanTermQuery

use of org.apache.lucene.search.spans.SpanTermQuery in project Krill by KorAP.

the class TestMatchIndex method indexExampleFocusWithSpan.

/**
 * Indexes the single document "abcabcabac" and searches it with a
 * {@link SpanFocusQuery} on class 3 wrapped around a
 * {@link SpanWithinQuery} (element "s" as class 2, term "s:b" as
 * class 3). Checks the serialized query and the bracket snippet of the
 * first match.
 *
 * @throws IOException
 *             if indexing or searching fails
 */
@Test
public void indexExampleFocusWithSpan() throws IOException {
    KrillIndex ki = new KrillIndex();
    // abcabcabac
    FieldDocument fd = new FieldDocument();
    fd.addTV("base", "abcabcabac", "[(0-1)s:a|i:a|_0$<i>0<i>1|-:t$<i>10]" + "[(1-2)s:b|i:b|_1$<i>1<i>2|<>:s$<b>64<i>1<i>5<i>5]" + "[(2-3)s:c|i:c|_2$<i>2<i>3|<>:s$<b>64<i>2<i>7<i>7]" + "[(3-4)s:a|i:a|_3$<i>3<i>4]" + "[(4-5)s:b|i:b|_4$<i>4<i>5]" + "[(5-6)s:c|i:c|_5$<i>5<i>6]" + "[(6-7)s:a|i:a|_6$<i>6<i>7]" + "[(7-8)s:b|i:b|_7$<i>7<i>8]" + "[(8-9)s:a|i:a|_8$<i>8<i>9]" + "[(9-10)s:c|i:c|_9$<i>9<i>10]");
    ki.addDoc(fd);
    ki.commit();
    SpanQuery sq;
    Result kr;
    // sq = new SpanWithinQuery(new SpanClassQuery(new SpanElementQuery(
    // "base", "s"), (byte) 2), new SpanClassQuery(new SpanTermQuery(
    // new Term("base", "s:b")), (byte) 3));
    // 
    // kr = ki.search(sq, (short) 10);
    // assertEquals(kr.getSerialQuery(),
    // "spanContain({2: <base:s />}, {3: base:s:b})");
    // assertEquals(kr.getMatch(0).getSnippetBrackets(),
    // "a[{2:{3:b}cab}]cabac");
    sq = new SpanFocusQuery(new SpanWithinQuery(new SpanClassQuery(new SpanElementQuery("base", "s"), (byte) 2), new SpanClassQuery(new SpanTermQuery(new Term("base", "s:b")), (byte) 3)), (byte) 3);
    kr = ki.search(sq, (short) 10);
    // assertEquals takes (expected, actual); keeping that order makes a
    // failure message report the right string as "expected".
    assertEquals("focus(3: spanContain({2: <base:s />}, {3: base:s:b}))", kr.getSerialQuery());
    assertEquals("a[[{3:b}]]cabcab ...", kr.getMatch(0).getSnippetBrackets());
}
Also used : SpanClassQuery(de.ids_mannheim.korap.query.SpanClassQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) SpanWithinQuery(de.ids_mannheim.korap.query.SpanWithinQuery) Term(org.apache.lucene.index.Term) KrillIndex(de.ids_mannheim.korap.KrillIndex) SpanFocusQuery(de.ids_mannheim.korap.query.SpanFocusQuery) SpanElementQuery(de.ids_mannheim.korap.query.SpanElementQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test)

Example 45 with SpanTermQuery

use of org.apache.lucene.search.spans.SpanTermQuery in project Krill by KorAP.

the class TestMatchIndex method indexExampleFocusWithSkip.

/**
 * Indexes four documents and searches them with
 * {@code within(<p>, focus(3: within({2:<s>}, {3:a})))}, run through a
 * {@link Krill} object with start index 0, count 20 and a
 * {@link SearchContext}. Checks the document count, the serialized
 * query, the total number of results and the bracket snippets of the
 * first three matches.
 *
 * NOTE(review): this method carries {@code @Ignore} but no
 * {@code @Test}; presumably JUnit 4 then does not discover it at all
 * rather than reporting it as skipped - confirm, and consider adding
 * {@code @Test}. The commented-out {@code fail(...)} below suggests it
 * is disabled because of a known skipping issue.
 *
 * @throws IOException
 *             if indexing or searching fails
 */
@Ignore
public void indexExampleFocusWithSkip() throws IOException {
    KrillIndex ki = new KrillIndex();
    // abcabcabac
    FieldDocument fd = new FieldDocument();
    fd.addTV("base", "abcabcabac", // The payload should be ignored
    // |<>:p#0-10<i>9]" +
    "[(0-1)s:a|i:a|_0$<i>0<i>1|-:t$<i>10]" + "[(1-2)s:b|i:b|_1$<i>1<i>2|<>:s$<b>64<i>1<i>5<i>5]" + "[(2-3)s:c|i:c|_2$<i>2<i>3|<>:s$<b>64<i>2<i>7<i>7]" + "[(3-4)s:a|i:a|_3$<i>3<i>4]" + "[(4-5)s:b|i:b|_4$<i>4<i>5]" + "[(5-6)s:c|i:c|_5$<i>5<i>6]" + "[(6-7)s:a|i:a|_6$<i>6<i>7]" + "[(7-8)s:b|i:b|_7$<i>7<i>8]" + "[(8-9)s:a|i:a|_8$<i>8<i>9]" + "[(9-10)s:c|i:c|_9$<i>9<i>10]");
    ki.addDoc(fd);
    fd = new FieldDocument();
    fd.addTV("base", "gbcgbcgbgc", "[(0-1)s:g|i:g|_0$<i>0<i>1|-:t$<i>10|<>:p$<b>64<i>0<i>10<i>9]" + "[(1-2)s:b|i:b|_1$<i>1<i>2|<>:s$<b>64<i>1<i>5<i>5]" + "[(2-3)s:c|i:c|_2$<i>2<i>3|<>:s$<b>64<i>2<i>7<i>7]" + "[(3-4)s:g|i:g|_3$<i>3<i>4]" + "[(4-5)s:b|i:b|_4$<i>4<i>5]" + "[(5-6)s:c|i:c|_5$<i>5<i>6]" + "[(6-7)s:g|i:g|_6$<i>6<i>7]" + "[(7-8)s:b|i:b|_7$<i>7<i>8]" + "[(8-9)s:g|i:g|_8$<i>8<i>9]" + "[(9-10)s:c|i:c|_9$<i>9<i>10]");
    ki.addDoc(fd);
    fd = new FieldDocument();
    fd.addTV("base", "gbcgbcgbgc", "[(0-1)s:g|i:g|_0$<i>0<i>1|-:t$<i>10]" + "[(1-2)s:b|i:b|_1$<i>1<i>2]" + "[(2-3)s:c|i:c|_2$<i>2<i>3]" + "[(3-4)s:g|i:g|_3$<i>3<i>4]" + "[(4-5)s:b|i:b|_4$<i>4<i>5]" + "[(5-6)s:c|i:c|_5$<i>5<i>6]" + "[(6-7)s:g|i:g|_6$<i>6<i>7]" + "[(7-8)s:b|i:b|_7$<i>7<i>8]" + "[(8-9)s:g|i:g|_8$<i>8<i>9]" + "[(9-10)s:c|i:c|_9$<i>9<i>10]");
    ki.addDoc(fd);
    fd = new FieldDocument();
    // contains(<p>, focus(3: contains({2:<s>}, {3:a})))
    fd.addTV("base", "acabcabac", "[(0-1)s:a|i:a|_0$<i>0<i>1|-:t$<i>10|<>:p$<b>64<i>0<i>9<i>8]" + "[(1-2)s:b|i:b|_1$<i>1<i>2|<>:s$<b>64<i>1<i>5<i>5]" + "[(2-3)s:a|i:a|_2$<i>2<i>3|<>:s$<b>64<i>2<i>7<i>7]" + "[(3-4)s:b|i:b|_3$<i>3<i>4]" + "[(4-5)s:c|i:c|_4$<i>4<i>5]" + "[(5-6)s:a|i:a|_5$<i>5<i>6]" + "[(6-7)s:b|i:b|_6$<i>6<i>7]" + "[(7-8)s:a|i:a|_7$<i>7<i>8]" + "[(8-9)s:c|i:c|_8$<i>8<i>9]");
    ki.addDoc(fd);
    ki.commit();
    SpanQuery sq;
    Result kr;
    KrillCollection kc = new KrillCollection(ki);
    assertEquals("Documents", 4, kc.numberOf("documents"));
    // within(<p>, focus(3:within({2:<s>}, {3:a})))
    sq = new SpanWithinQuery(new SpanElementQuery("base", "p"), new SpanFocusQuery(new SpanWithinQuery(new SpanClassQuery(new SpanElementQuery("base", "s"), (byte) 2), new SpanClassQuery(new SpanTermQuery(new Term("base", "s:a")), (byte) 3)), (byte) 3));
    // fail("Skipping may go horribly wrong! (Known issue)");
    Krill ks = new Krill(sq);
    ks.getMeta().setStartIndex(0).setCount((short) 20).setContext(new SearchContext(true, (short) 5, true, (short) 5));
    kr = ks.apply(ki);
    // kr = ki.search(kc, sq, 0, (short) 20, true, (short) 5, true, (short) 5);
    // assertEquals takes (expected, actual), as in the assertions below.
    assertEquals("spanContain(<base:p />, focus(3: spanContain({2: <base:s />}, {3: base:s:a})))", kr.getSerialQuery());
    assertEquals(12, kr.getTotalResults());
    assertEquals("[a{2:bc{3:a}b}cabac]", kr.getMatch(0).getSnippetBrackets());
    assertEquals("[ab{2:c{3:a}bcab}ac]", kr.getMatch(1).getSnippetBrackets());
    assertEquals("[ab{2:cabc{3:a}}bac]", kr.getMatch(2).getSnippetBrackets());
}
Also used : SearchContext(de.ids_mannheim.korap.response.SearchContext) Term(org.apache.lucene.index.Term) KrillIndex(de.ids_mannheim.korap.KrillIndex) SpanElementQuery(de.ids_mannheim.korap.query.SpanElementQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) Result(de.ids_mannheim.korap.response.Result) Krill(de.ids_mannheim.korap.Krill) SpanClassQuery(de.ids_mannheim.korap.query.SpanClassQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) SpanWithinQuery(de.ids_mannheim.korap.query.SpanWithinQuery) SpanFocusQuery(de.ids_mannheim.korap.query.SpanFocusQuery) KrillCollection(de.ids_mannheim.korap.KrillCollection) Ignore(org.junit.Ignore)

Aggregations

SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery)196 Term (org.apache.lucene.index.Term)191 SpanQuery (org.apache.lucene.search.spans.SpanQuery)121 Test (org.junit.Test)103 SpanNearQuery (org.apache.lucene.search.spans.SpanNearQuery)59 KrillIndex (de.ids_mannheim.korap.KrillIndex)57 SpanElementQuery (de.ids_mannheim.korap.query.SpanElementQuery)35 SpanNextQuery (de.ids_mannheim.korap.query.SpanNextQuery)34 SpanOrQuery (org.apache.lucene.search.spans.SpanOrQuery)31 Result (de.ids_mannheim.korap.response.Result)30 ArrayList (java.util.ArrayList)27 Document (org.apache.lucene.document.Document)24 IndexReader (org.apache.lucene.index.IndexReader)24 BooleanQuery (org.apache.lucene.search.BooleanQuery)22 Query (org.apache.lucene.search.Query)22 TermQuery (org.apache.lucene.search.TermQuery)22 TopDocs (org.apache.lucene.search.TopDocs)21 SpanFocusQuery (de.ids_mannheim.korap.query.SpanFocusQuery)20 IndexSearcher (org.apache.lucene.search.IndexSearcher)20 SpanRelationQuery (de.ids_mannheim.korap.query.SpanRelationQuery)18