Search in sources:

Example 1 with SearchContext

use of de.ids_mannheim.korap.response.SearchContext in project Krill by KorAP.

In the class TestMatchIndex, method indexExampleFocusWithSkip.

/**
 * Indexes four small token-vector documents and runs the nested span query
 * {@code within(<p>, focus(3: within({2:<s>}, {3:a})))} against them,
 * checking the serialized query, the total hit count and the first three
 * bracket snippets.
 *
 * <p>The test is disabled via {@code @Ignore}; an earlier revision carried the
 * note "Skipping may go horribly wrong! (Known issue)".
 *
 * @throws IOException declared by the original test signature
 */
@Ignore
public void indexExampleFocusWithSkip() throws IOException {
    KrillIndex ki = new KrillIndex();

    // Document 1: "abcabcabac". The <p> annotation was left commented out in
    // the original fixture (`|<>:p#0-10<i>9]`), so only <s> elements are indexed here.
    FieldDocument fd = new FieldDocument();
    fd.addTV("base", "abcabcabac",
            "[(0-1)s:a|i:a|_0$<i>0<i>1|-:t$<i>10]"
            + "[(1-2)s:b|i:b|_1$<i>1<i>2|<>:s$<b>64<i>1<i>5<i>5]"
            + "[(2-3)s:c|i:c|_2$<i>2<i>3|<>:s$<b>64<i>2<i>7<i>7]"
            + "[(3-4)s:a|i:a|_3$<i>3<i>4]"
            + "[(4-5)s:b|i:b|_4$<i>4<i>5]"
            + "[(5-6)s:c|i:c|_5$<i>5<i>6]"
            + "[(6-7)s:a|i:a|_6$<i>6<i>7]"
            + "[(7-8)s:b|i:b|_7$<i>7<i>8]"
            + "[(8-9)s:a|i:a|_8$<i>8<i>9]"
            + "[(9-10)s:c|i:c|_9$<i>9<i>10]");
    ki.addDoc(fd);

    // Document 2: "gbcgbcgbgc" with <p> and <s> elements but no "s:a" token.
    fd = new FieldDocument();
    fd.addTV("base", "gbcgbcgbgc",
            "[(0-1)s:g|i:g|_0$<i>0<i>1|-:t$<i>10|<>:p$<b>64<i>0<i>10<i>9]"
            + "[(1-2)s:b|i:b|_1$<i>1<i>2|<>:s$<b>64<i>1<i>5<i>5]"
            + "[(2-3)s:c|i:c|_2$<i>2<i>3|<>:s$<b>64<i>2<i>7<i>7]"
            + "[(3-4)s:g|i:g|_3$<i>3<i>4]"
            + "[(4-5)s:b|i:b|_4$<i>4<i>5]"
            + "[(5-6)s:c|i:c|_5$<i>5<i>6]"
            + "[(6-7)s:g|i:g|_6$<i>6<i>7]"
            + "[(7-8)s:b|i:b|_7$<i>7<i>8]"
            + "[(8-9)s:g|i:g|_8$<i>8<i>9]"
            + "[(9-10)s:c|i:c|_9$<i>9<i>10]");
    ki.addDoc(fd);

    // Document 3: "gbcgbcgbgc" with plain tokens only (no element annotations).
    fd = new FieldDocument();
    fd.addTV("base", "gbcgbcgbgc",
            "[(0-1)s:g|i:g|_0$<i>0<i>1|-:t$<i>10]"
            + "[(1-2)s:b|i:b|_1$<i>1<i>2]"
            + "[(2-3)s:c|i:c|_2$<i>2<i>3]"
            + "[(3-4)s:g|i:g|_3$<i>3<i>4]"
            + "[(4-5)s:b|i:b|_4$<i>4<i>5]"
            + "[(5-6)s:c|i:c|_5$<i>5<i>6]"
            + "[(6-7)s:g|i:g|_6$<i>6<i>7]"
            + "[(7-8)s:b|i:b|_7$<i>7<i>8]"
            + "[(8-9)s:g|i:g|_8$<i>8<i>9]"
            + "[(9-10)s:c|i:c|_9$<i>9<i>10]");
    ki.addDoc(fd);

    // Document 4: carries <p>, <s> and "s:a" tokens.
    // NOTE(review): the primary text "acabcabac" does not spell out the token
    // sequence below (a b a b c a b a c) - confirm whether that is intended.
    fd = new FieldDocument();
    fd.addTV("base", "acabcabac",
            "[(0-1)s:a|i:a|_0$<i>0<i>1|-:t$<i>10|<>:p$<b>64<i>0<i>9<i>8]"
            + "[(1-2)s:b|i:b|_1$<i>1<i>2|<>:s$<b>64<i>1<i>5<i>5]"
            + "[(2-3)s:a|i:a|_2$<i>2<i>3|<>:s$<b>64<i>2<i>7<i>7]"
            + "[(3-4)s:b|i:b|_3$<i>3<i>4]"
            + "[(4-5)s:c|i:c|_4$<i>4<i>5]"
            + "[(5-6)s:a|i:a|_5$<i>5<i>6]"
            + "[(6-7)s:b|i:b|_6$<i>6<i>7]"
            + "[(7-8)s:a|i:a|_7$<i>7<i>8]"
            + "[(8-9)s:c|i:c|_8$<i>8<i>9]");
    ki.addDoc(fd);
    ki.commit();

    KrillCollection kc = new KrillCollection(ki);
    assertEquals("Documents", 4, kc.numberOf("documents"));

    // within(<p>, focus(3:within({2:<s>}, {3:a})))
    SpanQuery sq = new SpanWithinQuery(
            new SpanElementQuery("base", "p"),
            new SpanFocusQuery(
                    new SpanWithinQuery(
                            new SpanClassQuery(new SpanElementQuery("base", "s"), (byte) 2),
                            new SpanClassQuery(new SpanTermQuery(new Term("base", "s:a")), (byte) 3)),
                    (byte) 3));

    // Search from offset 0, at most 20 hits, with a SearchContext built from
    // (true, 5, true, 5).
    // Legacy equivalent: ki.search(kc, sq, 0, (short) 20, true, (short) 5, true, (short) 5)
    Krill ks = new Krill(sq);
    ks.getMeta().setStartIndex(0).setCount((short) 20)
            .setContext(new SearchContext(true, (short) 5, true, (short) 5));
    Result kr = ks.apply(ki);

    // Expected value goes first so that a failure message reads correctly.
    assertEquals(
            "spanContain(<base:p />, focus(3: spanContain({2: <base:s />}, {3: base:s:a})))",
            kr.getSerialQuery());
    assertEquals(12, kr.getTotalResults());
    assertEquals("[a{2:bc{3:a}b}cabac]", kr.getMatch(0).getSnippetBrackets());
    assertEquals("[ab{2:c{3:a}bcab}ac]", kr.getMatch(1).getSnippetBrackets());
    assertEquals("[ab{2:cabc{3:a}}bac]", kr.getMatch(2).getSnippetBrackets());
}
Also used : SearchContext(de.ids_mannheim.korap.response.SearchContext) Term(org.apache.lucene.index.Term) KrillIndex(de.ids_mannheim.korap.KrillIndex) SpanElementQuery(de.ids_mannheim.korap.query.SpanElementQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) Result(de.ids_mannheim.korap.response.Result) Krill(de.ids_mannheim.korap.Krill) SpanClassQuery(de.ids_mannheim.korap.query.SpanClassQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) SpanWithinQuery(de.ids_mannheim.korap.query.SpanWithinQuery) SpanFocusQuery(de.ids_mannheim.korap.query.SpanFocusQuery) KrillCollection(de.ids_mannheim.korap.KrillCollection) Ignore(org.junit.Ignore)

Example 2 with SearchContext

use of de.ids_mannheim.korap.response.SearchContext in project Krill by KorAP.

In the class TestKrillCollectionIndex, method filterExampleFromLegacy.

/**
 * Builds an index from seven gzipped wiki JSON resources, narrows a virtual
 * collection step by step with {@code textClass} / {@code corpusID} filters,
 * runs a part-of-speech query restricted to that collection, and finally
 * widens the collection again with {@code extend}.
 *
 * <p>After each step the document, token, sentence and paragraph counts of
 * the collection are asserted.
 *
 * @throws Exception declared by the original test signature
 */
@Test
public void filterExampleFromLegacy() throws Exception {
    // Construct the index from the test resources (single commit at the end)
    KrillIndex ki = new KrillIndex();
    for (String i : new String[] { "00001", "00002", "00003", "00004", "00005", "00006", "02439" }) {
        ki.addDoc(getClass().getResourceAsStream("/wiki/" + i + ".json.gz"), true);
    }
    ki.commit();

    // Create the virtual collection over the whole index
    KrillCollection kc = new KrillCollection(ki);
    assertEquals("Documents", 7, kc.numberOf("documents"));

    // Restrict to documents having both textClass "reisen" and
    // "freizeit-unterhaltung".
    // Legacy equivalent:
    //   kc.filter(kf.and("textClass", "reisen").and("textClass", "freizeit-unterhaltung"));
    kc.fromBuilder(kc.build().andGroup()
            .with(kc.build().term("textClass", "reisen"))
            .with(kc.build().term("textClass", "freizeit-unterhaltung")));
    assertEquals("Documents", 5, kc.numberOf("documents"));
    assertEquals("Tokens", 1678, kc.numberOf("tokens"));
    assertEquals("Sentences", 194, kc.numberOf("sentences"));
    assertEquals("Paragraphs", 139, kc.numberOf("paragraphs"));

    // Subset further to documents that also have textClass "kultur".
    // Legacy equivalent: kc.filter(kf.and("textClass", "kultur"));
    // An alternative via fromBuilder would be:
    //   kc.fromBuilder(kc.build().andGroup()
    //       .with(kc.getBuilder())
    //       .with(kc.build().term("textClass", "kultur")));
    kc.filter(kc.build().term("textClass", "kultur"));
    assertEquals("Documents", 1, kc.numberOf("documents"));
    assertEquals("Tokens", 405, kc.numberOf("tokens"));
    assertEquals("Sentences", 75, kc.numberOf("sentences"));
    assertEquals("Paragraphs", 48, kc.numberOf("paragraphs"));

    // Filtering on corpusID "WPD" leaves the counts unchanged.
    // Legacy equivalent: kc.filter(kf.and("corpusID", "WPD"));
    kc.filter(kc.build().term("corpusID", "WPD"));
    assertEquals("Documents", 1, kc.numberOf("documents"));
    assertEquals("Tokens", 405, kc.numberOf("tokens"));
    assertEquals("Sentences", 75, kc.numberOf("sentences"));
    assertEquals("Paragraphs", 48, kc.numberOf("paragraphs"));

    // Create a query and run it against the filtered collection.
    // Legacy equivalent:
    //   ki.search(kc, query, 0, (short) 20, true, (short) 5, true, (short) 5);
    Krill ks = new Krill(new QueryBuilder("tokens").seg("opennlp/p:NN").with("tt/p:NN"));
    ks.setCollection(kc).getMeta().setStartIndex(0).setCount((short) 20)
            .setContext(new SearchContext(true, (short) 5, true, (short) 5));
    Result kr = ks.apply(ki);

    // Expected value goes first so that a failure message reads correctly.
    assertEquals(70, kr.getTotalResults());

    // Extending with "uninteresting" leaves the document count at 1
    kc.extend(kc.build().term("textClass", "uninteresting"));
    assertEquals("Documents", 1, kc.numberOf("documents"));

    // Extending with "wissenschaft" grows the collection to 3 documents
    kc.extend(kc.build().term("textClass", "wissenschaft"));
    assertEquals("Documents", 3, kc.numberOf("documents"));
    assertEquals("Tokens", 1669, kc.numberOf("tokens"));
    assertEquals("Sentences", 188, kc.numberOf("sentences"));
    assertEquals("Paragraphs", 130, kc.numberOf("paragraphs"));
}
Also used : Krill(de.ids_mannheim.korap.Krill) SearchContext(de.ids_mannheim.korap.response.SearchContext) QueryBuilder(de.ids_mannheim.korap.query.QueryBuilder) KrillIndex(de.ids_mannheim.korap.KrillIndex) KrillCollection(de.ids_mannheim.korap.KrillCollection) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test)

Example 3 with SearchContext

use of de.ids_mannheim.korap.response.SearchContext in project Krill by KorAP.

In the class TestKrillCollectionIndex, method uidCollectionLegacy.

/**
 * Indexes seven gzipped wiki JSON resources under consecutive UIDs starting
 * at 1, checks the index-wide counts and a plain term search, then restricts
 * a virtual collection to UIDs "2", "3" and "4" and verifies both the
 * collection counts and the number of hits for the same query within it.
 *
 * @throws IOException declared by the original test signature
 */
@Test
public void uidCollectionLegacy() throws IOException {
    // Construct the index, assigning UIDs 1..7 in resource order
    KrillIndex ki = new KrillIndex();
    int uid = 1;
    for (String i : new String[] { "00001", "00002", "00003", "00004", "00005", "00006", "02439" }) {
        // The returned document is not needed by this test.
        ki.addDoc(uid++, getClass().getResourceAsStream("/wiki/" + i + ".json.gz"), true);
    }
    ki.commit();

    assertEquals("Documents", 7, ki.numberOf("documents"));
    assertEquals("Paragraphs", 174, ki.numberOf("paragraphs"));
    assertEquals("Sentences", 281, ki.numberOf("sentences"));
    assertEquals("Tokens", 2661, ki.numberOf("tokens"));

    // Unrestricted search for the surface form "der"
    SpanQuery sq = new SpanTermQuery(new Term("tokens", "s:der"));
    Result kr = ki.search(sq, (short) 10);
    assertEquals(86, kr.getTotalResults());

    // Create a virtual collection limited to three UIDs; the index is
    // attached after the filter has been set.
    KrillCollection kc = new KrillCollection();
    kc.filterUIDs(new String[] { "2", "3", "4" });
    kc.setIndex(ki);
    assertEquals("Documents", 3, kc.numberOf("documents"));
    assertEquals("Paragraphs", 46, kc.numberOf("paragraphs"));
    assertEquals("Sentences", 103, kc.numberOf("sentences"));
    assertEquals("Tokens", 1229, kc.numberOf("tokens"));

    // Same query, now restricted to the collection.
    // Legacy equivalent: ki.search(kc, sq, 0, (short) 20, true, (short) 5, true, (short) 5)
    Krill ks = new Krill(sq);
    ks.setCollection(kc).getMeta().setStartIndex(0).setCount((short) 20)
            .setContext(new SearchContext(true, (short) 5, true, (short) 5));
    kr = ks.apply(ki);
    assertEquals(39, kr.getTotalResults());
}
Also used : Krill(de.ids_mannheim.korap.Krill) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) SearchContext(de.ids_mannheim.korap.response.SearchContext) Term(org.apache.lucene.index.Term) FieldDocument(de.ids_mannheim.korap.index.FieldDocument) KrillIndex(de.ids_mannheim.korap.KrillIndex) KrillCollection(de.ids_mannheim.korap.KrillCollection) SpanQuery(org.apache.lucene.search.spans.SpanQuery) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test)

Example 4 with SearchContext

use of de.ids_mannheim.korap.response.SearchContext in project Krill by KorAP.

In the class TestKrillCollectionIndex, method filterExampleAtomicLegacy.

/**
 * Same scenario as the single-commit filter example, but the index is
 * committed after every added document ("multiple atomic indices" per the
 * original author's note). The collection is narrowed with filters, queried,
 * widened with {@code extend}, and finally re-checked after documents with
 * textClass "wissenschaft" have been deleted from the index.
 *
 * @throws Exception declared by the original test signature
 */
@Test
public void filterExampleAtomicLegacy() throws Exception {
    // Construct the index, committing after each document
    KrillIndex ki = new KrillIndex();
    for (String i : new String[] { "00001", "00002", "00003", "00004", "00005", "00006", "02439" }) {
        ki.addDoc(getClass().getResourceAsStream("/wiki/" + i + ".json.gz"), true);
        ki.commit();
    }

    // Create the virtual collection over the whole index
    KrillCollection kc = new KrillCollection(ki);
    assertEquals("Documents", 7, kc.numberOf("documents"));

    // If this is set - everything is fine automatically ...
    kc.filter(kc.build().term("corpusID", "WPD"));
    assertEquals("Documents", 7, kc.numberOf("documents"));

    // Restrict to documents having both textClass "reisen" and
    // "freizeit-unterhaltung".
    // Legacy equivalent:
    //   kc.filter(kf.and("textClass", "reisen").and("textClass", "freizeit-unterhaltung"));
    kc.filter(kc.build().andGroup()
            .with(kc.build().term("textClass", "reisen"))
            .with(kc.build().term("textClass", "freizeit-unterhaltung")));
    assertEquals("Documents", 5, kc.numberOf("documents"));
    assertEquals("Tokens", 1678, kc.numberOf("tokens"));
    assertEquals("Sentences", 194, kc.numberOf("sentences"));
    assertEquals("Paragraphs", 139, kc.numberOf("paragraphs"));

    // Subset further to documents that also have textClass "kultur".
    // Legacy equivalent: kc.filter(kf.and("textClass", "kultur"));
    kc.filter(kc.build().term("textClass", "kultur"));
    assertEquals("Documents", 1, kc.numberOf("documents"));
    assertEquals("Tokens", 405, kc.numberOf("tokens"));
    assertEquals("Sentences", 75, kc.numberOf("sentences"));
    assertEquals("Paragraphs", 48, kc.numberOf("paragraphs"));

    // This is already filtered though ...
    // Legacy equivalent: kc.filter(kf.and("corpusID", "WPD"));
    kc.filter(kc.build().term("corpusID", "WPD"));
    assertEquals("Documents", 1, kc.numberOf("documents"));
    assertEquals("Tokens", 405, kc.numberOf("tokens"));
    assertEquals("Sentences", 75, kc.numberOf("sentences"));
    assertEquals("Paragraphs", 48, kc.numberOf("paragraphs"));

    // Create a query and run it against the filtered collection.
    // Legacy equivalent:
    //   ki.search(kc, query, 0, (short) 20, true, (short) 5, true, (short) 5);
    Krill ks = new Krill(new QueryBuilder("tokens").seg("opennlp/p:NN").with("tt/p:NN"));
    ks.setCollection(kc).getMeta().setStartIndex(0).setCount((short) 20)
            .setContext(new SearchContext(true, (short) 5, true, (short) 5));
    Result kr = ks.apply(ki);

    // Expected value goes first so that a failure message reads correctly.
    assertEquals(70, kr.getTotalResults());

    // Legacy equivalent: kc.extend(kf.and("textClass", "uninteresting"));
    kc.extend(kc.build().term("textClass", "uninteresting"));
    assertEquals("Documents", 1, kc.numberOf("documents"));

    kc.extend(kc.build().term("textClass", "wissenschaft"));
    assertEquals("Documents", 3, kc.numberOf("documents"));
    assertEquals("Tokens", 1669, kc.numberOf("tokens"));
    assertEquals("Sentences", 188, kc.numberOf("sentences"));
    assertEquals("Paragraphs", 130, kc.numberOf("paragraphs"));

    // System.err.println(kc.toString());
    // Test collectionbuilder simplifier!
    /*
        OrGroup(
                AndGroup(
                         corpusID:WPD
                         textClass:reisen
                         textClass:freizeit-unterhaltung
                         textClass:kultur
                         corpusID:WPD
                         )
                textClass:uninteresting
                textClass:wissenschaft
        )
        */

    // Delete the "wissenschaft" documents from the index; after the commit
    // the collection counts drop back to the single "kultur" document.
    assertTrue(ki.delDocs("textClass", "wissenschaft"));
    ki.commit();
    assertEquals("Documents", 1, kc.numberOf("documents"));
    assertEquals("Tokens", 405, kc.numberOf("tokens"));
    assertEquals("Sentences", 75, kc.numberOf("sentences"));
    assertEquals("Paragraphs", 48, kc.numberOf("paragraphs"));
}
Also used : Krill(de.ids_mannheim.korap.Krill) CollectionBuilder(de.ids_mannheim.korap.collection.CollectionBuilder) SearchContext(de.ids_mannheim.korap.response.SearchContext) QueryBuilder(de.ids_mannheim.korap.query.QueryBuilder) KrillIndex(de.ids_mannheim.korap.KrillIndex) KrillCollection(de.ids_mannheim.korap.KrillCollection) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test)

Example 5 with SearchContext

use of de.ids_mannheim.korap.response.SearchContext in project Krill by KorAP.

In the class TestKrillCollectionIndex, method filterExample2Legacy.

/**
 * Indexes seven gzipped wiki JSON resources plus one additional
 * "fakemeta" document in a second commit, filters a virtual collection by
 * two {@code textClass} terms, runs a part-of-speech query against it, and
 * then verifies that adding a {@code corpusID} filter of "QQQ" empties both
 * the collection and the search result.
 *
 * @throws Exception declared by the original test signature
 */
@Test
public void filterExample2Legacy() throws Exception {
    // Construct the index
    KrillIndex ki = new KrillIndex();
    for (String i : new String[] { "00001", "00002", "00003", "00004", "00005", "00006", "02439" }) {
        ki.addDoc(getClass().getResourceAsStream("/wiki/" + i + ".json.gz"), true);
    }
    ki.commit();

    // One more document, added in a separate commit
    ki.addDoc(getClass().getResourceAsStream("/wiki/00012-fakemeta.json.gz"), true);
    ki.commit();

    // Create the virtual collection.
    // Legacy equivalent:
    //   CollectionBuilderLegacy kf = new CollectionBuilderLegacy();
    //   KrillCollectionLegacy kc = new KrillCollectionLegacy(ki);
    //   kc.filter(kf.and("textClass", "reisen").and("textClass", "freizeit-unterhaltung"));
    KrillCollection kc = new KrillCollection(ki);
    CollectionBuilder cb = kc.build();
    kc.filter(cb.andGroup()
            .with(cb.term("textClass", "reisen"))
            .with(cb.term("textClass", "freizeit-unterhaltung")));
    assertEquals("Documents", 5, kc.numberOf("documents"));
    assertEquals("Tokens", 1678, kc.numberOf("tokens"));
    assertEquals("Sentences", 194, kc.numberOf("sentences"));
    assertEquals("Paragraphs", 139, kc.numberOf("paragraphs"));

    // Create a query and run it against the filtered collection
    Krill ks = new Krill(new QueryBuilder("tokens").seg("opennlp/p:NN").with("tt/p:NN"));
    ks.setCollection(kc).getMeta().setStartIndex(0).setCount((short) 20)
            .setContext(new SearchContext(true, (short) 5, true, (short) 5));
    Result kr = ks.apply(ki);

    // Expected value goes first so that a failure message reads correctly.
    assertEquals(369, kr.getTotalResults());

    // Filtering on corpusID "QQQ" leaves nothing in the collection.
    // Legacy equivalent: kc.filter(kf.and("corpusID", "QQQ"));
    kc.filter(cb.term("corpusID", "QQQ"));
    assertEquals("Documents", 0, kc.numberOf("documents"));
    assertEquals("Tokens", 0, kc.numberOf("tokens"));
    assertEquals("Sentences", 0, kc.numberOf("sentences"));
    assertEquals("Paragraphs", 0, kc.numberOf("paragraphs"));

    // Re-run the same query against the now-empty collection.
    // Legacy equivalent:
    //   ki.search(kc, query, 0, (short) 20, true, (short) 5, true, (short) 5);
    ks.setCollection(kc);
    kr = ks.apply(ki);
    assertEquals(0, kr.getTotalResults());
}
Also used : Krill(de.ids_mannheim.korap.Krill) CollectionBuilder(de.ids_mannheim.korap.collection.CollectionBuilder) SearchContext(de.ids_mannheim.korap.response.SearchContext) QueryBuilder(de.ids_mannheim.korap.query.QueryBuilder) KrillIndex(de.ids_mannheim.korap.KrillIndex) KrillCollection(de.ids_mannheim.korap.KrillCollection) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test)

Aggregations

KrillIndex (de.ids_mannheim.korap.KrillIndex): 7; SearchContext (de.ids_mannheim.korap.response.SearchContext): 7; Krill (de.ids_mannheim.korap.Krill): 6; Result (de.ids_mannheim.korap.response.Result): 6; Test (org.junit.Test): 6; KrillCollection (de.ids_mannheim.korap.KrillCollection): 5; QueryBuilder (de.ids_mannheim.korap.query.QueryBuilder): 3; CollectionBuilder (de.ids_mannheim.korap.collection.CollectionBuilder): 2; Term (org.apache.lucene.index.Term): 2; SpanQuery (org.apache.lucene.search.spans.SpanQuery): 2; SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery): 2; TestSimple.getJsonString (de.ids_mannheim.korap.TestSimple.getJsonString): 1; FieldDocument (de.ids_mannheim.korap.index.FieldDocument): 1; SpanClassQuery (de.ids_mannheim.korap.query.SpanClassQuery): 1; SpanElementQuery (de.ids_mannheim.korap.query.SpanElementQuery): 1; SpanFocusQuery (de.ids_mannheim.korap.query.SpanFocusQuery): 1; SpanWithinQuery (de.ids_mannheim.korap.query.SpanWithinQuery): 1; Match (de.ids_mannheim.korap.response.Match): 1; Ignore (org.junit.Ignore): 1