Search in sources :

Example 31 with CollectionBuilder

use of de.ids_mannheim.korap.collection.CollectionBuilder in project Krill by KorAP.

the class TestKrillCollectionIndex method testIndexWithMultipleCommitsAndDeletes.

/**
 * Checks that document counts of author-restricted collections stay
 * correct while the same index goes through several commits: an
 * initial fill, an added document, a deletion and a re-addition.
 */
@Test
public void testIndexWithMultipleCommitsAndDeletes() throws IOException {
    // First commit: two documents. The counts asserted below imply
    // one is by Frank and one by Peter.
    ki = new KrillIndex();
    ki.addDoc(createDoc1());
    ki.addDoc(createDoc2());
    ki.commit();

    final CollectionBuilder builder = new CollectionBuilder();
    final KrillCollection collection = new KrillCollection(ki);
    assertAuthorDocCounts(collection, builder, 1, 1, 0, 2);

    // Second commit: add Sebastian's doc
    ki.addDoc(createDoc3());
    ki.commit();
    assertAuthorDocCounts(collection, builder, 1, 1, 1, 3);

    // Third commit: remove Peter's doc
    ki.delDocs("author", "Peter");
    ki.commit();
    assertAuthorDocCounts(collection, builder, 1, 0, 1, 2);

    // Fourth commit: re-add Peter's doc
    ki.addDoc(createDoc2());
    ki.commit();
    assertAuthorDocCounts(collection, builder, 1, 1, 1, 3);
}

/**
 * Re-targets {@code collection} at four author constraints in turn
 * (Frank, Peter, Sebastian, and "not Michael") and asserts the
 * document count reported for each of them.
 *
 * @param collection the collection whose constraint is replaced
 * @param builder    the builder used to create the author terms
 * @param frank      expected count for author "Frank"
 * @param peter      expected count for author "Peter"
 * @param sebastian  expected count for author "Sebastian"
 * @param notMichael expected count for the negated author "Michael"
 */
private void assertAuthorDocCounts(KrillCollection collection, CollectionBuilder builder,
        int frank, int peter, int sebastian, int notMichael) throws IOException {
    collection.fromBuilder(builder.term("author", "Frank"));
    assertEquals(frank, collection.docCount());

    collection.fromBuilder(builder.term("author", "Peter"));
    assertEquals(peter, collection.docCount());

    collection.fromBuilder(builder.term("author", "Sebastian"));
    assertEquals(sebastian, collection.docCount());

    collection.fromBuilder(builder.term("author", "Michael").not());
    assertEquals(notMichael, collection.docCount());
}
Also used : CollectionBuilder(de.ids_mannheim.korap.collection.CollectionBuilder) KrillIndex(de.ids_mannheim.korap.KrillIndex) KrillCollection(de.ids_mannheim.korap.KrillCollection) Test(org.junit.Test)

Example 32 with CollectionBuilder

use of de.ids_mannheim.korap.collection.CollectionBuilder in project Krill by KorAP.

the class TestKrill method searchJSONmultitermRewriteBug.

/**
 * Regression test for a multi-term rewrite bug: runs the query stored
 * in {@code /queries/bugs/multiterm_rewrite.jsonld} against a single
 * indexed BZK document and checks the serialized query, the total
 * number of results and a selection of match snippets.
 */
@Test
public void searchJSONmultitermRewriteBug() throws IOException {
    // Construct index
    KrillIndex ki = new KrillIndex();
    assertEquals(0, ki.numberOf("documents"));

    // Indexing test files
    FieldDocument fd = ki.addDoc(1, getClass().getResourceAsStream("/bzk/D59-00089.json.gz"), true);
    ki.commit();
    assertEquals(1, ki.numberOf("documents"));
    assertEquals("BZK", fd.getCorpusSigle());

    // [tt/p="A.*"]{0,3}[tt/p="N.*"]
    String json = getJsonString(getClass().getResource("/queries/bugs/multiterm_rewrite.jsonld").getFile());
    Krill ks = new Krill(json);
    KrillCollection kc = ks.getCollection();

    // No index was set
    assertEquals(-1, kc.numberOf("documents"));
    kc.setIndex(ki);

    // Index was set but vc restricted to WPD
    assertEquals(0, kc.numberOf("documents"));

    // Widen the collection: OR its current constraint with the
    // corpusSigle "BZK" so that the indexed document is included.
    CollectionBuilder cb = new CollectionBuilder();
    kc.fromBuilder(cb.orGroup().with(kc.getBuilder()).with(cb.term("corpusSigle", "BZK")));
    ks.setCollection(kc);
    assertEquals(1, kc.numberOf("documents"));

    Result kr = ks.apply(ki);

    // Expected value first, so that failure messages label the
    // expected and actual values correctly.
    assertEquals("spanOr([SpanMultiTermQueryWrapper(tokens:/tt/p:N.*/), " + "spanNext(spanRepetition(SpanMultiTermQueryWrapper" + "(tokens:/tt/p:A.*/){1,3}), " + "SpanMultiTermQueryWrapper(tokens:/tt/p:N.*/))])", kr.getSerialQuery());
    assertEquals(58, kr.getTotalResults());
    assertEquals(0, kr.getStartIndex());
    assertEquals("[[Saragat-Partei]] zerfällt Rom (ADN) die von dem ...", kr.getMatch(0).getSnippetBrackets());
    assertEquals("[[Saragat-Partei]] zerfällt Rom (ADN) die von dem ...", kr.getMatch(1).getSnippetBrackets());
    assertEquals("Saragat-Partei zerfällt [[Rom]] (ADN) " + "die von dem Rechtssozialisten Saragat ...", kr.getMatch(2).getSnippetBrackets());
    assertEquals("Saragat-Partei zerfällt Rom ([[ADN]]) " + "die von dem Rechtssozialisten Saragat geführte ...", kr.getMatch(3).getSnippetBrackets());
    assertEquals("... dem Namen \"Einheitsbewegung der sozialistischen " + "Initiative\" [[eine neue politische Gruppierung]] " + "ins Leben gerufen hatten. Pressemeldungen zufolge ...", kr.getMatch(23).getSnippetBrackets());
}
Also used : Krill(de.ids_mannheim.korap.Krill) CollectionBuilder(de.ids_mannheim.korap.collection.CollectionBuilder) TestSimple.getJsonString(de.ids_mannheim.korap.TestSimple.getJsonString) FieldDocument(de.ids_mannheim.korap.index.FieldDocument) KrillIndex(de.ids_mannheim.korap.KrillIndex) KrillCollection(de.ids_mannheim.korap.KrillCollection) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test)

Example 33 with CollectionBuilder

use of de.ids_mannheim.korap.collection.CollectionBuilder in project Krill by KorAP.

the class TestWPDIndex method testCase4.

/**
 * Element distance exclusion.
 *
 * Runs an element distance query for "s:weg" and "s:fahren" within
 * "s" elements, first against the whole index and then restricted to
 * two document IDs, and checks the total number of results and the
 * positions of the first two restricted matches.
 */
@Test
public void testCase4() throws IOException {
    // NOTE(review): the meaning of the trailing arguments
    // (1, 1, false, true) is defined by createElementDistanceQuery,
    // which is not visible here; going by the Javadoc summary they
    // presumably select an exclusion query. Confirm against the helper.
    SpanDistanceQuery sq = createElementDistanceQuery("s", "s:weg", "s:fahren", 1, 1, false, true);
    ks = new Krill(sq);
    kr = ks.apply(ki);

    // Expected value first, so that failure messages label the
    // expected and actual values correctly.
    // Original note: 0.8s (presumably the observed runtime)
    assertEquals(979, kr.getTotalResults());

    // Check if it includes some results: restrict the search to
    // documents whose ID is WPD_BBB.04463 or WPD_III.00758.
    KrillCollection kc = new KrillCollection();
    CollectionBuilder cb = new CollectionBuilder();
    kc.fromBuilder(cb.orGroup().with(cb.term("ID", "WPD_BBB.04463")).with(cb.term("ID", "WPD_III.00758")));
    ks.setCollection(kc);
    kr = ks.apply(ki);

    assertEquals(1094, kr.getMatch(0).getStartPos());
    assertEquals(451, kr.getMatch(1).getEndPos());
}
Also used : Krill(de.ids_mannheim.korap.Krill) SpanDistanceQuery(de.ids_mannheim.korap.query.SpanDistanceQuery) CollectionBuilder(de.ids_mannheim.korap.collection.CollectionBuilder) KrillCollection(de.ids_mannheim.korap.KrillCollection) Test(org.junit.Test)

Aggregations

CollectionBuilder (de.ids_mannheim.korap.collection.CollectionBuilder)33 Test (org.junit.Test)33 KrillCollection (de.ids_mannheim.korap.KrillCollection)11 KrillIndex (de.ids_mannheim.korap.KrillIndex)11 Krill (de.ids_mannheim.korap.Krill)5 Result (de.ids_mannheim.korap.response.Result)4 QueryBuilder (de.ids_mannheim.korap.query.QueryBuilder)3 TestSimple.getJsonString (de.ids_mannheim.korap.TestSimple.getJsonString)2 FieldDocument (de.ids_mannheim.korap.index.FieldDocument)2 SearchContext (de.ids_mannheim.korap.response.SearchContext)2 JsonNode (com.fasterxml.jackson.databind.JsonNode)1 KrillMeta (de.ids_mannheim.korap.KrillMeta)1 SpanDistanceQuery (de.ids_mannheim.korap.query.SpanDistanceQuery)1