Search in sources:

Example 26 with CollectionBuilder

use of de.ids_mannheim.korap.collection.CollectionBuilder in project Krill by KorAP.

Found in the class TestCollectionBuilder, method builderAndNestedSimple.

/**
 * Checks the string form of an AND group whose only operand is another AND
 * group holding two terms: the expected rendering is one flat
 * {@code AndGroup} listing both terms.
 */
@Test
public void builderAndNestedSimple() throws IOException {
    final CollectionBuilder builder = new CollectionBuilder();
    // Outer group wraps an inner group that carries the two actual terms.
    final String rendered = builder.andGroup()
            .with(builder.andGroup()
                    .with(builder.term("author", "tree"))
                    .with(builder.term("title", "name")))
            .toString();
    assertEquals("AndGroup(author:tree title:name)", rendered);
}
Also used : CollectionBuilder(de.ids_mannheim.korap.collection.CollectionBuilder) Test(org.junit.Test)

Example 27 with CollectionBuilder

use of de.ids_mannheim.korap.collection.CollectionBuilder in project Krill by KorAP.

Found in the class TestKrillCollectionIndex, method testIndexWithRegex.

/**
 * Indexes the three sample documents and verifies the document counts
 * selected by regular-expression constraints, both stand-alone and nested
 * inside AND/OR groups.
 */
@Test
public void testIndexWithRegex() throws IOException {
    ki = new KrillIndex();
    ki.addDoc(createDoc1());
    ki.addDoc(createDoc2());
    ki.addDoc(createDoc3());
    ki.commit();

    final CollectionBuilder builder = new CollectionBuilder();
    final KrillCollection collection = new KrillCollection(ki);

    // Author regex: expected to hit "Frank" and "Sebastian".
    collection.fromBuilder(builder.re("author", ".*an.*"));
    assertEquals(2, collection.docCount());

    // Text class regex: expected to hit
    //   "Kultur & Reisen",
    //   "Reisen & Finanzen",
    //   "Nachricht & Kultur & Reisen".
    collection.fromBuilder(builder.re("textClass", ".*(ult|eis).*"));
    assertEquals(3, collection.docCount());

    // Plain terms combined in an AND group.
    collection.fromBuilder(
            builder.andGroup()
                    .with(builder.term("textClass", "reisen"))
                    .with(builder.term("textClass", "kultur")));
    assertEquals(2, collection.docCount());

    // The same conjunction expressed with regular expressions.
    collection.fromBuilder(
            builder.andGroup()
                    .with(builder.re("textClass", ".*eis.*"))
                    .with(builder.re("textClass", ".*ult.*")));
    assertEquals(2, collection.docCount());

    // A regex AND-ed with an OR group of two further regexes.
    collection.fromBuilder(
            builder.andGroup()
                    .with(builder.re("textClass", ".*eis.*"))
                    .with(builder.orGroup()
                            .with(builder.re("textClass", ".*ult.*"))
                            .with(builder.re("textClass", ".*nan.*"))));
    assertEquals(3, collection.docCount());
}
Also used : CollectionBuilder(de.ids_mannheim.korap.collection.CollectionBuilder) KrillIndex(de.ids_mannheim.korap.KrillIndex) KrillCollection(de.ids_mannheim.korap.KrillCollection) Test(org.junit.Test)

Example 28 with CollectionBuilder

use of de.ids_mannheim.korap.collection.CollectionBuilder in project Krill by KorAP.

Found in the class TestKrillCollectionIndex, method filterExampleAtomicLegacy.

/**
 * Exercises filtering and extending a virtual collection on an index that is
 * committed after every added document, then checks that deleting documents
 * from the index is reflected in the collection statistics.
 *
 * @throws Exception if indexing, committing or searching fails
 */
@Test
public void filterExampleAtomicLegacy() throws Exception {
    // Same scenario as the non-atomic variant, but with multiple atomic
    // indices: the index is committed after every single document.
    KrillIndex ki = new KrillIndex();
    for (String i : new String[] { "00001", "00002", "00003", "00004", "00005", "00006", "02439" }) {
        ki.addDoc(getClass().getResourceAsStream("/wiki/" + i + ".json.gz"), true);
        ki.commit();
    }

    // Create the virtual collection over the whole index.
    KrillCollection kc = new KrillCollection(ki);
    assertEquals("Documents", 7, kc.numberOf("documents"));

    // Filtering on the corpus ID is expected to keep all seven documents.
    kc.filter(kc.build().term("corpusID", "WPD"));
    assertEquals("Documents", 7, kc.numberOf("documents"));

    // Restrict to documents that have both the textClass "reisen" and
    // "freizeit-unterhaltung".
    // Legacy builder form:
    //   kc.filter(kf.and("textClass", "reisen").and("textClass", "freizeit-unterhaltung"));
    kc.filter(kc.build().andGroup()
            .with(kc.build().term("textClass", "reisen"))
            .with(kc.build().term("textClass", "freizeit-unterhaltung")));
    assertEquals("Documents", 5, kc.numberOf("documents"));
    assertEquals("Tokens", 1678, kc.numberOf("tokens"));
    assertEquals("Sentences", 194, kc.numberOf("sentences"));
    assertEquals("Paragraphs", 139, kc.numberOf("paragraphs"));

    // Narrow further to documents that also have the textClass "kultur".
    // Legacy builder form: kc.filter(kf.and("textClass", "kultur"));
    kc.filter(kc.build().term("textClass", "kultur"));
    assertEquals("Documents", 1, kc.numberOf("documents"));
    assertEquals("Tokens", 405, kc.numberOf("tokens"));
    assertEquals("Sentences", 75, kc.numberOf("sentences"));
    assertEquals("Paragraphs", 48, kc.numberOf("paragraphs"));

    // The corpus ID filter was already applied above, so repeating it must
    // not change any count.
    // Legacy builder form: kc.filter(kf.and("corpusID", "WPD"));
    kc.filter(kc.build().term("corpusID", "WPD"));
    assertEquals("Documents", 1, kc.numberOf("documents"));
    assertEquals("Tokens", 405, kc.numberOf("tokens"));
    assertEquals("Sentences", 75, kc.numberOf("sentences"));
    assertEquals("Paragraphs", 48, kc.numberOf("paragraphs"));

    // Run a query restricted to the filtered collection.
    // Legacy search form:
    //   ki.search(kc, query, 0, (short) 20, true, (short) 5, true, (short) 5);
    Krill ks = new Krill(new QueryBuilder("tokens").seg("opennlp/p:NN").with("tt/p:NN"));
    ks.setCollection(kc).getMeta().setStartIndex(0).setCount((short) 20)
            .setContext(new SearchContext(true, (short) 5, true, (short) 5));
    Result kr = ks.apply(ki);
    // Expected value first, so a failure message reports the values correctly.
    assertEquals(70, kr.getTotalResults());

    // Extending by a textClass that is expected to match nothing keeps the count.
    // Legacy builder form: kc.extend(kf.and("textClass", "uninteresting"));
    kc.extend(kc.build().term("textClass", "uninteresting"));
    assertEquals("Documents", 1, kc.numberOf("documents"));

    // Extending by "wissenschaft" adds further documents.
    kc.extend(kc.build().term("textClass", "wissenschaft"));
    assertEquals("Documents", 3, kc.numberOf("documents"));
    assertEquals("Tokens", 1669, kc.numberOf("tokens"));
    assertEquals("Sentences", 188, kc.numberOf("sentences"));
    assertEquals("Paragraphs", 130, kc.numberOf("paragraphs"));

    // Note carried over from the original: the collection builder simplifier
    // still needs a test. The structure built up to this point is:
    /*
        OrGroup(
                AndGroup(
                         corpusID:WPD
                         textClass:reisen
                         textClass:freizeit-unterhaltung
                         textClass:kultur
                         corpusID:WPD
                         )
                textClass:uninteresting
                textClass:wissenschaft
        )
        */

    // Deleting the "wissenschaft" documents from the index must bring the
    // collection back to the single remaining document.
    assertTrue(ki.delDocs("textClass", "wissenschaft"));
    ki.commit();
    assertEquals("Documents", 1, kc.numberOf("documents"));
    assertEquals("Tokens", 405, kc.numberOf("tokens"));
    assertEquals("Sentences", 75, kc.numberOf("sentences"));
    assertEquals("Paragraphs", 48, kc.numberOf("paragraphs"));
}
Also used : Krill(de.ids_mannheim.korap.Krill) CollectionBuilder(de.ids_mannheim.korap.collection.CollectionBuilder) SearchContext(de.ids_mannheim.korap.response.SearchContext) QueryBuilder(de.ids_mannheim.korap.query.QueryBuilder) KrillIndex(de.ids_mannheim.korap.KrillIndex) KrillCollection(de.ids_mannheim.korap.KrillCollection) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test)

Example 29 with CollectionBuilder

use of de.ids_mannheim.korap.collection.CollectionBuilder in project Krill by KorAP.

Found in the class TestKrillCollectionIndex, method testIndexWithTextStringQueries.

/**
 * Indexes a single sample document and checks that it is found through
 * term and text constraints on the {@code text} field, and that a text
 * constraint is rendered as the expected filter string.
 *
 * @throws IOException declared by the test signature; propagated from the
 *         index operations
 */
@Test
public void testIndexWithTextStringQueries() throws IOException {
    ki = new KrillIndex();
    ki.addDoc(createDoc1());
    ki.commit();
    CollectionBuilder cb = new CollectionBuilder();
    KrillCollection kcn = new KrillCollection(ki);

    // Single-word term constraint.
    kcn.fromBuilder(cb.term("text", "mann"));
    assertEquals(1, kcn.docCount());

    // Whole-sentence term constraint (the double space is part of the input).
    kcn.fromBuilder(cb.term("text", "Der alte  Mann ging über die Straße"));
    assertEquals(1, kcn.docCount());

    // Text constraint: the expected rendering shows the phrase lower-cased.
    kcn.fromBuilder(cb.text("text", "Der alte Mann"));
    // Expected value first, so a failure message reports the values correctly.
    assertEquals("QueryWrapperFilter(text:\"der alte mann\")", kcn.toString());
    assertEquals(1, kcn.docCount());
}
Also used : CollectionBuilder(de.ids_mannheim.korap.collection.CollectionBuilder) KrillIndex(de.ids_mannheim.korap.KrillIndex) KrillCollection(de.ids_mannheim.korap.KrillCollection) Test(org.junit.Test)

Example 30 with CollectionBuilder

use of de.ids_mannheim.korap.collection.CollectionBuilder in project Krill by KorAP.

Found in the class TestKrillCollectionIndex, method filterExample2Legacy.

/**
 * Filters a virtual collection by two text classes, runs a query against
 * it, then filters by a corpus ID that is expected to match no document
 * and checks that both the statistics and the search come back empty.
 *
 * @throws Exception if indexing, committing or searching fails
 */
@Test
public void filterExample2Legacy() throws Exception {
    // Construct the index from the test files and commit them in one go.
    KrillIndex ki = new KrillIndex();
    for (String i : new String[] { "00001", "00002", "00003", "00004", "00005", "00006", "02439" }) {
        ki.addDoc(getClass().getResourceAsStream("/wiki/" + i + ".json.gz"), true);
    }
    ki.commit();

    // One further document is added in a separate commit.
    ki.addDoc(getClass().getResourceAsStream("/wiki/00012-fakemeta.json.gz"), true);
    ki.commit();

    // Create the virtual collection.
    // Legacy form:
    //   CollectionBuilderLegacy kf = new CollectionBuilderLegacy();
    //   KrillCollectionLegacy kc = new KrillCollectionLegacy(ki);
    //   kc.filter(kf.and("textClass", "reisen").and("textClass", "freizeit-unterhaltung"));
    KrillCollection kc = new KrillCollection(ki);
    CollectionBuilder cb = kc.build();
    kc.filter(cb.andGroup()
            .with(cb.term("textClass", "reisen"))
            .with(cb.term("textClass", "freizeit-unterhaltung")));
    assertEquals("Documents", 5, kc.numberOf("documents"));
    assertEquals("Tokens", 1678, kc.numberOf("tokens"));
    assertEquals("Sentences", 194, kc.numberOf("sentences"));
    assertEquals("Paragraphs", 139, kc.numberOf("paragraphs"));

    // Run a query restricted to the filtered collection.
    Krill ks = new Krill(new QueryBuilder("tokens").seg("opennlp/p:NN").with("tt/p:NN"));
    ks.setCollection(kc).getMeta().setStartIndex(0).setCount((short) 20)
            .setContext(new SearchContext(true, (short) 5, true, (short) 5));
    Result kr = ks.apply(ki);
    // Expected value first, so a failure message reports the values correctly.
    assertEquals(369, kr.getTotalResults());

    // Filter by a corpus ID that is expected to match nothing.
    // Legacy builder form: kc.filter(kf.and("corpusID", "QQQ"));
    kc.filter(cb.term("corpusID", "QQQ"));
    assertEquals("Documents", 0, kc.numberOf("documents"));
    assertEquals("Tokens", 0, kc.numberOf("tokens"));
    assertEquals("Sentences", 0, kc.numberOf("sentences"));
    assertEquals("Paragraphs", 0, kc.numberOf("paragraphs"));

    // Re-run the same query against the now empty collection.
    // Legacy search form:
    //   ki.search(kc, query, 0, (short) 20, true, (short) 5, true, (short) 5);
    ks.setCollection(kc);
    kr = ks.apply(ki);
    assertEquals(0, kr.getTotalResults());
}
Also used : Krill(de.ids_mannheim.korap.Krill) CollectionBuilder(de.ids_mannheim.korap.collection.CollectionBuilder) SearchContext(de.ids_mannheim.korap.response.SearchContext) QueryBuilder(de.ids_mannheim.korap.query.QueryBuilder) KrillIndex(de.ids_mannheim.korap.KrillIndex) KrillCollection(de.ids_mannheim.korap.KrillCollection) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test)

Aggregations

CollectionBuilder (de.ids_mannheim.korap.collection.CollectionBuilder): 33; Test (org.junit.Test): 33; KrillCollection (de.ids_mannheim.korap.KrillCollection): 11; KrillIndex (de.ids_mannheim.korap.KrillIndex): 11; Krill (de.ids_mannheim.korap.Krill): 5; Result (de.ids_mannheim.korap.response.Result): 4; QueryBuilder (de.ids_mannheim.korap.query.QueryBuilder): 3; TestSimple.getJsonString (de.ids_mannheim.korap.TestSimple.getJsonString): 2; FieldDocument (de.ids_mannheim.korap.index.FieldDocument): 2; SearchContext (de.ids_mannheim.korap.response.SearchContext): 2; JsonNode (com.fasterxml.jackson.databind.JsonNode): 1; KrillMeta (de.ids_mannheim.korap.KrillMeta): 1; SpanDistanceQuery (de.ids_mannheim.korap.query.SpanDistanceQuery): 1