Search in sources:

Example 6 with KrillCollection

use of de.ids_mannheim.korap.KrillCollection in project Krill by KorAP.

the class TestKrillCollectionIndex method testIndexWithDateRanges.

/**
 * Checks the document counts returned for date-based collection queries on
 * the {@code pubDate} field: plain {@code date} matches, {@code since} and
 * {@code till} ranges, and the negated forms of {@code date} and
 * {@code till}.
 *
 * <p>The expected counts imply that the three fixture documents are dated
 * 2005-12-07, 2005-12-10 and 2005-12-16 (assumption derived from the
 * assertions; the fixtures themselves are defined elsewhere).
 */
@Test
public void testIndexWithDateRanges() throws IOException {
    // Fresh index holding the three fixture documents.
    ki = new KrillIndex();
    ki.addDoc(createDoc1());
    ki.addDoc(createDoc2());
    ki.addDoc(createDoc3());
    ki.commit();

    CollectionBuilder builder = new CollectionBuilder();
    KrillCollection collection = new KrillCollection(ki);

    // date(): year and year-month granularity match all documents.
    collection.fromBuilder(builder.date("pubDate", "2005"));
    assertEquals(3, collection.docCount());
    collection.fromBuilder(builder.date("pubDate", "2005-12"));
    assertEquals(3, collection.docCount());

    // date(): a full day matches exactly one document.
    collection.fromBuilder(builder.date("pubDate", "2005-12-10"));
    assertEquals(1, collection.docCount());
    collection.fromBuilder(builder.date("pubDate", "2005-12-16"));
    assertEquals(1, collection.docCount());
    collection.fromBuilder(builder.date("pubDate", "2005-12-07"));
    assertEquals(1, collection.docCount());

    // since(): the boundary day itself is included.
    collection.fromBuilder(builder.since("pubDate", "2005-12-07"));
    assertEquals(3, collection.docCount());
    collection.fromBuilder(builder.since("pubDate", "2005-12-10"));
    assertEquals(2, collection.docCount());
    collection.fromBuilder(builder.since("pubDate", "2005-12-16"));
    assertEquals(1, collection.docCount());

    // till(): the boundary day itself is included.
    collection.fromBuilder(builder.till("pubDate", "2005-12-16"));
    assertEquals(3, collection.docCount());
    collection.fromBuilder(builder.till("pubDate", "2005-12-10"));
    assertEquals(2, collection.docCount());
    collection.fromBuilder(builder.till("pubDate", "2005-12-07"));
    assertEquals(1, collection.docCount());

    // Negated date(): everything except the given day.
    collection.fromBuilder(builder.date("pubDate", "2005-12-10").not());
    assertEquals(2, collection.docCount());
    collection.fromBuilder(builder.date("pubDate", "2005-12-16").not());
    assertEquals(2, collection.docCount());
    collection.fromBuilder(builder.date("pubDate", "2005-12-07").not());
    assertEquals(2, collection.docCount());
    collection.fromBuilder(builder.date("pubDate", "2005-12-09").not());
    assertEquals(3, collection.docCount());

    // Negated till(): only documents strictly after the given day remain.
    collection.fromBuilder(builder.till("pubDate", "2005-12-16").not());
    assertEquals(0, collection.docCount());
    collection.fromBuilder(builder.till("pubDate", "2005-12-15").not());
    assertEquals(1, collection.docCount());
    collection.fromBuilder(builder.till("pubDate", "2005-12-10").not());
    assertEquals(1, collection.docCount());
    collection.fromBuilder(builder.till("pubDate", "2005-12-09").not());
    assertEquals(2, collection.docCount());
    collection.fromBuilder(builder.till("pubDate", "2005-12-07").not());
    assertEquals(2, collection.docCount());
    collection.fromBuilder(builder.till("pubDate", "2005-12-06").not());
    assertEquals(3, collection.docCount());
}
Also used : CollectionBuilder(de.ids_mannheim.korap.collection.CollectionBuilder) KrillIndex(de.ids_mannheim.korap.KrillIndex) KrillCollection(de.ids_mannheim.korap.KrillCollection) Test(org.junit.Test)

Example 7 with KrillCollection

use of de.ids_mannheim.korap.KrillCollection in project Krill by KorAP.

the class TestKrillCollectionIndex method testKrillCollectionWithWrongJson.

/**
 * Verifies that a collection constructed from unparsable JSON records a
 * single "Unable to parse JSON" error and reports zero documents, tokens,
 * sentences and paragraphs once an index is attached.
 *
 * @throws IOException if indexing or counting fails; this fails the test
 *         instead of being swallowed
 */
@Test
public void testKrillCollectionWithWrongJson() throws IOException {
    ki = new KrillIndex();
    ki.addDoc(createDoc1());
    ki.addDoc(createDoc2());
    ki.addDoc(createDoc3());
    ki.commit();

    KrillCollection kc = new KrillCollection("{lalala}");
    assertEquals("Unable to parse JSON", kc.getError(0).getMessage());
    kc.setIndex(ki);

    // No try/catch here on purpose: a swallowed IOException would leave the
    // counters at their initial 0 and let the zero assertions below pass
    // without the counts ever having been computed.
    long docs = kc.numberOf("documents");
    long tokens = kc.numberOf("tokens");
    long sentences = kc.numberOf("sentences");
    long paragraphs = kc.numberOf("paragraphs");

    assertEquals(0, docs);
    assertEquals(0, tokens);
    assertEquals(0, sentences);
    assertEquals(0, paragraphs);

    // Counting must not add further errors beyond the initial parse failure.
    assertEquals(1, kc.getErrors().size());
    assertEquals(StatusCodes.UNABLE_TO_PARSE_JSON, kc.getErrors().get(0).getCode());
}
Also used : IOException(java.io.IOException) KrillIndex(de.ids_mannheim.korap.KrillIndex) KrillCollection(de.ids_mannheim.korap.KrillCollection) Test(org.junit.Test)

Example 8 with KrillCollection

use of de.ids_mannheim.korap.KrillCollection in project Krill by KorAP.

the class TestKrillCollectionIndex method filterExampleFromLegacy.

/**
 * Replays a legacy virtual-collection scenario against the wiki fixtures:
 * builds a collection from an AND group of two {@code textClass} terms,
 * applies {@code filter(...)} twice, runs a token query restricted to the
 * collection, and finally applies {@code extend(...)} twice, asserting the
 * document/token/sentence/paragraph counts after each step.
 *
 * @throws Exception if indexing, counting or searching fails
 */
@Test
public void filterExampleFromLegacy() throws Exception {
    // Construct index
    KrillIndex ki = new KrillIndex();
    // Indexing test files
    for (String i : new String[] { "00001", "00002", "00003", "00004", "00005", "00006", "02439" }) {
        ki.addDoc(getClass().getResourceAsStream("/wiki/" + i + ".json.gz"), true);
    }
    ki.commit();

    // Create virtual collections:
    KrillCollection kc = new KrillCollection(ki);
    assertEquals("Documents", 7, kc.numberOf("documents"));

    // The virtual collection consists of all documents that have
    // the textClass "reisen" and "freizeit-unterhaltung".
    // Legacy form: kc.filter(kf.and("textClass", "reisen").and("textClass", "freizeit-unterhaltung"));
    kc.fromBuilder(kc.build().andGroup().with(kc.build().term("textClass", "reisen")).with(kc.build().term("textClass", "freizeit-unterhaltung")));
    assertEquals("Documents", 5, kc.numberOf("documents"));
    assertEquals("Tokens", 1678, kc.numberOf("tokens"));
    assertEquals("Sentences", 194, kc.numberOf("sentences"));
    assertEquals("Paragraphs", 139, kc.numberOf("paragraphs"));

    // Subset this to all documents that also have the textClass "kultur".
    // Legacy form: kc.filter(kf.and("textClass", "kultur"));
    // Builder-only alternative: an andGroup of kc.getBuilder() and the "kultur" term.
    kc.filter(kc.build().term("textClass", "kultur"));
    assertEquals("Documents", 1, kc.numberOf("documents"));
    assertEquals("Tokens", 405, kc.numberOf("tokens"));
    assertEquals("Sentences", 75, kc.numberOf("sentences"));
    assertEquals("Paragraphs", 48, kc.numberOf("paragraphs"));

    // Filtering on the corpus ID leaves the counts unchanged.
    // Legacy form: kc.filter(kf.and("corpusID", "WPD"));
    kc.filter(kc.build().term("corpusID", "WPD"));
    assertEquals("Documents", 1, kc.numberOf("documents"));
    assertEquals("Tokens", 405, kc.numberOf("tokens"));
    assertEquals("Sentences", 75, kc.numberOf("sentences"));
    assertEquals("Paragraphs", 48, kc.numberOf("paragraphs"));

    // Create a query and run it restricted to the collection.
    // Legacy form: ki.search(kc, query, 0, (short) 20, true, (short) 5, true, (short) 5);
    Krill ks = new Krill(new QueryBuilder("tokens").seg("opennlp/p:NN").with("tt/p:NN"));
    ks.setCollection(kc).getMeta().setStartIndex(0).setCount((short) 20).setContext(new SearchContext(true, (short) 5, true, (short) 5));
    Result kr = ks.apply(ki);
    // JUnit's argument order is (expected, actual); keeping it that way
    // makes a failure message report the right value as "expected".
    assertEquals(70, kr.getTotalResults());

    // Extending by a term that this test expects to match nothing keeps the count.
    kc.extend(kc.build().term("textClass", "uninteresting"));
    assertEquals("Documents", 1, kc.numberOf("documents"));

    // Extending by "wissenschaft" adds further documents.
    kc.extend(kc.build().term("textClass", "wissenschaft"));
    assertEquals("Documents", 3, kc.numberOf("documents"));
    assertEquals("Tokens", 1669, kc.numberOf("tokens"));
    assertEquals("Sentences", 188, kc.numberOf("sentences"));
    assertEquals("Paragraphs", 130, kc.numberOf("paragraphs"));
}
Also used : Krill(de.ids_mannheim.korap.Krill) SearchContext(de.ids_mannheim.korap.response.SearchContext) QueryBuilder(de.ids_mannheim.korap.query.QueryBuilder) KrillIndex(de.ids_mannheim.korap.KrillIndex) KrillCollection(de.ids_mannheim.korap.KrillCollection) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test)

Example 9 with KrillCollection

use of de.ids_mannheim.korap.KrillCollection in project Krill by KorAP.

the class TestKrillCollectionIndex method testIndexWithCollectionBuilder.

/**
 * Exercises {@code CollectionBuilder} term queries, {@code orGroup} and
 * {@code andGroup} combinations against an index of the three fixture
 * documents and checks the resulting document counts.
 */
@Test
public void testIndexWithCollectionBuilder() throws IOException {
    // Fresh index holding the three fixture documents.
    ki = new KrillIndex();
    ki.addDoc(createDoc1());
    ki.addDoc(createDoc2());
    ki.addDoc(createDoc3());
    ki.commit();

    CollectionBuilder builder = new CollectionBuilder();
    KrillCollection collection = new KrillCollection(ki);

    // Single-term queries on the author field
    collection.fromBuilder(builder.term("author", "Frank"));
    assertEquals(1, collection.docCount());
    collection.fromBuilder(builder.term("author", "Peter"));
    assertEquals(1, collection.docCount());
    collection.fromBuilder(builder.term("author", "Sebastian"));
    assertEquals(1, collection.docCount());
    collection.fromBuilder(builder.term("author", "Michael"));
    assertEquals(0, collection.docCount());

    // Unknown field
    collection.fromBuilder(builder.term("nothing", "nothing"));
    assertEquals(0, collection.docCount());

    // Single-term queries on the textClass field
    collection.fromBuilder(builder.term("textClass", "reisen"));
    assertEquals(3, collection.docCount());
    collection.fromBuilder(builder.term("textClass", "kultur"));
    assertEquals(2, collection.docCount());
    collection.fromBuilder(builder.term("textClass", "finanzen"));
    assertEquals(1, collection.docCount());

    // orGroup over a single field
    collection.fromBuilder(
            builder.orGroup()
                    .with(builder.term("author", "Frank"))
                    .with(builder.term("author", "Michael")));
    assertEquals(1, collection.docCount());
    collection.fromBuilder(
            builder.orGroup()
                    .with(builder.term("author", "Frank"))
                    .with(builder.term("author", "Sebastian")));
    assertEquals(2, collection.docCount());
    collection.fromBuilder(
            builder.orGroup()
                    .with(builder.term("author", "Frank"))
                    .with(builder.term("author", "Sebastian"))
                    .with(builder.term("author", "Peter")));
    assertEquals(3, collection.docCount());
    collection.fromBuilder(
            builder.orGroup()
                    .with(builder.term("author", "Huhu"))
                    .with(builder.term("author", "Haha"))
                    .with(builder.term("author", "Hehe")));
    assertEquals(0, collection.docCount());

    // orGroup spanning several fields
    collection.fromBuilder(
            builder.orGroup()
                    .with(builder.term("ID", "doc-1"))
                    .with(builder.term("author", "Peter")));
    assertEquals(2, collection.docCount());
    collection.fromBuilder(
            builder.orGroup()
                    .with(builder.term("ID", "doc-1"))
                    .with(builder.term("author", "Frank")));
    assertEquals(1, collection.docCount());
    collection.fromBuilder(
            builder.orGroup()
                    .with(builder.term("ID", "doc-1"))
                    .with(builder.term("author", "Michael")));
    assertEquals(1, collection.docCount());

    // andGroup basics
    collection.fromBuilder(
            builder.andGroup()
                    .with(builder.term("author", "Frank"))
                    .with(builder.term("author", "Michael")));
    assertEquals(0, collection.docCount());
    collection.fromBuilder(
            builder.andGroup()
                    .with(builder.term("ID", "doc-1"))
                    .with(builder.term("author", "Frank")));
    assertEquals(1, collection.docCount());

    // andGroup on the textClass field with two values
    collection.fromBuilder(
            builder.andGroup()
                    .with(builder.term("textClass", "reisen"))
                    .with(builder.term("textClass", "finanzen")));
    assertEquals(1, collection.docCount());
    collection.fromBuilder(
            builder.andGroup()
                    .with(builder.term("textClass", "reisen"))
                    .with(builder.term("textClass", "kultur")));
    assertEquals(2, collection.docCount());
    collection.fromBuilder(
            builder.andGroup()
                    .with(builder.term("textClass", "finanzen"))
                    .with(builder.term("textClass", "kultur")));
    assertEquals(0, collection.docCount());

    // Term queries on the text field
    collection.fromBuilder(builder.term("text", "mann"));
    assertEquals(3, collection.docCount());
    collection.fromBuilder(builder.term("text", "frau"));
    assertEquals(1, collection.docCount());
}
Also used : CollectionBuilder(de.ids_mannheim.korap.collection.CollectionBuilder) KrillIndex(de.ids_mannheim.korap.KrillIndex) KrillCollection(de.ids_mannheim.korap.KrillCollection) Test(org.junit.Test)

Example 10 with KrillCollection

use of de.ids_mannheim.korap.KrillCollection in project Krill by KorAP.

the class TestKrillCollectionIndex method testIndexWithNegation.

/**
 * Checks document counts for negated term queries, both stand-alone and as
 * one operand of an {@code orGroup}, against the three fixture documents.
 */
@Test
public void testIndexWithNegation() throws IOException {
    // Fresh index holding the three fixture documents.
    ki = new KrillIndex();
    ki.addDoc(createDoc1());
    ki.addDoc(createDoc2());
    ki.addDoc(createDoc3());
    ki.commit();

    CollectionBuilder builder = new CollectionBuilder();
    KrillCollection collection = new KrillCollection(ki);

    // Stand-alone negated terms
    collection.fromBuilder(builder.term("author", "Frank").not());
    assertEquals(2, collection.docCount());
    collection.fromBuilder(builder.term("textClass", "reisen").not());
    assertEquals(0, collection.docCount());
    collection.fromBuilder(builder.term("textClass", "kultur").not());
    assertEquals(1, collection.docCount());

    // Negated term as one operand of an orGroup
    collection.fromBuilder(
            builder.orGroup()
                    .with(builder.term("textClass", "kultur").not())
                    .with(builder.term("author", "Peter")));
    assertEquals(2, collection.docCount());
    collection.fromBuilder(
            builder.orGroup()
                    .with(builder.term("textClass", "kultur").not())
                    .with(builder.term("author", "Sebastian")));
    assertEquals(1, collection.docCount());
}
Also used : CollectionBuilder(de.ids_mannheim.korap.collection.CollectionBuilder) KrillIndex(de.ids_mannheim.korap.KrillIndex) KrillCollection(de.ids_mannheim.korap.KrillCollection) Test(org.junit.Test)

Aggregations

KrillCollection (de.ids_mannheim.korap.KrillCollection) 37, Test (org.junit.Test) 35, KrillIndex (de.ids_mannheim.korap.KrillIndex) 24, Krill (de.ids_mannheim.korap.Krill) 17, Result (de.ids_mannheim.korap.response.Result) 15, CollectionBuilder (de.ids_mannheim.korap.collection.CollectionBuilder) 11, SearchContext (de.ids_mannheim.korap.response.SearchContext) 5, MMapDirectory (org.apache.lucene.store.MMapDirectory) 5, TestSimple.getJsonString (de.ids_mannheim.korap.TestSimple.getJsonString) 4, FieldDocument (de.ids_mannheim.korap.index.FieldDocument) 4, QueryBuilder (de.ids_mannheim.korap.query.QueryBuilder) 3, Term (org.apache.lucene.index.Term) 2, SpanQuery (org.apache.lucene.search.spans.SpanQuery) 2, SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery) 2, JsonNode (com.fasterxml.jackson.databind.JsonNode) 1, ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper) 1, SpanClassQuery (de.ids_mannheim.korap.query.SpanClassQuery) 1, SpanDistanceQuery (de.ids_mannheim.korap.query.SpanDistanceQuery) 1, SpanElementQuery (de.ids_mannheim.korap.query.SpanElementQuery) 1, SpanFocusQuery (de.ids_mannheim.korap.query.SpanFocusQuery) 1