Search in sources :

Example 6 with KrillIndex

use of de.ids_mannheim.korap.KrillIndex in project Krill by KorAP.

the class TestKrill method searchJSONitemsPerResourceServer.

/**
 * Server-only implementation of TestResource#testCollection.
 *
 * Indexes seven wiki test documents under consecutive UIDs (starting at 1),
 * runs the query from /queries/bsp-uid-example.jsonld with one item per
 * resource against a collection filtered by the UIDs "1" and "4", and
 * checks the paging metadata of the result.
 *
 * @throws IOException declared by the test signature for the index and
 *         resource handling performed below
 */
@Test
public void searchJSONitemsPerResourceServer() throws IOException {
    // Construct index
    KrillIndex ki = new KrillIndex();

    // Indexing test files, assigning an explicit, increasing UID to each
    int uid = 1;
    for (String i : new String[] { "00001", "00002", "00003", "00004", "00005", "00006", "02439" }) {
        ki.addDoc(uid++, getClass().getResourceAsStream("/wiki/" + i + ".json.gz"), true);
    }
    ki.commit();

    String json = getJsonString(getClass().getResource("/queries/bsp-uid-example.jsonld").getFile());
    Krill ks = new Krill(json);
    ks.getMeta().setItemsPerResource(1);

    // Filter the collection by the UIDs "1" and "4"
    KrillCollection kc = new KrillCollection();
    kc.filterUIDs(new String[] { "1", "4" });
    kc.setIndex(ki);
    ks.setCollection(kc);

    Result kr = ks.apply(ki);

    // JUnit's contract is assertEquals(expected, actual); keeping that order
    // makes a failure report "expected:<2> but was:<...>" instead of the inverse.
    assertEquals(2, kr.getTotalResults());
    assertEquals(0, kr.getStartIndex());
    assertEquals(25, kr.getItemsPerPage());
}
Also used : Krill(de.ids_mannheim.korap.Krill) TestSimple.getJsonString(de.ids_mannheim.korap.TestSimple.getJsonString) KrillIndex(de.ids_mannheim.korap.KrillIndex) KrillCollection(de.ids_mannheim.korap.KrillCollection) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test)

Example 7 with KrillIndex

use of de.ids_mannheim.korap.KrillIndex in project Krill by KorAP.

the class TestKrill method searchJSONnewJSON.

/**
 * Indexes the single document /goe/AGA-03828.json.gz with UID 1, verifies the
 * metadata exposed by the returned FieldDocument (sigles and field values,
 * including fields expected to be absent), and finally runs a token query
 * for "mate/m:case:nom" combined with "mate/m:number:pl" against the index.
 *
 * All equality checks use JUnit's (expected, actual) argument order so that
 * failure messages name the expected value correctly.
 *
 * @throws IOException declared by the test signature for the index and
 *         resource handling performed below
 */
@Test
public void searchJSONnewJSON() throws IOException {
    // Construct index
    KrillIndex ki = new KrillIndex();

    // Indexing test files
    FieldDocument fd = ki.addDoc(1, getClass().getResourceAsStream("/goe/AGA-03828.json.gz"), true);
    ki.commit();

    // Identifiers
    assertEquals(1, fd.getUID());
    assertEquals("GOE_AGA.03828", fd.getTextSigle());
    assertEquals("GOE_AGA", fd.getDocSigle());
    assertEquals("GOE", fd.getCorpusSigle());

    // Text-level metadata
    assertEquals("Autobiographische Einzelheiten", fd.getFieldValue("title"));
    assertNull(fd.getFieldValue("subTitle"));
    assertEquals("Autobiographie", fd.getFieldValue("textType"));
    assertNull(fd.getFieldValue("textTypeArt"));
    assertNull(fd.getFieldValue("textTypeRef"));
    assertNull(fd.getFieldValue("textColumn"));
    assertNull(fd.getFieldValue("textDomain"));
    // NOTE(review): this check was already disabled in the original; the reason is not visible here.
    // assertEquals(fd.getPages(), "529-547");
    assertEquals("QAO-NC", fd.getFieldValue("availability"));
    assertEquals("1820", fd.getFieldValue("creationDate"));
    assertEquals("1982", fd.getFieldValue("pubDate"));
    assertEquals("Goethe, Johann Wolfgang von", fd.getFieldValue("author"));
    assertNull(fd.getFieldValue("textClass"));
    assertEquals("de", fd.getFieldValue("language"));
    assertEquals("München", fd.getFieldValue("pubPlace"));
    assertEquals(
            "Goethe, Johann Wolfgang von:"
                    + " Autobiographische Einzelheiten,"
                    + " (Geschrieben bis 1832), In: Goethe,"
                    + " Johann Wolfgang von: Goethes Werke,"
                    + " Bd. 10, Autobiographische Schriften"
                    + " II, Hrsg.: Trunz, Erich. München: "
                    + "Verlag C. H. Beck, 1982, S. 529-547",
            fd.getFieldValue("reference"));
    assertEquals("Verlag C. H. Beck", fd.getFieldValue("publisher"));
    assertNull(fd.getFieldValue("editor"));
    assertNull(fd.getFieldValue("fileEditionStatement"));
    assertNull(fd.getFieldValue("biblEditionStatement"));
    assertNull(fd.getFieldValue("keywords"));

    // Annotation-related metadata
    assertEquals("opennlp#tokens", fd.getFieldValue("tokenSource"));
    assertEquals(
            "base base/paragraphs base/sentences corenlp "
                    + "corenlp/constituency corenlp/morpho "
                    + "corenlp/namedentities corenlp/sentences "
                    + "glemm glemm/morpho mate mate/morpho"
                    + " opennlp opennlp/morpho opennlp/sentences"
                    + " treetagger treetagger/morpho "
                    + "treetagger/sentences",
            fd.getFieldValue("foundries"));
    assertEquals(
            "base/s=spans corenlp/c=spans corenlp/ne=tokens"
                    + " corenlp/p=tokens corenlp/s=spans glemm/l=tokens"
                    + " mate/l=tokens mate/m=tokens mate/p=tokens"
                    + " opennlp/p=tokens opennlp/s=spans tt/l=tokens"
                    + " tt/p=tokens tt/s=spans",
            fd.getFieldValue("layerInfos"));

    // Corpus- and document-level metadata
    assertEquals("Goethes Werke", fd.getFieldValue("corpusTitle"));
    assertNull(fd.getFieldValue("corpusSubTitle"));
    assertEquals("Goethe, Johann Wolfgang von", fd.getFieldValue("corpusAuthor"));
    assertEquals("Trunz, Erich", fd.getFieldValue("corpusEditor"));
    assertEquals("Goethe: Autobiographische Schriften II, (1817-1825, 1832)", fd.getFieldValue("docTitle"));
    assertNull(fd.getFieldValue("docSubTitle"));
    assertNull(fd.getFieldValue("docEditor"));
    assertNull(fd.getFieldValue("docAuthor"));

    // Query the freshly built index
    Krill ks = new Krill(new QueryBuilder("tokens").seg("mate/m:case:nom").with("mate/m:number:pl"));
    Result kr = ks.apply(ki);
    assertEquals(148, kr.getTotalResults());
    assertEquals(0, kr.getStartIndex());
    assertEquals(25, kr.getItemsPerPage());
}
Also used : Krill(de.ids_mannheim.korap.Krill) QueryBuilder(de.ids_mannheim.korap.query.QueryBuilder) FieldDocument(de.ids_mannheim.korap.index.FieldDocument) KrillIndex(de.ids_mannheim.korap.KrillIndex) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test)

Example 8 with KrillIndex

use of de.ids_mannheim.korap.KrillIndex in project Krill by KorAP.

the class TestKrill method searchIndex.

/**
 * Indexes seven wiki test documents and searches them for the token
 * "s:Buchstaben" within a collection built from the term
 * textClass = "reisen". The test runs in three phases:
 * <ol>
 *   <li>count=3, startIndex=5 with a one-token left and right context,
 *       checking both the result and the JSON serialisation of the
 *       request and of the result;</li>
 *   <li>count=0;</li>
 *   <li>count=1 with tokens switched on and snippets switched off.</li>
 * </ol>
 *
 * All equality checks use JUnit's (expected, actual) argument order so that
 * failure messages name the expected value correctly.
 *
 * @throws IOException declared by the test signature for the index and
 *         resource handling performed below
 */
@Test
public void searchIndex() throws IOException {
    // Construct index
    KrillIndex ki = new KrillIndex();

    // Indexing test files
    for (String i : new String[] { "00001", "00002", "00003", "00004", "00005", "00006", "02439" }) {
        ki.addDoc(getClass().getResourceAsStream("/wiki/" + i + ".json.gz"), true);
    }
    ki.commit();

    Krill ks = new Krill(new QueryBuilder("tokens").seg("s:Buchstaben"));
    CollectionBuilder cb = new CollectionBuilder();
    ks.getCollection().fromBuilder(cb.term("textClass", "reisen"));

    // Phase 1: three items per page, starting at index 5, one token of context on each side
    KrillMeta meta = ks.getMeta();
    meta.setCount(3);
    meta.setStartIndex(5);
    meta.getContext().left.setLength(1);
    meta.getContext().right.setLength(1);
    assertTrue(meta.hasSnippets());

    Result kr = ks.apply(ki);
    assertEquals(6, kr.getTotalResults());
    assertEquals(1, kr.getMatches().size());
    assertEquals("... dem [[Buchstaben]] A ...", kr.getMatch(0).getSnippetBrackets());

    // JSON view of the request object: carries the meta settings but no matches
    JsonNode res = ks.toJsonNode();
    assertEquals(3, res.at("/meta/count").asInt());
    assertEquals(5, res.at("/meta/startIndex").asInt());
    assertEquals("token", res.at("/meta/context/left/0").asText());
    assertEquals(1, res.at("/meta/context/left/1").asInt());
    assertEquals("token", res.at("/meta/context/right/0").asText());
    assertEquals(1, res.at("/meta/context/right/1").asInt());
    assertTrue(res.at("/matches/0/snippet").isMissingNode());
    assertTrue(res.at("/matches/0/tokens").isMissingNode());

    // JSON view of the result object: the match has a snippet but no tokens
    res = kr.toJsonNode();
    assertFalse(res.at("/matches/0/snippet").isMissingNode());
    assertTrue(res.at("/matches/0/tokens").isMissingNode());

    // Phase 2: handle count=0 correctly
    meta = ks.getMeta();
    meta.setCount(0);
    kr = ks.apply(ki);
    assertEquals(6, kr.getTotalResults());
    assertEquals(0, kr.getItemsPerPage());
    assertEquals(0, kr.getMatches().size());

    // Phase 3: handle tokens=true and snippet=false correctly
    meta = ks.getMeta();
    meta.setCount(1);
    meta.setTokens(true);
    meta.setSnippets(false);
    kr = ks.apply(ki);
    assertEquals(6, kr.getTotalResults());
    assertEquals(1, kr.getMatches().size());
    res = kr.toJsonNode();
    assertFalse(res.at("/matches/0/hasSnippet").asBoolean());
    assertTrue(res.at("/matches/0/hasTokens").asBoolean());
    assertTrue(res.at("/matches/0/snippet").isMissingNode());
    assertEquals("dem", res.at("/matches/0/tokens/left/0").asText());
    assertEquals("Buchstaben", res.at("/matches/0/tokens/match/0").asText());
}
Also used : Krill(de.ids_mannheim.korap.Krill) KrillMeta(de.ids_mannheim.korap.KrillMeta) CollectionBuilder(de.ids_mannheim.korap.collection.CollectionBuilder) JsonNode(com.fasterxml.jackson.databind.JsonNode) TestSimple.getJsonString(de.ids_mannheim.korap.TestSimple.getJsonString) QueryBuilder(de.ids_mannheim.korap.query.QueryBuilder) KrillIndex(de.ids_mannheim.korap.KrillIndex) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test)

Example 9 with KrillIndex

use of de.ids_mannheim.korap.KrillIndex in project Krill by KorAP.

the class TestKrill method queryJSONcosmasSentenceNegationBug.

/**
 * Regression test: the query stored in /queries/bugs/cosmas-exclude.jsonld,
 * applied to an index of seven wiki test documents, is expected to yield
 * no results at all.
 *
 * @throws IOException declared by the test signature for the index and
 *         resource handling performed below
 */
@Test
public void queryJSONcosmasSentenceNegationBug() throws IOException {
    KrillIndex index = new KrillIndex();

    // Indexing test files
    final String[] wikiDocs = { "00001", "00002", "00003", "00004", "00005", "00006", "02439" };
    for (String docName : wikiDocs) {
        String resourcePath = "/wiki/" + docName + ".json.gz";
        index.addDoc(getClass().getResourceAsStream(resourcePath), true);
    }
    index.commit();

    // Load the serialized query and run it against the index
    String queryFile = getClass().getResource("/queries/bugs/cosmas-exclude.jsonld").getFile();
    Krill search = new Krill(getJsonString(queryFile));
    Result result = search.apply(index);

    assertEquals(0, result.getTotalResults());
}
Also used : Krill(de.ids_mannheim.korap.Krill) TestSimple.getJsonString(de.ids_mannheim.korap.TestSimple.getJsonString) KrillIndex(de.ids_mannheim.korap.KrillIndex) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test)

Example 10 with KrillIndex

use of de.ids_mannheim.korap.KrillIndex in project Krill by KorAP.

the class TestKrill method searchJSONmultipleClassesBug.

/**
 * Regression test for a query with multiple nested classes.
 *
 * Indexes the same document (/bzk/D59-00089.json.gz) twice, under UIDs 1
 * and 2, runs the query from /queries/bugs/multiple_classes.jsonld and
 * checks the serialized query, the bracketed snippet of the first match
 * and the paging metadata.
 *
 * All equality checks use JUnit's (expected, actual) argument order so that
 * failure messages name the expected value correctly.
 *
 * @throws IOException declared by the test signature for the index and
 *         resource handling performed below
 */
@Test
public void searchJSONmultipleClassesBug() throws IOException {
    // Construct index
    KrillIndex ki = new KrillIndex();

    // Indexing test files: the same document is added under two UIDs
    ki.addDoc(1, getClass().getResourceAsStream("/bzk/D59-00089.json.gz"), true);
    ki.addDoc(2, getClass().getResourceAsStream("/bzk/D59-00089.json.gz"), true);
    ki.commit();

    String json = getJsonString(getClass().getResource("/queries/bugs/multiple_classes.jsonld").getFile());
    Krill ks = new Krill(json);
    Result kr = ks.apply(ki);

    assertEquals(
            "{4: spanNext({1: spanNext({2: tokens:s:ins}, "
                    + "{3: tokens:s:Leben})}, tokens:s:gerufen)}",
            kr.getSerialQuery());
    assertEquals(
            "... sozialistischen Initiative\" eine neue politische"
                    + " Gruppierung [[{4:{1:{2:ins} {3:Leben}} gerufen}]] hatten. "
                    + "Pressemeldungen zufolge haben sich in ...",
            kr.getMatch(0).getSnippetBrackets());
    assertEquals(2, kr.getTotalResults());
    assertEquals(0, kr.getStartIndex());
}
Also used : Krill(de.ids_mannheim.korap.Krill) TestSimple.getJsonString(de.ids_mannheim.korap.TestSimple.getJsonString) KrillIndex(de.ids_mannheim.korap.KrillIndex) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test)

Aggregations

KrillIndex (de.ids_mannheim.korap.KrillIndex): 320; Test (org.junit.Test): 309; Result (de.ids_mannheim.korap.response.Result): 143; SpanQuery (org.apache.lucene.search.spans.SpanQuery): 132; Term (org.apache.lucene.index.Term): 93; SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery): 84; Krill (de.ids_mannheim.korap.Krill): 82; QueryBuilder (de.ids_mannheim.korap.query.QueryBuilder): 56; SpanElementQuery (de.ids_mannheim.korap.query.SpanElementQuery): 42; KrillCollection (de.ids_mannheim.korap.KrillCollection): 39; TestSimple.getJsonString (de.ids_mannheim.korap.TestSimple.getJsonString): 38; SpanNextQuery (de.ids_mannheim.korap.query.SpanNextQuery): 37; Match (de.ids_mannheim.korap.response.Match): 37; FieldDocument (de.ids_mannheim.korap.index.FieldDocument): 33; JsonNode (com.fasterxml.jackson.databind.JsonNode): 28; DistanceConstraint (de.ids_mannheim.korap.query.DistanceConstraint): 27; SpanQueryWrapper (de.ids_mannheim.korap.query.wrap.SpanQueryWrapper): 26; SpanClassQuery (de.ids_mannheim.korap.query.SpanClassQuery): 25; SpanDistanceQuery (de.ids_mannheim.korap.query.SpanDistanceQuery): 20; SpanWithinQuery (de.ids_mannheim.korap.query.SpanWithinQuery): 18