Search in sources :

Example 36 with FieldDocument

Use of de.ids_mannheim.korap.index.FieldDocument in project Krill by KorAP.

The class TestKrill, method searchJSONmultitermRewriteBug.

/**
 * Indexes a single BZK document, loads the query stored in
 * {@code /queries/bugs/multiterm_rewrite.jsonld}, widens the query's
 * collection by {@code corpusSigle = BZK} and verifies the serialized
 * query, the total number of results and selected snippets.
 *
 * @throws IOException declared by the test; propagated from the
 *         indexing and resource-loading calls
 */
@Test
public void searchJSONmultitermRewriteBug() throws IOException {
    // Construct index
    KrillIndex ki = new KrillIndex();
    assertEquals(0, ki.numberOf("documents"));

    // Indexing test files
    FieldDocument fd = ki.addDoc(1,
            getClass().getResourceAsStream("/bzk/D59-00089.json.gz"), true);
    ki.commit();
    assertEquals(1, ki.numberOf("documents"));
    assertEquals("BZK", fd.getCorpusSigle());

    // [tt/p="A.*"]{0,3}[tt/p="N.*"]
    String json = getJsonString(getClass()
            .getResource("/queries/bugs/multiterm_rewrite.jsonld").getFile());
    Krill ks = new Krill(json);
    KrillCollection kc = ks.getCollection();

    // No index was set
    assertEquals(-1, kc.numberOf("documents"));
    kc.setIndex(ki);

    // Index was set but vc restricted to WPD
    assertEquals(0, kc.numberOf("documents"));

    /*
        kc.extend(new CollectionBuilder().or("corpusSigle", "BZK"));
        */
    // Build "(existing collection) OR corpusSigle=BZK" so the indexed
    // document becomes part of the collection.
    CollectionBuilder cb = new CollectionBuilder();
    kc.fromBuilder(cb.orGroup()
            .with(kc.getBuilder())
            .with(cb.term("corpusSigle", "BZK")));
    ks.setCollection(kc);
    assertEquals(1, kc.numberOf("documents"));

    Result kr = ks.apply(ki);
    assertEquals(
            "spanOr([SpanMultiTermQueryWrapper(tokens:/tt/p:N.*/), "
                    + "spanNext(spanRepetition(SpanMultiTermQueryWrapper"
                    + "(tokens:/tt/p:A.*/){1,3}), "
                    + "SpanMultiTermQueryWrapper(tokens:/tt/p:N.*/))])",
            kr.getSerialQuery());
    assertEquals(58, kr.getTotalResults());
    assertEquals(0, kr.getStartIndex());

    assertEquals(
            "[[Saragat-Partei]] zerfällt Rom (ADN) die von dem ...",
            kr.getMatch(0).getSnippetBrackets());
    assertEquals(
            "[[Saragat-Partei]] zerfällt Rom (ADN) die von dem ...",
            kr.getMatch(1).getSnippetBrackets());
    assertEquals(
            "Saragat-Partei zerfällt [[Rom]] (ADN) "
                    + "die von dem Rechtssozialisten Saragat ...",
            kr.getMatch(2).getSnippetBrackets());
    assertEquals(
            "Saragat-Partei zerfällt Rom ([[ADN]]) "
                    + "die von dem Rechtssozialisten Saragat geführte ...",
            kr.getMatch(3).getSnippetBrackets());
    assertEquals(
            "... dem Namen \"Einheitsbewegung der sozialistischen "
                    + "Initiative\" [[eine neue politische Gruppierung]] "
                    + "ins Leben gerufen hatten. Pressemeldungen zufolge ...",
            kr.getMatch(23).getSnippetBrackets());
}
Also used : Krill(de.ids_mannheim.korap.Krill) CollectionBuilder(de.ids_mannheim.korap.collection.CollectionBuilder) TestSimple.getJsonString(de.ids_mannheim.korap.TestSimple.getJsonString) FieldDocument(de.ids_mannheim.korap.index.FieldDocument) KrillIndex(de.ids_mannheim.korap.KrillIndex) KrillCollection(de.ids_mannheim.korap.KrillCollection) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test)

Example 37 with FieldDocument

Use of de.ids_mannheim.korap.index.FieldDocument in project Krill by KorAP.

The class TestKrill, method searchJSONcosmasBoundaryBug.

/**
 * Indexes a single BZK document and checks focus queries built with
 * {@link QueryBuilder} (class numbers 1 and 129) as well as the query
 * stored in {@code /queries/bugs/cosmas_boundary.jsonld}, verifying the
 * serialized queries, match positions and bracket snippets.
 *
 * @throws IOException declared by the test; propagated from the
 *         indexing and resource-loading calls
 */
@Test
public void searchJSONcosmasBoundaryBug() throws IOException {
    // Construct index
    KrillIndex ki = new KrillIndex();

    // Indexing test files (the returned document is not needed here)
    ki.addDoc(1, getClass().getResourceAsStream("/bzk/D59-00089.json.gz"),
            true);
    ki.commit();

    String json = getJsonString(getClass()
            .getResource("/queries/bugs/cosmas_boundary.jsonld").getFile());

    QueryBuilder kq = new QueryBuilder("tokens");
    Krill ks = new Krill(kq.focus(1,
            kq.contains(kq.tag("base/s:s"), kq.nr(1, kq.seg("s:Leben")))));
    Result kr = ks.apply(ki);
    assertEquals(
            "focus(1: spanContain(<tokens:base/s:s />, {1: tokens:s:Leben}))",
            kr.getSerialQuery());
    assertEquals(40, kr.getMatch(0).getStartPos());
    assertEquals(41, kr.getMatch(0).getEndPos());
    assertEquals(
            "... Initiative\" eine neue politische Gruppierung ins "
                    + "[[{1:Leben}]] gerufen hatten. Pressemeldungen zufolge haben sich ...",
            kr.getMatch(0).getSnippetBrackets());

    // Try with high class - don't highlight
    ks = new Krill(kq.focus(129,
            kq.contains(kq.tag("base/s:s"), kq.nr(129, kq.seg("s:Leben")))));
    kr = ks.apply(ki);
    assertEquals(
            "focus(129: spanContain(<tokens:base/s:s />, {129: tokens:s:Leben}))",
            kr.getSerialQuery());
    assertEquals(
            "... Initiative\" eine neue politische Gruppierung ins "
                    + "[[Leben]] gerufen hatten. Pressemeldungen zufolge haben sich ...",
            kr.getMatch(0).getSnippetBrackets());

    // Same document, this time with the query loaded from the JSON file
    ks = new Krill(json);
    kr = ks.apply(ki);
    assertEquals(
            "focus(129: spanElementDistance({129: tokens:s:Namen}, "
                    + "{129: tokens:s:Leben}, [(base/s:s[0:1], notOrdered, notExcluded)]))",
            kr.getSerialQuery());
    assertEquals(
            "... ihren Austritt erklärt und unter dem [[Namen \"Einheitsbewegung "
                    + "der sozialistischen Initiative\" eine neue politische Gruppierung "
                    + "ins Leben]] gerufen hatten. Pressemeldungen zufolge haben sich ...",
            kr.getMatch(0).getSnippetBrackets());
    assertEquals(1, kr.getTotalResults());
    assertEquals(0, kr.getStartIndex());
}
Also used : Krill(de.ids_mannheim.korap.Krill) TestSimple.getJsonString(de.ids_mannheim.korap.TestSimple.getJsonString) QueryBuilder(de.ids_mannheim.korap.query.QueryBuilder) FieldDocument(de.ids_mannheim.korap.index.FieldDocument) KrillIndex(de.ids_mannheim.korap.KrillIndex) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test)

Example 38 with FieldDocument

Use of de.ids_mannheim.korap.index.FieldDocument in project Krill by KorAP.

The class TestKrill, method searchJSONnewJSON2.

/**
 * Indexes a single BZK document, verifies the metadata exposed by the
 * returned {@link FieldDocument} and then runs a segment query
 * ({@code mate/m:case:nom} with {@code mate/m:number:sg}) to check the
 * result count and paging values.
 *
 * @throws IOException declared by the test; propagated from the
 *         indexing call
 */
@Test
public void searchJSONnewJSON2() throws IOException {
    // Construct index
    KrillIndex ki = new KrillIndex();

    // Indexing test files
    FieldDocument fd = ki.addDoc(1,
            getClass().getResourceAsStream("/bzk/D59-00089.json.gz"), true);
    ki.commit();

    // Identifiers
    assertEquals(1, fd.getUID());
    assertEquals("BZK_D59.00089", fd.getTextSigle());
    assertEquals("BZK_D59", fd.getDocSigle());
    assertEquals("BZK", fd.getCorpusSigle());

    // Text-level metadata
    assertEquals("Saragat-Partei zerfällt", fd.getTitle());
    assertEquals("19590219", fd.getPubDate().toString());
    assertNull(fd.getSubTitle());
    assertNull(fd.getAuthor());
    assertNull(fd.getEditor());
    assertEquals("Berlin", fd.getPubPlace());
    assertNull(fd.getPublisher());
    assertEquals("Zeitung: Tageszeitung", fd.getTextType());
    assertNull(fd.getTextTypeArt());
    assertEquals("Tageszeitung", fd.getTextTypeRef());
    assertEquals("Politik", fd.getTextDomain());
    assertEquals("19590219", fd.getCreationDate().toString());
    assertEquals("ACA-NC-LC", fd.getLicense());
    assertEquals("POLITIK", fd.getTextColumn());
    // assertNull(fd.getPages());
    assertEquals("politik ausland", fd.getTextClass());
    assertNull(fd.getFileEditionStatement());
    assertNull(fd.getBiblEditionStatement());
    assertEquals("de", fd.getLanguage());
    assertEquals(
            "Neues Deutschland, [Tageszeitung], 19.02.1959, Jg. 14,"
                    + " Berliner Ausgabe, S. 7. - Sachgebiet: Politik, "
                    + "Originalressort: POLITIK; Saragat-Partei zerfällt",
            fd.getReference());
    assertNull(fd.getKeywords());

    // Annotation metadata
    assertEquals("opennlp#tokens", fd.getTokenSource());
    assertEquals(
            "base base/paragraphs base/sentences corenlp "
                    + "corenlp/constituency corenlp/morpho corenlp/namedentities"
                    + " corenlp/sentences glemm glemm/morpho mate mate/morpho"
                    + " opennlp opennlp/morpho opennlp/sentences treetagger"
                    + " treetagger/morpho treetagger/sentences",
            fd.getFoundries());
    assertEquals(
            "base/s=spans corenlp/c=spans corenlp/ne=tokens"
                    + " corenlp/p=tokens corenlp/s=spans glemm/l=tokens"
                    + " mate/l=tokens mate/m=tokens mate/p=tokens"
                    + " opennlp/p=tokens opennlp/s=spans tt/l=tokens"
                    + " tt/p=tokens tt/s=spans",
            fd.getLayerInfos());

    // Corpus- and document-level metadata
    assertEquals("Bonner Zeitungskorpus", fd.getCorpusTitle());
    assertNull(fd.getCorpusSubTitle());
    assertNull(fd.getCorpusAuthor());
    assertNull(fd.getCorpusEditor());
    assertEquals("Neues Deutschland", fd.getDocTitle());
    assertEquals(
            "Organ des Zentralkomitees der Sozialistischen "
                    + "Einheitspartei Deutschlands",
            fd.getDocSubTitle());
    assertNull(fd.getDocEditor());
    assertNull(fd.getDocAuthor());

    // Search
    Krill ks = new Krill(new QueryBuilder("tokens")
            .seg("mate/m:case:nom").with("mate/m:number:sg"));
    Result kr = ks.apply(ki);
    assertEquals(6, kr.getTotalResults());
    assertEquals(0, kr.getStartIndex());
    assertEquals(25, kr.getItemsPerPage());
}
Also used : Krill(de.ids_mannheim.korap.Krill) QueryBuilder(de.ids_mannheim.korap.query.QueryBuilder) FieldDocument(de.ids_mannheim.korap.index.FieldDocument) KrillIndex(de.ids_mannheim.korap.KrillIndex) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test)

Example 39 with FieldDocument

Use of de.ids_mannheim.korap.index.FieldDocument in project Krill by KorAP.

The class TestResult, method checkJSONResult.

/**
 * Indexes two small hand-built documents, searches them with an "or"
 * query over two numbered classes and verifies the JSON produced by
 * {@code Result.toJsonString()}: meta block, context settings and the
 * first and last match.
 *
 * @throws Exception declared by the test; propagated from indexing,
 *         searching or JSON parsing
 */
@Test
public void checkJSONResult() throws Exception {
    KrillIndex ki = new KrillIndex();

    FieldDocument fd = new FieldDocument();
    fd.addString("ID", "doc-1");
    fd.addString("UID", "1");
    fd.addTV("base", "abab",
            "[(0-1)s:a|i:a|_0#0-1|-:t$<i>4]"
                    + "[(1-2)s:b|i:b|_1#1-2]"
                    + "[(2-3)s:a|i:c|_2#2-3]"
                    + "[(3-4)s:b|i:a|_3#3-4]");
    ki.addDoc(fd);

    fd = new FieldDocument();
    fd.addString("ID", "doc-2");
    fd.addString("UID", "2");
    fd.addTV("base", "aba",
            "[(0-1)s:a|i:a|_0#0-1|-:t$<i>3]"
                    + "[(1-2)s:b|i:b|_1#1-2]"
                    + "[(2-3)s:a|i:c|_2#2-3]");
    ki.addDoc(fd);

    // Commit!
    ki.commit();

    QueryBuilder kq = new QueryBuilder("base");
    SpanQuery q = (SpanQuery) kq.or(kq.nr(1, kq.seg("s:a")))
            .or(kq.nr(2, kq.seg("s:b"))).toQuery();
    Result kr = ki.search(q);
    assertEquals(7L, kr.getTotalResults());

    ObjectMapper mapper = new ObjectMapper();
    JsonNode res = mapper.readTree(kr.toJsonString());

    // Meta
    assertEquals(7, res.at("/meta/totalResults").asInt());
    assertEquals("spanOr([{1: base:s:a}, {2: base:s:b}])",
            res.at("/meta/serialQuery").asText());
    // Read startIndex from /meta like the other paging fields; a pointer
    // that matches nothing yields a missing node whose asInt() is 0,
    // which would make this check pass vacuously.
    assertEquals(0, res.at("/meta/startIndex").asInt());
    assertEquals(25, res.at("/meta/itemsPerPage").asInt());
    assertEquals("token", res.at("/meta/context/left/0").asText());
    assertEquals(6, res.at("/meta/context/left/1").asInt());
    assertEquals("token", res.at("/meta/context/right/0").asText());
    assertEquals(6, res.at("/meta/context/right/1").asInt());

    // First match
    assertEquals("base", res.at("/matches/0/field").asText());
    /*
          Probably a Jackson bug
          assertTrue(res.at("/matches/0/startMore").asBoolean());
          assertTrue(res.at("/matches/0/endMore").asBoolean());
        */
    assertEquals(1, res.at("/matches/0/UID").asInt());
    assertEquals("doc-1", res.at("/matches/0/docID").asText());
    assertEquals("match-doc-1-p0-1(1)0-0",
            res.at("/matches/0/matchID").asText());
    assertEquals(
            "<span class=\"context-left\"></span><span class=\"match\"><mark><mark class=\"class-1 level-0\">a</mark></mark></span><span class=\"context-right\">bab</span>",
            res.at("/matches/0/snippet").asText());

    // Last match
    assertEquals("base", res.at("/matches/6/field").asText());
    /*
          Probably a Jackson bug
          assertEquals(true, res.at("/matches/6/startMore").asBoolean());
          assertEquals(true, res.at("/matches/6/endMore").asBoolean());
        */
    assertEquals(2, res.at("/matches/6/UID").asInt());
    assertEquals("doc-2", res.at("/matches/6/docID").asText());
    assertEquals("match-doc-2-p2-3(1)2-2",
            res.at("/matches/6/matchID").asText());
    assertEquals(
            "<span class=\"context-left\">ab</span><span class=\"match\"><mark><mark class=\"class-1 level-0\">a</mark></mark></span><span class=\"context-right\"></span>",
            res.at("/matches/6/snippet").asText());
}
Also used : JsonNode(com.fasterxml.jackson.databind.JsonNode) QueryBuilder(de.ids_mannheim.korap.query.QueryBuilder) FieldDocument(de.ids_mannheim.korap.index.FieldDocument) KrillIndex(de.ids_mannheim.korap.KrillIndex) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) SpanQuery(org.apache.lucene.search.spans.SpanQuery) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test) Test(de.ids_mannheim.korap.Test)

Example 40 with FieldDocument

Use of de.ids_mannheim.korap.index.FieldDocument in project Krill by KorAP.

The class TestResult, method checkJSONResultForJSONInput.

/**
 * Indexes two small hand-built documents, runs the query stored in
 * {@code /queries/bsp-result-check.jsonld} and verifies the JSON produced
 * by {@code Result.toJsonString()}: meta block, the echoed query, the
 * first match, and that no {@code primaryData} is serialized.
 *
 * @throws Exception declared by the test; propagated from indexing,
 *         searching or JSON parsing
 */
@Test
public void checkJSONResultForJSONInput() throws Exception {
    KrillIndex ki = new KrillIndex();

    FieldDocument fd = new FieldDocument();
    fd.addString("ID", "doc-1");
    fd.addString("UID", "1");
    fd.addTV("tokens", "abab",
            "[(0-1)s:a|i:a|_0#0-1|-:t$<i>4]"
                    + "[(1-2)s:b|i:b|_1#1-2]"
                    + "[(2-3)s:a|i:c|_2#2-3]"
                    + "[(3-4)s:b|i:a|_3#3-4]");
    ki.addDoc(fd);

    fd = new FieldDocument();
    fd.addString("ID", "doc-2");
    fd.addString("UID", "2");
    fd.addTV("tokens", "aba",
            "[(0-1)s:a|i:a|_0#0-1|-:t$<i>3]"
                    + "[(1-2)s:b|i:b|_1#1-2]"
                    + "[(2-3)s:a|i:c|_2#2-3]");
    ki.addDoc(fd);

    // Commit!
    ki.commit();

    String json = getString(getClass()
            .getResource("/queries/bsp-result-check.jsonld").getFile());
    Krill ks = new Krill(json);
    Result kr = ks.apply(ki);
    assertEquals(7L, kr.getTotalResults());

    ObjectMapper mapper = new ObjectMapper();
    JsonNode res = mapper.readTree(kr.toJsonString());

    // Meta
    assertEquals(7, res.at("/meta/totalResults").asInt());
    assertEquals("spanOr([tokens:s:a, tokens:s:b])",
            res.at("/meta/serialQuery").asText());
    assertEquals(5, res.at("/meta/itemsPerPage").asInt());
    assertEquals(0, res.at("/meta/startIndex").asInt());

    // Request meta
    // assertEquals(1, res.at("/meta/startPage").asInt());
    assertEquals(5, res.at("/meta/count").asInt());
    assertEquals("token", res.at("/meta/context/left/0").asText());
    assertEquals(3, res.at("/meta/context/left/1").asInt());
    assertEquals("char", res.at("/meta/context/right/0").asText());
    assertEquals(6, res.at("/meta/context/right/1").asInt());

    // Query
    assertEquals("koral:group", res.at("/query/@type").asText());
    assertEquals("operation:or", res.at("/query/operation").asText());
    assertEquals("koral:token", res.at("/query/operands/0/@type").asText());
    assertEquals("koral:term",
            res.at("/query/operands/0/wrap/@type").asText());
    assertEquals("orth", res.at("/query/operands/0/wrap/layer").asText());
    assertEquals("a", res.at("/query/operands/0/wrap/key").asText());
    assertEquals("match:eq",
            res.at("/query/operands/0/wrap/match").asText());
    assertEquals("koral:token", res.at("/query/operands/1/@type").asText());
    assertEquals("koral:term",
            res.at("/query/operands/1/wrap/@type").asText());
    assertEquals("orth", res.at("/query/operands/1/wrap/layer").asText());
    assertEquals("b", res.at("/query/operands/1/wrap/key").asText());
    assertEquals("match:eq",
            res.at("/query/operands/1/wrap/match").asText());

    // Matches
    assertEquals(1, res.at("/matches/0/UID").asInt());
    assertEquals("doc-1", res.at("/matches/0/docID").asText());
    assertEquals("match-doc-1-p0-1", res.at("/matches/0/matchID").asText());
    assertEquals(
            "<span class=\"context-left\"></span><span class=\"match\"><mark>a</mark></span><span class=\"context-right\">bab</span>",
            res.at("/matches/0/snippet").asText());

    // No primaryData serialization
    assertTrue(res.at("/matches/0/primaryData").isMissingNode());
}
Also used : Krill(de.ids_mannheim.korap.Krill) JsonNode(com.fasterxml.jackson.databind.JsonNode) FieldDocument(de.ids_mannheim.korap.index.FieldDocument) KrillIndex(de.ids_mannheim.korap.KrillIndex) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test) Test(de.ids_mannheim.korap.Test)

Aggregations

FieldDocument (de.ids_mannheim.korap.index.FieldDocument)40 KrillIndex (de.ids_mannheim.korap.KrillIndex)28 Test (org.junit.Test)28 Result (de.ids_mannheim.korap.response.Result)20 Krill (de.ids_mannheim.korap.Krill)15 QueryBuilder (de.ids_mannheim.korap.query.QueryBuilder)14 JsonNode (com.fasterxml.jackson.databind.JsonNode)7 Match (de.ids_mannheim.korap.response.Match)7 SpanQuery (org.apache.lucene.search.spans.SpanQuery)7 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)6 KrillCollection (de.ids_mannheim.korap.KrillCollection)4 Test (de.ids_mannheim.korap.Test)4 TestSimple.getJsonString (de.ids_mannheim.korap.TestSimple.getJsonString)2 CollectionBuilder (de.ids_mannheim.korap.collection.CollectionBuilder)2 MultiTermTokenStream (de.ids_mannheim.korap.index.MultiTermTokenStream)1 Response (de.ids_mannheim.korap.response.Response)1 SearchContext (de.ids_mannheim.korap.response.SearchContext)1 Consumes (javax.ws.rs.Consumes)1 PUT (javax.ws.rs.PUT)1 Path (javax.ws.rs.Path)1