Search in sources:

Example 16 with FieldDocument

use of de.ids_mannheim.korap.index.FieldDocument in project Krill by KorAP.

the class TestKrill method searchNewDeReKoData.

/**
 * Indexes a Schreibgebrauch resource that previously didn't work for
 * element queries, checks the metadata exposed by the returned
 * {@link FieldDocument}, and finally runs a token query against the index.
 *
 * <p>All assertions use the JUnit {@code (expected, actual)} argument
 * order so that a failure reports the two values the right way round.
 *
 * @throws IOException propagated unchanged from any call in this test
 */
@Test
public void searchNewDeReKoData() throws IOException {
    // Construct index
    KrillIndex ki = new KrillIndex();
    // Indexing test files
    FieldDocument fd = ki.addDoc(1, getClass().getResourceAsStream("/goe/AGA-03828-new.json.gz"), true);
    ki.commit();

    // Identifiers
    assertEquals(1, fd.getUID());
    assertEquals("GOE/AGA/03828", fd.getTextSigle());
    assertEquals("GOE/AGA", fd.getDocSigle());
    assertEquals("GOE", fd.getCorpusSigle());

    // Text-level metadata
    assertEquals("Autobiographische Einzelheiten", fd.getTitle());
    assertNull(fd.getSubTitle());
    assertEquals("Autobiographie", fd.getTextType());
    assertNull(fd.getTextTypeArt());
    assertNull(fd.getTextTypeRef());
    assertNull(fd.getTextColumn());
    assertNull(fd.getTextDomain());
    // Disabled in the original test:
    // assertEquals("529-547", fd.getPages());
    assertEquals("QAO-NC", fd.getLicense());
    assertEquals("18200000", fd.getCreationDate().toString());
    assertEquals("19820000", fd.getPubDate().toString());
    assertEquals("Goethe, Johann Wolfgang von", fd.getAuthor());
    assertNull(fd.getTextClass());
    assertEquals("de", fd.getLanguage());
    assertEquals("München", fd.getPubPlace());
    assertEquals("Goethe, Johann Wolfgang von:" + " Autobiographische Einzelheiten," + " (Geschrieben bis 1832), In: Goethe," + " Johann Wolfgang von: Goethes Werke," + " Bd. 10, Autobiographische Schriften" + " II, Hrsg.: Trunz, Erich. München: " + "Verlag C. H. Beck, 1982, S. 529-547", fd.getReference());
    assertEquals("Verlag C. H. Beck", fd.getPublisher());
    assertNull(fd.getEditor());
    assertNull(fd.getFileEditionStatement());
    assertNull(fd.getBiblEditionStatement());
    assertNull(fd.getKeywords());

    // Annotation metadata
    assertEquals("base#tokens", fd.getTokenSource());
    assertEquals("corenlp corenlp/constituency corenlp/morpho corenlp/sentences dereko dereko/structure dereko/structure/base-sentences-paragraphs-pagebreaks malt malt/dependency marmot marmot/morpho opennlp opennlp/morpho opennlp/sentences treetagger treetagger/morpho", fd.getFoundries());
    assertEquals("corenlp/c=spans corenlp/p=tokens corenlp/s=spans dereko/s=spans malt/d=rels marmot/m=tokens marmot/p=tokens opennlp/p=tokens opennlp/s=spans tt/l=tokens tt/p=tokens", fd.getLayerInfos());

    // Corpus- and document-level metadata
    assertEquals("Goethes Werke", fd.getCorpusTitle());
    assertNull(fd.getCorpusSubTitle());
    assertEquals("Goethe, Johann Wolfgang von", fd.getCorpusAuthor());
    assertEquals("Trunz, Erich", fd.getCorpusEditor());
    assertEquals("Goethe: Autobiographische Schriften II, (1817-1825, 1832)", fd.getDocTitle());
    assertNull(fd.getDocSubTitle());
    assertNull(fd.getDocEditor());
    assertNull(fd.getDocAuthor());

    // Token query built from two marmot morphology terms
    Krill ks = new Krill(new QueryBuilder("tokens").seg("marmot/m:case:nom").with("marmot/m:number:pl"));
    Result kr = ks.apply(ki);
    assertEquals(141, kr.getTotalResults());
    // No paging is configured in this test; these are the values the result reports
    assertEquals(0, kr.getStartIndex());
    assertEquals(25, kr.getItemsPerPage());
}
Also used : Krill(de.ids_mannheim.korap.Krill) QueryBuilder(de.ids_mannheim.korap.query.QueryBuilder) FieldDocument(de.ids_mannheim.korap.index.FieldDocument) KrillIndex(de.ids_mannheim.korap.KrillIndex) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test)

Example 17 with FieldDocument

use of de.ids_mannheim.korap.index.FieldDocument in project Krill by KorAP.

the class TestKrill method searchJSONwithPagebreaks.

/**
 * Indexes the page-break variant of the GOE/AGA/03828 test file, checks
 * the metadata exposed by the returned {@link FieldDocument}, and verifies
 * that a match reports its start page both through the {@link Match}
 * object and in its JSON serialization.
 *
 * <p>All assertions use the JUnit {@code (expected, actual)} argument
 * order so that a failure reports the two values the right way round.
 *
 * @throws IOException propagated unchanged from any call in this test
 */
@Test
public void searchJSONwithPagebreaks() throws IOException {
    // Construct index
    KrillIndex ki = new KrillIndex();
    // Indexing test files
    FieldDocument fd = ki.addDoc(1, getClass().getResourceAsStream("/goe/AGA-03828-pb.json.gz"), true);
    ki.commit();

    // Identifiers
    assertEquals(1, fd.getUID());
    assertEquals("GOE/AGA/03828", fd.getTextSigle());
    assertEquals("GOE/AGA", fd.getDocSigle());
    assertEquals("GOE", fd.getCorpusSigle());

    // Text-level metadata
    assertEquals("Autobiographische Einzelheiten", fd.getTitle());
    assertNull(fd.getSubTitle());
    assertEquals("Autobiographie", fd.getTextType());
    assertNull(fd.getTextTypeArt());
    assertNull(fd.getTextTypeRef());
    assertNull(fd.getTextColumn());
    assertNull(fd.getTextDomain());
    // Disabled in the original test:
    // assertEquals("529-547", fd.getPages());
    // assertEquals("QAO-NC", fd.getAvailability());
    assertEquals("18200000", fd.getCreationDate().toString());
    assertEquals("19820000", fd.getPubDate().toString());
    assertEquals("Goethe, Johann Wolfgang von", fd.getAuthor());
    assertNull(fd.getTextClass());
    assertEquals("de", fd.getLanguage());
    assertEquals("München", fd.getPubPlace());
    assertEquals("Goethe, Johann Wolfgang von:" + " Autobiographische Einzelheiten," + " (Geschrieben bis 1832), In: Goethe," + " Johann Wolfgang von: Goethes Werke," + " Bd. 10, Autobiographische Schriften" + " II, Hrsg.: Trunz, Erich. München: " + "Verlag C. H. Beck, 1982, S. 529-547", fd.getReference());
    assertEquals("Verlag C. H. Beck", fd.getPublisher());
    assertNull(fd.getEditor());
    assertNull(fd.getFileEditionStatement());
    assertNull(fd.getBiblEditionStatement());
    assertNull(fd.getKeywords());

    // Annotation metadata
    assertEquals("base#tokens_aggr", fd.getTokenSource());
    assertEquals("dereko dereko/structure " + "dereko/structure/base-sentences-paragraphs-pagebreaks", fd.getFoundries());
    assertEquals("dereko/s=spans", fd.getLayerInfos());

    // Corpus- and document-level metadata
    assertEquals("Goethes Werke", fd.getCorpusTitle());
    assertNull(fd.getCorpusSubTitle());
    assertEquals("Goethe, Johann Wolfgang von", fd.getCorpusAuthor());
    assertEquals("Trunz, Erich", fd.getCorpusEditor());
    assertEquals("Goethe: Autobiographische Schriften II, (1817-1825, 1832)", fd.getDocTitle());
    assertNull(fd.getDocSubTitle());
    assertNull(fd.getDocEditor());
    assertNull(fd.getDocAuthor());

    // Search for the surface form "der"
    Krill ks = new Krill(new QueryBuilder("tokens").seg("s:der"));
    Result kr = ks.apply(ki);
    assertEquals(97, kr.getTotalResults());
    // No paging is configured in this test; these are the values the result reports
    assertEquals(0, kr.getStartIndex());
    assertEquals(25, kr.getItemsPerPage());

    // The match at index 5 must report page 529, directly and in its JSON form
    Match m = kr.getMatch(5);
    assertEquals("Start page", 529, m.getStartPage());
    ObjectMapper mapper = new ObjectMapper();
    JsonNode res = mapper.readTree(m.toJsonString());
    assertEquals(529, res.at("/pages/0").asInt());
}
Also used : Krill(de.ids_mannheim.korap.Krill) JsonNode(com.fasterxml.jackson.databind.JsonNode) QueryBuilder(de.ids_mannheim.korap.query.QueryBuilder) FieldDocument(de.ids_mannheim.korap.index.FieldDocument) KrillIndex(de.ids_mannheim.korap.KrillIndex) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Result(de.ids_mannheim.korap.response.Result) Match(de.ids_mannheim.korap.response.Match) Test(org.junit.Test)

Example 18 with FieldDocument

use of de.ids_mannheim.korap.index.FieldDocument in project Krill by KorAP.

the class TestResult method checkJSONResultWarningBug.

/**
 * Regression check for the optionality warning: after running the
 * {@code optionality_warning.jsonld} query against a one-document index,
 * the serialized result must carry the warning text at
 * {@code /warnings/0/1}.
 *
 * @throws Exception propagated unchanged from any call in this test
 */
@Test
public void checkJSONResultWarningBug() throws Exception {
    KrillIndex index = new KrillIndex();

    // One document with the primary data "abab" in the "tokens" field
    FieldDocument doc = new FieldDocument();
    doc.addString("ID", "doc-1");
    doc.addString("UID", "1");
    doc.addTV(
        "tokens",
        "abab",
        "[(0-1)s:a|i:a|_0#0-1|-:t$<i>4]"
            + "[(1-2)s:b|i:b|_1#1-2]"
            + "[(2-3)s:a|i:c|_2#2-3]"
            + "[(3-4)s:b|i:a|_3#3-4]");
    index.addDoc(doc);
    index.commit();

    // Load the serialized query from the test resources and run it
    String queryPath = getClass().getResource("/queries/bugs/optionality_warning.jsonld").getFile();
    Krill krill = new Krill(getString(queryPath));
    Result result = krill.apply(index);
    assertEquals(2L, result.getTotalResults());

    // The warning used to be looked up at "/warning"; it is now expected
    // inside the "/warnings" list.
    JsonNode json = new ObjectMapper().readTree(result.toJsonString());
    assertEquals("Optionality of query is ignored", json.at("/warnings/0/1").asText());
}
Also used : Krill(de.ids_mannheim.korap.Krill) JsonNode(com.fasterxml.jackson.databind.JsonNode) FieldDocument(de.ids_mannheim.korap.index.FieldDocument) KrillIndex(de.ids_mannheim.korap.KrillIndex) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test) Test(de.ids_mannheim.korap.Test)

Example 19 with FieldDocument

use of de.ids_mannheim.korap.index.FieldDocument in project Krill by KorAP.

the class TestResult method checkJSONTokenResult.

/**
 * Indexes two small documents in the "base" field, searches for the
 * sequence {@code s:a} followed by {@code s:b}, and checks the token-list
 * JSON serialization of the result: the meta section and, for each of the
 * three matches, its text sigle and the two token offset pairs.
 *
 * @throws Exception propagated unchanged from any call in this test
 */
@Test
public void checkJSONTokenResult() throws Exception {
    KrillIndex index = new KrillIndex();

    // First document: "abab"
    FieldDocument firstDoc = new FieldDocument();
    firstDoc.addString("ID", "doc-1");
    firstDoc.addString("UID", "1");
    firstDoc.addTV(
        "base",
        "abab",
        "[(0-1)s:a|i:a|_0#0-1|-:t$<i>4]"
            + "[(1-2)s:b|i:b|_1#1-2]"
            + "[(2-3)s:a|i:c|_2#2-3]"
            + "[(3-4)s:b|i:a|_3#3-4]");
    index.addDoc(firstDoc);

    // Second document: "aba"
    FieldDocument secondDoc = new FieldDocument();
    secondDoc.addString("ID", "doc-2");
    secondDoc.addString("UID", "2");
    secondDoc.addTV(
        "base",
        "aba",
        "[(0-1)s:a|i:a|_0#0-1|-:t$<i>3]"
            + "[(1-2)s:b|i:b|_1#1-2]"
            + "[(2-3)s:a|i:c|_2#2-3]");
    index.addDoc(secondDoc);

    // Commit!
    index.commit();

    QueryBuilder builder = new QueryBuilder("base");
    SpanQuery query = (SpanQuery) builder.seq(builder.seg("s:a")).append(builder.seg("s:b")).toQuery();
    Result result = index.search(query);
    assertEquals(3L, result.getTotalResults());

    JsonNode json = new ObjectMapper().readTree(result.toTokenListJsonString());

    // Meta section
    assertEquals(3, json.at("/meta/totalResults").asInt());
    assertEquals("spanNext(base:s:a, base:s:b)", json.at("/meta/serialQuery").asText());
    assertEquals(0, json.at("/meta/startIndex").asInt());
    assertEquals(25, json.at("/meta/itemsPerPage").asInt());

    // Expected values per match, in result order:
    // text sigle, then tokens/0/0, tokens/0/1, tokens/1/0, tokens/1/1
    String[] expectedSigles = { "doc-1", "doc-1", "doc-2" };
    int[][] expectedTokens = {
        { 0, 1, 1, 2 },
        { 2, 3, 3, 4 },
        { 0, 1, 1, 2 }
    };
    for (int i = 0; i < expectedSigles.length; i++) {
        String match = "/matches/" + i;
        assertEquals(expectedSigles[i], json.at(match + "/textSigle").asText());
        assertEquals(expectedTokens[i][0], json.at(match + "/tokens/0/0").asInt());
        assertEquals(expectedTokens[i][1], json.at(match + "/tokens/0/1").asInt());
        assertEquals(expectedTokens[i][2], json.at(match + "/tokens/1/0").asInt());
        assertEquals(expectedTokens[i][3], json.at(match + "/tokens/1/1").asInt());
    }
}
Also used : JsonNode(com.fasterxml.jackson.databind.JsonNode) QueryBuilder(de.ids_mannheim.korap.query.QueryBuilder) FieldDocument(de.ids_mannheim.korap.index.FieldDocument) KrillIndex(de.ids_mannheim.korap.KrillIndex) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) SpanQuery(org.apache.lucene.search.spans.SpanQuery) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test) Test(de.ids_mannheim.korap.Test)

Example 20 with FieldDocument

use of de.ids_mannheim.korap.index.FieldDocument in project Krill by KorAP.

the class TestMetaFields method searchCollectionFields.

/**
 * Indexes two documents that differ in title, author, textClass and
 * pubDate, then loads four serialized collection queries from the test
 * resources and checks how many documents each one selects. The last
 * query is additionally applied as a search and its JSON result checked.
 *
 * @throws IOException propagated unchanged from any call in this test
 */
@Test
public void searchCollectionFields() throws IOException {
    KrillIndex index = new KrillIndex();

    // Document 1
    FieldDocument goetheDoc = new FieldDocument();
    goetheDoc.addString("corpusSigle", "ABC");
    goetheDoc.addString("docSigle", "ABC-123");
    goetheDoc.addString("textSigle", "ABC-123-0001");
    goetheDoc.addText("title", "Die Wahlverwandschaften");
    goetheDoc.addText("author", "Johann Wolfgang von Goethe");
    goetheDoc.addKeyword("textClass", "reisen wissenschaft");
    goetheDoc.addInt("pubDate", 20130617);
    goetheDoc.addTV(
        "tokens",
        "abc",
        "[(0-1)s:a|i:a|_0#0-1|-:t$<i>10]"
            + "[(1-2)s:b|i:b|_1#1-2]"
            + "[(2-3)s:c|i:c|_2#2-3]");
    index.addDoc(goetheDoc);

    // Document 2
    FieldDocument schillerDoc = new FieldDocument();
    schillerDoc.addString("corpusSigle", "ABC");
    schillerDoc.addString("docSigle", "ABC-125");
    schillerDoc.addString("textSigle", "ABC-125-0001");
    schillerDoc.addText("title", "Die Glocke");
    schillerDoc.addText("author", "Schiller, Friedrich");
    schillerDoc.addKeyword("textClass", "Reisen geschichte");
    schillerDoc.addInt("pubDate", 20130203);
    schillerDoc.addTV(
        "tokens",
        "abc",
        "[(0-1)s:a|i:a|_0#0-1|-:t$<i>10]"
            + "[(1-2)s:b|i:b|_1#1-2]"
            + "[(2-3)s:c|i:c|_2#2-3]");
    index.addDoc(schillerDoc);

    index.commit();

    // textClass = reisen & wissenschaft
    String textClassPath = getClass().getResource("/queries/collections/collection_textClass.jsonld").getFile();
    Krill textClassQuery = new Krill(getJsonString(textClassPath));
    KrillCollection textClassCollection = textClassQuery.getCollection();
    textClassCollection.setIndex(index);
    assertEquals(1, textClassCollection.numberOf("documents"));

    // textClass = reisen
    String textClass2Path = getClass().getResource("/queries/collections/collection_textClass_2.jsonld").getFile();
    Krill textClass2Query = new Krill(getJsonString(textClass2Path));
    KrillCollection textClass2Collection = textClass2Query.getCollection();
    textClass2Collection.setIndex(index);
    assertEquals(2, textClass2Collection.numberOf("documents"));

    // author = wolfgang
    String goethePath = getClass().getResource("/queries/collections/collection_goethe.jsonld").getFile();
    Krill goetheQuery = new Krill(getJsonString(goethePath));
    KrillCollection goetheCollection = goetheQuery.getCollection();
    goetheCollection.setIndex(index);
    assertEquals(1, goetheCollection.numberOf("documents"));

    // author = Wolfgang
    String goethe2Path = getClass().getResource("/queries/collections/collection_goethe_2.jsonld").getFile();
    Krill goethe2Query = new Krill(getJsonString(goethe2Path));
    KrillCollection goethe2Collection = goethe2Query.getCollection();
    goethe2Collection.setIndex(index);
    assertEquals(1, goethe2Collection.numberOf("documents"));

    // Apply the last query as a search and inspect the serialized result
    Result result = goethe2Query.apply(index);
    JsonNode json = new ObjectMapper().readTree(result.toJsonString());
    assertEquals(1, json.at("/meta/totalResults").asInt());
}
Also used : Krill(de.ids_mannheim.korap.Krill) JsonNode(com.fasterxml.jackson.databind.JsonNode) FieldDocument(de.ids_mannheim.korap.index.FieldDocument) KrillIndex(de.ids_mannheim.korap.KrillIndex) KrillCollection(de.ids_mannheim.korap.KrillCollection) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test)

Aggregations

FieldDocument (de.ids_mannheim.korap.index.FieldDocument)40 KrillIndex (de.ids_mannheim.korap.KrillIndex)28 Test (org.junit.Test)28 Result (de.ids_mannheim.korap.response.Result)20 Krill (de.ids_mannheim.korap.Krill)15 QueryBuilder (de.ids_mannheim.korap.query.QueryBuilder)14 JsonNode (com.fasterxml.jackson.databind.JsonNode)7 Match (de.ids_mannheim.korap.response.Match)7 SpanQuery (org.apache.lucene.search.spans.SpanQuery)7 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)6 KrillCollection (de.ids_mannheim.korap.KrillCollection)4 Test (de.ids_mannheim.korap.Test)4 TestSimple.getJsonString (de.ids_mannheim.korap.TestSimple.getJsonString)2 CollectionBuilder (de.ids_mannheim.korap.collection.CollectionBuilder)2 MultiTermTokenStream (de.ids_mannheim.korap.index.MultiTermTokenStream)1 Response (de.ids_mannheim.korap.response.Response)1 SearchContext (de.ids_mannheim.korap.response.SearchContext)1 Consumes (javax.ws.rs.Consumes)1 PUT (javax.ws.rs.PUT)1 Path (javax.ws.rs.Path)1