Search in sources:

Example 31 with QueryBuilder

use of de.ids_mannheim.korap.query.QueryBuilder in project Krill by KorAP.

the class TestKrill method searchJSONnewJSON.

/**
 * Indexes the single test document {@code /goe/AGA-03828.json.gz}, checks
 * the metadata exposed by the returned {@link FieldDocument} and then runs
 * a segment query ({@code mate/m:case:nom} with {@code mate/m:number:pl})
 * against the index.
 *
 * @throws IOException propagated unchanged from the calls made in this test
 */
@Test
public void searchJSONnewJSON() throws IOException {
    // Construct index
    KrillIndex ki = new KrillIndex();
    // Indexing test files
    FieldDocument fd = ki.addDoc(1, getClass().getResourceAsStream("/goe/AGA-03828.json.gz"), true);
    ki.commit();

    // JUnit's assertEquals takes (expected, actual); keeping that order
    // makes the "expected ... but was ..." failure message truthful.

    // Identifiers
    assertEquals(1, fd.getUID());
    assertEquals("GOE_AGA.03828", fd.getTextSigle());
    assertEquals("GOE_AGA", fd.getDocSigle());
    assertEquals("GOE", fd.getCorpusSigle());

    // Text-level metadata
    assertEquals("Autobiographische Einzelheiten", fd.getTitle());
    assertNull(fd.getSubTitle());
    assertEquals("Autobiographie", fd.getTextType());
    assertNull(fd.getTextTypeArt());
    assertNull(fd.getTextTypeRef());
    assertNull(fd.getTextColumn());
    assertNull(fd.getTextDomain());
    // assertEquals(fd.getPages(), "529-547");
    assertEquals("QAO-NC", fd.getLicense());
    assertEquals("18200000", fd.getCreationDate().toString());
    assertEquals("19820000", fd.getPubDate().toString());
    assertEquals("Goethe, Johann Wolfgang von", fd.getAuthor());
    assertNull(fd.getTextClass());
    assertEquals("de", fd.getLanguage());
    assertEquals("München", fd.getPubPlace());
    assertEquals(
        "Goethe, Johann Wolfgang von:"
            + " Autobiographische Einzelheiten,"
            + " (Geschrieben bis 1832), In: Goethe,"
            + " Johann Wolfgang von: Goethes Werke,"
            + " Bd. 10, Autobiographische Schriften"
            + " II, Hrsg.: Trunz, Erich. München: "
            + "Verlag C. H. Beck, 1982, S. 529-547",
        fd.getReference());
    assertEquals("Verlag C. H. Beck", fd.getPublisher());
    assertNull(fd.getEditor());
    assertNull(fd.getFileEditionStatement());
    assertNull(fd.getBiblEditionStatement());
    assertNull(fd.getKeywords());

    // Annotation metadata
    assertEquals("opennlp#tokens", fd.getTokenSource());
    assertEquals(
        "base base/paragraphs base/sentences corenlp "
            + "corenlp/constituency corenlp/morpho "
            + "corenlp/namedentities corenlp/sentences "
            + "glemm glemm/morpho mate mate/morpho"
            + " opennlp opennlp/morpho opennlp/sentences"
            + " treetagger treetagger/morpho "
            + "treetagger/sentences",
        fd.getFoundries());
    assertEquals(
        "base/s=spans corenlp/c=spans corenlp/ne=tokens"
            + " corenlp/p=tokens corenlp/s=spans glemm/l=tokens"
            + " mate/l=tokens mate/m=tokens mate/p=tokens"
            + " opennlp/p=tokens opennlp/s=spans tt/l=tokens"
            + " tt/p=tokens tt/s=spans",
        fd.getLayerInfos());

    // Corpus- and document-level metadata
    assertEquals("Goethes Werke", fd.getCorpusTitle());
    assertNull(fd.getCorpusSubTitle());
    assertEquals("Goethe, Johann Wolfgang von", fd.getCorpusAuthor());
    assertEquals("Trunz, Erich", fd.getCorpusEditor());
    assertEquals("Goethe: Autobiographische Schriften II, (1817-1825, 1832)", fd.getDocTitle());
    assertNull(fd.getDocSubTitle());
    assertNull(fd.getDocEditor());
    assertNull(fd.getDocAuthor());

    // Run the query and check the result totals and paging values
    Krill ks = new Krill(new QueryBuilder("tokens").seg("mate/m:case:nom").with("mate/m:number:pl"));
    Result kr = ks.apply(ki);
    assertEquals(148, kr.getTotalResults());
    assertEquals(0, kr.getStartIndex());
    assertEquals(25, kr.getItemsPerPage());
}
Also used : Krill(de.ids_mannheim.korap.Krill) QueryBuilder(de.ids_mannheim.korap.query.QueryBuilder) FieldDocument(de.ids_mannheim.korap.index.FieldDocument) KrillIndex(de.ids_mannheim.korap.KrillIndex) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test)

Example 32 with QueryBuilder

use of de.ids_mannheim.korap.query.QueryBuilder in project Krill by KorAP.

the class TestKrill method searchIndex.

/**
 * Indexes seven wiki test documents, searches for the segment
 * {@code s:Buchstaben} with a collection built from the term
 * {@code textClass} / {@code reisen}, and checks both the result and the
 * meta values reported by the JSON form of the search.
 *
 * @throws IOException propagated unchanged from the calls made in this test
 */
@Test
public void searchIndex() throws IOException {
    // Construct index
    KrillIndex ki = new KrillIndex();
    // Indexing test files
    for (String docId : new String[] { "00001", "00002", "00003", "00004", "00005", "00006", "02439" }) {
        ki.addDoc(getClass().getResourceAsStream("/wiki/" + docId + ".json.gz"), true);
    }
    ki.commit();

    Krill ks = new Krill(new QueryBuilder("tokens").seg("s:Buchstaben"));
    CollectionBuilder cb = new CollectionBuilder();
    ks.getCollection().fromBuilder(cb.term("textClass", "reisen"));

    // Paging and context settings; the same values are checked again
    // below in the serialized meta section.
    KrillMeta meta = ks.getMeta();
    meta.setCount(3);
    meta.setStartIndex(5);
    meta.getContext().left.setLength(1);
    meta.getContext().right.setLength(1);

    Result kr = ks.apply(ki);
    // JUnit's assertEquals takes (expected, actual).
    assertEquals(6, kr.getTotalResults());
    assertEquals("... dem [[Buchstaben]] A ...", kr.getMatch(0).getSnippetBrackets());

    JsonNode res = ks.toJsonNode();
    assertEquals(3, res.at("/meta/count").asInt());
    assertEquals(5, res.at("/meta/startIndex").asInt());
    assertEquals("token", res.at("/meta/context/left/0").asText());
    assertEquals(1, res.at("/meta/context/left/1").asInt());
    assertEquals("token", res.at("/meta/context/right/0").asText());
    assertEquals(1, res.at("/meta/context/right/1").asInt());
}
Also used : Krill(de.ids_mannheim.korap.Krill) KrillMeta(de.ids_mannheim.korap.KrillMeta) CollectionBuilder(de.ids_mannheim.korap.collection.CollectionBuilder) JsonNode(com.fasterxml.jackson.databind.JsonNode) TestSimple.getJsonString(de.ids_mannheim.korap.TestSimple.getJsonString) QueryBuilder(de.ids_mannheim.korap.query.QueryBuilder) KrillIndex(de.ids_mannheim.korap.KrillIndex) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test)

Example 33 with QueryBuilder

use of de.ids_mannheim.korap.query.QueryBuilder in project Krill by KorAP.

the class TestKrill method searchNewDeReKoData.

/**
 * This is a Schreibgebrauch resource that didn't work for
 * element queries.
 *
 * <p>Indexes {@code /goe/AGA-03828-new.json.gz}, checks the metadata
 * exposed by the returned {@link FieldDocument} and then runs a segment
 * query ({@code marmot/m:case:nom} with {@code marmot/m:number:pl})
 * against the index.
 *
 * @throws IOException propagated unchanged from the calls made in this test
 */
@Test
public void searchNewDeReKoData() throws IOException {
    // Construct index
    KrillIndex ki = new KrillIndex();
    // Indexing test files
    FieldDocument fd = ki.addDoc(1, getClass().getResourceAsStream("/goe/AGA-03828-new.json.gz"), true);
    ki.commit();

    // JUnit's assertEquals takes (expected, actual); keeping that order
    // makes the "expected ... but was ..." failure message truthful.

    // Identifiers
    assertEquals(1, fd.getUID());
    assertEquals("GOE/AGA/03828", fd.getTextSigle());
    assertEquals("GOE/AGA", fd.getDocSigle());
    assertEquals("GOE", fd.getCorpusSigle());

    // Text-level metadata
    assertEquals("Autobiographische Einzelheiten", fd.getTitle());
    assertNull(fd.getSubTitle());
    assertEquals("Autobiographie", fd.getTextType());
    assertNull(fd.getTextTypeArt());
    assertNull(fd.getTextTypeRef());
    assertNull(fd.getTextColumn());
    assertNull(fd.getTextDomain());
    // assertEquals(fd.getPages(), "529-547");
    assertEquals("QAO-NC", fd.getLicense());
    assertEquals("18200000", fd.getCreationDate().toString());
    assertEquals("19820000", fd.getPubDate().toString());
    assertEquals("Goethe, Johann Wolfgang von", fd.getAuthor());
    assertNull(fd.getTextClass());
    assertEquals("de", fd.getLanguage());
    assertEquals("München", fd.getPubPlace());
    assertEquals(
        "Goethe, Johann Wolfgang von:"
            + " Autobiographische Einzelheiten,"
            + " (Geschrieben bis 1832), In: Goethe,"
            + " Johann Wolfgang von: Goethes Werke,"
            + " Bd. 10, Autobiographische Schriften"
            + " II, Hrsg.: Trunz, Erich. München: "
            + "Verlag C. H. Beck, 1982, S. 529-547",
        fd.getReference());
    assertEquals("Verlag C. H. Beck", fd.getPublisher());
    assertNull(fd.getEditor());
    assertNull(fd.getFileEditionStatement());
    assertNull(fd.getBiblEditionStatement());
    assertNull(fd.getKeywords());

    // Annotation metadata
    assertEquals("base#tokens", fd.getTokenSource());
    assertEquals("corenlp corenlp/constituency corenlp/morpho corenlp/sentences dereko dereko/structure dereko/structure/base-sentences-paragraphs-pagebreaks malt malt/dependency marmot marmot/morpho opennlp opennlp/morpho opennlp/sentences treetagger treetagger/morpho", fd.getFoundries());
    assertEquals("corenlp/c=spans corenlp/p=tokens corenlp/s=spans dereko/s=spans malt/d=rels marmot/m=tokens marmot/p=tokens opennlp/p=tokens opennlp/s=spans tt/l=tokens tt/p=tokens", fd.getLayerInfos());

    // Corpus- and document-level metadata
    assertEquals("Goethes Werke", fd.getCorpusTitle());
    assertNull(fd.getCorpusSubTitle());
    assertEquals("Goethe, Johann Wolfgang von", fd.getCorpusAuthor());
    assertEquals("Trunz, Erich", fd.getCorpusEditor());
    assertEquals("Goethe: Autobiographische Schriften II, (1817-1825, 1832)", fd.getDocTitle());
    assertNull(fd.getDocSubTitle());
    assertNull(fd.getDocEditor());
    assertNull(fd.getDocAuthor());

    // Run the query and check the result totals and paging values
    Krill ks = new Krill(new QueryBuilder("tokens").seg("marmot/m:case:nom").with("marmot/m:number:pl"));
    Result kr = ks.apply(ki);
    assertEquals(141, kr.getTotalResults());
    assertEquals(0, kr.getStartIndex());
    assertEquals(25, kr.getItemsPerPage());
}
Also used : Krill(de.ids_mannheim.korap.Krill) QueryBuilder(de.ids_mannheim.korap.query.QueryBuilder) FieldDocument(de.ids_mannheim.korap.index.FieldDocument) KrillIndex(de.ids_mannheim.korap.KrillIndex) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test)

Example 34 with QueryBuilder

use of de.ids_mannheim.korap.query.QueryBuilder in project Krill by KorAP.

the class TestKrill method searchJSONwithPagebreaks.

/**
 * Indexes {@code /goe/AGA-03828-pb.json.gz}, checks the metadata exposed
 * by the returned {@link FieldDocument}, searches for the segment
 * {@code s:der} and verifies that the match at index 5 reports start
 * page 529, both through {@code getStartPage()} and in its JSON form.
 *
 * @throws IOException propagated unchanged from the calls made in this test
 */
@Test
public void searchJSONwithPagebreaks() throws IOException {
    // Construct index
    KrillIndex ki = new KrillIndex();
    // Indexing test files
    FieldDocument fd = ki.addDoc(1, getClass().getResourceAsStream("/goe/AGA-03828-pb.json.gz"), true);
    ki.commit();

    // JUnit's assertEquals takes (expected, actual); keeping that order
    // makes the "expected ... but was ..." failure message truthful.

    // Identifiers
    assertEquals(1, fd.getUID());
    assertEquals("GOE/AGA/03828", fd.getTextSigle());
    assertEquals("GOE/AGA", fd.getDocSigle());
    assertEquals("GOE", fd.getCorpusSigle());

    // Text-level metadata
    assertEquals("Autobiographische Einzelheiten", fd.getTitle());
    assertNull(fd.getSubTitle());
    assertEquals("Autobiographie", fd.getTextType());
    assertNull(fd.getTextTypeArt());
    assertNull(fd.getTextTypeRef());
    assertNull(fd.getTextColumn());
    assertNull(fd.getTextDomain());
    // assertEquals(fd.getPages(), "529-547");
    // assertEquals(fd.getAvailability(), "QAO-NC");
    assertEquals("18200000", fd.getCreationDate().toString());
    assertEquals("19820000", fd.getPubDate().toString());
    assertEquals("Goethe, Johann Wolfgang von", fd.getAuthor());
    assertNull(fd.getTextClass());
    assertEquals("de", fd.getLanguage());
    assertEquals("München", fd.getPubPlace());
    assertEquals(
        "Goethe, Johann Wolfgang von:"
            + " Autobiographische Einzelheiten,"
            + " (Geschrieben bis 1832), In: Goethe,"
            + " Johann Wolfgang von: Goethes Werke,"
            + " Bd. 10, Autobiographische Schriften"
            + " II, Hrsg.: Trunz, Erich. München: "
            + "Verlag C. H. Beck, 1982, S. 529-547",
        fd.getReference());
    assertEquals("Verlag C. H. Beck", fd.getPublisher());
    assertNull(fd.getEditor());
    assertNull(fd.getFileEditionStatement());
    assertNull(fd.getBiblEditionStatement());
    assertNull(fd.getKeywords());

    // Annotation metadata
    assertEquals("base#tokens_aggr", fd.getTokenSource());
    assertEquals(
        "dereko dereko/structure "
            + "dereko/structure/base-sentences-paragraphs-pagebreaks",
        fd.getFoundries());
    assertEquals("dereko/s=spans", fd.getLayerInfos());

    // Corpus- and document-level metadata
    assertEquals("Goethes Werke", fd.getCorpusTitle());
    assertNull(fd.getCorpusSubTitle());
    assertEquals("Goethe, Johann Wolfgang von", fd.getCorpusAuthor());
    assertEquals("Trunz, Erich", fd.getCorpusEditor());
    assertEquals("Goethe: Autobiographische Schriften II, (1817-1825, 1832)", fd.getDocTitle());
    assertNull(fd.getDocSubTitle());
    assertNull(fd.getDocEditor());
    assertNull(fd.getDocAuthor());

    // Run the query and check the result totals and paging values
    Krill ks = new Krill(new QueryBuilder("tokens").seg("s:der"));
    Result kr = ks.apply(ki);
    assertEquals(97, kr.getTotalResults());
    assertEquals(0, kr.getStartIndex());
    assertEquals(25, kr.getItemsPerPage());

    // Page information of a single match, via the accessor and via JSON
    Match m = kr.getMatch(5);
    assertEquals("Start page", 529, m.getStartPage());
    ObjectMapper mapper = new ObjectMapper();
    JsonNode res = mapper.readTree(m.toJsonString());
    assertEquals(529, res.at("/pages/0").asInt());
}
Also used : Krill(de.ids_mannheim.korap.Krill) JsonNode(com.fasterxml.jackson.databind.JsonNode) QueryBuilder(de.ids_mannheim.korap.query.QueryBuilder) FieldDocument(de.ids_mannheim.korap.index.FieldDocument) KrillIndex(de.ids_mannheim.korap.KrillIndex) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Result(de.ids_mannheim.korap.response.Result) Match(de.ids_mannheim.korap.response.Match) Test(org.junit.Test)

Example 35 with QueryBuilder

use of de.ids_mannheim.korap.query.QueryBuilder in project Krill by KorAP.

the class TestKrill method searchCount.

/**
 * Checks how the count setting on the meta object of a {@code Krill}
 * instance reacts to valid, negative and very large values.
 */
@Test
public void searchCount() {
    Krill k = new Krill(new QueryBuilder("field1").seg("a").with("b"));
    KrillMeta meta = k.getMeta();

    // JUnit's assertEquals takes (expected, actual).

    // Valid counts are stored as given
    meta.setCount(30);
    assertEquals(30, meta.getCount());
    meta.setCount(20);
    assertEquals(20, meta.getCount());

    // A negative count is expected to leave the previous value in place
    meta.setCount(-50);
    assertEquals(20, meta.getCount());

    // A count of 500 is expected to yield the value of getCountMax()
    meta.setCount(500);
    assertEquals(meta.getCountMax(), meta.getCount());
}
Also used : Krill(de.ids_mannheim.korap.Krill) KrillMeta(de.ids_mannheim.korap.KrillMeta) QueryBuilder(de.ids_mannheim.korap.query.QueryBuilder) Test(org.junit.Test)

Aggregations

QueryBuilder (de.ids_mannheim.korap.query.QueryBuilder)72 Test (org.junit.Test)67 SpanQuery (org.apache.lucene.search.spans.SpanQuery)39 KrillIndex (de.ids_mannheim.korap.KrillIndex)33 Result (de.ids_mannheim.korap.response.Result)32 Krill (de.ids_mannheim.korap.Krill)27 FieldDocument (de.ids_mannheim.korap.index.FieldDocument)14 SpanQueryWrapper (de.ids_mannheim.korap.query.wrap.SpanQueryWrapper)11 Match (de.ids_mannheim.korap.response.Match)8 JsonNode (com.fasterxml.jackson.databind.JsonNode)5 KrillQuery (de.ids_mannheim.korap.KrillQuery)5 QueryException (de.ids_mannheim.korap.util.QueryException)5 KrillMeta (de.ids_mannheim.korap.KrillMeta)4 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)3 KrillCollection (de.ids_mannheim.korap.KrillCollection)3 TestSimple.getJsonString (de.ids_mannheim.korap.TestSimple.getJsonString)3 CollectionBuilder (de.ids_mannheim.korap.collection.CollectionBuilder)3 SearchContext (de.ids_mannheim.korap.response.SearchContext)3 Test (de.ids_mannheim.korap.Test)2 DistanceConstraint (de.ids_mannheim.korap.query.DistanceConstraint)1