use of de.ids_mannheim.korap.query.QueryBuilder in project Krill by KorAP.
the class TestKrill method searchJSONnewJSON.
/**
 * Indexes the single test document {@code /goe/AGA-03828.json.gz} under
 * UID 1, checks the metadata exposed by the returned {@link FieldDocument},
 * and then runs a {@code tokens} query combining {@code mate/m:case:nom}
 * with {@code mate/m:number:pl}.
 *
 * <p>All {@code assertEquals} calls use the {@code (expected, actual)}
 * argument order so that a failing assertion reports the two values the
 * right way round.
 *
 * @throws IOException propagated unchanged from the calls made in this test
 */
@Test
public void searchJSONnewJSON() throws IOException {
    // Construct index
    KrillIndex ki = new KrillIndex();

    // Indexing test files
    FieldDocument fd = ki.addDoc(1, getClass().getResourceAsStream("/goe/AGA-03828.json.gz"), true);
    ki.commit();

    // Identifiers and text-level metadata
    assertEquals(1, fd.getUID());
    assertEquals("GOE_AGA.03828", fd.getTextSigle());
    assertEquals("GOE_AGA", fd.getDocSigle());
    assertEquals("GOE", fd.getCorpusSigle());
    assertEquals("Autobiographische Einzelheiten", fd.getTitle());
    assertNull(fd.getSubTitle());
    assertEquals("Autobiographie", fd.getTextType());
    assertNull(fd.getTextTypeArt());
    assertNull(fd.getTextTypeRef());
    assertNull(fd.getTextColumn());
    assertNull(fd.getTextDomain());
    // assertEquals("529-547", fd.getPages());
    assertEquals("QAO-NC", fd.getLicense());
    assertEquals("18200000", fd.getCreationDate().toString());
    assertEquals("19820000", fd.getPubDate().toString());
    assertEquals("Goethe, Johann Wolfgang von", fd.getAuthor());
    assertNull(fd.getTextClass());
    assertEquals("de", fd.getLanguage());
    assertEquals("München", fd.getPubPlace());
    assertEquals(
        "Goethe, Johann Wolfgang von:"
            + " Autobiographische Einzelheiten,"
            + " (Geschrieben bis 1832), In: Goethe,"
            + " Johann Wolfgang von: Goethes Werke,"
            + " Bd. 10, Autobiographische Schriften"
            + " II, Hrsg.: Trunz, Erich. München: "
            + "Verlag C. H. Beck, 1982, S. 529-547",
        fd.getReference());
    assertEquals("Verlag C. H. Beck", fd.getPublisher());
    assertNull(fd.getEditor());
    assertNull(fd.getFileEditionStatement());
    assertNull(fd.getBiblEditionStatement());
    assertNull(fd.getKeywords());

    // Annotation metadata
    assertEquals("opennlp#tokens", fd.getTokenSource());
    assertEquals(
        "base base/paragraphs base/sentences corenlp "
            + "corenlp/constituency corenlp/morpho "
            + "corenlp/namedentities corenlp/sentences "
            + "glemm glemm/morpho mate mate/morpho"
            + " opennlp opennlp/morpho opennlp/sentences"
            + " treetagger treetagger/morpho "
            + "treetagger/sentences",
        fd.getFoundries());
    assertEquals(
        "base/s=spans corenlp/c=spans corenlp/ne=tokens"
            + " corenlp/p=tokens corenlp/s=spans glemm/l=tokens"
            + " mate/l=tokens mate/m=tokens mate/p=tokens"
            + " opennlp/p=tokens opennlp/s=spans tt/l=tokens"
            + " tt/p=tokens tt/s=spans",
        fd.getLayerInfos());

    // Corpus- and document-level metadata
    assertEquals("Goethes Werke", fd.getCorpusTitle());
    assertNull(fd.getCorpusSubTitle());
    assertEquals("Goethe, Johann Wolfgang von", fd.getCorpusAuthor());
    assertEquals("Trunz, Erich", fd.getCorpusEditor());
    assertEquals("Goethe: Autobiographische Schriften II, (1817-1825, 1832)", fd.getDocTitle());
    assertNull(fd.getDocSubTitle());
    assertNull(fd.getDocEditor());
    assertNull(fd.getDocAuthor());

    // Query the index; the test sets no paging values itself
    Krill ks = new Krill(new QueryBuilder("tokens").seg("mate/m:case:nom").with("mate/m:number:pl"));
    Result kr = ks.apply(ki);
    assertEquals(148, kr.getTotalResults());
    assertEquals(0, kr.getStartIndex());
    assertEquals(25, kr.getItemsPerPage());
}
use of de.ids_mannheim.korap.query.QueryBuilder in project Krill by KorAP.
the class TestKrill method searchIndex.
/**
 * Indexes seven {@code /wiki/*.json.gz} test documents, searches the
 * {@code tokens} field for {@code s:Buchstaben} within a collection
 * restricted to {@code textClass = reisen}, and checks both the result
 * and the meta section of the JSON produced by {@code ks.toJsonNode()}.
 *
 * <p>The meta settings applied before the search (count 3, start index 5,
 * left and right context length 1) are expected to show up unchanged in
 * the JSON output.
 *
 * @throws IOException propagated unchanged from the calls made in this test
 */
@Test
public void searchIndex() throws IOException {
    // Construct index
    KrillIndex ki = new KrillIndex();

    // Indexing test files
    for (String docId : new String[] { "00001", "00002", "00003", "00004", "00005", "00006", "02439" }) {
        ki.addDoc(getClass().getResourceAsStream("/wiki/" + docId + ".json.gz"), true);
    }
    ki.commit();

    Krill ks = new Krill(new QueryBuilder("tokens").seg("s:Buchstaben"));
    CollectionBuilder cb = new CollectionBuilder();
    ks.getCollection().fromBuilder(cb.term("textClass", "reisen"));

    // Paging and context configuration
    KrillMeta meta = ks.getMeta();
    meta.setCount(3);
    meta.setStartIndex(5);
    meta.getContext().left.setLength(1);
    meta.getContext().right.setLength(1);

    Result kr = ks.apply(ki);
    assertEquals(6, kr.getTotalResults());
    assertEquals("... dem [[Buchstaben]] A ...", kr.getMatch(0).getSnippetBrackets());

    // The serialized meta section must reflect the settings made above
    JsonNode res = ks.toJsonNode();
    assertEquals(3, res.at("/meta/count").asInt());
    assertEquals(5, res.at("/meta/startIndex").asInt());
    assertEquals("token", res.at("/meta/context/left/0").asText());
    assertEquals(1, res.at("/meta/context/left/1").asInt());
    assertEquals("token", res.at("/meta/context/right/0").asText());
    assertEquals(1, res.at("/meta/context/right/1").asInt());
}
use of de.ids_mannheim.korap.query.QueryBuilder in project Krill by KorAP.
the class TestKrill method searchNewDeReKoData.
/**
 * This is a Schreibgebrauch resource that didn't work for
 * element queries.
 *
 * <p>Indexes {@code /goe/AGA-03828-new.json.gz} under UID 1, checks the
 * metadata exposed by the returned {@link FieldDocument}, and then runs a
 * {@code tokens} query combining {@code marmot/m:case:nom} with
 * {@code marmot/m:number:pl}.
 *
 * <p>All {@code assertEquals} calls use the {@code (expected, actual)}
 * argument order so that a failing assertion reports the two values the
 * right way round.
 *
 * @throws IOException propagated unchanged from the calls made in this test
 */
@Test
public void searchNewDeReKoData() throws IOException {
    // Construct index
    KrillIndex ki = new KrillIndex();

    // Indexing test files
    FieldDocument fd = ki.addDoc(1, getClass().getResourceAsStream("/goe/AGA-03828-new.json.gz"), true);
    ki.commit();

    // Identifiers and text-level metadata
    assertEquals(1, fd.getUID());
    assertEquals("GOE/AGA/03828", fd.getTextSigle());
    assertEquals("GOE/AGA", fd.getDocSigle());
    assertEquals("GOE", fd.getCorpusSigle());
    assertEquals("Autobiographische Einzelheiten", fd.getTitle());
    assertNull(fd.getSubTitle());
    assertEquals("Autobiographie", fd.getTextType());
    assertNull(fd.getTextTypeArt());
    assertNull(fd.getTextTypeRef());
    assertNull(fd.getTextColumn());
    assertNull(fd.getTextDomain());
    // assertEquals("529-547", fd.getPages());
    assertEquals("QAO-NC", fd.getLicense());
    assertEquals("18200000", fd.getCreationDate().toString());
    assertEquals("19820000", fd.getPubDate().toString());
    assertEquals("Goethe, Johann Wolfgang von", fd.getAuthor());
    assertNull(fd.getTextClass());
    assertEquals("de", fd.getLanguage());
    assertEquals("München", fd.getPubPlace());
    assertEquals(
        "Goethe, Johann Wolfgang von:"
            + " Autobiographische Einzelheiten,"
            + " (Geschrieben bis 1832), In: Goethe,"
            + " Johann Wolfgang von: Goethes Werke,"
            + " Bd. 10, Autobiographische Schriften"
            + " II, Hrsg.: Trunz, Erich. München: "
            + "Verlag C. H. Beck, 1982, S. 529-547",
        fd.getReference());
    assertEquals("Verlag C. H. Beck", fd.getPublisher());
    assertNull(fd.getEditor());
    assertNull(fd.getFileEditionStatement());
    assertNull(fd.getBiblEditionStatement());
    assertNull(fd.getKeywords());

    // Annotation metadata
    assertEquals("base#tokens", fd.getTokenSource());
    assertEquals(
        "corenlp corenlp/constituency corenlp/morpho corenlp/sentences dereko dereko/structure dereko/structure/base-sentences-paragraphs-pagebreaks malt malt/dependency marmot marmot/morpho opennlp opennlp/morpho opennlp/sentences treetagger treetagger/morpho",
        fd.getFoundries());
    assertEquals(
        "corenlp/c=spans corenlp/p=tokens corenlp/s=spans dereko/s=spans malt/d=rels marmot/m=tokens marmot/p=tokens opennlp/p=tokens opennlp/s=spans tt/l=tokens tt/p=tokens",
        fd.getLayerInfos());

    // Corpus- and document-level metadata
    assertEquals("Goethes Werke", fd.getCorpusTitle());
    assertNull(fd.getCorpusSubTitle());
    assertEquals("Goethe, Johann Wolfgang von", fd.getCorpusAuthor());
    assertEquals("Trunz, Erich", fd.getCorpusEditor());
    assertEquals("Goethe: Autobiographische Schriften II, (1817-1825, 1832)", fd.getDocTitle());
    assertNull(fd.getDocSubTitle());
    assertNull(fd.getDocEditor());
    assertNull(fd.getDocAuthor());

    // Query the index; the test sets no paging values itself
    Krill ks = new Krill(new QueryBuilder("tokens").seg("marmot/m:case:nom").with("marmot/m:number:pl"));
    Result kr = ks.apply(ki);
    assertEquals(141, kr.getTotalResults());
    assertEquals(0, kr.getStartIndex());
    assertEquals(25, kr.getItemsPerPage());
}
use of de.ids_mannheim.korap.query.QueryBuilder in project Krill by KorAP.
the class TestKrill method searchJSONwithPagebreaks.
/**
 * Indexes {@code /goe/AGA-03828-pb.json.gz} under UID 1, checks the
 * metadata exposed by the returned {@link FieldDocument}, searches the
 * {@code tokens} field for {@code s:der}, and verifies that the match at
 * index 5 reports start page 529 both through {@code getStartPage()} and
 * in the {@code /pages/0} entry of its JSON serialization.
 *
 * <p>All {@code assertEquals} calls use the {@code (expected, actual)}
 * argument order (after the optional message) so that a failing assertion
 * reports the two values the right way round.
 *
 * @throws IOException propagated unchanged from the calls made in this test
 */
@Test
public void searchJSONwithPagebreaks() throws IOException {
    // Construct index
    KrillIndex ki = new KrillIndex();

    // Indexing test files
    FieldDocument fd = ki.addDoc(1, getClass().getResourceAsStream("/goe/AGA-03828-pb.json.gz"), true);
    ki.commit();

    // Identifiers and text-level metadata
    assertEquals(1, fd.getUID());
    assertEquals("GOE/AGA/03828", fd.getTextSigle());
    assertEquals("GOE/AGA", fd.getDocSigle());
    assertEquals("GOE", fd.getCorpusSigle());
    assertEquals("Autobiographische Einzelheiten", fd.getTitle());
    assertNull(fd.getSubTitle());
    assertEquals("Autobiographie", fd.getTextType());
    assertNull(fd.getTextTypeArt());
    assertNull(fd.getTextTypeRef());
    assertNull(fd.getTextColumn());
    assertNull(fd.getTextDomain());
    // assertEquals("529-547", fd.getPages());
    // assertEquals("QAO-NC", fd.getAvailability());
    assertEquals("18200000", fd.getCreationDate().toString());
    assertEquals("19820000", fd.getPubDate().toString());
    assertEquals("Goethe, Johann Wolfgang von", fd.getAuthor());
    assertNull(fd.getTextClass());
    assertEquals("de", fd.getLanguage());
    assertEquals("München", fd.getPubPlace());
    assertEquals(
        "Goethe, Johann Wolfgang von:"
            + " Autobiographische Einzelheiten,"
            + " (Geschrieben bis 1832), In: Goethe,"
            + " Johann Wolfgang von: Goethes Werke,"
            + " Bd. 10, Autobiographische Schriften"
            + " II, Hrsg.: Trunz, Erich. München: "
            + "Verlag C. H. Beck, 1982, S. 529-547",
        fd.getReference());
    assertEquals("Verlag C. H. Beck", fd.getPublisher());
    assertNull(fd.getEditor());
    assertNull(fd.getFileEditionStatement());
    assertNull(fd.getBiblEditionStatement());
    assertNull(fd.getKeywords());

    // Annotation metadata
    assertEquals("base#tokens_aggr", fd.getTokenSource());
    assertEquals(
        "dereko dereko/structure "
            + "dereko/structure/base-sentences-paragraphs-pagebreaks",
        fd.getFoundries());
    assertEquals("dereko/s=spans", fd.getLayerInfos());

    // Corpus- and document-level metadata
    assertEquals("Goethes Werke", fd.getCorpusTitle());
    assertNull(fd.getCorpusSubTitle());
    assertEquals("Goethe, Johann Wolfgang von", fd.getCorpusAuthor());
    assertEquals("Trunz, Erich", fd.getCorpusEditor());
    assertEquals("Goethe: Autobiographische Schriften II, (1817-1825, 1832)", fd.getDocTitle());
    assertNull(fd.getDocSubTitle());
    assertNull(fd.getDocEditor());
    assertNull(fd.getDocAuthor());

    // Query the index; the test sets no paging values itself
    Krill ks = new Krill(new QueryBuilder("tokens").seg("s:der"));
    Result kr = ks.apply(ki);
    assertEquals(97, kr.getTotalResults());
    assertEquals(0, kr.getStartIndex());
    assertEquals(25, kr.getItemsPerPage());

    // Page information must be available on the match and in its JSON form
    Match m = kr.getMatch(5);
    assertEquals("Start page", 529, m.getStartPage());
    ObjectMapper mapper = new ObjectMapper();
    JsonNode res = mapper.readTree(m.toJsonString());
    assertEquals(529, res.at("/pages/0").asInt());
}
use of de.ids_mannheim.korap.query.QueryBuilder in project Krill by KorAP.
the class TestKrill method searchCount.
/**
 * Checks how {@code KrillMeta.setCount} treats the values passed to it.
 *
 * <p>The assertions expect that 30 and 20 are stored as given, that -50
 * leaves the previously stored 20 in place, and that 500 results in the
 * value returned by {@code getCountMax()}.
 *
 * <p>All {@code assertEquals} calls use the {@code (expected, actual)}
 * argument order so that a failing assertion reports the two values the
 * right way round.
 */
@Test
public void searchCount() {
    Krill k = new Krill(new QueryBuilder("field1").seg("a").with("b"));
    KrillMeta meta = k.getMeta();

    // Count:
    meta.setCount(30);
    assertEquals(30, meta.getCount());
    meta.setCount(20);
    assertEquals(20, meta.getCount());

    // A negative count is expected to keep the previous value
    meta.setCount(-50);
    assertEquals(20, meta.getCount());

    // An oversized count is expected to end up at the maximum
    meta.setCount(500);
    assertEquals(meta.getCountMax(), meta.getCount());
}
Aggregations