use of de.ids_mannheim.korap.KrillIndex in project Krill by KorAP.
the class TestKrill method searchJSONitemsPerResourceServer.
/**
 * Server-only counterpart of TestResource#testCollection.
 *
 * Indexes seven wiki fixture documents with the UIDs 1 to 7, restricts the
 * collection to the UIDs "1" and "4", limits the items per resource to 1 and
 * checks the paging information of the result.
 *
 * @throws IOException
 *             declared for the index and fixture operations used here
 */
@Test
public void searchJSONitemsPerResourceServer() throws IOException {
    // Construct index
    KrillIndex ki = new KrillIndex();

    // Indexing test files; every document gets a consecutive UID starting at 1
    int uid = 1;
    for (String name : new String[] { "00001", "00002", "00003", "00004", "00005", "00006", "02439" }) {
        ki.addDoc(uid++, getClass().getResourceAsStream("/wiki/" + name + ".json.gz"), true);
    }
    ki.commit();

    String json = getJsonString(getClass().getResource("/queries/bsp-uid-example.jsonld").getFile());
    Krill ks = new Krill(json);
    ks.getMeta().setItemsPerResource(1);

    // Only search in the documents with the UIDs 1 and 4
    KrillCollection kc = new KrillCollection();
    kc.filterUIDs(new String[] { "1", "4" });
    kc.setIndex(ki);
    ks.setCollection(kc);

    Result kr = ks.apply(ki);

    // JUnit convention: expected value first, actual value second
    assertEquals(2, kr.getTotalResults());
    assertEquals(0, kr.getStartIndex());
    assertEquals(25, kr.getItemsPerPage());
}
use of de.ids_mannheim.korap.KrillIndex in project Krill by KorAP.
the class TestKrill method searchJSONnewJSON.
/**
 * Indexes the single document /goe/AGA-03828.json.gz with UID 1, verifies the
 * metadata fields exposed by the returned FieldDocument and finally runs a
 * segment query for "mate/m:case:nom" combined with "mate/m:number:pl".
 *
 * @throws IOException
 *             declared for the index and fixture operations used here
 */
@Test
public void searchJSONnewJSON() throws IOException {
    // Construct index
    KrillIndex ki = new KrillIndex();

    // Indexing test files
    FieldDocument fd = ki.addDoc(1, getClass().getResourceAsStream("/goe/AGA-03828.json.gz"), true);
    ki.commit();

    // All assertions follow the JUnit convention (expected, actual),
    // so that failure messages report the values correctly.
    assertEquals(1, fd.getUID());
    assertEquals("GOE_AGA.03828", fd.getTextSigle());
    assertEquals("GOE_AGA", fd.getDocSigle());
    assertEquals("GOE", fd.getCorpusSigle());
    assertEquals("Autobiographische Einzelheiten", fd.getFieldValue("title"));
    assertNull(fd.getFieldValue("subTitle"));
    assertEquals("Autobiographie", fd.getFieldValue("textType"));
    assertNull(fd.getFieldValue("textTypeArt"));
    assertNull(fd.getFieldValue("textTypeRef"));
    assertNull(fd.getFieldValue("textColumn"));
    assertNull(fd.getFieldValue("textDomain"));
    // Disabled in the original test:
    // assertEquals("529-547", fd.getPages());
    assertEquals("QAO-NC", fd.getFieldValue("availability"));
    assertEquals("1820", fd.getFieldValue("creationDate"));
    assertEquals("1982", fd.getFieldValue("pubDate"));
    assertEquals("Goethe, Johann Wolfgang von", fd.getFieldValue("author"));
    assertNull(fd.getFieldValue("textClass"));
    assertEquals("de", fd.getFieldValue("language"));
    assertEquals("München", fd.getFieldValue("pubPlace"));
    assertEquals(
            "Goethe, Johann Wolfgang von:" + " Autobiographische Einzelheiten,"
                    + " (Geschrieben bis 1832), In: Goethe," + " Johann Wolfgang von: Goethes Werke,"
                    + " Bd. 10, Autobiographische Schriften" + " II, Hrsg.: Trunz, Erich. München: "
                    + "Verlag C. H. Beck, 1982, S. 529-547",
            fd.getFieldValue("reference"));
    assertEquals("Verlag C. H. Beck", fd.getFieldValue("publisher"));
    assertNull(fd.getFieldValue("editor"));
    assertNull(fd.getFieldValue("fileEditionStatement"));
    assertNull(fd.getFieldValue("biblEditionStatement"));
    assertNull(fd.getFieldValue("keywords"));
    assertEquals("opennlp#tokens", fd.getFieldValue("tokenSource"));
    assertEquals(
            "base base/paragraphs base/sentences corenlp " + "corenlp/constituency corenlp/morpho "
                    + "corenlp/namedentities corenlp/sentences " + "glemm glemm/morpho mate mate/morpho"
                    + " opennlp opennlp/morpho opennlp/sentences" + " treetagger treetagger/morpho "
                    + "treetagger/sentences",
            fd.getFieldValue("foundries"));
    assertEquals(
            "base/s=spans corenlp/c=spans corenlp/ne=tokens"
                    + " corenlp/p=tokens corenlp/s=spans glemm/l=tokens"
                    + " mate/l=tokens mate/m=tokens mate/p=tokens"
                    + " opennlp/p=tokens opennlp/s=spans tt/l=tokens" + " tt/p=tokens tt/s=spans",
            fd.getFieldValue("layerInfos"));
    assertEquals("Goethes Werke", fd.getFieldValue("corpusTitle"));
    assertNull(fd.getFieldValue("corpusSubTitle"));
    assertEquals("Goethe, Johann Wolfgang von", fd.getFieldValue("corpusAuthor"));
    assertEquals("Trunz, Erich", fd.getFieldValue("corpusEditor"));
    assertEquals("Goethe: Autobiographische Schriften II, (1817-1825, 1832)", fd.getFieldValue("docTitle"));
    assertNull(fd.getFieldValue("docSubTitle"));
    assertNull(fd.getFieldValue("docEditor"));
    assertNull(fd.getFieldValue("docAuthor"));

    // Search in the freshly indexed document
    Krill ks = new Krill(new QueryBuilder("tokens").seg("mate/m:case:nom").with("mate/m:number:pl"));
    Result kr = ks.apply(ki);
    assertEquals(148, kr.getTotalResults());
    assertEquals(0, kr.getStartIndex());
    assertEquals(25, kr.getItemsPerPage());
}
use of de.ids_mannheim.korap.KrillIndex in project Krill by KorAP.
the class TestKrill method searchIndex.
/**
 * Searches for the segment "s:Buchstaben" in seven wiki fixture documents,
 * restricted to the text class "reisen", and checks three meta configurations:
 * <ol>
 * <li>count=3, startIndex=5 with a context of one token on each side,</li>
 * <li>count=0,</li>
 * <li>count=1 with tokens enabled and snippets disabled.</li>
 * </ol>
 *
 * @throws IOException
 *             declared for the index and fixture operations used here
 */
@Test
public void searchIndex() throws IOException {
    // Construct index
    KrillIndex ki = new KrillIndex();

    // Indexing test files
    for (String name : new String[] { "00001", "00002", "00003", "00004", "00005", "00006", "02439" }) {
        ki.addDoc(getClass().getResourceAsStream("/wiki/" + name + ".json.gz"), true);
    }
    ki.commit();

    Krill ks = new Krill(new QueryBuilder("tokens").seg("s:Buchstaben"));
    CollectionBuilder cb = new CollectionBuilder();
    ks.getCollection().fromBuilder(cb.term("textClass", "reisen"));

    KrillMeta meta = ks.getMeta();
    meta.setCount(3);
    meta.setStartIndex(5);
    meta.getContext().left.setLength(1);
    meta.getContext().right.setLength(1);
    assertTrue(meta.hasSnippets());

    Result kr = ks.apply(ki);

    // JUnit convention: expected value first, actual value second
    assertEquals(6, kr.getTotalResults());
    assertEquals(1, kr.getMatches().size());
    assertEquals("... dem [[Buchstaben]] A ...", kr.getMatch(0).getSnippetBrackets());

    // Serialization of the request (Krill) carries the meta data but no matches
    JsonNode res = ks.toJsonNode();
    assertEquals(3, res.at("/meta/count").asInt());
    assertEquals(5, res.at("/meta/startIndex").asInt());
    assertEquals("token", res.at("/meta/context/left/0").asText());
    assertEquals(1, res.at("/meta/context/left/1").asInt());
    assertEquals("token", res.at("/meta/context/right/0").asText());
    assertEquals(1, res.at("/meta/context/right/1").asInt());
    assertTrue(res.at("/matches/0/snippet").isMissingNode());
    assertTrue(res.at("/matches/0/tokens").isMissingNode());

    // Serialization of the result carries the snippet of the match
    res = kr.toJsonNode();
    assertFalse(res.at("/matches/0/snippet").isMissingNode());
    assertTrue(res.at("/matches/0/tokens").isMissingNode());

    // Handle count=0 correctly
    meta = ks.getMeta();
    meta.setCount(0);
    kr = ks.apply(ki);
    assertEquals(6, kr.getTotalResults());
    assertEquals(0, kr.getItemsPerPage());
    assertEquals(0, kr.getMatches().size());

    // Handle tokens=true and
    // snippet=false correctly
    meta = ks.getMeta();
    meta.setCount(1);
    meta.setTokens(true);
    meta.setSnippets(false);
    kr = ks.apply(ki);
    assertEquals(6, kr.getTotalResults());
    assertEquals(1, kr.getMatches().size());
    res = kr.toJsonNode();
    assertFalse(res.at("/matches/0/hasSnippet").asBoolean());
    assertTrue(res.at("/matches/0/hasTokens").asBoolean());
    assertTrue(res.at("/matches/0/snippet").isMissingNode());
    assertEquals("dem", res.at("/matches/0/tokens/left/0").asText());
    assertEquals("Buchstaben", res.at("/matches/0/tokens/match/0").asText());
}
use of de.ids_mannheim.korap.KrillIndex in project Krill by KorAP.
the class TestKrill method queryJSONcosmasSentenceNegationBug.
/**
 * Runs the query stored in /queries/bugs/cosmas-exclude.jsonld against seven
 * wiki fixture documents and expects that it yields no results at all.
 *
 * @throws IOException
 *             declared for the index and fixture operations used here
 */
@Test
public void queryJSONcosmasSentenceNegationBug() throws IOException {
    final KrillIndex index = new KrillIndex();

    // Feed the wiki fixtures into the index
    final String[] fixtures = { "00001", "00002", "00003", "00004", "00005", "00006", "02439" };
    for (final String fixture : fixtures) {
        final String resource = "/wiki/" + fixture + ".json.gz";
        index.addDoc(getClass().getResourceAsStream(resource), true);
    }
    index.commit();

    // Load the serialized query and execute it on the index
    final String queryFile = getClass().getResource("/queries/bugs/cosmas-exclude.jsonld").getFile();
    final String query = getJsonString(queryFile);
    final Krill search = new Krill(query);
    final Result result = search.apply(index);

    assertEquals(0, result.getTotalResults());
}
use of de.ids_mannheim.korap.KrillIndex in project Krill by KorAP.
the class TestKrill method searchJSONmultipleClassesBug.
/**
 * Indexes the document /bzk/D59-00089.json.gz twice (UIDs 1 and 2), runs the
 * query stored in /queries/bugs/multiple_classes.jsonld and checks the
 * serialized query, the bracket snippet of the first match and the result
 * counts.
 *
 * @throws IOException
 *             declared for the index and fixture operations used here
 */
@Test
public void searchJSONmultipleClassesBug() throws IOException {
    // Construct index
    KrillIndex ki = new KrillIndex();

    // Indexing test files; the same document is added under two UIDs
    ki.addDoc(1, getClass().getResourceAsStream("/bzk/D59-00089.json.gz"), true);
    ki.addDoc(2, getClass().getResourceAsStream("/bzk/D59-00089.json.gz"), true);
    ki.commit();

    String json = getJsonString(getClass().getResource("/queries/bugs/multiple_classes.jsonld").getFile());
    Krill ks = new Krill(json);
    Result kr = ks.apply(ki);

    // JUnit convention: expected value first, actual value second
    assertEquals(
            "{4: spanNext({1: spanNext({2: tokens:s:ins}, " + "{3: tokens:s:Leben})}, tokens:s:gerufen)}",
            kr.getSerialQuery());
    assertEquals(
            "... sozialistischen Initiative\" eine neue politische"
                    + " Gruppierung [[{4:{1:{2:ins} {3:Leben}} gerufen}]] hatten. "
                    + "Pressemeldungen zufolge haben sich in ...",
            kr.getMatch(0).getSnippetBrackets());
    assertEquals(2, kr.getTotalResults());
    assertEquals(0, kr.getStartIndex());
}
Aggregations