use of de.ids_mannheim.korap.index.FieldDocument in project Krill by KorAP.
the class TestKrill method searchNewDeReKoData.
/**
 * Indexes a Schreibgebrauch resource that didn't work for
 * element queries.
 *
 * The test adds <code>/goe/AGA-03828-new.json.gz</code> to a fresh index,
 * checks the metadata exposed by the returned {@link FieldDocument} and
 * finally runs a token query against the index.
 *
 * @throws IOException
 *             propagated to the test runner; not handled here
 */
@Test
public void searchNewDeReKoData() throws IOException {
    // Construct index
    KrillIndex ki = new KrillIndex();

    // Indexing test files
    FieldDocument fd = ki.addDoc(1, getClass().getResourceAsStream("/goe/AGA-03828-new.json.gz"), true);
    ki.commit();

    // All assertions follow JUnit's (expected, actual) order so that
    // failure messages report the values the right way round.

    // Text level metadata
    assertEquals(1, fd.getUID());
    assertEquals("GOE/AGA/03828", fd.getTextSigle());
    assertEquals("GOE/AGA", fd.getDocSigle());
    assertEquals("GOE", fd.getCorpusSigle());
    assertEquals("Autobiographische Einzelheiten", fd.getTitle());
    assertNull(fd.getSubTitle());
    assertEquals("Autobiographie", fd.getTextType());
    assertNull(fd.getTextTypeArt());
    assertNull(fd.getTextTypeRef());
    assertNull(fd.getTextColumn());
    assertNull(fd.getTextDomain());
    // assertEquals("529-547", fd.getPages());
    assertEquals("QAO-NC", fd.getLicense());
    assertEquals("18200000", fd.getCreationDate().toString());
    assertEquals("19820000", fd.getPubDate().toString());
    assertEquals("Goethe, Johann Wolfgang von", fd.getAuthor());
    assertNull(fd.getTextClass());
    assertEquals("de", fd.getLanguage());
    assertEquals("München", fd.getPubPlace());
    assertEquals(
            "Goethe, Johann Wolfgang von:" + " Autobiographische Einzelheiten,"
                    + " (Geschrieben bis 1832), In: Goethe," + " Johann Wolfgang von: Goethes Werke,"
                    + " Bd. 10, Autobiographische Schriften" + " II, Hrsg.: Trunz, Erich. München: "
                    + "Verlag C. H. Beck, 1982, S. 529-547",
            fd.getReference());
    assertEquals("Verlag C. H. Beck", fd.getPublisher());
    assertNull(fd.getEditor());
    assertNull(fd.getFileEditionStatement());
    assertNull(fd.getBiblEditionStatement());
    assertNull(fd.getKeywords());

    // Annotation related metadata
    assertEquals("base#tokens", fd.getTokenSource());
    assertEquals(
            "corenlp corenlp/constituency corenlp/morpho corenlp/sentences dereko dereko/structure dereko/structure/base-sentences-paragraphs-pagebreaks malt malt/dependency marmot marmot/morpho opennlp opennlp/morpho opennlp/sentences treetagger treetagger/morpho",
            fd.getFoundries());
    assertEquals(
            "corenlp/c=spans corenlp/p=tokens corenlp/s=spans dereko/s=spans malt/d=rels marmot/m=tokens marmot/p=tokens opennlp/p=tokens opennlp/s=spans tt/l=tokens tt/p=tokens",
            fd.getLayerInfos());

    // Corpus and document level metadata
    assertEquals("Goethes Werke", fd.getCorpusTitle());
    assertNull(fd.getCorpusSubTitle());
    assertEquals("Goethe, Johann Wolfgang von", fd.getCorpusAuthor());
    assertEquals("Trunz, Erich", fd.getCorpusEditor());
    assertEquals("Goethe: Autobiographische Schriften II, (1817-1825, 1832)", fd.getDocTitle());
    assertNull(fd.getDocSubTitle());
    assertNull(fd.getDocEditor());
    assertNull(fd.getDocAuthor());

    // Segment query on the "tokens" field:
    // marmot/m:case:nom with marmot/m:number:pl
    Krill ks = new Krill(new QueryBuilder("tokens").seg("marmot/m:case:nom").with("marmot/m:number:pl"));
    Result kr = ks.apply(ki);
    assertEquals(141, kr.getTotalResults());
    assertEquals(0, kr.getStartIndex());
    assertEquals(25, kr.getItemsPerPage());
}
use of de.ids_mannheim.korap.index.FieldDocument in project Krill by KorAP.
the class TestKrill method searchJSONwithPagebreaks.
/**
 * Adds <code>/goe/AGA-03828-pb.json.gz</code> to a fresh index, checks the
 * metadata exposed by the returned {@link FieldDocument} and verifies
 * that a match of the query <code>s:der</code> reports its start page,
 * both through {@link Match#getStartPage()} and in the match's JSON
 * serialization under <code>/pages/0</code>.
 *
 * @throws IOException
 *             propagated to the test runner; not handled here
 */
@Test
public void searchJSONwithPagebreaks() throws IOException {
    // Construct index
    KrillIndex ki = new KrillIndex();

    // Indexing test files
    FieldDocument fd = ki.addDoc(1, getClass().getResourceAsStream("/goe/AGA-03828-pb.json.gz"), true);
    ki.commit();

    // All assertions follow JUnit's (expected, actual) order so that
    // failure messages report the values the right way round.

    // Text level metadata
    assertEquals(1, fd.getUID());
    assertEquals("GOE/AGA/03828", fd.getTextSigle());
    assertEquals("GOE/AGA", fd.getDocSigle());
    assertEquals("GOE", fd.getCorpusSigle());
    assertEquals("Autobiographische Einzelheiten", fd.getTitle());
    assertNull(fd.getSubTitle());
    assertEquals("Autobiographie", fd.getTextType());
    assertNull(fd.getTextTypeArt());
    assertNull(fd.getTextTypeRef());
    assertNull(fd.getTextColumn());
    assertNull(fd.getTextDomain());
    // assertEquals("529-547", fd.getPages());
    // assertEquals("QAO-NC", fd.getAvailability());
    assertEquals("18200000", fd.getCreationDate().toString());
    assertEquals("19820000", fd.getPubDate().toString());
    assertEquals("Goethe, Johann Wolfgang von", fd.getAuthor());
    assertNull(fd.getTextClass());
    assertEquals("de", fd.getLanguage());
    assertEquals("München", fd.getPubPlace());
    assertEquals(
            "Goethe, Johann Wolfgang von:" + " Autobiographische Einzelheiten,"
                    + " (Geschrieben bis 1832), In: Goethe," + " Johann Wolfgang von: Goethes Werke,"
                    + " Bd. 10, Autobiographische Schriften" + " II, Hrsg.: Trunz, Erich. München: "
                    + "Verlag C. H. Beck, 1982, S. 529-547",
            fd.getReference());
    assertEquals("Verlag C. H. Beck", fd.getPublisher());
    assertNull(fd.getEditor());
    assertNull(fd.getFileEditionStatement());
    assertNull(fd.getBiblEditionStatement());
    assertNull(fd.getKeywords());

    // Annotation related metadata
    assertEquals("base#tokens_aggr", fd.getTokenSource());
    assertEquals("dereko dereko/structure " + "dereko/structure/base-sentences-paragraphs-pagebreaks",
            fd.getFoundries());
    assertEquals("dereko/s=spans", fd.getLayerInfos());

    // Corpus and document level metadata
    assertEquals("Goethes Werke", fd.getCorpusTitle());
    assertNull(fd.getCorpusSubTitle());
    assertEquals("Goethe, Johann Wolfgang von", fd.getCorpusAuthor());
    assertEquals("Trunz, Erich", fd.getCorpusEditor());
    assertEquals("Goethe: Autobiographische Schriften II, (1817-1825, 1832)", fd.getDocTitle());
    assertNull(fd.getDocSubTitle());
    assertNull(fd.getDocEditor());
    assertNull(fd.getDocAuthor());

    // Segment query for s:der on the "tokens" field
    Krill ks = new Krill(new QueryBuilder("tokens").seg("s:der"));
    Result kr = ks.apply(ki);
    assertEquals(97, kr.getTotalResults());
    assertEquals(0, kr.getStartIndex());
    assertEquals(25, kr.getItemsPerPage());

    // The match at index 5 has to carry page information
    Match m = kr.getMatch(5);
    assertEquals("Start page", 529, m.getStartPage());

    // The same page number has to show up in the JSON serialization
    ObjectMapper mapper = new ObjectMapper();
    JsonNode res = mapper.readTree(m.toJsonString());
    assertEquals(529, res.at("/pages/0").asInt());
}
use of de.ids_mannheim.korap.index.FieldDocument in project Krill by KorAP.
the class TestResult method checkJSONResultWarningBug.
/**
 * Runs the query stored in
 * <code>/queries/bugs/optionality_warning.jsonld</code> against an index
 * holding a single document and checks the number of hits as well as
 * the warning text found at <code>/warnings/0/1</code> of the JSON
 * result.
 */
@Test
public void checkJSONResultWarningBug() throws Exception {
    KrillIndex index = new KrillIndex();

    // Build the only document of the index
    FieldDocument doc = new FieldDocument();
    doc.addString("ID", "doc-1");
    doc.addString("UID", "1");
    doc.addTV("tokens", "abab",
            "[(0-1)s:a|i:a|_0#0-1|-:t$<i>4]"
                    + "[(1-2)s:b|i:b|_1#1-2]"
                    + "[(2-3)s:a|i:c|_2#2-3]"
                    + "[(3-4)s:b|i:a|_3#3-4]");
    index.addDoc(doc);
    index.commit();

    // Load the serialized query and run it
    String queryFile = getClass().getResource("/queries/bugs/optionality_warning.jsonld").getFile();
    Krill krill = new Krill(getString(queryFile));
    Result result = krill.apply(index);
    assertEquals(2L, result.getTotalResults());

    JsonNode tree = new ObjectMapper().readTree(result.toJsonString());

    // The message used to be checked at "/warning"; it is now checked
    // as the second entry of the first item in the "warnings" array.
    assertEquals("Optionality of query is ignored", tree.at("/warnings/0/1").asText());
}
use of de.ids_mannheim.korap.index.FieldDocument in project Krill by KorAP.
the class TestResult method checkJSONTokenResult.
/**
 * Indexes two documents, searches for <code>s:a</code> followed by
 * <code>s:b</code> in the "base" field and checks the token list JSON
 * serialization of the result: the meta section and, for each of the
 * three matches, the text sigle and the two number pairs listed under
 * "tokens".
 */
@Test
public void checkJSONTokenResult() throws Exception {
    KrillIndex index = new KrillIndex();

    // First document
    FieldDocument firstDoc = new FieldDocument();
    firstDoc.addString("ID", "doc-1");
    firstDoc.addString("UID", "1");
    firstDoc.addTV("base", "abab",
            "[(0-1)s:a|i:a|_0#0-1|-:t$<i>4]"
                    + "[(1-2)s:b|i:b|_1#1-2]"
                    + "[(2-3)s:a|i:c|_2#2-3]"
                    + "[(3-4)s:b|i:a|_3#3-4]");
    index.addDoc(firstDoc);

    // Second document
    FieldDocument secondDoc = new FieldDocument();
    secondDoc.addString("ID", "doc-2");
    secondDoc.addString("UID", "2");
    secondDoc.addTV("base", "aba",
            "[(0-1)s:a|i:a|_0#0-1|-:t$<i>3]"
                    + "[(1-2)s:b|i:b|_1#1-2]"
                    + "[(2-3)s:a|i:c|_2#2-3]");
    index.addDoc(secondDoc);

    // Commit!
    index.commit();

    QueryBuilder builder = new QueryBuilder("base");
    SpanQuery query = (SpanQuery) builder
            .seq(builder.seg("s:a"))
            .append(builder.seg("s:b"))
            .toQuery();
    Result result = index.search(query);
    assertEquals(3L, result.getTotalResults());

    JsonNode json = new ObjectMapper().readTree(result.toTokenListJsonString());

    // Meta section
    assertEquals(3, json.at("/meta/totalResults").asInt());
    assertEquals("spanNext(base:s:a, base:s:b)", json.at("/meta/serialQuery").asText());
    assertEquals(0, json.at("/meta/startIndex").asInt());
    assertEquals(25, json.at("/meta/itemsPerPage").asInt());

    // Expected values per match, in result order: the text sigle and
    // the two number pairs found under "tokens".
    String[] expectedSigles = { "doc-1", "doc-1", "doc-2" };
    int[][][] expectedTokens = {
            { { 0, 1 }, { 1, 2 } },
            { { 2, 3 }, { 3, 4 } },
            { { 0, 1 }, { 1, 2 } }
    };

    for (int match = 0; match < expectedSigles.length; match++) {
        String matchPath = "/matches/" + match;
        assertEquals(expectedSigles[match], json.at(matchPath + "/textSigle").asText());

        for (int token = 0; token < expectedTokens[match].length; token++) {
            for (int pos = 0; pos < expectedTokens[match][token].length; pos++) {
                String tokenPath = matchPath + "/tokens/" + token + "/" + pos;
                assertEquals(expectedTokens[match][token][pos], json.at(tokenPath).asInt());
            }
        }
    }
}
use of de.ids_mannheim.korap.index.FieldDocument in project Krill by KorAP.
the class TestMetaFields method searchCollectionFields.
/**
 * Indexes two documents carrying metadata fields and checks, for
 * several collection query files below
 * <code>/queries/collections/</code>, how many documents the resulting
 * collection contains. The last query is additionally applied to the
 * index and its total number of results is checked in the JSON
 * response.
 *
 * @throws IOException
 *             propagated to the test runner; not handled here
 */
@Test
public void searchCollectionFields() throws IOException {
    KrillIndex index = new KrillIndex();

    // First document
    FieldDocument goetheDoc = new FieldDocument();
    goetheDoc.addString("corpusSigle", "ABC");
    goetheDoc.addString("docSigle", "ABC-123");
    goetheDoc.addString("textSigle", "ABC-123-0001");
    goetheDoc.addText("title", "Die Wahlverwandschaften");
    goetheDoc.addText("author", "Johann Wolfgang von Goethe");
    goetheDoc.addKeyword("textClass", "reisen wissenschaft");
    goetheDoc.addInt("pubDate", 20130617);
    goetheDoc.addTV("tokens", "abc",
            "[(0-1)s:a|i:a|_0#0-1|-:t$<i>10]"
                    + "[(1-2)s:b|i:b|_1#1-2]"
                    + "[(2-3)s:c|i:c|_2#2-3]");
    index.addDoc(goetheDoc);

    // Second document
    FieldDocument schillerDoc = new FieldDocument();
    schillerDoc.addString("corpusSigle", "ABC");
    schillerDoc.addString("docSigle", "ABC-125");
    schillerDoc.addString("textSigle", "ABC-125-0001");
    schillerDoc.addText("title", "Die Glocke");
    schillerDoc.addText("author", "Schiller, Friedrich");
    schillerDoc.addKeyword("textClass", "Reisen geschichte");
    schillerDoc.addInt("pubDate", 20130203);
    schillerDoc.addTV("tokens", "abc",
            "[(0-1)s:a|i:a|_0#0-1|-:t$<i>10]"
                    + "[(1-2)s:b|i:b|_1#1-2]"
                    + "[(2-3)s:c|i:c|_2#2-3]");
    index.addDoc(schillerDoc);

    index.commit();

    // textClass = reisen & wissenschaft
    String bothClassesFile = getClass().getResource("/queries/collections/collection_textClass.jsonld").getFile();
    KrillCollection bothClasses = new Krill(getJsonString(bothClassesFile)).getCollection();
    bothClasses.setIndex(index);
    assertEquals(1, bothClasses.numberOf("documents"));

    // textClass = reisen
    String singleClassFile = getClass().getResource("/queries/collections/collection_textClass_2.jsonld").getFile();
    KrillCollection singleClass = new Krill(getJsonString(singleClassFile)).getCollection();
    singleClass.setIndex(index);
    assertEquals(2, singleClass.numberOf("documents"));

    /*
     * Disabled debugging aid: prints every term of the token stream
     * of the second document's "author" field to stderr.
     *
     * TokenStream ts = schillerDoc.doc.getField("author").tokenStream(
     *     (Analyzer) index.writer().getAnalyzer(),
     *     (TokenStream) null
     * );
     * // OffsetAttribute offsetAttribute = ts.addAttribute(OffsetAttribute.class);
     * CharTermAttribute charTermAttribute = ts.addAttribute(CharTermAttribute.class);
     * ts.reset();
     * while (ts.incrementToken()) {
     *     String term = charTermAttribute.toString();
     *     System.err.println(">>" + term + "<<");
     * };
     */

    // author = wolfgang
    String lowerCaseAuthorFile = getClass().getResource("/queries/collections/collection_goethe.jsonld").getFile();
    KrillCollection lowerCaseAuthor = new Krill(getJsonString(lowerCaseAuthorFile)).getCollection();
    lowerCaseAuthor.setIndex(index);
    assertEquals(1, lowerCaseAuthor.numberOf("documents"));

    // author = Wolfgang
    String upperCaseAuthorFile = getClass().getResource("/queries/collections/collection_goethe_2.jsonld").getFile();
    Krill upperCaseAuthorQuery = new Krill(getJsonString(upperCaseAuthorFile));
    KrillCollection upperCaseAuthor = upperCaseAuthorQuery.getCollection();
    upperCaseAuthor.setIndex(index);
    assertEquals(1, upperCaseAuthor.numberOf("documents"));

    // Apply the last query to the index and inspect the JSON response
    Result result = upperCaseAuthorQuery.apply(index);
    JsonNode tree = new ObjectMapper().readTree(result.toJsonString());
    assertEquals(1, tree.at("/meta/totalResults").asInt());
}
Aggregations