use of de.ids_mannheim.korap.KrillCollection in project Krill by KorAP.
the class TestKrillCollectionIndex method testIndexWithDateRanges.
/**
 * Exercises date constraints on the "pubDate" field against an index built
 * from the three fixture documents: exact matches at year, month and day
 * precision, open-ended ranges via since/till, and the negated forms of
 * exact dates and till-ranges.
 *
 * NOTE(review): the expected counts imply the fixtures are dated
 * 2005-12-07, 2005-12-10 and 2005-12-16 - confirm against createDoc1..3.
 */
@Test
public void testIndexWithDateRanges() throws IOException {
    // Build the index from the three fixture documents
    ki = new KrillIndex();
    ki.addDoc(createDoc1());
    ki.addDoc(createDoc2());
    ki.addDoc(createDoc3());
    ki.commit();

    final String field = "pubDate";
    CollectionBuilder builder = new CollectionBuilder();
    KrillCollection collection = new KrillCollection(ki);

    // Year and month precision: all three documents are expected
    for (String coarse : new String[] { "2005", "2005-12" }) {
        collection.fromBuilder(builder.date(field, coarse));
        assertEquals(3, collection.docCount());
    }

    // Day precision: each listed day is expected to hit exactly one document
    for (String day : new String[] { "2005-12-10", "2005-12-16", "2005-12-07" }) {
        collection.fromBuilder(builder.date(field, day));
        assertEquals(1, collection.docCount());
    }

    // since(): expected count per start day
    String[] sinceDays = { "2005-12-07", "2005-12-10", "2005-12-16" };
    int[] sinceHits = { 3, 2, 1 };
    for (int i = 0; i < sinceDays.length; i++) {
        collection.fromBuilder(builder.since(field, sinceDays[i]));
        assertEquals(sinceHits[i], collection.docCount());
    }

    // till(): expected count per end day
    String[] tillDays = { "2005-12-16", "2005-12-10", "2005-12-07" };
    int[] tillHits = { 3, 2, 1 };
    for (int i = 0; i < tillDays.length; i++) {
        collection.fromBuilder(builder.till(field, tillDays[i]));
        assertEquals(tillHits[i], collection.docCount());
    }

    // Negated exact dates: two documents are expected to remain per day
    for (String day : new String[] { "2005-12-10", "2005-12-16", "2005-12-07" }) {
        collection.fromBuilder(builder.date(field, day).not());
        assertEquals(2, collection.docCount());
    }

    // Negating a day with an expected count of three leaves everything in
    collection.fromBuilder(builder.date(field, "2005-12-09").not());
    assertEquals(3, collection.docCount());

    // Negated till(): expected count per end day
    String[] notTillDays = {
        "2005-12-16", "2005-12-15", "2005-12-10",
        "2005-12-09", "2005-12-07", "2005-12-06"
    };
    int[] notTillHits = { 0, 1, 1, 2, 2, 3 };
    for (int i = 0; i < notTillDays.length; i++) {
        collection.fromBuilder(builder.till(field, notTillDays[i]).not());
        assertEquals(notTillHits[i], collection.docCount());
    }
}
use of de.ids_mannheim.korap.KrillCollection in project Krill by KorAP.
the class TestKrillCollectionIndex method testKrillCollectionWithWrongJson.
/**
 * Verifies that a collection constructed from unparsable JSON records a
 * single "Unable to parse JSON" error and reports zero documents, tokens,
 * sentences and paragraphs once an index is attached.
 *
 * @throws IOException if indexing or counting fails; the exception is
 *         propagated so that an I/O problem fails the test instead of
 *         leaving the counters at their initial value of zero and letting
 *         the zero-count assertions pass vacuously
 */
@Test
public void testKrillCollectionWithWrongJson() throws IOException {
    // Build the index from the three fixture documents
    ki = new KrillIndex();
    ki.addDoc(createDoc1());
    ki.addDoc(createDoc2());
    ki.addDoc(createDoc3());
    ki.commit();

    // Malformed JSON: the error is expected to be recorded on construction
    KrillCollection kc = new KrillCollection("{lalala}");
    assertEquals("Unable to parse JSON", kc.getError(0).getMessage());
    kc.setIndex(ki);

    // No try/catch here on purpose: swallowing an IOException would keep
    // the values at 0 and hide the failure.
    long docs = kc.numberOf("documents");
    long tokens = kc.numberOf("tokens");
    long sentences = kc.numberOf("sentences");
    long paragraphs = kc.numberOf("paragraphs");

    assertEquals(0, docs);
    assertEquals(0, tokens);
    assertEquals(0, sentences);
    assertEquals(0, paragraphs);

    // Counting must not have added further errors
    assertEquals(1, kc.getErrors().size());
    assertEquals(StatusCodes.UNABLE_TO_PARSE_JSON, kc.getErrors().get(0).getCode());
}
use of de.ids_mannheim.korap.KrillCollection in project Krill by KorAP.
the class TestKrillCollectionIndex method filterExampleFromLegacy.
/**
 * Ports a legacy filter example to the builder API: indexes seven wiki
 * fixture documents, narrows a virtual collection step by step with
 * filter(), runs a search restricted to that collection, and finally widens
 * the collection again with extend(). Document, token, sentence and
 * paragraph counts are asserted after each step.
 *
 * @throws Exception if indexing, counting or searching fails
 */
@Test
public void filterExampleFromLegacy() throws Exception {
    // Construct index
    KrillIndex ki = new KrillIndex();

    // Indexing test files
    for (String i : new String[] { "00001", "00002", "00003", "00004", "00005", "00006", "02439" }) {
        ki.addDoc(getClass().getResourceAsStream("/wiki/" + i + ".json.gz"), true);
    }
    ki.commit();

    // Create virtual collection over the whole index
    KrillCollection kc = new KrillCollection(ki);
    assertEquals("Documents", 7, kc.numberOf("documents"));

    // The virtual collection consists of all documents that have both the
    // textClass "reisen" and the textClass "freizeit-unterhaltung".
    // Legacy form: kc.filter(kf.and("textClass", "reisen")
    //                          .and("textClass", "freizeit-unterhaltung"));
    kc.fromBuilder(
        kc.build().andGroup()
            .with(kc.build().term("textClass", "reisen"))
            .with(kc.build().term("textClass", "freizeit-unterhaltung")));
    assertEquals("Documents", 5, kc.numberOf("documents"));
    assertEquals("Tokens", 1678, kc.numberOf("tokens"));
    assertEquals("Sentences", 194, kc.numberOf("sentences"));
    assertEquals("Paragraphs", 139, kc.numberOf("paragraphs"));

    // Subset this to all documents that also have the textClass "kultur".
    // Legacy form: kc.filter(kf.and("textClass", "kultur"));
    kc.filter(kc.build().term("textClass", "kultur"));
    assertEquals("Documents", 1, kc.numberOf("documents"));
    assertEquals("Tokens", 405, kc.numberOf("tokens"));
    assertEquals("Sentences", 75, kc.numberOf("sentences"));
    assertEquals("Paragraphs", 48, kc.numberOf("paragraphs"));

    // Filtering by corpusID "WPD" is expected to leave the counts unchanged.
    // Legacy form: kc.filter(kf.and("corpusID", "WPD"));
    kc.filter(kc.build().term("corpusID", "WPD"));
    assertEquals("Documents", 1, kc.numberOf("documents"));
    assertEquals("Tokens", 405, kc.numberOf("tokens"));
    assertEquals("Sentences", 75, kc.numberOf("sentences"));
    assertEquals("Paragraphs", 48, kc.numberOf("paragraphs"));

    // Create a query and run it against the filtered collection.
    // Legacy form: ki.search(kc, query, 0, (short) 20, true, (short) 5,
    //                        true, (short) 5);
    Krill ks = new Krill(new QueryBuilder("tokens").seg("opennlp/p:NN").with("tt/p:NN"));
    ks.setCollection(kc).getMeta()
        .setStartIndex(0)
        .setCount((short) 20)
        .setContext(new SearchContext(true, (short) 5, true, (short) 5));
    Result kr = ks.apply(ki);

    // Expected value first, so a failure reports "expected 70 but was ..."
    assertEquals(70, kr.getTotalResults());

    // Extending by "uninteresting" is expected to leave the count unchanged
    kc.extend(kc.build().term("textClass", "uninteresting"));
    assertEquals("Documents", 1, kc.numberOf("documents"));

    // Extending by "wissenschaft" is expected to add documents
    kc.extend(kc.build().term("textClass", "wissenschaft"));
    assertEquals("Documents", 3, kc.numberOf("documents"));
    assertEquals("Tokens", 1669, kc.numberOf("tokens"));
    assertEquals("Sentences", 188, kc.numberOf("sentences"));
    assertEquals("Paragraphs", 130, kc.numberOf("paragraphs"));
}
use of de.ids_mannheim.korap.KrillCollection in project Krill by KorAP.
the class TestKrillCollectionIndex method testIndexWithCollectionBuilder.
/**
 * Checks document counts for collections assembled with CollectionBuilder
 * over the three fixture documents: single terms, orGroups (within one
 * field and across fields), andGroups, and terms on the "text" field.
 */
@Test
public void testIndexWithCollectionBuilder() throws IOException {
    // Build the index from the three fixture documents
    ki = new KrillIndex();
    ki.addDoc(createDoc1());
    ki.addDoc(createDoc2());
    ki.addDoc(createDoc3());
    ki.commit();

    CollectionBuilder builder = new CollectionBuilder();
    KrillCollection collection = new KrillCollection(ki);

    // Simple string tests, as { field, value } pairs with expected counts
    String[][] singleTerms = {
        { "author", "Frank" },
        { "author", "Peter" },
        { "author", "Sebastian" },
        { "author", "Michael" },
        { "nothing", "nothing" },
        { "textClass", "reisen" },
        { "textClass", "kultur" },
        { "textClass", "finanzen" }
    };
    int[] singleTermHits = { 1, 1, 1, 0, 0, 3, 2, 1 };
    for (int i = 0; i < singleTerms.length; i++) {
        collection.fromBuilder(builder.term(singleTerms[i][0], singleTerms[i][1]));
        assertEquals(singleTermHits[i], collection.docCount());
    }

    // Simple orGroup tests
    collection.fromBuilder(
        builder.orGroup()
            .with(builder.term("author", "Frank"))
            .with(builder.term("author", "Michael")));
    assertEquals(1, collection.docCount());

    collection.fromBuilder(
        builder.orGroup()
            .with(builder.term("author", "Frank"))
            .with(builder.term("author", "Sebastian")));
    assertEquals(2, collection.docCount());

    collection.fromBuilder(
        builder.orGroup()
            .with(builder.term("author", "Frank"))
            .with(builder.term("author", "Sebastian"))
            .with(builder.term("author", "Peter")));
    assertEquals(3, collection.docCount());

    collection.fromBuilder(
        builder.orGroup()
            .with(builder.term("author", "Huhu"))
            .with(builder.term("author", "Haha"))
            .with(builder.term("author", "Hehe")));
    assertEquals(0, collection.docCount());

    // Multi field orGroup tests: ID "doc-1" combined with each author
    String[] orAuthors = { "Peter", "Frank", "Michael" };
    int[] orAuthorHits = { 2, 1, 1 };
    for (int i = 0; i < orAuthors.length; i++) {
        collection.fromBuilder(
            builder.orGroup()
                .with(builder.term("ID", "doc-1"))
                .with(builder.term("author", orAuthors[i])));
        assertEquals(orAuthorHits[i], collection.docCount());
    }

    // Simple andGroup tests
    collection.fromBuilder(
        builder.andGroup()
            .with(builder.term("author", "Frank"))
            .with(builder.term("author", "Michael")));
    assertEquals(0, collection.docCount());

    collection.fromBuilder(
        builder.andGroup()
            .with(builder.term("ID", "doc-1"))
            .with(builder.term("author", "Frank")));
    assertEquals(1, collection.docCount());

    // andGroup in keyword field test, as pairs of textClass values
    String[][] classPairs = {
        { "reisen", "finanzen" },
        { "reisen", "kultur" },
        { "finanzen", "kultur" }
    };
    int[] classPairHits = { 1, 2, 0 };
    for (int i = 0; i < classPairs.length; i++) {
        collection.fromBuilder(
            builder.andGroup()
                .with(builder.term("textClass", classPairs[i][0]))
                .with(builder.term("textClass", classPairs[i][1])));
        assertEquals(classPairHits[i], collection.docCount());
    }

    // Terms on the "text" field
    collection.fromBuilder(builder.term("text", "mann"));
    assertEquals(3, collection.docCount());

    collection.fromBuilder(builder.term("text", "frau"));
    assertEquals(1, collection.docCount());
}
use of de.ids_mannheim.korap.KrillCollection in project Krill by KorAP.
the class TestKrillCollectionIndex method testIndexWithNegation.
/**
 * Checks document counts for negated terms over the three fixture
 * documents, both standalone and as a member of an orGroup.
 */
@Test
public void testIndexWithNegation() throws IOException {
    // Build the index from the three fixture documents
    ki = new KrillIndex();
    ki.addDoc(createDoc1());
    ki.addDoc(createDoc2());
    ki.addDoc(createDoc3());
    ki.commit();

    CollectionBuilder builder = new CollectionBuilder();
    KrillCollection collection = new KrillCollection(ki);

    // Simple negation tests, as { field, value } pairs with expected counts
    String[][] negatedTerms = {
        { "author", "Frank" },
        { "textClass", "reisen" },
        { "textClass", "kultur" }
    };
    int[] negatedHits = { 2, 0, 1 };
    for (int i = 0; i < negatedTerms.length; i++) {
        collection.fromBuilder(builder.term(negatedTerms[i][0], negatedTerms[i][1]).not());
        assertEquals(negatedHits[i], collection.docCount());
    }

    // orGroup with simple negation: not(textClass kultur) OR author
    String[] authors = { "Peter", "Sebastian" };
    int[] groupHits = { 2, 1 };
    for (int i = 0; i < authors.length; i++) {
        collection.fromBuilder(
            builder.orGroup()
                .with(builder.term("textClass", "kultur").not())
                .with(builder.term("author", authors[i])));
        assertEquals(groupHits[i], collection.docCount());
    }
}
Aggregations