Use of de.ids_mannheim.korap.collection.CollectionBuilder in project Krill by KorAP.
Class TestKrillCollectionIndex, method testIndexWithMultipleCommitsAndDeletes.
/**
 * Verifies that author-based collection counts follow the index through a
 * sequence of commits: two initial documents, one added document, one
 * deletion by author, and finally the re-addition of the deleted document.
 */
@Test
public void testIndexWithMultipleCommitsAndDeletes() throws IOException {
    // Initial state: documents 1 and 2 are committed.
    ki = new KrillIndex();
    ki.addDoc(createDoc1());
    ki.addDoc(createDoc2());
    ki.commit();

    CollectionBuilder builder = new CollectionBuilder();
    KrillCollection collection = new KrillCollection(ki);
    assertAuthorDocCounts(collection, builder, 1, 1, 0, 2);

    // Add Sebastian's doc
    ki.addDoc(createDoc3());
    ki.commit();
    assertAuthorDocCounts(collection, builder, 1, 1, 1, 3);

    // Remove one document
    ki.delDocs("author", "Peter");
    ki.commit();
    assertAuthorDocCounts(collection, builder, 1, 0, 1, 2);

    // Re-add Peter's doc
    ki.addDoc(createDoc2());
    ki.commit();
    assertAuthorDocCounts(collection, builder, 1, 1, 1, 3);
}

/**
 * Points the collection at each author constraint in turn (Frank, Peter,
 * Sebastian, then "not Michael") and asserts the resulting document count.
 *
 * @param collection the collection whose builder is replaced for every check
 * @param builder    the builder used to create the author terms
 * @param frank      expected count for author "Frank"
 * @param peter      expected count for author "Peter"
 * @param sebastian  expected count for author "Sebastian"
 * @param notMichael expected count for the negated author "Michael" term
 */
private void assertAuthorDocCounts(KrillCollection collection, CollectionBuilder builder,
        int frank, int peter, int sebastian, int notMichael) throws IOException {
    collection.fromBuilder(builder.term("author", "Frank"));
    assertEquals(frank, collection.docCount());

    collection.fromBuilder(builder.term("author", "Peter"));
    assertEquals(peter, collection.docCount());

    collection.fromBuilder(builder.term("author", "Sebastian"));
    assertEquals(sebastian, collection.docCount());

    collection.fromBuilder(builder.term("author", "Michael").not());
    assertEquals(notMichael, collection.docCount());
}
Use of de.ids_mannheim.korap.collection.CollectionBuilder in project Krill by KorAP.
Class TestKrill, method searchJSONmultitermRewriteBug.
/**
 * Runs the query stored in {@code /queries/bugs/multiterm_rewrite.jsonld}
 * against an index holding a single BZK document and checks the serialized
 * query, the total number of results and a selection of match snippets.
 *
 * <p>All assertions use JUnit's {@code (expected, actual)} argument order so
 * that failure messages report the values the right way round.
 */
@Test
public void searchJSONmultitermRewriteBug() throws IOException {
    // Construct index
    KrillIndex ki = new KrillIndex();
    assertEquals(0, ki.numberOf("documents"));

    // Indexing test files
    FieldDocument fd = ki.addDoc(1, getClass().getResourceAsStream("/bzk/D59-00089.json.gz"), true);
    ki.commit();
    assertEquals(1, ki.numberOf("documents"));
    assertEquals("BZK", fd.getCorpusSigle());

    // [tt/p="A.*"]{0,3}[tt/p="N.*"]
    String json = getJsonString(getClass().getResource("/queries/bugs/multiterm_rewrite.jsonld").getFile());
    Krill ks = new Krill(json);
    KrillCollection kc = ks.getCollection();

    // No index was set
    assertEquals(-1, kc.numberOf("documents"));
    kc.setIndex(ki);

    // Index was set but vc restricted to WPD
    assertEquals(0, kc.numberOf("documents"));

    // Widen the collection with an OR group so the BZK document is included.
    // Formerly: kc.extend(new CollectionBuilder().or("corpusSigle", "BZK"));
    CollectionBuilder cb = new CollectionBuilder();
    kc.fromBuilder(cb.orGroup().with(kc.getBuilder()).with(cb.term("corpusSigle", "BZK")));
    ks.setCollection(kc);
    assertEquals(1, kc.numberOf("documents"));

    Result kr = ks.apply(ki);

    // The expected serialization is an alternation of the bare N.* term and
    // a {1,3} repetition of A.* followed by N.*.
    assertEquals(
            "spanOr([SpanMultiTermQueryWrapper(tokens:/tt/p:N.*/), "
                    + "spanNext(spanRepetition(SpanMultiTermQueryWrapper"
                    + "(tokens:/tt/p:A.*/){1,3}), "
                    + "SpanMultiTermQueryWrapper(tokens:/tt/p:N.*/))])",
            kr.getSerialQuery());
    assertEquals(58, kr.getTotalResults());
    assertEquals(0, kr.getStartIndex());

    assertEquals("[[Saragat-Partei]] zerfällt Rom (ADN) die von dem ...",
            kr.getMatch(0).getSnippetBrackets());
    assertEquals("[[Saragat-Partei]] zerfällt Rom (ADN) die von dem ...",
            kr.getMatch(1).getSnippetBrackets());
    assertEquals("Saragat-Partei zerfällt [[Rom]] (ADN) "
                    + "die von dem Rechtssozialisten Saragat ...",
            kr.getMatch(2).getSnippetBrackets());
    assertEquals("Saragat-Partei zerfällt Rom ([[ADN]]) "
                    + "die von dem Rechtssozialisten Saragat geführte ...",
            kr.getMatch(3).getSnippetBrackets());
    assertEquals("... dem Namen \"Einheitsbewegung der sozialistischen "
                    + "Initiative\" [[eine neue politische Gruppierung]] "
                    + "ins Leben gerufen hatten. Pressemeldungen zufolge ...",
            kr.getMatch(23).getSnippetBrackets());
}
Use of de.ids_mannheim.korap.collection.CollectionBuilder in project Krill by KorAP.
Class TestWPDIndex, method testCase4.
/**
 * Element distance exclusion.
 *
 * <p>Runs an element-distance query over "s" elements for the terms
 * {@code s:weg} and {@code s:fahren}, first against the whole index and
 * then restricted to a collection of two document IDs, and checks the
 * total result count and the positions of the first two restricted matches.
 *
 * <p>Assertions use JUnit's {@code (expected, actual)} argument order.
 */
@Test
public void testCase4() throws IOException {
    SpanDistanceQuery sq = createElementDistanceQuery("s", "s:weg", "s:fahren", 1, 1, false, true);
    ks = new Krill(sq);
    kr = ks.apply(ki);
    assertEquals(979, kr.getTotalResults());
    // 0.8s — presumably a runtime note from the original author

    // Check if it includes some results.
    // The restriction below was formerly expressed as a BooleanFilter:
    //   bf.or("ID", "WPD_BBB.04463", "WPD_III.00758"); kc.filter(bf);
    KrillCollection kc = new KrillCollection();
    CollectionBuilder cb = new CollectionBuilder();
    kc.fromBuilder(cb.orGroup().with(cb.term("ID", "WPD_BBB.04463")).with(cb.term("ID", "WPD_III.00758")));
    ks.setCollection(kc);
    kr = ks.apply(ki);

    assertEquals(1094, kr.getMatch(0).getStartPos());
    assertEquals(451, kr.getMatch(1).getEndPos());
}
Aggregations