use of de.ids_mannheim.korap.collection.CollectionBuilder in project Krill by KorAP.
the class TestCollectionBuilder method builderAndNestedSimple.
/**
 * Checks the string form of an AND group whose only member is another AND
 * group holding two terms. The expected string contains a single
 * {@code AndGroup} listing both terms.
 */
@Test
public void builderAndNestedSimple() throws IOException {
    final CollectionBuilder builder = new CollectionBuilder();
    final String expected = "AndGroup(author:tree title:name)";

    // Outer AND group wrapping an inner AND group (author:tree, title:name).
    final String actual = builder.andGroup()
            .with(builder.andGroup()
                    .with(builder.term("author", "tree"))
                    .with(builder.term("title", "name")))
            .toString();

    assertEquals(expected, actual);
}
use of de.ids_mannheim.korap.collection.CollectionBuilder in project Krill by KorAP.
the class TestKrillCollectionIndex method testIndexWithRegex.
/**
 * Indexes three documents and checks the document counts of collections
 * built from regular expressions on the {@code author} and
 * {@code textClass} fields, both standalone and inside AND/OR groups.
 */
@Test
public void testIndexWithRegex() throws IOException {
    // Fresh index holding the three fixture documents.
    ki = new KrillIndex();
    ki.addDoc(createDoc1());
    ki.addDoc(createDoc2());
    ki.addDoc(createDoc3());
    ki.commit();

    final CollectionBuilder builder = new CollectionBuilder();
    final KrillCollection collection = new KrillCollection(ki);

    // Author regex: expected to match "Frank" and "Sebastian".
    collection.fromBuilder(builder.re("author", ".*an.*"));
    assertEquals(2, collection.docCount());

    // Text class regex: expected to match
    //   Kultur & Reisen,
    //   Reisen & Finanzen,
    //   Nachricht & Kultur & Reisen
    collection.fromBuilder(builder.re("textClass", ".*(ult|eis).*"));
    assertEquals(3, collection.docCount());

    // Plain terms combined in an AND group.
    collection.fromBuilder(
            builder.andGroup()
                    .with(builder.term("textClass", "reisen"))
                    .with(builder.term("textClass", "kultur")));
    assertEquals(2, collection.docCount());

    // The same constraint expressed with regular expressions.
    collection.fromBuilder(
            builder.andGroup()
                    .with(builder.re("textClass", ".*eis.*"))
                    .with(builder.re("textClass", ".*ult.*")));
    assertEquals(2, collection.docCount());

    // Regex AND-ed with an OR group of two further regexes.
    collection.fromBuilder(
            builder.andGroup()
                    .with(builder.re("textClass", ".*eis.*"))
                    .with(builder.orGroup()
                            .with(builder.re("textClass", ".*ult.*"))
                            .with(builder.re("textClass", ".*nan.*"))));
    assertEquals(3, collection.docCount());
}
use of de.ids_mannheim.korap.collection.CollectionBuilder in project Krill by KorAP.
the class TestKrillCollectionIndex method filterExampleAtomicLegacy.
/**
 * Runs the legacy filter example against an index that is committed after
 * every single document, then checks document, token, sentence and
 * paragraph counts while the virtual collection is filtered, extended and
 * finally affected by a document deletion.
 *
 * @throws Exception if indexing, searching or counting fails
 */
@Test
public void filterExampleAtomicLegacy() throws Exception {
    // That's exactly the same test class, but with multiple atomic indices
    // Construct index
    KrillIndex ki = new KrillIndex();

    // Indexing test files; the commit sits inside the loop on purpose so
    // that every document is committed on its own.
    for (String docId : new String[] { "00001", "00002", "00003", "00004", "00005", "00006", "02439" }) {
        ki.addDoc(getClass().getResourceAsStream("/wiki/" + docId + ".json.gz"), true);
        ki.commit();
    }

    // Create virtual collection over the whole index
    KrillCollection kc = new KrillCollection(ki);
    assertEquals("Documents", 7, kc.numberOf("documents"));

    // If this is set - everything is fine automatically ...
    kc.filter(kc.build().term("corpusID", "WPD"));
    assertEquals("Documents", 7, kc.numberOf("documents"));

    // The virtual collection consists of all documents that have the
    // textClass "reisen" and "freizeit-unterhaltung"
    kc.filter(
            kc.build().andGroup()
                    .with(kc.build().term("textClass", "reisen"))
                    .with(kc.build().term("textClass", "freizeit-unterhaltung")));
    assertEquals("Documents", 5, kc.numberOf("documents"));
    assertEquals("Tokens", 1678, kc.numberOf("tokens"));
    assertEquals("Sentences", 194, kc.numberOf("sentences"));
    assertEquals("Paragraphs", 139, kc.numberOf("paragraphs"));

    // Subset this to all documents that also have the textClass "kultur"
    kc.filter(kc.build().term("textClass", "kultur"));
    assertEquals("Documents", 1, kc.numberOf("documents"));
    assertEquals("Tokens", 405, kc.numberOf("tokens"));
    assertEquals("Sentences", 75, kc.numberOf("sentences"));
    assertEquals("Paragraphs", 48, kc.numberOf("paragraphs"));

    // This is already filtered though ... the counts must not change
    kc.filter(kc.build().term("corpusID", "WPD"));
    assertEquals("Documents", 1, kc.numberOf("documents"));
    assertEquals("Tokens", 405, kc.numberOf("tokens"));
    assertEquals("Sentences", 75, kc.numberOf("sentences"));
    assertEquals("Paragraphs", 48, kc.numberOf("paragraphs"));

    // Create a query and run it restricted to the collection
    Krill ks = new Krill(new QueryBuilder("tokens").seg("opennlp/p:NN").with("tt/p:NN"));
    ks.setCollection(kc)
            .getMeta()
            .setStartIndex(0)
            .setCount((short) 20)
            .setContext(new SearchContext(true, (short) 5, true, (short) 5));
    Result kr = ks.apply(ki);

    // Expected value first, so a failure message reports the values correctly
    assertEquals(70, kr.getTotalResults());

    // Extending by "uninteresting" leaves the document count unchanged
    kc.extend(kc.build().term("textClass", "uninteresting"));
    assertEquals("Documents", 1, kc.numberOf("documents"));

    kc.extend(kc.build().term("textClass", "wissenschaft"));
    assertEquals("Documents", 3, kc.numberOf("documents"));
    assertEquals("Tokens", 1669, kc.numberOf("tokens"));
    assertEquals("Sentences", 188, kc.numberOf("sentences"));
    assertEquals("Paragraphs", 130, kc.numberOf("paragraphs"));

    // System.err.println(kc.toString());
    // Test collectionbuilder simplifier!
    /*
    OrGroup(
      AndGroup(
        corpusID:WPD
        textClass:reisen
        textClass:freizeit-unterhaltung
        textClass:kultur
        corpusID:WPD
      )
      textClass:uninteresting
      textClass:wissenschaft
    )
    */

    // After deleting the "wissenschaft" documents the counts match the
    // values asserted before the collection was extended
    assertTrue(ki.delDocs("textClass", "wissenschaft"));
    ki.commit();
    assertEquals("Documents", 1, kc.numberOf("documents"));
    assertEquals("Tokens", 405, kc.numberOf("tokens"));
    assertEquals("Sentences", 75, kc.numberOf("sentences"));
    assertEquals("Paragraphs", 48, kc.numberOf("paragraphs"));
}
use of de.ids_mannheim.korap.collection.CollectionBuilder in project Krill by KorAP.
the class TestKrillCollectionIndex method testIndexWithTextStringQueries.
/**
 * Indexes a single document and checks that the {@code text} field can be
 * matched through {@code term} and {@code text} builders, and that the
 * {@code text} builder yields the expected filter string.
 *
 * @throws IOException if indexing or counting fails
 */
@Test
public void testIndexWithTextStringQueries() throws IOException {
    ki = new KrillIndex();
    ki.addDoc(createDoc1());
    ki.commit();

    CollectionBuilder cb = new CollectionBuilder();
    KrillCollection kcn = new KrillCollection(ki);

    // Single-word term on the text field
    kcn.fromBuilder(cb.term("text", "mann"));
    assertEquals(1, kcn.docCount());

    // Whole sentence passed as a term
    kcn.fromBuilder(cb.term("text", "Der alte Mann ging über die Straße"));
    assertEquals(1, kcn.docCount());

    // Text builder; expected value first so a failure message reports the
    // values the right way round
    kcn.fromBuilder(cb.text("text", "Der alte Mann"));
    assertEquals("QueryWrapperFilter(text:\"der alte mann\")", kcn.toString());
    assertEquals(1, kcn.docCount());
}
use of de.ids_mannheim.korap.collection.CollectionBuilder in project Krill by KorAP.
the class TestKrillCollectionIndex method filterExample2Legacy.
/**
 * Indexes the wiki fixtures plus one document with fake metadata, filters
 * a virtual collection by text class, and checks collection counts and
 * search totals before and after a filter that leaves no documents.
 *
 * @throws Exception if indexing, searching or counting fails
 */
@Test
public void filterExample2Legacy() throws Exception {
    // Construct index
    KrillIndex ki = new KrillIndex();

    // Indexing test files
    for (String docId : new String[] { "00001", "00002", "00003", "00004", "00005", "00006", "02439" }) {
        ki.addDoc(getClass().getResourceAsStream("/wiki/" + docId + ".json.gz"), true);
    }
    ki.commit();

    // The fake-metadata document is added and committed separately
    ki.addDoc(getClass().getResourceAsStream("/wiki/00012-fakemeta.json.gz"), true);
    ki.commit();

    // Create virtual collection: textClass "reisen" AND "freizeit-unterhaltung"
    KrillCollection kc = new KrillCollection(ki);
    CollectionBuilder cb = kc.build();
    kc.filter(
            cb.andGroup()
                    .with(cb.term("textClass", "reisen"))
                    .with(cb.term("textClass", "freizeit-unterhaltung")));
    assertEquals("Documents", 5, kc.numberOf("documents"));
    assertEquals("Tokens", 1678, kc.numberOf("tokens"));
    assertEquals("Sentences", 194, kc.numberOf("sentences"));
    assertEquals("Paragraphs", 139, kc.numberOf("paragraphs"));

    // Create a query and run it restricted to the collection
    Krill ks = new Krill(new QueryBuilder("tokens").seg("opennlp/p:NN").with("tt/p:NN"));
    ks.setCollection(kc)
            .getMeta()
            .setStartIndex(0)
            .setCount((short) 20)
            .setContext(new SearchContext(true, (short) 5, true, (short) 5));
    Result kr = ks.apply(ki);

    // Expected value first, so a failure message reports the values correctly
    assertEquals(369, kr.getTotalResults());

    // Filtering by corpusID "QQQ" leaves an empty collection
    kc.filter(cb.term("corpusID", "QQQ"));
    assertEquals("Documents", 0, kc.numberOf("documents"));
    assertEquals("Tokens", 0, kc.numberOf("tokens"));
    assertEquals("Sentences", 0, kc.numberOf("sentences"));
    assertEquals("Paragraphs", 0, kc.numberOf("paragraphs"));

    // Re-run the query against the now empty collection
    ks.setCollection(kc);
    kr = ks.apply(ki);
    assertEquals(0, kr.getTotalResults());
}
Aggregations