use of de.ids_mannheim.korap.response.SearchContext in project Krill by KorAP.
the class TestMatchIndex method indexExampleFocusWithSkip.
/**
 * Runs a focus query nested inside a containment query against four small
 * hand-built documents in the {@code base} field and checks the serialized
 * query, the total hit count and the first three bracket snippets.
 *
 * <p>The test is disabled with {@code @Ignore}; the commented-out
 * {@code fail(...)} call further down records the known issue with skipping.
 *
 * @throws IOException declared for the index and search calls
 */
@Ignore
public void indexExampleFocusWithSkip() throws IOException {
    KrillIndex ki = new KrillIndex();

    // abcabcabac
    FieldDocument fd = new FieldDocument();
    fd.addTV("base", "abcabcabac", // The payload should be ignored
            // |<>:p#0-10<i>9]" +
            "[(0-1)s:a|i:a|_0$<i>0<i>1|-:t$<i>10]"
                    + "[(1-2)s:b|i:b|_1$<i>1<i>2|<>:s$<b>64<i>1<i>5<i>5]"
                    + "[(2-3)s:c|i:c|_2$<i>2<i>3|<>:s$<b>64<i>2<i>7<i>7]"
                    + "[(3-4)s:a|i:a|_3$<i>3<i>4]"
                    + "[(4-5)s:b|i:b|_4$<i>4<i>5]"
                    + "[(5-6)s:c|i:c|_5$<i>5<i>6]"
                    + "[(6-7)s:a|i:a|_6$<i>6<i>7]"
                    + "[(7-8)s:b|i:b|_7$<i>7<i>8]"
                    + "[(8-9)s:a|i:a|_8$<i>8<i>9]"
                    + "[(9-10)s:c|i:c|_9$<i>9<i>10]");
    ki.addDoc(fd);

    fd = new FieldDocument();
    fd.addTV("base", "gbcgbcgbgc",
            "[(0-1)s:g|i:g|_0$<i>0<i>1|-:t$<i>10|<>:p$<b>64<i>0<i>10<i>9]"
                    + "[(1-2)s:b|i:b|_1$<i>1<i>2|<>:s$<b>64<i>1<i>5<i>5]"
                    + "[(2-3)s:c|i:c|_2$<i>2<i>3|<>:s$<b>64<i>2<i>7<i>7]"
                    + "[(3-4)s:g|i:g|_3$<i>3<i>4]"
                    + "[(4-5)s:b|i:b|_4$<i>4<i>5]"
                    + "[(5-6)s:c|i:c|_5$<i>5<i>6]"
                    + "[(6-7)s:g|i:g|_6$<i>6<i>7]"
                    + "[(7-8)s:b|i:b|_7$<i>7<i>8]"
                    + "[(8-9)s:g|i:g|_8$<i>8<i>9]"
                    + "[(9-10)s:c|i:c|_9$<i>9<i>10]");
    ki.addDoc(fd);

    fd = new FieldDocument();
    fd.addTV("base", "gbcgbcgbgc",
            "[(0-1)s:g|i:g|_0$<i>0<i>1|-:t$<i>10]"
                    + "[(1-2)s:b|i:b|_1$<i>1<i>2]"
                    + "[(2-3)s:c|i:c|_2$<i>2<i>3]"
                    + "[(3-4)s:g|i:g|_3$<i>3<i>4]"
                    + "[(4-5)s:b|i:b|_4$<i>4<i>5]"
                    + "[(5-6)s:c|i:c|_5$<i>5<i>6]"
                    + "[(6-7)s:g|i:g|_6$<i>6<i>7]"
                    + "[(7-8)s:b|i:b|_7$<i>7<i>8]"
                    + "[(8-9)s:g|i:g|_8$<i>8<i>9]"
                    + "[(9-10)s:c|i:c|_9$<i>9<i>10]");
    ki.addDoc(fd);

    fd = new FieldDocument();
    // contains(<p>, focus(3: contains({2:<s>}, {3:a})))
    fd.addTV("base", "acabcabac",
            "[(0-1)s:a|i:a|_0$<i>0<i>1|-:t$<i>10|<>:p$<b>64<i>0<i>9<i>8]"
                    + "[(1-2)s:b|i:b|_1$<i>1<i>2|<>:s$<b>64<i>1<i>5<i>5]"
                    + "[(2-3)s:a|i:a|_2$<i>2<i>3|<>:s$<b>64<i>2<i>7<i>7]"
                    + "[(3-4)s:b|i:b|_3$<i>3<i>4]"
                    + "[(4-5)s:c|i:c|_4$<i>4<i>5]"
                    + "[(5-6)s:a|i:a|_5$<i>5<i>6]"
                    + "[(6-7)s:b|i:b|_6$<i>6<i>7]"
                    + "[(7-8)s:a|i:a|_7$<i>7<i>8]"
                    + "[(8-9)s:c|i:c|_8$<i>8<i>9]");
    ki.addDoc(fd);
    ki.commit();

    KrillCollection kc = new KrillCollection(ki);
    assertEquals("Documents", 4, kc.numberOf("documents"));

    // within(<p>, focus(3:within({2:<s>}, {3:a})))
    SpanQuery sq = new SpanWithinQuery(
            new SpanElementQuery("base", "p"),
            new SpanFocusQuery(
                    new SpanWithinQuery(
                            new SpanClassQuery(new SpanElementQuery("base", "s"), (byte) 2),
                            new SpanClassQuery(new SpanTermQuery(new Term("base", "s:a")), (byte) 3)),
                    (byte) 3));

    // fail("Skipping may go horribly wrong! (Known issue)");
    Krill ks = new Krill(sq);
    ks.getMeta()
            .setStartIndex(0)
            .setCount((short) 20)
            .setContext(new SearchContext(true, (short) 5, true, (short) 5));
    Result kr = ks.apply(ki);
    // Legacy call: kr = ki.search(kc, sq, 0, (short) 20, true, (short) 5, true, (short) 5);

    // Expected value goes first so a failure message reads correctly.
    assertEquals(
            "spanContain(<base:p />, focus(3: spanContain({2: <base:s />}, {3: base:s:a})))",
            kr.getSerialQuery());
    assertEquals(12, kr.getTotalResults());
    assertEquals("[a{2:bc{3:a}b}cabac]", kr.getMatch(0).getSnippetBrackets());
    assertEquals("[ab{2:c{3:a}bcab}ac]", kr.getMatch(1).getSnippetBrackets());
    assertEquals("[ab{2:cabc{3:a}}bac]", kr.getMatch(2).getSnippetBrackets());
}
use of de.ids_mannheim.korap.response.SearchContext in project Krill by KorAP.
the class TestKrillCollectionIndex method filterExampleFromLegacy.
/**
 * Indexes seven gzipped wiki fixtures with a single commit, then narrows a
 * virtual collection step by step ({@code fromBuilder}, {@code filter}),
 * searches within it, and finally widens it again with {@code extend}.
 * Document, token, sentence and paragraph counts are asserted after each step.
 *
 * <p>The commented-out snippets show the former filter API this test was
 * ported from.
 *
 * @throws Exception if indexing or searching fails
 */
@Test
public void filterExampleFromLegacy() throws Exception {
    // Construct index
    KrillIndex ki = new KrillIndex();

    // Indexing test files
    for (String i : new String[] { "00001", "00002", "00003", "00004", "00005", "00006", "02439" }) {
        ki.addDoc(getClass().getResourceAsStream("/wiki/" + i + ".json.gz"), true);
    }
    ki.commit();

    // Create Virtual collections:
    KrillCollection kc = new KrillCollection(ki);
    assertEquals("Documents", 7, kc.numberOf("documents"));

    // The virtual collection consists of all documents that have
    // the textClass "reisen" and "freizeit"
    /* kc.filter(kf.and("textClass", "reisen").and("textClass",
    "freizeit-unterhaltung"));
    */
    kc.fromBuilder(
            kc.build().andGroup()
                    .with(kc.build().term("textClass", "reisen"))
                    .with(kc.build().term("textClass", "freizeit-unterhaltung")));
    assertEquals("Documents", 5, kc.numberOf("documents"));
    assertEquals("Tokens", 1678, kc.numberOf("tokens"));
    assertEquals("Sentences", 194, kc.numberOf("sentences"));
    assertEquals("Paragraphs", 139, kc.numberOf("paragraphs"));

    // Subset this to all documents that have also the text
    // kc.filter(kf.and("textClass", "kultur"));
    /*
    kc.fromBuilder(
    kc.build().andGroup().with(
    kc.getBuilder()
    ).with(
    kc.build().term("textClass", "kultur")
    )
    );
    */
    kc.filter(kc.build().term("textClass", "kultur"));
    assertEquals("Documents", 1, kc.numberOf("documents"));
    assertEquals("Tokens", 405, kc.numberOf("tokens"));
    assertEquals("Sentences", 75, kc.numberOf("sentences"));
    assertEquals("Paragraphs", 48, kc.numberOf("paragraphs"));

    // kc.filter(kf.and("corpusID", "WPD"));
    kc.filter(kc.build().term("corpusID", "WPD"));
    assertEquals("Documents", 1, kc.numberOf("documents"));
    assertEquals("Tokens", 405, kc.numberOf("tokens"));
    assertEquals("Sentences", 75, kc.numberOf("sentences"));
    assertEquals("Paragraphs", 48, kc.numberOf("paragraphs"));

    // Create a query
    Krill ks = new Krill(new QueryBuilder("tokens").seg("opennlp/p:NN").with("tt/p:NN"));
    ks.setCollection(kc)
            .getMeta()
            .setStartIndex(0)
            .setCount((short) 20)
            .setContext(new SearchContext(true, (short) 5, true, (short) 5));
    Result kr = ks.apply(ki);
    /*
    Result kr = ki.search(kc, query, 0, (short) 20, true, (short) 5, true,
    (short) 5);
    */
    // Expected value goes first so a failure message reads correctly.
    assertEquals(70, kr.getTotalResults());

    // The assertion below expects the document count to stay at 1 after this extend.
    kc.extend(kc.build().term("textClass", "uninteresting"));
    assertEquals("Documents", 1, kc.numberOf("documents"));

    kc.extend(kc.build().term("textClass", "wissenschaft"));
    assertEquals("Documents", 3, kc.numberOf("documents"));
    assertEquals("Tokens", 1669, kc.numberOf("tokens"));
    assertEquals("Sentences", 188, kc.numberOf("sentences"));
    assertEquals("Paragraphs", 130, kc.numberOf("paragraphs"));
}
use of de.ids_mannheim.korap.response.SearchContext in project Krill by KorAP.
the class TestKrillCollectionIndex method uidCollectionLegacy.
/**
 * Indexes seven gzipped wiki fixtures with ascending UIDs starting at 1,
 * checks the index-wide counts and an unrestricted term search, then
 * restricts a virtual collection to UIDs 2, 3 and 4 and checks the counts
 * and the hit total of the same query inside that collection.
 *
 * @throws IOException declared for the index and search calls
 */
@Test
public void uidCollectionLegacy() throws IOException {
    // Construct index
    KrillIndex ki = new KrillIndex();

    // Indexing test files; the returned document is not needed here.
    int uid = 1;
    for (String i : new String[] { "00001", "00002", "00003", "00004", "00005", "00006", "02439" }) {
        ki.addDoc(uid++, getClass().getResourceAsStream("/wiki/" + i + ".json.gz"), true);
    }
    ki.commit();

    assertEquals("Documents", 7, ki.numberOf("documents"));
    assertEquals("Paragraphs", 174, ki.numberOf("paragraphs"));
    assertEquals("Sentences", 281, ki.numberOf("sentences"));
    assertEquals("Tokens", 2661, ki.numberOf("tokens"));

    SpanQuery sq = new SpanTermQuery(new Term("tokens", "s:der"));
    Result kr = ki.search(sq, (short) 10);
    assertEquals(86, kr.getTotalResults());

    // Create Virtual collections:
    KrillCollection kc = new KrillCollection();
    kc.filterUIDs(new String[] { "2", "3", "4" });
    kc.setIndex(ki);
    assertEquals("Documents", 3, kc.numberOf("documents"));
    assertEquals("Paragraphs", 46, kc.numberOf("paragraphs"));
    assertEquals("Sentences", 103, kc.numberOf("sentences"));
    assertEquals("Tokens", 1229, kc.numberOf("tokens"));

    Krill ks = new Krill(sq);
    ks.setCollection(kc)
            .getMeta()
            .setStartIndex(0)
            .setCount((short) 20)
            .setContext(new SearchContext(true, (short) 5, true, (short) 5));
    kr = ks.apply(ki);
    // Legacy call: kr = ki.search(kc, sq, 0, (short) 20, true, (short) 5, true, (short) 5);
    assertEquals(39, kr.getTotalResults());
}
use of de.ids_mannheim.korap.response.SearchContext in project Krill by KorAP.
the class TestKrillCollectionIndex method filterExampleAtomicLegacy.
/**
 * Same scenario as the single-commit filter test, but commits after every
 * added document (the original comment describes this as "multiple atomic
 * indices"). Narrows a virtual collection with successive {@code filter}
 * calls, searches within it, widens it with {@code extend}, and finally
 * deletes documents from the index and re-checks the collection counts.
 *
 * @throws Exception if indexing, searching or deleting fails
 */
@Test
public void filterExampleAtomicLegacy() throws Exception {
    // That's exactly the same test class, but with multiple atomic indices
    // Construct index
    KrillIndex ki = new KrillIndex();

    // Indexing test files, committing after each one
    for (String i : new String[] { "00001", "00002", "00003", "00004", "00005", "00006", "02439" }) {
        ki.addDoc(getClass().getResourceAsStream("/wiki/" + i + ".json.gz"), true);
        ki.commit();
    }

    // Create Virtual collections:
    KrillCollection kc = new KrillCollection(ki);
    assertEquals("Documents", 7, kc.numberOf("documents"));

    // If this is set - everything is fine automatically ...
    kc.filter(kc.build().term("corpusID", "WPD"));
    assertEquals("Documents", 7, kc.numberOf("documents"));

    // The virtual collection consists of all documents that have the textClass "reisen" and "freizeit"
    /*
    kc.filter(kf.and("textClass", "reisen").and("textClass",
    "freizeit-unterhaltung"));
    */
    kc.filter(
            kc.build().andGroup()
                    .with(kc.build().term("textClass", "reisen"))
                    .with(kc.build().term("textClass", "freizeit-unterhaltung")));
    assertEquals("Documents", 5, kc.numberOf("documents"));
    assertEquals("Tokens", 1678, kc.numberOf("tokens"));
    assertEquals("Sentences", 194, kc.numberOf("sentences"));
    assertEquals("Paragraphs", 139, kc.numberOf("paragraphs"));

    // Subset this to all documents that have also the text
    // kc.filter(kf.and("textClass", "kultur"));
    kc.filter(kc.build().term("textClass", "kultur"));
    assertEquals("Documents", 1, kc.numberOf("documents"));
    assertEquals("Tokens", 405, kc.numberOf("tokens"));
    assertEquals("Sentences", 75, kc.numberOf("sentences"));
    assertEquals("Paragraphs", 48, kc.numberOf("paragraphs"));

    // This is already filtered though ...
    // kc.filter(kf.and("corpusID", "WPD"));
    kc.filter(kc.build().term("corpusID", "WPD"));
    assertEquals("Documents", 1, kc.numberOf("documents"));
    assertEquals("Tokens", 405, kc.numberOf("tokens"));
    assertEquals("Sentences", 75, kc.numberOf("sentences"));
    assertEquals("Paragraphs", 48, kc.numberOf("paragraphs"));

    // Create a query
    Krill ks = new Krill(new QueryBuilder("tokens").seg("opennlp/p:NN").with("tt/p:NN"));
    ks.setCollection(kc)
            .getMeta()
            .setStartIndex(0)
            .setCount((short) 20)
            .setContext(new SearchContext(true, (short) 5, true, (short) 5));
    Result kr = ks.apply(ki);
    /*
    Result kr = ki.search(kc, query, 0, (short) 20, true, (short) 5, true,
    (short) 5);
    */
    // Expected value goes first so a failure message reads correctly.
    assertEquals(70, kr.getTotalResults());

    // kc.extend(kf.and("textClass", "uninteresting"));
    kc.extend(kc.build().term("textClass", "uninteresting"));
    assertEquals("Documents", 1, kc.numberOf("documents"));

    kc.extend(kc.build().term("textClass", "wissenschaft"));
    assertEquals("Documents", 3, kc.numberOf("documents"));
    assertEquals("Tokens", 1669, kc.numberOf("tokens"));
    assertEquals("Sentences", 188, kc.numberOf("sentences"));
    assertEquals("Paragraphs", 130, kc.numberOf("paragraphs"));

    // System.err.println(kc.toString());
    // Test collectionbuilder simplifier!
    /*
    OrGroup(
    AndGroup(
    corpusID:WPD
    textClass:reisen
    textClass:freizeit-unterhaltung
    textClass:kultur
    corpusID:WPD
    )
    textClass:uninteresting
    textClass:wissenschaft
    )
    */

    // Delete the "wissenschaft" documents from the index; the same
    // collection is then expected to be back at a single document.
    assertTrue(ki.delDocs("textClass", "wissenschaft"));
    ki.commit();
    assertEquals("Documents", 1, kc.numberOf("documents"));
    assertEquals("Tokens", 405, kc.numberOf("tokens"));
    assertEquals("Sentences", 75, kc.numberOf("sentences"));
    assertEquals("Paragraphs", 48, kc.numberOf("paragraphs"));
}
use of de.ids_mannheim.korap.response.SearchContext in project Krill by KorAP.
the class TestKrillCollectionIndex method filterExample2Legacy.
/**
 * Indexes seven gzipped wiki fixtures plus {@code 00012-fakemeta} in a second
 * commit, filters a virtual collection by two text classes, and checks counts
 * and the hit total. Then filters by corpus ID {@code QQQ} and checks that
 * all counts and the search total are zero.
 *
 * @throws Exception if indexing or searching fails
 */
@Test
public void filterExample2Legacy() throws Exception {
    // Construct index
    KrillIndex ki = new KrillIndex();

    // Indexing test files
    for (String i : new String[] { "00001", "00002", "00003", "00004", "00005", "00006", "02439" }) {
        ki.addDoc(getClass().getResourceAsStream("/wiki/" + i + ".json.gz"), true);
    }
    ki.commit();

    // One more document, added in its own commit
    ki.addDoc(getClass().getResourceAsStream("/wiki/00012-fakemeta.json.gz"), true);
    ki.commit();

    /*
    CollectionBuilderLegacy kf = new CollectionBuilderLegacy();
    // Create Virtual collections:
    KrillCollectionLegacy kc = new KrillCollectionLegacy(ki);
    kc.filter(kf.and("textClass", "reisen").and("textClass",
    "freizeit-unterhaltung"));
    */
    KrillCollection kc = new KrillCollection(ki);
    CollectionBuilder cb = kc.build();
    kc.filter(
            cb.andGroup()
                    .with(cb.term("textClass", "reisen"))
                    .with(cb.term("textClass", "freizeit-unterhaltung")));
    assertEquals("Documents", 5, kc.numberOf("documents"));
    assertEquals("Tokens", 1678, kc.numberOf("tokens"));
    assertEquals("Sentences", 194, kc.numberOf("sentences"));
    assertEquals("Paragraphs", 139, kc.numberOf("paragraphs"));

    // Create a query
    Krill ks = new Krill(new QueryBuilder("tokens").seg("opennlp/p:NN").with("tt/p:NN"));
    ks.setCollection(kc)
            .getMeta()
            .setStartIndex(0)
            .setCount((short) 20)
            .setContext(new SearchContext(true, (short) 5, true, (short) 5));
    Result kr = ks.apply(ki);
    // Expected value goes first so a failure message reads correctly.
    assertEquals(369, kr.getTotalResults());

    // kc.filter(kf.and("corpusID", "QQQ"));
    kc.filter(cb.term("corpusID", "QQQ"));
    assertEquals("Documents", 0, kc.numberOf("documents"));
    assertEquals("Tokens", 0, kc.numberOf("tokens"));
    assertEquals("Sentences", 0, kc.numberOf("sentences"));
    assertEquals("Paragraphs", 0, kc.numberOf("paragraphs"));

    // Re-run the same query against the now empty collection
    ks.setCollection(kc);
    kr = ks.apply(ki);
    /*
    kr = ki.search(kc, query, 0, (short) 20, true, (short) 5, true,
    (short) 5);
    */
    assertEquals(0, kr.getTotalResults());
}
Aggregations