use of de.ids_mannheim.korap.index.FieldDocument in project Krill by KorAP.
the class TestKrillCollectionIndex method testIndexWithRegexes.
/**
 * Indexes three documents and checks how many of them are selected by
 * regular-expression, text and term collection filters on the "author"
 * and "text" fields.
 */
@Test
public void testIndexWithRegexes() throws IOException {
    ki = new KrillIndex();

    // Keep a handle on the first document; its stored text is read back below.
    final FieldDocument firstDoc = ki.addDoc(createDoc1());
    ki.addDoc(createDoc2());
    ki.addDoc(createDoc3());
    ki.commit();

    final CollectionBuilder builder = new CollectionBuilder();
    final KrillCollection collection = new KrillCollection(ki);

    // --- Regular expressions on the "author" field ---
    collection.fromBuilder(builder.re("author", "Fran.*"));
    assertEquals(1, collection.docCount());

    collection.fromBuilder(builder.re("author", "Blin.*"));
    assertEquals(0, collection.docCount());

    collection.fromBuilder(builder.re("author", "Frank|Peter"));
    assertEquals(2, collection.docCount());

    // --- Queries on the "text" field ---
    // "Frau" requires text request!
    collection.fromBuilder(builder.text("text", "Frau"));
    assertEquals(1, collection.docCount());

    collection.fromBuilder(builder.term("text", "frau"));
    assertEquals(1, collection.docCount());

    collection.fromBuilder(builder.re("text", "fra."));
    assertEquals(1, collection.docCount());

    collection.fromBuilder(builder.re("text", "fra.|ma.n"));
    assertEquals(3, collection.docCount());

    // The stored value of the first document's "text" field is the full
    // sentence, and a term query for that exact value selects one document.
    final String storedText = firstDoc.doc.getField("text").stringValue();
    assertEquals("Der alte Mann ging über die Straße", storedText);

    collection.fromBuilder(builder.term("text", storedText));
    assertEquals(1, collection.docCount());
}
use of de.ids_mannheim.korap.index.FieldDocument in project Krill by KorAP.
the class TestKrillCollectionIndex method uidCollectionLegacy.
/**
 * Indexes seven wiki test documents with consecutive UIDs (starting at 1)
 * and verifies the index-wide counts, then restricts a collection to the
 * UIDs "2", "3" and "4" and verifies the reduced counts and the reduced
 * number of search results for the token "s:der".
 */
@Test
public void uidCollectionLegacy() throws IOException {
    // Construct index
    KrillIndex ki = new KrillIndex();

    // Indexing test files; the returned document is not needed here
    int uid = 1;
    for (String i : new String[] { "00001", "00002", "00003", "00004", "00005", "00006", "02439" }) {
        ki.addDoc(uid++, getClass().getResourceAsStream("/wiki/" + i + ".json.gz"), true);
    }
    ki.commit();

    // Counts over the whole index
    assertEquals("Documents", 7, ki.numberOf("documents"));
    assertEquals("Paragraphs", 174, ki.numberOf("paragraphs"));
    assertEquals("Sentences", 281, ki.numberOf("sentences"));
    assertEquals("Tokens", 2661, ki.numberOf("tokens"));

    // Unrestricted search for the surface form "der"
    SpanQuery sq = new SpanTermQuery(new Term("tokens", "s:der"));
    Result kr = ki.search(sq, (short) 10);
    assertEquals(86, kr.getTotalResults());

    // Create Virtual collections:
    KrillCollection kc = new KrillCollection();
    kc.filterUIDs(new String[] { "2", "3", "4" });
    kc.setIndex(ki);

    // Counts restricted to the three selected documents
    assertEquals("Documents", 3, kc.numberOf("documents"));
    assertEquals("Paragraphs", 46, kc.numberOf("paragraphs"));
    assertEquals("Sentences", 103, kc.numberOf("sentences"));
    assertEquals("Tokens", 1229, kc.numberOf("tokens"));

    // Same query, now limited to the virtual collection
    Krill ks = new Krill(sq);
    ks.setCollection(kc)
        .getMeta()
        .setStartIndex(0)
        .setCount((short) 20)
        .setContext(new SearchContext(true, (short) 5, true, (short) 5));
    kr = ks.apply(ki);
    // kr = ki.search(kc, sq, 0, (short) 20, true, (short) 5, true, (short) 5);
    assertEquals(39L, kr.getTotalResults());
}
use of de.ids_mannheim.korap.index.FieldDocument in project Krill by KorAP.
the class TestMatchIdentifier method createSigleDoc2.
/**
 * Builds a test document carrying the sigles "c1", "c1/d1" and "c1/d1/t1",
 * the UID 2 and a "tokens" term vector over the surface string
 * "abcabcabac".
 *
 * @return the populated document
 */
private FieldDocument createSigleDoc2() {
    // One bracketed entry per token position (0-1 through 9-10).
    final String tokenAnnotation =
        "[(0-1)s:a|i:a|_0$<i>0<i>1|-:t$<i>10]"
        + "[(1-2)s:b|i:b|_1$<i>1<i>2]"
        + "[(2-3)s:c|i:c|_2$<i>2<i>3]"
        + "[(3-4)s:a|i:a|_3$<i>3<i>4]"
        + "[(4-5)s:b|i:b|_4$<i>4<i>5]"
        + "[(5-6)s:c|i:c|_5$<i>5<i>6]"
        + "[(6-7)s:a|i:a|_6$<i>6<i>7]"
        + "[(7-8)s:b|i:b|_7$<i>7<i>8]"
        + "[(8-9)s:a|i:a|_8$<i>8<i>9]"
        + "[(9-10)s:c|i:c|_9$<i>9<i>10]";

    final FieldDocument document = new FieldDocument();
    document.addString("corpusSigle", "c1");
    document.addString("docSigle", "c1/d1");
    document.addString("textSigle", "c1/d1/t1");
    document.addInt("UID", 2);
    document.addTV("tokens", "abcabcabac", tokenAnnotation);
    return document;
}
use of de.ids_mannheim.korap.index.FieldDocument in project Krill by KorAP.
the class TestMatchIdentifier method indexDependencyAnnotations.
/**
 * Indexes the single document "/goe/Corpus-Doc-0002.json" with UID 2 and
 * checks the HTML snippet that getMatchInfo returns for the match
 * "match-Corpus/Doc/0002-p0-6" on the "tokens" field when "malt" is
 * passed as the third argument (presumably the annotation source;
 * confirm against getMatchInfo's signature).
 */
@Test
public void indexDependencyAnnotations() throws IOException, QueryException {
    KrillIndex ki = new KrillIndex();
    // The returned document is not needed; only the index is queried.
    ki.addDoc(2, getClass().getResourceAsStream("/goe/Corpus-Doc-0002.json"), false);
    ki.commit();

    Match km = ki.getMatchInfo("match-Corpus/Doc/0002-p0-6", "tokens", "malt", null, true, false);

    // Expected snippet: each token carries an xlink:title with its
    // "malt/d:*" label and an xlink:href pointing at another token id.
    final String expectedSnippet =
        "<span class=\"context-left\">"
        + "</span>"
        + "<span class=\"match\">"
        + "<span xml:id=\"token-Corpus/Doc/0002-p0-6\">"
        + "<mark>"
        + "<span xml:id=\"token-Corpus/Doc/0002-p0\">"
        + "<span xlink:title=\"malt/d:ROOT\" xlink:show=\"none\" xlink:href=\"#token-Corpus/Doc/0002-p0-6\">Maximen</span>"
        + "</span>"
        + " "
        + "<span xml:id=\"token-Corpus/Doc/0002-p1\">"
        + "<span xlink:title=\"malt/d:KON\" xlink:show=\"none\" xlink:href=\"#token-Corpus/Doc/0002-p0\">und</span>"
        + "</span>"
        + " "
        + "<span xlink:title=\"malt/d:CJ\" xlink:show=\"none\" xlink:href=\"#token-Corpus/Doc/0002-p1\">Reflexionen</span>"
        + " "
        + "<span xml:id=\"token-Corpus/Doc/0002-p3\">"
        + "<span xlink:title=\"malt/d:KON\" xlink:show=\"none\" xlink:href=\"#token-Corpus/Doc/0002-p0\">Religion</span>"
        + "</span>"
        + " "
        + "<span xml:id=\"token-Corpus/Doc/0002-p4\">"
        + "<span xlink:title=\"malt/d:KON\" xlink:show=\"none\" xlink:href=\"#token-Corpus/Doc/0002-p3\">und</span>"
        + "</span>"
        + " "
        + "<span xlink:title=\"malt/d:CJ\" xlink:show=\"none\" xlink:href=\"#token-Corpus/Doc/0002-p4\">Christentum</span>"
        + "</mark>"
        + "</span>"
        + "</span>"
        + "<span class=\"context-right\">"
        + "<span class=\"more\"></span>"
        + "</span>";

    assertEquals("SnippetHTML (1)", expectedSnippet, km.getSnippetHTML());
}
use of de.ids_mannheim.korap.index.FieldDocument in project Krill by KorAP.
the class TestMatchIdentifier method createSimpleFieldDoc2.
/**
 * Builds a test document with corpusID "c1", ID "d1" and a "tokens" term
 * vector over the surface string "abcabcabac". Besides the s: and i:
 * terms, every token entry carries f/m, f/y, x/o and it/is terms; a few
 * entries additionally carry terms prefixed with ">:", "<:" or "<>:"
 * (presumably relations and spans; confirm against the index format).
 *
 * @return the populated document
 */
private FieldDocument createSimpleFieldDoc2() {
    // One bracketed entry per token position (0-1 through 9-10).
    final String tokenAnnotation =
        "[(0-1)s:a|i:a|f/m:eins|f/y:one|x/o:erstens|it/is:1|>:x/rel:a$<b>32<i>3<s>0<s>0<s>0|_0$<i>0<i>1|-:t$<i>10]"
        + "[(1-2)s:b|i:b|f/m:zwei|f/y:two|x/o:zweitens|it/is:2|>:x/rel:b$<b>32<i>3<s>0<s>0<s>0|_1$<i>1<i>2]"
        + "[(2-3)s:c|i:c|f/m:drei|f/y:three|x/o:drittens|it/is:3|_2$<i>2<i>3|<>:base/s:s$<b>64<i>2<i>5<i>5]"
        + "[(3-4)s:a|i:a|f/m:vier|f/y:four|x/o:viertens|it/is:4|<:x/rel:b$<b>32<i>0<s>0<s>0<s>0|_3$<i>3<i>4]"
        + "[(4-5)s:b|i:b|f/m:fuenf|f/y:five|x/o:fünftens|it/is:5|_4$<i>4<i>5]"
        + "[(5-6)s:c|i:c|f/m:sechs|f/y:six|x/o:sechstens|it/is:6|_5$<i>5<i>6]"
        + "[(6-7)s:a|i:a|f/m:sieben|f/y:seven|x/o:siebtens|it/is:7|_6$<i>6<i>7]"
        + "[(7-8)s:b|i:b|f/m:acht|f/y:eight|x/o:achtens|it/is:8|<>:x/tag$<b>64<i>7<i>10<i>10|_7$<i>7<i>8]"
        + "[(8-9)s:a|i:a|f/m:neun|f/y:nine|x/o:neuntens|it/is:9|_8$<i>8<i>9]"
        + "[(9-10)s:c|i:c|f/m:zehn|f/y:ten|x/o:zehntens|it/is:10|_9$<i>9<i>10]";

    final FieldDocument document = new FieldDocument();
    document.addString("corpusID", "c1");
    document.addString("ID", "d1");
    document.addTV("tokens", "abcabcabac", tokenAnnotation);
    return document;
}
Aggregations