use of de.ids_mannheim.korap.index.FieldDocument in project Krill by KorAP.
the class TestKrillIndex method indexUnicode.
/**
 * Demonstrates how canonically equivalent Unicode strings behave in the index.
 *
 * <p>The same word is added twice to one document: once spelled with a plain
 * "n" followed by a combining tilde (U+006E U+0303) and once with the
 * precomposed n-with-tilde (U+00F1). The two spellings are canonically
 * equivalent but are indexed as different byte sequences, so a query for one
 * spelling is expected to hit exactly once.
 *
 * @throws IOException declared by the index operations used here
 * @throws QueryException declared by the query construction used here
 */
@Test
public void indexUnicode() throws IOException, QueryException {
    // Both spellings are written with escapes so they stay distinguishable
    // in the source regardless of editor or file normalization.
    final String decomposed = "ju" + "\u006E" + "\u0303" + "o";
    final String precomposed = "ju" + "\u00F1" + "o";

    KrillIndex ki = new KrillIndex();
    FieldDocument fd = new FieldDocument();
    fd.addString("name", "Peter");

    // Primary text holds both spellings separated by one space; the token
    // stream carries one entry per spelling (offsets 0-5 and 6-10).
    fd.addTV(
        "base",
        decomposed + " " + precomposed,
        "[(0-5)s:" + decomposed + "|_0$<i>0<i>5|-:t$<i>2]"
            + "[(6-10)s:" + precomposed + "|_1$<i>6<i>10]");
    ki.addDoc(fd);
    ki.commit();
    assertEquals(1, ki.numberOf("base", "documents"));

    QueryBuilder kq = new QueryBuilder("base");

    // Precomposed spelling matches exactly one token
    Result kr = ki.search(kq.seg("s:" + precomposed).toQuery());
    assertEquals(1, kr.getTotalResults());

    // Decomposed spelling matches exactly one token as well
    kr = ki.search(kq.seg("s:" + decomposed).toQuery());
    assertEquals(1, kr.getTotalResults());
}
use of de.ids_mannheim.korap.index.FieldDocument in project Krill by KorAP.
the class TestMatchIdentifier method createSimpleFieldDoc3.
/**
 * Builds the fixture document with corpusID "c1" and ID "d3".
 *
 * <p>The "tokens" field has a primary text of ten two-letter words separated
 * by single spaces (with a trailing space) and a token stream of ten
 * bracketed entries, one per word.
 *
 * @return the populated document
 */
private FieldDocument createSimpleFieldDoc3() {
    // One append per token entry, in text order
    final StringBuilder tokenStream = new StringBuilder();
    tokenStream.append("[(0-2)s:aa|i:a|f/m:eins|f/y:one|x/o:erstens|it/is:1|>:x/rel:a$<b>32<i>4<s>0<s>0<s>0|_0$<i>0<i>2|-:t$<i>10]");
    tokenStream.append("[(3-5)s:bb|i:b|f/m:zwei|f/y:two|x/o:zweitens|it/is:2|_1$<i>3<i>5]");
    tokenStream.append("[(6-8)s:cc|i:c|f/m:drei|f/y:three|x/o:drittens|it/is:3|_2$<i>6<i>8|<>:base/s:s$<b>64<i>6<i>14<i>5]");
    tokenStream.append("[(9-11)s:aa|i:a|f/m:vier|f/y:four|x/o:viertens|it/is:4|<:x/rel:b$<b>40<i>1<s>0<s>0<s>0|_3$<i>9<i>11]");
    tokenStream.append("[(12-14)s:bb|i:b|f/m:fuenf|f/y:five|x/o:fünftens|it/is:5|_4$<i>12<i>14]");
    tokenStream.append("[(15-17)s:cc|i:c|f/m:sechs|f/y:six|x/o:sechstens|it/is:6|_5$<i>15<i>17]");
    tokenStream.append("[(18-20)s:aa|i:a|f/m:sieben|f/y:seven|x/o:siebtens|it/is:7|_6$<i>18<i>20]");
    tokenStream.append("[(21-23)s:bb|i:b|f/m:acht|f/y:eight|x/o:achtens|it/is:8|<>:x/tag$<b>64<i>7<i>10<i>10|_7$<i>21<i>23]");
    tokenStream.append("[(24-26)s:aa|i:a|f/m:neun|f/y:nine|x/o:neuntens|it/is:9|_8$<i>24<i>26]");
    tokenStream.append("[(27-29)s:cc|i:c|f/m:zehn|f/y:ten|x/o:zehntens|it/is:10|_9$<i>27<i>29]");

    final FieldDocument doc = new FieldDocument();
    doc.addString("corpusID", "c1");
    doc.addString("ID", "d3");
    doc.addTV("tokens", "aa bb cc aa bb cc aa bb aa cc ", tokenStream.toString());
    return doc;
}
use of de.ids_mannheim.korap.index.FieldDocument in project Krill by KorAP.
the class TestMatchIdentifier method createAttributeFieldDoc.
/**
 * Builds the fixture document with corpusID "ca1" and ID "da1", used to
 * check for terms, spans and relations.
 *
 * <p>The "tokens" field has the primary text "abcabcabac" and a token stream
 * of ten bracketed entries, one per character. The eighth entry additionally
 * carries an "x/s:tag" span entry and an "@:x/s:key:value" entry.
 *
 * @return the populated document
 */
private FieldDocument createAttributeFieldDoc() {
    // One array element per token entry, in text order
    final String[] tokenEntries = {
        "[(0-1)s:a|i:a|f/m:eins|f/y:one|x/o:erstens|it/is:1|_0$<i>0<i>1|-:t$<i>10]",
        "[(1-2)s:b|i:b|f/m:zwei|f/y:two|x/o:zweitens|it/is:2|_1$<i>1<i>2]",
        "[(2-3)s:c|i:c|f/m:drei|f/y:three|x/o:drittens|it/is:3|_2$<i>2<i>3|<>:base/s:s$<b>64<i>2<i>5<i>5]",
        "[(3-4)s:a|i:a|f/m:vier|f/y:four|x/o:viertens|it/is:4|_3$<i>3<i>4]",
        "[(4-5)s:b|i:b|f/m:fuenf|f/y:five|x/o:fünftens|it/is:5|_4$<i>4<i>5]",
        "[(5-6)s:c|i:c|f/m:sechs|f/y:six|x/o:sechstens|it/is:6|_5$<i>5<i>6]",
        "[(6-7)s:a|i:a|f/m:sieben|f/y:seven|x/o:siebtens|it/is:7|_6$<i>6<i>7]",
        "[(7-8)s:b|i:b|f/m:acht|f/y:eight|x/o:achtens|it/is:8|<>:x/s:tag$<b>64<i>7<i>10<i>10<b>0<s>1|@:x/s:key:value$<b>17<i>10<s>1|_7$<i>7<i>8]",
        "[(8-9)s:a|i:a|f/m:neun|f/y:nine|x/o:neuntens|it/is:9|_8$<i>8<i>9]",
        "[(9-10)s:c|i:c|f/m:zehn|f/y:ten|x/o:zehntens|it/is:10|_9$<i>9<i>10]"
    };

    // Entries are concatenated without any separator
    final StringBuilder tokenStream = new StringBuilder();
    for (int idx = 0; idx < tokenEntries.length; idx++) {
        tokenStream.append(tokenEntries[idx]);
    }

    final FieldDocument doc = new FieldDocument();
    doc.addString("corpusID", "ca1");
    doc.addString("ID", "da1");
    doc.addTV("tokens", "abcabcabac", tokenStream.toString());
    return doc;
}
use of de.ids_mannheim.korap.index.FieldDocument in project Krill by KorAP.
the class TestMatchIdentifier method createSimpleFieldDoc5.
/**
 * Builds the fixture document with corpusID "c1" and ID "d5".
 *
 * <p>The "tokens" field has the primary text "xyz" and a token stream of
 * three bracketed entries. The first entry carries four "x/tag" entries
 * (v, c, a, b) in addition to its surface terms.
 *
 * @return the populated document
 */
private FieldDocument createSimpleFieldDoc5() {
    // First token: surface terms, then the four x/tag entries, then the
    // closing position entry
    final String firstToken = "[(0-1)s:x|i:x"
        + "|<>:x/tag:v$<b>65<i>1"
        + "|<>:x/tag:c$<b>64<i>1<i>0<i>2"
        + "|<>:x/tag:a$<b>64<i>2<i>0<i>3"
        + "|<>:x/tag:b$<b>64<i>2<i>0<i>3"
        + "|_1$<i>0<i>1]";
    final String secondToken = "[(1-2)s:y|i:y|_2$<i>1<i>2]";
    final String thirdToken = "[(2-3)s:z|i:z|_3$<i>2<i>3]";

    final FieldDocument doc = new FieldDocument();
    doc.addString("corpusID", "c1");
    doc.addString("ID", "d5");
    doc.addTV("tokens", "xyz", firstToken + secondToken + thirdToken);
    return doc;
}
use of de.ids_mannheim.korap.index.FieldDocument in project Krill by KorAP.
the class TestMatchIdentifier method createSimpleFieldDoc4.
/**
 * Builds the fixture document with corpusID "c1" and ID "d4".
 *
 * <p>The "tokens" field has the primary text "abcabcabac" and a token stream
 * of ten bracketed entries, one per character. Besides the surface terms,
 * the stream contains two "x/rel" entries, two "base/s:s" entries and one
 * "x/tag" entry.
 *
 * @return the populated document
 */
private FieldDocument createSimpleFieldDoc4() {
    // One append per token entry, in text order
    final StringBuilder tokenStream = new StringBuilder()
        .append("[(0-1)s:a|i:a|f/m:eins|f/y:one|x/o:erstens|it/is:1|>:x/rel:a$<b>32<i>4<s>0<s>0<s>0|_0$<i>0<i>1|-:t$<i>10]")
        .append("[(1-2)s:b|i:b|f/m:zwei|f/y:two|x/o:zweitens|it/is:2|_1$<i>1<i>2]")
        .append("[(2-3)s:c|i:c|f/m:drei|f/y:three|x/o:drittens|it/is:3|_2$<i>2<i>3|<>:base/s:s$<b>64<i>2<i>5<i>5]")
        .append("[(3-4)s:a|i:a|f/m:vier|f/y:four|x/o:viertens|it/is:4|<:x/rel:b$<b>40<i>1<s>0<s>0<s>0|_3$<i>3<i>4]")
        .append("[(4-5)s:b|i:b|f/m:fuenf|f/y:five|x/o:fünftens|it/is:5|_4$<i>4<i>5]")
        .append("[(5-6)s:c|i:c|f/m:sechs|f/y:six|x/o:sechstens|it/is:6|_5$<i>5<i>6|<>:base/s:s$<b>64<i>5<i>7<i>7]")
        .append("[(6-7)s:a|i:a|f/m:sieben|f/y:seven|x/o:siebtens|it/is:7|_6$<i>6<i>7]")
        .append("[(7-8)s:b|i:b|f/m:acht|f/y:eight|x/o:achtens|it/is:8|<>:x/tag$<b>64<i>7<i>10<i>10|_7$<i>7<i>8]")
        .append("[(8-9)s:a|i:a|f/m:neun|f/y:nine|x/o:neuntens|it/is:9|_8$<i>8<i>9]")
        .append("[(9-10)s:c|i:c|f/m:zehn|f/y:ten|x/o:zehntens|it/is:10|_9$<i>9<i>10]");

    final FieldDocument doc = new FieldDocument();
    doc.addString("corpusID", "c1");
    doc.addString("ID", "d4");
    doc.addTV("tokens", "abcabcabac", tokenStream.toString());
    return doc;
}
Aggregations