use of de.ids_mannheim.korap.KrillIndex in project Krill by KorAP.
the class TestElementIndex method indexExample4.
/**
 * Indexes a single document with ten tokens and two {@code <>:a} element
 * annotations, then checks the bracketed snippets of both element matches.
 *
 * The two element annotations carry the character offsets 9-15 and 18-24,
 * which cover "222222" and "333333" in the primary text.
 */
@Test
public void indexExample4() throws IOException {
    final String primaryText = "111111ccc222222fff333333iiijjj";

    // One bracket group per token; groups 3 and 6 additionally open an "a" element.
    final String annotations =
        "[(0-3)s:a|_0$<i>0<i>3]"
        + "[(3-6)s:b|_1$<i>3<i>6]"
        + "[(6-9)s:c|_2$<i>6<i>9]"
        + "[(9-12)s:d|_3$<i>9<i>12|<>:a$<b>64<i>9<i>15<i>4<b>0]"
        + "[(12-15)s:e|_4$<i>12<i>15]"
        + "[(15-18)s:f|_5$<i>15<i>18]"
        + "[(18-21)s:g|_6$<i>18<i>21|<>:a$<b>64<i>18<i>24<i>8<b>0]"
        + "[(21-24)s:h|_7$<i>21<i>24]"
        + "[(24-27)s:i|_8$<i>24<i>27]"
        + "[(27-30)s:j|_9$<i>27<i>30]";

    FieldDocument document = new FieldDocument();
    document.addTV("base", primaryText, annotations);

    KrillIndex index = new KrillIndex();
    index.addDoc(document);
    // Make the added document visible to searches
    index.commit();
    assertEquals(1, index.numberOf("documents"));

    // Search for every "a" element in the "base" field.
    // NOTE(review): the trailing (false, 3, false, 3) arguments presumably select a
    // 3-character left/right context - matches the expected snippets, confirm against KrillIndex.search.
    SpanQuery elementQuery = new SpanElementQuery("base", "a");
    Result result = index.search(elementQuery, 0, (short) 15, false, (short) 3, false, (short) 3);

    assertEquals("... ccc[[222222]]fff ...", result.getMatch(0).getSnippetBrackets());
    assertEquals("... fff[[333333]]iii ...", result.getMatch(1).getSnippetBrackets());
}
use of de.ids_mannheim.korap.KrillIndex in project Krill by KorAP.
the class TestElementIndex method indexExample6.
/**
 * Indexes five documents, three of which carry {@code <>:a} element
 * annotations (3 + 0 + 1 + 3 + 0 = 7 in total), and checks that a
 * {@link SpanElementQuery} for "a" reports exactly seven results.
 *
 * The third document declares an element whose offsets and end position
 * reach beyond the tokens the document actually contains.
 */
@Test
public void indexExample6() throws IOException {
    KrillIndex ki = new KrillIndex();

    // Document 1 - three nested elements: <a>x<a>y<a>zhij</a>hij</a>hij</a>
    FieldDocument fd = new FieldDocument();
    fd.addTV("base", "x y z h i j h i j h i j ",
        "[(0-3)s:x|_0$<i>0<i>3|<>:a$<b>64<i>0<i>36<i>12<b>0]"
        + "[(3-6)s:y|_1$<i>3<i>6|<>:a$<b>64<i>3<i>27<i>9<b>0]"
        + "[(6-9)s:z|_2$<i>6<i>9|<>:a$<b>64<i>6<i>18<i>6<b>0]"
        + "[(9-12)s:h|_3$<i>9<i>12]"
        + "[(12-15)s:i|_4$<i>12<i>15]"
        + "[(15-18)s:j|_5$<i>15<i>18]"
        + "[(18-21)s:h|_6$<i>18<i>21]"
        + "[(21-24)s:i|_7$<i>21<i>24]"
        + "[(24-27)s:j|_8$<i>24<i>27]"
        + "[(27-30)s:h|_9$<i>27<i>30]"
        + "[(30-33)s:i|_10$<i>30<i>33]"
        + "[(33-36)s:j|_11$<i>33<i>36]");
    ki.addDoc(fd);

    // Document 2 - no elements at all
    fd = new FieldDocument();
    fd.addTV("base", "x y z h ",
        "[(0-3)s:x|_0$<i>0<i>3]"
        + "[(3-6)s:y|_1$<i>3<i>6]"
        + "[(6-9)s:z|_2$<i>6<i>9]"
        + "[(9-12)s:h|_3$<i>9<i>12]");
    ki.addDoc(fd);

    // Document 3 - here is a larger offset than expected: the single element
    // claims to span further than the four tokens that exist
    fd = new FieldDocument();
    fd.addTV("base", "x y z h ",
        "[(0-3)s:x|_0$<i>0<i>3|<>:a$<b>64<i>0<i>36<i>12<b>0]"
        + "[(3-6)s:y|_1$<i>3<i>6]"
        + "[(6-9)s:z|_2$<i>6<i>9]"
        + "[(9-12)s:h|_3$<i>9<i>12]");
    ki.addDoc(fd);

    // Document 4 - three nested elements: <a>x<a>y<a>zabc</a>abc</a>abc</a>
    fd = new FieldDocument();
    fd.addTV("base", "x y z a b c a b c a b c ",
        "[(0-3)s:x|_0$<i>0<i>3|<>:a$<b>64<i>0<i>36<i>12<b>0]"
        + "[(3-6)s:y|_1$<i>3<i>6|<>:a$<b>64<i>3<i>27<i>9<b>0]"
        + "[(6-9)s:z|_2$<i>6<i>9|<>:a$<b>64<i>6<i>18<i>6<b>0]"
        + "[(9-12)s:a|_3$<i>9<i>12]"
        + "[(12-15)s:b|_4$<i>12<i>15]"
        + "[(15-18)s:c|_5$<i>15<i>18]"
        + "[(18-21)s:a|_6$<i>18<i>21]"
        + "[(21-24)s:b|_7$<i>21<i>24]"
        + "[(24-27)s:c|_8$<i>24<i>27]"
        + "[(27-30)s:a|_9$<i>27<i>30]"
        + "[(30-33)s:b|_10$<i>30<i>33]"
        + "[(33-36)s:c|_11$<i>33<i>36]");
    ki.addDoc(fd);

    // Document 5 - no elements at all
    fd = new FieldDocument();
    fd.addTV("base", "x y z h ",
        "[(0-3)s:x|_0$<i>0<i>3]"
        + "[(3-6)s:y|_1$<i>3<i>6]"
        + "[(6-9)s:z|_2$<i>6<i>9]"
        + "[(9-12)s:h|_3$<i>9<i>12]");
    ki.addDoc(fd);

    // Save documents
    ki.commit();

    SpanQuery sq = new SpanElementQuery("base", "a");
    Result kr = ki.search(sq, (short) 15);

    assertEquals(5, ki.numberOf("documents"));
    // Expected value goes first so that a failure is reported the right way round
    assertEquals("totalResults", 7, kr.getTotalResults());
}
use of de.ids_mannheim.korap.KrillIndex in project Krill by KorAP.
the class TestFieldDocument method indexArbitraryMetaDataPartial.
/**
 * Indexes the document produced by {@code createDocString1()} and requests
 * only the metadata fields "datum" and "titel" for text sigle "aa/bb/cc".
 * Verifies type, key and value of both returned fields and that no third
 * field is present in the response.
 */
@Test
public void indexArbitraryMetaDataPartial() throws Exception {
    String json = createDocString1();
    KrillIndex ki = new KrillIndex();
    ki.addDoc(json);
    ki.commit();

    // Restrict the response to these two field keys
    ArrayList<String> hs = new ArrayList<>();
    hs.add("datum");
    hs.add("titel");

    JsonNode res = ki.getFields("aa/bb/cc", hs).toJsonNode();

    assertEquals("type:date", res.at("/document/fields/0/type").asText());
    assertEquals("datum", res.at("/document/fields/0/key").asText());
    assertEquals("2018-04-03", res.at("/document/fields/0/value").asText());

    assertEquals("type:text", res.at("/document/fields/1/type").asText());
    assertEquals("titel", res.at("/document/fields/1/key").asText());
    assertEquals("Der alte Baum", res.at("/document/fields/1/value").asText());

    // Nothing beyond the two requested fields may be returned
    assertTrue(res.at("/document/fields/2").isMissingNode());
}
use of de.ids_mannheim.korap.KrillIndex in project Krill by KorAP.
the class TestFieldDocument method indexExample3.
/**
 * Indexes seven gzipped wiki test documents from the classpath, runs a
 * "contains" query and checks the bracketed snippet of the first match.
 * Afterwards snippet retrieval is switched off and the test verifies that
 * the match still carries metadata but no snippet-related keys and no
 * primary data.
 */
@Test
public void indexExample3() throws IOException {
    // Construct index
    KrillIndex ki = new KrillIndex();

    // Indexing test files
    for (String i : new String[] { "00001", "00002", "00003", "00004", "00005", "00006", "02439" }) {
        ki.addDoc(getClass().getResourceAsStream("/wiki/" + i + ".json.gz"), true);
    }
    ki.commit();

    QueryBuilder kq = new QueryBuilder("tokens");

    // Query: "base/s:s" elements that contain the sequence
    // [mate/p:ADJA][opennlp/p:NN], with the sequence wrapped via nr(1, ...)
    // (it shows up as {1:...} in the expected snippet below)
    Krill ks = new Krill(kq.contains(kq.tag("base/s:s"), kq.nr(1, kq.seq(kq.seg("mate/p:ADJA")).append(kq.seg("opennlp/p:NN")))));

    KrillMeta meta = ks.getMeta();
    meta.setCount(1);
    meta.setCutOff(true);
    // Left context is measured in characters, right context in tokens
    meta.getContext().left.setCharacter(true).setLength(6);
    meta.getContext().right.setToken(true).setLength(6);

    assertEquals("... okal. [[Der Buchstabe A hat in {1:deutschen Texten} eine durchschnittliche Häufigkeit von 6,51 %.]] Er ist damit der sechsthäufigste Buchstabe ...", ks.apply(ki).getMatch(0).getSnippetBrackets());

    // Do not retrieve snippets
    meta.setSnippets(false);
    Match km = ks.apply(ki).getMatch(0);

    // Metadata is still available ...
    assertEquals("Ruru,Jens.Ol,Aglarech", km.toJsonNode().get("author").asText());

    // ... but nothing snippet-related is serialized
    assertFalse(km.toJsonNode().has("snippet"));
    assertEquals("", km.getPrimaryData());
    assertFalse(km.toJsonNode().has("startMore"));
    assertFalse(km.toJsonNode().has("endMore"));
    assertFalse(km.toJsonNode().has("endCutted"));
}
use of de.ids_mannheim.korap.KrillIndex in project Krill by KorAP.
the class TestFieldDocument method indexNewMetaData.
/**
 * Indexes a document given in the "fields" metadata notation (a list of
 * {@code koral:field} objects) and verifies the metadata twice: once on
 * the stored Lucene document returned by {@code addDoc}, and once on the
 * JSON response of {@code getFields} for text sigle "x/y/z".
 *
 * Any field key in the response that is not one of the ten keys of the
 * input document makes the test fail.
 */
@Test
public void indexNewMetaData() throws Exception {
    String json = "{"
        + " \"data\" : {"
        + " \"text\" : \"abc\","
        + " \"name\" : \"tokens\","
        + " \"stream\" : ["
        + " [ \"s:a\", \"i:a\", \"_0$<i>0<i>1\", \"-:t$<i>3\"],"
        + " [ \"s:b\", \"i:b\", \"_1$<i>1<i>2\" ],"
        + " [ \"s:c\", \"i:c\", \"_2$<i>2<i>3\" ]"
        + " ]"
        + " },"
        + " \"fields\" : ["
        + " {"
        + " \"@type\" : \"koral:field\","
        + " \"type\" : \"type:string\","
        + " \"key\" : \"corpusID\","
        + " \"value\" : \"WPD\""
        + " },"
        + " {"
        + " \"@type\" : \"koral:field\","
        + " \"type\" : \"type:string\","
        + " \"key\" : \"textSigle\","
        + " \"value\" : \"x/y/z\""
        + " },"
        + " {"
        + " \"@type\" : \"koral:field\","
        + " \"type\" : \"type:string\","
        + " \"key\" : \"ID\","
        + " \"value\" : \"WPD-AAA-00001\""
        + " },"
        + " {"
        + " \"@type\" : \"koral:field\","
        + " \"type\" : \"type:string\","
        + " \"key\" : \"textClass\","
        + " \"value\" : [\"music\",\"entertainment\"]"
        + " },"
        + " {"
        + " \"@type\" : \"koral:field\","
        + " \"type\" : \"type:text\","
        + " \"key\" : \"author\","
        + " \"value\" : \"Peter Frankenfeld\""
        + " },"
        + " {"
        + " \"@type\" : \"koral:field\","
        + " \"type\" : \"type:date\","
        + " \"key\" : \"pubDate\","
        + " \"value\" : \"2015-05-01\""
        + " },"
        + " {"
        + " \"@type\" : \"koral:field\","
        + " \"type\" : \"type:text\","
        + " \"key\" : \"title\","
        + " \"value\" : \"Wikipedia\""
        + " },"
        + " {"
        + " \"@type\" : \"koral:field\","
        + " \"type\" : \"type:text\","
        + " \"key\" : \"subTitle\","
        + " \"value\" : \"Die freie Enzyklopädie\""
        + " },"
        + " {"
        + " \"@type\" : \"koral:field\","
        + " \"type\" : \"type:string\","
        + " \"key\" : \"pubPlace\","
        + " \"value\" : \"Bochum\""
        + " },"
        + " {"
        + " \"@type\" : \"koral:field\","
        + " \"type\" : \"type:attachement\","
        + " \"key\" : \"link\","
        + " \"value\" : \"data:application/x.korap-link,https://de.wikipedia.org/wiki/Beispiel\""
        + " }"
        + " ]"
        + "}";

    KrillIndex ki = new KrillIndex();
    FieldDocument fd = ki.addDoc(json);
    ki.commit();

    // Part 1: values as stored on the Lucene document (expected value first)
    assertEquals("abc", fd.getPrimaryData());
    // NOTE(review): the corpusID check was already disabled in the original test - confirm why.
    // assertEquals("WPD", fd.doc.getField("corpusID").stringValue());
    assertEquals("x/y/z", fd.doc.getField("textSigle").stringValue());
    assertEquals("WPD-AAA-00001", fd.doc.getField("ID").stringValue());
    // The keyword list is stored as one space-separated string
    assertEquals("music entertainment", fd.doc.getField("textClass").stringValue());
    assertEquals("Peter Frankenfeld", fd.doc.getField("author").stringValue());
    assertEquals("Wikipedia", fd.doc.getField("title").stringValue());
    assertEquals("Die freie Enzyklopädie", fd.doc.getField("subTitle").stringValue());
    assertEquals("Bochum", fd.doc.getField("pubPlace").stringValue());
    // The date is stored without separators
    assertEquals("20150501", fd.doc.getField("pubDate").stringValue());
    assertEquals("data:application/x.korap-link,https://de.wikipedia.org/wiki/Beispiel", fd.doc.getField("link").stringValue());

    // Part 2: values as serialized by getFields()
    JsonNode res = ki.getFields("x/y/z").toJsonNode();

    // NOTE(review): checkC counts the visited fields but is never asserted;
    // presumably the test was meant to verify that all ten fields were seen - confirm.
    int checkC = 0;
    for (JsonNode field : res.at("/document/fields")) {
        String key = field.at("/key").asText();
        switch (key) {
            case "corpusID":
                assertEquals("type:string", field.at("/type").asText());
                assertEquals("koral:field", field.at("/@type").asText());
                assertEquals("WPD", field.at("/value").asText());
                checkC++;
                break;
            case "textSigle":
                assertEquals("type:string", field.at("/type").asText());
                assertEquals("koral:field", field.at("/@type").asText());
                assertEquals("x/y/z", field.at("/value").asText());
                checkC++;
                break;
            case "ID":
                assertEquals("type:string", field.at("/type").asText());
                assertEquals("koral:field", field.at("/@type").asText());
                assertEquals("WPD-AAA-00001", field.at("/value").asText());
                checkC++;
                break;
            case "textClass":
                // Sent as type:string with an array value, reported back as type:keywords
                assertEquals("type:keywords", field.at("/type").asText());
                assertEquals("koral:field", field.at("/@type").asText());
                assertEquals("music", field.at("/value/0").asText());
                assertEquals("entertainment", field.at("/value/1").asText());
                checkC++;
                break;
            case "author":
                assertEquals("type:text", field.at("/type").asText());
                assertEquals("koral:field", field.at("/@type").asText());
                assertEquals("Peter Frankenfeld", field.at("/value").asText());
                checkC++;
                break;
            case "title":
                assertEquals("type:text", field.at("/type").asText());
                assertEquals("koral:field", field.at("/@type").asText());
                assertEquals("Wikipedia", field.at("/value").asText());
                checkC++;
                break;
            case "subTitle":
                assertEquals("type:text", field.at("/type").asText());
                assertEquals("koral:field", field.at("/@type").asText());
                assertEquals("Die freie Enzyklopädie", field.at("/value").asText());
                checkC++;
                break;
            case "pubPlace":
                assertEquals("type:string", field.at("/type").asText());
                assertEquals("koral:field", field.at("/@type").asText());
                assertEquals("Bochum", field.at("/value").asText());
                checkC++;
                break;
            case "pubDate":
                // Serialized back in the dashed form that was sent in
                assertEquals("type:date", field.at("/type").asText());
                assertEquals("koral:field", field.at("/@type").asText());
                assertEquals("2015-05-01", field.at("/value").asText());
                checkC++;
                break;
            case "link":
                assertEquals("type:attachement", field.at("/type").asText());
                assertEquals("koral:field", field.at("/@type").asText());
                assertEquals("data:application/x.korap-link,https://de.wikipedia.org/wiki/Beispiel", field.at("/value").asText());
                checkC++;
                break;
            default:
                fail("Unknown field: " + key);
        }
    }
}
Aggregations