use of de.ids_mannheim.korap.KrillIndex in project Krill by KorAP.
the class Resource method getCorpus.
/**
 * Return corpus information as a JSON string.
 *
 * <p>If the freshly initialised response already reports errors, it is
 * serialised and returned as is. Otherwise the counts reported by the
 * index for a fixed list of keys are collected in a JSON object and
 * attached to the response under the name {@code "stats"}.
 *
 * @param uri the request URI information, annotated with {@code @Context};
 *            it is not read by this method
 * @return the response serialised via {@code toJsonString()}
 */
@GET
@Path("/corpus")
@Produces(MediaType.APPLICATION_JSON)
public String getCorpus(@Context UriInfo uri) {
    // TODO: Accept fields!!!!
    final Response kresp = _initResponse();
    if (kresp.hasErrors()) {
        return kresp.toJsonString();
    }

    // TODO: Statistics should be node fields - not annotations!
    // TODO: This is just temporary
    final KrillIndex index = Node.getIndex();
    final ObjectNode stats = new ObjectMapper().createObjectNode();

    // Keys are both the argument to numberOf() and the JSON property name.
    // The last two entries are the legacy (unprefixed) names.
    final String[] statKeys = {
        "tokens",
        "base/texts",
        "base/sentences",
        "base/paragraphs",
        "sentences",
        "paragraphs"
    };
    for (String key : statKeys) {
        stats.put(key, index.numberOf(key));
    }

    kresp.addJsonNode("stats", stats);
    return kresp.toJsonString();
}
use of de.ids_mannheim.korap.KrillIndex in project Krill by KorAP.
the class TestRegexWildcardIndex method indexRegexCombined.
/**
 * Searches for the segment {@code s:affe} directly followed by the regular
 * expression {@code s:af*e} in a single ten-token document.
 *
 * <p>Expects the query to stringify as a {@code spanNext} of the term and
 * the wrapped regex, and expects exactly one hit whose bracket snippet is
 * {@code [[affe afffe]] baum ...}.
 */
@Test
public void indexRegexCombined() throws Exception {
    final KrillIndex index = new KrillIndex();

    // Fixture: primary text plus one annotation entry per token
    // (surface form, token index and character offsets).
    final String primaryText =
        "affe afffe baum baumgarten steingarten franz hans haus efeu effe";
    final String tokenStream =
        "[(0-4)s:affe|_0$<i>0<i>4|-:t$<i>10]"
        + "[(5-10)s:afffe|_1$<i>5<i>10]"
        + "[(11-15)s:baum|_2$<i>11<i>15]"
        + "[(16-26)s:baumgarten|_3$<i>16<i>26]"
        + "[(27-38)s:steingarten|_4$<i>27<i>38]"
        + "[(39-44)s:franz|_5$<i>39<i>44]"
        + "[(45-49)s:hans|_6$<i>45<i>49]"
        + "[(50-54)s:haus|_7$<i>50<i>54]"
        + "[(55-59)s:efeu|_8$<i>55<i>59]"
        + "[(60-64)s:effe|_9$<i>60<i>64]";

    final FieldDocument doc = new FieldDocument();
    doc.addTV("base", primaryText, tokenStream);
    index.addDoc(doc);
    index.commit();

    // Sequence: literal "affe" followed by the regex af*e.
    final QueryBuilder builder = new QueryBuilder("base");
    final SpanQuery query = builder
        .seq(builder.seg("s:affe"))
        .append(builder.re("s:af*e"))
        .toQuery();
    assertEquals(
        "spanNext(base:s:affe, SpanMultiTermQueryWrapper(base:/s:af*e/))",
        query.toString());

    // Token-based context of length 1 on both sides of the match.
    final Krill krill = new Krill(query);
    krill.getMeta().getContext().left.setToken(true).setLength(1);
    krill.getMeta().getContext().right.setToken(true).setLength(1);

    final Result result = index.search(krill);
    assertEquals(1L, result.getTotalResults());
    assertEquals("[[affe afffe]] baum ...",
        result.getMatch(0).getSnippetBrackets());
}
use of de.ids_mannheim.korap.KrillIndex in project Krill by KorAP.
the class TestRepetitionIndex method testCase3.
/**
 * OR: a repetition query used as a clause of a {@code SpanOrQuery}.
 *
 * <p>The query is the term {@code s:e} followed (via {@code SpanNextQuery})
 * by either a repetition of {@code s:c} or a repetition of {@code s:b},
 * each built with the arguments {@code 1, 1, true} (presumably min = max = 1;
 * confirm against {@code SpanRepetitionQuery}). Three matches are expected
 * at positions 1-3, 4-6 and 7-9.
 */
@Test
public void testCase3() throws IOException {
    ki = new KrillIndex();
    ki.addDoc(createFieldDoc0());
    ki.commit();

    // e ( c{1,1} | b{1,1} )
    final SpanTermQuery leading = new SpanTermQuery(new Term("base", "s:e"));
    final SpanRepetitionQuery repeatedC = new SpanRepetitionQuery(
        new SpanTermQuery(new Term("base", "s:c")), 1, 1, true);
    final SpanRepetitionQuery repeatedB = new SpanRepetitionQuery(
        new SpanTermQuery(new Term("base", "s:b")), 1, 1, true);
    final SpanQuery sq = new SpanNextQuery(
        leading, new SpanOrQuery(repeatedC, repeatedB));

    kr = ki.search(sq, (short) 10);
    assertEquals(3L, kr.getTotalResults());

    // Expected {startPos, endPos} of each match, in result order.
    final int[][] expectedSpans = { { 1, 3 }, { 4, 6 }, { 7, 9 } };
    for (int i = 0; i < expectedSpans.length; i++) {
        assertEquals(expectedSpans[i][0], kr.getMatch(i).startPos);
        assertEquals(expectedSpans[i][1], kr.getMatch(i).endPos);
    }
}
use of de.ids_mannheim.korap.KrillIndex in project Krill by KorAP.
the class TestRepetitionIndex method testCase4.
/**
 * Counts the matches of two repetition queries over the term {@code s:c}
 * in the document produced by {@code createFieldDoc1()}.
 *
 * <p>The repetition bounds are passed as {@code 1, 3} and {@code 2, 3}
 * (presumably min and max repetitions; confirm against
 * {@code SpanRepetitionQuery}); 10 and 5 total results are expected
 * respectively.
 */
@Test
public void testCase4() throws IOException {
    ki = new KrillIndex();
    ki.addDoc(createFieldDoc1());
    ki.commit();

    // c{1,3}
    final SpanQuery oneToThree = new SpanRepetitionQuery(
        new SpanTermQuery(new Term("base", "s:c")), 1, 3, true);
    kr = ki.search(oneToThree, (short) 10);
    // Expected spans: 2-3, 2-4, 2-5, 3-4, 3-5, 3-6, 4-5, 4-6, 5-6, 7-8
    assertEquals(10L, kr.getTotalResults());

    // c{2,3}
    final SpanQuery twoToThree = new SpanRepetitionQuery(
        new SpanTermQuery(new Term("base", "s:c")), 2, 3, true);
    kr = ki.search(twoToThree, (short) 10);
    // Expected spans: 2-4, 2-5, 3-5, 3-6, 4-6
    assertEquals(5L, kr.getTotalResults());
}
use of de.ids_mannheim.korap.KrillIndex in project Krill by KorAP.
the class TestRepetitionIndex method testCase5.
/**
 * Runs repetition queries against the gzipped resource
 * {@code /wiki/00001.json.gz}, using the {@code tokens} field.
 *
 * <p>First query: a repetition of {@code tt/p:ADJA} (bounds {@code 2, 3})
 * followed by {@code tt/p:NN}; two matches are expected, 73-77 and 74-77.
 * Second query: the term {@code s:offenen} followed by the first query;
 * a single match 73-77 is expected.
 */
@Test
public void testCase5() throws IOException {
    ki = new KrillIndex();
    ki.addDoc(getClass().getResourceAsStream("/wiki/00001.json.gz"), true);
    ki.commit();

    final SpanQuery noun = new SpanTermQuery(new Term("tokens", "tt/p:NN"));
    final SpanQuery adjectives = new SpanRepetitionQuery(
        new SpanTermQuery(new Term("tokens", "tt/p:ADJA")), 2, 3, true);

    // ADJA repetition immediately followed by NN
    final SpanQuery adjectivesThenNoun = new SpanNextQuery(adjectives, noun);
    kr = ki.search(adjectivesThenNoun, (short) 10);
    assertEquals(2L, kr.getTotalResults());
    assertEquals(73, kr.getMatch(0).getStartPos());
    assertEquals(77, kr.getMatch(0).getEndPos());
    assertEquals(74, kr.getMatch(1).getStartPos());
    assertEquals(77, kr.getMatch(1).getEndPos());

    // Same sequence, additionally preceded by the surface form "offenen"
    final SpanQuery offenenThenRest = new SpanNextQuery(
        new SpanTermQuery(new Term("tokens", "s:offenen")),
        adjectivesThenNoun);
    kr = ki.search(offenenThenRest, (short) 10);
    assertEquals(1L, kr.getTotalResults());
    assertEquals(73, kr.getMatch(0).getStartPos());
    assertEquals(77, kr.getMatch(0).getEndPos());

    // Debugging aid: iterate kr.getMatches() and print each match's
    // getSnippetBrackets() together with getStartPos() and getEndPos().
}
Aggregations