use of de.ids_mannheim.korap.index.FieldDocument in project Krill by KorAP.
the class TestBenchmarkSpans method checkBenchmarkIndexDocuments.
/**
 * Benchmarks indexing of the gzipped wiki test documents.
 *
 * <p>Builds a list of 350 resource names (7 document ids repeated 50
 * times), then indexes the whole list into a fresh {@code KrillIndex}
 * ten times, committing after each round, and measures the elapsed
 * time with {@link System#nanoTime()}. The test makes no assertions;
 * the timing is only computed for manual inspection (see the
 * commented-out print statement).
 *
 * @throws IOException if closing one of the resource streams fails
 */
@Test
public void checkBenchmarkIndexDocuments() throws IOException {
    final int rounds = 10;
    final int repetitions = 50;
    final String[] ids = { "00001", "00002", "00003", "00004", "00005", "00006", "02439" };

    // Size the list for exactly the number of entries added below.
    ArrayList<String> docs = new ArrayList<String>(ids.length * repetitions);
    for (int a = 0; a < repetitions; a++) {
        for (String d : ids) {
            docs.add(d);
        }
    }

    long t1 = System.nanoTime();
    for (int i = 1; i <= rounds; i++) {
        // Construct index
        KrillIndex ki = new KrillIndex();
        // Indexing test files
        for (String d : docs) {
            // Close the stream here so the test does not rely on addDoc
            // doing it; a second close on an already closed stream is a no-op.
            try (java.io.InputStream in = getClass().getResourceAsStream("/wiki/" + d + ".json.gz")) {
                ki.addDoc(in, true);
            }
        }
        ki.commit();
    }
    long t2 = System.nanoTime();

    double seconds = (double) (t2 - t1) / 1000000000.0;
    // System.out.println("It took " + seconds + " seconds");
    // 10 times / 350 docs:
    // 36.26158006 seconds
    // 32.52575097 seconds
    // 31.818091536 seconds
    // 32.055321123 seconds
    // 32.32125959 seconds
    // 31.726277979 seconds
    // 31.65826188 seconds
    // 31.287057537 seconds
}
use of de.ids_mannheim.korap.index.FieldDocument in project Krill by KorAP.
the class TestHighlight method highlightEmptySpan.
/**
 * Checks snippet rendering for matches of the tag query {@code a},
 * including the last match, for which an HTML snippet with an empty
 * match span is expected.
 *
 * <p>A single document with twelve tokens is indexed, the query is run
 * with a token-based context of length 5 on both sides, and the test
 * expects four results. It then compares the HTML snippets of the
 * matches at index 2 and index 3 against fixed strings.
 *
 * @throws IOException declared by the index operations
 * @throws QueryException declared by the query construction
 */
@Test
public void highlightEmptySpan() throws IOException, QueryException {
    // <>:s$<b>65<i>38<b>0
    // <a>x<a>y<a>zhij</a>hij</a>hij</a>hij</a>
    final String primaryData = "x y z h i j h i j h i j ";
    final String annotations = "[(0-3)s:x|<>:a$<b>64<i>0<i>3<i>12<b>0]"
            + "[(3-6)s:y|<>:a$<b>64<i>3<i>6<i>9<b>0]"
            + "[(6-9)s:z|<>:a$<b>64<i>6<i>9<i>6|<>:a$<b>65<i>6]"
            + "[(9-12)s:h<b>0]"
            + "[(12-15)s:i]"
            + "[(15-18)s:j]"
            + "[(18-21)s:h]"
            + "[(21-24)s:i]"
            + "[(24-27)s:j]"
            + "[(27-30)s:h]"
            + "[(30-33)s:i]"
            + "[(33-36)s:j]";

    KrillIndex index = new KrillIndex();
    FieldDocument doc = new FieldDocument();
    doc.addTV("base", primaryData, annotations);
    index.addDoc(doc);
    // Make the document searchable.
    index.commit();

    QueryBuilder builder = new QueryBuilder("base");
    SpanQuery tagQuery = (SpanQuery) builder.tag("a").toQuery();
    Krill krill = new Krill(tagQuery);
    // Token-based context, five tokens to the left and to the right.
    krill.getMeta().getContext().left.setToken(true).setLength((short) 5);
    krill.getMeta().getContext().right.setToken(true).setLength((short) 5);

    Result result = index.search(krill);
    assertEquals(4L, result.getTotalResults());

    final String expectedThird = "<span class=\"context-left\">"
            + "</span>"
            + "<span class=\"match\">"
            + "<mark>x y z </mark>"
            + "</span><span class=\"context-right\">h i j h i j h i j </span>";
    Match match = result.getMatch(2);
    assertEquals(expectedThird, match.getSnippetHTML());

    // Last match: the expected snippet contains an empty match span.
    final String expectedFourth = "<span class=\"context-left\"><span class=\"match\"></span></span>";
    match = result.getMatch(3);
    assertEquals(expectedFourth, match.getSnippetHTML());
}
use of de.ids_mannheim.korap.index.FieldDocument in project Krill by KorAP.
the class TestHighlight method checkHighlightsManually.
/**
 * Checks that highlights added manually to a match are nested
 * correctly in the HTML and bracket snippets.
 *
 * <p>A three-token document ("abc") is indexed from JSON and matched
 * by the sequence {@code s:a s:b s:c}. Highlights with the class
 * numbers 7, 6 and 5 are then added one after another, and the
 * rendered snippet is compared against a fixed string after each step.
 *
 * @throws IOException declared by the index operations
 * @throws QueryException declared by the query construction
 */
@Test
public void checkHighlightsManually() throws IOException, QueryException {
    KrillIndex ki = new KrillIndex();
    // Plain concatenation is already a String; no copy constructor needed.
    String json = "{" + " \"fields\" : [" + " { " + " \"primaryData\" : \"abc\"" + " }," + " {" + " \"name\" : \"tokens\"," + " \"data\" : [" + " [ \"s:a\", \"i:a\", \"_0#0-1\", \"-:t$<i>3\"]," + " [ \"s:b\", \"i:b\", \"_1#1-2\" ]," + " [ \"s:c\", \"i:c\", \"_2#2-3\" ]" + " ]" + " }" + " ]" + "}";
    // The returned document is not needed by this test.
    ki.addDoc(json);
    ki.commit();

    QueryBuilder kq = new QueryBuilder("tokens");
    Result kr = ki.search((SpanQuery) kq.seq(kq.seg("s:a")).append(kq.seg("s:b")).append(kq.seg("s:c")).toQuery());
    Match km = kr.getMatch(0);

    // First highlight: positions 0-1 with class 7.
    km.addHighlight(0, 1, (short) 7);
    assertEquals("<span class=\"context-left\"></span><span class=\"match\"><mark><mark class=\"class-7 level-0\">ab</mark>c</mark></span><span class=\"context-right\"></span>", km.getSnippetHTML());

    // Second highlight: positions 1-2 with class 6, overlapping the first.
    km.addHighlight(1, 2, (short) 6);
    assertEquals("<span class=\"context-left\"></span><span class=\"match\"><mark><mark class=\"class-7 level-0\">a<mark class=\"class-6 level-1\">b</mark></mark><mark class=\"class-6 level-1\">c</mark></mark></span><span class=\"context-right\"></span>", km.getSnippetHTML());

    // Third highlight: positions 0-1 again, this time with class 5.
    km.addHighlight(0, 1, (short) 5);
    assertEquals("[[{5:{7:a{6:b}}}{6:c}]]", km.getSnippetBrackets());
    assertEquals("<span class=\"context-left\"></span><span class=\"match\"><mark><mark class=\"class-5 level-0\"><mark class=\"class-7 level-1\">a<mark class=\"class-6 level-2\">b</mark></mark></mark><mark class=\"class-6 level-2\">c</mark></mark></span><span class=\"context-right\"></span>", km.getSnippetHTML());
}
use of de.ids_mannheim.korap.index.FieldDocument in project Krill by KorAP.
the class TestKrillCollectionIndex method createDoc1.
/**
 * Creates the test document with the ID "doc-1".
 *
 * <p>The document carries the string fields {@code ID} and
 * {@code author}, the keyword field {@code textClass}, the integer
 * field {@code pubDate} and the text field {@code text}.
 *
 * @return a new, populated {@code FieldDocument}
 */
private FieldDocument createDoc1() {
    final FieldDocument doc = new FieldDocument();

    // Metadata fields
    doc.addString("ID", "doc-1");
    doc.addString("author", "Frank");
    doc.addKeyword("textClass", "Nachricht Kultur Reisen");
    doc.addInt("pubDate", 20051210);

    // Text content
    doc.addText("text", "Der alte Mann ging über die Straße");

    return doc;
}
use of de.ids_mannheim.korap.index.FieldDocument in project Krill by KorAP.
the class TestKrillCollectionIndex method createDoc3.
/**
 * Creates the test document with the ID "doc-3".
 *
 * <p>The document carries the string fields {@code ID} and
 * {@code author}, the keyword field {@code textClass}, the integer
 * field {@code pubDate} and the text field {@code text}.
 *
 * @return a new, populated {@code FieldDocument}
 */
private FieldDocument createDoc3() {
    final FieldDocument doc = new FieldDocument();

    // Metadata fields
    doc.addString("ID", "doc-3");
    doc.addString("author", "Sebastian");
    doc.addKeyword("textClass", "Reisen Finanzen");
    doc.addInt("pubDate", 20051216);

    // Text content
    doc.addText("text", "Die Frau und der Mann küssten sich");

    return doc;
}
Aggregations