Search in sources:

Example 1 with KrillIndex

use of de.ids_mannheim.korap.KrillIndex in project Krill by KorAP.

the class TestBenchmarkSpans method checkBenchmarkIndexDocuments.

/**
 * Benchmark: repeatedly builds a fresh in-memory {@link KrillIndex} from
 * gzipped JSON test documents and measures the elapsed wall-clock time.
 *
 * <p>Seven resource documents under {@code /wiki/} are each queued 50 times
 * (350 documents per round) and indexed in 10 rounds. Nothing is asserted;
 * the elapsed time is computed but not reported.
 *
 * @throws IOException if closing a document resource stream fails
 */
@Test
public void checkBenchmarkIndexDocuments() throws IOException {
    final int rounds = 10;
    final int repetitions = 50;
    final String[] docIds = { "00001", "00002", "00003", "00004", "00005", "00006", "02439" };

    // Build the work list up front so that it is not part of the measurement.
    ArrayList<String> docs = new ArrayList<String>(repetitions * docIds.length);
    for (int a = 0; a < repetitions; a++) {
        for (String d : docIds) {
            docs.add(d);
        }
    }

    final long t1 = System.nanoTime();
    for (int i = 1; i <= rounds; i++) {
        // Construct index
        KrillIndex ki = new KrillIndex();
        // Indexing test files
        for (String d : docs) {
            // Close each resource stream deterministically instead of leaving it
            // to the garbage collector (350 streams are opened per round).
            try (InputStream in = getClass().getResourceAsStream("/wiki/" + d + ".json.gz")) {
                ki.addDoc(in, true);
            }
        }
        ki.commit();
    }
    final long t2 = System.nanoTime();

    // Elapsed time in seconds; kept for ad-hoc inspection while benchmarking.
    double seconds = (double) (t2 - t1) / 1000000000.0;
    // System.out.println("It took " + seconds + " seconds");
    // 10 times / 350 docs:
    // 36.26158006 seconds
    // 32.52575097 seconds
    // 31.818091536 seconds
    // 32.055321123 seconds
    // 32.32125959 seconds
    // 31.726277979 seconds
    // 31.65826188 seconds
    // 31.287057537 seconds
}
Also used : FieldDocument(de.ids_mannheim.korap.index.FieldDocument) KrillIndex(de.ids_mannheim.korap.KrillIndex) Test(org.junit.Test)

Example 2 with KrillIndex

use of de.ids_mannheim.korap.KrillIndex in project Krill by KorAP.

the class TestBenchmarkSpans method checkBenchmarkSentences.

/**
 * Benchmark: runs the query from {@code /queries/benchmark4.jsonld} 10 times
 * against the on-disk index configured by {@code krill.indexDir} in
 * {@code /krill.properties}, then checks the total result count of the last run.
 *
 * <p>The count is expected to equal both the fixed value 4116282 and the
 * number of "sentences" reported by the index.
 *
 * @throws IOException if the properties file or the index cannot be read
 */
@Test
public void checkBenchmarkSentences() throws IOException {
    Properties prop = new Properties();
    // Close the properties stream once loaded; the original left it open.
    try (InputStream fr = new FileInputStream(getClass().getResource("/krill.properties").getFile())) {
        prop.load(fr);
    }
    // Get the real index
    KrillIndex ki = new KrillIndex(new MMapDirectory(Paths.get(prop.getProperty("krill.indexDir"))));
    // Create a container for virtual collections.
    // NOTE(review): kc is not used below; retained in case construction has
    // side effects - confirm and remove if not.
    KrillCollection kc = new KrillCollection(ki);
    // / cosmas20.json!!!
    String json = getString(getClass().getResource("/queries/benchmark4.jsonld").getFile());
    int rounds = 10;
    Result kr = new Result();
    final long t1 = System.nanoTime();
    for (int i = 1; i <= rounds; i++) {
        kr = new Krill(json).apply(ki);
    }
    final long t2 = System.nanoTime();
    // System.err.println(kr.getMatch(0).toJSON());
    // JUnit argument order is (message, expected, actual); the original had
    // expected and actual swapped, which produces misleading failure messages.
    assertEquals("TotalResults1", 4116282, kr.getTotalResults());
    assertEquals("TotalResults2", ki.numberOf("sentences"), kr.getTotalResults());
    // Elapsed time in seconds; kept for ad-hoc inspection while benchmarking.
    double seconds = (double) (t2 - t1) / 1000000000.0;
    // System.out.println("It took " + seconds + " seconds");
    // Recorded for 100 rounds (rounds is currently 10):
    // 56.253 secs
}
Also used : Krill(de.ids_mannheim.korap.Krill) KrillIndex(de.ids_mannheim.korap.KrillIndex) MMapDirectory(org.apache.lucene.store.MMapDirectory) KrillCollection(de.ids_mannheim.korap.KrillCollection) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test)

Example 3 with KrillIndex

use of de.ids_mannheim.korap.KrillIndex in project Krill by KorAP.

the class TestBenchmarkSpans method checkBenchmark2JSON.

/**
 * Benchmark: runs the query from {@code /queries/benchmark2.jsonld} 10000
 * times against the on-disk index configured by {@code krill.indexDir} in
 * {@code /krill.properties}, serialising every result to a JSON string.
 *
 * <p>Nothing is asserted; the elapsed time is computed but not reported.
 *
 * @throws IOException if the properties file or the index cannot be read
 */
@Test
public void checkBenchmark2JSON() throws IOException {
    Properties prop = new Properties();
    // Close the properties stream once loaded; the original left it open.
    try (InputStream fr = new FileInputStream(getClass().getResource("/krill.properties").getFile())) {
        prop.load(fr);
    }
    // Get the real index
    KrillIndex ki = new KrillIndex(new MMapDirectory(Paths.get(prop.getProperty("krill.indexDir"))));
    // Create a container for virtual collections.
    // NOTE(review): kc is not used below; retained in case construction has
    // side effects - confirm and remove if not.
    KrillCollection kc = new KrillCollection(ki);
    // / cosmas20.json!!!
    String json = getString(getClass().getResource("/queries/benchmark2.jsonld").getFile());
    int rounds = 10000;
    Result kr = new Result();
    final long t1 = System.nanoTime();
    // Accumulates the serialised length of every result so that the
    // serialisation output is actually consumed inside the timed loop.
    double length = 0;
    for (int i = 1; i <= rounds; i++) {
        kr = new Krill(json).apply(ki);
        length += kr.toJsonString().length();
    }
    final long t2 = System.nanoTime();
    // assertEquals("TotalResults", 30751, kr.getTotalResults());
    // System.err.println(kr.toJSON());
    // Elapsed time in seconds; kept for ad-hoc inspection while benchmarking.
    double seconds = (double) (t2 - t1) / 1000000000.0;
    // System.out.println("It took " + seconds + " seconds");
    // 10000 times:
    // 77.167124985 sec
}
Also used : Krill(de.ids_mannheim.korap.Krill) KrillIndex(de.ids_mannheim.korap.KrillIndex) MMapDirectory(org.apache.lucene.store.MMapDirectory) KrillCollection(de.ids_mannheim.korap.KrillCollection) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test)

Example 4 with KrillIndex

use of de.ids_mannheim.korap.KrillIndex in project Krill by KorAP.

the class TestHighlight method highlightEmptySpan.

/**
 * Indexes one document whose token stream carries nested {@code a} element
 * annotations, searches for tag {@code a} with five tokens of context on
 * each side, and checks the HTML snippets of the third and fourth match.
 *
 * <p>Four matches are expected in total.
 */
@Test
public void highlightEmptySpan() throws IOException, QueryException {
    KrillIndex index = new KrillIndex();

    // <>:s$<b>65<i>38<b>0
    // <a>x<a>y<a>zhij</a>hij</a>hij</a>hij</a>
    final String primaryData = "x  y  z  h  i  j  h  i  j  h  i  j  ";
    final String annotations =
            "[(0-3)s:x|<>:a$<b>64<i>0<i>3<i>12<b>0]"
            + "[(3-6)s:y|<>:a$<b>64<i>3<i>6<i>9<b>0]"
            + "[(6-9)s:z|<>:a$<b>64<i>6<i>9<i>6|<>:a$<b>65<i>6]"
            + "[(9-12)s:h<b>0]"
            + "[(12-15)s:i]"
            + "[(15-18)s:j]"
            + "[(18-21)s:h]"
            + "[(21-24)s:i]"
            + "[(24-27)s:j]"
            + "[(27-30)s:h]"
            + "[(30-33)s:i]"
            + "[(33-36)s:j]";

    FieldDocument document = new FieldDocument();
    document.addTV("base", primaryData, annotations);
    index.addDoc(document);
    // Make the document searchable.
    index.commit();

    QueryBuilder builder = new QueryBuilder("base");
    SpanQuery tagQuery = (SpanQuery) builder.tag("a").toQuery();
    Krill krill = new Krill(tagQuery);
    // Five tokens of context to the left and to the right of each match.
    krill.getMeta().getContext().left.setToken(true).setLength((short) 5);
    krill.getMeta().getContext().right.setToken(true).setLength((short) 5);

    Result result = index.search(krill);
    assertEquals(4L, result.getTotalResults());

    // Third match
    final String expectedThird =
            "<span class=\"context-left\">"
            + "</span>"
            + "<span class=\"match\">"
            + "<mark>x  y  z  </mark>"
            + "</span><span class=\"context-right\">h  i  j  h  i  j  h  i  j  </span>";
    Match match = result.getMatch(2);
    assertEquals(expectedThird, match.getSnippetHTML());

    // Fourth match
    final String expectedFourth = "<span class=\"context-left\"><span class=\"match\"></span></span>";
    match = result.getMatch(3);
    assertEquals(expectedFourth, match.getSnippetHTML());
}
Also used : Krill(de.ids_mannheim.korap.Krill) QueryBuilder(de.ids_mannheim.korap.query.QueryBuilder) FieldDocument(de.ids_mannheim.korap.index.FieldDocument) KrillIndex(de.ids_mannheim.korap.KrillIndex) SpanQuery(org.apache.lucene.search.spans.SpanQuery) Result(de.ids_mannheim.korap.response.Result) Match(de.ids_mannheim.korap.response.Match) Test(org.junit.Test)

Example 5 with KrillIndex

use of de.ids_mannheim.korap.KrillIndex in project Krill by KorAP.

the class TestHighlight method checkHighlightsManually.

/**
 * Indexes a three-token document ("abc"), matches the sequence
 * {@code s:a s:b s:c}, then adds highlights to the first match one at a
 * time and checks the rendered snippet after each addition.
 */
@Test
public void checkHighlightsManually() throws IOException, QueryException {
    final String document =
            "{"
            + "  \"fields\" : ["
            + "    { "
            + "      \"primaryData\" : \"abc\""
            + "    },"
            + "    {"
            + "      \"name\" : \"tokens\","
            + "      \"data\" : ["
            + "         [ \"s:a\", \"i:a\", \"_0#0-1\", \"-:t$<i>3\"],"
            + "         [ \"s:b\", \"i:b\", \"_1#1-2\" ],"
            + "         [ \"s:c\", \"i:c\", \"_2#2-3\" ]"
            + "      ]"
            + "    }"
            + "  ]"
            + "}";

    KrillIndex index = new KrillIndex();
    index.addDoc(document);
    index.commit();

    QueryBuilder builder = new QueryBuilder("tokens");
    SpanQuery sequence = (SpanQuery) builder
            .seq(builder.seg("s:a"))
            .append(builder.seg("s:b"))
            .append(builder.seg("s:c"))
            .toQuery();
    Result result = index.search(sequence);
    Match match = result.getMatch(0);

    // First highlight: class 7 over tokens 0..1.
    match.addHighlight(0, 1, (short) 7);
    final String withClass7 = "<span class=\"context-left\"></span><span class=\"match\"><mark><mark class=\"class-7 level-0\">ab</mark>c</mark></span><span class=\"context-right\"></span>";
    assertEquals(withClass7, match.getSnippetHTML());

    // Second highlight: class 6 over tokens 1..2, overlapping the first.
    match.addHighlight(1, 2, (short) 6);
    final String withClass6 = "<span class=\"context-left\"></span><span class=\"match\"><mark><mark class=\"class-7 level-0\">a<mark class=\"class-6 level-1\">b</mark></mark><mark class=\"class-6 level-1\">c</mark></mark></span><span class=\"context-right\"></span>";
    assertEquals(withClass6, match.getSnippetHTML());

    // Third highlight: class 5 over the same span as class 7.
    match.addHighlight(0, 1, (short) 5);
    assertEquals("[[{5:{7:a{6:b}}}{6:c}]]", match.getSnippetBrackets());
    final String withClass5 = "<span class=\"context-left\"></span><span class=\"match\"><mark><mark class=\"class-5 level-0\"><mark class=\"class-7 level-1\">a<mark class=\"class-6 level-2\">b</mark></mark></mark><mark class=\"class-6 level-2\">c</mark></mark></span><span class=\"context-right\"></span>";
    assertEquals(withClass5, match.getSnippetHTML());
}
Also used : QueryBuilder(de.ids_mannheim.korap.query.QueryBuilder) FieldDocument(de.ids_mannheim.korap.index.FieldDocument) KrillIndex(de.ids_mannheim.korap.KrillIndex) Result(de.ids_mannheim.korap.response.Result) Match(de.ids_mannheim.korap.response.Match) Test(org.junit.Test)

Aggregations

KrillIndex (de.ids_mannheim.korap.KrillIndex): 237; Test (org.junit.Test): 231; SpanQuery (org.apache.lucene.search.spans.SpanQuery): 110; Result (de.ids_mannheim.korap.response.Result): 108; Term (org.apache.lucene.index.Term): 66; Krill (de.ids_mannheim.korap.Krill): 60; SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery): 57; SpanElementQuery (de.ids_mannheim.korap.query.SpanElementQuery): 37; QueryBuilder (de.ids_mannheim.korap.query.QueryBuilder): 33; SpanNextQuery (de.ids_mannheim.korap.query.SpanNextQuery): 31; Match (de.ids_mannheim.korap.response.Match): 30; FieldDocument (de.ids_mannheim.korap.index.FieldDocument): 28; KrillCollection (de.ids_mannheim.korap.KrillCollection): 24; SpanClassQuery (de.ids_mannheim.korap.query.SpanClassQuery): 24; TestSimple.getJsonString (de.ids_mannheim.korap.TestSimple.getJsonString): 22; JsonNode (com.fasterxml.jackson.databind.JsonNode): 21; DistanceConstraint (de.ids_mannheim.korap.query.DistanceConstraint): 20; SpanWithinQuery (de.ids_mannheim.korap.query.SpanWithinQuery): 17; SpanQueryWrapper (de.ids_mannheim.korap.query.wrap.SpanQueryWrapper): 17; ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 13