Search in sources:

Example 36 with KrillIndex

use of de.ids_mannheim.korap.KrillIndex in project Krill by KorAP.

the class Resource method getCorpus.

/**
 * Return corpus info.
 *
 * Builds a response whose "stats" JSON node maps each statistic key to the
 * value returned by {@code KrillIndex.numberOf} for that key. If the freshly
 * initialised response already reports errors, it is serialised and returned
 * unchanged, without consulting the index.
 *
 * @param uri request URI information (not read by this method)
 * @return the response serialised as a JSON string
 */
@GET
@Path("/corpus")
@Produces(MediaType.APPLICATION_JSON)
public String getCorpus(@Context UriInfo uri) {
    // TODO: Accept fields!!!!
    final Response response = _initResponse();
    if (response.hasErrors()) {
        return response.toJsonString();
    }

    // TODO: Statistics should be node fields - not annotations!
    // TODO: This is just temporary
    final KrillIndex index = Node.getIndex();
    final ObjectNode stats = new ObjectMapper().createObjectNode();

    // Keys are emitted in this order; the last two are the legacy names.
    final String[] statKeys = {
        "tokens",
        "base/texts",
        "base/sentences",
        "base/paragraphs",
        // <legacy>
        "sentences",
        "paragraphs"
        // </legacy>
    };
    for (String key : statKeys) {
        stats.put(key, index.numberOf(key));
    }

    response.addJsonNode("stats", stats);
    return response.toJsonString();
}
Also used : Response(de.ids_mannheim.korap.response.Response) ObjectNode(com.fasterxml.jackson.databind.node.ObjectNode) KrillIndex(de.ids_mannheim.korap.KrillIndex) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Path(jakarta.ws.rs.Path) Produces(jakarta.ws.rs.Produces) GET(jakarta.ws.rs.GET)

Example 37 with KrillIndex

use of de.ids_mannheim.korap.KrillIndex in project Krill by KorAP.

the class TestRegexWildcardIndex method indexRegexCombined.

/**
 * Indexes a single ten-word document and searches for the segment
 * "s:affe" combined (via {@code seq(...).append(...)}) with the regular
 * expression "s:af*e". Exactly one match is expected, rendered with one
 * token of context on each side.
 */
@Test
public void indexRegexCombined() throws Exception {
    final KrillIndex index = new KrillIndex();

    // The indexed text: ten words, each with its own entry in the term vector
    final FieldDocument doc = new FieldDocument();
    doc.addTV(
        "base",
        "affe afffe baum baumgarten steingarten franz hans haus efeu effe",
        "[(0-4)s:affe|_0$<i>0<i>4|-:t$<i>10]"
            + "[(5-10)s:afffe|_1$<i>5<i>10]"
            + "[(11-15)s:baum|_2$<i>11<i>15]"
            + "[(16-26)s:baumgarten|_3$<i>16<i>26]"
            + "[(27-38)s:steingarten|_4$<i>27<i>38]"
            + "[(39-44)s:franz|_5$<i>39<i>44]"
            + "[(45-49)s:hans|_6$<i>45<i>49]"
            + "[(50-54)s:haus|_7$<i>50<i>54]"
            + "[(55-59)s:efeu|_8$<i>55<i>59]"
            + "[(60-64)s:effe|_9$<i>60<i>64]");
    index.addDoc(doc);
    index.commit();

    final QueryBuilder builder = new QueryBuilder("base");
    final SpanQuery query = builder
        .seq(builder.seg("s:affe"))
        .append(builder.re("s:af*e"))
        .toQuery();
    assertEquals(
        "spanNext(base:s:affe, SpanMultiTermQueryWrapper(base:/s:af*e/))",
        query.toString());

    // One token of context to the left and to the right of each match
    final Krill krill = new Krill(query);
    krill.getMeta().getContext().left.setToken(true).setLength(1);
    krill.getMeta().getContext().right.setToken(true).setLength(1);

    final Result result = index.search(krill);
    assertEquals(1L, result.getTotalResults());
    assertEquals("[[affe afffe]] baum ...", result.getMatch(0).getSnippetBrackets());
}
Also used : Krill(de.ids_mannheim.korap.Krill) QueryBuilder(de.ids_mannheim.korap.query.QueryBuilder) KrillIndex(de.ids_mannheim.korap.KrillIndex) SpanQuery(org.apache.lucene.search.spans.SpanQuery) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test)

Example 38 with KrillIndex

use of de.ids_mannheim.korap.KrillIndex in project Krill by KorAP.

the class TestRepetitionIndex method testCase3.

/**
 * OR
 *
 * Searches for a SpanNextQuery of the term "s:e" and a SpanOrQuery over
 * two repetition queries ("s:c" and "s:b", each with bounds 1 and 1), and
 * checks the start and end positions of the three expected matches.
 *
 * @throws IOException propagated from the index operations
 */
@Test
public void testCase3() throws IOException {
    ki = new KrillIndex();
    ki.addDoc(createFieldDoc0());
    ki.commit();

    // e(c|b): each alternative is wrapped in a repetition with bounds 1,1
    // (presumably min/max occurrences - confirm against SpanRepetitionQuery)
    final SpanQuery sq = new SpanNextQuery(
        new SpanTermQuery(new Term("base", "s:e")),
        new SpanOrQuery(
            new SpanRepetitionQuery(new SpanTermQuery(new Term("base", "s:c")), 1, 1, true),
            new SpanRepetitionQuery(new SpanTermQuery(new Term("base", "s:b")), 1, 1, true)));

    kr = ki.search(sq, (short) 10);
    assertEquals(3L, kr.getTotalResults());

    // Positions are read through the accessors, as in the sibling tests
    assertEquals(1, kr.getMatch(0).getStartPos());
    assertEquals(3, kr.getMatch(0).getEndPos());
    assertEquals(4, kr.getMatch(1).getStartPos());
    assertEquals(6, kr.getMatch(1).getEndPos());
    assertEquals(7, kr.getMatch(2).getStartPos());
    assertEquals(9, kr.getMatch(2).getEndPos());
}
Also used : SpanRepetitionQuery(de.ids_mannheim.korap.query.SpanRepetitionQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) Term(org.apache.lucene.index.Term) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) KrillIndex(de.ids_mannheim.korap.KrillIndex) SpanQuery(org.apache.lucene.search.spans.SpanQuery) SpanNextQuery(de.ids_mannheim.korap.query.SpanNextQuery) Test(org.junit.Test)

Example 39 with KrillIndex

use of de.ids_mannheim.korap.KrillIndex in project Krill by KorAP.

the class TestRepetitionIndex method testCase4.

/**
 * Runs two repetition queries over the term "s:c" against the document
 * from {@code createFieldDoc1()} and checks the total number of results
 * for each.
 *
 * @throws IOException propagated from the index operations
 */
@Test
public void testCase4() throws IOException {
    ki = new KrillIndex();
    ki.addDoc(createFieldDoc1());
    ki.commit();

    // Repetition of "s:c" with bounds 1 and 3
    // (presumably min/max occurrences, i.e. c{1,3} - confirm)
    final SpanQuery oneToThree =
        new SpanRepetitionQuery(new SpanTermQuery(new Term("base", "s:c")), 1, 3, true);
    kr = ki.search(oneToThree, (short) 10);
    // 2-3, 2-4, 2-5, 3-4, 3-5, 3-6, 4-5, 4-6, 5-6, 7-8
    assertEquals(10L, kr.getTotalResults());

    // Same term with bounds 2 and 3
    final SpanQuery twoToThree =
        new SpanRepetitionQuery(new SpanTermQuery(new Term("base", "s:c")), 2, 3, true);
    kr = ki.search(twoToThree, (short) 10);
    // 2-4, 2-5, 3-5, 3-6, 4-6
    assertEquals(5L, kr.getTotalResults());
}
Also used : SpanRepetitionQuery(de.ids_mannheim.korap.query.SpanRepetitionQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) Term(org.apache.lucene.index.Term) KrillIndex(de.ids_mannheim.korap.KrillIndex) SpanQuery(org.apache.lucene.search.spans.SpanQuery) Test(org.junit.Test)

Example 40 with KrillIndex

use of de.ids_mannheim.korap.KrillIndex in project Krill by KorAP.

the class TestRepetitionIndex method testCase5.

/**
 * Indexes the resource "/wiki/00001.json.gz" and checks two queries on the
 * "tokens" field: a repetition of "tt/p:ADJA" (bounds 2 and 3) next to
 * "tt/p:NN", and that same query preceded by the term "s:offenen".
 *
 * @throws IOException propagated from the index operations
 */
@Test
public void testCase5() throws IOException {
    ki = new KrillIndex();
    ki.addDoc(getClass().getResourceAsStream("/wiki/00001.json.gz"), true);
    ki.commit();

    final SpanQuery nnTerm = new SpanTermQuery(new Term("tokens", "tt/p:NN"));
    final SpanQuery adjaRepetition =
        new SpanRepetitionQuery(new SpanTermQuery(new Term("tokens", "tt/p:ADJA")), 2, 3, true);

    // Repeated ADJA followed by NN
    final SpanQuery adjaThenNn = new SpanNextQuery(adjaRepetition, nnTerm);
    kr = ki.search(adjaThenNn, (short) 10);
    assertEquals(2L, kr.getTotalResults());
    assertEquals(73, kr.getMatch(0).getStartPos());
    assertEquals(77, kr.getMatch(0).getEndPos());
    assertEquals(74, kr.getMatch(1).getStartPos());
    assertEquals(77, kr.getMatch(1).getEndPos());

    // The previous query, now preceded by the term "s:offenen"
    final SpanQuery offenenThenRest =
        new SpanNextQuery(new SpanTermQuery(new Term("tokens", "s:offenen")), adjaThenNn);
    kr = ki.search(offenenThenRest, (short) 10);
    assertEquals(1L, kr.getTotalResults());
    assertEquals(73, kr.getMatch(0).getStartPos());
    assertEquals(77, kr.getMatch(0).getEndPos());
}
Also used : SpanRepetitionQuery(de.ids_mannheim.korap.query.SpanRepetitionQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) Term(org.apache.lucene.index.Term) KrillIndex(de.ids_mannheim.korap.KrillIndex) SpanQuery(org.apache.lucene.search.spans.SpanQuery) SpanNextQuery(de.ids_mannheim.korap.query.SpanNextQuery) Test(org.junit.Test)

Aggregations

KrillIndex (de.ids_mannheim.korap.KrillIndex): 321; Test (org.junit.Test): 310; Result (de.ids_mannheim.korap.response.Result): 143; SpanQuery (org.apache.lucene.search.spans.SpanQuery): 132; Term (org.apache.lucene.index.Term): 93; SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery): 84; Krill (de.ids_mannheim.korap.Krill): 82; QueryBuilder (de.ids_mannheim.korap.query.QueryBuilder): 56; SpanElementQuery (de.ids_mannheim.korap.query.SpanElementQuery): 42; KrillCollection (de.ids_mannheim.korap.KrillCollection): 39; TestSimple.getJsonString (de.ids_mannheim.korap.TestSimple.getJsonString): 38; SpanNextQuery (de.ids_mannheim.korap.query.SpanNextQuery): 37; Match (de.ids_mannheim.korap.response.Match): 37; FieldDocument (de.ids_mannheim.korap.index.FieldDocument): 33; JsonNode (com.fasterxml.jackson.databind.JsonNode): 28; DistanceConstraint (de.ids_mannheim.korap.query.DistanceConstraint): 27; SpanQueryWrapper (de.ids_mannheim.korap.query.wrap.SpanQueryWrapper): 26; SpanClassQuery (de.ids_mannheim.korap.query.SpanClassQuery): 25; SpanDistanceQuery (de.ids_mannheim.korap.query.SpanDistanceQuery): 20; SpanWithinQuery (de.ids_mannheim.korap.query.SpanWithinQuery): 18