Search in sources:

Example 26 with KrillIndex

Use of de.ids_mannheim.korap.KrillIndex in the project Krill by KorAP.

From the class TestWithinIndex, method indexExample4.

/**
 * Indexes one document whose {@code <a>} elements are nested and adjacent
 * (see the inline sketch below) and searches for {@code <a>} elements that
 * contain the term {@code s:x}.
 *
 * <p>Two matches are expected: the inner {@code <a>} covering positions 2-3
 * and the outer {@code <a>} covering positions 2-5.
 *
 * @throws IOException if thrown by the underlying index operations
 */
@Test
public void indexExample4() throws IOException {
    KrillIndex ki = new KrillIndex();
    // Case 1, 6, 7, 13
    // xy<a><a>x</a>b<a>c</a></a>x
    FieldDocument fd = new FieldDocument();
    fd.addTV("base", "x  y  x  b  c  x  ",
            "[(0-3)s:x|_0$<i>0<i>3]"
            + "[(3-6)s:y|_1$<i>3<i>6]"
            + "[(6-9)s:x|_2$<i>6<i>9|<>:a$<b>64<i>6<i>9<i>3<b>0|"
            + "<>:a$<b>64<i>6<i>15<i>5<b>0]"
            + "[(9-12)s:b|_3$<i>9<i>12]"
            + "[(12-15)s:c|_4$<i>12<i>15|<>:a$<b>64<i>12<i>15<i>5<b>0]"
            + "[(15-18)s:x|_5$<i>15<i>18]");
    ki.addDoc(fd);
    // Save documents
    ki.commit();
    assertEquals(1, ki.numberOf("documents"));

    SpanQuery sq = new SpanWithinQuery(
            new SpanElementQuery("base", "a"),
            new SpanTermQuery(new Term("base", "s:x")));
    assertEquals("spanContain(<base:a />, base:s:x)", sq.toString());

    Result kr = ki.search(sq, (short) 10);
    // JUnit expects (message, expected, actual); the expected value goes first
    // so that a failure report names the right side as "expected".
    assertEquals("totalResults", 2, kr.getTotalResults());
    assertEquals("x  y  [[x  ]]b  c  x  ", kr.getMatch(0).getSnippetBrackets());
    assertEquals("x  y  [[x  b  c  ]]x  ", kr.getMatch(1).getSnippetBrackets());
    assertEquals("StartPos (0)", 2, kr.getMatch(0).startPos);
    assertEquals("EndPos (0)", 3, kr.getMatch(0).endPos);
    assertEquals("StartPos (1)", 2, kr.getMatch(1).startPos);
    assertEquals("EndPos (1)", 5, kr.getMatch(1).endPos);
}
Also used : SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) SpanWithinQuery(de.ids_mannheim.korap.query.SpanWithinQuery) Term(org.apache.lucene.index.Term) KrillIndex(de.ids_mannheim.korap.KrillIndex) SpanElementQuery(de.ids_mannheim.korap.query.SpanElementQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test)

Example 27 with KrillIndex

Use of de.ids_mannheim.korap.KrillIndex in the project Krill by KorAP.

From the class TestWithinIndex, method indexExample7.

/**
 * Indexes one document with two {@code <a>} elements and searches for
 * {@code <a>} elements that contain the token sequence {@code h i j}.
 *
 * <p>According to the inline sketch below, the sequence always crosses an
 * element boundary, so no match is expected.
 *
 * @throws IOException if thrown by the underlying index operations
 */
@Test
public void indexExample7() throws IOException {
    KrillIndex ki = new KrillIndex();
    // 4,5,11,13
    // x<a>x h</a>i j h<a>i j</a>
    FieldDocument fd = new FieldDocument();
    fd.addTV("base", "xx hi j hi j",
            "[(0-1)s:x|i:x|_0$<i>0<i>1|-:a$<i>2|-:t$<i>8]"
            + "[(1-2)s:x|i:x|_1$<i>1<i>2|<>:a$<b>64<i>1<i>4<i>3<b>0]"
            + "[(3-4)s:h|i:h|_2$<i>3<i>4]"
            + "[(4-5)s:i|i:i|_3$<i>4<i>5]"
            + "[(6-7)s:j|i:j|_4$<i>6<i>7]"
            + "[(8-9)s:h|i:h|_5$<i>8<i>9]"
            + "[(9-10)s:i|i:i|_6$<i>9<i>10|<>:a$<b>64<i>9<i>12<i>8<b>0]"
            + "[(11-12)s:j|i:j|_7$<i>11<i>12]");
    ki.addDoc(fd);
    // Save documents
    ki.commit();
    assertEquals(1, ki.numberOf("documents"));

    // Build the sequence "h i j" as h followed by (i followed by j).
    SpanQuery tokenH = new SpanTermQuery(new Term("base", "s:h"));
    SpanQuery tokenI = new SpanTermQuery(new Term("base", "s:i"));
    SpanQuery tokenJ = new SpanTermQuery(new Term("base", "s:j"));
    SpanQuery sequence = new SpanNextQuery(tokenH, new SpanNextQuery(tokenI, tokenJ));
    SpanQuery sq = new SpanWithinQuery(new SpanElementQuery("base", "a"), sequence);

    Result kr = ki.search(sq, (short) 10);
    // JUnit expects (message, expected, actual); the expected value goes first.
    assertEquals("totalResults", 0, kr.getTotalResults());
}
Also used : SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) SpanWithinQuery(de.ids_mannheim.korap.query.SpanWithinQuery) Term(org.apache.lucene.index.Term) KrillIndex(de.ids_mannheim.korap.KrillIndex) SpanElementQuery(de.ids_mannheim.korap.query.SpanElementQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) SpanNextQuery(de.ids_mannheim.korap.query.SpanNextQuery) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test)

Example 28 with KrillIndex

Use of de.ids_mannheim.korap.KrillIndex in the project Krill by KorAP.

From the class TestWithinIndex, method indexExample1a.

// TODO: store the primary data separately as a non-indexed field.
/**
 * Indexes one document with three nested {@code <a>} elements (see the
 * inline sketch below) and searches for {@code <a>} elements that contain
 * the term {@code s:h}.
 *
 * <p>Six matches are expected: three reported with the outermost element
 * span (0-12), two with the middle one (1-9) and one with the innermost
 * one (2-6).
 *
 * @throws IOException if thrown by the underlying index operations
 */
@Test
public void indexExample1a() throws IOException {
    KrillIndex ki = new KrillIndex();
    // <a>x<a>y<a>zhij</a>hij</a>hij</a>
    FieldDocument fd = new FieldDocument();
    fd.addTV("base", "x   y   z   h   i   j   h   i   j   h   i   j   ",
            "[(0-3)s:x|<>:a$<b>64<i>0<i>36<i>12<b>0]" // 1
            + "[(3-6)s:y|<>:a$<b>64<i>3<i>27<i>9<b>0]" // 2
            + "[(6-9)s:z|<>:a$<b>64<i>6<i>18<i>6<b>0]" // 3
            + "[(9-12)s:h]" // 4
            + "[(12-15)s:i]" // 5
            + "[(15-18)s:j]" // 6
            + "[(18-21)s:h]" // 7
            + "[(21-24)s:i]" // 8
            + "[(24-27)s:j]" // 9
            + "[(27-30)s:h]" // 10
            + "[(30-33)s:i]" // 11
            + "[(33-36)s:j]"); // 12
    ki.addDoc(fd);
    ki.commit();

    SpanQuery sq = new SpanWithinQuery(
            new SpanElementQuery("base", "a"),
            new SpanTermQuery(new Term("base", "s:h")));
    Result kr = ki.search(sq, (short) 10);

    // JUnit expects (message, expected, actual); the expected value goes first.
    assertEquals("totalResults", 6, kr.getTotalResults());
    assertEquals("StartPos (0)", 0, kr.getMatch(0).startPos);
    assertEquals("EndPos (0)", 12, kr.getMatch(0).endPos);
    assertEquals("StartPos (1)", 0, kr.getMatch(1).startPos);
    assertEquals("EndPos (1)", 12, kr.getMatch(1).endPos);
    assertEquals("StartPos (2)", 0, kr.getMatch(2).startPos);
    assertEquals("EndPos (2)", 12, kr.getMatch(2).endPos);
    assertEquals("StartPos (3)", 1, kr.getMatch(3).startPos);
    assertEquals("EndPos (3)", 9, kr.getMatch(3).endPos);
    assertEquals("StartPos (4)", 1, kr.getMatch(4).startPos);
    assertEquals("EndPos (4)", 9, kr.getMatch(4).endPos);
    assertEquals("StartPos (5)", 2, kr.getMatch(5).startPos);
    assertEquals("EndPos (5)", 6, kr.getMatch(5).endPos);
    assertEquals(1, ki.numberOf("documents"));
}
Also used : SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) SpanWithinQuery(de.ids_mannheim.korap.query.SpanWithinQuery) Term(org.apache.lucene.index.Term) KrillIndex(de.ids_mannheim.korap.KrillIndex) SpanElementQuery(de.ids_mannheim.korap.query.SpanElementQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test)

Example 29 with KrillIndex

Use of de.ids_mannheim.korap.KrillIndex in the project Krill by KorAP.

From the class TestTemporaryQueryLimitations, method classRefCheckNotSupported.

/**
 * Runs the serialized query {@code /queries/bugs/cosmas_classrefcheck.jsonld}
 * against a three-token index and checks the serialized form of the query,
 * the (empty) result set and the two warnings reported on the result.
 *
 * @throws IOException if thrown by the index or resource-loading operations
 * @throws QueryException if thrown while processing the query
 */
@Test
public void classRefCheckNotSupported() throws IOException, QueryException {
    // Construct index
    KrillIndex ki = new KrillIndex();
    // A plain concatenation suffices; wrapping it in new String(...) only
    // creates a redundant copy.
    String json = "{"
            + "  \"fields\" : ["
            + "    { "
            + "      \"primaryData\" : \"abc\""
            + "    },"
            + "    {"
            + "      \"name\" : \"tokens\","
            + "      \"data\" : ["
            + "         [ \"s:a\", \"i:a\", \"_0#0-1\", \"-:t$<i>3\"],"
            + "         [ \"s:b\", \"i:b\", \"_1#1-2\" ],"
            + "         [ \"s:c\", \"i:c\", \"_2#2-3\" ]"
            + "      ]"
            + "    }"
            + "  ]"
            + "}";
    // The returned document is not needed by this test.
    ki.addDoc(json);
    ki.commit();

    json = getJsonString(getClass().getResource("/queries/bugs/cosmas_classrefcheck.jsonld").getFile());
    Krill ks = new Krill(json);
    Result kr = ks.apply(ki);

    // JUnit expects (expected, actual); the expected value goes first so that
    // failure messages are not inverted.
    assertEquals("focus(130: {131: spanContain({129: <tokens:s />}, {130: tokens:s:wegen})},sorting)", kr.getSerialQuery());
    assertEquals(0, kr.getTotalResults());
    assertEquals(0, kr.getStartIndex());
    assertEquals("This is a warning coming from the serialization", kr.getWarning(1).getMessage());
    assertEquals("Class reference checks are currently not supported" + " - results may not be correct", kr.getWarning(0).getMessage());
    assertEquals(2, kr.getWarnings().size());
}
Also used : Krill(de.ids_mannheim.korap.Krill) TestSimple.getJsonString(de.ids_mannheim.korap.TestSimple.getJsonString) FieldDocument(de.ids_mannheim.korap.index.FieldDocument) KrillIndex(de.ids_mannheim.korap.KrillIndex) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test)

Example 30 with KrillIndex

Use of de.ids_mannheim.korap.KrillIndex in the project Krill by KorAP.

From the class TestMetaFields, method searchMetaFields.

/**
 * Indexes two wiki test documents and runs two serialized queries that
 * request different sets of meta fields, checking in the JSON response
 * which fields are present on the first match and which are missing.
 *
 * @throws IOException if thrown by the index, resource-loading or JSON
 *         parsing operations
 */
@Test
public void searchMetaFields() throws IOException {
    // Construct index
    KrillIndex ki = new KrillIndex();
    // Indexing test files
    for (String i : new String[] { "00001", "00002" }) {
        ki.addDoc(getClass().getResourceAsStream("/wiki/" + i + ".json.gz"), true);
    }
    ki.commit();

    // First query: fields.jsonld
    String jsonString = getJsonString(getClass().getResource("/queries/metas/fields.jsonld").getFile());
    Krill ks = new Krill(jsonString);
    Result kr = ks.apply(ki);
    assertEquals(17L, kr.getTotalResults());
    assertEquals(0, kr.getStartIndex());
    assertEquals(9, kr.getItemsPerPage());

    // One mapper is enough for both responses.
    ObjectMapper mapper = new ObjectMapper();
    JsonNode res = mapper.readTree(kr.toJsonString());
    // mirror fields
    assertEquals(9, res.at("/meta/count").asInt());
    // The two requested fields are accepted in either order.
    if (res.at("/meta/fields/0").asText().equals("UID")) {
        assertEquals("corpusID", res.at("/meta/fields/1").asText());
    } else {
        assertEquals("corpusID", res.at("/meta/fields/0").asText());
        assertEquals("UID", res.at("/meta/fields/1").asText());
    }
    assertEquals(0, res.at("/matches/0/UID").asInt());
    assertEquals("WPD", res.at("/matches/0/corpusID").asText());
    assertTrue(res.at("/matches/0/docID").isMissingNode());
    assertTrue(res.at("/matches/0/textSigle").isMissingNode());
    assertTrue(res.at("/matches/0/ID").isMissingNode());
    assertTrue(res.at("/matches/0/author").isMissingNode());
    assertTrue(res.at("/matches/0/title").isMissingNode());
    assertTrue(res.at("/matches/0/subTitle").isMissingNode());
    assertTrue(res.at("/matches/0/textClass").isMissingNode());
    assertTrue(res.at("/matches/0/pubPlace").isMissingNode());
    assertTrue(res.at("/matches/0/pubDate").isMissingNode());
    assertTrue(res.at("/matches/0/foundries").isMissingNode());
    assertTrue(res.at("/matches/0/layerInfos").isMissingNode());
    assertTrue(res.at("/matches/0/tokenization").isMissingNode());

    // Second query: fields_2.jsonld
    jsonString = getJsonString(getClass().getResource("/queries/metas/fields_2.jsonld").getFile());
    ks = new Krill(jsonString);
    kr = ks.apply(ki);
    assertEquals(17L, kr.getTotalResults());
    assertEquals(0, kr.getStartIndex());
    assertEquals(2, kr.getItemsPerPage());
    res = mapper.readTree(kr.toJsonString());
    assertEquals(0, res.at("/matches/0/UID").asInt());
    assertTrue(res.at("/matches/0/corpusID").isMissingNode());
    assertEquals("Ruru,Jens.Ol,Aglarech", res.at("/matches/0/author").asText());
    assertEquals("A", res.at("/matches/0/title").asText());
    assertEquals("WPD_AAA.00001", res.at("/matches/0/docID").asText());
    assertTrue(res.at("/matches/0/textSigle").isMissingNode());
    assertEquals("match-WPD_AAA.00001-p6-7", res.at("/matches/0/matchID").asText());
    // assertEquals("p6-7", res.at("/matches/0/matchID").asText());
    assertTrue(res.at("/matches/0/subTitle").isMissingNode());
    assertEquals("", res.at("/matches/0/subTitle").asText());
    assertEquals("", res.at("/matches/0/textClass").asText());
    assertEquals("", res.at("/matches/0/pubPlace").asText());
    assertEquals("", res.at("/matches/0/pubDate").asText());
    assertEquals("", res.at("/matches/0/foundries").asText());
    // NOTE(review): the first query checks "layerInfos" (plural) while this
    // path is "layerInfo"; confirm which key the response actually uses.
    assertEquals("", res.at("/matches/0/layerInfo").asText());
    assertEquals("", res.at("/matches/0/tokenization").asText());
}
Also used : Krill(de.ids_mannheim.korap.Krill) JsonNode(com.fasterxml.jackson.databind.JsonNode) KrillIndex(de.ids_mannheim.korap.KrillIndex) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test)

Aggregations

KrillIndex (de.ids_mannheim.korap.KrillIndex)320 Test (org.junit.Test)309 Result (de.ids_mannheim.korap.response.Result)143 SpanQuery (org.apache.lucene.search.spans.SpanQuery)132 Term (org.apache.lucene.index.Term)93 SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery)84 Krill (de.ids_mannheim.korap.Krill)82 QueryBuilder (de.ids_mannheim.korap.query.QueryBuilder)56 SpanElementQuery (de.ids_mannheim.korap.query.SpanElementQuery)42 KrillCollection (de.ids_mannheim.korap.KrillCollection)39 TestSimple.getJsonString (de.ids_mannheim.korap.TestSimple.getJsonString)38 SpanNextQuery (de.ids_mannheim.korap.query.SpanNextQuery)37 Match (de.ids_mannheim.korap.response.Match)37 FieldDocument (de.ids_mannheim.korap.index.FieldDocument)33 JsonNode (com.fasterxml.jackson.databind.JsonNode)28 DistanceConstraint (de.ids_mannheim.korap.query.DistanceConstraint)27 SpanQueryWrapper (de.ids_mannheim.korap.query.wrap.SpanQueryWrapper)26 SpanClassQuery (de.ids_mannheim.korap.query.SpanClassQuery)25 SpanDistanceQuery (de.ids_mannheim.korap.query.SpanDistanceQuery)20 SpanWithinQuery (de.ids_mannheim.korap.query.SpanWithinQuery)18