Search in sources :

Example 71 with KrillIndex

use of de.ids_mannheim.korap.KrillIndex in project Krill by KorAP.

the class TestElementIndex method indexExample4.

/**
 * Indexes one ten-token document in which two tokens carry an additional
 * {@code <>:a} annotation, queries for the element {@code a} and checks the
 * bracketed snippets of the first two matches.
 */
@Test
public void indexExample4() throws IOException {
    // Term vector for the "base" field: ten three-character tokens; the
    // entries at _3 and _6 additionally carry a "<>:a" annotation.
    final String tokenStream = "[(0-3)s:a|_0$<i>0<i>3]"
            + "[(3-6)s:b|_1$<i>3<i>6]"
            + "[(6-9)s:c|_2$<i>6<i>9]"
            + "[(9-12)s:d|_3$<i>9<i>12|<>:a$<b>64<i>9<i>15<i>4<b>0]"
            + "[(12-15)s:e|_4$<i>12<i>15]"
            + "[(15-18)s:f|_5$<i>15<i>18]"
            + "[(18-21)s:g|_6$<i>18<i>21|<>:a$<b>64<i>18<i>24<i>8<b>0]"
            + "[(21-24)s:h|_7$<i>21<i>24]"
            + "[(24-27)s:i|_8$<i>24<i>27]"
            + "[(27-30)s:j|_9$<i>27<i>30]";

    final KrillIndex index = new KrillIndex();
    final FieldDocument document = new FieldDocument();
    document.addTV("base", "111111ccc222222fff333333iiijjj", tokenStream);
    index.addDoc(document);

    // Make the added document visible to searches.
    index.commit();
    assertEquals(1, index.numberOf("documents"));

    // Same argument values as before, just named; the exact meaning of each
    // positional argument is defined by KrillIndex.search (not visible here).
    final short hitLimit = 15;
    final short contextLength = 3;
    final SpanQuery elementQuery = new SpanElementQuery("base", "a");
    final Result result = index.search(elementQuery, 0, hitLimit, false, contextLength, false, contextLength);

    assertEquals("... ccc[[222222]]fff ...", result.getMatch(0).getSnippetBrackets());
    assertEquals("... fff[[333333]]iii ...", result.getMatch(1).getSnippetBrackets());
}
Also used : KrillIndex(de.ids_mannheim.korap.KrillIndex) SpanElementQuery(de.ids_mannheim.korap.query.SpanElementQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test)

Example 72 with KrillIndex

use of de.ids_mannheim.korap.KrillIndex in project Krill by KorAP.

the class TestElementIndex method indexExample6.

/**
 * Indexes five documents and searches for the element {@code a}.
 *
 * Documents 1 and 4 each contain three {@code <>:a} annotations, document 3
 * contains one, and documents 2 and 5 contain none, so seven results are
 * expected in total.
 */
@Test
public void indexExample6() throws IOException {
    KrillIndex ki = new KrillIndex();

    // Document 1: <a>x<a>y<a>zhij</a>hij</a>hij</a>
    FieldDocument fd = new FieldDocument();
    fd.addTV("base", "x  y  z  h  i  j  h  i  j  h  i  j  ",
            "[(0-3)s:x|_0$<i>0<i>3|<>:a$<b>64<i>0<i>36<i>12<b>0]"
            + "[(3-6)s:y|_1$<i>3<i>6|<>:a$<b>64<i>3<i>27<i>9<b>0]"
            + "[(6-9)s:z|_2$<i>6<i>9|<>:a$<b>64<i>6<i>18<i>6<b>0]"
            + "[(9-12)s:h|_3$<i>9<i>12]"
            + "[(12-15)s:i|_4$<i>12<i>15]"
            + "[(15-18)s:j|_5$<i>15<i>18]"
            + "[(18-21)s:h|_6$<i>18<i>21]"
            + "[(21-24)s:i|_7$<i>21<i>24]"
            + "[(24-27)s:j|_8$<i>24<i>27]"
            + "[(27-30)s:h|_9$<i>27<i>30]"
            + "[(30-33)s:i|_10$<i>30<i>33]"
            + "[(33-36)s:j|_11$<i>33<i>36]");
    ki.addDoc(fd);

    // Document 2: four plain tokens, no element annotation.
    fd = new FieldDocument();
    fd.addTV("base", "x  y  z  h  ",
            "[(0-3)s:x|_0$<i>0<i>3]"
            + "[(3-6)s:y|_1$<i>3<i>6]"
            + "[(6-9)s:z|_2$<i>6<i>9]"
            + "[(9-12)s:h|_3$<i>9<i>12]");
    ki.addDoc(fd);

    // Document 3: here is a larger offset than expected - the element
    // annotation points past the end of the four-token text.
    fd = new FieldDocument();
    fd.addTV("base", "x  y  z  h  ",
            "[(0-3)s:x|_0$<i>0<i>3|<>:a$<b>64<i>0<i>36<i>12<b>0]"
            + "[(3-6)s:y|_1$<i>3<i>6]"
            + "[(6-9)s:z|_2$<i>6<i>9]"
            + "[(9-12)s:h|_3$<i>9<i>12]");
    ki.addDoc(fd);

    // Document 4: <a>x<a>y<a>zabc</a>abc</a>abc</a>
    fd = new FieldDocument();
    fd.addTV("base", "x  y  z  a  b  c  a  b  c  a  b  c  ",
            "[(0-3)s:x|_0$<i>0<i>3|<>:a$<b>64<i>0<i>36<i>12<b>0]"
            + "[(3-6)s:y|_1$<i>3<i>6|<>:a$<b>64<i>3<i>27<i>9<b>0]"
            + "[(6-9)s:z|_2$<i>6<i>9|<>:a$<b>64<i>6<i>18<i>6<b>0]"
            + "[(9-12)s:a|_3$<i>9<i>12]"
            + "[(12-15)s:b|_4$<i>12<i>15]"
            + "[(15-18)s:c|_5$<i>15<i>18]"
            + "[(18-21)s:a|_6$<i>18<i>21]"
            + "[(21-24)s:b|_7$<i>21<i>24]"
            + "[(24-27)s:c|_8$<i>24<i>27]"
            + "[(27-30)s:a|_9$<i>27<i>30]"
            + "[(30-33)s:b|_10$<i>30<i>33]"
            + "[(33-36)s:c|_11$<i>33<i>36]");
    ki.addDoc(fd);

    // Document 5: four plain tokens, no element annotation.
    fd = new FieldDocument();
    fd.addTV("base", "x  y  z  h  ",
            "[(0-3)s:x|_0$<i>0<i>3]"
            + "[(3-6)s:y|_1$<i>3<i>6]"
            + "[(6-9)s:z|_2$<i>6<i>9]"
            + "[(9-12)s:h|_3$<i>9<i>12]");
    ki.addDoc(fd);

    // Save documents
    ki.commit();

    SpanQuery sq = new SpanElementQuery("base", "a");
    Result kr = ki.search(sq, (short) 15);

    assertEquals(5, ki.numberOf("documents"));
    // JUnit expects (message, expected, actual); the expected value goes first
    // so that a failure reports the real result count as "actual".
    assertEquals("totalResults", 7, kr.getTotalResults());
}
Also used : KrillIndex(de.ids_mannheim.korap.KrillIndex) SpanElementQuery(de.ids_mannheim.korap.query.SpanElementQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test)

Example 73 with KrillIndex

use of de.ids_mannheim.korap.KrillIndex in project Krill by KorAP.

the class TestFieldDocument method indexArbitraryMetaDataPartial.

/**
 * Indexes the document produced by {@code createDocString1()} and requests
 * only the metadata fields "datum" and "titel" for the text "aa/bb/cc".
 * Verifies that exactly these two fields come back, in that order.
 */
@Test
public void indexArbitraryMetaDataPartial() throws Exception {
    String json = createDocString1();
    KrillIndex ki = new KrillIndex();
    ki.addDoc(json);
    ki.commit();

    // Parameterized instead of a raw ArrayList so the element type is checked.
    ArrayList<String> hs = new ArrayList<String>();
    hs.add("datum");
    hs.add("titel");

    JsonNode res = ki.getFields("aa/bb/cc", hs).toJsonNode();

    assertEquals("type:date", res.at("/document/fields/0/type").asText());
    assertEquals("datum", res.at("/document/fields/0/key").asText());
    assertEquals("2018-04-03", res.at("/document/fields/0/value").asText());

    assertEquals("type:text", res.at("/document/fields/1/type").asText());
    assertEquals("titel", res.at("/document/fields/1/key").asText());
    assertEquals("Der alte Baum", res.at("/document/fields/1/value").asText());

    // No third field: only the two requested ones may be returned.
    assertTrue(res.at("/document/fields/2").isMissingNode());
}
Also used : JsonNode(com.fasterxml.jackson.databind.JsonNode) KrillIndex(de.ids_mannheim.korap.KrillIndex) Test(org.junit.Test)

Example 74 with KrillIndex

use of de.ids_mannheim.korap.KrillIndex in project Krill by KorAP.

the class TestFieldDocument method indexExample3.

/**
 * Indexes seven documents from the {@code /wiki/} test resources, runs a
 * "contains" query and checks the bracketed snippet of the first match.
 * Then disables snippet retrieval and verifies that the match JSON carries
 * no snippet-related properties while metadata is still available.
 */
@Test
public void indexExample3() throws IOException {
    // Construct index
    KrillIndex ki = new KrillIndex();

    // Indexing test files
    // NOTE(review): the boolean argument presumably marks the stream as
    // gzipped (the resources end in ".json.gz") - confirm against KrillIndex.addDoc.
    for (String i : new String[] { "00001", "00002", "00003", "00004", "00005", "00006", "02439" }) {
        ki.addDoc(getClass().getResourceAsStream("/wiki/" + i + ".json.gz"), true);
    }
    ki.commit();

    QueryBuilder kq = new QueryBuilder("tokens");

    // Start creating query
    // within(<s>, {1: {2: [mate/p=ADJA & mate/m=number:sg]}[opennlp/p=NN & tt/p=NN]})
    Krill ks = new Krill(kq.contains(kq.tag("base/s:s"), kq.nr(1, kq.seq(kq.seg("mate/p:ADJA")).append(kq.seg("opennlp/p:NN")))));

    KrillMeta meta = ks.getMeta();
    meta.setCount(1);
    meta.setCutOff(true);
    // Left context is measured in characters, right context in tokens.
    meta.getContext().left.setCharacter(true).setLength(6);
    meta.getContext().right.setToken(true).setLength(6);

    assertEquals("... okal. [[Der Buchstabe A hat in {1:deutschen Texten} eine durchschnittliche Häufigkeit von 6,51 %.]] Er ist damit der sechsthäufigste Buchstabe ...", ks.apply(ki).getMatch(0).getSnippetBrackets());

    // Do not retrieve snippets
    meta.setSnippets(false);
    Match km = ks.apply(ki).getMatch(0);

    assertEquals("Ruru,Jens.Ol,Aglarech", km.toJsonNode().get("author").asText());
    assertEquals("", km.getPrimaryData());
    assertFalse(km.toJsonNode().has("snippet"));
    assertFalse(km.toJsonNode().has("startMore"));
    assertFalse(km.toJsonNode().has("endMore"));
    assertFalse(km.toJsonNode().has("endCutted"));
}
Also used : Krill(de.ids_mannheim.korap.Krill) KrillMeta(de.ids_mannheim.korap.KrillMeta) QueryBuilder(de.ids_mannheim.korap.query.QueryBuilder) KrillIndex(de.ids_mannheim.korap.KrillIndex) Result(de.ids_mannheim.korap.response.Result) Match(de.ids_mannheim.korap.response.Match) Test(org.junit.Test)

Example 75 with KrillIndex

use of de.ids_mannheim.korap.KrillIndex in project Krill by KorAP.

the class TestFieldDocument method indexNewMetaData.

@Test
public void indexNewMetaData() throws Exception {
    String json = new String("{" + "  \"data\" : {" + "    \"text\" : \"abc\"," + "    \"name\" : \"tokens\"," + "    \"stream\" : [" + "       [ \"s:a\", \"i:a\", \"_0$<i>0<i>1\", \"-:t$<i>3\"]," + "       [ \"s:b\", \"i:b\", \"_1$<i>1<i>2\" ]," + "       [ \"s:c\", \"i:c\", \"_2$<i>2<i>3\" ]" + "    ]" + "  }," + "  \"fields\" : [" + "    {" + "      \"@type\" : \"koral:field\"," + "      \"type\" : \"type:string\"," + "      \"key\" : \"corpusID\"," + "      \"value\" : \"WPD\"" + "    }," + "    {" + "      \"@type\" : \"koral:field\"," + "      \"type\" : \"type:string\"," + "      \"key\" : \"textSigle\"," + "      \"value\" : \"x/y/z\"" + "    }," + "    {" + "      \"@type\" : \"koral:field\"," + "      \"type\" : \"type:string\"," + "      \"key\" : \"ID\"," + "      \"value\" : \"WPD-AAA-00001\"" + "    }," + "    {" + "      \"@type\" : \"koral:field\"," + "      \"type\" : \"type:string\"," + "      \"key\" : \"textClass\"," + "      \"value\" : [\"music\",\"entertainment\"]" + "    }," + "    {" + "      \"@type\" : \"koral:field\"," + "      \"type\" : \"type:text\"," + "      \"key\" : \"author\"," + "      \"value\" : \"Peter Frankenfeld\"" + "    }," + "    {" + "      \"@type\" : \"koral:field\"," + "      \"type\" : \"type:date\"," + "      \"key\" : \"pubDate\"," + "      \"value\" : \"2015-05-01\"" + "    }," + "    {" + "      \"@type\" : \"koral:field\"," + "      \"type\" : \"type:text\"," + "      \"key\" : \"title\"," + "      \"value\" : \"Wikipedia\"" + "    }," + "    {" + "      \"@type\" : \"koral:field\"," + "      \"type\" : \"type:text\"," + "      \"key\" : \"subTitle\"," + "      \"value\" : \"Die freie Enzyklopädie\"" + "    }," + "    {" + "      \"@type\" : \"koral:field\"," + "      \"type\" : \"type:string\"," + "      \"key\" : \"pubPlace\"," + "      \"value\" : \"Bochum\"" + "    }," + "    {" + "      \"@type\" : \"koral:field\"," + "      \"type\" : \"type:attachement\"," + "      \"key\" : \"link\"," + "      \"value\" 
: \"data:application/x.korap-link,https://de.wikipedia.org/wiki/Beispiel\"" + "    }" + "  ]" + "}");
    KrillIndex ki = new KrillIndex();
    FieldDocument fd = ki.addDoc(json);
    ki.commit();
    assertEquals(fd.getPrimaryData(), "abc");
    // assertEquals(fd.doc.getField("corpusID").stringValue(), "WPD");
    assertEquals(fd.doc.getField("textSigle").stringValue(), "x/y/z");
    assertEquals(fd.doc.getField("ID").stringValue(), "WPD-AAA-00001");
    assertEquals(fd.doc.getField("textClass").stringValue(), "music entertainment");
    assertEquals(fd.doc.getField("author").stringValue(), "Peter Frankenfeld");
    assertEquals(fd.doc.getField("title").stringValue(), "Wikipedia");
    assertEquals(fd.doc.getField("subTitle").stringValue(), "Die freie Enzyklopädie");
    assertEquals(fd.doc.getField("pubPlace").stringValue(), "Bochum");
    assertEquals(fd.doc.getField("pubDate").stringValue(), "20150501");
    assertEquals(fd.doc.getField("link").stringValue(), "data:application/x.korap-link,https://de.wikipedia.org/wiki/Beispiel");
    JsonNode res = ki.getFields("x/y/z").toJsonNode();
    Iterator fieldIter = res.at("/document/fields").elements();
    int checkC = 0;
    while (fieldIter.hasNext()) {
        JsonNode field = (JsonNode) fieldIter.next();
        String key = field.at("/key").asText();
        switch(key) {
            case "corpusID":
                assertEquals("type:string", field.at("/type").asText());
                assertEquals("koral:field", field.at("/@type").asText());
                assertEquals("WPD", field.at("/value").asText());
                checkC++;
                break;
            case "textSigle":
                assertEquals("type:string", field.at("/type").asText());
                assertEquals("koral:field", field.at("/@type").asText());
                assertEquals("x/y/z", field.at("/value").asText());
                checkC++;
                break;
            case "ID":
                assertEquals("type:string", field.at("/type").asText());
                assertEquals("koral:field", field.at("/@type").asText());
                assertEquals("WPD-AAA-00001", field.at("/value").asText());
                checkC++;
                break;
            case "textClass":
                assertEquals("type:keywords", field.at("/type").asText());
                assertEquals("koral:field", field.at("/@type").asText());
                assertEquals("music", field.at("/value/0").asText());
                assertEquals("entertainment", field.at("/value/1").asText());
                checkC++;
                break;
            case "author":
                assertEquals("type:text", field.at("/type").asText());
                assertEquals("koral:field", field.at("/@type").asText());
                assertEquals("Peter Frankenfeld", field.at("/value").asText());
                checkC++;
                break;
            case "title":
                assertEquals("type:text", field.at("/type").asText());
                assertEquals("koral:field", field.at("/@type").asText());
                assertEquals("Wikipedia", field.at("/value").asText());
                checkC++;
                break;
            case "subTitle":
                assertEquals("type:text", field.at("/type").asText());
                assertEquals("koral:field", field.at("/@type").asText());
                assertEquals("Die freie Enzyklopädie", field.at("/value").asText());
                checkC++;
                break;
            case "pubPlace":
                assertEquals("type:string", field.at("/type").asText());
                assertEquals("koral:field", field.at("/@type").asText());
                assertEquals("Bochum", field.at("/value").asText());
                checkC++;
                break;
            case "pubDate":
                assertEquals("type:date", field.at("/type").asText());
                assertEquals("koral:field", field.at("/@type").asText());
                assertEquals("2015-05-01", field.at("/value").asText());
                checkC++;
                break;
            case "link":
                assertEquals("type:attachement", field.at("/type").asText());
                assertEquals("koral:field", field.at("/@type").asText());
                assertEquals("data:application/x.korap-link,https://de.wikipedia.org/wiki/Beispiel", field.at("/value").asText());
                checkC++;
                break;
            default:
                fail("Unknown field: " + key);
        }
        ;
    }
    ;
}
Also used : JsonNode(com.fasterxml.jackson.databind.JsonNode) KrillIndex(de.ids_mannheim.korap.KrillIndex) Test(org.junit.Test)

Aggregations

KrillIndex (de.ids_mannheim.korap.KrillIndex): 321, Test (org.junit.Test): 310, Result (de.ids_mannheim.korap.response.Result): 143, SpanQuery (org.apache.lucene.search.spans.SpanQuery): 132, Term (org.apache.lucene.index.Term): 93, SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery): 84, Krill (de.ids_mannheim.korap.Krill): 82, QueryBuilder (de.ids_mannheim.korap.query.QueryBuilder): 56, SpanElementQuery (de.ids_mannheim.korap.query.SpanElementQuery): 42, KrillCollection (de.ids_mannheim.korap.KrillCollection): 39, TestSimple.getJsonString (de.ids_mannheim.korap.TestSimple.getJsonString): 38, SpanNextQuery (de.ids_mannheim.korap.query.SpanNextQuery): 37, Match (de.ids_mannheim.korap.response.Match): 37, FieldDocument (de.ids_mannheim.korap.index.FieldDocument): 33, JsonNode (com.fasterxml.jackson.databind.JsonNode): 28, DistanceConstraint (de.ids_mannheim.korap.query.DistanceConstraint): 27, SpanQueryWrapper (de.ids_mannheim.korap.query.wrap.SpanQueryWrapper): 26, SpanClassQuery (de.ids_mannheim.korap.query.SpanClassQuery): 25, SpanDistanceQuery (de.ids_mannheim.korap.query.SpanDistanceQuery): 20, SpanWithinQuery (de.ids_mannheim.korap.query.SpanWithinQuery): 18