Search in sources:

Example 6 with FieldDocument

use of de.ids_mannheim.korap.index.FieldDocument in project Krill by KorAP.

the class TestKrillCollectionIndex method testIndexWithRegexes.

/**
 * Indexes the three documents produced by {@code createDoc1()},
 * {@code createDoc2()} and {@code createDoc3()} and checks how many documents
 * a collection matches for regex, text and term filters on the
 * {@code author} and {@code text} fields.
 *
 * @throws IOException declared by the test; propagated from the index operations
 */
@Test
public void testIndexWithRegexes() throws IOException {
    // Build the index; only the first document is kept for the stored-value check below.
    ki = new KrillIndex();
    final FieldDocument firstDoc = ki.addDoc(createDoc1());
    ki.addDoc(createDoc2());
    ki.addDoc(createDoc3());
    ki.commit();

    final CollectionBuilder builder = new CollectionBuilder();
    final KrillCollection collection = new KrillCollection(ki);

    // Regex filters on the author field.
    collection.fromBuilder(builder.re("author", "Fran.*"));
    assertEquals(1, collection.docCount());

    collection.fromBuilder(builder.re("author", "Blin.*"));
    assertEquals(0, collection.docCount());

    collection.fromBuilder(builder.re("author", "Frank|Peter"));
    assertEquals(2, collection.docCount());

    // "Frau" requires text request!
    collection.fromBuilder(builder.text("text", "Frau"));
    assertEquals(1, collection.docCount());

    // The lower-cased form is matched via a plain term and via regexes.
    collection.fromBuilder(builder.term("text", "frau"));
    assertEquals(1, collection.docCount());

    collection.fromBuilder(builder.re("text", "fra."));
    assertEquals(1, collection.docCount());

    collection.fromBuilder(builder.re("text", "fra.|ma.n"));
    assertEquals(3, collection.docCount());

    // The stored value of the text field is the verbatim string and matches as a whole term.
    final String storedText = firstDoc.doc.getField("text").stringValue();
    assertEquals("Der alte  Mann ging über die Straße", storedText);

    collection.fromBuilder(builder.term("text", storedText));
    assertEquals(1, collection.docCount());
}
Also used : CollectionBuilder(de.ids_mannheim.korap.collection.CollectionBuilder) FieldDocument(de.ids_mannheim.korap.index.FieldDocument) KrillIndex(de.ids_mannheim.korap.KrillIndex) KrillCollection(de.ids_mannheim.korap.KrillCollection) Test(org.junit.Test)

Example 7 with FieldDocument

use of de.ids_mannheim.korap.index.FieldDocument in project Krill by KorAP.

the class TestKrillCollectionIndex method uidCollectionLegacy.

/**
 * Indexes seven wiki fixture documents with ascending UIDs (starting at 1),
 * verifies the index-wide counts, then restricts a collection to the UIDs
 * "2", "3" and "4" and verifies the counts and the number of hits for the
 * term {@code s:der} inside that collection.
 *
 * @throws IOException declared by the test; propagated from the index operations
 */
@Test
public void uidCollectionLegacy() throws IOException {
    // Construct index
    KrillIndex ki = new KrillIndex();

    // Indexing test files; each document receives the next UID in array order.
    // NOTE(review): the trailing `true` presumably marks the resource as gzipped — confirm against KrillIndex.addDoc.
    int uid = 1;
    for (String i : new String[] { "00001", "00002", "00003", "00004", "00005", "00006", "02439" }) {
        ki.addDoc(uid++, getClass().getResourceAsStream("/wiki/" + i + ".json.gz"), true);
    }
    ki.commit();

    // Counts over the whole index.
    assertEquals("Documents", 7, ki.numberOf("documents"));
    assertEquals("Paragraphs", 174, ki.numberOf("paragraphs"));
    assertEquals("Sentences", 281, ki.numberOf("sentences"));
    assertEquals("Tokens", 2661, ki.numberOf("tokens"));

    SpanQuery sq = new SpanTermQuery(new Term("tokens", "s:der"));
    Result kr = ki.search(sq, (short) 10);
    assertEquals(86, kr.getTotalResults());

    // Create Virtual collections:
    KrillCollection kc = new KrillCollection();
    kc.filterUIDs(new String[] { "2", "3", "4" });
    kc.setIndex(ki);

    // Counts restricted to the three selected documents.
    assertEquals("Documents", 3, kc.numberOf("documents"));
    assertEquals("Paragraphs", 46, kc.numberOf("paragraphs"));
    assertEquals("Sentences", 103, kc.numberOf("sentences"));
    assertEquals("Tokens", 1229, kc.numberOf("tokens"));

    // Same query, now applied through Krill with the collection as filter.
    // NOTE(review): appears to replace an older call of the form
    // ki.search(kc, sq, 0, (short) 20, true, (short) 5, true, (short) 5) — confirm.
    Krill ks = new Krill(sq);
    ks.setCollection(kc).getMeta().setStartIndex(0).setCount((short) 20).setContext(new SearchContext(true, (short) 5, true, (short) 5));
    kr = ks.apply(ki);
    assertEquals(39L, kr.getTotalResults());
}
Also used : Krill(de.ids_mannheim.korap.Krill) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) SearchContext(de.ids_mannheim.korap.response.SearchContext) Term(org.apache.lucene.index.Term) FieldDocument(de.ids_mannheim.korap.index.FieldDocument) KrillIndex(de.ids_mannheim.korap.KrillIndex) KrillCollection(de.ids_mannheim.korap.KrillCollection) SpanQuery(org.apache.lucene.search.spans.SpanQuery) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test)

Example 8 with FieldDocument

use of de.ids_mannheim.korap.index.FieldDocument in project Krill by KorAP.

the class TestMatchIdentifier method createSigleDoc2.

/**
 * Builds a fixture document carrying corpus, document and text sigles,
 * the integer field {@code UID} set to 2, and a ten-token {@code tokens}
 * field over the surface string "abcabcabac".
 *
 * @return the populated document
 */
private FieldDocument createSigleDoc2() {
    // One bracketed entry per token, in surface order.
    final String surface = "abcabcabac";
    final String tokenStream =
        "[(0-1)s:a|i:a|_0$<i>0<i>1|-:t$<i>10]"
        + "[(1-2)s:b|i:b|_1$<i>1<i>2]"
        + "[(2-3)s:c|i:c|_2$<i>2<i>3]"
        + "[(3-4)s:a|i:a|_3$<i>3<i>4]"
        + "[(4-5)s:b|i:b|_4$<i>4<i>5]"
        + "[(5-6)s:c|i:c|_5$<i>5<i>6]"
        + "[(6-7)s:a|i:a|_6$<i>6<i>7]"
        + "[(7-8)s:b|i:b|_7$<i>7<i>8]"
        + "[(8-9)s:a|i:a|_8$<i>8<i>9]"
        + "[(9-10)s:c|i:c|_9$<i>9<i>10]";

    final FieldDocument document = new FieldDocument();
    document.addString("corpusSigle", "c1");
    document.addString("docSigle", "c1/d1");
    document.addString("textSigle", "c1/d1/t1");
    document.addInt("UID", 2);
    document.addTV("tokens", surface, tokenStream);
    return document;
}
Also used : FieldDocument(de.ids_mannheim.korap.index.FieldDocument)

Example 9 with FieldDocument

use of de.ids_mannheim.korap.index.FieldDocument in project Krill by KorAP.

the class TestMatchIdentifier method indexDependencyAnnotations.

/**
 * Indexes the fixture {@code /goe/Corpus-Doc-0002.json} with UID 2 and checks
 * the HTML snippet returned by {@code getMatchInfo} for the match
 * {@code match-Corpus/Doc/0002-p0-6} when the {@code malt} foundry is requested.
 *
 * @throws IOException declared by the test; propagated from the index operations
 * @throws QueryException declared by the test; propagated from the match lookup
 */
@Test
public void indexDependencyAnnotations() throws IOException, QueryException {
    KrillIndex ki = new KrillIndex();
    // NOTE(review): the trailing `false` presumably means the resource is not gzipped — confirm against KrillIndex.addDoc.
    ki.addDoc(2, getClass().getResourceAsStream("/goe/Corpus-Doc-0002.json"), false);
    ki.commit();

    Match km = ki.getMatchInfo("match-Corpus/Doc/0002-p0-6", "tokens", "malt", null, true, false);

    // Expected snippet, one markup fragment per line so differences are easy to locate.
    String expectedSnippet =
        "<span class=\"context-left\">"
        + "</span>"
        + "<span class=\"match\">"
        + "<span xml:id=\"token-Corpus/Doc/0002-p0-6\">"
        + "<mark>"
        + "<span xml:id=\"token-Corpus/Doc/0002-p0\">"
        + "<span xlink:title=\"malt/d:ROOT\" xlink:show=\"none\" xlink:href=\"#token-Corpus/Doc/0002-p0-6\">Maximen</span>"
        + "</span>"
        + " "
        + "<span xml:id=\"token-Corpus/Doc/0002-p1\">"
        + "<span xlink:title=\"malt/d:KON\" xlink:show=\"none\" xlink:href=\"#token-Corpus/Doc/0002-p0\">und</span>"
        + "</span>"
        + " "
        + "<span xlink:title=\"malt/d:CJ\" xlink:show=\"none\" xlink:href=\"#token-Corpus/Doc/0002-p1\">Reflexionen</span>"
        + " "
        + "<span xml:id=\"token-Corpus/Doc/0002-p3\">"
        + "<span xlink:title=\"malt/d:KON\" xlink:show=\"none\" xlink:href=\"#token-Corpus/Doc/0002-p0\">Religion</span>"
        + "</span>"
        + " "
        + "<span xml:id=\"token-Corpus/Doc/0002-p4\">"
        + "<span xlink:title=\"malt/d:KON\" xlink:show=\"none\" xlink:href=\"#token-Corpus/Doc/0002-p3\">und</span>"
        + "</span>"
        + " "
        + "<span xlink:title=\"malt/d:CJ\" xlink:show=\"none\" xlink:href=\"#token-Corpus/Doc/0002-p4\">Christentum</span>"
        + "</mark>"
        + "</span>"
        + "</span>"
        + "<span class=\"context-right\">"
        + "<span class=\"more\"></span>"
        + "</span>";

    assertEquals("SnippetHTML (1)", expectedSnippet, km.getSnippetHTML());
}
Also used : FieldDocument(de.ids_mannheim.korap.index.FieldDocument) KrillIndex(de.ids_mannheim.korap.KrillIndex) Match(de.ids_mannheim.korap.response.Match) Test(org.junit.Test)

Example 10 with FieldDocument

use of de.ids_mannheim.korap.index.FieldDocument in project Krill by KorAP.

the class TestMatchIdentifier method createSimpleFieldDoc2.

/**
 * Builds a fixture document with {@code corpusID} "c1", {@code ID} "d1" and a
 * ten-token {@code tokens} field over the surface string "abcabcabac". Each
 * token carries several annotation terms, and some tokens additionally carry
 * relation or span entries.
 *
 * @return the populated document
 */
private FieldDocument createSimpleFieldDoc2() {
    // One bracketed entry per token, in surface order.
    final String surface = "abcabcabac";
    final String tokenStream =
        "[(0-1)s:a|i:a|f/m:eins|f/y:one|x/o:erstens|it/is:1|>:x/rel:a$<b>32<i>3<s>0<s>0<s>0|_0$<i>0<i>1|-:t$<i>10]"
        + "[(1-2)s:b|i:b|f/m:zwei|f/y:two|x/o:zweitens|it/is:2|>:x/rel:b$<b>32<i>3<s>0<s>0<s>0|_1$<i>1<i>2]"
        + "[(2-3)s:c|i:c|f/m:drei|f/y:three|x/o:drittens|it/is:3|_2$<i>2<i>3|<>:base/s:s$<b>64<i>2<i>5<i>5]"
        + "[(3-4)s:a|i:a|f/m:vier|f/y:four|x/o:viertens|it/is:4|<:x/rel:b$<b>32<i>0<s>0<s>0<s>0|_3$<i>3<i>4]"
        + "[(4-5)s:b|i:b|f/m:fuenf|f/y:five|x/o:fünftens|it/is:5|_4$<i>4<i>5]"
        + "[(5-6)s:c|i:c|f/m:sechs|f/y:six|x/o:sechstens|it/is:6|_5$<i>5<i>6]"
        + "[(6-7)s:a|i:a|f/m:sieben|f/y:seven|x/o:siebtens|it/is:7|_6$<i>6<i>7]"
        + "[(7-8)s:b|i:b|f/m:acht|f/y:eight|x/o:achtens|it/is:8|<>:x/tag$<b>64<i>7<i>10<i>10|_7$<i>7<i>8]"
        + "[(8-9)s:a|i:a|f/m:neun|f/y:nine|x/o:neuntens|it/is:9|_8$<i>8<i>9]"
        + "[(9-10)s:c|i:c|f/m:zehn|f/y:ten|x/o:zehntens|it/is:10|_9$<i>9<i>10]";

    final FieldDocument document = new FieldDocument();
    document.addString("corpusID", "c1");
    document.addString("ID", "d1");
    document.addTV("tokens", surface, tokenStream);
    return document;
}
Also used : FieldDocument(de.ids_mannheim.korap.index.FieldDocument)

Aggregations

FieldDocument (de.ids_mannheim.korap.index.FieldDocument): 40; KrillIndex (de.ids_mannheim.korap.KrillIndex): 28; Test (org.junit.Test): 28; Result (de.ids_mannheim.korap.response.Result): 20; Krill (de.ids_mannheim.korap.Krill): 15; QueryBuilder (de.ids_mannheim.korap.query.QueryBuilder): 14; JsonNode (com.fasterxml.jackson.databind.JsonNode): 7; Match (de.ids_mannheim.korap.response.Match): 7; SpanQuery (org.apache.lucene.search.spans.SpanQuery): 7; ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 6; KrillCollection (de.ids_mannheim.korap.KrillCollection): 4; Test (de.ids_mannheim.korap.Test): 4; TestSimple.getJsonString (de.ids_mannheim.korap.TestSimple.getJsonString): 2; CollectionBuilder (de.ids_mannheim.korap.collection.CollectionBuilder): 2; MultiTermTokenStream (de.ids_mannheim.korap.index.MultiTermTokenStream): 1; Response (de.ids_mannheim.korap.response.Response): 1; SearchContext (de.ids_mannheim.korap.response.SearchContext): 1; Consumes (javax.ws.rs.Consumes): 1; PUT (javax.ws.rs.PUT): 1; Path (javax.ws.rs.Path): 1