Search in sources :

Example 11 with FieldDocument

use of de.ids_mannheim.korap.index.FieldDocument in project Krill by KorAP.

the class TestMatchIdentifier method createSimpleFieldDoc.

/**
 * Builds the annotated fixture document used by the match identifier tests.
 *
 * <p>The document carries three string fields ({@code corpusID}, {@code ID},
 * {@code availability}) and a {@code tokens} term vector over the surface
 * text {@code "abcabcabac"}. The term vector is given as ten bracketed
 * groups, one per offset range from (0-1) to (9-10).
 *
 * @return the populated document
 */
private FieldDocument createSimpleFieldDoc() {
    // One literal per offset range; the concatenation is a compile-time constant.
    final String annotatedTokens =
        "[(0-1)s:a|i:a|f/m:eins|f/y:one|x/o:erstens|it/is:1|>:x/rel:a$<b>32<i>3<s>0<s>0<s>0|_0$<i>0<i>1|-:t$<i>10]"
        + "[(1-2)s:b|i:b|f/m:zwei|f/y:two|x/o:zweitens|it/is:2|_1$<i>1<i>2]"
        + "[(2-3)s:c|i:c|f/m:drei|f/y:three|x/o:drittens|it/is:3|_2$<i>2<i>3|<>:base/s:s$<b>64<i>2<i>5<i>5]"
        + "[(3-4)s:a|i:a|f/m:vier|f/y:four|x/o:viertens|it/is:4|<:x/rel:b$<b>32<i>0<s>0<s>0<s>0|_3$<i>3<i>4]"
        + "[(4-5)s:b|i:b|f/m:fuenf|f/y:five|x/o:fünftens|it/is:5|_4$<i>4<i>5]"
        + "[(5-6)s:c|i:c|f/m:sechs|f/y:six|x/o:sechstens|it/is:6|_5$<i>5<i>6]"
        + "[(6-7)s:a|i:a|f/m:sieben|f/y:seven|x/o:siebtens|it/is:7|_6$<i>6<i>7]"
        + "[(7-8)s:b|i:b|f/m:acht|f/y:eight|x/o:achtens|it/is:8|<>:x/tag$<b>64<i>7<i>10<i>10|_7$<i>7<i>8]"
        + "[(8-9)s:a|i:a|f/m:neun|f/y:nine|x/o:neuntens|it/is:9|_8$<i>8<i>9]"
        + "[(9-10)s:c|i:c|f/m:zehn|f/y:ten|x/o:zehntens|it/is:10|_9$<i>9<i>10]";

    FieldDocument doc = new FieldDocument();

    // Metadata fields
    doc.addString("corpusID", "c1");
    doc.addString("ID", "d1");
    doc.addString("availability", "CC-BY-SA");

    // Surface text plus its annotation stream
    doc.addTV("tokens", "abcabcabac", annotatedTokens);
    return doc;
}
Also used : FieldDocument(de.ids_mannheim.korap.index.FieldDocument)

Example 12 with FieldDocument

use of de.ids_mannheim.korap.index.FieldDocument in project Krill by KorAP.

the class TestMatchIdentifier method createSigleDoc1.

/**
 * Builds a fixture document identified by sigle fields rather than plain IDs.
 *
 * <p>Sets {@code corpusSigle}, {@code docSigle} and {@code textSigle} as
 * string fields, {@code UID} as an integer field, and a {@code tokens} term
 * vector over the surface text {@code "abcabcabac"} with ten bracketed
 * groups covering the offset ranges (0-1) to (9-10).
 *
 * @return the populated document
 */
private FieldDocument createSigleDoc1() {
    // One literal per offset range; the concatenation is a compile-time constant.
    final String annotatedTokens =
        "[(0-1)s:a|i:a|_0$<i>0<i>1|-:t$<i>10]"
        + "[(1-2)s:b|i:b|_1$<i>1<i>2]"
        + "[(2-3)s:c|i:c|_2$<i>2<i>3]"
        + "[(3-4)s:a|i:a|_3$<i>3<i>4]"
        + "[(4-5)s:b|i:b|_4$<i>4<i>5]"
        + "[(5-6)s:c|i:c|_5$<i>5<i>6]"
        + "[(6-7)s:a|i:a|_6$<i>6<i>7]"
        + "[(7-8)s:b|i:b|_7$<i>7<i>8]"
        + "[(8-9)s:a|i:a|_8$<i>8<i>9]"
        + "[(9-10)s:c|i:c|_9$<i>9<i>10]";

    FieldDocument doc = new FieldDocument();

    // Hierarchical sigles: corpus, document, text
    doc.addString("corpusSigle", "c1");
    doc.addString("docSigle", "c1/d1");
    doc.addString("textSigle", "c1/d1/t1");
    doc.addInt("UID", 1);

    // Surface text plus its annotation stream
    doc.addTV("tokens", "abcabcabac", annotatedTokens);
    return doc;
}
Also used : FieldDocument(de.ids_mannheim.korap.index.FieldDocument)

Example 13 with FieldDocument

use of de.ids_mannheim.korap.index.FieldDocument in project Krill by KorAP.

the class TestMatchIdentifier method indexMultipleSpanStarts.

/**
 * Indexes a hand-built fixture document and a gzipped JSON resource, then
 * checks the bracket snippets returned by {@code getMatchInfo}, the metadata
 * of the resource document, and the result of a segment query.
 *
 * @throws IOException    declared for the index operations called below
 * @throws QueryException declared for the query construction below
 */
@Test
public void indexMultipleSpanStarts() throws IOException, QueryException {
    KrillIndex ki = new KrillIndex();
    ki.addDoc(createSimpleFieldDoc5());
    FieldDocument fd = ki.addDoc(2, getClass().getResourceAsStream("/goe/AGA-03828-new.json.gz"), true);
    ki.commit();

    // Match info for the hand-built document: nested x/tag spans.
    Match km = ki.getMatchInfo("match-c1!d5-p0-4", "tokens", null, null, true, false);
    assertEquals("SnippetBrackets (with Spans)", "[[{x/tag:a:{x/tag:b:{x/tag:c:{x/tag:v:x}}y}}z]]", km.getSnippetBrackets());

    // Metadata of the document loaded from the resource.
    // JUnit's contract is assertEquals(expected, actual); keeping that order
    // makes the "expected ... but was ..." failure message truthful.
    assertEquals("GOE/AGA/03828", fd.getTextSigle());
    assertEquals("Autobiographische Einzelheiten", fd.getTitle());

    Krill ks = new Krill(new QueryBuilder("tokens").seg("marmot/m:case:nom").with("marmot/m:degree:pos"));
    Result kr = ks.apply(ki);
    assertEquals(83, kr.getTotalResults());
    assertEquals("match-GOE/AGA/03828-p0-1", kr.getMatch(0).getID());

    // Match info restricted to the "malt" foundry for the resource document.
    km = ki.getMatchInfo("match-GOE/AGA/03828-p0-10", "tokens", "malt", null, true, false);
    assertEquals("SnippetBrackets (with Spans)",
        "[[{malt/d:ATTR>2:Autobiographische} "
            + "{malt/d:ATTR>2:einzelheiten} "
            + "{#2:{malt/d:ROOT>0-21:Selbstschilderung}} "
            + "({malt/d:APP>2:1}) "
            + "{malt/d:ADV>5:immer} "
            + "{#5:{malt/d:ATTR>2:tätiger}}, "
            + "{#6:{malt/d:PP>13:nach}} "
            + "{#7:{malt/d:PN>6:innen}} "
            + "{malt/d:KON>7:und} "
            + "{malt/d:ADV>11:außen}]] "
            + "...",
        km.getSnippetBrackets());
}
Also used : Krill(de.ids_mannheim.korap.Krill) QueryBuilder(de.ids_mannheim.korap.query.QueryBuilder) FieldDocument(de.ids_mannheim.korap.index.FieldDocument) KrillIndex(de.ids_mannheim.korap.KrillIndex) Match(de.ids_mannheim.korap.response.Match) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test)

Example 14 with FieldDocument

use of de.ids_mannheim.korap.index.FieldDocument in project Krill by KorAP.

the class TestTemporaryQueryLimitations method classRefCheckNotSupported.

/**
 * Runs the {@code cosmas_classrefcheck.jsonld} query against a three-token
 * index and checks the serialized query, the empty result, and the two
 * warnings reported on the result.
 *
 * @throws IOException    declared for the index and resource operations below
 * @throws QueryException declared for the query construction below
 */
@Test
public void classRefCheckNotSupported() throws IOException, QueryException {
    // Construct index
    KrillIndex ki = new KrillIndex();
    // Plain concatenation: wrapping it in new String(...) only made a needless copy.
    String json = "{"
        + "  \"fields\" : ["
        + "    { "
        + "      \"primaryData\" : \"abc\""
        + "    },"
        + "    {"
        + "      \"name\" : \"tokens\","
        + "      \"data\" : ["
        + "         [ \"s:a\", \"i:a\", \"_0#0-1\", \"-:t$<i>3\"],"
        + "         [ \"s:b\", \"i:b\", \"_1#1-2\" ],"
        + "         [ \"s:c\", \"i:c\", \"_2#2-3\" ]"
        + "      ]"
        + "    }"
        + "  ]"
        + "}";
    // The returned document is not needed by this test.
    ki.addDoc(json);
    ki.commit();

    json = getJsonString(getClass().getResource("/queries/bugs/cosmas_classrefcheck.jsonld").getFile());
    Krill ks = new Krill(json);
    Result kr = ks.apply(ki);

    // assertEquals takes (expected, actual); this order keeps failure messages truthful.
    assertEquals("focus(130: {131: spanContain({129: <tokens:s />}, {130: tokens:s:wegen})})", kr.getSerialQuery());
    assertEquals(0, kr.getTotalResults());
    assertEquals(0, kr.getStartIndex());
    assertEquals("This is a warning coming from the serialization", kr.getWarning(1).getMessage());
    assertEquals("Class reference checks are currently not supported" + " - results may not be correct", kr.getWarning(0).getMessage());
    assertEquals(2, kr.getWarnings().size());
}
Also used : Krill(de.ids_mannheim.korap.Krill) FieldDocument(de.ids_mannheim.korap.index.FieldDocument) KrillIndex(de.ids_mannheim.korap.KrillIndex) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test)

Example 15 with FieldDocument

use of de.ids_mannheim.korap.index.FieldDocument in project Krill by KorAP.

the class TestKrill method searchJSONnewJSON.

/**
 * Indexes the gzipped JSON resource {@code /goe/AGA-03828.json.gz} and checks
 * the metadata exposed by the returned {@code FieldDocument}, followed by the
 * totals and paging values of a segment query over the index.
 *
 * <p>All assertions use JUnit's {@code assertEquals(expected, actual)} order
 * so that failure messages report the expected and actual values correctly.
 *
 * @throws IOException declared for the index operations called below
 */
@Test
public void searchJSONnewJSON() throws IOException {
    // Construct index
    KrillIndex ki = new KrillIndex();
    // Indexing test files
    FieldDocument fd = ki.addDoc(1, getClass().getResourceAsStream("/goe/AGA-03828.json.gz"), true);
    ki.commit();

    // Identifiers
    assertEquals(1, fd.getUID());
    assertEquals("GOE_AGA.03828", fd.getTextSigle());
    assertEquals("GOE_AGA", fd.getDocSigle());
    assertEquals("GOE", fd.getCorpusSigle());

    // Text-level metadata
    assertEquals("Autobiographische Einzelheiten", fd.getTitle());
    assertNull(fd.getSubTitle());
    assertEquals("Autobiographie", fd.getTextType());
    assertNull(fd.getTextTypeArt());
    assertNull(fd.getTextTypeRef());
    assertNull(fd.getTextColumn());
    assertNull(fd.getTextDomain());
    // assertEquals("529-547", fd.getPages());
    assertEquals("QAO-NC", fd.getLicense());
    assertEquals("18200000", fd.getCreationDate().toString());
    assertEquals("19820000", fd.getPubDate().toString());
    assertEquals("Goethe, Johann Wolfgang von", fd.getAuthor());
    assertNull(fd.getTextClass());
    assertEquals("de", fd.getLanguage());
    assertEquals("München", fd.getPubPlace());
    assertEquals("Goethe, Johann Wolfgang von:" + " Autobiographische Einzelheiten," + " (Geschrieben bis 1832), In: Goethe," + " Johann Wolfgang von: Goethes Werke," + " Bd. 10, Autobiographische Schriften" + " II, Hrsg.: Trunz, Erich. München: " + "Verlag C. H. Beck, 1982, S. 529-547", fd.getReference());
    assertEquals("Verlag C. H. Beck", fd.getPublisher());
    assertNull(fd.getEditor());
    assertNull(fd.getFileEditionStatement());
    assertNull(fd.getBiblEditionStatement());
    assertNull(fd.getKeywords());

    // Annotation inventory
    assertEquals("opennlp#tokens", fd.getTokenSource());
    assertEquals("base base/paragraphs base/sentences corenlp " + "corenlp/constituency corenlp/morpho " + "corenlp/namedentities corenlp/sentences " + "glemm glemm/morpho mate mate/morpho" + " opennlp opennlp/morpho opennlp/sentences" + " treetagger treetagger/morpho " + "treetagger/sentences", fd.getFoundries());
    assertEquals("base/s=spans corenlp/c=spans corenlp/ne=tokens" + " corenlp/p=tokens corenlp/s=spans glemm/l=tokens" + " mate/l=tokens mate/m=tokens mate/p=tokens" + " opennlp/p=tokens opennlp/s=spans tt/l=tokens" + " tt/p=tokens tt/s=spans", fd.getLayerInfos());

    // Corpus- and document-level metadata
    assertEquals("Goethes Werke", fd.getCorpusTitle());
    assertNull(fd.getCorpusSubTitle());
    assertEquals("Goethe, Johann Wolfgang von", fd.getCorpusAuthor());
    assertEquals("Trunz, Erich", fd.getCorpusEditor());
    assertEquals("Goethe: Autobiographische Schriften II, (1817-1825, 1832)", fd.getDocTitle());
    assertNull(fd.getDocSubTitle());
    assertNull(fd.getDocEditor());
    assertNull(fd.getDocAuthor());

    // Query the freshly built index
    Krill ks = new Krill(new QueryBuilder("tokens").seg("mate/m:case:nom").with("mate/m:number:pl"));
    Result kr = ks.apply(ki);
    assertEquals(148, kr.getTotalResults());
    assertEquals(0, kr.getStartIndex());
    assertEquals(25, kr.getItemsPerPage());
}
Also used : Krill(de.ids_mannheim.korap.Krill) QueryBuilder(de.ids_mannheim.korap.query.QueryBuilder) FieldDocument(de.ids_mannheim.korap.index.FieldDocument) KrillIndex(de.ids_mannheim.korap.KrillIndex) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test)

Aggregations

FieldDocument (de.ids_mannheim.korap.index.FieldDocument): 40, KrillIndex (de.ids_mannheim.korap.KrillIndex): 28, Test (org.junit.Test): 28, Result (de.ids_mannheim.korap.response.Result): 20, Krill (de.ids_mannheim.korap.Krill): 15, QueryBuilder (de.ids_mannheim.korap.query.QueryBuilder): 14, JsonNode (com.fasterxml.jackson.databind.JsonNode): 7, Match (de.ids_mannheim.korap.response.Match): 7, SpanQuery (org.apache.lucene.search.spans.SpanQuery): 7, ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 6, KrillCollection (de.ids_mannheim.korap.KrillCollection): 4, Test (de.ids_mannheim.korap.Test): 4, TestSimple.getJsonString (de.ids_mannheim.korap.TestSimple.getJsonString): 2, CollectionBuilder (de.ids_mannheim.korap.collection.CollectionBuilder): 2, MultiTermTokenStream (de.ids_mannheim.korap.index.MultiTermTokenStream): 1, Response (de.ids_mannheim.korap.response.Response): 1, SearchContext (de.ids_mannheim.korap.response.SearchContext): 1, Consumes (javax.ws.rs.Consumes): 1, PUT (javax.ws.rs.PUT): 1, Path (javax.ws.rs.Path): 1