Search in sources :

Example 46 with QueryBuilder

use of de.ids_mannheim.korap.query.QueryBuilder in project Krill by KorAP.

the class TestKrillIndex method indexUnicode.

/*
 * Indexes two canonically equivalent spellings of the same word - one
 * written with "n" plus a combining tilde, one with the precomposed
 * n-with-tilde character - and checks that a segment query for either
 * spelling reports exactly one result.
 */
@Test
public void indexUnicode() throws IOException, QueryException {
    // "n" (U+006E) followed by COMBINING TILDE (U+0303)
    final String decomposed = "ju" + "\u006E" + "\u0303" + "o";
    // Precomposed LATIN SMALL LETTER N WITH TILDE (U+00F1)
    final String precomposed = "ju" + "\u00F1" + "o";

    KrillIndex ki = new KrillIndex();
    FieldDocument fd = new FieldDocument();
    fd.addString("name", "Peter");
    // These values are canonically equivalent
    // But indexed as byte sequences
    fd.addTV(
        "base",
        decomposed + " " + precomposed,
        "[(0-5)s:" + decomposed + "|_0$<i>0<i>5|-:t$<i>2]"
            + "[(6-10)s:" + precomposed + "|_1$<i>6<i>10]");
    ki.addDoc(fd);
    ki.commit();
    assertEquals(1, ki.numberOf("base", "documents"));

    QueryBuilder kq = new QueryBuilder("base");
    Result kr = ki.search(kq.seg("s:" + precomposed).toQuery());
    assertEquals(1, kr.getTotalResults());
    kr = ki.search(kq.seg("s:" + decomposed).toQuery());
    assertEquals(1, kr.getTotalResults());
}
Also used : QueryBuilder(de.ids_mannheim.korap.query.QueryBuilder) FieldDocument(de.ids_mannheim.korap.index.FieldDocument) KrillIndex(de.ids_mannheim.korap.KrillIndex) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test)

Example 47 with QueryBuilder

use of de.ids_mannheim.korap.query.QueryBuilder in project Krill by KorAP.

the class TestRegexWildcardIndex method indexRegexCaseInsensitive.

/**
 * Runs regular-expression queries against a document of mixed-case tokens,
 * with and without the case-insensitivity flag of {@code re(String, boolean)},
 * and checks result counts and bracketed snippets.
 */
@Test
public void indexRegexCaseInsensitive() throws Exception {
    KrillIndex index = new KrillIndex();

    // Each token carries its surface form (s:) and a lower-cased form (i:)
    String annotation =
        "[(0-4)s:AfFe|i:affe|_0$<i>0<i>4|-:t$<i>10]"
            + "[(5-10)s:aFfFE|i:afffe|_1$<i>5<i>10]"
            + "[(11-15)s:Baum|i:baum|_2$<i>11<i>15]"
            + "[(16-26)s:Baumgarten|i:baumgarten|_3$<i>16<i>26]"
            + "[(27-38)s:SteinGarten|i:steingarten|_4$<i>27<i>38]"
            + "[(39-44)s:franZ|i:franz|_5$<i>39<i>44]"
            + "[(45-49)s:HaNs|i:hans|_6$<i>45<i>49]"
            + "[(50-54)s:Haus|i:haus|_7$<i>50<i>54]"
            + "[(55-59)s:Efeu|i:efeu|_8$<i>55<i>59]"
            + "[(60-64)s:effe|i:effe|_9$<i>60<i>64]";
    FieldDocument doc = new FieldDocument();
    doc.addTV(
        "base",
        "AfFe aFfFE Baum Baumgarten SteinGarten franZ HaNs Haus Efeu effe",
        annotation);
    index.addDoc(doc);
    index.commit();

    // With the flag set, the serialized query shows the lower-cased i: form
    QueryBuilder builder = new QueryBuilder("base");
    SpanQueryWrapper wrapper = builder.re("s:Af*e", true);
    assertEquals(
        "SpanMultiTermQueryWrapper(base:/i:af*e/)",
        wrapper.toQuery().toString());

    Result result = index.search(_newKrill(wrapper));
    assertEquals(2L, result.getTotalResults());
    assertEquals("[[AfFe]] aFfFE ...", result.getMatch(0).getSnippetBrackets());
    assertEquals("AfFe [[aFfFE]] Baum ...", result.getMatch(1).getSnippetBrackets());

    // Without the flag
    result = index.search(_newKrill(new QueryBuilder("base").re("s:Af.*e")));
    assertEquals(1L, result.getTotalResults());
    assertEquals("[[AfFe]] aFfFE ...", result.getMatch(0).getSnippetBrackets());

    result = index.search(_newKrill(new QueryBuilder("base").re("s:baum.*", true)));
    assertEquals(2L, result.getTotalResults());
    assertEquals("... aFfFE [[Baum]] Baumgarten ...", result.getMatch(0).getSnippetBrackets());
    assertEquals("... Baum [[Baumgarten]] SteinGarten ...", result.getMatch(1).getSnippetBrackets());

    result = index.search(_newKrill(new QueryBuilder("base").re("s:.*garten", true)));
    assertEquals(2L, result.getTotalResults());
    assertEquals("... Baum [[Baumgarten]] SteinGarten ...", result.getMatch(0).getSnippetBrackets());
    assertEquals("... Baumgarten [[SteinGarten]] franZ ...", result.getMatch(1).getSnippetBrackets());

    // Flag explicitly false
    result = index.search(_newKrill(new QueryBuilder("base").re("s:.*garten", false)));
    assertEquals(1L, result.getTotalResults());
    assertEquals("... Baum [[Baumgarten]] SteinGarten ...", result.getMatch(0).getSnippetBrackets());

    result = index.search(_newKrill(new QueryBuilder("base").re("s:ha.s", true)));
    assertEquals(2L, result.getTotalResults());
    assertEquals("... franZ [[HaNs]] Haus ...", result.getMatch(0).getSnippetBrackets());
    assertEquals("... HaNs [[Haus]] Efeu ...", result.getMatch(1).getSnippetBrackets());

    result = index.search(_newKrill(new QueryBuilder("base").re("s:.*f*e", true)));
    assertEquals(3L, result.getTotalResults());
    assertEquals("[[AfFe]] aFfFE ...", result.getMatch(0).getSnippetBrackets());
    assertEquals("AfFe [[aFfFE]] Baum ...", result.getMatch(1).getSnippetBrackets());
    assertEquals("... Efeu [[effe]]", result.getMatch(2).getSnippetBrackets());
}
Also used : Krill(de.ids_mannheim.korap.Krill) QueryBuilder(de.ids_mannheim.korap.query.QueryBuilder) SpanQueryWrapper(de.ids_mannheim.korap.query.wrap.SpanQueryWrapper) KrillIndex(de.ids_mannheim.korap.KrillIndex) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test)

Example 48 with QueryBuilder

use of de.ids_mannheim.korap.query.QueryBuilder in project Krill by KorAP.

the class TestRegexWildcardIndex method indexRegex.

/**
 * Runs a series of regular-expression queries against a document of
 * lower-case tokens and checks result counts and bracketed snippets.
 */
@Test
public void indexRegex() throws Exception {
    KrillIndex index = new KrillIndex();

    String annotation =
        "[(0-4)s:affe|_0$<i>0<i>4|-:t$<i>10]"
            + "[(5-10)s:afffe|_1$<i>5<i>10]"
            + "[(11-15)s:baum|_2$<i>11<i>15]"
            + "[(16-26)s:baumgarten|_3$<i>16<i>26]"
            + "[(27-38)s:steingarten|_4$<i>27<i>38]"
            + "[(39-44)s:franz|_5$<i>39<i>44]"
            + "[(45-49)s:hans|_6$<i>45<i>49]"
            + "[(50-54)s:haus|_7$<i>50<i>54]"
            + "[(55-59)s:efeu|_8$<i>55<i>59]"
            + "[(60-64)s:effe|_9$<i>60<i>64]";
    FieldDocument doc = new FieldDocument();
    doc.addTV(
        "base",
        "affe afffe baum baumgarten steingarten franz hans haus efeu effe",
        annotation);
    index.addDoc(doc);
    index.commit();

    QueryBuilder builder = new QueryBuilder("base");

    // The serialized query keeps the s: prefix unchanged
    SpanQueryWrapper wrapper = builder.re("s:af*e");
    assertEquals(
        "SpanMultiTermQueryWrapper(base:/s:af*e/)",
        wrapper.toQuery().toString());

    Result result = index.search(_newKrill(wrapper));
    assertEquals(2L, result.getTotalResults());
    assertEquals("[[affe]] afffe ...", result.getMatch(0).getSnippetBrackets());
    assertEquals("affe [[afffe]] baum ...", result.getMatch(1).getSnippetBrackets());

    result = index.search(_newKrill(builder.re("s:baum.*")));
    assertEquals(2L, result.getTotalResults());
    assertEquals("... afffe [[baum]] baumgarten ...", result.getMatch(0).getSnippetBrackets());
    assertEquals("... baum [[baumgarten]] steingarten ...", result.getMatch(1).getSnippetBrackets());

    result = index.search(_newKrill(builder.re("s:.....?garten")));
    assertEquals(2L, result.getTotalResults());
    assertEquals("... baum [[baumgarten]] steingarten ...", result.getMatch(0).getSnippetBrackets());
    assertEquals("... baumgarten [[steingarten]] franz ...", result.getMatch(1).getSnippetBrackets());

    result = index.search(_newKrill(builder.re("s:ha.s")));
    assertEquals(2L, result.getTotalResults());
    assertEquals("... franz [[hans]] haus ...", result.getMatch(0).getSnippetBrackets());
    assertEquals("... hans [[haus]] efeu ...", result.getMatch(1).getSnippetBrackets());

    result = index.search(_newKrill(builder.re("s:.*ff.*")));
    assertEquals(3L, result.getTotalResults());
    assertEquals("[[affe]] afffe ...", result.getMatch(0).getSnippetBrackets());
    assertEquals("affe [[afffe]] baum ...", result.getMatch(1).getSnippetBrackets());
    assertEquals("... efeu [[effe]]", result.getMatch(2).getSnippetBrackets());
}
Also used : Krill(de.ids_mannheim.korap.Krill) QueryBuilder(de.ids_mannheim.korap.query.QueryBuilder) SpanQueryWrapper(de.ids_mannheim.korap.query.wrap.SpanQueryWrapper) KrillIndex(de.ids_mannheim.korap.KrillIndex) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test)

Example 49 with QueryBuilder

use of de.ids_mannheim.korap.query.QueryBuilder in project Krill by KorAP.

the class TestKrill method searchSchreibgebrauchData.

/**
 * This is a Schreibgebrauch resource that didn't work for
 * element queries.
 *
 * Indexes a single test document, runs the element query
 * {@code base/s:s} on the {@code tokens} field and checks the
 * serialized query, the result count, the snippet and the text sigle.
 */
@Test
public void searchSchreibgebrauchData() throws IOException {
    // Construct index
    KrillIndex ki = new KrillIndex();
    // Indexing test files
    ki.addDoc(getClass().getResourceAsStream("/sgbr/BSP-2013-01-32.json.gz"), true);
    ki.commit();
    Krill k = new Krill(new QueryBuilder("tokens").tag("base/s:s"));
    // JUnit's assertEquals takes (expected, actual); keeping that order
    // makes failure messages report the right values.
    assertEquals("<tokens:base/s:s />", k.getSpanQuery().toString());
    Result kr = k.apply(ki);
    assertEquals(1L, kr.getTotalResults());
    assertEquals("[[Selbst ist der Jeck]]", kr.getMatch(0).getSnippetBrackets());
    assertEquals("PRO-DUD_BSP-2013-01.32", kr.getMatch(0).getTextSigle());
}
Also used : Krill(de.ids_mannheim.korap.Krill) QueryBuilder(de.ids_mannheim.korap.query.QueryBuilder) KrillIndex(de.ids_mannheim.korap.KrillIndex) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test)

Example 50 with QueryBuilder

use of de.ids_mannheim.korap.query.QueryBuilder in project Krill by KorAP.

the class TestKrill method searchJSONexpansionBug.

/**
 * This is a breaking test for #179
 *
 * Runs both expansion-bug queries first against an index holding only
 * document 00002 and then against an index holding 00001 and 00002,
 * expecting the same single match each time.
 */
@Test
public void searchJSONexpansionBug() throws IOException {
    // Construct index
    KrillIndex ki = new KrillIndex();
    // Indexing test files
    ki.addDoc(getClass().getResourceAsStream("/wiki/00002.json.gz"), true);
    ki.commit();

    // Expansion bug
    // der alte Digraph Aa durch Å
    assertSingleExpansionMatch(ki, "/queries/bugs/expansion_bug_2.jsonld");

    // TODO: base/s:t needs to be defined!!!
    QueryBuilder qb = new QueryBuilder("tokens");
    Result kr = new Krill(qb.tag("base/s:t")).apply(ki);
    assertEquals(1L, kr.getTotalResults());

    // der alte Digraph Aa durch []
    // Works with one document
    // focus(254: spanContain(<tokens:base/s:t />, {254: spanNext(spanNext(spanNext(spanNext(tokens:s:der, tokens:s:alte), tokens:s:Digraph), tokens:s:Aa), spanExpansion(tokens:s:durch, []{1, 1}, right))}))
    assertSingleExpansionMatch(ki, "/queries/bugs/expansion_bug.jsonld");

    // Now try with one file ahead
    ki = new KrillIndex();
    for (String docName : new String[] { "00001", "00002" }) {
        ki.addDoc(getClass().getResourceAsStream("/wiki/" + docName + ".json.gz"), true);
    }
    ki.commit();

    // Expansion bug
    // der alte Digraph Aa durch Å
    assertSingleExpansionMatch(ki, "/queries/bugs/expansion_bug_2.jsonld");

    // der alte Digraph Aa durch []
    assertSingleExpansionMatch(ki, "/queries/bugs/expansion_bug.jsonld");
}

/**
 * Runs the JSON query stored at the given classpath location against the
 * index and asserts it yields the one expected match in WPD_AAA.00002.
 *
 * @param ki index to search
 * @param queryResource classpath location of the JSON-LD query file
 */
private void assertSingleExpansionMatch(KrillIndex ki, String queryResource) throws IOException {
    String json = getJsonString(getClass().getResource(queryResource).getFile());
    Result kr = new Krill(json).apply(ki);
    // Check the count before touching getMatch(0) so an unexpected result
    // set is reported as a count mismatch.
    assertEquals(1L, kr.getTotalResults());
    assertEquals("... Buchstabe des Alphabetes. In Dänemark ist " + "[[der alte Digraph Aa durch Å]] ersetzt worden, " + "in Eigennamen und Ortsnamen ...", kr.getMatch(0).getSnippetBrackets());
    assertEquals("WPD_AAA.00002", kr.getMatch(0).getDocID());
}
Also used : Krill(de.ids_mannheim.korap.Krill) TestSimple.getJsonString(de.ids_mannheim.korap.TestSimple.getJsonString) QueryBuilder(de.ids_mannheim.korap.query.QueryBuilder) KrillIndex(de.ids_mannheim.korap.KrillIndex) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test)

Aggregations

QueryBuilder (de.ids_mannheim.korap.query.QueryBuilder)72 Test (org.junit.Test)67 SpanQuery (org.apache.lucene.search.spans.SpanQuery)39 KrillIndex (de.ids_mannheim.korap.KrillIndex)33 Result (de.ids_mannheim.korap.response.Result)32 Krill (de.ids_mannheim.korap.Krill)27 FieldDocument (de.ids_mannheim.korap.index.FieldDocument)14 SpanQueryWrapper (de.ids_mannheim.korap.query.wrap.SpanQueryWrapper)11 Match (de.ids_mannheim.korap.response.Match)8 JsonNode (com.fasterxml.jackson.databind.JsonNode)5 KrillQuery (de.ids_mannheim.korap.KrillQuery)5 QueryException (de.ids_mannheim.korap.util.QueryException)5 KrillMeta (de.ids_mannheim.korap.KrillMeta)4 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)3 KrillCollection (de.ids_mannheim.korap.KrillCollection)3 TestSimple.getJsonString (de.ids_mannheim.korap.TestSimple.getJsonString)3 CollectionBuilder (de.ids_mannheim.korap.collection.CollectionBuilder)3 SearchContext (de.ids_mannheim.korap.response.SearchContext)3 Test (de.ids_mannheim.korap.Test)2 DistanceConstraint (de.ids_mannheim.korap.query.DistanceConstraint)1