Search in sources :

Example 41 with QueryBuilder

use of de.ids_mannheim.korap.query.QueryBuilder in project Krill by KorAP.

the class TestHighlight method highlightEscapes.

/**
 * Indexes a single document whose primary text contains quotes, an
 * ampersand, curly braces and a backslash, then checks the bracket and
 * HTML snippets produced for a match spanning the whole text.
 */
@Test
public void highlightEscapes() throws IOException, QueryException {
    final String matchId = "match-c1/d1/1-p0-4";
    final short noContext = 0;

    // The text deliberately contains characters that are special in
    // HTML and in the bracket notation
    final String primaryText = "Mit \"Mann\" & {Ma\\us}";
    final String tokenStream =
            "[(0-3)s:Mit|i:mit|_0#0-3|-:t$<i>4|<>:base/t:t$<b>64<i>0<i>20<i>4<b>0]"
            + "[(4-10)s:\"Mann\"|i:\"mann\"|base/l:\"Mann\"|_1#4-10]"
            + "[(11-12)s:&|i:&|base/l:&|_2#11-12]"
            + "[(13-20)s:{Ma\\us}|i:{ma\\us}|_3#13-20]";

    KrillIndex index = new KrillIndex();
    FieldDocument doc = new FieldDocument();
    doc.addString("ID", "doc-1");
    doc.addString("UID", "1");
    doc.addString("textSigle", "c1/d1/1");
    doc.addTV("base", primaryText, tokenStream);
    index.addDoc(doc);
    index.commit();

    // Search for the base/t:t tag without any surrounding context
    QueryBuilder builder = new QueryBuilder("base");
    SpanQuery tagQuery = (SpanQuery) builder.tag("base/t:t").toQuery();
    Krill krill = new Krill(tagQuery);
    krill.getMeta().getContext().left.setToken(true).setLength(noContext);
    krill.getMeta().getContext().right.setToken(true).setLength(noContext);

    Result result = index.search(krill);
    assertEquals((long) 1, result.getTotalResults());

    Match first = result.getMatch(0);
    assertEquals("[[Mit \"Mann\" & \\{Ma\\\\us\\}]]",
            first.getSnippetBrackets());
    assertEquals(
            "<span class=\"context-left\"></span>"
            + "<span class=\"match\"><mark>Mit &quot;Mann&quot; &amp; {Ma\\us}</mark></span>"
            + "<span class=\"context-right\"></span>",
            first.getSnippetHTML());
    assertEquals(matchId, first.getID());

    // Fetch the same match again by its identifier
    Match info = index.getMatchInfo(matchId, "base", true,
            (ArrayList) null, (ArrayList) null, true, true, false);
    assertEquals(0, info.getStartPos());
    assertEquals(
            "<span class=\"context-left\"></span>"
            + "<span class=\"match\"><mark>"
            + "<span title=\"base/t:t\">Mit "
            + "<span title=\"base/l:&quot;Mann&quot;\">&quot;Mann&quot;</span> "
            + "<span title=\"base/l:&amp;\">&amp;</span> "
            + "{Ma\\us}</span>"
            + "</mark></span>"
            + "<span class=\"context-right\"></span>",
            info.getSnippetHTML());
}
Also used : Krill(de.ids_mannheim.korap.Krill) QueryBuilder(de.ids_mannheim.korap.query.QueryBuilder) FieldDocument(de.ids_mannheim.korap.index.FieldDocument) KrillIndex(de.ids_mannheim.korap.KrillIndex) SpanQuery(org.apache.lucene.search.spans.SpanQuery) Result(de.ids_mannheim.korap.response.Result) Match(de.ids_mannheim.korap.response.Match) Test(org.junit.Test)

Example 42 with QueryBuilder

use of de.ids_mannheim.korap.query.QueryBuilder in project Krill by KorAP.

the class TestHighlight method highlightMissingBug.

/**
 * Indexes four small documents over several commits and checks the
 * bracket snippets of numbered-class alternations for the default
 * context, a one-token context and a zero-token context, plus a variant
 * with an additional outer class.
 */
@Test
public void highlightMissingBug() throws IOException, QueryException {
    // doc-1/doc-3 and doc-2/doc-4 share their token streams
    final String fourTokens = "[(0-1)s:a|i:a|_0#0-1|-:t$<i>4]"
            + "[(1-2)s:b|i:b|_1#1-2]"
            + "[(2-3)s:a|i:c|_2#2-3]"
            + "[(3-4)s:b|i:a|_3#3-4]";
    final String threeTokens = "[(0-1)s:a|i:a|_0#0-1|-:t$<i>3]"
            + "[(1-2)s:b|i:b|_1#1-2]"
            + "[(2-3)s:a|i:c|_2#2-3]";
    final short oneToken = 1;
    final short zeroTokens = 0;

    KrillIndex index = new KrillIndex();

    FieldDocument doc = new FieldDocument();
    doc.addString("ID", "doc-1");
    doc.addString("UID", "1");
    doc.addTV("base", "abab", fourTokens);
    index.addDoc(doc);

    doc = new FieldDocument();
    doc.addString("ID", "doc-2");
    doc.addString("UID", "2");
    doc.addTV("base", "aba", threeTokens);
    index.addDoc(doc);
    // First commit covers doc-1 and doc-2
    index.commit();

    doc = new FieldDocument();
    doc.addString("ID", "doc-3");
    doc.addString("UID", "3");
    doc.addTV("base", "abab", fourTokens);
    index.addDoc(doc);
    // Second commit covers doc-3
    index.commit();

    doc = new FieldDocument();
    doc.addString("ID", "doc-4");
    doc.addString("UID", "4");
    doc.addTV("base", "aba", threeTokens);
    index.addDoc(doc);
    // Third commit covers doc-4
    index.commit();

    // Surface forms: class 1 on "a", class 2 on "b", default context
    QueryBuilder builder = new QueryBuilder("base");
    SpanQuery query = (SpanQuery) builder
            .or(builder.nr(1, builder.seg("s:a")))
            .or(builder.nr(2, builder.seg("s:b")))
            .toQuery();
    Result result = index.search(query);
    assertEquals((long) 14, result.getTotalResults());
    final String[] surfaceSnippets = {
        "[[{1:a}]]bab",
        "a[[{2:b}]]ab",
        "ab[[{1:a}]]b",
        "aba[[{2:b}]]",
        "[[{1:a}]]ba",
        "a[[{2:b}]]a",
        "ab[[{1:a}]]",
        "[[{1:a}]]bab",
        "a[[{2:b}]]ab",
        "ab[[{1:a}]]b",
        "aba[[{2:b}]]",
        "[[{1:a}]]ba",
        "a[[{2:b}]]a",
        "ab[[{1:a}]]"
    };
    for (int pos = 0; pos < surfaceSnippets.length; pos++) {
        assertEquals(surfaceSnippets[pos],
                result.getMatch(pos).getSnippetBrackets());
    }

    // i-layer terms: class 1 on "i:a", class 2 on "i:c", one token of context
    builder = new QueryBuilder("base");
    query = (SpanQuery) builder
            .or(builder.nr(1, builder.seg("i:a")))
            .or(builder.nr(2, builder.seg("i:c")))
            .toQuery();
    Krill krill = new Krill(query);
    krill.getMeta().getContext().left.setToken(true).setLength(oneToken);
    krill.getMeta().getContext().right.setToken(true).setLength(oneToken);
    result = index.search(krill);
    assertEquals((long) 10, result.getTotalResults());
    final String[] oneTokenSnippets = {
        "[[{1:a}]]b ...",
        "... b[[{2:a}]]b",
        "... a[[{1:b}]]",
        "[[{1:a}]]b ...",
        "... b[[{2:a}]]",
        "[[{1:a}]]b ...",
        "... b[[{2:a}]]b",
        "... a[[{1:b}]]",
        "[[{1:a}]]b ...",
        "... b[[{2:a}]]"
    };
    for (int pos = 0; pos < oneTokenSnippets.length; pos++) {
        assertEquals(oneTokenSnippets[pos],
                result.getMatch(pos).getSnippetBrackets());
    }

    // Same query object, now without any context tokens
    krill.getMeta().getContext().left.setToken(true).setLength(zeroTokens);
    krill.getMeta().getContext().right.setToken(true).setLength(zeroTokens);
    result = index.search(krill);
    assertEquals((long) 10, result.getTotalResults());
    final String[] noContextSnippets = {
        "[[{1:a}]] ...",
        "... [[{2:a}]] ...",
        "... [[{1:b}]]",
        "[[{1:a}]] ...",
        "... [[{2:a}]]",
        "[[{1:a}]] ...",
        "... [[{2:a}]] ...",
        "... [[{1:b}]]",
        "[[{1:a}]] ...",
        "... [[{2:a}]]"
    };
    for (int pos = 0; pos < noContextSnippets.length; pos++) {
        assertEquals(noContextSnippets[pos],
                result.getMatch(pos).getSnippetBrackets());
    }

    // Wrap the whole alternation in an additional class 3
    query = (SpanQuery) builder
            .nr(3, builder
                    .or(builder.nr(1, builder.seg("i:a")))
                    .or(builder.nr(2, builder.seg("i:c"))))
            .toQuery();
    krill = new Krill(query);
    krill.getMeta().getContext().left.setToken(true).setLength(zeroTokens);
    krill.getMeta().getContext().right.setToken(true).setLength(zeroTokens);
    result = index.search(krill);
    assertEquals((long) 10, result.getTotalResults());
    final String[] nestedSnippets = {
        "[[{1:{3:a}}]] ...",
        "... [[{2:{3:a}}]] ...",
        "... [[{1:{3:b}}]]",
        "[[{1:{3:a}}]] ...",
        "... [[{2:{3:a}}]]",
        "[[{1:{3:a}}]] ...",
        "... [[{2:{3:a}}]] ...",
        "... [[{1:{3:b}}]]",
        "[[{1:{3:a}}]] ...",
        "... [[{2:{3:a}}]]"
    };
    for (int pos = 0; pos < nestedSnippets.length; pos++) {
        assertEquals(nestedSnippets[pos],
                result.getMatch(pos).getSnippetBrackets());
    }
}
Also used : Krill(de.ids_mannheim.korap.Krill) QueryBuilder(de.ids_mannheim.korap.query.QueryBuilder) FieldDocument(de.ids_mannheim.korap.index.FieldDocument) KrillIndex(de.ids_mannheim.korap.KrillIndex) SpanQuery(org.apache.lucene.search.spans.SpanQuery) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test)

Example 43 with QueryBuilder

use of de.ids_mannheim.korap.query.QueryBuilder in project Krill by KorAP.

the class TestBenchmarkSamples method simpleSegmentQuery.

/**
 * Times repeated execution of a single segment query
 * ({@code mate/m:gender:masc}) against an index built from the wiki test
 * resources and prints the elapsed seconds to standard error.
 *
 * Every round also asserts the expected total of 497 results.
 *
 * @throws Exception if indexing or searching fails
 */
@Test
public void simpleSegmentQuery() throws Exception {
    // Construct index
    KrillIndex ki = new KrillIndex();
    // Indexing test files
    for (String i : new String[] { "00001", "00002", "00003", "00004", "00005", "00006", "02439" }) {
        ki.addDoc(getClass().getResourceAsStream("/wiki/" + i + ".json.gz"), true);
    }
    ki.commit();
    t1 = System.nanoTime();
    for (int i = 1; i <= rounds; i++) {
        final QueryBuilder qb = new QueryBuilder("tokens");
        final Krill ks = new Krill(qb.seg("mate/m:gender:masc").toQuery());
        final Result kr = ks.apply(ki);
        // JUnit expects (expected, actual); the swapped order produced
        // a misleading failure message
        assertEquals(497L, kr.getTotalResults());
    }
    t2 = System.nanoTime();
    double seconds = (double) (t2 - t1) / 1000000000.0;
    System.err.println("Seconds: " + seconds);
    // Previously recorded timings:
    // Seconds: 9.465514311
    // Seconds: 9.302011468
    // Seconds: 9.052496918
    // Seconds: 9.0567007
    // Seconds: 9.113724089
    // Seconds: 8.700548842
    // Seconds: 9.390980437
    // Seconds: 8.817503952
}
Also used : Krill(de.ids_mannheim.korap.Krill) QueryBuilder(de.ids_mannheim.korap.query.QueryBuilder) KrillIndex(de.ids_mannheim.korap.KrillIndex) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test)

Example 44 with QueryBuilder

use of de.ids_mannheim.korap.query.QueryBuilder in project Krill by KorAP.

the class TestKrillCollectionIndex method filterExampleAtomicLegacy.

/**
 * Exercises filtering and extending of a virtual collection on an index
 * that is committed after every added document, checking document,
 * token, sentence and paragraph counts after each step as well as the
 * result total of a query restricted to the collection.
 *
 * @throws Exception if indexing, counting or searching fails
 */
@Test
public void filterExampleAtomicLegacy() throws Exception {
    // That's exactly the same test class, but with multiple atomic indices
    // Construct index
    KrillIndex ki = new KrillIndex();
    // Indexing test files - commit after each document
    for (String i : new String[] { "00001", "00002", "00003", "00004", "00005", "00006", "02439" }) {
        ki.addDoc(getClass().getResourceAsStream("/wiki/" + i + ".json.gz"), true);
        ki.commit();
    }
    // Create Virtual collections:
    KrillCollection kc = new KrillCollection(ki);
    assertEquals("Documents", 7, kc.numberOf("documents"));
    // If this is set - everything is fine automatically ...
    kc.filter(kc.build().term("corpusID", "WPD"));
    assertEquals("Documents", 7, kc.numberOf("documents"));
    // The virtual collection consists of all documents that have the
    // textClass "reisen" and "freizeit-unterhaltung"
    kc.filter(kc.build().andGroup().with(kc.build().term("textClass", "reisen")).with(kc.build().term("textClass", "freizeit-unterhaltung")));
    assertEquals("Documents", 5, kc.numberOf("documents"));
    assertEquals("Tokens", 1678, kc.numberOf("tokens"));
    assertEquals("Sentences", 194, kc.numberOf("sentences"));
    assertEquals("Paragraphs", 139, kc.numberOf("paragraphs"));
    // Subset this to all documents that also have the textClass "kultur"
    kc.filter(kc.build().term("textClass", "kultur"));
    assertEquals("Documents", 1, kc.numberOf("documents"));
    assertEquals("Tokens", 405, kc.numberOf("tokens"));
    assertEquals("Sentences", 75, kc.numberOf("sentences"));
    assertEquals("Paragraphs", 48, kc.numberOf("paragraphs"));
    // This is already filtered though ...
    kc.filter(kc.build().term("corpusID", "WPD"));
    assertEquals("Documents", 1, kc.numberOf("documents"));
    assertEquals("Tokens", 405, kc.numberOf("tokens"));
    assertEquals("Sentences", 75, kc.numberOf("sentences"));
    assertEquals("Paragraphs", 48, kc.numberOf("paragraphs"));
    // Create a query restricted to the collection: first 20 matches,
    // five tokens of context on each side
    Krill ks = new Krill(new QueryBuilder("tokens").seg("opennlp/p:NN").with("tt/p:NN"));
    ks.setCollection(kc).getMeta().setStartIndex(0).setCount((short) 20).setContext(new SearchContext(true, (short) 5, true, (short) 5));
    Result kr = ks.apply(ki);
    // JUnit expects (expected, actual)
    assertEquals(70L, kr.getTotalResults());
    // Extending with a textClass no document has leaves the count unchanged
    kc.extend(kc.build().term("textClass", "uninteresting"));
    assertEquals("Documents", 1, kc.numberOf("documents"));
    kc.extend(kc.build().term("textClass", "wissenschaft"));
    assertEquals("Documents", 3, kc.numberOf("documents"));
    assertEquals("Tokens", 1669, kc.numberOf("tokens"));
    assertEquals("Sentences", 188, kc.numberOf("sentences"));
    assertEquals("Paragraphs", 130, kc.numberOf("paragraphs"));
    // System.err.println(kc.toString());
    // Test collectionbuilder simplifier!
    /*
        OrGroup(
                AndGroup(
                         corpusID:WPD
                         textClass:reisen
                         textClass:freizeit-unterhaltung
                         textClass:kultur
                         corpusID:WPD
                         )
                textClass:uninteresting
                textClass:wissenschaft
        )
        */
    // Deleting the "wissenschaft" documents shrinks the collection again
    assertTrue(ki.delDocs("textClass", "wissenschaft"));
    ki.commit();
    assertEquals("Documents", 1, kc.numberOf("documents"));
    assertEquals("Tokens", 405, kc.numberOf("tokens"));
    assertEquals("Sentences", 75, kc.numberOf("sentences"));
    assertEquals("Paragraphs", 48, kc.numberOf("paragraphs"));
}
Also used : Krill(de.ids_mannheim.korap.Krill) CollectionBuilder(de.ids_mannheim.korap.collection.CollectionBuilder) SearchContext(de.ids_mannheim.korap.response.SearchContext) QueryBuilder(de.ids_mannheim.korap.query.QueryBuilder) KrillIndex(de.ids_mannheim.korap.KrillIndex) KrillCollection(de.ids_mannheim.korap.KrillCollection) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test)

Example 45 with QueryBuilder

use of de.ids_mannheim.korap.query.QueryBuilder in project Krill by KorAP.

the class TestKrillCollectionIndex method filterExample2Legacy.

/**
 * Builds an index from the wiki test resources plus one extra document
 * added in a second commit, filters a virtual collection by two
 * textClass terms, and checks the collection counts and query totals
 * before and after a further filter that matches no document.
 *
 * @throws Exception if indexing, counting or searching fails
 */
@Test
public void filterExample2Legacy() throws Exception {
    // Construct index
    KrillIndex ki = new KrillIndex();
    // Indexing test files
    for (String i : new String[] { "00001", "00002", "00003", "00004", "00005", "00006", "02439" }) {
        ki.addDoc(getClass().getResourceAsStream("/wiki/" + i + ".json.gz"), true);
    }
    ki.commit();
    // Add one more document in a separate commit
    ki.addDoc(getClass().getResourceAsStream("/wiki/00012-fakemeta.json.gz"), true);
    ki.commit();
    // Create Virtual collections:
    KrillCollection kc = new KrillCollection(ki);
    CollectionBuilder cb = kc.build();
    kc.filter(cb.andGroup().with(cb.term("textClass", "reisen")).with(cb.term("textClass", "freizeit-unterhaltung")));
    assertEquals("Documents", 5, kc.numberOf("documents"));
    assertEquals("Tokens", 1678, kc.numberOf("tokens"));
    assertEquals("Sentences", 194, kc.numberOf("sentences"));
    assertEquals("Paragraphs", 139, kc.numberOf("paragraphs"));
    // Create a query restricted to the collection: first 20 matches,
    // five tokens of context on each side
    Krill ks = new Krill(new QueryBuilder("tokens").seg("opennlp/p:NN").with("tt/p:NN"));
    ks.setCollection(kc).getMeta().setStartIndex(0).setCount((short) 20).setContext(new SearchContext(true, (short) 5, true, (short) 5));
    Result kr = ks.apply(ki);
    // JUnit expects (expected, actual)
    assertEquals(369L, kr.getTotalResults());
    // Filtering by corpusID "QQQ" leaves the collection empty
    kc.filter(cb.term("corpusID", "QQQ"));
    assertEquals("Documents", 0, kc.numberOf("documents"));
    assertEquals("Tokens", 0, kc.numberOf("tokens"));
    assertEquals("Sentences", 0, kc.numberOf("sentences"));
    assertEquals("Paragraphs", 0, kc.numberOf("paragraphs"));
    ks.setCollection(kc);
    // Run the query again on the now empty collection
    kr = ks.apply(ki);
    assertEquals(0L, kr.getTotalResults());
}
Also used : Krill(de.ids_mannheim.korap.Krill) CollectionBuilder(de.ids_mannheim.korap.collection.CollectionBuilder) SearchContext(de.ids_mannheim.korap.response.SearchContext) QueryBuilder(de.ids_mannheim.korap.query.QueryBuilder) KrillIndex(de.ids_mannheim.korap.KrillIndex) KrillCollection(de.ids_mannheim.korap.KrillCollection) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test)

Aggregations

QueryBuilder (de.ids_mannheim.korap.query.QueryBuilder): 72, Test (org.junit.Test): 67, SpanQuery (org.apache.lucene.search.spans.SpanQuery): 39, KrillIndex (de.ids_mannheim.korap.KrillIndex): 33, Result (de.ids_mannheim.korap.response.Result): 32, Krill (de.ids_mannheim.korap.Krill): 27, FieldDocument (de.ids_mannheim.korap.index.FieldDocument): 14, SpanQueryWrapper (de.ids_mannheim.korap.query.wrap.SpanQueryWrapper): 11, Match (de.ids_mannheim.korap.response.Match): 8, JsonNode (com.fasterxml.jackson.databind.JsonNode): 5, KrillQuery (de.ids_mannheim.korap.KrillQuery): 5, QueryException (de.ids_mannheim.korap.util.QueryException): 5, KrillMeta (de.ids_mannheim.korap.KrillMeta): 4, ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 3, KrillCollection (de.ids_mannheim.korap.KrillCollection): 3, TestSimple.getJsonString (de.ids_mannheim.korap.TestSimple.getJsonString): 3, CollectionBuilder (de.ids_mannheim.korap.collection.CollectionBuilder): 3, SearchContext (de.ids_mannheim.korap.response.SearchContext): 3, Test (de.ids_mannheim.korap.Test): 2, DistanceConstraint (de.ids_mannheim.korap.query.DistanceConstraint): 1