Search in sources :

Example 6 with SpanQueryWrapper

use of de.ids_mannheim.korap.query.wrap.SpanQueryWrapper in project Krill by KorAP.

the class KrillQuery method _termFromJson.

/**
 * Deserialize a koral:term node (or, when {@code isSpan} is set, a
 * span-like node) into a {@link SpanQueryWrapper}.
 *
 * <p>The method assembles a term string of the shape
 * {@code [direction][foundry/][layer:]key[:value]} from the JSON
 * fields and then wraps it:
 * <ul>
 * <li>for terms: as a regex, wildcard or plain segment query,
 * optionally negated depending on the "match" field;</li>
 * <li>otherwise: as a tag query, optionally combined with the
 * attributes given in "attr".</li>
 * </ul>
 *
 * @param json
 *            the JSON node to deserialize; must contain "@type"
 * @param isSpan
 *            if true, the node is never treated as a term, even if its
 *            "@type" is "koral:term"
 * @param direction
 *            optional relation direction; when non-null its
 *            {@code value()} is prefixed to the term string
 * @return the resulting query wrapper; may be null when
 *         {@code builder().tag(...)} yields null for an element
 *         with attributes
 * @throws QueryException
 *             701 if "@type" is missing on the node or on its "attr",
 *             740 if neither a non-empty "key" nor an "attr" is given,
 *             741 if the "match" relation is neither "match:eq" nor
 *             "match:ne"
 */
// TODO: Not optimal as it does not respect non-term
private SpanQueryWrapper _termFromJson(JsonNode json, boolean isSpan, RelationDirection direction) throws QueryException {
    // Every node handled here has to declare its type
    if (!json.has("@type")) {
        throw new QueryException(701, "JSON-LD group has no @type attribute");
    }
    ;
    String termType = json.get("@type").asText();
    Boolean isTerm = termType.equals("koral:term") ? true : false;
    Boolean isCaseInsensitive = false;
    // A missing or empty key is only tolerated if attributes are given
    if (!json.has("key") || json.get("key").asText().length() < 1) {
        // why must it have an attr?
        if (!json.has("attr")) {
            // return new SpanRepetitionQueryWrapper();
            throw new QueryException(740, "Key definition is missing in term or span");
        }
    }
    ;
    // Empty koral:span hack
    // The caller's isSpan flag overrides the @type based decision
    if (isSpan) {
        isTerm = false;
    }
    ;
    // <legacy>
    // The legacy "caseInsensitive" field takes precedence; the "flags"
    // array is only inspected if the legacy field is absent or false
    if (json.has("caseInsensitive") && json.get("caseInsensitive").asBoolean()) {
        isCaseInsensitive = true;
    } else // Flags
    if (json.has("flags") && json.get("flags").isArray()) {
        Iterator<JsonNode> flags = json.get("flags").elements();
        while (flags.hasNext()) {
            String flag = flags.next().asText();
            if (flag.equals("flags:caseInsensitive")) {
                isCaseInsensitive = true;
            } else {
                // Unknown flags are reported but do not abort the query
                this.addWarning(748, "Flag is unknown", flag);
            }
            ;
        }
        ;
    }
    ;
    // Accumulates the term string: [direction][foundry/][layer:]key[:value]
    StringBuilder value = new StringBuilder();
    if (direction != null)
        value.append(direction.value());
    if (json.has("foundry") && json.get("foundry").asText().length() > 0) {
        value.append(json.get("foundry").asText()).append('/');
    }
    ;
    // No default foundry defined
    if (json.has("layer") && json.get("layer").asText().length() > 0) {
        String layer = json.get("layer").asText();
        // Map long layer names to their short prefixes; names that are
        // not listed here are kept unchanged
        switch(layer) {
            case "lemma":
                layer = "l";
                break;
            case "pos":
                layer = "p";
                break;
            case "orth":
                // TODO: THIS IS AN UGLY HACK! AND SHOULD BE NAMED "SURFACE" or . OR *
                layer = ".";
                break;
            case "struct":
                layer = "s";
                break;
            case "const":
                layer = "c";
                break;
        }
        ;
        // Case insensitivity is only available for terms on the orth
        // layer ("."), which is then switched to the "i" layer
        if (isCaseInsensitive && isTerm) {
            if (layer.equals("."))
                layer = "i";
            else {
                this.addWarning(767, "Case insensitivity is currently not supported for this layer");
            }
            ;
        }
        ;
        // Ignore foundry for orth layer
        // Note: setLength(0) empties the whole buffer, so a previously
        // appended direction prefix is dropped as well
        if (layer.equals(".")) {
            layer = "s";
            value.setLength(0);
        } else if (layer.equals("i")) {
            value.setLength(0);
        }
        ;
        value.append(layer).append(':');
    }
    ;
    if (json.has("key") && json.get("key").asText().length() > 0) {
        String key = json.get("key").asText();
        // The key is lowercased whenever case insensitivity was requested,
        // independent of whether the layer supports it
        value.append(isCaseInsensitive ? key.toLowerCase() : key);
    }
    ;
    if (json.has("value") && json.get("value").asText().length() > 0)
        value.append(':').append(json.get("value").asText());
    // Regular expression or wildcard
    if (isTerm) {
        // Default match relation is equality
        String match = "match:eq";
        if (json.has("match")) {
            match = json.get("match").asText();
        }
        ;
        if (json.has("type")) {
            QueryBuilder qb = this.builder();
            // Branch on type
            switch(json.get("type").asText()) {
                case "type:regex":
                    {
                        // The regex can be rewritten to an any token
                        // (matches ".+" or ".*", optionally followed by "?",
                        // on the "s" or "i" layer; the match relation is
                        // not consulted in this case)
                        if (value.toString().matches("^[si]:\\.[\\+\\*]\\??$")) {
                            return new SpanRepetitionQueryWrapper();
                        }
                        ;
                        SpanRegexQueryWrapper srqw = qb.re(value.toString(), isCaseInsensitive);
                        if (match.equals("match:ne")) {
                            if (DEBUG)
                                log.trace("Term is negated");
                            // ssqw.makeNegative();
                            // Negation: an empty segment excluding the regex
                            return this.builder().seg().without(srqw);
                        } else if (match.equals("match:eq")) {
                            return srqw;
                        }
                        throw new QueryException(741, "Match relation unknown");
                    }
                case "type:wildcard":
                    {
                        SpanWildcardQueryWrapper swcqw = qb.wc(value.toString(), isCaseInsensitive);
                        if (match.equals("match:ne")) {
                            if (DEBUG)
                                log.trace("Term is negated");
                            // ssqw.makeNegative();
                            // Negation: an empty segment excluding the wildcard
                            return this.builder().seg().without(swcqw);
                        } else if (match.equals("match:eq")) {
                            return swcqw;
                        }
                        ;
                        throw new QueryException(741, "Match relation unknown");
                    }
                case "type:string":
                    // Plain strings are handled by the segment code below
                    break;
                default:
                    // Unknown types fall through to the string handling below
                    this.addWarning(746, "Term type is not supported - treated as a string");
            }
            ;
        }
        ;
        // Plain string term (no type, "type:string" or an unsupported type)
        SpanSegmentQueryWrapper ssqw = this.builder().seg(value.toString());
        if (match.equals("match:ne")) {
            if (DEBUG)
                log.trace("Term is negated");
            ssqw.makeNegative();
            return this.builder().seg().without(ssqw);
        } else if (match.equals("match:eq")) {
            return ssqw;
        } else {
            throw new QueryException(741, "Match relation unknown");
        }
    }
    ;
    // From here on the node is not a term: build a tag (element) query
    if (json.has("attr")) {
        JsonNode attrNode = json.get("attr");
        if (!attrNode.has("@type")) {
            throw new QueryException(701, "JSON-LD group has no @type attribute");
        }
        // Without any term string only the attributes constrain the element
        if (value.toString().isEmpty()) {
            return _createElementAttrFromJson(null, json, attrNode);
        // this.addWarning(771,
        // "Arbitraty elements with attributes are currently not supported.");
        } else {
            SpanQueryWrapper elementWithIdWrapper = this.builder().tag(value.toString());
            // NOTE(review): a null tag wrapper is passed on to the caller as
            // null - confirm that callers handle this
            if (elementWithIdWrapper == null) {
                return null;
            }
            return _createElementAttrFromJson(elementWithIdWrapper, json, attrNode);
        }
    }
    ;
    // Element without attributes
    return this.builder().tag(value.toString());
}
Also used : SpanRepetitionQueryWrapper(de.ids_mannheim.korap.query.wrap.SpanRepetitionQueryWrapper) SpanWildcardQueryWrapper(de.ids_mannheim.korap.query.wrap.SpanWildcardQueryWrapper) QueryException(de.ids_mannheim.korap.util.QueryException) SpanRegexQueryWrapper(de.ids_mannheim.korap.query.wrap.SpanRegexQueryWrapper) SpanSegmentQueryWrapper(de.ids_mannheim.korap.query.wrap.SpanSegmentQueryWrapper) Iterator(java.util.Iterator) JsonNode(com.fasterxml.jackson.databind.JsonNode) QueryBuilder(de.ids_mannheim.korap.query.QueryBuilder) SpanQueryWrapper(de.ids_mannheim.korap.query.wrap.SpanQueryWrapper)

Example 7 with SpanQueryWrapper

use of de.ids_mannheim.korap.query.wrap.SpanQueryWrapper in project Krill by KorAP.

the class KrillQuery method _operationRepetitionFromJson.

/**
 * Deserialize operation:repetition.
 *
 * <p>Reads the repetition boundaries either from a "boundary" object
 * or, as deprecated legacy syntax, from the "min" and "max" fields of
 * the group itself. Both values are sanitized to the range 0..100 and
 * it is ensured that {@code min <= max}.
 *
 * @param json
 *            the operation:repetition group node
 * @param operands
 *            the operands of the group; exactly one is required
 * @return the operand with adjusted min/max if it may be an extension,
 *         otherwise a new {@link SpanRepetitionQueryWrapper} around
 *         the operand
 * @throws QueryException
 *             705 if the number of operands is not exactly one, or
 *             whatever deserializing the operand throws
 */
private SpanQueryWrapper _operationRepetitionFromJson(JsonNode json, JsonNode operands) throws QueryException {
    if (operands.size() != 1) {
        throw new QueryException(705, "Number of operands is not acceptable");
    }

    int min = 0;
    int max = 100;

    if (json.has("boundary")) {
        Boundary b = new Boundary(json.get("boundary"), 0, 100);
        min = b.min;
        max = b.max;
    } else {
        // <legacyCode>
        // The deprecation message is emitted whenever no boundary is
        // given, even if neither min nor max is present
        this.addMessage(0, "Setting boundary by min and max is deprecated");

        // Set minimum value
        if (json.has("min")) {
            min = json.get("min").asInt(0);
        }

        // Set maximum value
        if (json.has("max")) {
            max = json.get("max").asInt(100);
        }
    }

    // Sanitize max: anything outside of 0..100 means "up to 100"
    if (max < 0 || max > 100) {
        max = 100;
    }

    // Sanitize min: clamp to 0..100
    if (min < 0) {
        min = 0;
    } else if (min > 100) {
        min = 100;
    }

    // Check relation between min and max.
    // This used to be the no-op "max = max", which left inverted
    // boundaries untouched; raise max so that min <= max holds.
    if (min > max) {
        max = min;
    }

    SpanQueryWrapper sqw = this._fromKoral(operands.get(0));

    // Extensions carry the repetition themselves
    if (sqw.maybeExtension()) {
        return sqw.setMin(min).setMax(max);
    }

    return new SpanRepetitionQueryWrapper(sqw, min, max);
}
Also used : SpanRepetitionQueryWrapper(de.ids_mannheim.korap.query.wrap.SpanRepetitionQueryWrapper) QueryException(de.ids_mannheim.korap.util.QueryException) SpanQueryWrapper(de.ids_mannheim.korap.query.wrap.SpanQueryWrapper)

Example 8 with SpanQueryWrapper

use of de.ids_mannheim.korap.query.wrap.SpanQueryWrapper in project Krill by KorAP.

the class TestReferenceIndex method testCase3.

// Multiple references: checks the serialized span query built from
// distance-multiple-references.jsonld and the positions of the first
// two matches in the document created by createFieldDoc0()
@Test
public void testCase3() throws IOException, QueryException {
    ki = new KrillIndex();
    ki.addDoc(createFieldDoc0());
    ki.commit();

    String queryFile = getClass().getResource("/queries/reference/distance-multiple-references.jsonld").getFile();
    SpanQuery query = getJSONQuery(queryFile).toQuery();

    // 'cat="vb" & cat="prp" & cat="nn" & #1 .{0,1} #2 & #1 .{0,2}
    // #3 & #3 ->stanford/d #2 & #1 ->stanford #3' annis
    // without layer=c and + relation key
    final String expectedQuery =
            "spanReference(focus(#[1,2]spanSegment({3: <tokens:nn />}, "
            + "focus(#2: spanSegment(spanRelation(tokens:>:stanford/d:tag), "
            + "focus(1: spanReference(focus(#[1,2]spanSegment({2: <tokens:prp />}, "
            + "focus(#2: spanSegment(spanRelation(tokens:>:stanford/d:tag), "
            + "focus(3: spanDistance(focus(1: spanDistance({1: <tokens:vb />}, "
            + "{2: <tokens:prp />}, [(w[1:2], notOrdered, notExcluded)])), "
            + "{3: <tokens:nn />}, [(w[1:3], notOrdered, notExcluded)])))))), 2)))))), 3)";
    assertEquals(expectedQuery, query.toString());

    kr = ki.search(query, (short) 10);

    // Expected {start, end} token positions, in match order
    final int[][] expectedSpans = { { 1, 4 }, { 10, 13 } };
    for (int i = 0; i < expectedSpans.length; i++) {
        assertEquals(expectedSpans[i][0], kr.getMatch(i).getStartPos());
        assertEquals(expectedSpans[i][1], kr.getMatch(i).getEndPos());
    }
}
Also used : SpanQueryWrapper(de.ids_mannheim.korap.query.wrap.SpanQueryWrapper) KrillIndex(de.ids_mannheim.korap.KrillIndex) SpanQuery(org.apache.lucene.search.spans.SpanQuery) Test(org.junit.Test)

Example 9 with SpanQueryWrapper

use of de.ids_mannheim.korap.query.wrap.SpanQueryWrapper in project Krill by KorAP.

the class TestRegexWildcardIndex method indexWildcard.

// Wildcard queries ("*" and "?") against a single document with ten
// surface tokens; each scenario checks the hit count and the snippets
@Test
public void indexWildcard() throws Exception {
    KrillIndex index = new KrillIndex();

    // affe afffe baum baumgarten steingarten franz hans haus efeu effe
    FieldDocument doc = new FieldDocument();
    doc.addTV("base", "affe afffe baum baumgarten steingarten franz hans haus efeu effe",
            "[(0-4)s:affe|_0$<i>0<i>4|-:t$<i>10]"
            + "[(5-10)s:afffe|_1$<i>5<i>10]"
            + "[(11-15)s:baum|_2$<i>11<i>15]"
            + "[(16-26)s:baumgarten|_3$<i>16<i>26]"
            + "[(27-38)s:steingarten|_4$<i>27<i>38]"
            + "[(39-44)s:franz|_5$<i>39<i>44]"
            + "[(45-49)s:hans|_6$<i>45<i>49]"
            + "[(50-54)s:haus|_7$<i>50<i>54]"
            + "[(55-59)s:efeu|_8$<i>55<i>59]"
            + "[(60-64)s:effe|_9$<i>60<i>64]");
    index.addDoc(doc);
    index.commit();

    // First scenario additionally checks the serialized query
    QueryBuilder builder = new QueryBuilder("base");
    SpanQueryWrapper wildcard = builder.wc("s:af*e");
    assertEquals("SpanMultiTermQueryWrapper(base:s:af*e)", wildcard.toQuery().toString());

    Krill krill = _newKrill(wildcard);
    Result result = index.search(krill);
    assertEquals((long) 2, result.getTotalResults());
    assertEquals("[[affe]] afffe ...", result.getMatch(0).getSnippetBrackets());
    assertEquals("affe [[afffe]] baum ...", result.getMatch(1).getSnippetBrackets());

    // Remaining scenarios: { pattern, expected snippet 0, expected snippet 1, ... };
    // the number of snippets is also the expected total number of results.
    // "s:baum.*" yields nothing, as "." is not a wildcard character here.
    final String[][] scenarios = {
        { "s:baum.*" },
        { "s:baum*", "... afffe [[baum]] baumgarten ...", "... baum [[baumgarten]] steingarten ..." },
        { "s:*garten", "... baum [[baumgarten]] steingarten ...", "... baumgarten [[steingarten]] franz ..." },
        { "s:ha?s", "... franz [[hans]] haus ...", "... hans [[haus]] efeu ..." },
        { "s:?ff?", "[[affe]] afffe ...", "... efeu [[effe]]" }
    };

    for (String[] scenario : scenarios) {
        krill = _newKrill(new QueryBuilder("base").wc(scenario[0]));
        result = index.search(krill);
        assertEquals((long) (scenario.length - 1), result.getTotalResults());
        for (int i = 1; i < scenario.length; i++) {
            assertEquals(scenario[i], result.getMatch(i - 1).getSnippetBrackets());
        }
    }
}
Also used : Krill(de.ids_mannheim.korap.Krill) QueryBuilder(de.ids_mannheim.korap.query.QueryBuilder) SpanQueryWrapper(de.ids_mannheim.korap.query.wrap.SpanQueryWrapper) KrillIndex(de.ids_mannheim.korap.KrillIndex) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test)

Example 10 with SpanQueryWrapper

use of de.ids_mannheim.korap.query.wrap.SpanQueryWrapper in project Krill by KorAP.

the class TestWithinIndex method queryJSONpoly2.

// contains(<s>, (es wird | wird es))
// Runs the query from poly2.json against two wiki documents and
// checks document id and token positions of both matches
@Test
public void queryJSONpoly2() throws QueryException, IOException {
    String queryPath = URLDecoder.decode(getClass().getResource("/queries/poly2.json").getFile(), "UTF-8");
    String queryJson = readFile(queryPath);
    SpanQueryWrapper wrapper = new KrillQuery("tokens").fromKoral(queryJson);
    SpanWithinQuery withinQuery = (SpanWithinQuery) wrapper.toQuery();

    KrillIndex index = new KrillIndex();
    index.addDoc(getClass().getResourceAsStream("/wiki/DDD-08370.json.gz"), true);
    index.addDoc(getClass().getResourceAsStream("/wiki/PPP-02924.json.gz"), true);
    index.commit();

    Result result = index.search(withinQuery, (short) 10);
    assertEquals(2, result.getTotalResults());

    // Expected { localDocID, startPos, endPos }, in match order
    final int[][] expectedMatches = { { 0, 76, 93 }, { 1, 237, 252 } };
    for (int i = 0; i < expectedMatches.length; i++) {
        assertEquals(expectedMatches[i][0], result.getMatch(i).getLocalDocID());
        assertEquals(expectedMatches[i][1], result.getMatch(i).getStartPos());
        assertEquals(expectedMatches[i][2], result.getMatch(i).getEndPos());
    }
}
Also used : KrillQuery(de.ids_mannheim.korap.KrillQuery) SpanWithinQuery(de.ids_mannheim.korap.query.SpanWithinQuery) SpanQueryWrapper(de.ids_mannheim.korap.query.wrap.SpanQueryWrapper) KrillIndex(de.ids_mannheim.korap.KrillIndex) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test)

Aggregations

SpanQueryWrapper (de.ids_mannheim.korap.query.wrap.SpanQueryWrapper)165 Test (org.junit.Test)150 SpanQuery (org.apache.lucene.search.spans.SpanQuery)65 KrillIndex (de.ids_mannheim.korap.KrillIndex)17 QueryException (de.ids_mannheim.korap.util.QueryException)14 KrillQuery (de.ids_mannheim.korap.KrillQuery)13 QueryBuilder (de.ids_mannheim.korap.query.QueryBuilder)11 Result (de.ids_mannheim.korap.response.Result)8 JsonNode (com.fasterxml.jackson.databind.JsonNode)7 Krill (de.ids_mannheim.korap.Krill)5 SpanRepetitionQueryWrapper (de.ids_mannheim.korap.query.wrap.SpanRepetitionQueryWrapper)3 TestSimple.getJsonString (de.ids_mannheim.korap.TestSimple.getJsonString)2 SpanClassQuery (de.ids_mannheim.korap.query.SpanClassQuery)2 SpanDistanceQuery (de.ids_mannheim.korap.query.SpanDistanceQuery)2 SpanWithinQuery (de.ids_mannheim.korap.query.SpanWithinQuery)2 SpanAlterQueryWrapper (de.ids_mannheim.korap.query.wrap.SpanAlterQueryWrapper)2 SpanRegexQueryWrapper (de.ids_mannheim.korap.query.wrap.SpanRegexQueryWrapper)2 SpanSegmentQueryWrapper (de.ids_mannheim.korap.query.wrap.SpanSegmentQueryWrapper)2 Term (org.apache.lucene.index.Term)2 SpanOrQuery (org.apache.lucene.search.spans.SpanOrQuery)2