Search in sources:

Example 1 with SpanSegmentQueryWrapper

use of de.ids_mannheim.korap.query.wrap.SpanSegmentQueryWrapper in project Krill by KorAP.

the class KrillQuery method _termFromJson.

/**
 * Deserialize a {@code koral:term} (or a term-like {@code koral:span}) JSON
 * node into a span query wrapper.
 *
 * <p>The term string is assembled as
 * {@code [direction][foundry/][layer:]key[:value]}. For the {@code orth}
 * layer everything gathered before the layer (direction prefix and foundry)
 * is discarded and the layer prefix becomes {@code s:} (or {@code i:} for
 * case-insensitive terms).
 *
 * <p>If the node is treated as a term, the result is a regex, wildcard or
 * plain segment query, optionally negated via {@code match:ne}. Otherwise
 * the result is a tag query, optionally combined with the node's
 * {@code attr} definition.
 *
 * TODO: Not optimal as it does not respect non-term
 *
 * @param json      the JSON node to deserialize; must carry {@code @type}
 * @param isSpan    if {@code true} the node is never treated as a term,
 *                  regardless of its {@code @type}
 * @param direction optional relation direction whose {@code value()} is
 *                  prepended to the term string; may be {@code null}
 * @return the resulting query wrapper; {@code null} if an {@code attr} is
 *         given and the builder yields no tag query for the term string
 * @throws QueryException 701 if {@code @type} is missing on the node or on
 *         its {@code attr}, 740 if neither a non-empty {@code key} nor an
 *         {@code attr} is present, 741 if the {@code match} relation is
 *         neither {@code match:eq} nor {@code match:ne}
 */
private SpanQueryWrapper _termFromJson(JsonNode json, boolean isSpan, RelationDirection direction) throws QueryException {
    if (!json.has("@type")) {
        throw new QueryException(701, "JSON-LD group has no @type attribute");
    }

    String termType = json.get("@type").asText();
    boolean isTerm = termType.equals("koral:term");
    boolean isCaseInsensitive = false;

    // A key is only optional when an attribute definition is present
    boolean hasKey = json.has("key") && json.get("key").asText().length() > 0;
    if (!hasKey && !json.has("attr")) {
        throw new QueryException(740, "Key definition is missing in term or span");
    }

    // Empty koral:span hack
    if (isSpan) {
        isTerm = false;
    }

    // <legacy> The "caseInsensitive" attribute takes precedence over flags
    if (json.has("caseInsensitive") && json.get("caseInsensitive").asBoolean()) {
        isCaseInsensitive = true;
    }
    // Flags
    else if (json.has("flags") && json.get("flags").isArray()) {
        Iterator<JsonNode> flags = json.get("flags").elements();
        while (flags.hasNext()) {
            String flag = flags.next().asText();
            if (flag.equals("flags:caseInsensitive")) {
                isCaseInsensitive = true;
            } else {
                this.addWarning(748, "Flag is unknown", flag);
            }
        }
    }

    StringBuilder value = new StringBuilder();
    if (direction != null) {
        value.append(direction.value());
    }

    if (json.has("foundry") && json.get("foundry").asText().length() > 0) {
        value.append(json.get("foundry").asText()).append('/');
    }

    // No default foundry defined
    if (json.has("layer") && json.get("layer").asText().length() > 0) {
        String layer = json.get("layer").asText();

        // Map long layer names to their prefixes; unknown layer names
        // are passed through unchanged
        switch (layer) {
            case "lemma":
                layer = "l";
                break;
            case "pos":
                layer = "p";
                break;
            case "orth":
                // TODO: THIS IS AN UGLY HACK! AND SHOULD BE NAMED "SURFACE" or . OR *
                layer = ".";
                break;
            case "struct":
                layer = "s";
                break;
            case "const":
                layer = "c";
                break;
        }

        if (isCaseInsensitive && isTerm) {
            if (layer.equals(".")) {
                layer = "i";
            } else {
                this.addWarning(767, "Case insensitivity is currently not supported for this layer");
            }
        }

        // Ignore foundry for orth layer
        if (layer.equals(".")) {
            layer = "s";
            value.setLength(0);
        } else if (layer.equals("i")) {
            value.setLength(0);
        }

        value.append(layer).append(':');
    }

    if (hasKey) {
        String key = json.get("key").asText();
        // Lower-case with a fixed locale so that the resulting term does
        // not depend on the default locale of the JVM
        value.append(isCaseInsensitive ? key.toLowerCase(java.util.Locale.ROOT) : key);
    }

    if (json.has("value") && json.get("value").asText().length() > 0) {
        value.append(':').append(json.get("value").asText());
    }

    // The term string is complete from here on
    final String term = value.toString();

    // Regular expression or wildcard
    if (isTerm) {
        String match = "match:eq";
        if (json.has("match")) {
            match = json.get("match").asText();
        }

        if (json.has("type")) {
            QueryBuilder qb = this.builder();

            // Branch on type
            switch (json.get("type").asText()) {
                case "type:regex": {
                    // The regex can be rewritten to an any token
                    if (term.matches("^[si]:\\.[\\+\\*]\\??$")) {
                        return new SpanRepetitionQueryWrapper();
                    }

                    SpanRegexQueryWrapper srqw = qb.re(term, isCaseInsensitive);
                    if (match.equals("match:ne")) {
                        if (DEBUG) {
                            log.trace("Term is negated");
                        }
                        return this.builder().seg().without(srqw);
                    } else if (match.equals("match:eq")) {
                        return srqw;
                    }
                    throw new QueryException(741, "Match relation unknown");
                }
                case "type:wildcard": {
                    SpanWildcardQueryWrapper swcqw = qb.wc(term, isCaseInsensitive);
                    if (match.equals("match:ne")) {
                        if (DEBUG) {
                            log.trace("Term is negated");
                        }
                        return this.builder().seg().without(swcqw);
                    } else if (match.equals("match:eq")) {
                        return swcqw;
                    }
                    throw new QueryException(741, "Match relation unknown");
                }
                case "type:string":
                    break;
                default:
                    // Unknown types fall through to the plain string handling below
                    this.addWarning(746, "Term type is not supported - treated as a string");
            }
        }

        SpanSegmentQueryWrapper ssqw = this.builder().seg(term);
        if (match.equals("match:ne")) {
            if (DEBUG) {
                log.trace("Term is negated");
            }
            // NOTE(review): the segment is marked negative AND excluded via
            // without(); presumably both are required - confirm against
            // SpanSegmentQueryWrapper
            ssqw.makeNegative();
            return this.builder().seg().without(ssqw);
        } else if (match.equals("match:eq")) {
            return ssqw;
        } else {
            throw new QueryException(741, "Match relation unknown");
        }
    }

    if (json.has("attr")) {
        JsonNode attrNode = json.get("attr");
        if (!attrNode.has("@type")) {
            throw new QueryException(701, "JSON-LD group has no @type attribute");
        }

        // Attribute without an element
        if (term.isEmpty()) {
            return _createElementAttrFromJson(null, json, attrNode);
        }

        SpanQueryWrapper elementWithIdWrapper = this.builder().tag(term);
        if (elementWithIdWrapper == null) {
            return null;
        }
        return _createElementAttrFromJson(elementWithIdWrapper, json, attrNode);
    }

    return this.builder().tag(term);
}
Also used : SpanRepetitionQueryWrapper(de.ids_mannheim.korap.query.wrap.SpanRepetitionQueryWrapper) SpanWildcardQueryWrapper(de.ids_mannheim.korap.query.wrap.SpanWildcardQueryWrapper) QueryException(de.ids_mannheim.korap.util.QueryException) SpanRegexQueryWrapper(de.ids_mannheim.korap.query.wrap.SpanRegexQueryWrapper) SpanSegmentQueryWrapper(de.ids_mannheim.korap.query.wrap.SpanSegmentQueryWrapper) Iterator(java.util.Iterator) JsonNode(com.fasterxml.jackson.databind.JsonNode) QueryBuilder(de.ids_mannheim.korap.query.QueryBuilder) SpanQueryWrapper(de.ids_mannheim.korap.query.wrap.SpanQueryWrapper)

Example 2 with SpanSegmentQueryWrapper

use of de.ids_mannheim.korap.query.wrap.SpanSegmentQueryWrapper in project Krill by KorAP.

the class TestSpanSegmentQuery method spanSegmentAlterQuery.

/**
 * Builds a segment query step by step and checks its string form after
 * each stage: empty, with a single term, and finally combined with an
 * alternation and a regular expression.
 */
@Test
public void spanSegmentAlterQuery() throws QueryException {
    final SpanSegmentQueryWrapper segment = new SpanSegmentQueryWrapper("field");

    // Without any operand there is no query to serialize
    assertNull(segment.toQuery());

    // A single operand is serialized as a plain term
    segment.with("a");
    final String singleTerm = segment.toQuery().toString();
    assertEquals("field:a", singleTerm);

    // Further operands are nested into the existing segment
    final SpanAlterQueryWrapper alternation = new SpanAlterQueryWrapper("field", "c", "d");
    segment.with(alternation);
    final SpanRegexQueryWrapper regex = new SpanRegexQueryWrapper("field", "a.*b");
    segment.with(regex);

    final String expected = "spanSegment(spanSegment(field:a, spanOr([field:c, field:d])), SpanMultiTermQueryWrapper(field:/a.*b/))";
    assertEquals(expected, segment.toQuery().toString());
}
Also used : SpanRegexQueryWrapper(de.ids_mannheim.korap.query.wrap.SpanRegexQueryWrapper) SpanSegmentQueryWrapper(de.ids_mannheim.korap.query.wrap.SpanSegmentQueryWrapper) SpanAlterQueryWrapper(de.ids_mannheim.korap.query.wrap.SpanAlterQueryWrapper) Test(org.junit.Test)

Example 3 with SpanSegmentQueryWrapper

use of de.ids_mannheim.korap.query.wrap.SpanSegmentQueryWrapper in project Krill by KorAP.

the class TestSegmentNegationIndex method testcaseNegation.

/**
 * Indexes four field documents and runs two sequence queries on the
 * "tokens" field: first a segment requiring both "s:b" and "s:c" followed
 * by "s:d", then a segment requiring "s:b" but excluding "s:c" followed by
 * "s:a". Document ids and match positions are checked for each hit.
 */
@Test
public void testcaseNegation() throws Exception {
    ki = new KrillIndex();
    ki.addDoc(createFieldDoc0());
    ki.addDoc(createFieldDoc1());
    ki.addDoc(createFieldDoc2());
    ki.addDoc(createFieldDoc3());
    ki.commit();

    // Positive segment: "s:b" together with "s:c", followed by "s:d"
    SpanSegmentQueryWrapper ssqw = new SpanSegmentQueryWrapper("tokens", "s:b");
    ssqw.with("s:c");
    SpanSequenceQueryWrapper sqw = new SpanSequenceQueryWrapper("tokens", ssqw).append("s:d");
    kr = ki.search(sqw.toQuery(), (short) 10);

    // Expected value first, so that a failure message reports the values
    // the right way round
    assertEquals("totalResults", 2, kr.getTotalResults());

    // Match #0
    assertEquals("doc-number (0)", 0, kr.getMatch(0).getLocalDocID());
    assertEquals("StartPos (0)", 4, kr.getMatch(0).startPos);
    assertEquals("EndPos (0)", 6, kr.getMatch(0).endPos);

    // Match #1 in the other atomic index
    assertEquals("doc-number (1)", 3, kr.getMatch(1).getLocalDocID());
    assertEquals("StartPos (1)", 0, kr.getMatch(1).startPos);
    assertEquals("EndPos (1)", 2, kr.getMatch(1).endPos);

    // Negated segment: "s:b" without "s:c", followed by "s:a"
    ssqw = new SpanSegmentQueryWrapper("tokens", "s:b");
    ssqw.without("s:c");
    sqw = new SpanSequenceQueryWrapper("tokens", ssqw).append("s:a");
    kr = ki.search(sqw.toQuery(), (short) 10);

    assertEquals("doc-number (0)", 0, kr.getMatch(0).getLocalDocID());
    assertEquals("StartPos (0)", 2, kr.getMatch(0).startPos);
    assertEquals("EndPos (0)", 4, kr.getMatch(0).endPos);

    assertEquals("doc-number (1)", 1, kr.getMatch(1).getLocalDocID());
    assertEquals("StartPos (1)", 1, kr.getMatch(1).startPos);
    assertEquals("EndPos (1)", 3, kr.getMatch(1).endPos);

    assertEquals("doc-number (2)", 1, kr.getMatch(2).getLocalDocID());
    assertEquals("StartPos (2)", 2, kr.getMatch(2).startPos);
    assertEquals("EndPos (2)", 4, kr.getMatch(2).endPos);

    assertEquals("doc-number (3)", 2, kr.getMatch(3).getLocalDocID());
    assertEquals("StartPos (3)", 1, kr.getMatch(3).startPos);
    assertEquals("EndPos (3)", 3, kr.getMatch(3).endPos);
}
Also used : SpanSegmentQueryWrapper(de.ids_mannheim.korap.query.wrap.SpanSegmentQueryWrapper) KrillIndex(de.ids_mannheim.korap.KrillIndex) SpanSequenceQueryWrapper(de.ids_mannheim.korap.query.wrap.SpanSequenceQueryWrapper) Test(org.junit.Test)

Example 4 with SpanSegmentQueryWrapper

use of de.ids_mannheim.korap.query.wrap.SpanSegmentQueryWrapper in project Krill by KorAP.

the class TestSpanSegmentAlterQuery method spanAlterQuery4.

/**
 * Checks that a three-term segment query used as one alternative of an
 * alternation is serialized as a nested spanSegment inside the spanOr.
 */
@Test
public void spanAlterQuery4() throws QueryException {
    final SpanSegmentQueryWrapper segment = new SpanSegmentQueryWrapper("field", "a", "b", "c");
    final SpanAlterQueryWrapper alternation = new SpanAlterQueryWrapper("field");

    // Alternatives in order: plain term, the segment, another plain term
    alternation.or("d").or(segment).or("e");

    final String expected = "spanOr([field:d, spanSegment(spanSegment(field:a, field:b), field:c), field:e])";
    final String actual = alternation.toQuery().toString();
    assertEquals(expected, actual);
}
Also used : SpanSegmentQueryWrapper(de.ids_mannheim.korap.query.wrap.SpanSegmentQueryWrapper) SpanAlterQueryWrapper(de.ids_mannheim.korap.query.wrap.SpanAlterQueryWrapper) Test(org.junit.Test)

Example 5 with SpanSegmentQueryWrapper

use of de.ids_mannheim.korap.query.wrap.SpanSegmentQueryWrapper in project Krill by KorAP.

the class TestSpanSegmentSequenceQuery method spanSegmentSequenceQuery2.

/**
 * Checks the serialization of a two-term sequence that is extended by a
 * segment query built from the strings "-c", "-d" and "-e".
 */
@Test
public void spanSegmentSequenceQuery2() throws QueryException {
    final SpanSegmentQueryWrapper segment = new SpanSegmentQueryWrapper("field", "-c", "-d", "-e");
    final SpanSequenceQueryWrapper sequence = new SpanSequenceQueryWrapper("field", "a", "b");

    // The segment becomes the last element of the sequence
    sequence.append(segment);

    final String expected = "spanNext(spanNext(field:a, field:b), spanSegment(spanSegment(field:-c, field:-d), field:-e))";
    final String actual = sequence.toQuery().toString();
    assertEquals(expected, actual);
}
Also used : SpanSegmentQueryWrapper(de.ids_mannheim.korap.query.wrap.SpanSegmentQueryWrapper) SpanSequenceQueryWrapper(de.ids_mannheim.korap.query.wrap.SpanSequenceQueryWrapper) Test(org.junit.Test)

Aggregations

SpanSegmentQueryWrapper (de.ids_mannheim.korap.query.wrap.SpanSegmentQueryWrapper): 13; Test (org.junit.Test): 11; SpanRegexQueryWrapper (de.ids_mannheim.korap.query.wrap.SpanRegexQueryWrapper): 5; SpanAlterQueryWrapper (de.ids_mannheim.korap.query.wrap.SpanAlterQueryWrapper): 4; SpanSequenceQueryWrapper (de.ids_mannheim.korap.query.wrap.SpanSequenceQueryWrapper): 3; JsonNode (com.fasterxml.jackson.databind.JsonNode): 2; SpanQueryWrapper (de.ids_mannheim.korap.query.wrap.SpanQueryWrapper): 2; QueryException (de.ids_mannheim.korap.util.QueryException): 2; KrillIndex (de.ids_mannheim.korap.KrillIndex): 1; Test (de.ids_mannheim.korap.Test): 1; MultiTerm (de.ids_mannheim.korap.index.MultiTerm): 1; QueryBuilder (de.ids_mannheim.korap.query.QueryBuilder): 1; SpanRepetitionQueryWrapper (de.ids_mannheim.korap.query.wrap.SpanRepetitionQueryWrapper): 1; SpanWildcardQueryWrapper (de.ids_mannheim.korap.query.wrap.SpanWildcardQueryWrapper): 1; Iterator (java.util.Iterator): 1; StandardAnalyzer (org.apache.lucene.analysis.standard.StandardAnalyzer): 1; DirectoryReader (org.apache.lucene.index.DirectoryReader): 1; IndexWriter (org.apache.lucene.index.IndexWriter): 1; IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig): 1; LeafReaderContext (org.apache.lucene.index.LeafReaderContext): 1