Search in sources :

Example 1 with SpanWildcardQueryWrapper

Use of de.ids_mannheim.korap.query.wrap.SpanWildcardQueryWrapper in project Krill by KorAP.

The method _termFromJson of the class KrillQuery.

/**
 * Deserialize a {@code koral:term} (or, when {@code isSpan} is set, a
 * span-like node) from its JSON-LD representation into a span query wrapper.
 *
 * <p>The term string is assembled as
 * {@code [direction][foundry/][layer:]key[:value]}. For the surface layer
 * ({@code "orth"}, internally {@code "."}) the direction and foundry prefix
 * is discarded and the layer becomes {@code "s"}, or {@code "i"} for a
 * case-insensitive term.
 *
 * <p>Terms are turned into regex, wildcard or plain segment queries depending
 * on the {@code type} attribute; unknown types are reported with warning 746
 * and handled as plain strings. Non-terms are turned into tag (element)
 * queries, optionally constrained by {@code attr}.
 *
 * @param json      the JSON-LD node to deserialize
 * @param isSpan    if {@code true}, the node is never treated as a term,
 *                  regardless of its {@code @type}
 * @param direction optional relation direction whose {@code value()} is
 *                  prefixed to the term string; may be {@code null}
 * @return the query wrapper for the node; may be {@code null} if the tag
 *         builder returns {@code null} for an element with attributes
 * @throws QueryException 701 if {@code @type} is missing on the node or on
 *                        its {@code attr} node, 740 if neither a non-empty
 *                        {@code key} nor an {@code attr} is given, 741 if
 *                        the {@code match} relation is neither
 *                        {@code match:eq} nor {@code match:ne}
 */
// TODO: Not optimal as it does not respect non-term
private SpanQueryWrapper _termFromJson(JsonNode json, boolean isSpan, RelationDirection direction) throws QueryException {
    if (!json.has("@type")) {
        throw new QueryException(701, "JSON-LD group has no @type attribute");
    }

    // Empty koral:span hack: a span is never handled as a term
    final boolean isTerm = !isSpan && json.get("@type").asText().equals("koral:term");

    final boolean hasKey = json.has("key") && json.get("key").asText().length() > 0;

    // A node without a key is only acceptable if it carries attributes
    if (!hasKey && !json.has("attr")) {
        throw new QueryException(740, "Key definition is missing in term or span");
    }

    boolean isCaseInsensitive = false;
    if (json.has("caseInsensitive") && json.get("caseInsensitive").asBoolean()) {
        // <legacy> boolean attribute takes precedence; flags are not inspected then
        isCaseInsensitive = true;
    } else if (json.has("flags") && json.get("flags").isArray()) {
        // Flags
        for (JsonNode flagNode : json.get("flags")) {
            String flag = flagNode.asText();
            if (flag.equals("flags:caseInsensitive")) {
                isCaseInsensitive = true;
            } else {
                this.addWarning(748, "Flag is unknown", flag);
            }
        }
    }

    StringBuilder value = new StringBuilder();
    if (direction != null) {
        value.append(direction.value());
    }

    // No default foundry defined
    if (json.has("foundry") && json.get("foundry").asText().length() > 0) {
        value.append(json.get("foundry").asText()).append('/');
    }

    if (json.has("layer") && json.get("layer").asText().length() > 0) {
        String layer = json.get("layer").asText();

        // Map long layer names to their short prefixes;
        // unknown layer names are passed through unchanged
        switch (layer) {
            case "lemma":
                layer = "l";
                break;
            case "pos":
                layer = "p";
                break;
            case "orth":
                // TODO: THIS IS AN UGLY HACK! AND SHOULD BE NAMED "SURFACE" or . OR *
                layer = ".";
                break;
            case "struct":
                layer = "s";
                break;
            case "const":
                layer = "c";
                break;
        }

        if (isCaseInsensitive && isTerm) {
            if (layer.equals(".")) {
                layer = "i";
            } else {
                this.addWarning(767, "Case insensitivity is currently not supported for this layer");
            }
        }

        // Ignore foundry for orth layer (this also drops a direction prefix)
        if (layer.equals(".")) {
            layer = "s";
            value.setLength(0);
        } else if (layer.equals("i")) {
            value.setLength(0);
        }

        value.append(layer).append(':');
    }

    if (hasKey) {
        String key = json.get("key").asText();
        // Locale.ROOT keeps case folding independent of the JVM default
        // locale (e.g. Turkish dotless i)
        // NOTE(review): the key is lowercased even when warning 767 was
        // issued for an unsupported layer - confirm this is intended
        value.append(isCaseInsensitive ? key.toLowerCase(java.util.Locale.ROOT) : key);
    }

    if (json.has("value") && json.get("value").asText().length() > 0) {
        value.append(':').append(json.get("value").asText());
    }

    final String term = value.toString();

    if (isTerm) {
        String match = json.has("match") ? json.get("match").asText() : "match:eq";

        // Regular expression or wildcard
        if (json.has("type")) {
            QueryBuilder qb = this.builder();

            // Branch on type
            switch (json.get("type").asText()) {
                case "type:regex": {
                    // The regex can be rewritten to an any token
                    // (returned before the match relation is inspected)
                    if (term.matches("^[si]:\\.[\\+\\*]\\??$")) {
                        return new SpanRepetitionQueryWrapper();
                    }

                    SpanRegexQueryWrapper srqw = qb.re(term, isCaseInsensitive);
                    if (match.equals("match:ne")) {
                        if (DEBUG) {
                            log.trace("Term is negated");
                        }
                        return this.builder().seg().without(srqw);
                    } else if (match.equals("match:eq")) {
                        return srqw;
                    }
                    throw new QueryException(741, "Match relation unknown");
                }
                case "type:wildcard": {
                    SpanWildcardQueryWrapper swcqw = qb.wc(term, isCaseInsensitive);
                    if (match.equals("match:ne")) {
                        if (DEBUG) {
                            log.trace("Term is negated");
                        }
                        return this.builder().seg().without(swcqw);
                    } else if (match.equals("match:eq")) {
                        return swcqw;
                    }
                    throw new QueryException(741, "Match relation unknown");
                }
                case "type:string":
                    break;
                default:
                    this.addWarning(746, "Term type is not supported - treated as a string");
            }
        }

        // Plain string term
        SpanSegmentQueryWrapper ssqw = this.builder().seg(term);
        if (match.equals("match:ne")) {
            if (DEBUG) {
                log.trace("Term is negated");
            }
            ssqw.makeNegative();
            return this.builder().seg().without(ssqw);
        } else if (match.equals("match:eq")) {
            return ssqw;
        }
        throw new QueryException(741, "Match relation unknown");
    }

    // Not a term: build an element (tag) query, optionally with attributes
    if (json.has("attr")) {
        JsonNode attrNode = json.get("attr");
        if (!attrNode.has("@type")) {
            throw new QueryException(701, "JSON-LD group has no @type attribute");
        }

        // Attributes without an element name
        if (term.isEmpty()) {
            return _createElementAttrFromJson(null, json, attrNode);
        }

        SpanQueryWrapper elementWithIdWrapper = this.builder().tag(term);
        if (elementWithIdWrapper == null) {
            return null;
        }
        return _createElementAttrFromJson(elementWithIdWrapper, json, attrNode);
    }

    return this.builder().tag(term);
}
Also used : SpanRepetitionQueryWrapper(de.ids_mannheim.korap.query.wrap.SpanRepetitionQueryWrapper) SpanWildcardQueryWrapper(de.ids_mannheim.korap.query.wrap.SpanWildcardQueryWrapper) QueryException(de.ids_mannheim.korap.util.QueryException) SpanRegexQueryWrapper(de.ids_mannheim.korap.query.wrap.SpanRegexQueryWrapper) SpanSegmentQueryWrapper(de.ids_mannheim.korap.query.wrap.SpanSegmentQueryWrapper) Iterator(java.util.Iterator) JsonNode(com.fasterxml.jackson.databind.JsonNode) QueryBuilder(de.ids_mannheim.korap.query.QueryBuilder) SpanQueryWrapper(de.ids_mannheim.korap.query.wrap.SpanQueryWrapper)

Aggregations

JsonNode (com.fasterxml.jackson.databind.JsonNode)1 QueryBuilder (de.ids_mannheim.korap.query.QueryBuilder)1 SpanQueryWrapper (de.ids_mannheim.korap.query.wrap.SpanQueryWrapper)1 SpanRegexQueryWrapper (de.ids_mannheim.korap.query.wrap.SpanRegexQueryWrapper)1 SpanRepetitionQueryWrapper (de.ids_mannheim.korap.query.wrap.SpanRepetitionQueryWrapper)1 SpanSegmentQueryWrapper (de.ids_mannheim.korap.query.wrap.SpanSegmentQueryWrapper)1 SpanWildcardQueryWrapper (de.ids_mannheim.korap.query.wrap.SpanWildcardQueryWrapper)1 QueryException (de.ids_mannheim.korap.util.QueryException)1 Iterator (java.util.Iterator)1