use of de.ids_mannheim.korap.query.wrap.SpanQueryWrapper in project Krill by KorAP.
the class KrillQuery method _termFromJson.
/**
 * Deserialize a koral:term (or, when {@code isSpan} is set, a
 * koral:span) into a span query wrapper.
 *
 * <p>
 * The term string is assembled as
 * {@code [direction][foundry/][layer:]key[:value]}. Long layer names
 * are abbreviated (lemma=l, pos=p, struct=s, const=c). The "orth"
 * layer is mapped to "s", or to "i" for case insensitive terms; in
 * both cases everything collected before the layer (direction and
 * foundry) is discarded.
 * </p>
 *
 * <p>
 * For terms the optional "type" (regex, wildcard, string) and
 * "match" (match:eq, match:ne) attributes select the kind of query
 * and whether it is negated. Everything that is not a term is
 * turned into a tag query, optionally combined with the "attr"
 * definition of the node.
 * </p>
 *
 * TODO: Not optimal as it does not respect non-term
 *
 * @param json
 *            the koral:term or koral:span node
 * @param isSpan
 *            if true, the node is never treated as a term
 * @param direction
 *            relation direction whose value is prefixed to the
 *            term string; may be null
 * @return the query wrapper; null if an attribute query was requested
 *         but the builder returned no tag wrapper
 * @throws QueryException
 *             701 if the node or its "attr" has no "@type",
 *             740 if neither a key nor an "attr" is given,
 *             741 if the match relation is unknown
 */
private SpanQueryWrapper _termFromJson(JsonNode json, boolean isSpan, RelationDirection direction) throws QueryException {
    if (!json.has("@type")) {
        throw new QueryException(701, "JSON-LD group has no @type attribute");
    }

    // Empty koral:span hack: a span is never handled as a term,
    // whatever its @type says
    final boolean isTerm = !isSpan && json.get("@type").asText().equals("koral:term");

    final String key = json.has("key") ? json.get("key").asText() : "";

    // why must it have an attr?
    if (key.isEmpty() && !json.has("attr")) {
        throw new QueryException(740, "Key definition is missing in term or span");
    }

    boolean isCaseInsensitive = false;

    // <legacy>
    if (json.has("caseInsensitive") && json.get("caseInsensitive").asBoolean()) {
        isCaseInsensitive = true;
    }
    // Flags
    else if (json.has("flags") && json.get("flags").isArray()) {
        for (JsonNode flagNode : json.get("flags")) {
            final String flag = flagNode.asText();
            if (flag.equals("flags:caseInsensitive")) {
                isCaseInsensitive = true;
            }
            else {
                this.addWarning(748, "Flag is unknown", flag);
            }
        }
    }

    final StringBuilder value = new StringBuilder();

    if (direction != null) {
        value.append(direction.value());
    }

    // No default foundry defined
    final String foundry = json.has("foundry") ? json.get("foundry").asText() : "";
    if (!foundry.isEmpty()) {
        value.append(foundry).append('/');
    }

    String layer = json.has("layer") ? json.get("layer").asText() : "";
    if (!layer.isEmpty()) {
        switch (layer) {
            case "lemma":
                layer = "l";
                break;
            case "pos":
                layer = "p";
                break;
            case "orth":
                // TODO: THIS IS AN UGLY HACK! AND SHOULD BE NAMED "SURFACE" or . OR *
                layer = ".";
                break;
            case "struct":
                layer = "s";
                break;
            case "const":
                layer = "c";
                break;
        }

        if (isCaseInsensitive && isTerm) {
            if (layer.equals(".")) {
                layer = "i";
            }
            else {
                this.addWarning(767, "Case insensitivity is currently not supported for this layer");
            }
        }

        // Ignore foundry (and anything else collected so far)
        // for the orth layer
        if (layer.equals(".")) {
            layer = "s";
            value.setLength(0);
        }
        else if (layer.equals("i")) {
            value.setLength(0);
        }

        value.append(layer).append(':');
    }

    if (!key.isEmpty()) {
        // Lowercase independent of the JVM default locale, so the
        // resulting term does not differ between hosts (e.g. "I"
        // would become a dotless i under a Turkish locale)
        value.append(isCaseInsensitive ? key.toLowerCase(java.util.Locale.ROOT) : key);
    }

    final String termValue = json.has("value") ? json.get("value").asText() : "";
    if (!termValue.isEmpty()) {
        value.append(':').append(termValue);
    }

    // The term string is complete from here on
    final String term = value.toString();

    // Regular expression or wildcard
    if (isTerm) {
        String match = "match:eq";
        if (json.has("match")) {
            match = json.get("match").asText();
        }

        if (json.has("type")) {
            QueryBuilder qb = this.builder();

            // Branch on type
            switch (json.get("type").asText()) {
                case "type:regex": {
                    // The regex can be rewritten to an any token
                    if (term.matches("^[si]:\\.[\\+\\*]\\??$")) {
                        return new SpanRepetitionQueryWrapper();
                    }

                    SpanRegexQueryWrapper srqw = qb.re(term, isCaseInsensitive);

                    if (match.equals("match:ne")) {
                        if (DEBUG)
                            log.trace("Term is negated");
                        return this.builder().seg().without(srqw);
                    }
                    else if (match.equals("match:eq")) {
                        return srqw;
                    }
                    throw new QueryException(741, "Match relation unknown");
                }
                case "type:wildcard": {
                    SpanWildcardQueryWrapper swcqw = qb.wc(term, isCaseInsensitive);

                    if (match.equals("match:ne")) {
                        if (DEBUG)
                            log.trace("Term is negated");
                        return this.builder().seg().without(swcqw);
                    }
                    else if (match.equals("match:eq")) {
                        return swcqw;
                    }
                    throw new QueryException(741, "Match relation unknown");
                }
                case "type:string":
                    break;
                default:
                    this.addWarning(746, "Term type is not supported - treated as a string");
            }
        }

        // Plain string term (also the fallback for unknown types)
        SpanSegmentQueryWrapper ssqw = this.builder().seg(term);

        if (match.equals("match:ne")) {
            if (DEBUG)
                log.trace("Term is negated");
            ssqw.makeNegative();
            return this.builder().seg().without(ssqw);
        }
        else if (match.equals("match:eq")) {
            return ssqw;
        }
        else {
            throw new QueryException(741, "Match relation unknown");
        }
    }

    // Not a term: build a tag query, optionally with attributes
    if (json.has("attr")) {
        JsonNode attrNode = json.get("attr");
        if (!attrNode.has("@type")) {
            throw new QueryException(701, "JSON-LD group has no @type attribute");
        }

        // Attributes without any element definition
        if (term.isEmpty()) {
            return _createElementAttrFromJson(null, json, attrNode);
        }

        SpanQueryWrapper elementWithIdWrapper = this.builder().tag(term);
        if (elementWithIdWrapper == null) {
            return null;
        }
        return _createElementAttrFromJson(elementWithIdWrapper, json, attrNode);
    }

    return this.builder().tag(term);
}
use of de.ids_mannheim.korap.query.wrap.SpanQueryWrapper in project Krill by KorAP.
the class KrillQuery method _operationRepetitionFromJson.
/**
 * Deserialize operation:repetition.
 *
 * <p>
 * The repetition bounds are read from the "boundary" attribute or,
 * as a deprecated fallback, from the "min" and "max" attributes.
 * Both bounds are clamped to the range 0..100; a negative maximum
 * is treated as 100. If the minimum still exceeds the maximum, the
 * maximum is raised to the minimum.
 * </p>
 *
 * @param json
 *            the operation node carrying the bounds
 * @param operands
 *            the operand list; must contain exactly one operand
 * @return the operand itself with the bounds applied if it reports
 *         {@code maybeExtension()}, otherwise a repetition wrapper
 *         around the operand
 * @throws QueryException
 *             705 if the number of operands is not exactly one
 */
private SpanQueryWrapper _operationRepetitionFromJson(JsonNode json, JsonNode operands) throws QueryException {
    if (operands.size() != 1) {
        throw new QueryException(705, "Number of operands is not acceptable");
    }

    int min = 0;
    int max = 100;

    if (json.has("boundary")) {
        Boundary b = new Boundary(json.get("boundary"), 0, 100);
        min = b.min;
        max = b.max;
    }
    // <legacyCode>
    else {
        this.addMessage(0, "Setting boundary by min and max is deprecated");

        // Set minimum value
        if (json.has("min")) {
            min = json.get("min").asInt(0);
        }

        // Set maximum value
        if (json.has("max")) {
            max = json.get("max").asInt(100);
        }
    }

    // Sanitize max: negative and oversized values both mean "up to 100"
    if (max < 0 || max > 100) {
        max = 100;
    }

    // Sanitize min
    if (min < 0) {
        min = 0;
    }
    else if (min > 100) {
        min = 100;
    }

    // Check relation between min and max: an inverted range is
    // repaired by raising max to min (both are within 0..100 here)
    if (min > max) {
        max = min;
    }

    SpanQueryWrapper sqw = this._fromKoral(operands.get(0));

    if (sqw.maybeExtension()) {
        return sqw.setMin(min).setMax(max);
    }

    return new SpanRepetitionQueryWrapper(sqw, min, max);
}
use of de.ids_mannheim.korap.query.wrap.SpanQueryWrapper in project Krill by KorAP.
the class TestReferenceIndex method testCase3.
/**
 * Multiple references: checks the serialization of the query read
 * from distance-multiple-references.jsonld and the positions of the
 * first two matches in the index built from createFieldDoc0().
 */
@Test
public void testCase3() throws IOException, QueryException {
    ki = new KrillIndex();
    ki.addDoc(createFieldDoc0());
    ki.commit();

    final String queryFile = getClass()
            .getResource("/queries/reference/distance-multiple-references.jsonld")
            .getFile();
    final SpanQuery query = getJSONQuery(queryFile).toQuery();

    // 'cat="vb" & cat="prp" & cat="nn" & #1 .{0,1} #2 & #1 .{0,2}
    // #3 & #3 ->stanford/d #2 & #1 ->stanford #3' annis
    // without layer=c and + relation key
    final String expected =
            "spanReference(focus(#[1,2]spanSegment({3: <tokens:nn />}, "
            + "focus(#2: spanSegment(spanRelation(tokens:>:stanford/d:tag), "
            + "focus(1: spanReference(focus(#[1,2]spanSegment({2: <tokens:prp />}, "
            + "focus(#2: spanSegment(spanRelation(tokens:>:stanford/d:tag), "
            + "focus(3: spanDistance(focus(1: spanDistance({1: <tokens:vb />}, "
            + "{2: <tokens:prp />}, [(w[1:2], notOrdered, notExcluded)])), "
            + "{3: <tokens:nn />}, [(w[1:3], notOrdered, notExcluded)])))))), 2)))))), 3)";
    assertEquals(expected, query.toString());

    kr = ki.search(query, (short) 10);

    // For debugging, iterate kr.getMatches() and print
    // getStartPos(), getEndPos() and getSnippetBrackets().

    // First match
    assertEquals(1, kr.getMatch(0).getStartPos());
    assertEquals(4, kr.getMatch(0).getEndPos());

    // Second match
    assertEquals(10, kr.getMatch(1).getStartPos());
    assertEquals(13, kr.getMatch(1).getEndPos());
}
use of de.ids_mannheim.korap.query.wrap.SpanQueryWrapper in project Krill by KorAP.
the class TestRegexWildcardIndex method indexWildcard.
/**
 * Runs several wildcard queries ('*' and '?') against a single
 * document of ten surface tokens and checks hit counts and snippets.
 */
@Test
public void indexWildcard() throws Exception {
    final String field = "base";

    KrillIndex index = new KrillIndex();

    // One document with ten tokens on the surface layer
    FieldDocument doc = new FieldDocument();
    doc.addTV(
            field,
            "affe afffe baum baumgarten steingarten franz hans haus efeu effe",
            "[(0-4)s:affe|_0$<i>0<i>4|-:t$<i>10]"
            + "[(5-10)s:afffe|_1$<i>5<i>10]"
            + "[(11-15)s:baum|_2$<i>11<i>15]"
            + "[(16-26)s:baumgarten|_3$<i>16<i>26]"
            + "[(27-38)s:steingarten|_4$<i>27<i>38]"
            + "[(39-44)s:franz|_5$<i>39<i>44]"
            + "[(45-49)s:hans|_6$<i>45<i>49]"
            + "[(50-54)s:haus|_7$<i>50<i>54]"
            + "[(55-59)s:efeu|_8$<i>55<i>59]"
            + "[(60-64)s:effe|_9$<i>60<i>64]");
    index.addDoc(doc);
    index.commit();

    // '*' in the middle of the term
    QueryBuilder builder = new QueryBuilder(field);
    SpanQueryWrapper wildcard = builder.wc("s:af*e");
    assertEquals("SpanMultiTermQueryWrapper(base:s:af*e)", wildcard.toQuery().toString());

    Krill krill = _newKrill(wildcard);
    Result result = index.search(krill);
    assertEquals(2L, result.getTotalResults());
    assertEquals("[[affe]] afffe ...", result.getMatch(0).getSnippetBrackets());
    assertEquals("affe [[afffe]] baum ...", result.getMatch(1).getSnippetBrackets());

    // Regex-like ".*" is expected to yield no hits as a wildcard
    krill = _newKrill(new QueryBuilder(field).wc("s:baum.*"));
    result = index.search(krill);
    assertEquals(0L, result.getTotalResults());

    // Trailing '*'
    krill = _newKrill(new QueryBuilder(field).wc("s:baum*"));
    result = index.search(krill);
    assertEquals(2L, result.getTotalResults());
    assertEquals("... afffe [[baum]] baumgarten ...", result.getMatch(0).getSnippetBrackets());
    assertEquals("... baum [[baumgarten]] steingarten ...", result.getMatch(1).getSnippetBrackets());

    // Leading '*'
    krill = _newKrill(new QueryBuilder(field).wc("s:*garten"));
    result = index.search(krill);
    assertEquals(2L, result.getTotalResults());
    assertEquals("... baum [[baumgarten]] steingarten ...", result.getMatch(0).getSnippetBrackets());
    assertEquals("... baumgarten [[steingarten]] franz ...", result.getMatch(1).getSnippetBrackets());

    // Single '?' inside the term
    krill = _newKrill(new QueryBuilder(field).wc("s:ha?s"));
    result = index.search(krill);
    assertEquals(2L, result.getTotalResults());
    assertEquals("... franz [[hans]] haus ...", result.getMatch(0).getSnippetBrackets());
    assertEquals("... hans [[haus]] efeu ...", result.getMatch(1).getSnippetBrackets());

    // '?' at both ends of the term
    krill = _newKrill(new QueryBuilder(field).wc("s:?ff?"));
    result = index.search(krill);
    assertEquals(2L, result.getTotalResults());
    assertEquals("[[affe]] afffe ...", result.getMatch(0).getSnippetBrackets());
    assertEquals("... efeu [[effe]]", result.getMatch(1).getSnippetBrackets());
}
use of de.ids_mannheim.korap.query.wrap.SpanQueryWrapper in project Krill by KorAP.
the class TestWithinIndex method queryJSONpoly2.
/**
 * contains(<s>, (es wird | wird es))
 *
 * Reads the query from poly2.json, runs it against two wiki
 * documents and checks document id and positions of both matches.
 */
@Test
public void queryJSONpoly2() throws QueryException, IOException {
    // Build the query from the (URL-decoded) resource path
    final String encodedPath = getClass().getResource("/queries/poly2.json").getFile();
    final String queryJson = readFile(URLDecoder.decode(encodedPath, "UTF-8"));
    final SpanQueryWrapper wrapper = new KrillQuery("tokens").fromKoral(queryJson);
    final SpanWithinQuery withinQuery = (SpanWithinQuery) wrapper.toQuery();

    // Index both documents
    final KrillIndex index = new KrillIndex();
    index.addDoc(getClass().getResourceAsStream("/wiki/DDD-08370.json.gz"), true);
    index.addDoc(getClass().getResourceAsStream("/wiki/PPP-02924.json.gz"), true);
    index.commit();

    final Result result = index.search(withinQuery, (short) 10);
    assertEquals(2, result.getTotalResults());

    // Match in the first document
    assertEquals(0, result.getMatch(0).getLocalDocID());
    assertEquals(76, result.getMatch(0).getStartPos());
    assertEquals(93, result.getMatch(0).getEndPos());

    // Match in the second document
    assertEquals(1, result.getMatch(1).getLocalDocID());
    assertEquals(237, result.getMatch(1).getStartPos());
    assertEquals(252, result.getMatch(1).getEndPos());
}
Aggregations