Use of org.exist.xquery.modules.ngram.query.Wildcard in project exist by eXist-db.
From the class NGramSearch, method parseQuery.
/**
 * Parses a wildcard query string into an evaluatable expression.
 *
 * <p>The query is split into tokens by {@code tokenizeQuery}. A leading {@code "^"} token is
 * turned into a {@link StartAnchor} and a trailing {@code "$"} token into an {@link EndAnchor}.
 * Each remaining token is mapped as follows:
 * <ul>
 *   <li>a token starting with {@code "."} becomes a {@link Wildcard};</li>
 *   <li>a token starting with {@code "["} becomes {@link AlternativeStrings}, one alternative
 *       per Unicode code point between the first and the last character of the token;</li>
 *   <li>any other token becomes a {@link FixedString} of its unescaped text.</li>
 * </ul>
 * If no tokens are left (initially, or after removing either anchor) an
 * {@link EmptyExpression} is returned.
 *
 * @param query the raw query string
 * @return the parsed expression; never {@code null}
 * @throws XPathException with {@code FTDY0020} if a wildcard qualifier or a character class
 *         token is malformed
 */
private EvaluatableExpression parseQuery(final String query) throws XPathException {
    final List<String> queryTokens = tokenizeQuery(query);
    LOG.trace("Tokenized query: {}", queryTokens);
    if (queryTokens.isEmpty()) {
        return new EmptyExpression();
    }

    final List<WildcardedExpression> expressions = new ArrayList<>();
    if (queryTokens.get(0).equals("^")) {
        expressions.add(new StartAnchor());
        queryTokens.remove(0);
    }
    // The query may have consisted of the start anchor only.
    if (queryTokens.isEmpty()) {
        return new EmptyExpression();
    }

    final int lastIndex = queryTokens.size() - 1;
    final boolean endAnchorPresent = queryTokens.get(lastIndex).equals("$");
    if (endAnchorPresent) {
        queryTokens.remove(lastIndex);
    }
    // Nothing but anchors: there is nothing to search for.
    if (queryTokens.isEmpty()) {
        return new EmptyExpression();
    }

    for (final String token : queryTokens) {
        if (token.startsWith(".")) {
            expressions.add(toWildcard(token, query));
        } else if (token.startsWith("[")) {
            expressions.add(toAlternativeStrings(token));
        } else {
            expressions.add(new FixedString(this, unescape(token)));
        }
    }

    // The end anchor was removed up front so it is appended only after all other tokens.
    if (endAnchorPresent) {
        expressions.add(new EndAnchor());
    }
    return new WildcardedExpressionSequence(expressions);
}

/**
 * Converts a wildcard token ({@code "."} optionally followed by a qualifier) into a
 * {@link Wildcard}.
 *
 * <p>The two constructor arguments are presumably the minimum and maximum repetition
 * counts (TODO confirm against {@code Wildcard}): a bare {@code "."} yields (1, 1),
 * {@code "?"} yields (0, 1), {@code "*"} yields (0, {@code Integer.MAX_VALUE}) and
 * {@code "+"} yields (1, {@code Integer.MAX_VALUE}). Any other qualifier must match
 * {@code INTERVAL_QUALIFIER_PATTERN}, whose groups 1 and 2 supply the two numbers.
 *
 * @param token the wildcard token, starting with {@code "."}
 * @param query the complete query string, attached to the error raised for unparsable numbers
 * @return the wildcard described by the token
 * @throws XPathException with {@code FTDY0020} if the qualifier does not match the interval
 *         pattern or its numbers cannot be parsed as {@code int}
 */
private Wildcard toWildcard(final String token, final String query) throws XPathException {
    if (token.length() == 1) {
        return new Wildcard(1, 1);
    }

    final String qualifier = token.substring(1);
    switch (qualifier) {
        case "?":
            return new Wildcard(0, 1);
        case "*":
            return new Wildcard(0, Integer.MAX_VALUE);
        case "+":
            return new Wildcard(1, Integer.MAX_VALUE);
        default:
            final Matcher m = Pattern.compile(INTERVAL_QUALIFIER_PATTERN).matcher(qualifier);
            if (!m.matches()) {
                // Should not happen
                throw new XPathException(this, ErrorCodes.FTDY0020, "query string violates wildcard qualifier syntax");
            }
            try {
                return new Wildcard(Integer.parseInt(m.group(1)), Integer.parseInt(m.group(2)));
            } catch (final NumberFormatException nfe) {
                throw new XPathException(this, ErrorCodes.FTDY0020, "query string violates wildcard qualifier syntax", new StringValue(query), nfe);
            }
    }
}

/**
 * Converts a character class token into {@link AlternativeStrings}.
 *
 * <p>The first and last characters of the token are dropped (presumably the enclosing
 * brackets) and every Unicode code point in between becomes one alternative. Iterating by
 * code point rather than by {@code char} keeps characters outside the Basic Multilingual
 * Plane intact instead of splitting them into two unpaired surrogates.
 *
 * @param token the character class token, starting with {@code "["}
 * @return the alternatives listed in the token
 * @throws XPathException with {@code FTDY0020} if the token is too short to have both a first
 *         and a last delimiter character
 */
private AlternativeStrings toAlternativeStrings(final String token) throws XPathException {
    if (token.length() < 2) {
        // Should not happen; raise the declared exception type rather than failing on a
        // negative collection capacity.
        throw new XPathException(this, ErrorCodes.FTDY0020, "query string violates character class syntax");
    }

    final String members = token.substring(1, token.length() - 1);
    final Set<String> strings = new HashSet<>(members.length());
    int offset = 0;
    while (offset < members.length()) {
        final int codePoint = members.codePointAt(offset);
        strings.add(new String(Character.toChars(codePoint)));
        offset += Character.charCount(codePoint);
    }
    return new AlternativeStrings(this, strings);
}
Use of org.exist.xquery.modules.ngram.query.Wildcard in project exist by eXist-db.
From the class NGramSearch, method processMatches.
/**
 * Evaluates the query against the n-gram index and post-processes the matches according to
 * the local name of the function being executed.
 *
 * <p>When the local name is {@code "wildcard-contains"} the query is parsed with
 * {@code parseQuery}; otherwise it is treated as a single {@link FixedString}. After
 * evaluation, a local name starting with {@code "starts-with"} keeps only nodes matching at
 * the start and one starting with {@code "ends-with"} keeps only nodes matching at the end.
 * Finally every node's own matches are passed through
 * {@code Match::filterOutOverlappingOffsets}.
 *
 * @param index   the n-gram index worker the expression is evaluated against
 * @param docs    passed through to the expression evaluation
 * @param qnames  passed through to the expression evaluation
 * @param query   the query string
 * @param nodeSet passed through to the expression evaluation
 * @param axis    passed through to the expression evaluation
 * @return the filtered and transformed node set
 * @throws XPathException if parsing or evaluating the query fails
 */
private NodeSet processMatches(NGramIndexWorker index, DocumentSet docs, List<QName> qnames, String query, NodeSet nodeSet, int axis) throws XPathException {
    final String functionName = getLocalName();

    // Only the wildcard variant interprets the query syntax; everything else is a literal.
    final EvaluatableExpression parsedQuery = functionName.equals("wildcard-contains")
            ? parseQuery(query)
            : new FixedString(this, query);
    LOG.debug("Parsed Query: {}", parsedQuery);

    final NodeSet evaluated = parsedQuery.eval(index, docs, qnames, nodeSet, axis, getExpressionId());

    final NodeSet positioned;
    if (functionName.startsWith("starts-with")) {
        positioned = NodeSets.getNodesMatchingAtStart(evaluated, getExpressionId());
    } else if (functionName.startsWith("ends-with")) {
        positioned = NodeSets.getNodesMatchingAtEnd(evaluated, getExpressionId());
    } else {
        positioned = evaluated;
    }

    return NodeSets.transformNodes(
            positioned,
            proxy -> NodeProxies.transformOwnMatches(proxy, Match::filterOutOverlappingOffsets, getExpressionId()));
}
Aggregations