use of org.exist.indexing.ngram.NGramIndexWorker in project exist by eXist-db.
the class NGramSearch method eval.
/**
 * Evaluates the ngram search function.
 *
 * <p>If a preselect result is available it is handed to the context step as
 * preloaded data and the first argument is evaluated against the context
 * sequence. Otherwise the first argument is evaluated to obtain the input
 * nodes, the second argument supplies the search key, and the ngram index
 * is queried through {@code processMatches} along the ancestor axis.</p>
 *
 * @param contextSequence the current context sequence; replaced by the
 *                        context item's sequence when an item is given
 * @param contextItem     the current context item, may be {@code null}
 * @return the matching nodes, or {@code NodeSet.EMPTY_SET} when the input
 *         sequence is empty
 * @throws XPathException if evaluating an argument or the index lookup fails
 */
@Override
public Sequence eval(Sequence contextSequence, Item contextItem) throws XPathException {
    if (contextItem != null) {
        contextSequence = contextItem.toSequence();
    }

    // Optimized path: the index lookup already happened during preselection.
    if (preselectResult != null) {
        contextStep.setPreloadedData(contextSequence.getDocumentSet(), preselectResult);
        return getArgument(0).eval(contextSequence).toNodeSet();
    }

    final Sequence input = getArgument(0).eval(contextSequence, contextItem);
    if (input.isEmpty()) {
        return NodeSet.EMPTY_SET;
    }

    // Timing starts here so that the reported duration covers node set
    // conversion as well as the index lookup.
    final long startTime = System.currentTimeMillis();
    final NodeSet inNodes = input.toNodeSet();
    final DocumentSet docs = inNodes.getDocumentSet();

    // The worker is obtained from the broker's index controller; going through
    // the broker pool's index manager would be an alternative design.
    final NGramIndexWorker index =
            (NGramIndexWorker) context.getBroker().getIndexController().getWorkerByIndexId(NGramIndex.ID);

    final String key = getArgument(1).eval(contextSequence, contextItem).getStringValue();

    // Without a context QName the list stays null and is passed on as such.
    final List<QName> qnames;
    if (contextQName == null) {
        qnames = null;
    } else {
        qnames = new ArrayList<>(1);
        qnames.add(contextQName);
    }

    final NodeSet matches = processMatches(index, docs, qnames, key, inNodes, NodeSet.ANCESTOR);

    if (context.getProfiler().traceFunctions()) {
        // report index use
        context.getProfiler().traceIndexUsage(context, "ngram", this, PerformanceStats.BASIC_INDEX,
                System.currentTimeMillis() - startTime);
    }
    return matches;
}
use of org.exist.indexing.ngram.NGramIndexWorker in project exist by eXist-db.
the class NGramSearch method fixedStringSearch.
/**
 * Searches the ngram index for a fixed (non-wildcard) query string.
 *
 * <p>The query is split into its distinct ngrams. The first ngram is looked
 * up directly; every following ngram is looked up in turn and combined with
 * the nodes found so far via {@code getContinuousMatches}, so that a node
 * only remains in the result if it was also present in the previous
 * result.</p>
 *
 * @param index   the ngram index worker used for the lookups
 * @param docs    the documents to search
 * @param qnames  the qualified names to restrict the search to, may be {@code null}
 * @param query   the string to search for
 * @param nodeSet the context node set passed through to the index
 * @param axis    the axis constant passed through to the index
 * @return the nodes matching the query; an empty node set if the query
 *         yields no ngrams
 * @throws XPathException if an index lookup fails
 */
public NodeSet fixedStringSearch(final NGramIndexWorker index, final DocumentSet docs, final List<QName> qnames, final String query, final NodeSet nodeSet, final int axis) throws XPathException {
    final String[] ngrams = NGramSearch.getDistinctNGrams(query, index.getN());

    // Nothing to search for? Then find nothing.
    if (ngrams.length == 0) {
        return new EmptyNodeSet();
    }

    final String firstNgram = ngrams[0];
    LOG.trace("First NGRAM: {}", firstNgram);
    NodeSet result = index.search(getExpressionId(), docs, qnames, firstNgram, firstNgram, context, nodeSet, axis);

    for (int i = 1; i < ngrams.length; i++) {
        final String ngram = ngrams[i];
        final int codePoints = ngram.codePointCount(0, ngram.length());
        final int missing = index.getN() - codePoints;

        String searchNgram = ngram;
        // An ngram shorter than the index's n is padded in front with code
        // points taken from the preceding ngram before it is looked up;
        // short ngrams lead to a considerable performance loss.
        if (missing > 0) {
            final String previous = ngrams[i - 1];
            final StringBuilder padded = new StringBuilder();
            // Skip as many code points of the previous ngram as the current
            // one already has, then copy the missing ones.
            int offset = previous.offsetByCodePoints(0, codePoints);
            int remaining = missing;
            while (remaining-- > 0) {
                final int cp = previous.codePointAt(offset);
                padded.appendCodePoint(cp);
                offset += Character.charCount(cp);
            }
            padded.append(ngram);
            searchNgram = padded.toString();
            LOG.debug("Filled: {}", searchNgram);
        }

        final NodeSet current = index.search(getExpressionId(), docs, qnames, searchNgram, ngram, context, nodeSet, axis);

        // Keep only nodes that were already matched by the preceding ngrams.
        final NodeSet matchedSoFar = result;
        result = NodeSets.transformNodes(current, proxy ->
                Optional.ofNullable(matchedSoFar.get(proxy))
                        .map(before -> getContinuousMatches(before, proxy))
                        .orElse(null));
    }
    return result;
}
use of org.exist.indexing.ngram.NGramIndexWorker in project exist by eXist-db.
the class NGramSearch method processMatches.
/**
 * Builds the query expression for this function, evaluates it against the
 * ngram index and post-processes the matches.
 *
 * <p>For the {@code wildcard-contains} function the query string is parsed
 * with {@code parseQuery}; for every other function it is treated as a
 * fixed string. Functions whose local name begins with {@code starts-with}
 * or {@code ends-with} are additionally narrowed to nodes matching at the
 * start or at the end. Finally each node's own matches are passed through
 * {@code Match::filterOutOverlappingOffsets}.</p>
 *
 * @param index   the ngram index worker to query
 * @param docs    the documents to search
 * @param qnames  the qualified names to restrict the search to, may be {@code null}
 * @param query   the query string
 * @param nodeSet the context node set handed to the index
 * @param axis    the axis constant handed to the index
 * @return the processed set of matching nodes
 * @throws XPathException if parsing or evaluating the query fails
 */
private NodeSet processMatches(NGramIndexWorker index, DocumentSet docs, List<QName> qnames, String query, NodeSet nodeSet, int axis) throws XPathException {
    final EvaluatableExpression parsedQuery = getLocalName().equals("wildcard-contains")
            ? parseQuery(query)
            : new FixedString(this, query);
    LOG.debug("Parsed Query: {}", parsedQuery);

    NodeSet matches = parsedQuery.eval(index, docs, qnames, nodeSet, axis, this.getExpressionId());

    // Positional variants keep only the matches anchored at one end.
    if (getLocalName().startsWith("starts-with")) {
        matches = NodeSets.getNodesMatchingAtStart(matches, getExpressionId());
    } else if (getLocalName().startsWith("ends-with")) {
        matches = NodeSets.getNodesMatchingAtEnd(matches, getExpressionId());
    }

    return NodeSets.transformNodes(matches,
            proxy -> NodeProxies.transformOwnMatches(proxy, Match::filterOutOverlappingOffsets, getExpressionId()));
}
use of org.exist.indexing.ngram.NGramIndexWorker in project exist by eXist-db.
the class HighlightMatches method eval.
/**
 * Copies every node of the first argument into an in-memory document,
 * routing persistent nodes through the ngram index's match listener.
 *
 * <p>In-memory nodes are copied to the document builder as they are.
 * Persistent nodes are serialized; if the ngram index supplies a match
 * listener for the node, the listener is placed in front of the document
 * builder in the receiver chain. The listener is created with a
 * {@code MatchCallback} wrapping the function reference from the second
 * argument (presumably invoked per match; confirm against
 * {@code MatchCallback}).</p>
 *
 * @param args            {@code args[0]} holds the nodes to process,
 *                        {@code args[1]} holds the callback function reference
 * @param contextSequence the current context sequence (not used here)
 * @return a sequence with one in-memory node per input node, or the empty
 *         sequence if {@code args[0]} is empty
 * @throws XPathException if serialization fails; the originating
 *                        {@code SAXException} is attached as the cause
 */
public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathException {
    if (args[0].isEmpty()) {
        return Sequence.EMPTY_SEQUENCE;
    }

    context.pushDocumentContext();
    try {
        // Borrowed inside the outer try so that the document context is
        // popped even if no serializer can be obtained.
        final Serializer serializer = context.getBroker().borrowSerializer();
        try (FunctionReference func = (FunctionReference) args[1].itemAt(0)) {
            final MemTreeBuilder builder = context.getDocumentBuilder();
            final NGramIndexWorker index =
                    (NGramIndexWorker) context.getBroker().getIndexController().getWorkerByIndexId(NGramIndex.ID);
            final DocumentBuilderReceiver docBuilder = new DocumentBuilderReceiver(builder);
            final MatchCallback matchCb = new MatchCallback(func, docBuilder);
            final ValueSequence result = new ValueSequence();

            for (SequenceIterator i = args[0].iterate(); i.hasNext(); ) {
                final NodeValue v = (NodeValue) i.nextItem();
                try {
                    // Remember the last node before copying; the node added
                    // for this item is the one right after it.
                    int nodeNr = builder.getDocument().getLastNode();
                    if (v.getImplementationType() == NodeValue.IN_MEMORY_NODE) {
                        ((NodeImpl) v).copyTo(context.getBroker(), docBuilder);
                    } else {
                        final NodeProxy p = (NodeProxy) v;
                        final MatchListener ml = index.getMatchListener(context.getBroker(), p, matchCb);
                        final Receiver receiver;
                        if (ml == null) {
                            // No listener for this node: write straight to the builder.
                            receiver = docBuilder;
                        } else {
                            ml.setNextInChain(docBuilder);
                            receiver = ml;
                        }
                        serializer.setReceiver(receiver);
                        serializer.toReceiver(p, false);
                    }
                    result.add(builder.getDocument().getNode(++nodeNr));
                } catch (SAXException e) {
                    LOG.warn(e.getMessage(), e);
                    // Keep the SAXException as cause so the stack trace is not lost.
                    throw new XPathException(this, e.getMessage(), e);
                }
            }
            return result;
        } finally {
            context.getBroker().returnSerializer(serializer);
        }
    } finally {
        context.popDocumentContext();
    }
}
use of org.exist.indexing.ngram.NGramIndexWorker in project exist by eXist-db.
the class NGramSearch method preSelect.
/**
 * Runs the ngram index lookup ahead of evaluation and stores the outcome
 * in {@code preselectResult}.
 *
 * <p>The search key is taken from the second argument and the lookup is
 * restricted to {@code contextQName} along the descendant axis.</p>
 *
 * <p>NOTE(review): {@code contextQName} is added to the QName list without
 * a null check; presumably preselection only happens when it is set.
 * Confirm against the caller.</p>
 *
 * @param contextSequence the context sequence providing the document set
 * @param useContext      whether the context sequence's nodes are passed
 *                        to the index as context; otherwise {@code null}
 *                        is passed
 * @return the preselected node set
 * @throws XPathException if evaluating the key or the index lookup fails
 */
@Override
public NodeSet preSelect(Sequence contextSequence, boolean useContext) throws XPathException {
    // the expression can be called multiple times, so we need to clear the previous preselectResult
    preselectResult = null;

    final long startTime = System.currentTimeMillis();

    final NGramIndexWorker index =
            (NGramIndexWorker) context.getBroker().getIndexController().getWorkerByIndexId(NGramIndex.ID);
    final DocumentSet docs = contextSequence.getDocumentSet();
    final String key = getArgument(1).eval(contextSequence).getStringValue();

    final List<QName> qnames = new ArrayList<>(1);
    qnames.add(contextQName);

    final NodeSet contextNodes;
    if (useContext) {
        contextNodes = contextSequence.toNodeSet();
    } else {
        contextNodes = null;
    }
    preselectResult = processMatches(index, docs, qnames, key, contextNodes, NodeSet.DESCENDANT);

    if (context.getProfiler().traceFunctions()) {
        // report index use
        context.getProfiler().traceIndexUsage(context, "ngram", this, PerformanceStats.OPTIMIZED_INDEX,
                System.currentTimeMillis() - startTime);
    }
    return preselectResult;
}
Aggregations