use of org.exist.dom.persistent.NodeSet in project exist by eXist-db.
the class NGramIndexWorker method search.
public NodeSet search(final int contextId, final DocumentSet docs, final List<QName> qnames, final String query, final String ngram, final XQueryContext context, final NodeSet contextSet, final int axis) throws XPathException {
final List<QName> searchQnames;
if (qnames == null || qnames.isEmpty()) {
searchQnames = getDefinedIndexes(context.getBroker(), docs);
} else {
searchQnames = qnames;
}
final NodeSet result = new ExtArrayNodeSet(docs.getDocumentCount(), 250);
for (final Iterator<Collection> iter = docs.getCollectionIterator(); iter.hasNext(); ) {
final int collectionId = iter.next().getId();
for (final QName qname : searchQnames) {
final NGramQNameKey key = new NGramQNameKey(collectionId, qname, index.getBrokerPool().getSymbols(), query);
try (final ManagedLock<ReentrantLock> dbLock = lockManager.acquireBtreeReadLock(index.db.getLockName())) {
final SearchCallback cb = new SearchCallback(contextId, query, ngram, docs, contextSet, context, result, axis == NodeSet.ANCESTOR);
final int op = query.codePointCount(0, query.length()) < getN() ? IndexQuery.TRUNC_RIGHT : IndexQuery.EQ;
index.db.query(new IndexQuery(op, key), cb);
} catch (final LockException e) {
LOG.warn("Failed to acquire lock for '{}'", FileUtils.fileName(index.db.getFile()), e);
} catch (final IOException | BTreeException e) {
LOG.error("{} in '{}'", e.getMessage(), FileUtils.fileName(index.db.getFile()), e);
}
}
}
// ensure result is ready to use
result.iterate();
return result;
}
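A minimal JDK-only sketch (not eXist code) of the query-mode decision above: the worker counts Unicode code points rather than chars, and a query shorter than the configured ngram size n cannot be a complete ngram key, so a right-truncated prefix scan (TRUNC_RIGHT) is used instead of an exact lookup (EQ). The value of n and the sample string are illustrative assumptions.
public class CodePointLengthDemo {
    public static void main(String[] args) {
        final int n = 3;                      // hypothetical ngram size
        final String query = "a\uD83D\uDE00"; // 'a' plus one emoji: 3 chars, but only 2 code points
        // same comparison as in NGramIndexWorker.search
        final boolean truncRight = query.codePointCount(0, query.length()) < n;
        System.out.println("chars=" + query.length()
                + " codePoints=" + query.codePointCount(0, query.length())
                + " -> " + (truncRight ? "TRUNC_RIGHT (prefix scan)" : "EQ (exact key lookup)"));
    }
}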
use of org.exist.dom.persistent.NodeSet in project exist by eXist-db.
the class NGramSearch method eval.
@Override
public Sequence eval(Sequence contextSequence, Item contextItem) throws XPathException {
if (contextItem != null)
contextSequence = contextItem.toSequence();
NodeSet result;
if (preselectResult == null) {
Sequence input = getArgument(0).eval(contextSequence, contextItem);
if (input.isEmpty())
result = NodeSet.EMPTY_SET;
else {
long start = System.currentTimeMillis();
NodeSet inNodes = input.toNodeSet();
DocumentSet docs = inNodes.getDocumentSet();
NGramIndexWorker index = (NGramIndexWorker) context.getBroker().getIndexController().getWorkerByIndexId(NGramIndex.ID);
// Alternate design
// NGramIndexWorker index =
// (NGramIndexWorker)context.getBroker().getBrokerPool().getIndexManager().getIndexById(NGramIndex.ID).getWorker();
String key = getArgument(1).eval(contextSequence, contextItem).getStringValue();
List<QName> qnames = null;
if (contextQName != null) {
qnames = new ArrayList<>(1);
qnames.add(contextQName);
}
result = processMatches(index, docs, qnames, key, inNodes, NodeSet.ANCESTOR);
if (context.getProfiler().traceFunctions()) {
// report index use
context.getProfiler().traceIndexUsage(context, "ngram", this, PerformanceStats.BASIC_INDEX, System.currentTimeMillis() - start);
}
}
} else {
contextStep.setPreloadedData(contextSequence.getDocumentSet(), preselectResult);
result = getArgument(0).eval(contextSequence).toNodeSet();
}
return result;
}
use of org.exist.dom.persistent.NodeSet in project exist by eXist-db.
the class NGramSearch method fixedStringSearch.
public NodeSet fixedStringSearch(final NGramIndexWorker index, final DocumentSet docs, final List<QName> qnames, final String query, final NodeSet nodeSet, final int axis) throws XPathException {
String[] ngrams = NGramSearch.getDistinctNGrams(query, index.getN());
// Nothing to search for? Then find nothing.
if (ngrams.length == 0)
return new EmptyNodeSet();
String firstNgramm = ngrams[0];
LOG.trace("First NGRAM: {}", firstNgramm);
NodeSet result = index.search(getExpressionId(), docs, qnames, firstNgramm, firstNgramm, context, nodeSet, axis);
for (int i = 1; i < ngrams.length; i++) {
String ngram = ngrams[i];
int len = ngram.codePointCount(0, ngram.length());
int fillSize = index.getN() - len;
String filledNgram = ngram;
// If this ngram is shorter than n, pad it with the tail of the previous ngram: too-short ngrams lead to a considerable performance loss.
if (fillSize > 0) {
String filler = ngrams[i - 1];
StringBuilder buf = new StringBuilder();
int pos = filler.offsetByCodePoints(0, len);
for (int j = 0; j < fillSize; j++) {
int codepoint = filler.codePointAt(pos);
pos += Character.charCount(codepoint);
buf.appendCodePoint(codepoint);
}
buf.append(ngram);
filledNgram = buf.toString();
LOG.debug("Filled: {}", filledNgram);
}
NodeSet nodes = index.search(getExpressionId(), docs, qnames, filledNgram, ngram, context, nodeSet, axis);
final NodeSet nodesContainingFirstINgrams = result;
result = NodeSets.transformNodes(nodes, proxy -> Optional.ofNullable(nodesContainingFirstINgrams.get(proxy)).map(before -> getContinuousMatches(before, proxy)).orElse(null));
}
return result;
}
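A JDK-only sketch (not eXist code) of the padding step in the loop above, simplified to chars where the real code counts code points: the query is split into consecutive chunks of n characters, and a trailing chunk shorter than n is prefixed with the tail of the previous chunk, so every key sent to the index is a full ngram that actually occurs in the query.
import java.util.ArrayList;
import java.util.List;

public class NgramPaddingSketch {
    // split into chunks of n chars, then pad a short trailing chunk
    // with the last (n - length) chars of the previous chunk
    static List<String> searchKeys(final String query, final int n) {
        final List<String> chunks = new ArrayList<>();
        for (int i = 0; i < query.length(); i += n) {
            chunks.add(query.substring(i, Math.min(i + n, query.length())));
        }
        final List<String> keys = new ArrayList<>(chunks);
        for (int i = 1; i < keys.size(); i++) {
            final int fill = n - keys.get(i).length();
            if (fill > 0) {
                final String prev = chunks.get(i - 1);
                keys.set(i, prev.substring(prev.length() - fill) + keys.get(i));
            }
        }
        return keys;
    }

    public static void main(String[] args) {
        System.out.println(searchKeys("abcde", 3)); // prints [abc, cde]
    }
}
Note that in fixedStringSearch the padded value is only the lookup key; the original, shorter ngram is still passed to index.search as the match string.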
use of org.exist.dom.persistent.NodeSet in project exist by eXist-db.
the class NGramSearch method processMatches.
private NodeSet processMatches(NGramIndexWorker index, DocumentSet docs, List<QName> qnames, String query, NodeSet nodeSet, int axis) throws XPathException {
EvaluatableExpression parsedQuery = null;
if (getLocalName().equals("wildcard-contains"))
parsedQuery = parseQuery(query);
else
parsedQuery = new FixedString(this, query);
LOG.debug("Parsed Query: {}", parsedQuery);
NodeSet result = parsedQuery.eval(index, docs, qnames, nodeSet, axis, this.getExpressionId());
if (getLocalName().startsWith("starts-with"))
result = NodeSets.getNodesMatchingAtStart(result, getExpressionId());
else if (getLocalName().startsWith("ends-with"))
result = NodeSets.getNodesMatchingAtEnd(result, getExpressionId());
result = NodeSets.transformNodes(result, proxy -> NodeProxies.transformOwnMatches(proxy, Match::filterOutOverlappingOffsets, getExpressionId()));
return result;
}
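A JDK-only sketch (not eXist code) of the post-filtering idea in processMatches: the index lookup returns every node containing the ngram sequence, and the function's local name then narrows the accepted match positions. Plain strings stand in here for the NodeSet and Match machinery.
public class MatchPositionFilterSketch {
    // hypothetical stand-in for NodeSets.getNodesMatchingAtStart / getNodesMatchingAtEnd
    static boolean keep(final String localName, final String text, final String query) {
        if (localName.startsWith("starts-with")) {
            return text.startsWith(query);
        } else if (localName.startsWith("ends-with")) {
            return text.endsWith(query);
        }
        return text.contains(query); // ngram:contains and wildcard-contains keep any position
    }

    public static void main(String[] args) {
        System.out.println(keep("starts-with", "ngram index", "ngram")); // true
        System.out.println(keep("ends-with", "ngram index", "ngram"));   // false
    }
}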
use of org.exist.dom.persistent.NodeSet in project exist by eXist-db.
the class Query method eval.
public Sequence eval(Sequence contextSequence, Item contextItem) throws XPathException {
if (contextItem != null)
contextSequence = contextItem.toSequence();
if (contextSequence != null && !contextSequence.isPersistentSet())
// in-memory docs won't have an index
return Sequence.EMPTY_SEQUENCE;
NodeSet result;
if (preselectResult == null) {
long start = System.currentTimeMillis();
Sequence input = getArgument(0).eval(contextSequence);
if (!(input instanceof VirtualNodeSet) && input.isEmpty())
result = NodeSet.EMPTY_SET;
else {
NodeSet inNodes = input.toNodeSet();
DocumentSet docs = inNodes.getDocumentSet();
LuceneIndexWorker index = (LuceneIndexWorker) context.getBroker().getIndexController().getWorkerByIndexId(LuceneIndex.ID);
Item key = getKey(contextSequence, contextItem);
List<QName> qnames = null;
if (contextQName != null) {
qnames = new ArrayList<>(1);
qnames.add(contextQName);
}
QueryOptions options = parseOptions(this, contextSequence, contextItem, 3);
try {
if (key != null && Type.subTypeOf(key.getType(), Type.ELEMENT)) {
final Element queryXML = (Element) ((NodeValue) key).getNode();
result = index.query(getExpressionId(), docs, inNodes, qnames, queryXML, NodeSet.ANCESTOR, options);
} else {
final String query = key == null ? null : key.getStringValue();
result = index.query(getExpressionId(), docs, inNodes, qnames, query, NodeSet.ANCESTOR, options);
}
} catch (IOException | org.apache.lucene.queryparser.classic.ParseException e) {
throw new XPathException(this, e.getMessage());
}
}
if (context.getProfiler().traceFunctions()) {
context.getProfiler().traceIndexUsage(context, "lucene", this, PerformanceStats.BASIC_INDEX, System.currentTimeMillis() - start);
}
} else {
// DW: contextSequence can be null
contextStep.setPreloadedData(contextSequence.getDocumentSet(), preselectResult);
result = getArgument(0).eval(contextSequence).toNodeSet();
}
return result;
}
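A small sketch (not eXist code) of the key dispatch in the try block above: an element value is handed to the Lucene worker as an XML query description, any other value is converted to its string form, and a null key is passed through as a null query string. The class and method names here are invented for illustration.
import org.w3c.dom.Element;

public class LuceneKeyDispatchSketch {
    static String describe(final Object key) {
        if (key == null) {
            return "null query string";
        } else if (key instanceof Element) {
            return "XML query element <" + ((Element) key).getTagName() + ">";
        }
        return "plain query string: " + key;
    }

    public static void main(String[] args) {
        System.out.println(describe("title:ngram AND body:index"));
        System.out.println(describe(null));
    }
}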