Search in sources :

Example 16 with NodeSet

use of org.exist.dom.persistent.NodeSet in project exist by eXist-db.

the class NGramIndexWorker method search.

/**
 * Queries the n-gram index once per collection of {@code docs} and per qualified name,
 * gathering all hits reported by the search callback into one node set.
 *
 * <p>Failures to obtain the B-tree read lock, as well as I/O and B-tree errors, are logged
 * and do not abort the search; the affected collection/QName combination is skipped.</p>
 *
 * @param contextId  handed to the search callback unchanged
 * @param docs       documents whose collections are searched
 * @param qnames     qualified names to search; when {@code null} or empty, the result of
 *                   {@code getDefinedIndexes} for {@code docs} is used instead
 * @param query      string used to build the index key
 * @param ngram      handed to the search callback unchanged
 * @param context    query context; supplies the broker for the QName fallback
 * @param contextSet handed to the search callback unchanged
 * @param axis       compared against {@link NodeSet#ANCESTOR}; the outcome is passed to the callback
 * @return the node set filled by the search callback
 * @throws XPathException declared by the signature; nothing in this body throws it directly
 */
public NodeSet search(final int contextId, final DocumentSet docs, final List<QName> qnames, final String query, final String ngram, final XQueryContext context, final NodeSet contextSet, final int axis) throws XPathException {
    // Without explicit QNames, search everything that has an index defined.
    final List<QName> searchQnames = (qnames != null && !qnames.isEmpty())
            ? qnames
            : getDefinedIndexes(context.getBroker(), docs);

    final NodeSet result = new ExtArrayNodeSet(docs.getDocumentCount(), 250);

    final Iterator<Collection> collections = docs.getCollectionIterator();
    while (collections.hasNext()) {
        final int collectionId = collections.next().getId();
        for (final QName qname : searchQnames) {
            final NGramQNameKey key = new NGramQNameKey(collectionId, qname, index.getBrokerPool().getSymbols(), query);
            try (final ManagedLock<ReentrantLock> dbLock = lockManager.acquireBtreeReadLock(index.db.getLockName())) {
                final boolean ancestorAxis = axis == NodeSet.ANCESTOR;
                final SearchCallback cb = new SearchCallback(contextId, query, ngram, docs, contextSet, context, result, ancestorAxis);

                // A query with fewer code points than the configured n-gram size cannot
                // match a key exactly, so it is looked up with TRUNC_RIGHT instead of EQ.
                final int op;
                if (query.codePointCount(0, query.length()) < getN()) {
                    op = IndexQuery.TRUNC_RIGHT;
                } else {
                    op = IndexQuery.EQ;
                }
                index.db.query(new IndexQuery(op, key), cb);
            } catch (final LockException e) {
                LOG.warn("Failed to acquire lock for '{}'", FileUtils.fileName(index.db.getFile()), e);
            } catch (final IOException | BTreeException e) {
                LOG.error("{} in '{}'", e.getMessage(), FileUtils.fileName(index.db.getFile()), e);
            }
        }
    }

    // ensure result is ready to use
    result.iterate();
    return result;
}
Also used : ExtArrayNodeSet(org.exist.dom.persistent.ExtArrayNodeSet) NodeSet(org.exist.dom.persistent.NodeSet) ReentrantLock(java.util.concurrent.locks.ReentrantLock) IndexQuery(org.exist.storage.btree.IndexQuery) QName(org.exist.dom.QName) IOException(java.io.IOException) BTreeException(org.exist.storage.btree.BTreeException) ExtArrayNodeSet(org.exist.dom.persistent.ExtArrayNodeSet) Collection(org.exist.collections.Collection)

Example 17 with NodeSet

use of org.exist.dom.persistent.NodeSet in project exist by eXist-db.

the class NGramSearch method eval.

/**
 * Evaluates the n-gram search function.
 *
 * <p>When a preselected result exists it is handed to the context step as preloaded data
 * and the first argument is evaluated against it. Otherwise the first argument is evaluated
 * to obtain the input nodes, the second argument supplies the search string, and the index
 * worker registered under {@code NGramIndex.ID} is used via {@code processMatches}.</p>
 *
 * @param contextSequence the context sequence; replaced by {@code contextItem.toSequence()}
 *                        when a context item is given
 * @param contextItem     the context item, may be {@code null}
 * @return the matching nodes, or {@link NodeSet#EMPTY_SET} when the input is empty
 * @throws XPathException if evaluating an argument or the search fails
 */
@Override
public Sequence eval(Sequence contextSequence, Item contextItem) throws XPathException {
    if (contextItem != null) {
        contextSequence = contextItem.toSequence();
    }

    // Preselection already happened: feed its result to the context step and re-evaluate.
    if (preselectResult != null) {
        contextStep.setPreloadedData(contextSequence.getDocumentSet(), preselectResult);
        return getArgument(0).eval(contextSequence).toNodeSet();
    }

    final Sequence input = getArgument(0).eval(contextSequence, contextItem);
    if (input.isEmpty()) {
        return NodeSet.EMPTY_SET;
    }

    // Timing starts after the input has been evaluated, so it covers the index lookup only.
    final long start = System.currentTimeMillis();
    final NodeSet inNodes = input.toNodeSet();
    final DocumentSet docs = inNodes.getDocumentSet();
    final NGramIndexWorker index = (NGramIndexWorker) context.getBroker().getIndexController().getWorkerByIndexId(NGramIndex.ID);
    final String key = getArgument(1).eval(contextSequence, contextItem).getStringValue();

    // Restrict the search to the context QName when one is known; null means "no restriction given".
    List<QName> searchQNames = null;
    if (contextQName != null) {
        searchQNames = new ArrayList<>(1);
        searchQNames.add(contextQName);
    }

    final NodeSet result = processMatches(index, docs, searchQNames, key, inNodes, NodeSet.ANCESTOR);
    if (context.getProfiler().traceFunctions()) {
        // report index use
        context.getProfiler().traceIndexUsage(context, "ngram", this, PerformanceStats.BASIC_INDEX, System.currentTimeMillis() - start);
    }
    return result;
}
Also used : NodeSet(org.exist.dom.persistent.NodeSet) EmptyNodeSet(org.exist.dom.persistent.EmptyNodeSet) QName(org.exist.dom.QName) NGramIndexWorker(org.exist.indexing.ngram.NGramIndexWorker) WildcardedExpressionSequence(org.exist.xquery.modules.ngram.query.WildcardedExpressionSequence) DocumentSet(org.exist.dom.persistent.DocumentSet) FixedString(org.exist.xquery.modules.ngram.query.FixedString)

Example 18 with NodeSet

use of org.exist.dom.persistent.NodeSet in project exist by eXist-db.

the class NGramSearch method fixedStringSearch.

/**
 * Searches the n-gram index for a fixed string.
 *
 * <p>The query is split into distinct n-grams via {@code getDistinctNGrams}. The first
 * n-gram is searched as-is; each following n-gram is searched in turn and its hits are
 * kept only when the same node was present in the result so far, in which case the two
 * are combined through {@code getContinuousMatches}.</p>
 *
 * @param index   index worker used for the lookups; also supplies the n-gram size
 * @param docs    documents to search
 * @param qnames  qualified names passed on to the index search
 * @param query   the string to search for
 * @param nodeSet context node set passed on to the index search
 * @param axis    axis constant passed on to the index search
 * @return the nodes matching all n-grams, or a new {@code EmptyNodeSet} when the query
 *         yields no n-grams
 * @throws XPathException if an index search fails
 */
public NodeSet fixedStringSearch(final NGramIndexWorker index, final DocumentSet docs, final List<QName> qnames, final String query, final NodeSet nodeSet, final int axis) throws XPathException {
    final String[] ngrams = NGramSearch.getDistinctNGrams(query, index.getN());
    // Nothing to search for? Then find nothing.
    if (ngrams.length == 0) {
        return new EmptyNodeSet();
    }

    final String head = ngrams[0];
    LOG.trace("First NGRAM: {}", head);
    NodeSet result = index.search(getExpressionId(), docs, qnames, head, head, context, nodeSet, axis);

    for (int i = 1; i < ngrams.length; i++) {
        final String ngram = ngrams[i];
        final int len = ngram.codePointCount(0, ngram.length());
        final int missing = index.getN() - len;
        String searchKey = ngram;

        // An n-gram shorter than n is padded at the front with code points taken from
        // the previous n-gram: too short n-grams lead to a considerable performance loss.
        if (missing > 0) {
            final String previous = ngrams[i - 1];
            final StringBuilder padded = new StringBuilder();
            int pos = previous.offsetByCodePoints(0, len);
            for (int taken = 0; taken < missing; taken++) {
                final int cp = previous.codePointAt(pos);
                padded.appendCodePoint(cp);
                pos += Character.charCount(cp);
            }
            padded.append(ngram);
            searchKey = padded.toString();
            LOG.debug("Filled: {}", searchKey);
        }

        final NodeSet hits = index.search(getExpressionId(), docs, qnames, searchKey, ngram, context, nodeSet, axis);
        // Lambdas need an effectively final reference to the result accumulated so far.
        final NodeSet matchedSoFar = result;
        result = NodeSets.transformNodes(hits, proxy ->
                Optional.ofNullable(matchedSoFar.get(proxy))
                        .map(before -> getContinuousMatches(before, proxy))
                        .orElse(null));
    }
    return result;
}
Also used : NodeSet(org.exist.dom.persistent.NodeSet) EmptyNodeSet(org.exist.dom.persistent.EmptyNodeSet) Match(org.exist.dom.persistent.Match) EvaluatableExpression(org.exist.xquery.modules.ngram.query.EvaluatableExpression) java.util(java.util) QName(org.exist.dom.QName) NodeProxy(org.exist.dom.persistent.NodeProxy) org.exist.xquery.value(org.exist.xquery.value) Wildcard(org.exist.xquery.modules.ngram.query.Wildcard) NodeSet(org.exist.dom.persistent.NodeSet) EmptyExpression(org.exist.xquery.modules.ngram.query.EmptyExpression) org.exist.xquery(org.exist.xquery) NodeProxies(org.exist.xquery.modules.ngram.utils.NodeProxies) NGramIndex(org.exist.indexing.ngram.NGramIndex) Matcher(java.util.regex.Matcher) NodeSets(org.exist.xquery.modules.ngram.utils.NodeSets) ElementValue(org.exist.storage.ElementValue) Error(org.exist.xquery.util.Error) DocumentSet(org.exist.dom.persistent.DocumentSet) AlternativeStrings(org.exist.xquery.modules.ngram.query.AlternativeStrings) StartAnchor(org.exist.xquery.modules.ngram.query.StartAnchor) EmptyNodeSet(org.exist.dom.persistent.EmptyNodeSet) NGramIndexWorker(org.exist.indexing.ngram.NGramIndexWorker) Logger(org.apache.logging.log4j.Logger) FixedString(org.exist.xquery.modules.ngram.query.FixedString) EndAnchor(org.exist.xquery.modules.ngram.query.EndAnchor) WildcardedExpressionSequence(org.exist.xquery.modules.ngram.query.WildcardedExpressionSequence) Pattern(java.util.regex.Pattern) WildcardedExpression(org.exist.xquery.modules.ngram.query.WildcardedExpression) LogManager(org.apache.logging.log4j.LogManager) EmptyNodeSet(org.exist.dom.persistent.EmptyNodeSet) FixedString(org.exist.xquery.modules.ngram.query.FixedString)

Example 19 with NodeSet

use of org.exist.dom.persistent.NodeSet in project exist by eXist-db.

the class NGramSearch method processMatches.

/**
 * Parses the search string according to the called function's local name, evaluates it
 * against the index and post-processes the matches.
 *
 * <p>For {@code wildcard-contains} the query goes through {@code parseQuery}; for every
 * other function name it is treated as a {@code FixedString}. Results of functions whose
 * name starts with {@code starts-with} or {@code ends-with} are narrowed to matches at the
 * start or end respectively. Finally each node's own matches are passed through
 * {@code Match::filterOutOverlappingOffsets}.</p>
 *
 * @param index   index worker to evaluate the parsed query against
 * @param docs    documents to search
 * @param qnames  qualified names passed on to the evaluation
 * @param query   the raw search string
 * @param nodeSet context node set passed on to the evaluation
 * @param axis    axis constant passed on to the evaluation
 * @return the post-processed matching nodes
 * @throws XPathException if parsing or evaluating the query fails
 */
private NodeSet processMatches(NGramIndexWorker index, DocumentSet docs, List<QName> qnames, String query, NodeSet nodeSet, int axis) throws XPathException {
    final EvaluatableExpression parsedQuery = getLocalName().equals("wildcard-contains")
            ? parseQuery(query)
            : new FixedString(this, query);
    LOG.debug("Parsed Query: {}", parsedQuery);

    NodeSet matches = parsedQuery.eval(index, docs, qnames, nodeSet, axis, this.getExpressionId());

    // Anchor the matches when the function name asks for it.
    if (getLocalName().startsWith("starts-with")) {
        matches = NodeSets.getNodesMatchingAtStart(matches, getExpressionId());
    } else if (getLocalName().startsWith("ends-with")) {
        matches = NodeSets.getNodesMatchingAtEnd(matches, getExpressionId());
    }

    return NodeSets.transformNodes(
            matches,
            proxy -> NodeProxies.transformOwnMatches(proxy, Match::filterOutOverlappingOffsets, getExpressionId()));
}
Also used : NodeSet(org.exist.dom.persistent.NodeSet) EmptyNodeSet(org.exist.dom.persistent.EmptyNodeSet) Match(org.exist.dom.persistent.Match) EvaluatableExpression(org.exist.xquery.modules.ngram.query.EvaluatableExpression) java.util(java.util) QName(org.exist.dom.QName) NodeProxy(org.exist.dom.persistent.NodeProxy) org.exist.xquery.value(org.exist.xquery.value) Wildcard(org.exist.xquery.modules.ngram.query.Wildcard) NodeSet(org.exist.dom.persistent.NodeSet) EmptyExpression(org.exist.xquery.modules.ngram.query.EmptyExpression) org.exist.xquery(org.exist.xquery) NodeProxies(org.exist.xquery.modules.ngram.utils.NodeProxies) NGramIndex(org.exist.indexing.ngram.NGramIndex) Matcher(java.util.regex.Matcher) NodeSets(org.exist.xquery.modules.ngram.utils.NodeSets) ElementValue(org.exist.storage.ElementValue) Error(org.exist.xquery.util.Error) DocumentSet(org.exist.dom.persistent.DocumentSet) AlternativeStrings(org.exist.xquery.modules.ngram.query.AlternativeStrings) StartAnchor(org.exist.xquery.modules.ngram.query.StartAnchor) EmptyNodeSet(org.exist.dom.persistent.EmptyNodeSet) NGramIndexWorker(org.exist.indexing.ngram.NGramIndexWorker) Logger(org.apache.logging.log4j.Logger) FixedString(org.exist.xquery.modules.ngram.query.FixedString) EndAnchor(org.exist.xquery.modules.ngram.query.EndAnchor) WildcardedExpressionSequence(org.exist.xquery.modules.ngram.query.WildcardedExpressionSequence) Pattern(java.util.regex.Pattern) WildcardedExpression(org.exist.xquery.modules.ngram.query.WildcardedExpression) LogManager(org.apache.logging.log4j.LogManager) EvaluatableExpression(org.exist.xquery.modules.ngram.query.EvaluatableExpression) FixedString(org.exist.xquery.modules.ngram.query.FixedString) Match(org.exist.dom.persistent.Match)

Example 20 with NodeSet

use of org.exist.dom.persistent.NodeSet in project exist by eXist-db.

the class Query method eval.

/**
 * Evaluates the Lucene query function.
 *
 * <p>A non-null context that is not a persistent set yields the empty sequence without
 * consulting the index. When no preselected result exists, the first argument is evaluated
 * to obtain the input nodes and the Lucene index worker is queried with either an XML
 * query element or the key's string value. Otherwise the preselected result is handed to
 * the context step as preloaded data and the first argument is evaluated against it.</p>
 *
 * @param contextSequence the context sequence; replaced by {@code contextItem.toSequence()}
 *                        when a context item is given
 * @param contextItem     the context item, may be {@code null}
 * @return the matching nodes, {@link NodeSet#EMPTY_SET} for empty input, or
 *         {@link Sequence#EMPTY_SEQUENCE} for a non-persistent context
 * @throws XPathException if argument evaluation fails, or if the index query raises an
 *                        {@code IOException} or a Lucene {@code ParseException}; the
 *                        original exception is attached as the cause
 */
public Sequence eval(Sequence contextSequence, Item contextItem) throws XPathException {
    if (contextItem != null) {
        contextSequence = contextItem.toSequence();
    }
    if (contextSequence != null && !contextSequence.isPersistentSet()) {
        // in-memory docs won't have an index
        return Sequence.EMPTY_SEQUENCE;
    }
    NodeSet result;
    if (preselectResult == null) {
        long start = System.currentTimeMillis();
        Sequence input = getArgument(0).eval(contextSequence);
        // A VirtualNodeSet is never short-circuited here, even if isEmpty() would be true.
        if (!(input instanceof VirtualNodeSet) && input.isEmpty()) {
            result = NodeSet.EMPTY_SET;
        } else {
            NodeSet inNodes = input.toNodeSet();
            DocumentSet docs = inNodes.getDocumentSet();
            LuceneIndexWorker index = (LuceneIndexWorker) context.getBroker().getIndexController().getWorkerByIndexId(LuceneIndex.ID);
            Item key = getKey(contextSequence, contextItem);
            // Restrict the query to the context QName when one is known.
            List<QName> qnames = null;
            if (contextQName != null) {
                qnames = new ArrayList<>(1);
                qnames.add(contextQName);
            }
            QueryOptions options = parseOptions(this, contextSequence, contextItem, 3);
            try {
                if (key != null && Type.subTypeOf(key.getType(), Type.ELEMENT)) {
                    // Element keys are passed to the index as an XML query description.
                    final Element queryXML = (Element) ((NodeValue) key).getNode();
                    result = index.query(getExpressionId(), docs, inNodes, qnames, queryXML, NodeSet.ANCESTOR, options);
                } else {
                    final String query = key == null ? null : key.getStringValue();
                    result = index.query(getExpressionId(), docs, inNodes, qnames, query, NodeSet.ANCESTOR, options);
                }
            } catch (IOException | org.apache.lucene.queryparser.classic.ParseException e) {
                // Keep the original exception as the cause so its stack trace is not lost.
                throw new XPathException(this, e.getMessage(), e);
            }
        }
        // Index usage is reported for the empty-input case as well.
        if (context.getProfiler().traceFunctions()) {
            context.getProfiler().traceIndexUsage(context, "lucene", this, PerformanceStats.BASIC_INDEX, System.currentTimeMillis() - start);
        }
    } else {
        // DW: contextSequence can be null
        // NOTE(review): a null contextSequence would fail on the next line; behaviour left
        // unchanged because the intended handling is not visible from here.
        contextStep.setPreloadedData(contextSequence.getDocumentSet(), preselectResult);
        result = getArgument(0).eval(contextSequence).toNodeSet();
    }
    return result;
}
Also used : NodeSet(org.exist.dom.persistent.NodeSet) VirtualNodeSet(org.exist.dom.persistent.VirtualNodeSet) QName(org.exist.dom.QName) Element(org.w3c.dom.Element) IOException(java.io.IOException) LuceneIndexWorker(org.exist.indexing.lucene.LuceneIndexWorker) VirtualNodeSet(org.exist.dom.persistent.VirtualNodeSet) DocumentSet(org.exist.dom.persistent.DocumentSet)

Aggregations

NodeSet (org.exist.dom.persistent.NodeSet)49 NodeProxy (org.exist.dom.persistent.NodeProxy)18 Sequence (org.exist.xquery.value.Sequence)18 DocumentSet (org.exist.dom.persistent.DocumentSet)14 NewArrayNodeSet (org.exist.dom.persistent.NewArrayNodeSet)12 QName (org.exist.dom.QName)11 ExtArrayNodeSet (org.exist.dom.persistent.ExtArrayNodeSet)10 VirtualNodeSet (org.exist.dom.persistent.VirtualNodeSet)9 ContextItem (org.exist.dom.persistent.ContextItem)8 IOException (java.io.IOException)7 XPathException (org.exist.xquery.XPathException)7 Collator (com.ibm.icu.text.Collator)4 EXistException (org.exist.EXistException)4 DocumentImpl (org.exist.dom.persistent.DocumentImpl)4 EmptyNodeSet (org.exist.dom.persistent.EmptyNodeSet)4 Indexable (org.exist.storage.Indexable)4 SequenceIterator (org.exist.xquery.value.SequenceIterator)4 StringValue (org.exist.xquery.value.StringValue)4 NodeImpl (org.exist.dom.memtree.NodeImpl)3 Match (org.exist.dom.persistent.Match)3