Search in sources :

Example 66 with QName

use of org.exist.dom.QName in project exist by eXist-db.

the class NGramIndexWorker method search.

/**
 * Queries the n-gram index for {@code query} across every collection of {@code docs}
 * and every applicable QName, gathering all hits into one node set.
 *
 * <p>When {@code qnames} is {@code null} or empty, the QNames returned by
 * {@code getDefinedIndexes(context.getBroker(), docs)} are searched instead.
 * Each (collection, QName) pair is looked up under a btree read lock. The lookup uses
 * {@link IndexQuery#TRUNC_RIGHT} when {@code query} has fewer code points than
 * {@code getN()}, and {@link IndexQuery#EQ} otherwise.</p>
 *
 * <p>A failure to acquire the lock, or an I/O or btree error during a lookup, is logged
 * and the search moves on to the next QName; such failures are not propagated.</p>
 *
 * @param contextId  forwarded unchanged to the {@code SearchCallback}
 * @param docs       documents whose collections are searched
 * @param qnames     QNames to restrict the search to; may be {@code null} or empty
 * @param query      the string used to build the index key
 * @param ngram      forwarded unchanged to the {@code SearchCallback}
 * @param context    the query context; supplies the broker for the QName fallback
 * @param contextSet forwarded unchanged to the {@code SearchCallback}
 * @param axis       compared against {@link NodeSet#ANCESTOR}; the outcome is passed to the callback
 * @return the node set filled by the callbacks, after {@code iterate()} has been invoked on it
 * @throws XPathException declared by the signature; propagated from the calls made here
 */
public NodeSet search(final int contextId, final DocumentSet docs, final List<QName> qnames, final String query, final String ngram, final XQueryContext context, final NodeSet contextSet, final int axis) throws XPathException {
    // Use the caller's QNames when there are any, otherwise whatever getDefinedIndexes reports.
    final boolean callerSuppliedQNames = qnames != null && !qnames.isEmpty();
    final List<QName> searchQnames = callerSuppliedQNames ? qnames : getDefinedIndexes(context.getBroker(), docs);

    final NodeSet result = new ExtArrayNodeSet(docs.getDocumentCount(), 250);

    final Iterator<Collection> collections = docs.getCollectionIterator();
    while (collections.hasNext()) {
        final int collectionId = collections.next().getId();
        for (final QName qname : searchQnames) {
            final NGramQNameKey key = new NGramQNameKey(collectionId, qname, index.getBrokerPool().getSymbols(), query);
            try (final ManagedLock<ReentrantLock> btreeLock = lockManager.acquireBtreeReadLock(index.db.getLockName())) {
                final boolean ancestorAxis = axis == NodeSet.ANCESTOR;
                final SearchCallback callback = new SearchCallback(contextId, query, ngram, docs, contextSet, context, result, ancestorAxis);

                // A query shorter than n code points is looked up with the TRUNC_RIGHT operator.
                final int operator;
                if (query.codePointCount(0, query.length()) < getN()) {
                    operator = IndexQuery.TRUNC_RIGHT;
                } else {
                    operator = IndexQuery.EQ;
                }
                index.db.query(new IndexQuery(operator, key), callback);
            } catch (final LockException e) {
                LOG.warn("Failed to acquire lock for '{}'", FileUtils.fileName(index.db.getFile()), e);
            } catch (final IOException | BTreeException e) {
                LOG.error("{} in '{}'", e.getMessage(), FileUtils.fileName(index.db.getFile()), e);
            }
        }
    }

    // ensure result is ready to use
    result.iterate();
    return result;
}
Also used : ExtArrayNodeSet(org.exist.dom.persistent.ExtArrayNodeSet) NodeSet(org.exist.dom.persistent.NodeSet) ReentrantLock(java.util.concurrent.locks.ReentrantLock) IndexQuery(org.exist.storage.btree.IndexQuery) QName(org.exist.dom.QName) IOException(java.io.IOException) BTreeException(org.exist.storage.btree.BTreeException) ExtArrayNodeSet(org.exist.dom.persistent.ExtArrayNodeSet) Collection(org.exist.collections.Collection)

Example 67 with QName

use of org.exist.dom.QName in project exist by eXist-db.

the class NGramIndexWorker method getReindexRoot.

/**
 * Finds the node from which re-indexing has to start for {@code node}.
 *
 * <p>Returns {@code null} when {@code node} is an attribute, when its collection has no
 * index configuration, when that configuration has no entry for {@code NGramIndex.ID},
 * or when none of the inspected components of {@code path} has an entry in that
 * configuration. Otherwise the chain of stored parents is walked upwards starting at
 * {@code node}, and the last node visited whose QName has a configuration entry is
 * returned.</p>
 *
 * @param node        the node that triggered the re-index
 * @param path        the node path whose components are checked against the configuration
 * @param insert      not read by this implementation
 * @param includeSelf when {@code false} and {@code node} is an element, the last component
 *                    of {@code path} is left out of the check
 * @return the topmost configured node found on the walk, or {@code null}
 */
@Override
public <T extends IStoredNode> IStoredNode getReindexRoot(final IStoredNode<T> node, final NodePath path, final boolean insert, final boolean includeSelf) {
    if (node.getNodeType() == Node.ATTRIBUTE_NODE) {
        return null;
    }

    final IndexSpec indexConf = node.getOwnerDocument().getCollection().getIndexConfiguration(broker);
    if (indexConf == null) {
        return null;
    }

    final Map<?, ?> config = (Map<?, ?>) indexConf.getCustomIndexSpec(NGramIndex.ID);
    if (config == null) {
        return null;
    }

    // For an element that is not itself included, ignore the last path component.
    final int componentsToCheck;
    if (node.getNodeType() == Node.ELEMENT_NODE && !includeSelf) {
        componentsToCheck = path.length() - 1;
    } else {
        componentsToCheck = path.length();
    }

    boolean configuredComponentFound = false;
    for (int idx = 0; idx < componentsToCheck && !configuredComponentFound; idx++) {
        final QName component = path.getComponent(idx);
        configuredComponentFound = config.get(component) != null;
    }
    if (!configuredComponentFound) {
        return null;
    }

    // Walk up the stored parents, remembering the last node that has a configuration entry.
    IStoredNode topMost = null;
    for (IStoredNode<T> current = node; current != null; current = current.getParentStoredNode()) {
        if (config.get(current.getQName()) != null) {
            topMost = current;
        }
        // Stop at tree level 2 when the document lives in a temp collection.
        if (current.getOwnerDocument().getCollection().isTempCollection() && current.getNodeId().getTreeLevel() == 2) {
            break;
        }
    }
    return topMost;
}
Also used : IndexSpec(org.exist.storage.IndexSpec) QName(org.exist.dom.QName) IStoredNode(org.exist.dom.persistent.IStoredNode)

Example 68 with QName

use of org.exist.dom.QName in project exist by eXist-db.

the class LuceneIndexTest method reindex.

/**
 * Stores the test document, re-indexes the test collection inside a transaction and
 * then checks the index contents through {@code checkIndex} for several element QNames,
 * for no QName restriction at all, and for the {@code rend} attribute.
 */
@Test
public void reindex() throws EXistException, CollectionConfigurationException, PermissionDeniedException, SAXException, LockException, IOException, QName.IllegalQNameException {
    final DocumentSet storedDocs = configureAndStore(COLLECTION_CONFIG1, XML1, "dropDocument.xml");
    final BrokerPool brokerPool = existEmbeddedServer.getBrokerPool();
    final TransactionManager txnManager = brokerPool.getTransactionManager();
    try (final DBBroker broker = brokerPool.get(Optional.of(brokerPool.getSecurityManager().getSystemSubject()));
        final Txn txn = txnManager.beginTransaction()) {
        broker.reindexCollection(txn, TestConstants.TEST_COLLECTION_URI);

        // element lookups
        final QName[] headElement = { new QName("head") };
        checkIndex(storedDocs, broker, headElement, "title", 1);

        final QName[] paragraphElement = { new QName("p") };
        final Occurrences[] paragraphHits = checkIndex(storedDocs, broker, paragraphElement, "with", 1);
        assertEquals(2, paragraphHits[0].getOccurrences());

        final QName[] hiElement = { new QName("hi") };
        checkIndex(storedDocs, broker, hiElement, "just", 1);

        // lookup without any QName restriction
        checkIndex(storedDocs, broker, null, "in", 1);

        // attribute lookups: first without a term, then with one
        final QName rendAttribute = new QName("rend", XMLConstants.NULL_NS_URI, ElementValue.ATTRIBUTE);
        final QName[] rendOnly = { rendAttribute };
        checkIndex(storedDocs, broker, rendOnly, null, 2);
        final QName[] rendAgain = { rendAttribute };
        checkIndex(storedDocs, broker, rendAgain, "center", 1);

        txn.commit();
    }
}
Also used : DBBroker(org.exist.storage.DBBroker) TransactionManager(org.exist.storage.txn.TransactionManager) QName(org.exist.dom.QName) DefaultDocumentSet(org.exist.dom.persistent.DefaultDocumentSet) DocumentSet(org.exist.dom.persistent.DocumentSet) MutableDocumentSet(org.exist.dom.persistent.MutableDocumentSet) Txn(org.exist.storage.txn.Txn) BrokerPool(org.exist.storage.BrokerPool)

Example 69 with QName

use of org.exist.dom.QName in project exist by eXist-db.

the class NGramSearch method eval.

/**
 * Evaluates this n-gram search function.
 *
 * <p>If a context item is given it replaces the context sequence. When a preselect
 * result is present, it is handed to {@code contextStep} together with the context
 * sequence's document set, and the first argument is evaluated and returned as a node
 * set. Otherwise the first argument is evaluated as the input; an empty input yields
 * {@link NodeSet#EMPTY_SET}. For a non-empty input, the string value of the second
 * argument is matched against the NGram index via {@code processMatches}, restricted to
 * {@code contextQName} when that is set. If the profiler traces functions, the index
 * usage is reported with the elapsed time.</p>
 *
 * @param contextSequence the current context sequence
 * @param contextItem     the current context item, or {@code null}
 * @return the resulting node set
 * @throws XPathException declared by the signature; propagated from the calls made here
 */
@Override
public Sequence eval(Sequence contextSequence, Item contextItem) throws XPathException {
    if (contextItem != null) {
        contextSequence = contextItem.toSequence();
    }

    if (preselectResult != null) {
        contextStep.setPreloadedData(contextSequence.getDocumentSet(), preselectResult);
        return getArgument(0).eval(contextSequence).toNodeSet();
    }

    final Sequence input = getArgument(0).eval(contextSequence, contextItem);
    if (input.isEmpty()) {
        return NodeSet.EMPTY_SET;
    }

    final long startedAt = System.currentTimeMillis();
    final NodeSet inputNodes = input.toNodeSet();
    final DocumentSet inputDocs = inputNodes.getDocumentSet();
    final NGramIndexWorker worker = (NGramIndexWorker) context.getBroker().getIndexController().getWorkerByIndexId(NGramIndex.ID);
    final String searchKey = getArgument(1).eval(contextSequence, contextItem).getStringValue();

    // Restrict the search to the context QName when there is one; null means no restriction is passed on.
    List<QName> restrictTo = null;
    if (contextQName != null) {
        restrictTo = new ArrayList<>(1);
        restrictTo.add(contextQName);
    }

    final NodeSet matches = processMatches(worker, inputDocs, restrictTo, searchKey, inputNodes, NodeSet.ANCESTOR);
    if (context.getProfiler().traceFunctions()) {
        // report index use
        context.getProfiler().traceIndexUsage(context, "ngram", this, PerformanceStats.BASIC_INDEX, System.currentTimeMillis() - startedAt);
    }
    return matches;
}
Also used : NodeSet(org.exist.dom.persistent.NodeSet) EmptyNodeSet(org.exist.dom.persistent.EmptyNodeSet) QName(org.exist.dom.QName) NGramIndexWorker(org.exist.indexing.ngram.NGramIndexWorker) WildcardedExpressionSequence(org.exist.xquery.modules.ngram.query.WildcardedExpressionSequence) DocumentSet(org.exist.dom.persistent.DocumentSet) FixedString(org.exist.xquery.modules.ngram.query.FixedString)

Example 70 with QName

use of org.exist.dom.QName in project exist by eXist-db.

the class NGramSearch method fixedStringSearch.

/**
 * Searches the n-gram index for a fixed string by looking up each of its distinct
 * n-grams in turn and keeping only the continuous matches.
 *
 * <p>The first n-gram is searched as is. Every following n-gram that has fewer than
 * {@code index.getN()} code points is padded at the front with code points taken from
 * the previous n-gram before it is used as the search key. The hits for each n-gram are
 * then combined with the result so far through {@code getContinuousMatches}; nodes with
 * no counterpart in the previous result map to {@code null}.</p>
 *
 * @param index   the n-gram index worker to query
 * @param docs    the documents to search
 * @param qnames  the QNames passed on to {@code index.search}
 * @param query   the string to search for
 * @param nodeSet the context node set passed on to {@code index.search}
 * @param axis    the axis passed on to {@code index.search}
 * @return the combined matches, or an {@link EmptyNodeSet} when {@code query} yields no n-grams
 * @throws XPathException declared by the signature; propagated from the calls made here
 */
public NodeSet fixedStringSearch(final NGramIndexWorker index, final DocumentSet docs, final List<QName> qnames, final String query, final NodeSet nodeSet, final int axis) throws XPathException {
    final String[] ngrams = NGramSearch.getDistinctNGrams(query, index.getN());
    // Nothing to search for? Then find nothing.
    if (ngrams.length == 0) {
        return new EmptyNodeSet();
    }

    final String leadingNgram = ngrams[0];
    LOG.trace("First NGRAM: {}", leadingNgram);
    NodeSet result = index.search(getExpressionId(), docs, qnames, leadingNgram, leadingNgram, context, nodeSet, axis);

    for (int i = 1; i < ngrams.length; i++) {
        final String ngram = ngrams[i];
        final int ngramLength = ngram.codePointCount(0, ngram.length());
        final int missing = index.getN() - ngramLength;

        final String searchKey;
        if (missing > 0) {
            // The n-gram is shorter than n: prefix it with code points from the previous
            // n-gram. Too-short ngrams lead to a considerable performance loss.
            final String previous = ngrams[i - 1];
            final StringBuilder padded = new StringBuilder();
            int offset = previous.offsetByCodePoints(0, ngramLength);
            for (int remaining = missing; remaining > 0; remaining--) {
                final int codePoint = previous.codePointAt(offset);
                offset += Character.charCount(codePoint);
                padded.appendCodePoint(codePoint);
            }
            padded.append(ngram);
            searchKey = padded.toString();
            LOG.debug("Filled: {}", searchKey);
        } else {
            searchKey = ngram;
        }

        final NodeSet hits = index.search(getExpressionId(), docs, qnames, searchKey, ngram, context, nodeSet, axis);
        // The lambda needs an effectively final reference to the result so far.
        final NodeSet matchedSoFar = result;
        result = NodeSets.transformNodes(hits, proxy ->
                Optional.ofNullable(matchedSoFar.get(proxy))
                        .map(before -> getContinuousMatches(before, proxy))
                        .orElse(null));
    }
    return result;
}
Also used : NodeSet(org.exist.dom.persistent.NodeSet) EmptyNodeSet(org.exist.dom.persistent.EmptyNodeSet) Match(org.exist.dom.persistent.Match) EvaluatableExpression(org.exist.xquery.modules.ngram.query.EvaluatableExpression) java.util(java.util) QName(org.exist.dom.QName) NodeProxy(org.exist.dom.persistent.NodeProxy) org.exist.xquery.value(org.exist.xquery.value) Wildcard(org.exist.xquery.modules.ngram.query.Wildcard) NodeSet(org.exist.dom.persistent.NodeSet) EmptyExpression(org.exist.xquery.modules.ngram.query.EmptyExpression) org.exist.xquery(org.exist.xquery) NodeProxies(org.exist.xquery.modules.ngram.utils.NodeProxies) NGramIndex(org.exist.indexing.ngram.NGramIndex) Matcher(java.util.regex.Matcher) NodeSets(org.exist.xquery.modules.ngram.utils.NodeSets) ElementValue(org.exist.storage.ElementValue) Error(org.exist.xquery.util.Error) DocumentSet(org.exist.dom.persistent.DocumentSet) AlternativeStrings(org.exist.xquery.modules.ngram.query.AlternativeStrings) StartAnchor(org.exist.xquery.modules.ngram.query.StartAnchor) EmptyNodeSet(org.exist.dom.persistent.EmptyNodeSet) NGramIndexWorker(org.exist.indexing.ngram.NGramIndexWorker) Logger(org.apache.logging.log4j.Logger) FixedString(org.exist.xquery.modules.ngram.query.FixedString) EndAnchor(org.exist.xquery.modules.ngram.query.EndAnchor) WildcardedExpressionSequence(org.exist.xquery.modules.ngram.query.WildcardedExpressionSequence) Pattern(java.util.regex.Pattern) WildcardedExpression(org.exist.xquery.modules.ngram.query.WildcardedExpression) LogManager(org.apache.logging.log4j.LogManager) EmptyNodeSet(org.exist.dom.persistent.EmptyNodeSet) FixedString(org.exist.xquery.modules.ngram.query.FixedString)

Aggregations

QName (org.exist.dom.QName): 271; Test (org.junit.Test): 54; Sequence (org.exist.xquery.value.Sequence): 39; DBBroker (org.exist.storage.DBBroker): 31; MemTreeBuilder (org.exist.dom.memtree.MemTreeBuilder): 28; IOException (java.io.IOException): 23; Document (org.w3c.dom.Document): 23; DocumentSet (org.exist.dom.persistent.DocumentSet): 20; Text (org.w3c.dom.Text): 20; AttributesImpl (org.xml.sax.helpers.AttributesImpl): 18; NameTest (org.exist.xquery.NameTest): 17; XPathException (org.exist.xquery.XPathException): 17; BrokerPool (org.exist.storage.BrokerPool): 15; IllegalQNameException (org.exist.dom.QName.IllegalQNameException): 13; Node (org.w3c.dom.Node): 12; ReentrantLock (java.util.concurrent.locks.ReentrantLock): 11; NodeSet (org.exist.dom.persistent.NodeSet): 11; SAXException (org.xml.sax.SAXException): 11; DefaultDocumentSet (org.exist.dom.persistent.DefaultDocumentSet): 10; MutableDocumentSet (org.exist.dom.persistent.MutableDocumentSet): 10