Examples with Occurrences - org.exist.util.Occurrences

Example 1 with Occurrences

Use of org.exist.util.Occurrences in the project exist by eXist-db.

Class IndexKeys, method eval.

/**
 * Scans an index for keys and feeds every key found to a caller-supplied callback function,
 * returning the concatenation of everything the callback produced.
 *
 * <p>Argument layout as used by this method:</p>
 * <ul>
 *   <li>{@code args[0]} - when the function is called as {@code index-keys-by-qname} this is kept
 *       as a sequence of QNames; otherwise it is converted to a node set. An empty
 *       {@code args[0]} short-circuits to the empty sequence.</li>
 *   <li>{@code args[1]} - the start value: used as the {@code START_VALUE} hint for a named
 *       index, or cast to {@code Indexable} for the range index.</li>
 *   <li>{@code args[2]} - its first item is cast to the {@code FunctionReference} callback.</li>
 *   <li>{@code args[3]} - optional maximum number of keys; when it does not hold exactly one
 *       item no limit is applied ({@code max} stays {@code -1}).</li>
 *   <li>{@code args[4]} - only read when the function has 5 arguments: the name of the index
 *       to query.</li>
 * </ul>
 *
 * <p>The callback is invoked once per key with two parameters: the key itself, and a sequence of
 * three unsigned integers (the entry's {@code getOccurrences()}, its {@code getDocuments()}, and
 * the 1-based position of the key in the scan result).</p>
 *
 * @param args the function arguments, see above
 * @param contextSequence the context sequence; only consulted in the
 *        {@code index-keys-by-qname} case, where a {@code null} value falls back to the
 *        statically known documents
 * @return the accumulated results of all callback invocations
 * @throws XPathException if a named index was requested but no worker is registered under
 *         that name
 *
 * @see org.exist.xquery.BasicFunction#eval(org.exist.xquery.value.Sequence[],
 *      org.exist.xquery.value.Sequence)
 */
public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathException {
    if (args[0].isEmpty()) {
        return Sequence.EMPTY_SEQUENCE;
    }
    // Exactly one of nodes/qnames is populated below, depending on the function name used.
    NodeSet nodes = null;
    DocumentSet docs = null;
    Sequence qnames = null;
    if (isCalledAs("index-keys-by-qname")) {
        qnames = args[0];
        docs = contextSequence == null ? context.getStaticallyKnownDocuments() : contextSequence.getDocumentSet();
    } else {
        nodes = args[0].toNodeSet();
        docs = nodes.getDocumentSet();
    }
    final Sequence result = new ValueSequence();
    // The function reference is declared as a resource so it is closed when the block exits.
    try (final FunctionReference ref = (FunctionReference) args[2].itemAt(0)) {
        // -1 means "no limit"
        int max = -1;
        if (args[3].hasOne()) {
            max = ((IntegerValue) args[3].itemAt(0)).getInt();
        }
        // if we have 5 arguments, query the user-specified index
        if (this.getArgumentCount() == 5) {
            final IndexWorker indexWorker = context.getBroker().getIndexController().getWorkerByIndexName(args[4].itemAt(0).getStringValue());
            // IndexWorker indexWorker = context.getBroker().getBrokerPool().getIndexManager().getIndexByName(args[4].itemAt(0).getStringValue()).getWorker();
            if (indexWorker == null) {
                throw new XPathException(this, "Unknown index: " + args[4].itemAt(0).getStringValue());
            }
            // Collect the scan hints: optional value count, start value and QName filter.
            final Map<String, Object> hints = new HashMap<>();
            if (max != -1) {
                hints.put(IndexWorker.VALUE_COUNT, new IntegerValue(max));
            }
            // The start value is only passed on to workers that are OrderedValuesIndex instances;
            // for any other worker it is dropped with a warning.
            if (indexWorker instanceof OrderedValuesIndex) {
                hints.put(OrderedValuesIndex.START_VALUE, args[1].getStringValue());
            } else {
                logger.warn("{} isn't an instance of org.exist.indexing.OrderedValuesIndex. Start value '{}' ignored.", indexWorker.getClass().getName(), args[1]);
            }
            if (qnames != null) {
                final List<QName> qnameList = new ArrayList<>(qnames.getItemCount());
                for (final SequenceIterator i = qnames.iterate(); i.hasNext(); ) {
                    final QNameValue qv = (QNameValue) i.nextItem();
                    qnameList.add(qv.getQName());
                }
                hints.put(QNamedKeysIndex.QNAMES_KEY, qnameList);
            }
            final Occurrences[] occur = indexWorker.scanIndex(context, docs, nodes, hints);
            // TODO : add an extra argument to pass the END_VALUE ?
            // Cap the number of reported keys at max, in case the scan returned more entries.
            final int len = (max != -1 && occur.length > max ? max : occur.length);
            // params and data are reused for every callback invocation; data is emptied after each call.
            final Sequence[] params = new Sequence[2];
            ValueSequence data = new ValueSequence();
            for (int j = 0; j < len; j++) {
                // first callback parameter: the term, as a string
                params[0] = new StringValue(occur[j].getTerm().toString());
                // second callback parameter: (occurrences, documents, 1-based position)
                data.add(new IntegerValue(occur[j].getOccurrences(), Type.UNSIGNED_INT));
                data.add(new IntegerValue(occur[j].getDocuments(), Type.UNSIGNED_INT));
                data.add(new IntegerValue(j + 1, Type.UNSIGNED_INT));
                params[1] = data;
                result.addAll(ref.evalFunction(Sequence.EMPTY_SEQUENCE, null, params));
                data.clear();
            }
        // no index specified: use the range index
        } else {
            final Indexable indexable = (Indexable) args[1].itemAt(0);
            ValueOccurrences[] occur = null;
            // First check for indexes defined on qname
            final QName[] allQNames = getDefinedIndexes(context.getBroker(), docs);
            if (allQNames.length > 0) {
                occur = context.getBroker().getValueIndex().scanIndexKeys(docs, nodes, allQNames, indexable);
            }
            // Also check if there's an index defined by path
            ValueOccurrences[] occur2 = context.getBroker().getValueIndex().scanIndexKeys(docs, nodes, indexable);
            // Merge the two results
            // (the qname-based entries come first, followed by the path-based ones; no re-sorting
            // or de-duplication is done here)
            if (occur == null || occur.length == 0) {
                occur = occur2;
            } else {
                ValueOccurrences[] t = new ValueOccurrences[occur.length + occur2.length];
                System.arraycopy(occur, 0, t, 0, occur.length);
                System.arraycopy(occur2, 0, t, occur.length, occur2.length);
                occur = t;
            }
            // Cap the number of reported keys at max, if a limit was given.
            final int len = (max != -1 && occur.length > max ? max : occur.length);
            // params and data are reused for every callback invocation; data is emptied after each call.
            final Sequence[] params = new Sequence[2];
            ValueSequence data = new ValueSequence();
            for (int j = 0; j < len; j++) {
                // first callback parameter: the indexed value itself
                params[0] = occur[j].getValue();
                // second callback parameter: (occurrences, documents, 1-based position)
                data.add(new IntegerValue(occur[j].getOccurrences(), Type.UNSIGNED_INT));
                data.add(new IntegerValue(occur[j].getDocuments(), Type.UNSIGNED_INT));
                data.add(new IntegerValue(j + 1, Type.UNSIGNED_INT));
                params[1] = data;
                result.addAll(ref.evalFunction(Sequence.EMPTY_SEQUENCE, null, params));
                data.clear();
            }
        }
    }
    logger.debug("Returning: {}", result.getItemCount());
    return result;
}

Also used : OrderedValuesIndex(org.exist.indexing.OrderedValuesIndex) Occurrences(org.exist.util.Occurrences) ValueOccurrences(org.exist.util.ValueOccurrences) IndexWorker(org.exist.indexing.IndexWorker) Indexable(org.exist.storage.Indexable) NodeSet(org.exist.dom.persistent.NodeSet) QName(org.exist.dom.QName) ValueOccurrences(org.exist.util.ValueOccurrences) DocumentSet(org.exist.dom.persistent.DocumentSet)

Example 2 with Occurrences

Use of org.exist.util.Occurrences in the project exist by eXist-db.

Class LuceneIndexWorker, method scanIndexByQName.

/**
 * Scans the index once per given QName and returns the collected occurrences.
 *
 * <p>All QNames share one sorted map keyed by term, so the returned array is ordered by term
 * and holds at most one entry per term.</p>
 *
 * @param qnames the QNames whose encoded field names are scanned
 * @param docs passed through to {@code doScanIndex}
 * @param nodes passed through to {@code doScanIndex}
 * @param start passed through to {@code doScanIndex}
 * @param end passed through to {@code doScanIndex}
 * @param max passed through to {@code doScanIndex}
 * @return the occurrences gathered for all QNames, in term order
 * @throws IOException declared for failures while reading the index
 */
private Occurrences[] scanIndexByQName(List<QName> qnames, DocumentSet docs, NodeSet nodes, String start, String end, long max) throws IOException {
    final TreeMap<String, Occurrences> occurrencesByTerm = new TreeMap<>();
    index.withReader(reader -> {
        for (final QName qname : qnames) {
            // each QName maps to its own field name in the index
            doScanIndex(docs, nodes, start, end, max, occurrencesByTerm, reader,
                    LuceneUtil.encodeQName(qname, index.getBrokerPool().getSymbols()));
        }
        // the reader callback has nothing to return; results live in the shared map
        return null;
    });
    return occurrencesByTerm.values().toArray(new Occurrences[0]);
}

Also used : QName(org.exist.dom.QName) Occurrences(org.exist.util.Occurrences)

Example 3 with Occurrences

Use of org.exist.util.Occurrences in the project exist by eXist-db.

Class NativeStructuralIndexWorker, method scanIndex.

/**
 * Gathers index statistics per element/attribute name. Used by functions like util:index-keys.
 *
 * <p>For every document in {@code docs} and every QName reported for that document, the b-tree
 * is range-queried from the key of that document id up to the key of the next document id.
 * Each entry visited adds one occurrence to the statistics of the name; attribute names are
 * prefixed with {@code "@"}. Failures while locking or reading are logged as warnings and the
 * scan carries on with the next QName.</p>
 *
 * @param context the xquery context (not read by this implementation)
 * @param docs The documents to which the index entries belong
 * @param contextSet ignored by this index
 * @param hints Some "hints" for retrieving the index entries. See such hints in
 * {@link org.exist.indexing.OrderedValuesIndex} and {@link org.exist.indexing.QNamedKeysIndex}.
 * This implementation does not read them.
 * @return the matching occurrences, ordered by name
 */
public Occurrences[] scanIndex(XQueryContext context, DocumentSet docs, NodeSet contextSet, Map hints) {
    final Map<String, Occurrences> statsByName = new TreeMap<>();
    final Iterator<DocumentImpl> docIterator = docs.getDocumentIterator();
    while (docIterator.hasNext()) {
        final DocumentImpl doc = docIterator.next();
        final List<QName> docQNames = getQNamesForDoc(doc);
        for (final QName qname : docQNames) {
            final String name = qname.getNameType() == ElementValue.ATTRIBUTE
                    ? "@" + qname.getLocalPart()
                    : qname.getLocalPart();
            // key range: this document's key up to the key of the next document id
            final byte[] fromKey = computeKey(qname.getNameType(), qname, doc.getDocId());
            final byte[] toKey = computeKey(qname.getNameType(), qname, doc.getDocId() + 1);
            final IndexQuery rangeQuery = new IndexQuery(IndexQuery.RANGE, new Value(fromKey), new Value(toKey));
            try (final ManagedLock<ReentrantLock> readLock = index.lockManager.acquireBtreeReadLock(index.btree.getLockName())) {
                index.btree.query(rangeQuery, (key, ptr) -> {
                    final Occurrences known = statsByName.get(name);
                    if (known != null) {
                        known.addOccurrences(1);
                        known.addDocument(doc);
                        return true;
                    }
                    // first hit for this name: register a fresh entry
                    final Occurrences fresh = new Occurrences(name);
                    statsByName.put(name, fresh);
                    fresh.addDocument(doc);
                    fresh.addOccurrences(1);
                    return true;
                });
            } catch (final LockException e) {
                NativeStructuralIndex.LOG.warn("Failed to lock structural index: {}", e.getMessage(), e);
            } catch (final Exception e) {
                NativeStructuralIndex.LOG.warn("Exception caught while reading structural index for document {}: {}", doc.getURI(), e.getMessage(), e);
            }
        }
    }
    return statsByName.values().toArray(new Occurrences[0]);
}

Also used : ReentrantLock(java.util.concurrent.locks.ReentrantLock) IndexQuery(org.exist.storage.btree.IndexQuery) QName(org.exist.dom.QName) Occurrences(org.exist.util.Occurrences) DocumentImpl(org.exist.dom.persistent.DocumentImpl) PermissionDeniedException(org.exist.security.PermissionDeniedException) LockException(org.exist.util.LockException) DatabaseConfigurationException(org.exist.util.DatabaseConfigurationException) LockException(org.exist.util.LockException) Value(org.exist.storage.btree.Value)

Example 4 with Occurrences

Use of org.exist.util.Occurrences in the project exist by eXist-db.

Class RangeIndexWorker, method scanIndexByQName.

/**
 * Scans the index once per given QName, inside a single reader session, and returns the
 * collected occurrences.
 *
 * <p>All QNames share one sorted map keyed by term, so the returned array is ordered by term
 * and holds at most one entry per term.</p>
 *
 * @param qnames the QNames whose encoded field names are scanned
 * @param docs passed through to {@code scan}
 * @param nodes passed through to {@code scan}
 * @param start passed through to {@code scan}
 * @param end passed through to {@code scan}
 * @param max passed through to {@code scan}
 * @return the occurrences gathered for all QNames, in term order
 * @throws IOException declared for failures while reading the index
 */
private Occurrences[] scanIndexByQName(List<QName> qnames, DocumentSet docs, NodeSet nodes, String start, String end, long max) throws IOException {
    return index.withReader(reader -> {
        final TreeMap<String, Occurrences> occurrencesByTerm = new TreeMap<>();
        for (final QName qname : qnames) {
            // each QName maps to its own field name in the index
            scan(docs, nodes, start, end, max, occurrencesByTerm, reader,
                    LuceneUtil.encodeQName(qname, index.getBrokerPool().getSymbols()));
        }
        return occurrencesByTerm.values().toArray(new Occurrences[0]);
    });
}

Also used : QName(org.exist.dom.QName) Occurrences(org.exist.util.Occurrences)

Example 5 with Occurrences

Use of org.exist.util.Occurrences in the project exist by eXist-db.

Class RangeIndexWorker, method scan.

/**
 * Walks the terms of {@code field} in every leaf of {@code reader} and records, per accepted
 * term, the documents it occurs in and its frequency.
 *
 * <p>A term is accepted when {@code end} is set and the term does not sort after it; when
 * {@code end} is {@code null}, the term must start with {@code start} (if {@code start} is
 * given). Postings are skipped when they are not live, when their document is not part of
 * {@code docs}, or - if {@code nodes} is given - when their node is not contained in it.
 * Term iteration within a leaf stops as soon as {@code map} holds {@code max} entries.</p>
 *
 * @param docs the documents whose postings are counted
 * @param nodes optional node filter; {@code null} disables node filtering
 * @param start optional term prefix, only checked when {@code end} is {@code null}
 * @param end optional inclusive upper bound for terms
 * @param max the number of map entries at which term iteration stops
 * @param map receives the occurrences, keyed by term
 * @param reader the index reader whose leaves are scanned
 * @param field the name of the field to scan
 * @throws IOException declared for failures while reading the index
 */
private void scan(DocumentSet docs, NodeSet nodes, String start, String end, long max, TreeMap<String, Occurrences> map, IndexReader reader, String field) throws IOException {
    for (final AtomicReaderContext leaf : reader.leaves()) {
        final NumericDocValues docIds = leaf.reader().getNumericDocValues(FIELD_DOC_ID);
        final BinaryDocValues nodeIds = leaf.reader().getBinaryDocValues(FIELD_NODE_ID);
        final Bits live = leaf.reader().getLiveDocs();
        final Terms fieldTerms = leaf.reader().terms(field);
        if (fieldTerms == null) {
            continue;
        }
        final TermsEnum termsEnum = fieldTerms.iterator(null);
        boolean hasTerm = termsEnum.next() != null;
        while (hasTerm && map.size() < max) {
            final String term = termsEnum.term().utf8ToString();
            final boolean accepted;
            if (end != null) {
                accepted = term.compareTo(end) <= 0;
            } else {
                accepted = start == null || term.startsWith(start);
            }
            if (accepted) {
                final DocsEnum postings = termsEnum.docs(null, null);
                while (postings.nextDoc() != DocsEnum.NO_MORE_DOCS) {
                    if (live != null && !live.get(postings.docID())) {
                        continue;
                    }
                    final int docId = (int) docIds.get(postings.docID());
                    final DocumentImpl storedDocument = docs.getDoc(docId);
                    if (storedDocument == null) {
                        continue;
                    }
                    // the node id is only decoded when a node filter was supplied
                    NodeId nodeId = null;
                    if (nodes != null) {
                        final BytesRef rawNodeId = nodeIds.get(postings.docID());
                        final int units = ByteConversion.byteToShort(rawNodeId.bytes, rawNodeId.offset);
                        nodeId = index.getBrokerPool().getNodeFactory().createFromData(units, rawNodeId.bytes, rawNodeId.offset + 2);
                    }
                    if (nodeId != null && nodes.get(storedDocument, nodeId) == null) {
                        continue;
                    }
                    final Occurrences oc = map.computeIfAbsent(term, Occurrences::new);
                    oc.addDocument(storedDocument);
                    oc.addOccurrences(postings.freq());
                }
            }
            hasTerm = termsEnum.next() != null;
        }
    }
}

Also used : Occurrences(org.exist.util.Occurrences) DocumentImpl(org.exist.dom.persistent.DocumentImpl) NodeId(org.exist.numbering.NodeId) Bits(org.apache.lucene.util.Bits) BytesRef(org.apache.lucene.util.BytesRef)

Aggregations

Occurrences (org.exist.util.Occurrences)11 QName (org.exist.dom.QName)5 DocumentSet (org.exist.dom.persistent.DocumentSet)4 HashMap (java.util.HashMap)3 NodeSet (org.exist.dom.persistent.NodeSet)3 IndexWorker (org.exist.indexing.IndexWorker)3 OrderedValuesIndex (org.exist.indexing.OrderedValuesIndex)3 ValueOccurrences (org.exist.util.ValueOccurrences)3 DocumentImpl (org.exist.dom.persistent.DocumentImpl)2 Indexable (org.exist.storage.Indexable)2 XPathException (org.exist.xquery.XPathException)2 IntegerValue (org.exist.xquery.value.IntegerValue)2 Sequence (org.exist.xquery.value.Sequence)2 URISyntaxException (java.net.URISyntaxException)1 ArrayList (java.util.ArrayList)1 Arrays (java.util.Arrays)1 List (java.util.List)1 ReentrantLock (java.util.concurrent.locks.ReentrantLock)1 Stream (java.util.stream.Stream)1 Bits (org.apache.lucene.util.Bits)1