Search in sources :

Example 1 with Occurrences

use of org.exist.util.Occurrences in project exist by eXist-db.

the class IndexKeys method eval.

    /*
     * (non-Javadoc)
     * @see org.exist.xquery.BasicFunction#eval(org.exist.xquery.value.Sequence[],
     *      org.exist.xquery.value.Sequence)
     */
/**
 * Scans an index for keys and invokes a callback function once per key found,
 * collecting whatever the callback returns.
 *
 * <p>As read from the visible code: {@code args[0]} supplies either QNames (when the
 * function is called as {@code index-keys-by-qname}) or the nodes to scan;
 * {@code args[1]} is the start value; {@code args[2]} holds the callback function
 * reference; {@code args[3]} optionally limits the number of keys; {@code args[4]},
 * when the function has five arguments, names the index to query.
 *
 * <p>NOTE(review): this listing appears to have lost its closing braces and possibly
 * some statements during extraction; confirm against the upstream source before
 * relying on the exact control flow.
 *
 * @param args the function arguments, laid out as described above
 * @param contextSequence the context sequence; when null, the statically known
 *        documents are used for the by-QName variant
 * @return the accumulated results of all callback invocations, or the empty
 *         sequence when {@code args[0]} is empty
 * @throws XPathException if the index named by {@code args[4]} cannot be found
 */
public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathException {
    // Nothing to scan: short-circuit with the empty sequence.
    if (args[0].isEmpty()) {
        return Sequence.EMPTY_SEQUENCE;
    NodeSet nodes = null;
    DocumentSet docs = null;
    Sequence qnames = null;
    // The by-QName variant treats args[0] as a list of QNames and takes its documents
    // from the context; the node variant derives the documents from the nodes themselves.
    if (isCalledAs("index-keys-by-qname")) {
        qnames = args[0];
        docs = contextSequence == null ? context.getStaticallyKnownDocuments() : contextSequence.getDocumentSet();
    } else {
        nodes = args[0].toNodeSet();
        docs = nodes.getDocumentSet();
    final Sequence result = new ValueSequence();
    // The callback reference is managed by try-with-resources so it is closed on exit.
    try (final FunctionReference ref = (FunctionReference) args[2].itemAt(0)) {
        // -1 is used as the "no limit" marker for the maximum number of keys.
        int max = -1;
        if (args[3].hasOne()) {
            max = ((IntegerValue) args[3].itemAt(0)).getInt();
        // if we have 5 arguments, query the user-specified index
        if (this.getArgumentCount() == 5) {
            final IndexWorker indexWorker = context.getBroker().getIndexController().getWorkerByIndexName(args[4].itemAt(0).getStringValue());
            // IndexWorker indexWorker = context.getBroker().getBrokerPool().getIndexManager().getIndexByName(args[4].itemAt(0).getStringValue()).getWorker();
            if (indexWorker == null) {
                throw new XPathException(this, "Unknown index: " + args[4].itemAt(0).getStringValue());
            // Hints passed to the index worker's scan: value count, start value, QNames.
            final Map<String, Object> hints = new HashMap<>();
            if (max != -1) {
                hints.put(IndexWorker.VALUE_COUNT, new IntegerValue(max));
            // The start value is only passed on when the worker is an OrderedValuesIndex;
            // otherwise it is dropped with a warning.
            if (indexWorker instanceof OrderedValuesIndex) {
                hints.put(OrderedValuesIndex.START_VALUE, args[1].getStringValue());
            } else {
                logger.warn("{} isn't an instance of org.exist.indexing.OrderedValuesIndex. Start value '{}' ignored.", indexWorker.getClass().getName(), args[1]);
            if (qnames != null) {
                final List<QName> qnameList = new ArrayList<>(qnames.getItemCount());
                for (final SequenceIterator i = qnames.iterate(); i.hasNext(); ) {
                    final QNameValue qv = (QNameValue) i.nextItem();
                // NOTE(review): nothing visible here adds qv to qnameList, so the list is
                // stored empty as written; a statement was presumably lost - confirm upstream.
                hints.put(QNamedKeysIndex.QNAMES_KEY, qnameList);
            final Occurrences[] occur = indexWorker.scanIndex(context, docs, nodes, hints);
            // TODO : add an extra argument to pass the END_VALUE ?
            // Cap the number of reported keys at max when a limit was given.
            final int len = (max != -1 && occur.length > max ? max : occur.length);
            // Callback parameters: [0] the key, [1] (occurrences, documents, 1-based position).
            final Sequence[] params = new Sequence[2];
            ValueSequence data = new ValueSequence();
            for (int j = 0; j < len; j++) {
                params[0] = new StringValue(occur[j].getTerm().toString());
                data.add(new IntegerValue(occur[j].getOccurrences(), Type.UNSIGNED_INT));
                data.add(new IntegerValue(occur[j].getDocuments(), Type.UNSIGNED_INT));
                data.add(new IntegerValue(j + 1, Type.UNSIGNED_INT));
                params[1] = data;
                // NOTE(review): data is created once outside the loop and no reset is visible,
                // so as written it keeps growing across iterations - confirm upstream.
                result.addAll(ref.evalFunction(Sequence.EMPTY_SEQUENCE, null, params));
        // no index specified: use the range index
        } else {
            final Indexable indexable = (Indexable) args[1].itemAt(0);
            ValueOccurrences[] occur = null;
            // First check for indexes defined on qname
            final QName[] allQNames = getDefinedIndexes(context.getBroker(), docs);
            if (allQNames.length > 0) {
                occur = context.getBroker().getValueIndex().scanIndexKeys(docs, nodes, allQNames, indexable);
            // Also check if there's an index defined by path
            ValueOccurrences[] occur2 = context.getBroker().getValueIndex().scanIndexKeys(docs, nodes, indexable);
            // Merge the two results
            if (occur == null || occur.length == 0) {
                occur = occur2;
            } else {
                // Plain concatenation: qname-based results first, then path-based results.
                ValueOccurrences[] t = new ValueOccurrences[occur.length + occur2.length];
                System.arraycopy(occur, 0, t, 0, occur.length);
                System.arraycopy(occur2, 0, t, occur.length, occur2.length);
                occur = t;
            // Same capping and callback protocol as the named-index branch, except that
            // the key is passed as the occurrence's value rather than a string.
            final int len = (max != -1 && occur.length > max ? max : occur.length);
            final Sequence[] params = new Sequence[2];
            ValueSequence data = new ValueSequence();
            for (int j = 0; j < len; j++) {
                params[0] = occur[j].getValue();
                data.add(new IntegerValue(occur[j].getOccurrences(), Type.UNSIGNED_INT));
                data.add(new IntegerValue(occur[j].getDocuments(), Type.UNSIGNED_INT));
                data.add(new IntegerValue(j + 1, Type.UNSIGNED_INT));
                params[1] = data;
                // NOTE(review): as above, no reset of data is visible between iterations.
                result.addAll(ref.evalFunction(Sequence.EMPTY_SEQUENCE, null, params));
    logger.debug("Returning: {}", result.getItemCount());
    return result;
Also used : OrderedValuesIndex(org.exist.indexing.OrderedValuesIndex) Occurrences(org.exist.util.Occurrences) ValueOccurrences(org.exist.util.ValueOccurrences) IndexWorker(org.exist.indexing.IndexWorker) Indexable(org.exist.storage.Indexable) NodeSet(org.exist.dom.persistent.NodeSet) QName(org.exist.dom.QName) ValueOccurrences(org.exist.util.ValueOccurrences) DocumentSet(org.exist.dom.persistent.DocumentSet)

Example 2 with Occurrences

use of org.exist.util.Occurrences in project exist by eXist-db.

the class LuceneIndexWorker method scanIndexByQName.

private Occurrences[] scanIndexByQName(List<QName> qnames, DocumentSet docs, NodeSet nodes, String start, String end, long max) throws IOException {
    final TreeMap<String, Occurrences> map = new TreeMap<>();
    index.withReader(reader -> {
        for (QName qname : qnames) {
            String field = LuceneUtil.encodeQName(qname, index.getBrokerPool().getSymbols());
            doScanIndex(docs, nodes, start, end, max, map, reader, field);
        return null;
    Occurrences[] occur = new Occurrences[map.size()];
    return map.values().toArray(occur);
Also used : QName(org.exist.dom.QName) Occurrences(org.exist.util.Occurrences)

Example 3 with Occurrences

use of org.exist.util.Occurrences in project exist by eXist-db.

the class NativeStructuralIndexWorker method scanIndex.

/**
 * Collect index statistics. Used by functions like util:index-keys.
 * @param context the xquery context
 * @param docs The documents to which the index entries belong
 * @param contextSet ignored by this index
 * @param hints Some "hints" for retrieving the index entries. See such hints in
 * {@link org.exist.indexing.OrderedValuesIndex} and {@link org.exist.indexing.QNamedKeysIndex}.
 * @return the matching occurrences
 */
public Occurrences[] scanIndex(XQueryContext context, DocumentSet docs, NodeSet contextSet, Map hints) {
    // NOTE(review): this listing appears to have lost its closing braces and some
    // statements during extraction; confirm against the upstream source.
    // Neither contextSet nor hints is read anywhere in the visible body.
    // Occurrences are keyed by display name; the TreeMap keeps them sorted by that name.
    final Map<String, Occurrences> occurrences = new TreeMap<>();
    for (final Iterator<DocumentImpl> i = docs.getDocumentIterator(); i.hasNext(); ) {
        // NOTE(review): the right-hand side is missing here; presumably the iterator's
        // next element - confirm upstream.
        final DocumentImpl doc =;
        final List<QName> qnames = getQNamesForDoc(doc);
        for (final QName qname : qnames) {
            // Attribute names are reported with a leading "@"; other names use the bare local part.
            final String name;
            if (qname.getNameType() == ElementValue.ATTRIBUTE) {
                name = "@" + qname.getLocalPart();
            } else {
                name = qname.getLocalPart();
            // Range query bounded by keys built for this document's id and for id + 1.
            final byte[] fromKey = computeKey(qname.getNameType(), qname, doc.getDocId());
            final byte[] toKey = computeKey(qname.getNameType(), qname, doc.getDocId() + 1);
            final IndexQuery query = new IndexQuery(IndexQuery.RANGE, new Value(fromKey), new Value(toKey));
            // The btree read lock is held only for the duration of the query (try-with-resources).
            try (final ManagedLock<ReentrantLock> btreeLock = index.lockManager.acquireBtreeReadLock(index.btree.getLockName())) {
                index.btree.query(query, (value, pointer) -> {
                    // Create the Occurrences entry the first time a name is seen.
                    Occurrences oc = occurrences.get(name);
                    if (oc == null) {
                        oc = new Occurrences(name);
                        occurrences.put(name, oc);
                    // NOTE(review): the else branch has no visible body, and no counter
                    // updates on oc are visible - statements were presumably lost.
                    } else {
                    return true;
            // The visible handlers only log a warning; no rethrow is shown.
            } catch (final LockException e) {
                NativeStructuralIndex.LOG.warn("Failed to lock structural index: {}", e.getMessage(), e);
            } catch (final Exception e) {
                NativeStructuralIndex.LOG.warn("Exception caught while reading structural index for document {}: {}", doc.getURI(), e.getMessage(), e);
    // Copy the map values (in key order) into the result array.
    final Occurrences[] result = new Occurrences[occurrences.size()];
    int i = 0;
    for (Occurrences occ : occurrences.values()) {
        result[i++] = occ;
    return result;
Also used : ReentrantLock(java.util.concurrent.locks.ReentrantLock) IndexQuery(org.exist.storage.btree.IndexQuery) QName(org.exist.dom.QName) Occurrences(org.exist.util.Occurrences) DocumentImpl(org.exist.dom.persistent.DocumentImpl) PermissionDeniedException(org.exist.security.PermissionDeniedException) LockException(org.exist.util.LockException) DatabaseConfigurationException(org.exist.util.DatabaseConfigurationException) LockException(org.exist.util.LockException) Value(org.exist.storage.btree.Value)

Example 4 with Occurrences

use of org.exist.util.Occurrences in project exist by eXist-db.

the class RangeIndexWorker method scanIndexByQName.

private Occurrences[] scanIndexByQName(List<QName> qnames, DocumentSet docs, NodeSet nodes, String start, String end, long max) throws IOException {
    return index.withReader(reader -> {
        TreeMap<String, Occurrences> map = new TreeMap<>();
        for (QName qname : qnames) {
            String field = LuceneUtil.encodeQName(qname, index.getBrokerPool().getSymbols());
            scan(docs, nodes, start, end, max, map, reader, field);
        Occurrences[] occur = new Occurrences[map.size()];
        return map.values().toArray(occur);
Also used : QName(org.exist.dom.QName) Occurrences(org.exist.util.Occurrences)

Example 5 with Occurrences

use of org.exist.util.Occurrences in project exist by eXist-db.

the class RangeIndexWorker method scan.

/**
 * Walks the terms of one index field across all leaf readers and records an
 * {@code Occurrences} entry in {@code map} for each term that passes the filters.
 *
 * <p>NOTE(review): this listing appears to have lost closing braces, several
 * expressions and the bodies of most guard clauses during extraction; the comments
 * below describe only what is still visible. Confirm against the upstream source.
 *
 * @param docs used to resolve stored document ids to documents
 * @param nodes optional node filter; when null, no node id is decoded
 * @param start optional prefix a term must start with (only consulted when {@code end} is null)
 * @param end optional upper bound; terms comparing greater than it are excluded
 * @param max compared against the size of {@code map}
 * @param map receives one entry per accepted term, keyed by the term
 * @param reader the index reader whose leaves are scanned
 * @param field the name of the field whose terms are enumerated
 * @throws IOException declared for failures while reading the index
 */
private void scan(DocumentSet docs, NodeSet nodes, String start, String end, long max, TreeMap<String, Occurrences> map, IndexReader reader, String field) throws IOException {
    List<AtomicReaderContext> leaves = reader.leaves();
    for (AtomicReaderContext context : leaves) {
        // Per-leaf lookups: stored document id, stored node id, live-docs bits, and the field's terms.
        NumericDocValues docIdValues = context.reader().getNumericDocValues(FIELD_DOC_ID);
        BinaryDocValues nodeIdValues = context.reader().getBinaryDocValues(FIELD_NODE_ID);
        Bits liveDocs = context.reader().getLiveDocs();
        Terms terms = context.reader().terms(field);
        // NOTE(review): the statement guarded by this check is missing (presumably skips the leaf).
        if (terms == null)
        TermsEnum termsIter = terms.iterator(null);
        // NOTE(review): the tested expression and the guarded statement are missing here.
        if ( == null) {
        do {
            // NOTE(review): the guarded statement is missing (presumably stops once max terms are collected).
            if (map.size() >= max) {
            BytesRef ref = termsIter.term();
            String term = ref.utf8ToString();
            // Filtering: with an end value, exclude terms that compare greater than it;
            // otherwise, with a start value, require the term to start with it.
            boolean include = true;
            if (end != null) {
                if (term.compareTo(end) > 0)
                    include = false;
            } else if (start != null && !term.startsWith(start))
                include = false;
            if (include) {
                // NOTE(review): the call producing the DocsEnum has been truncated.
                DocsEnum docsEnum =, null);
                while (docsEnum.nextDoc() != DocsEnum.NO_MORE_DOCS) {
                    // NOTE(review): guarded statement missing (presumably skips documents not marked live).
                    if (liveDocs != null && !liveDocs.get(docsEnum.docID())) {
                    // Map the Lucene document to the stored document id, then to the document itself.
                    int docId = (int) docIdValues.get(docsEnum.docID());
                    DocumentImpl storedDocument = docs.getDoc(docId);
                    // NOTE(review): guarded statement missing (presumably skips documents not in docs).
                    if (storedDocument == null)
                    NodeId nodeId = null;
                    if (nodes != null) {
                        // Decode the node id: a short read at the offset gives the unit count,
                        // and the id data starts two bytes after the offset.
                        final BytesRef nodeIdRef = nodeIdValues.get(docsEnum.docID());
                        int units = ByteConversion.byteToShort(nodeIdRef.bytes, nodeIdRef.offset);
                        nodeId = index.getBrokerPool().getNodeFactory().createFromData(units, nodeIdRef.bytes, nodeIdRef.offset + 2);
                    // Accept when no node filter applies or the node is found in the filter set.
                    if (nodeId == null || nodes.get(storedDocument, nodeId) != null) {
                        // Create the entry the first time a term is seen.
                        // NOTE(review): no counter updates on oc are visible here.
                        Occurrences oc = map.get(term);
                        if (oc == null) {
                            oc = new Occurrences(term);
                            map.put(term, oc);
        // NOTE(review): the loop condition's expression is missing (presumably advances the terms iterator).
        } while ( != null);
Also used : Occurrences(org.exist.util.Occurrences) DocumentImpl(org.exist.dom.persistent.DocumentImpl) NodeId(org.exist.numbering.NodeId) Bits(org.apache.lucene.util.Bits) BytesRef(org.apache.lucene.util.BytesRef)


Occurrences (org.exist.util.Occurrences)11 QName (org.exist.dom.QName)5 DocumentSet (org.exist.dom.persistent.DocumentSet)4 HashMap (java.util.HashMap)3 NodeSet (org.exist.dom.persistent.NodeSet)3 IndexWorker (org.exist.indexing.IndexWorker)3 OrderedValuesIndex (org.exist.indexing.OrderedValuesIndex)3 ValueOccurrences (org.exist.util.ValueOccurrences)3 DocumentImpl (org.exist.dom.persistent.DocumentImpl)2 Indexable (org.exist.storage.Indexable)2 XPathException (org.exist.xquery.XPathException)2 IntegerValue (org.exist.xquery.value.IntegerValue)2 Sequence (org.exist.xquery.value.Sequence)2 URISyntaxException (java.net.URISyntaxException)1 ArrayList (java.util.ArrayList)1 Arrays (java.util.Arrays)1 List (java.util.List)1 ReentrantLock (java.util.concurrent.locks.ReentrantLock)1 Stream (java.util.stream.Stream)1 Bits (org.apache.lucene.util.Bits)1