use of org.exist.util.Occurrences in project exist by eXist-db.
the class IndexKeys method eval.
/**
 * Scans an index for its keys and calls the function reference passed as the
 * third argument once for every key that is reported.
 *
 * <p>When invoked as {@code index-keys-by-qname} the first argument is used as
 * the sequence of QNames and the documents come from the context sequence (or
 * the statically known documents when there is no context sequence). Otherwise
 * the first argument is converted to a node set, which also supplies the
 * documents.</p>
 *
 * <p>With five arguments the index named by the fifth argument is queried;
 * otherwise the broker's value index is scanned. The callback receives the key
 * as its first parameter and, as its second, a sequence of three unsigned
 * integers: the occurrence count, the document count and the 1-based position
 * of the key in the scan result.</p>
 *
 * @param args the function arguments: nodes or QNames, start value, callback
 *             function reference, optional maximum, and optionally an index name
 * @param contextSequence the context sequence, may be {@code null}
 * @return everything returned by the callback invocations, or the empty
 *         sequence if the first argument is empty
 * @throws XPathException if the index named by the fifth argument is unknown
 *
 * @see org.exist.xquery.BasicFunction#eval(org.exist.xquery.value.Sequence[],
 * org.exist.xquery.value.Sequence)
 */
public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathException {
    if (args[0].isEmpty()) {
        return Sequence.EMPTY_SEQUENCE;
    }

    final Sequence qnames;
    final NodeSet nodes;
    final DocumentSet docs;
    if (isCalledAs("index-keys-by-qname")) {
        qnames = args[0];
        nodes = null;
        if (contextSequence == null) {
            docs = context.getStaticallyKnownDocuments();
        } else {
            docs = contextSequence.getDocumentSet();
        }
    } else {
        qnames = null;
        nodes = args[0].toNodeSet();
        docs = nodes.getDocumentSet();
    }

    final Sequence result = new ValueSequence();
    try (final FunctionReference ref = (FunctionReference) args[2].itemAt(0)) {
        // -1 means "no limit requested"
        final int max = args[3].hasOne() ? ((IntegerValue) args[3].itemAt(0)).getInt() : -1;

        if (this.getArgumentCount() != 5) {
            // no index specified: use the range index
            final Indexable startKey = (Indexable) args[1].itemAt(0);

            // First check for indexes defined on qname
            ValueOccurrences[] byQName = null;
            final QName[] indexedQNames = getDefinedIndexes(context.getBroker(), docs);
            if (indexedQNames.length > 0) {
                byQName = context.getBroker().getValueIndex().scanIndexKeys(docs, nodes, indexedQNames, startKey);
            }

            // Also check if there's an index defined by path
            final ValueOccurrences[] byPath = context.getBroker().getValueIndex().scanIndexKeys(docs, nodes, startKey);

            // Merge the two results: qname-based entries first, then path-based ones
            final ValueOccurrences[] found;
            if (byQName != null && byQName.length != 0) {
                found = new ValueOccurrences[byQName.length + byPath.length];
                System.arraycopy(byQName, 0, found, 0, byQName.length);
                System.arraycopy(byPath, 0, found, byQName.length, byPath.length);
            } else {
                found = byPath;
            }

            final int limit = max == -1 ? found.length : Math.min(found.length, max);
            final Sequence[] params = new Sequence[2];
            final ValueSequence data = new ValueSequence();
            for (int pos = 0; pos < limit; pos++) {
                final ValueOccurrences entry = found[pos];
                params[0] = entry.getValue();
                data.add(new IntegerValue(entry.getOccurrences(), Type.UNSIGNED_INT));
                data.add(new IntegerValue(entry.getDocuments(), Type.UNSIGNED_INT));
                data.add(new IntegerValue(pos + 1, Type.UNSIGNED_INT));
                params[1] = data;
                result.addAll(ref.evalFunction(Sequence.EMPTY_SEQUENCE, null, params));
                // the statistics sequence is reused for the next key
                data.clear();
            }
        } else {
            // if we have 5 arguments, query the user-specified index
            final String indexName = args[4].itemAt(0).getStringValue();
            final IndexWorker worker = context.getBroker().getIndexController().getWorkerByIndexName(indexName);
            if (worker == null) {
                throw new XPathException(this, "Unknown index: " + indexName);
            }

            final Map<String, Object> hints = new HashMap<>();
            if (max != -1) {
                hints.put(IndexWorker.VALUE_COUNT, new IntegerValue(max));
            }
            if (worker instanceof OrderedValuesIndex) {
                hints.put(OrderedValuesIndex.START_VALUE, args[1].getStringValue());
            } else {
                logger.warn("{} isn't an instance of org.exist.indexing.OrderedValuesIndex. Start value '{}' ignored.", worker.getClass().getName(), args[1]);
            }
            if (qnames != null) {
                final List<QName> qnameList = new ArrayList<>(qnames.getItemCount());
                final SequenceIterator qnameIterator = qnames.iterate();
                while (qnameIterator.hasNext()) {
                    final QNameValue qnameValue = (QNameValue) qnameIterator.nextItem();
                    qnameList.add(qnameValue.getQName());
                }
                hints.put(QNamedKeysIndex.QNAMES_KEY, qnameList);
            }

            final Occurrences[] found = worker.scanIndex(context, docs, nodes, hints);
            // TODO : add an extra argument to pass the END_VALUE ?
            final int limit = max == -1 ? found.length : Math.min(found.length, max);
            final Sequence[] params = new Sequence[2];
            final ValueSequence data = new ValueSequence();
            for (int pos = 0; pos < limit; pos++) {
                final Occurrences entry = found[pos];
                params[0] = new StringValue(entry.getTerm().toString());
                data.add(new IntegerValue(entry.getOccurrences(), Type.UNSIGNED_INT));
                data.add(new IntegerValue(entry.getDocuments(), Type.UNSIGNED_INT));
                data.add(new IntegerValue(pos + 1, Type.UNSIGNED_INT));
                params[1] = data;
                result.addAll(ref.evalFunction(Sequence.EMPTY_SEQUENCE, null, params));
                // the statistics sequence is reused for the next key
                data.clear();
            }
        }
    }
    logger.debug("Returning: {}", result.getItemCount());
    return result;
}
use of org.exist.util.Occurrences in project exist by eXist-db.
the class LuceneIndexWorker method scanIndexByQName.
/**
 * Scans the index field of every given QName with a single reader and
 * returns the collected occurrences ordered by term.
 *
 * @param qnames the QNames whose fields are scanned
 * @param docs passed through to the per-field scan
 * @param nodes passed through to the per-field scan
 * @param start passed through to the per-field scan
 * @param end passed through to the per-field scan
 * @param max passed through to the per-field scan
 * @return the occurrences gathered over all QNames, in the term order of the backing {@link TreeMap}
 * @throws IOException declared for failures while the index reader is in use
 */
private Occurrences[] scanIndexByQName(List<QName> qnames, DocumentSet docs, NodeSet nodes, String start, String end, long max) throws IOException {
    // One shared, term-sorted map: every per-QName scan adds to it.
    final TreeMap<String, Occurrences> occurrencesByTerm = new TreeMap<>();
    index.withReader(reader -> {
        for (final QName qname : qnames) {
            doScanIndex(docs, nodes, start, end, max, occurrencesByTerm, reader,
                    LuceneUtil.encodeQName(qname, index.getBrokerPool().getSymbols()));
        }
        // the reader callback has nothing to hand back; results live in the map
        return null;
    });
    return occurrencesByTerm.values().toArray(new Occurrences[0]);
}
use of org.exist.util.Occurrences in project exist by eXist-db.
the class NativeStructuralIndexWorker method scanIndex.
/**
 * Gathers per-name statistics from the structural index, e.g. for functions
 * like util:index-keys.
 *
 * <p>For every document in {@code docs} and every QName reported for that
 * document, the btree is range-queried for the keys of that QName within the
 * document; each hit counts as one occurrence. Entries are keyed by the local
 * part of the QName, prefixed with {@code @} for attributes. Failures while
 * locking or reading the index are logged as warnings and the scan continues
 * with the next QName.</p>
 *
 * @param context the xquery context (not used by this method)
 * @param docs The documents to which the index entries belong
 * @param contextSet ignored by this index
 * @param hints Some "hints" for retrieving the index entries. See such hints in
 * {@link org.exist.indexing.OrderedValuesIndex} and {@link org.exist.indexing.QNamedKeysIndex}.
 * This method does not read them.
 * @return the matching occurrences, ordered by name
 */
public Occurrences[] scanIndex(XQueryContext context, DocumentSet docs, NodeSet contextSet, Map hints) {
    final Map<String, Occurrences> byName = new TreeMap<>();
    final Iterator<DocumentImpl> docIterator = docs.getDocumentIterator();
    while (docIterator.hasNext()) {
        final DocumentImpl doc = docIterator.next();
        for (final QName qname : getQNamesForDoc(doc)) {
            final String name = qname.getNameType() == ElementValue.ATTRIBUTE
                    ? "@" + qname.getLocalPart()
                    : qname.getLocalPart();

            // Key range covering this QName from this document id up to the next one.
            final byte[] lowerKey = computeKey(qname.getNameType(), qname, doc.getDocId());
            final byte[] upperKey = computeKey(qname.getNameType(), qname, doc.getDocId() + 1);
            final IndexQuery rangeQuery = new IndexQuery(IndexQuery.RANGE, new Value(lowerKey), new Value(upperKey));

            try (final ManagedLock<ReentrantLock> btreeLock = index.lockManager.acquireBtreeReadLock(index.btree.getLockName())) {
                index.btree.query(rangeQuery, (value, pointer) -> {
                    // one callback per matching btree entry: register the document and count the hit
                    final Occurrences entry = byName.computeIfAbsent(name, key -> new Occurrences(key));
                    entry.addDocument(doc);
                    entry.addOccurrences(1);
                    return true;
                });
            } catch (final LockException e) {
                NativeStructuralIndex.LOG.warn("Failed to lock structural index: {}", e.getMessage(), e);
            } catch (final Exception e) {
                NativeStructuralIndex.LOG.warn("Exception caught while reading structural index for document {}: {}", doc.getURI(), e.getMessage(), e);
            }
        }
    }
    return byName.values().toArray(new Occurrences[0]);
}
use of org.exist.util.Occurrences in project exist by eXist-db.
the class RangeIndexWorker method scanIndexByQName.
/**
 * Scans the index field of every given QName with a single reader and
 * returns the collected occurrences ordered by term.
 *
 * @param qnames the QNames whose fields are scanned
 * @param docs documents an index entry must belong to
 * @param nodes optional node set restricting the entries, may be {@code null}
 * @param start term filter applied by the per-field scan
 * @param end upper term bound applied by the per-field scan
 * @param max cap on the number of distinct terms collected
 * @return the occurrences gathered over all QNames, in the term order of the backing {@link TreeMap}
 * @throws IOException declared for failures while the index reader is in use
 */
private Occurrences[] scanIndexByQName(List<QName> qnames, DocumentSet docs, NodeSet nodes, String start, String end, long max) throws IOException {
    return index.withReader(reader -> {
        // One term-sorted map shared by all per-QName scans.
        final TreeMap<String, Occurrences> occurrencesByTerm = new TreeMap<>();
        for (final QName qname : qnames) {
            scan(docs, nodes, start, end, max, occurrencesByTerm, reader,
                    LuceneUtil.encodeQName(qname, index.getBrokerPool().getSymbols()));
        }
        return occurrencesByTerm.values().toArray(new Occurrences[0]);
    });
}
use of org.exist.util.Occurrences in project exist by eXist-db.
the class RangeIndexWorker method scan.
/**
 * Walks the terms of one index field in every leaf of the reader and
 * accumulates, per term, the documents and frequencies found.
 *
 * <p>A term is considered when, if {@code end} is given, it does not sort
 * after {@code end}; otherwise, if {@code start} is given, it begins with
 * {@code start}. Note that {@code start} is not consulted when {@code end}
 * is set. Postings of deleted Lucene documents, of documents not contained in
 * {@code docs}, and (when {@code nodes} is given) of nodes not contained in
 * {@code nodes} are skipped.</p>
 *
 * @param docs documents an entry must belong to
 * @param nodes optional node set restricting the entries, may be {@code null}
 * @param start prefix a term must have; only used when {@code end} is {@code null}
 * @param end upper bound (inclusive) for terms, may be {@code null}
 * @param max term iteration in a leaf stops once the map holds this many entries
 * @param map receives the occurrences, keyed by term
 * @param reader the index reader whose leaves are scanned
 * @param field name of the index field to scan
 * @throws IOException if reading from the index fails
 */
private void scan(DocumentSet docs, NodeSet nodes, String start, String end, long max, TreeMap<String, Occurrences> map, IndexReader reader, String field) throws IOException {
    for (final AtomicReaderContext leaf : reader.leaves()) {
        final NumericDocValues docIds = leaf.reader().getNumericDocValues(FIELD_DOC_ID);
        final BinaryDocValues nodeIds = leaf.reader().getBinaryDocValues(FIELD_NODE_ID);
        final Bits live = leaf.reader().getLiveDocs();
        final Terms terms = leaf.reader().terms(field);
        if (terms == null) {
            continue;
        }

        final TermsEnum termsEnum = terms.iterator(null);
        // Advance term by term; stop for this leaf once the map is full.
        for (BytesRef positioned = termsEnum.next();
             positioned != null && map.size() < max;
             positioned = termsEnum.next()) {

            final String term = termsEnum.term().utf8ToString();
            final boolean include = end != null
                    ? term.compareTo(end) <= 0
                    : (start == null || term.startsWith(start));
            if (!include) {
                continue;
            }

            final DocsEnum postings = termsEnum.docs(null, null);
            while (postings.nextDoc() != DocsEnum.NO_MORE_DOCS) {
                final int luceneDoc = postings.docID();
                // a non-null live-docs bitset marks deleted documents with a cleared bit
                if (live != null && !live.get(luceneDoc)) {
                    continue;
                }

                final DocumentImpl storedDocument = docs.getDoc((int) docIds.get(luceneDoc));
                if (storedDocument == null) {
                    continue;
                }

                final boolean inContext;
                if (nodes == null) {
                    inContext = true;
                } else {
                    // stored node id: a short unit count followed by the id data
                    final BytesRef nodeIdRef = nodeIds.get(luceneDoc);
                    final int units = ByteConversion.byteToShort(nodeIdRef.bytes, nodeIdRef.offset);
                    final NodeId nodeId = index.getBrokerPool().getNodeFactory().createFromData(units, nodeIdRef.bytes, nodeIdRef.offset + 2);
                    inContext = nodeId == null || nodes.get(storedDocument, nodeId) != null;
                }

                if (inContext) {
                    final Occurrences entry = map.computeIfAbsent(term, key -> new Occurrences(key));
                    entry.addDocument(storedDocument);
                    entry.addOccurrences(postings.freq());
                }
            }
        }
    }
}
Aggregations