
Example 1 with InMemoryNodeSet

Use of org.exist.dom.memtree.InMemoryNodeSet in the project exist by eXist-db.

From the class LocationStep, the method applyPredicate:

/**
 * The method <code>applyPredicate</code>
 *
 * @param outerSequence   a <code>Sequence</code> value
 * @param contextSequence a <code>Sequence</code> value
 * @return a <code>Sequence</code> value
 * @throws XPathException if an error occurs
 */
private Sequence applyPredicate(Sequence outerSequence, final Sequence contextSequence) throws XPathException {
    if (contextSequence == null) {
        return Sequence.EMPTY_SEQUENCE;
    }
    if (predicates == null || !applyPredicate
            || (!(contextSequence instanceof VirtualNodeSet) && contextSequence.isEmpty())) {
        // Nothing to apply
        return contextSequence;
    }
    Sequence result;
    final Predicate pred = predicates[0];
    // Abbreviated steps (//) with a positional predicate, or a non-persistent
    // context sequence, are evaluated per parent so the predicate sees the
    // correct context positions.
    if (abbreviatedStep && (pred.getExecutionMode() == Predicate.ExecutionMode.POSITIONAL || !contextSequence.isPersistentSet())) {
        result = new ValueSequence();
        ((ValueSequence) result).keepUnOrdered(unordered);
        if (contextSequence.isPersistentSet()) {
            final NodeSet contextSet = contextSequence.toNodeSet();
            outerSequence = contextSet.getParents(-1);
            for (final SequenceIterator i = outerSequence.iterate(); i.hasNext(); ) {
                final NodeValue node = (NodeValue) i.nextItem();
                final Sequence newContextSeq = contextSet.selectParentChild((NodeSet) node, NodeSet.DESCENDANT, getExpressionId());
                final Sequence temp = processPredicate(outerSequence, newContextSeq);
                result.addAll(temp);
            }
        } else {
            final MemoryNodeSet nodes = contextSequence.toMemNodeSet();
            outerSequence = nodes.getParents(new AnyNodeTest());
            for (final SequenceIterator i = outerSequence.iterate(); i.hasNext(); ) {
                final NodeValue node = (NodeValue) i.nextItem();
                final InMemoryNodeSet newSet = new InMemoryNodeSet();
                ((NodeImpl) node).selectChildren(test, newSet);
                final Sequence temp = processPredicate(outerSequence, newSet);
                result.addAll(temp);
            }
        }
    } else {
        result = processPredicate(outerSequence, contextSequence);
    }
    return result;
}
Also used : InMemoryNodeSet(org.exist.dom.memtree.InMemoryNodeSet) NodeImpl(org.exist.dom.memtree.NodeImpl)
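
For reference, the following is a minimal sketch, not taken from the eXist sources, of the in-memory branch above: it collects the children of a memtree node into a fresh InMemoryNodeSet via the same selectChildren call. The class name, method name, and the use of AnyNodeTest are illustrative assumptions.

// Sketch only: not part of the eXist code base.
import org.exist.dom.memtree.InMemoryNodeSet;
import org.exist.dom.memtree.NodeImpl;
import org.exist.xquery.AnyNodeTest;
import org.exist.xquery.XPathException;
import org.exist.xquery.value.Sequence;

final class ChildSelectionSketch {

    // Collect every child of an in-memory node into an InMemoryNodeSet,
    // mirroring the non-persistent branch of LocationStep.applyPredicate above.
    static Sequence childrenOf(final NodeImpl node) throws XPathException {
        final InMemoryNodeSet children = new InMemoryNodeSet();
        node.selectChildren(new AnyNodeTest(), children);
        // InMemoryNodeSet is itself a Sequence, so it can be returned directly
        return children;
    }
}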

Example 2 with InMemoryNodeSet

Use of org.exist.dom.memtree.InMemoryNodeSet in the project exist by eXist-db.

From the class Expand, the method eval:

public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathException {
    if (args[0].isEmpty()) {
        return Sequence.EMPTY_SEQUENCE;
    }
    // apply serialization options set on the XQuery context
    final Properties serializeOptions = new Properties();
    serializeOptions.setProperty(EXistOutputKeys.EXPAND_XINCLUDES, "yes");
    serializeOptions.setProperty(EXistOutputKeys.HIGHLIGHT_MATCHES, "elements");
    if (getArgumentCount() == 2) {
        final String serOpts = args[1].getStringValue();
        final String[] contents = Option.tokenize(serOpts);
        for (String content : contents) {
            final String[] pair = Option.parseKeyValuePair(content);
            if (pair == null) {
                throw new XPathException(this, "Found invalid serialization option: " + content);
            }
            logger.debug("Setting serialization property: {} = {}", pair[0], pair[1]);
            serializeOptions.setProperty(pair[0], pair[1]);
        }
    } else {
        context.checkOptions(serializeOptions);
    }
    context.pushDocumentContext();
    try {
        final InMemoryNodeSet result = new InMemoryNodeSet();
        final MemTreeBuilder builder = new MemTreeBuilder(getContext());
        final DocumentBuilderReceiver receiver = new DocumentBuilderReceiver(builder, true);
        int attrNr = -1;
        for (final SequenceIterator i = args[0].iterate(); i.hasNext(); ) {
            final NodeValue next = (NodeValue) i.nextItem();
            final short nodeType = ((INodeHandle) next).getNodeType();
            builder.startDocument();
            if (nodeType == Node.ATTRIBUTE_NODE) {
                // NOTE: Attributes nodes need special handling as they cannot be directly serialized via SAX to a ContentHandler
                final Attr attr = (Attr) next.getNode();
                String ns = attr.getNamespaceURI();
                if (ns == null || ns.isEmpty()) {
                    ns = XMLConstants.NULL_NS_URI;
                }
                attrNr = builder.addAttribute(new QName(attr.getLocalName(), ns), attr.getValue());
            } else {
                next.toSAX(context.getBroker(), receiver, serializeOptions);
            }
            builder.endDocument();
            if (Node.DOCUMENT_NODE == nodeType) {
                result.add(builder.getDocument());
            } else if (Node.ATTRIBUTE_NODE == nodeType) {
                result.add(builder.getDocument().getAttribute(attrNr));
            } else {
                result.add(builder.getDocument().getNode(1));
            }
            builder.reset(getContext());
        }
        return result;
    } catch (final SAXException e) {
        throw new XPathException(this, e);
    } finally {
        context.popDocumentContext();
    }
}
Also used : NodeValue(org.exist.xquery.value.NodeValue) XPathException(org.exist.xquery.XPathException) INodeHandle(org.exist.dom.INodeHandle) QName(org.exist.dom.QName) InMemoryNodeSet(org.exist.dom.memtree.InMemoryNodeSet) Properties(java.util.Properties) DocumentBuilderReceiver(org.exist.dom.memtree.DocumentBuilderReceiver) Attr(org.w3c.dom.Attr) SAXException(org.xml.sax.SAXException) MemTreeBuilder(org.exist.dom.memtree.MemTreeBuilder) SequenceIterator(org.exist.xquery.value.SequenceIterator)
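
Below is a minimal sketch of the builder pattern used by Expand, assuming an XQueryContext is available: it builds a single element with MemTreeBuilder and returns it wrapped in an InMemoryNodeSet. The class name, method name, and the literal element name are assumptions for illustration.

// Sketch only: not part of the eXist code base.
import org.exist.dom.memtree.InMemoryNodeSet;
import org.exist.dom.memtree.MemTreeBuilder;
import org.exist.xquery.XPathException;
import org.exist.xquery.XQueryContext;
import org.exist.xquery.value.Sequence;

final class MemTreeBuilderSketch {

    // Build one element with a text child in the in-memory DOM and return it
    // as an InMemoryNodeSet, following the pattern of Expand.eval above.
    static Sequence singleElement(final XQueryContext context, final String text) throws XPathException {
        context.pushDocumentContext();
        try {
            final MemTreeBuilder builder = new MemTreeBuilder(context);
            builder.startDocument();
            // startElement returns the node number of the new element
            final int nodeNr = builder.startElement("", "example", "example", null);
            builder.characters(text);
            builder.endElement();
            builder.endDocument();

            final InMemoryNodeSet result = new InMemoryNodeSet();
            result.add(builder.getDocument().getNode(nodeNr));
            return result;
        } finally {
            context.popDocumentContext();
        }
    }
}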

Example 3 with InMemoryNodeSet

Use of org.exist.dom.memtree.InMemoryNodeSet in the project exist by eXist-db.

From the class Field, the method highlightMatches:

/**
 * Highlight matches in field content using the analyzer defined for the field.
 *
 * @param fieldName the name of the field
 * @param proxy node on which the field is defined
 * @param match the lucene match attached to the node
 * @param text the content of the field
 * @return a sequence of exist:field elements containing the field content with matches enclosed in exist:match
 * @throws XPathException in case of error
 * @throws IOException in case of a lucene error
 */
private Sequence highlightMatches(final String fieldName, final NodeProxy proxy, final LuceneMatch match, final Sequence text) throws XPathException, IOException {
    final LuceneIndexWorker index = (LuceneIndexWorker) context.getBroker().getIndexController().getWorkerByIndexId(LuceneIndex.ID);
    final Map<Object, Query> terms = index.getTerms(match.getQuery());
    final NodePath path = LuceneMatchListener.getPath(proxy);
    final LuceneConfig config = index.getLuceneConfig(context.getBroker(), proxy.getDocumentSet());
    LuceneIndexConfig idxConf = config.getConfig(path).next();
    if (idxConf == null) {
        // no lucene index: no fields to highlight
        return Sequence.EMPTY_SEQUENCE;
    }
    final Analyzer analyzer = idxConf.getAnalyzer();
    context.pushDocumentContext();
    try {
        final MemTreeBuilder builder = context.getDocumentBuilder();
        builder.startDocument();
        final InMemoryNodeSet result = new InMemoryNodeSet(text.getItemCount());
        for (final SequenceIterator si = text.iterate(); si.hasNext(); ) {
            final int nodeNr = builder.startElement(Namespaces.EXIST_NS, "field", "exist:field", null);
            final String content = si.nextItem().getStringValue();
            int currentPos = 0;
            try (final Reader reader = new StringReader(content);
                final TokenStream tokenStream = analyzer.tokenStream(fieldName, reader)) {
                tokenStream.reset();
                final MarkableTokenFilter stream = new MarkableTokenFilter(tokenStream);
                while (stream.incrementToken()) {
                    String token = stream.getAttribute(CharTermAttribute.class).toString();
                    final Query query = terms.get(token);
                    if (query != null) {
                        if (match.getQuery() instanceof PhraseQuery) {
                            final Term[] phraseTerms = ((PhraseQuery) match.getQuery()).getTerms();
                            if (token.equals(phraseTerms[0].text())) {
                                // Scan the following text and collect tokens to see
                                // if they are part of the phrase.
                                stream.mark();
                                int t = 1;
                                OffsetAttribute offset = stream.getAttribute(OffsetAttribute.class);
                                final int startOffset = offset.startOffset();
                                int endOffset = offset.endOffset();
                                while (stream.incrementToken() && t < phraseTerms.length) {
                                    token = stream.getAttribute(CharTermAttribute.class).toString();
                                    if (token.equals(phraseTerms[t].text())) {
                                        offset = stream.getAttribute(OffsetAttribute.class);
                                        endOffset = offset.endOffset();
                                        t++;
                                        if (t == phraseTerms.length) {
                                            break;
                                        }
                                    } else {
                                        break;
                                    }
                                }
                                if (t == phraseTerms.length) {
                                    if (currentPos < startOffset) {
                                        builder.characters(content.substring(currentPos, startOffset));
                                    }
                                    builder.startElement(Namespaces.EXIST_NS, "match", "exist:match", null);
                                    builder.characters(content.substring(startOffset, endOffset));
                                    builder.endElement();
                                    currentPos = endOffset;
                                }
                            }
                        // End of phrase handling
                        } else {
                            final OffsetAttribute offset = stream.getAttribute(OffsetAttribute.class);
                            if (currentPos < offset.startOffset()) {
                                builder.characters(content.substring(currentPos, offset.startOffset()));
                            }
                            builder.startElement(Namespaces.EXIST_NS, "match", "exist:match", null);
                            builder.characters(content.substring(offset.startOffset(), offset.endOffset()));
                            builder.endElement();
                            currentPos = offset.endOffset();
                        }
                    }
                }
            }
            if (currentPos < content.length() - 1) {
                builder.characters(content.substring(currentPos));
            }
            builder.endElement();
            result.add(builder.getDocument().getNode(nodeNr));
        }
        return result;
    } finally {
        context.popDocumentContext();
    }
}
Also used : TokenStream(org.apache.lucene.analysis.TokenStream) Query(org.apache.lucene.search.Query) PhraseQuery(org.apache.lucene.search.PhraseQuery) InMemoryNodeSet(org.exist.dom.memtree.InMemoryNodeSet) Reader(java.io.Reader) StringReader(java.io.StringReader) Term(org.apache.lucene.index.Term) Analyzer(org.apache.lucene.analysis.Analyzer) NodePath(org.exist.storage.NodePath) CharTermAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute) MemTreeBuilder(org.exist.dom.memtree.MemTreeBuilder) OffsetAttribute(org.apache.lucene.analysis.tokenattributes.OffsetAttribute)
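
The following is a simplified, stand-alone sketch of the single-term highlighting loop above, using the same Lucene TokenStream and OffsetAttribute calls but producing a plain string instead of memtree nodes; phrase handling is intentionally omitted. The helper name, the Set of matching terms, and the literal marker strings are assumptions for illustration.

// Sketch only: a simplified variant of Field.highlightMatches, not the eXist implementation.
import java.io.IOException;
import java.io.StringReader;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;

final class HighlightSketch {

    // Wrap every token contained in `terms` in marker strings, walking the
    // analyzer's token stream exactly as the excerpt above does.
    static String highlight(final Analyzer analyzer, final String fieldName,
            final String content, final Set<String> terms) throws IOException {
        final StringBuilder out = new StringBuilder();
        int currentPos = 0;
        try (TokenStream stream = analyzer.tokenStream(fieldName, new StringReader(content))) {
            stream.reset();
            final CharTermAttribute term = stream.getAttribute(CharTermAttribute.class);
            final OffsetAttribute offset = stream.getAttribute(OffsetAttribute.class);
            while (stream.incrementToken()) {
                if (terms.contains(term.toString())) {
                    out.append(content, currentPos, offset.startOffset());
                    out.append("<exist:match>")
                            .append(content, offset.startOffset(), offset.endOffset())
                            .append("</exist:match>");
                    currentPos = offset.endOffset();
                }
            }
            stream.end();
        }
        out.append(content.substring(currentPos));
        return out.toString();
    }
}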

Aggregations

InMemoryNodeSet (org.exist.dom.memtree.InMemoryNodeSet) 3
MemTreeBuilder (org.exist.dom.memtree.MemTreeBuilder) 2
Reader (java.io.Reader) 1
StringReader (java.io.StringReader) 1
Properties (java.util.Properties) 1
Analyzer (org.apache.lucene.analysis.Analyzer) 1
TokenStream (org.apache.lucene.analysis.TokenStream) 1
CharTermAttribute (org.apache.lucene.analysis.tokenattributes.CharTermAttribute) 1
OffsetAttribute (org.apache.lucene.analysis.tokenattributes.OffsetAttribute) 1
Term (org.apache.lucene.index.Term) 1
PhraseQuery (org.apache.lucene.search.PhraseQuery) 1
Query (org.apache.lucene.search.Query) 1
INodeHandle (org.exist.dom.INodeHandle) 1
QName (org.exist.dom.QName) 1
DocumentBuilderReceiver (org.exist.dom.memtree.DocumentBuilderReceiver) 1
NodeImpl (org.exist.dom.memtree.NodeImpl) 1
NodePath (org.exist.storage.NodePath) 1
XPathException (org.exist.xquery.XPathException) 1
NodeValue (org.exist.xquery.value.NodeValue) 1
SequenceIterator (org.exist.xquery.value.SequenceIterator) 1