Search in sources :

Example 1 with IEmbeddedXMLStreamReader

use of org.exist.stax.IEmbeddedXMLStreamReader in project exist by eXist-db.

the class ElementImpl method getChildren.

/**
 * Appends the direct children of this element to {@code childList}.
 *
 * Streams over the stored subtree of this element and adds every node found
 * exactly one tree level below it; deeper descendants are ignored. Streaming
 * stops at the END_ELEMENT event reported at this element's own level.
 * Failures while reading are logged as warnings and not rethrown.
 *
 * @param includeAttributes passed to the broker's stream reader to control
 *                          whether attributes are reported
 * @param childList the list receiving the child nodes
 */
private void getChildren(final boolean includeAttributes, final org.exist.dom.NodeListImpl childList) {
    try (final DBBroker broker = ownerDocument.getBrokerPool().getBroker()) {
        final int ownLevel = nodeId.getTreeLevel();
        final int directChildLevel = ownLevel + 1;
        final IEmbeddedXMLStreamReader events = broker.getXMLStreamReader(this, includeAttributes);
        while (events.hasNext()) {
            final int event = events.next();
            final NodeId currentId = (NodeId) events.getProperty(ExtendedXMLStreamReader.PROPERTY_NODE_ID);
            final int currentLevel = currentId.getTreeLevel();
            if (currentLevel > directChildLevel) {
                // deeper than a direct child: a descendant, not of interest
                continue;
            }
            final boolean endOfElement = event == XMLStreamConstants.END_ELEMENT;
            if (endOfElement && currentLevel == ownLevel) {
                // closing event of this element itself: nothing more to read
                break;
            }
            // any other END_ELEMENT is skipped; everything else at child level is a child
            if (!endOfElement && currentLevel == directChildLevel) {
                childList.add(events.getNode());
            }
        }
    } catch (final IOException | XMLStreamException | EXistException e) {
        LOG.warn("Internal error while reading child nodes: {}", e.getMessage(), e);
    }
}
Also used : XMLStreamException(javax.xml.stream.XMLStreamException) NodeId(org.exist.numbering.NodeId) IOException(java.io.IOException) EXistException(org.exist.EXistException) IEmbeddedXMLStreamReader(org.exist.stax.IEmbeddedXMLStreamReader)

Example 2 with IEmbeddedXMLStreamReader

use of org.exist.stax.IEmbeddedXMLStreamReader in project exist by eXist-db.

the class NodeProxy method directSelectAttribute.

/**
 * Selects the attributes of this node that satisfy {@code test} by reading
 * them straight from the broker's stream reader.
 *
 * @param broker the broker supplying the stream reader
 * @param test the node test each attribute is matched against
 * @param contextId the context id; when it equals {@code Expression.NO_CONTEXT_ID}
 *                  the context of this proxy is copied to each match, otherwise
 *                  this proxy is added as a context node under that id
 * @return the single matching proxy when the test is not a wildcard test,
 *         the set of all matches for a wildcard test, or
 *         {@code NodeSet.EMPTY_SET} when nothing matches or this node is
 *         neither of unknown type nor an element
 * @throws RuntimeException wrapping any {@code IOException} or
 *         {@code XMLStreamException} raised while reading
 */
@Override
public NodeSet directSelectAttribute(final DBBroker broker, final NodeTest test, final int contextId) {
    final boolean mayBeElement = nodeType == UNKNOWN_NODE_TYPE || nodeType == Node.ELEMENT_NODE;
    if (!mayBeElement) {
        return NodeSet.EMPTY_SET;
    }
    try {
        final IEmbeddedXMLStreamReader reader = broker.getXMLStreamReader(this, true);
        if (reader.next() != XMLStreamReader.START_ELEMENT) {
            return NodeSet.EMPTY_SET;
        }
        NewArrayNodeSet matches = null;
        // read at most as many events as the element reports attributes
        for (int remaining = reader.getAttributeCount(); remaining > 0; remaining--) {
            if (reader.next() != XMLStreamReader.ATTRIBUTE) {
                break;
            }
            final AttrImpl attribute = (AttrImpl) reader.getNode();
            if (!test.matches(attribute)) {
                continue;
            }
            final NodeProxy match = new NodeProxy(attribute);
            if (Expression.NO_CONTEXT_ID == contextId) {
                match.copyContext(this);
            } else {
                match.addContextNode(contextId, this);
            }
            if (!test.isWildcardTest()) {
                // a non-wildcard test is answered by the first matching attribute
                return match;
            }
            if (matches == null) {
                matches = new NewArrayNodeSet();
            }
            matches.add(match);
        }
        if (matches == null) {
            return NodeSet.EMPTY_SET;
        }
        return matches;
    } catch (final IOException | XMLStreamException e) {
        throw new RuntimeException(e.getMessage(), e);
    }
}
Also used : XMLStreamException(javax.xml.stream.XMLStreamException) IOException(java.io.IOException) IEmbeddedXMLStreamReader(org.exist.stax.IEmbeddedXMLStreamReader)

Example 3 with IEmbeddedXMLStreamReader

use of org.exist.stax.IEmbeddedXMLStreamReader in project exist by eXist-db.

the class LuceneMatchListener method scanMatches.

/**
 * Scans the text below {@code p} for tokens that correspond to query terms
 * and records their positions in {@code nodesWithMatch}.
 *
 * The method works in two phases: first it streams over {@code p}, feeding
 * element boundaries and character data to a text extractor while recording
 * the offset at which each text node starts; then it tokenizes the extracted
 * text with the configured analyzer and, for every token found in
 * {@code termMap}, stores start/end offsets relative to the owning text node.
 * I/O and stream errors in either phase are logged as warnings and not rethrown.
 *
 * @param p the node whose content is scanned
 */
private void scanMatches(final NodeProxy p) {
    // Collect the text content of all descendants of p.
    // Remember the start offsets of the text nodes for later use.
    final NodePath path = getPath(p);
    // Uses the first index configuration returned for this path.
    // NOTE(review): assumes at least one configuration exists for the path — confirm.
    final LuceneIndexConfig idxConf = config.getConfig(path).next();
    final TextExtractor extractor = new DefaultTextExtractor();
    extractor.configure(config, idxConf);
    final OffsetList offsets = new OffsetList();
    // element nesting depth relative to the start of the stream
    int level = 0;
    // running offset into the extracted text, advanced by the extractor's return values
    int textOffset = 0;
    try {
        // attributes are not reported (second argument is false)
        final IEmbeddedXMLStreamReader reader = broker.getXMLStreamReader(p, false);
        while (reader.hasNext()) {
            final int ev = reader.next();
            switch(ev) {
                case XMLStreamConstants.END_ELEMENT:
                    // NOTE(review): this break only leaves the switch, not the enclosing
                    // while loop — confirm whether terminating the loop was intended.
                    if (--level < 0) {
                        break;
                    }
                    // call extractor.endElement unless this is the root of the current fragment
                    if (level > 0) {
                        textOffset += extractor.endElement(reader.getQName());
                    }
                    break;
                case XMLStreamConstants.START_ELEMENT:
                    // call extractor.startElement unless this is the root of the current fragment
                    if (level > 0) {
                        textOffset += extractor.startElement(reader.getQName());
                    }
                    ++level;
                    break;
                case XMLStreamConstants.CHARACTERS:
                    final NodeId nodeId = (NodeId) reader.getProperty(ExtendedXMLStreamReader.PROPERTY_NODE_ID);
                    textOffset += extractor.beforeCharacters();
                    // record where this text node starts before its characters are appended
                    offsets.add(textOffset, nodeId);
                    textOffset += extractor.characters(reader.getXMLText());
                    break;
            }
        }
    } catch (final IOException | XMLStreamException e) {
        LOG.warn("Problem found while serializing XML: {}", e.getMessage(), e);
    }
    // Retrieve the Analyzer for the NodeProxy that was used for
    // indexing and querying.
    Analyzer analyzer = idxConf.getAnalyzer();
    if (analyzer == null) {
        // Otherwise use system default Lucene analyzer (from conf.xml)
        // to tokenize the text and find matching query terms.
        analyzer = index.getDefaultAnalyzer();
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("Analyzer: {} for path: {}", analyzer, path);
    }
    final String str = extractor.getText().toString();
    try (final Reader reader = new StringReader(str);
        final TokenStream tokenStream = analyzer.tokenStream(null, reader)) {
        tokenStream.reset();
        final MarkableTokenFilter stream = new MarkableTokenFilter(tokenStream);
        while (stream.incrementToken()) {
            String text = stream.getAttribute(CharTermAttribute.class).toString();
            // only tokens that are known query terms are of interest
            final Query query = termMap.get(text);
            if (query != null) {
                // single words which may also occur elsewhere in the document
                if (query instanceof PhraseQuery) {
                    final PhraseQuery phraseQuery = (PhraseQuery) query;
                    final Term[] terms = phraseQuery.getTerms();
                    if (text.equals(terms[0].text())) {
                        // Scan the following text and collect tokens to see
                        // if they are part of the phrase.
                        // NOTE(review): the stream is marked here, but the only reset in
                        // this method is commented out — confirm that tokens consumed
                        // during the look-ahead are intentionally not replayed.
                        stream.mark();
                        // index of the next phrase term expected
                        int t = 1;
                        // captured token state for each phrase term matched so far
                        final List<State> stateList = new ArrayList<>(terms.length);
                        stateList.add(stream.captureState());
                        while (stream.incrementToken() && t < terms.length) {
                            text = stream.getAttribute(CharTermAttribute.class).toString();
                            if (text.equals(terms[t].text())) {
                                stateList.add(stream.captureState());
                                if (++t == terms.length) {
                                    break;
                                }
                            } else {
                                // stream.reset();
                                break;
                            }
                        }
                        if (stateList.size() == terms.length) {
                            // we indeed have a phrase match. record the offsets of its terms.
                            // index of the text node the previous phrase term was found in
                            int lastIdx = -1;
                            for (int i = 0; i < terms.length; i++) {
                                stream.restoreState(stateList.get(i));
                                final OffsetAttribute offsetAttr = stream.getAttribute(OffsetAttribute.class);
                                final int idx = offsets.getIndex(offsetAttr.startOffset());
                                final NodeId nodeId = offsets.ids[idx];
                                final Offset offset = nodesWithMatch.get(nodeId);
                                // offsets.offsets[idx] is presumably the start offset recorded for
                                // this text node, making the stored values node-relative — TODO confirm
                                if (offset != null) {
                                    if (lastIdx == idx) {
                                        // same text node as the previous term: extend the existing range
                                        offset.setEndOffset(offsetAttr.endOffset() - offsets.offsets[idx]);
                                    } else {
                                        offset.add(offsetAttr.startOffset() - offsets.offsets[idx], offsetAttr.endOffset() - offsets.offsets[idx]);
                                    }
                                } else {
                                    nodesWithMatch.put(nodeId, new Offset(offsetAttr.startOffset() - offsets.offsets[idx], offsetAttr.endOffset() - offsets.offsets[idx]));
                                }
                                lastIdx = idx;
                            }
                        }
                    }
                // End of phrase handling
                } else {
                    // non-phrase query: record this single token's range on its text node
                    final OffsetAttribute offsetAttr = stream.getAttribute(OffsetAttribute.class);
                    final int idx = offsets.getIndex(offsetAttr.startOffset());
                    final NodeId nodeId = offsets.ids[idx];
                    final Offset offset = nodesWithMatch.get(nodeId);
                    if (offset != null) {
                        offset.add(offsetAttr.startOffset() - offsets.offsets[idx], offsetAttr.endOffset() - offsets.offsets[idx]);
                    } else {
                        nodesWithMatch.put(nodeId, new Offset(offsetAttr.startOffset() - offsets.offsets[idx], offsetAttr.endOffset() - offsets.offsets[idx]));
                    }
                }
            }
        }
    } catch (final IOException e) {
        LOG.warn("Problem found while serializing XML: {}", e.getMessage(), e);
    }
}
Also used : TokenStream(org.apache.lucene.analysis.TokenStream) Query(org.apache.lucene.search.Query) PhraseQuery(org.apache.lucene.search.PhraseQuery) IEmbeddedXMLStreamReader(org.exist.stax.IEmbeddedXMLStreamReader) ExtendedXMLStreamReader(org.exist.stax.ExtendedXMLStreamReader) Reader(java.io.Reader) StringReader(java.io.StringReader) IEmbeddedXMLStreamReader(org.exist.stax.IEmbeddedXMLStreamReader) Analyzer(org.apache.lucene.analysis.Analyzer) StringReader(java.io.StringReader) PhraseQuery(org.apache.lucene.search.PhraseQuery) IOException(java.io.IOException) Term(org.apache.lucene.index.Term) NodePath(org.exist.storage.NodePath) CharTermAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute) XMLStreamException(javax.xml.stream.XMLStreamException) State(org.apache.lucene.util.AttributeSource.State) NodeId(org.exist.numbering.NodeId) OffsetAttribute(org.apache.lucene.analysis.tokenattributes.OffsetAttribute)

Example 4 with IEmbeddedXMLStreamReader

use of org.exist.stax.IEmbeddedXMLStreamReader in project exist by eXist-db.

the class NativeBroker method getXMLStreamReader.

/**
 * Returns this broker's stream reader positioned on {@code node}.
 *
 * The reader instance is created on first use and kept in the
 * {@code streamReader} field; later calls reposition that same instance
 * instead of creating a new one.
 *
 * @param node the node the reader is positioned on
 * @param reportAttributes passed through to the reader
 * @return the (shared) stream reader
 * @throws IOException declared for failures of the underlying reader
 * @throws XMLStreamException declared for failures of the underlying reader
 */
@Override
public IEmbeddedXMLStreamReader getXMLStreamReader(final NodeHandle node, final boolean reportAttributes) throws IOException, XMLStreamException {
    if (streamReader != null) {
        // reuse the existing reader, moving it to the requested node
        streamReader.reposition(this, node, reportAttributes);
        return streamReader;
    }
    final RawNodeIterator nodeIterator = new RawNodeIterator(this, domDb, node);
    streamReader = new EmbeddedXMLStreamReader(this, node.getOwnerDocument(), nodeIterator, node, reportAttributes);
    return streamReader;
}
Also used : RawNodeIterator(org.exist.storage.dom.RawNodeIterator) IEmbeddedXMLStreamReader(org.exist.stax.IEmbeddedXMLStreamReader) EmbeddedXMLStreamReader(org.exist.stax.EmbeddedXMLStreamReader)

Example 5 with IEmbeddedXMLStreamReader

use of org.exist.stax.IEmbeddedXMLStreamReader in project exist by eXist-db.

the class GetFragmentBetween method getFragmentBetween.

/**
 * Fetch the fragment between two nodes (normally milestones) in an XML document.
 *
 * The top-level children of the owner document of {@code node1} are streamed in
 * order. Serialization is switched on at the start-element event whose node id
 * equals that of {@code node1} and switched off (and streaming stopped) at the
 * start-element event whose node id equals that of {@code node2}. While switched
 * on, start tags, end tags, character data, CDATA sections, comments and
 * processing instructions are appended to the result. Top-level processing
 * instructions and comments of the document are not streamed.
 *
 * When {@code node2} is absent no end node can ever be reached, so the fragment
 * runs from {@code node1} to the end of the document.
 *
 * @param node1 first node from which down to the node node2 the XML fragment is delivered as a string
 * @param node2 the node to which down the XML fragment is delivered as a string;
 *              may be empty
 *
 * @return fragment between the two nodes
 *
 * @throws XPathException if acquiring a broker or reading the stored nodes fails
 */
private StringBuilder getFragmentBetween(final Node node1, final Optional<Node> node2) throws XPathException {
    final StoredNode storedNode1 = (StoredNode) node1;
    final Optional<StoredNode> storedNode2 = node2.map(n -> (StoredNode) n);
    final NodeId node1NodeId = storedNode1.getNodeId();
    final Optional<NodeId> node2NodeId = storedNode2.map(StoredNode::getNodeId);
    final DocumentImpl docImpl = (DocumentImpl) node1.getOwnerDocument();
    final StringBuilder resultFragment = new StringBuilder();
    // id of the most recently started element; empty until the first START_ELEMENT
    Optional<NodeId> actualNodeId = Optional.empty();
    boolean getFragmentMode = false;
    try {
        final BrokerPool brokerPool = docImpl.getBrokerPool();
        try (final DBBroker dbBroker = brokerPool.getBroker()) {
            final NodeList children = docImpl.getChildNodes();
            for (int i = 0; i < children.getLength(); i++) {
                final StoredNode docChildStoredNode = (StoredNode) children.item(i);
                final int docChildStoredNodeType = docChildStoredNode.getNodeType();
                final IEmbeddedXMLStreamReader reader = dbBroker.getXMLStreamReader(docChildStoredNode, false);
                // The end is only "reached" when an end node was actually given.
                // Comparing the two Optionals directly would treat an absent node2
                // and the initially empty actualNodeId as equal and skip all content.
                while (reader.hasNext()
                        && !(node2NodeId.isPresent() && node2NodeId.equals(actualNodeId))
                        && docChildStoredNodeType != Node.PROCESSING_INSTRUCTION_NODE
                        && docChildStoredNodeType != Node.COMMENT_NODE) {
                    final int status = reader.next();
                    switch(status) {
                        case XMLStreamReader.START_DOCUMENT:
                        case XMLStreamReader.END_DOCUMENT:
                            break;
                        case XMLStreamReader.START_ELEMENT:
                            final NodeId currentNodeId = reader.getNode().getNodeId();
                            actualNodeId = Optional.of(currentNodeId);
                            if (node1NodeId.equals(currentNodeId)) {
                                getFragmentMode = true;
                            }
                            // the end node itself is not part of the fragment
                            if (actualNodeId.equals(node2NodeId)) {
                                getFragmentMode = false;
                            }
                            if (getFragmentMode) {
                                resultFragment.append(getStartElementTag(reader));
                            }
                            break;
                        case XMLStreamReader.END_ELEMENT:
                            if (getFragmentMode) {
                                resultFragment.append(getEndElementTag(reader));
                            }
                            break;
                        case XMLStreamReader.CHARACTERS:
                            if (getFragmentMode) {
                                resultFragment.append(getCharacters(reader));
                            }
                            break;
                        case XMLStreamReader.CDATA:
                            if (getFragmentMode) {
                                resultFragment.append(getCDataTag(reader));
                            }
                            break;
                        case XMLStreamReader.COMMENT:
                            if (getFragmentMode) {
                                resultFragment.append(getCommentTag(reader));
                            }
                            break;
                        case XMLStreamReader.PROCESSING_INSTRUCTION:
                            if (getFragmentMode) {
                                resultFragment.append(getPITag(reader));
                            }
                            break;
                    }
                }
            }
        }
    } catch (final EXistException | XMLStreamException | IOException e) {
        throw new XPathException(this, "An error occurred while getFragmentBetween: " + e.getMessage(), e);
    }
    return resultFragment;
}
Also used : NodeList(org.w3c.dom.NodeList) EXistException(org.exist.EXistException) IOException(java.io.IOException) IEmbeddedXMLStreamReader(org.exist.stax.IEmbeddedXMLStreamReader) DocumentImpl(org.exist.dom.persistent.DocumentImpl) DBBroker(org.exist.storage.DBBroker) XMLStreamException(javax.xml.stream.XMLStreamException) NodeId(org.exist.numbering.NodeId) BrokerPool(org.exist.storage.BrokerPool) StoredNode(org.exist.dom.persistent.StoredNode)

Aggregations

IEmbeddedXMLStreamReader (org.exist.stax.IEmbeddedXMLStreamReader)9 IOException (java.io.IOException)8 XMLStreamException (javax.xml.stream.XMLStreamException)8 NodeId (org.exist.numbering.NodeId)6 EXistException (org.exist.EXistException)5 DBBroker (org.exist.storage.DBBroker)4 Reader (java.io.Reader)1 StringReader (java.io.StringReader)1 Analyzer (org.apache.lucene.analysis.Analyzer)1 TokenStream (org.apache.lucene.analysis.TokenStream)1 CharTermAttribute (org.apache.lucene.analysis.tokenattributes.CharTermAttribute)1 OffsetAttribute (org.apache.lucene.analysis.tokenattributes.OffsetAttribute)1 Term (org.apache.lucene.index.Term)1 PhraseQuery (org.apache.lucene.search.PhraseQuery)1 Query (org.apache.lucene.search.Query)1 State (org.apache.lucene.util.AttributeSource.State)1 DocumentImpl (org.exist.dom.persistent.DocumentImpl)1 StoredNode (org.exist.dom.persistent.StoredNode)1 EmbeddedXMLStreamReader (org.exist.stax.EmbeddedXMLStreamReader)1 ExtendedXMLStreamReader (org.exist.stax.ExtendedXMLStreamReader)1