Use of org.exist.stax.IEmbeddedXMLStreamReader in project exist by eXist-db.
The class ElementImpl, method getChildren:
private void getChildren(final boolean includeAttributes, final org.exist.dom.NodeListImpl childList) {
    try (final DBBroker broker = ownerDocument.getBrokerPool().getBroker()) {
        final int thisLevel = nodeId.getTreeLevel();
        final int childLevel = thisLevel + 1;
        for (final IEmbeddedXMLStreamReader reader = broker.getXMLStreamReader(this, includeAttributes); reader.hasNext(); ) {
            final int status = reader.next();
            final NodeId otherId = (NodeId) reader.getProperty(ExtendedXMLStreamReader.PROPERTY_NODE_ID);
            final int otherLevel = otherId.getTreeLevel();

            // skip descendants below the child level
            if (otherLevel > childLevel) {
                continue;
            }

            if (status == XMLStreamConstants.END_ELEMENT) {
                if (otherLevel == thisLevel) {
                    // back at this element's own END_ELEMENT: all children seen
                    break;
                }
                // skip over any other END_ELEMENT(s)
            } else if (otherLevel == childLevel) {
                // a direct child
                childList.add(reader.getNode());
            }
        }
    } catch (final IOException | XMLStreamException | EXistException e) {
        LOG.warn("Internal error while reading child nodes: {}", e.getMessage(), e);
    }
}
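The same level-based scan works anywhere a broker and a stored node are at hand. A minimal sketch, assuming a persistent element elem and an open DBBroker broker (both hypothetical names, not part of the listing above):

// Sketch: collect the direct children of a stored element via the embedded stream reader.
final List<org.w3c.dom.Node> children = new ArrayList<>();
final int thisLevel = elem.getNodeId().getTreeLevel();
final IEmbeddedXMLStreamReader reader = broker.getXMLStreamReader(elem, false);
while (reader.hasNext()) {
    final int ev = reader.next();
    final NodeId id = (NodeId) reader.getProperty(ExtendedXMLStreamReader.PROPERTY_NODE_ID);
    if (ev == XMLStreamConstants.END_ELEMENT && id.getTreeLevel() == thisLevel) {
        break; // the element's own closing event: all children seen
    }
    if (ev != XMLStreamConstants.END_ELEMENT && id.getTreeLevel() == thisLevel + 1) {
        children.add(reader.getNode()); // a direct child; deeper descendants are skipped
    }
}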
Use of org.exist.stax.IEmbeddedXMLStreamReader in project exist by eXist-db.
The class NodeProxy, method directSelectAttribute:
@Override
public NodeSet directSelectAttribute(final DBBroker broker, final NodeTest test, final int contextId) {
    if (nodeType != UNKNOWN_NODE_TYPE && nodeType != Node.ELEMENT_NODE) {
        return NodeSet.EMPTY_SET;
    }
    try {
        NewArrayNodeSet result = null;
        final IEmbeddedXMLStreamReader reader = broker.getXMLStreamReader(this, true);
        int status = reader.next();
        if (status != XMLStreamReader.START_ELEMENT) {
            return NodeSet.EMPTY_SET;
        }
        final int attrs = reader.getAttributeCount();
        for (int i = 0; i < attrs; i++) {
            status = reader.next();
            if (status != XMLStreamReader.ATTRIBUTE) {
                break;
            }
            final AttrImpl attr = (AttrImpl) reader.getNode();
            if (test.matches(attr)) {
                final NodeProxy child = new NodeProxy(attr);
                if (Expression.NO_CONTEXT_ID != contextId) {
                    child.addContextNode(contextId, this);
                } else {
                    child.copyContext(this);
                }
                if (!test.isWildcardTest()) {
                    // a named test matches at most one attribute; NodeProxy
                    // itself implements NodeSet, so it can be returned directly
                    return child;
                }
                if (result == null) {
                    result = new NewArrayNodeSet();
                }
                result.add(child);
            }
        }
        return result == null ? NodeSet.EMPTY_SET : result;
    } catch (final IOException | XMLStreamException e) {
        throw new RuntimeException(e.getMessage(), e);
    }
}
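A hedged usage sketch: selecting every attribute of a stored element with a wildcard node test, so the loop above accumulates results instead of returning early. The variables proxy (a NodeProxy for an element) and broker are assumed to be in scope; TypeTest with Type.ATTRIBUTE is one way to express a wildcard attribute test in eXist's XQuery layer:

final NodeTest anyAttribute = new TypeTest(Type.ATTRIBUTE); // wildcard: matches every attribute
final NodeSet attributes = proxy.directSelectAttribute(broker, anyAttribute, Expression.NO_CONTEXT_ID);
for (final NodeProxy attr : attributes) {
    // each proxy wraps one matching AttrImpl
    System.out.println(attr.getNode().getNodeName() + " = " + attr.getNode().getNodeValue());
}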
Use of org.exist.stax.IEmbeddedXMLStreamReader in project exist by eXist-db.
The class LuceneMatchListener, method scanMatches:
private void scanMatches(final NodeProxy p) {
    // Collect the text content of all descendants of p.
    // Remember the start offsets of the text nodes for later use.
    final NodePath path = getPath(p);
    final LuceneIndexConfig idxConf = config.getConfig(path).next();
    final TextExtractor extractor = new DefaultTextExtractor();
    extractor.configure(config, idxConf);
    final OffsetList offsets = new OffsetList();
    int level = 0;
    int textOffset = 0;
    try {
        final IEmbeddedXMLStreamReader reader = broker.getXMLStreamReader(p, false);
        while (reader.hasNext()) {
            final int ev = reader.next();
            switch (ev) {
                case XMLStreamConstants.END_ELEMENT:
                    if (--level < 0) {
                        break;
                    }
                    // call extractor.endElement unless this is the root of the current fragment
                    if (level > 0) {
                        textOffset += extractor.endElement(reader.getQName());
                    }
                    break;
                case XMLStreamConstants.START_ELEMENT:
                    // call extractor.startElement unless this is the root of the current fragment
                    if (level > 0) {
                        textOffset += extractor.startElement(reader.getQName());
                    }
                    ++level;
                    break;
                case XMLStreamConstants.CHARACTERS:
                    final NodeId nodeId = (NodeId) reader.getProperty(ExtendedXMLStreamReader.PROPERTY_NODE_ID);
                    textOffset += extractor.beforeCharacters();
                    offsets.add(textOffset, nodeId);
                    textOffset += extractor.characters(reader.getXMLText());
                    break;
            }
        }
    } catch (final IOException | XMLStreamException e) {
        LOG.warn("Problem found while serializing XML: {}", e.getMessage(), e);
    }
    // Retrieve the Analyzer for the NodeProxy that was used for
    // indexing and querying.
    Analyzer analyzer = idxConf.getAnalyzer();
    if (analyzer == null) {
        // Fall back to the system default Lucene analyzer (from conf.xml)
        // to tokenize the text and find matching query terms.
        analyzer = index.getDefaultAnalyzer();
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("Analyzer: {} for path: {}", analyzer, path);
    }
    final String str = extractor.getText().toString();
    try (final Reader reader = new StringReader(str);
         final TokenStream tokenStream = analyzer.tokenStream(null, reader)) {
        tokenStream.reset();
        final MarkableTokenFilter stream = new MarkableTokenFilter(tokenStream);
        while (stream.incrementToken()) {
            String text = stream.getAttribute(CharTermAttribute.class).toString();
            final Query query = termMap.get(text);
            if (query != null) {
                // Phrase queries need special handling: only the complete phrase
                // should be marked, not single words which may also occur
                // elsewhere in the document.
                if (query instanceof PhraseQuery) {
                    final PhraseQuery phraseQuery = (PhraseQuery) query;
                    final Term[] terms = phraseQuery.getTerms();
                    if (text.equals(terms[0].text())) {
                        // Scan the following text and collect tokens to see
                        // if they are part of the phrase.
                        stream.mark();
                        int t = 1;
                        final List<State> stateList = new ArrayList<>(terms.length);
                        stateList.add(stream.captureState());
                        while (stream.incrementToken() && t < terms.length) {
                            text = stream.getAttribute(CharTermAttribute.class).toString();
                            if (text.equals(terms[t].text())) {
                                stateList.add(stream.captureState());
                                if (++t == terms.length) {
                                    break;
                                }
                            } else {
                                break;
                            }
                        }
                        if (stateList.size() == terms.length) {
                            // We indeed have a phrase match: record the offsets of its terms.
                            int lastIdx = -1;
                            for (int i = 0; i < terms.length; i++) {
                                stream.restoreState(stateList.get(i));
                                final OffsetAttribute offsetAttr = stream.getAttribute(OffsetAttribute.class);
                                final int idx = offsets.getIndex(offsetAttr.startOffset());
                                final NodeId nodeId = offsets.ids[idx];
                                final Offset offset = nodesWithMatch.get(nodeId);
                                if (offset != null) {
                                    if (lastIdx == idx) {
                                        offset.setEndOffset(offsetAttr.endOffset() - offsets.offsets[idx]);
                                    } else {
                                        offset.add(offsetAttr.startOffset() - offsets.offsets[idx], offsetAttr.endOffset() - offsets.offsets[idx]);
                                    }
                                } else {
                                    nodesWithMatch.put(nodeId, new Offset(offsetAttr.startOffset() - offsets.offsets[idx], offsetAttr.endOffset() - offsets.offsets[idx]));
                                }
                                lastIdx = idx;
                            }
                        }
                    }
                    // End of phrase handling
                } else {
                    final OffsetAttribute offsetAttr = stream.getAttribute(OffsetAttribute.class);
                    final int idx = offsets.getIndex(offsetAttr.startOffset());
                    final NodeId nodeId = offsets.ids[idx];
                    final Offset offset = nodesWithMatch.get(nodeId);
                    if (offset != null) {
                        offset.add(offsetAttr.startOffset() - offsets.offsets[idx], offsetAttr.endOffset() - offsets.offsets[idx]);
                    } else {
                        nodesWithMatch.put(nodeId, new Offset(offsetAttr.startOffset() - offsets.offsets[idx], offsetAttr.endOffset() - offsets.offsets[idx]));
                    }
                }
            }
        }
    } catch (final IOException e) {
        LOG.warn("Problem found while analyzing text: {}", e.getMessage(), e);
    }
}
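The token-walking pattern above is plain Lucene API. A self-contained sketch of the same loop, using a StandardAnalyzer rather than the analyzer eXist resolves from the index configuration (an assumption for illustration only; imports come from org.apache.lucene.analysis and org.apache.lucene.analysis.tokenattributes):

try (final Analyzer analyzer = new StandardAnalyzer();
     final TokenStream ts = analyzer.tokenStream(null, new StringReader("some text to match"))) {
    final CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    final OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
        // term text plus the character offsets used to map tokens back into the XML text nodes
        System.out.printf("%s [%d,%d)%n", term.toString(), offset.startOffset(), offset.endOffset());
    }
    ts.end();
}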
Use of org.exist.stax.IEmbeddedXMLStreamReader in project exist by eXist-db.
The class NativeBroker, method getXMLStreamReader:
@Override
public IEmbeddedXMLStreamReader getXMLStreamReader(final NodeHandle node, final boolean reportAttributes) throws IOException, XMLStreamException {
    if (streamReader == null) {
        // lazily create a single reader for this broker and cache it
        final RawNodeIterator iterator = new RawNodeIterator(this, domDb, node);
        streamReader = new EmbeddedXMLStreamReader(this, node.getOwnerDocument(), iterator, node, reportAttributes);
    } else {
        // reuse the cached reader, repositioned to the new start node
        streamReader.reposition(this, node, reportAttributes);
    }
    return streamReader;
}
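Because the broker caches a single reader and merely repositions it, one broker must not drive two scans at once. A hedged calling sketch, assuming broker and a NodeHandle node are in scope (hypothetical names):

final IEmbeddedXMLStreamReader reader = broker.getXMLStreamReader(node, true);
while (reader.hasNext()) {
    if (reader.next() == XMLStreamConstants.START_ELEMENT) {
        System.out.println("element: " + reader.getQName());
    }
}
// Obtaining a second reader from the same broker during this loop would
// reposition the same cached instance mid-scan.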
Use of org.exist.stax.IEmbeddedXMLStreamReader in project exist by eXist-db.
The class GetFragmentBetween, method getFragmentBetween:
/**
 * Fetch the fragment between two nodes (normally milestones) in an XML document.
 *
 * @param node1 the node at which the delivered XML fragment begins
 * @param node2 the node at which the delivered XML fragment ends, if present
 *
 * @return the fragment between the two nodes, serialized as a string
 *
 * @throws XPathException if an error occurs while reading the fragment
 */
private StringBuilder getFragmentBetween(final Node node1, final Optional<Node> node2) throws XPathException {
    final StoredNode storedNode1 = (StoredNode) node1;
    final Optional<StoredNode> storedNode2 = node2.map(n -> (StoredNode) n);
    final NodeId node1NodeId = storedNode1.getNodeId();
    final Optional<NodeId> node2NodeId = storedNode2.map(StoredNode::getNodeId);
    final DocumentImpl docImpl = (DocumentImpl) node1.getOwnerDocument();
    final StringBuilder resultFragment = new StringBuilder();
    Optional<NodeId> actualNodeId = Optional.empty();
    boolean getFragmentMode = false;
    try {
        final BrokerPool brokerPool = docImpl.getBrokerPool();
        try (final DBBroker dbBroker = brokerPool.getBroker()) {
            final NodeList children = docImpl.getChildNodes();
            for (int i = 0; i < children.getLength(); i++) {
                final StoredNode docChildStoredNode = (StoredNode) children.item(i);
                final int docChildStoredNodeType = docChildStoredNode.getNodeType();
                final IEmbeddedXMLStreamReader reader = dbBroker.getXMLStreamReader(docChildStoredNode, false);
                while (reader.hasNext()
                        && !node2NodeId.equals(actualNodeId)
                        && docChildStoredNodeType != Node.PROCESSING_INSTRUCTION_NODE
                        && docChildStoredNodeType != Node.COMMENT_NODE) {
                    final int status = reader.next();
                    switch (status) {
                        case XMLStreamReader.START_DOCUMENT:
                        case XMLStreamReader.END_DOCUMENT:
                            break;
                        case XMLStreamReader.START_ELEMENT:
                            actualNodeId = Optional.of(reader.getNode().getNodeId());
                            if (actualNodeId.map(node1NodeId::equals).orElse(false)) {
                                getFragmentMode = true;
                            }
                            if (actualNodeId.equals(node2NodeId)) {
                                getFragmentMode = false;
                            }
                            if (getFragmentMode) {
                                resultFragment.append(getStartElementTag(reader));
                            }
                            break;
                        case XMLStreamReader.END_ELEMENT:
                            if (getFragmentMode) {
                                resultFragment.append(getEndElementTag(reader));
                            }
                            break;
                        case XMLStreamReader.CHARACTERS:
                            if (getFragmentMode) {
                                resultFragment.append(getCharacters(reader));
                            }
                            break;
                        case XMLStreamReader.CDATA:
                            if (getFragmentMode) {
                                resultFragment.append(getCDataTag(reader));
                            }
                            break;
                        case XMLStreamReader.COMMENT:
                            if (getFragmentMode) {
                                resultFragment.append(getCommentTag(reader));
                            }
                            break;
                        case XMLStreamReader.PROCESSING_INSTRUCTION:
                            if (getFragmentMode) {
                                resultFragment.append(getPITag(reader));
                            }
                            break;
                    }
                }
            }
        }
    } catch (final EXistException | XMLStreamException | IOException e) {
        throw new XPathException(this, "An error occurred in getFragmentBetween: " + e.getMessage(), e);
    }
    return resultFragment;
}
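The tag-building helpers (getStartElementTag and friends) are omitted from the listing. A hedged sketch of what reconstructing a start tag from a StAX reader can look like; attribute-value escaping and namespace declarations are deliberately left out, and this is not necessarily the project's actual implementation:

// Sketch: rebuild a start tag from the current START_ELEMENT event.
private static String startTagOf(final XMLStreamReader reader) {
    final StringBuilder sb = new StringBuilder("<").append(reader.getLocalName());
    for (int i = 0; i < reader.getAttributeCount(); i++) {
        sb.append(' ')
          .append(reader.getAttributeLocalName(i))
          .append("=\"")
          .append(reader.getAttributeValue(i)) // real code must escape quotes and ampersands
          .append('"');
    }
    return sb.append('>').toString();
}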