use of org.knime.core.data.xml.XMLValue in project knime-core by knime.
the class XMLXpathCellReader method readXML.
/**
* {@inheritDoc}
*
* <p>Consumes events from the stream parser and mirrors them into every DOM document that is
* currently under construction. A new document is started for each element the XPath matcher
* reports as a match; as soon as the element that roots one of those documents is closed, the
* document is turned into a cell and returned.
*
* @return the cell created for the next completed match, or {@code null} if
*         {@code m_xpathMatcher.nodeMatches()} is {@code false} or the parser has no further events
* @throws IOException wrapping any {@link XMLStreamException} raised while reading
*/
@Override
public XMLValue<Document> readXML() throws IOException {
if (!m_xpathMatcher.nodeMatches()) {
return null;
}
try {
while (m_parser.hasNext()) {
switch(m_parser.getEventType()) {
case XMLStreamConstants.START_ELEMENT:
updateBasePath();
updateXmlSpaceDefinition();
updateXmlLangDefinition();
pushNamespaceContext();
// Mirror the new element into every document under construction and descend into it.
for (int i = 0; i < m_docs.size(); i++) {
Element element = createElement(m_docs.get(i));
m_currNodes.get(i).appendChild(element);
m_currNodes.set(i, element);
}
boolean match = m_xpathMatcher.startElement(m_parser.getName());
if (match) {
// A match opens a fresh document that is rooted at the current element.
Document doc = m_builder.newDocument();
Element element = createElement(doc);
// Declare every entry of the namespace stack on the new root element.
// NOTE(review): entries are presumably (qualified attribute name, namespace URI) pairs - confirm.
m_namespaceStack.stream().flatMap(e -> e.stream()).forEach(p -> element.setAttributeNS(XMLConstants.XMLNS_ATTRIBUTE_NS_URI, p.getFirst(), p.getSecond()));
// Carry the xml:base / xml:space / xml:lang values found at index 0 of their lists over to
// the new root (see: http://www.w3.org/XML/1998/namespace).
// NOTE(review): index 0 appears to be the entry of the current element, since END_ELEMENT
// removes index 0 - confirm against the update*() helpers.
if (!m_base.isEmpty() && null != m_base.get(0)) {
element.setAttributeNS(XMLConstants.XML_NS_URI, "xml:base", m_base.get(0));
}
if (!m_space.isEmpty() && null != m_space.get(0)) {
element.setAttributeNS(XMLConstants.XML_NS_URI, "xml:space", m_space.get(0));
}
if (!m_lang.isEmpty() && null != m_lang.get(0)) {
element.setAttributeNS(XMLConstants.XML_NS_URI, "xml:lang", m_lang.get(0));
}
doc.appendChild(element);
m_docs.add(doc);
m_currNodes.add(element);
}
break;
case XMLStreamConstants.END_ELEMENT:
// m_reentrent is set when a previous call returned from this same END_ELEMENT event without
// advancing the parser. In that case the matcher and the xml:* lists were already updated
// for this event, so they must not be updated a second time.
if (!m_reentrent) {
m_xpathMatcher.endElement();
if (!m_base.isEmpty()) {
m_base.remove(0);
}
if (!m_space.isEmpty()) {
m_space.remove(0);
}
if (!m_lang.isEmpty()) {
m_lang.remove(0);
}
}
// Visit the documents from the most recently started one to the oldest one.
for (int i = m_docs.size() - 1; i >= 0; i--) {
Node curr = m_currNodes.get(i);
Node first = m_docs.get(i).getFirstChild();
if (curr.isSameNode(first)) {
// The element being closed is the root of this document: the match is complete.
XMLValue<Document> cell = null;
cell = createDataCell(m_docs.get(i));
m_docs.remove(i);
m_currNodes.remove(i);
// Return without advancing the parser; the next call re-enters this END_ELEMENT event
// to handle the remaining documents and to pop the namespace context.
m_reentrent = true;
return cell;
} else {
// Not the root yet: step back up to the parent node.
m_currNodes.set(i, curr.getParentNode());
}
}
// Reached only once no document was completed by this event.
popNamespaceContext();
m_reentrent = false;
break;
case XMLStreamConstants.CHARACTERS:
// Keep the text unless it is whitespace-only and xml:space is not "preserve".
if (!m_parser.isWhiteSpace() || (m_parser.isWhiteSpace() && !m_space.isEmpty() && null != m_space.get(0) && m_space.get(0).equals("preserve"))) {
for (int i = 0; i < m_docs.size(); i++) {
String str = m_parser.getText();
Text text = m_docs.get(i).createTextNode(str);
m_currNodes.get(i).appendChild(text);
}
}
break;
case XMLStreamConstants.SPACE:
// SPACE events are copied only when xml:space is "preserve".
if (!m_space.isEmpty() && null != m_space.get(0) && m_space.get(0).equals("preserve")) {
for (int i = 0; i < m_docs.size(); i++) {
String str = m_parser.getText();
Text text = m_docs.get(i).createTextNode(str);
m_currNodes.get(i).appendChild(text);
}
}
break;
case XMLStreamConstants.COMMENT:
// Comments are copied into every document under construction.
for (int i = 0; i < m_docs.size(); i++) {
String str = m_parser.getText();
Comment comment = m_docs.get(i).createComment(str);
m_currNodes.get(i).appendChild(comment);
}
break;
case XMLStreamConstants.ENTITY_REFERENCE:
// Entity references are re-created from the text the parser reports for the event.
for (int i = 0; i < m_docs.size(); i++) {
String str = m_parser.getText();
EntityReference ref = m_docs.get(i).createEntityReference(str);
m_currNodes.get(i).appendChild(ref);
}
break;
case XMLStreamConstants.PROCESSING_INSTRUCTION:
for (int i = 0; i < m_docs.size(); i++) {
String piTarget = m_parser.getPITarget();
// Despite its name, piName holds the data part of the processing instruction.
String piName = m_parser.getPIData();
ProcessingInstruction pi = m_docs.get(i).createProcessingInstruction(piTarget, piName);
m_currNodes.get(i).appendChild(pi);
}
break;
case XMLStreamConstants.DTD:
// Not needed; use the DOM reader for full documents.
break;
case XMLStreamConstants.END_DOCUMENT:
// never called
m_parser.close();
// NOTE(review): no break here, so control falls through to default - presumably intentional.
default:
break;
}
m_parser.next();
}
// No further events: nothing more to return.
return null;
} catch (XMLStreamException e) {
throw new IOException(e);
}
}
use of org.knime.core.data.xml.XMLValue in project knime-core by knime.
the class XML2PMMLNodeModel method createColRearranger.
/**
 * Creates the rearranger that turns the configured XML column into a PMML column.
 *
 * <p>If no XML column is configured yet, {@code guessDefaultXMLColumn(spec)} is invoked first.
 * The output column is either appended under a unique variant of the configured new column name
 * or replaces the input column, depending on {@code m_replaceColumn}.
 *
 * <p>For each row a missing input cell yields a missing output cell. Rows whose content cannot be
 * converted increment {@code m_failCounter}; they either raise a {@link RuntimeException} (when
 * {@code m_failOnInvalid} is set) or produce a missing cell carrying the failure message.
 *
 * @param spec the spec of the input table
 * @return the rearranger that appends or replaces the PMML column
 * @throws InvalidSettingsException if the configured column does not exist or has an
 *             incompatible type
 */
private ColumnRearranger createColRearranger(final DataTableSpec spec) throws InvalidSettingsException {
    if (m_xmlColumnName.getStringValue() == null) {
        guessDefaultXMLColumn(spec);
    }
    String xmlColName = m_xmlColumnName.getStringValue();
    String newColName = m_newColumnName.getStringValue();
    final int colIndex = spec.findColumnIndex(xmlColName);
    CheckUtils.checkSetting(colIndex >= 0, "Column: '%s' does not exist anymore.", xmlColName);
    final DataColumnSpec colSpec = spec.getColumnSpec(colIndex);
    CheckUtils.checkSetting(colSpec.getType().isCompatible(StringValue.class),
        "Selected column '%s' is not string/xml-compatible", xmlColName);

    DataColumnSpecCreator colSpecCreator;
    if (newColName != null && !m_replaceColumn.getBooleanValue()) {
        String newName = DataTableSpec.getUniqueColumnName(spec, newColName);
        colSpecCreator = new DataColumnSpecCreator(newName, PMMLCell.TYPE);
    } else {
        // Reuse the original column spec but switch it to the PMML type; handlers and domain
        // of the source column are dropped.
        colSpecCreator = new DataColumnSpecCreator(colSpec);
        colSpecCreator.setType(PMMLCell.TYPE);
        colSpecCreator.removeAllHandlers();
        colSpecCreator.setDomain(null);
    }
    DataColumnSpec outColumnSpec = colSpecCreator.createSpec();
    ColumnRearranger rearranger = new ColumnRearranger(spec);
    CellFactory fac = new SingleCellFactory(outColumnSpec) {
        @Override
        public DataCell getCell(final DataRow row) {
            DataCell cell = row.getCell(colIndex);
            if (cell.isMissing()) {
                return DataType.getMissingCell();
            }
            PMMLDocument pmmlDoc = null;
            String failure = null;
            // Kept so that the original parse error can be attached as the cause below.
            Exception failureCause = null;
            XmlObject xmlDoc;
            try (LockedSupplier<Document> supplier = ((XMLValue<Document>) cell).getDocumentSupplier()) {
                xmlDoc = XmlObject.Factory.parse(supplier.get().cloneNode(true));
                if (xmlDoc instanceof PMMLDocument) {
                    pmmlDoc = (PMMLDocument) xmlDoc;
                } else if (PMMLUtils.isOldKNIMEPMML(xmlDoc) || PMMLUtils.is4_1PMML(xmlDoc)) {
                    String updatedPMML = PMMLUtils.getUpdatedVersionAndNamespace(xmlDoc);
                    // Parse the modified document and assign it to a PMMLDocument.
                    pmmlDoc = PMMLDocument.Factory.parse(updatedPMML);
                } else {
                    failure = "No valid PMML v 3.x/4.0/4.1 document";
                }
            } catch (XmlException e) {
                // Only log here when the failure is not rethrown below.
                if (!m_failOnInvalid.getBooleanValue()) {
                    LOGGER.error("Invalid PMML in row " + row.getKey() + ": " + e.getMessage(), e);
                }
                failure = e.getMessage();
                failureCause = e;
            }
            if (failure != null) {
                m_failCounter.incrementAndGet();
                if (m_failOnInvalid.getBooleanValue()) {
                    String message = "Invalid PMML in row " + row.getKey() + ": " + failure;
                    // Preserve the originating exception (if any) instead of only its message.
                    throw failureCause == null
                        ? new RuntimeException(message)
                        : new RuntimeException(message, failureCause);
                }
                return new MissingCell(failure);
            }
            try {
                return PMMLCellFactory.create(pmmlDoc.toString());
            } catch (Exception e) {
                return new MissingCell(e.getMessage());
            }
        }
    };
    if (m_replaceColumn.getBooleanValue()) {
        rearranger.replace(fac, colIndex);
    } else {
        rearranger.append(fac);
    }
    return rearranger;
}
use of org.knime.core.data.xml.XMLValue in project knime-core by knime.
the class XMLDOMCellReader method readXML.
/**
 * {@inheritDoc}
 *
 * <p>The first invocation parses the complete input with the document builder, runs
 * {@code removeEmptyTextRecursive} on the result and returns it as a cell. Every later
 * invocation returns {@code null}.
 *
 * @throws IOException if reading fails, or wrapping the {@link SAXException} raised by the parser
 */
@SuppressWarnings("unchecked")
@Override
public XMLValue<Document> readXML() throws IOException {
    // Only one document can be read from the input; signal exhaustion afterwards.
    if (!m_first) {
        return null;
    }
    m_first = false;

    final Document parsed;
    try {
        parsed = m_builder.parse(m_in);
    } catch (SAXException saxFailure) {
        throw new IOException(saxFailure);
    }
    removeEmptyTextRecursive(parsed, new LinkedList<Boolean>());
    return (XMLValue<Document>) XMLCellFactory.create(parsed);
}
Aggregations