use of org.apache.xmlbeans.XmlObject in project knime-core by knime.
the class PMMLPortObject method loadFrom.
/**
 * Initializes the PMML port object from an XML input stream.
 *
 * <p>The stream is first parsed as a generic {@link XmlObject}. If the result is already a
 * {@link PMMLDocument} it is used directly. Otherwise, if {@code PMMLUtils.isOldKNIMEPMML} or
 * {@code PMMLUtils.is4_1PMML} accepts the document, its version and namespace are rewritten via
 * {@code PMMLUtils.getUpdatedVersionAndNamespace} and the rewritten text is parsed again.
 *
 * @param spec the referring spec of this object
 * @param is the pmml input stream; it is closed by this method, whether or not parsing succeeds
 * @throws IOException if reading from or closing the stream fails
 * @throws XmlException if something goes wrong during reading
 * @throws RuntimeException if the document is not a PMML document and cannot be converted to one
 */
public void loadFrom(final PMMLPortObjectSpec spec, final InputStream is) throws IOException, XmlException {
    // disallow close in the factory -- we had indeterministic behavior
    // where close was called more than once (which should be OK) but as
    // the argument input stream is a NonClosableZipInput, which delegates
    // close to closeEntry(), we have to make sure that close is only
    // called once.
    // TODO: The document is read twice here. Could we "probe" into the file to check the version?
    final XmlObject xmlDoc;
    try {
        xmlDoc = XmlObject.Factory.parse(new NonClosableInputStream(is));
    } finally {
        // Close exactly once, also when parsing fails, so the stream is not leaked on the error path.
        is.close();
    }

    if (xmlDoc instanceof PMMLDocument) {
        m_pmmlDoc = (PMMLDocument) xmlDoc;
    } else if (PMMLUtils.isOldKNIMEPMML(xmlDoc) || PMMLUtils.is4_1PMML(xmlDoc)) {
        /* Try to recover when reading a PMML 3.x/4.0 document that
         * was produced by KNIME by just replacing the PMML version and
         * namespace. */
        try {
            String updatedPMML = PMMLUtils.getUpdatedVersionAndNamespace(xmlDoc);
            /* Parse the modified document and assign it to a
             * PMMLDocument. */
            m_pmmlDoc = PMMLDocument.Factory.parse(updatedPMML);
        } catch (Exception e) {
            throw new RuntimeException("Parsing of PMML v 3.x/4.0 document failed.", e);
        }
        LOGGER.info("KNIME produced PMML 3.x/4.0 converted to PMML 4.1.");
    } else {
        throw new RuntimeException("Parsing of PMML v 3.x/4.0 document failed.");
    }
    m_spec = spec;
}
use of org.apache.xmlbeans.XmlObject in project knime-core by knime.
the class PMMLPortObject method addModelTranslater.
/**
 * Adds the model of the content translator to the PMML document and then
 * reconciles the model's mining schema with the derived fields.
 *
 * <p>Mining fields whose name matches a derived field are dropped from the
 * mining schema, and every field referenced by the local transformations that
 * is neither a derived field nor already a mining field is added to it. If no
 * mining schema is found, the method logs that fact and returns after the
 * model has been exported.
 *
 * @param modelTranslator the model translator containing the model to be
 *            added
 */
public void addModelTranslater(final PMMLTranslator modelTranslator) {
    SchemaType type = modelTranslator.exportTo(m_pmmlDoc, m_spec);
    LocalTransformations localTransformations = moveDerivedFields(type);

    /* Remove mining fields from mining schema that were created as a
     * derived field. In KNIME the origin of columns is not distinguished
     * and all columns are added to the mining schema. But in PMML this
     * results in duplicate entries. Those columns should only appear once
     * as derived field in the transformation dictionary or local
     * transformations. */
    Set<String> derivedFields = new HashSet<String>();
    for (DerivedField derivedField : getDerivedFields()) {
        derivedFields.add(derivedField.getName());
    }

    MiningSchema miningSchema = PMMLUtils.getFirstMiningSchema(m_pmmlDoc, type);
    if (miningSchema == null) {
        LOGGER.info("No mining schema found.");
        return;
    }

    // Single pass: keep only the mining fields that are not derived fields,
    // instead of copying everything and removing entries one by one.
    MiningField[] miningFieldArray = miningSchema.getMiningFieldArray();
    List<MiningField> miningFields = new ArrayList<MiningField>(miningFieldArray.length);
    Set<String> miningFieldNames = new HashSet<String>();
    for (MiningField miningField : miningFieldArray) {
        String miningFieldName = miningField.getName();
        if (derivedFields.contains(miningFieldName)) {
            LOGGER.debug("Removing field \"" + miningFieldName + "\" from MiningFields as it is a DerivedField.");
        } else {
            miningFields.add(miningField);
            miningFieldNames.add(miningFieldName);
        }
    }

    /* According to the PMML Spec DerivedFields must ultimately refer back
     * to active MiningFields of the model's MiningSchema. Therefore we
     * have to add all referred DataFields to the MiningSchema. */
    String fullPath = NAMESPACE_DECLARATION + "$this/pmml:DerivedField/*/@field" + "| $this/pmml:DerivedField//pmml:FieldColumnPair/@field";
    XmlObject[] xmlDescendants = localTransformations.selectPath(fullPath);

    // collect all referred field names (insertion order is kept)
    Set<String> referencedFields = new LinkedHashSet<String>();
    for (XmlObject xo : xmlDescendants) {
        XmlCursor xmlCursor = xo.newCursor();
        try {
            referencedFields.add(xmlCursor.getTextValue());
        } finally {
            // release the cursor even if reading the text value fails
            xmlCursor.dispose();
        }
    }

    for (String referencedField : referencedFields) {
        if (!derivedFields.contains(referencedField) && !miningFieldNames.contains(referencedField)) {
            /* Add them to the mining schema if they are not already
             * contained there and if they don't refer to derived fields. */
            MiningField miningField = MiningField.Factory.newInstance();
            miningField.setName(referencedField);
            miningField.setInvalidValueTreatment(INVALIDVALUETREATMENTMETHOD.AS_IS);
            LOGGER.debug("Adding field \"" + referencedField + "\" to MiningSchema because it is referenced in " + "LocalTransformations.");
            miningFields.add(miningField);
        }
    }
    miningSchema.setMiningFieldArray(miningFields.toArray(new MiningField[0]));
}
use of org.apache.xmlbeans.XmlObject in project knime-core by knime.
the class PMMLConditionTranslator method parseCompoundPredicate.
/**
 * Create a KNIME compound predicate from a PMML compound predicate. Note that the "order" of the sub-predicates is
 * important (because of surrogate predicate). Therefore, we need to use an xmlCursor to retrieve the order of the
 * predicates.
 *
 * <p>The method works in three steps: it parses all sub-predicates grouped by type, walks the child elements with
 * a cursor to record their document order, and finally rebuilds the predicate list in that order.
 *
 * @param xmlCompoundPredicate the PMML Compound Predicate element
 * @return the KNIME Compound Predicate
 */
protected PMMLCompoundPredicate parseCompoundPredicate(final CompoundPredicate xmlCompoundPredicate) {
    // Step 1: parse the sub-predicates, grouped by type (document order is lost here).
    List<PMMLPredicate> tempPredicateList = new ArrayList<PMMLPredicate>();
    if (xmlCompoundPredicate.sizeOfSimplePredicateArray() != 0) {
        for (SimplePredicate xmlSubSimplePredicate : xmlCompoundPredicate.getSimplePredicateList()) {
            tempPredicateList.add(parseSimplePredicate(xmlSubSimplePredicate));
        }
    }
    if (xmlCompoundPredicate.sizeOfCompoundPredicateArray() != 0) {
        for (CompoundPredicate xmlSubCompoundPredicate : xmlCompoundPredicate.getCompoundPredicateList()) {
            tempPredicateList.add(parseCompoundPredicate(xmlSubCompoundPredicate));
        }
    }
    if (xmlCompoundPredicate.sizeOfSimpleSetPredicateArray() != 0) {
        for (SimpleSetPredicate xmlSubSimpleSetPredicate : xmlCompoundPredicate.getSimpleSetPredicateList()) {
            tempPredicateList.add(parseSimpleSetPredicate(xmlSubSimpleSetPredicate));
        }
    }
    // The True/False entries added here are not consumed by the ordering step below, which creates
    // fresh instances; they are kept so that the indices into tempPredicateList stay as before.
    if (xmlCompoundPredicate.sizeOfTrueArray() != 0) {
        for (int i = 0; i < xmlCompoundPredicate.sizeOfTrueArray(); i++) {
            tempPredicateList.add(new PMMLTruePredicate());
        }
    }
    if (xmlCompoundPredicate.sizeOfFalseArray() != 0) {
        for (int i = 0; i < xmlCompoundPredicate.sizeOfFalseArray(); i++) {
            tempPredicateList.add(new PMMLFalsePredicate());
        }
    }

    // Step 2: walk the children in document order and record one marker per child.
    List<String> predicateNames = new ArrayList<String>();
    XmlCursor xmlCursor = xmlCompoundPredicate.newCursor();
    try {
        if (xmlCursor.toFirstChild()) {
            do {
                XmlObject xmlElement = xmlCursor.getObject();
                if (xmlElement instanceof CompoundPredicateDocument.CompoundPredicate) {
                    predicateNames.add(COMPOUND);
                } else if (xmlElement instanceof TrueDocument.True) {
                    predicateNames.add(TRUE);
                } else if (xmlElement instanceof FalseDocument.False) {
                    predicateNames.add(FALSE);
                } else {
                    // Any other child is identified by the (mapped) value of its "field" attribute.
                    XmlCursor elementCursor = xmlElement.newCursor();
                    try {
                        if (elementCursor.toFirstAttribute()) {
                            do {
                                if ("field".equals(elementCursor.getName().getLocalPart())) {
                                    predicateNames.add(m_nameMapper.getColumnName(elementCursor.getTextValue()));
                                    break;
                                }
                            } while (elementCursor.toNextAttribute());
                        }
                    } finally {
                        elementCursor.dispose();
                    }
                }
            } while (xmlCursor.toNextSibling());
        }
    } finally {
        // cursors hold on to the underlying XML store and must be released explicitly
        xmlCursor.dispose();
    }

    // ------------------------------------------------------
    // Step 3: sort the predicate list according to the recorded document order
    List<PMMLPredicate> predicateList = new ArrayList<PMMLPredicate>();
    List<PMMLPredicate> compoundList = new ArrayList<PMMLPredicate>();
    for (PMMLPredicate tempPredicate : tempPredicateList) {
        if (tempPredicate instanceof PMMLCompoundPredicate) {
            compoundList.add(tempPredicate);
        }
    }
    for (String name : predicateNames) {
        if (name.equals(COMPOUND)) {
            // compound predicates are consumed in the order in which they were parsed
            predicateList.add(compoundList.get(0));
            compoundList.remove(0);
        } else if (name.equals(TRUE)) {
            predicateList.add(new PMMLTruePredicate());
        } else if (name.equals(FALSE)) {
            predicateList.add(new PMMLFalsePredicate());
        } else {
            // find the first not-yet-used simple / simple-set predicate on this field
            int foundIndex = -1, i = 0;
            for (PMMLPredicate tempPredicate : tempPredicateList) {
                if (tempPredicate instanceof PMMLSimplePredicate) {
                    if (name.equals(((PMMLSimplePredicate) tempPredicate).getSplitAttribute())) {
                        predicateList.add(tempPredicate);
                        foundIndex = i;
                        break;
                    }
                } else if (tempPredicate instanceof PMMLSimpleSetPredicate) {
                    if (name.equals(((PMMLSimpleSetPredicate) tempPredicate).getSplitAttribute())) {
                        predicateList.add(tempPredicate);
                        foundIndex = i;
                        break;
                    }
                }
                ++i;
            }
            assert foundIndex >= 0 : tempPredicateList + "\n" + name;
            // remove the match so that a second predicate on the same field picks the next candidate
            tempPredicateList.remove(foundIndex);
        }
    }

    LinkedList<PMMLPredicate> subPredicates = new LinkedList<PMMLPredicate>(predicateList);
    String operator = xmlCompoundPredicate.getBooleanOperator().toString();
    PMMLCompoundPredicate compoundPredicate = newCompoundPredicate(operator);
    compoundPredicate.setPredicates(subPredicates);
    return compoundPredicate;
}
use of org.apache.xmlbeans.XmlObject in project knime-core by knime.
the class XML2PMMLNodeModel method createColRearranger.
/**
 * Builds the column rearranger that converts the selected XML column into a PMML column.
 *
 * <p>Depending on the "replace column" setting the converted column either replaces the input
 * column or is appended under a unique name derived from the configured new column name.
 * Cells that cannot be converted either raise a {@link RuntimeException} or become missing
 * cells, depending on the "fail on invalid" setting; every such failure increments the fail counter.
 *
 * @param spec the input table spec
 * @return the rearranger performing the conversion
 * @throws InvalidSettingsException if the configured column does not exist or has an incompatible type
 */
private ColumnRearranger createColRearranger(final DataTableSpec spec) throws InvalidSettingsException {
    if (m_xmlColumnName.getStringValue() == null) {
        guessDefaultXMLColumn(spec);
    }
    String xmlColName = m_xmlColumnName.getStringValue();
    String newColName = m_newColumnName.getStringValue();
    final int colIndex = spec.findColumnIndex(xmlColName);
    CheckUtils.checkSetting(colIndex >= 0, "Column: '%s' does not exist anymore.", xmlColName);
    final DataColumnSpec colSpec = spec.getColumnSpec(colIndex);
    CheckUtils.checkSetting(colSpec.getType().isCompatible(StringValue.class), "Selected column '%s' is not string/xml-compatible", xmlColName);

    DataColumnSpecCreator colSpecCreator;
    if (newColName != null && !m_replaceColumn.getBooleanValue()) {
        String newName = DataTableSpec.getUniqueColumnName(spec, newColName);
        colSpecCreator = new DataColumnSpecCreator(newName, PMMLCell.TYPE);
    } else {
        // reuse the input column's spec but switch it to the PMML type and drop handlers and domain
        colSpecCreator = new DataColumnSpecCreator(colSpec);
        colSpecCreator.setType(PMMLCell.TYPE);
        colSpecCreator.removeAllHandlers();
        colSpecCreator.setDomain(null);
    }
    DataColumnSpec outColumnSpec = colSpecCreator.createSpec();
    ColumnRearranger rearranger = new ColumnRearranger(spec);

    CellFactory fac = new SingleCellFactory(outColumnSpec) {
        @Override
        public DataCell getCell(final DataRow row) {
            DataCell cell = row.getCell(colIndex);
            if (cell.isMissing()) {
                return DataType.getMissingCell();
            }

            PMMLDocument pmmlDoc = null;
            String failure = null;
            Exception failureCause = null;
            XmlObject xmlDoc;
            // NOTE(review): the column is only checked for StringValue compatibility above, yet the
            // cell is cast to XMLValue here -- confirm that every accepted column type is an XMLValue.
            try (LockedSupplier<Document> supplier = ((XMLValue<Document>) cell).getDocumentSupplier()) {
                xmlDoc = XmlObject.Factory.parse(supplier.get().cloneNode(true));
                if (xmlDoc instanceof PMMLDocument) {
                    pmmlDoc = (PMMLDocument) xmlDoc;
                } else if (PMMLUtils.isOldKNIMEPMML(xmlDoc) || PMMLUtils.is4_1PMML(xmlDoc)) {
                    String updatedPMML = PMMLUtils.getUpdatedVersionAndNamespace(xmlDoc);
                    /* Parse the modified document and assign it to a
                     * PMMLDocument. */
                    pmmlDoc = PMMLDocument.Factory.parse(updatedPMML);
                } else {
                    failure = "No valid PMML v 3.x/4.0/4.1 document";
                }
            } catch (XmlException e) {
                if (!m_failOnInvalid.getBooleanValue()) {
                    LOGGER.error("Invalid PMML in row " + row.getKey() + ": " + e.getMessage(), e);
                }
                // never leave 'failure' null after an exception, otherwise the failed parse
                // would be treated as a success below
                failure = e.getMessage() != null ? e.getMessage() : e.toString();
                failureCause = e;
            }

            if (failure != null) {
                m_failCounter.incrementAndGet();
                if (m_failOnInvalid.getBooleanValue()) {
                    // keep the original exception (if any) as cause for diagnosis
                    throw new RuntimeException("Invalid PMML in row " + row.getKey() + ": " + failure, failureCause);
                }
                return new MissingCell(failure);
            }

            try {
                return PMMLCellFactory.create(pmmlDoc.toString());
            } catch (Exception e) {
                return new MissingCell(e.getMessage());
            }
        }
    };

    if (m_replaceColumn.getBooleanValue()) {
        rearranger.replace(fac, colIndex);
    } else {
        rearranger.append(fac);
    }
    return rearranger;
}
use of org.apache.xmlbeans.XmlObject in project hackpad by dropbox.
the class XML method insertChild.
/**
 * Inserts the given value at the position of the destination cursor.
 *
 * <p>How the value is handled depends on its runtime type:
 * <ul>
 * <li>{@code null} or {@code Undefined}: nothing is inserted;</li>
 * <li>{@code XmlCursor}: passed directly to {@code moveSrcToDest};</li>
 * <li>{@code XML}: an attribute is inserted as its string value, anything else via a temporary cursor;</li>
 * <li>{@code XMLList}: every item is inserted in list order;</li>
 * <li>anything else: converted to a string with {@code ScriptRuntime.toString} and inserted as text.</li>
 * </ul>
 *
 * @param curs the destination cursor
 * @param xmlToInsert the value to insert; may be {@code null}
 */
private void insertChild(XmlCursor curs, Object xmlToInsert) {
    if (xmlToInsert == null || xmlToInsert instanceof Undefined) {
        // Do nothing
    } else if (xmlToInsert instanceof XmlCursor) {
        // The caller owns this cursor, so it is not disposed here.
        moveSrcToDest((XmlCursor) xmlToInsert, curs, true);
    } else if (xmlToInsert instanceof XML) {
        XML xmlValue = (XML) xmlToInsert;
        // If it's an attribute, then change to text node
        if (xmlValue.tokenType() == XmlCursor.TokenType.ATTR) {
            insertChild(curs, xmlValue.toString());
        } else {
            XmlCursor cursToInsert = xmlValue.newCursor();
            try {
                moveSrcToDest(cursToInsert, curs, true);
            } finally {
                cursToInsert.dispose();
            }
        }
    } else if (xmlToInsert instanceof XMLList) {
        XMLList list = (XMLList) xmlToInsert;
        for (int i = 0; i < list.length(); i++) {
            insertChild(curs, list.item(i));
        }
    } else {
        // Convert to string and make XML out of it
        String xmlStr = ScriptRuntime.toString(xmlToInsert);
        // Create an empty document.
        XmlObject xo = XmlObject.Factory.newInstance();
        XmlCursor sourceCurs = xo.newCursor();
        try {
            sourceCurs.toNextToken();
            // To hold the text.
            sourceCurs.insertChars(xmlStr);
            sourceCurs.toPrevToken();
            // Call us again with the cursor.
            moveSrcToDest(sourceCurs, curs, true);
        } finally {
            // The temporary cursor was created here, so release it here,
            // as is done for the cursor in the XML branch.
            sourceCurs.dispose();
        }
    }
}
Aggregations