Use of org.dmg.pmml.TransformationDictionaryDocument.TransformationDictionary in project knime-core by knime.
The class PMMLReaderNodeModel, method execute.
/**
 * {@inheritDoc}
 */
@Override
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    // The first input is only consulted when m_hasPMMLIn is set.
    final PMMLPortObject upstream;
    if (m_hasPMMLIn) {
        upstream = (PMMLPortObject) inData[0];
    } else {
        upstream = null;
    }

    // Without upstream PMML the port object held by this node is returned as is.
    if (upstream == null) {
        return new PortObject[] {m_pmmlPort};
    }

    // Wrap the upstream derived fields in a fresh dictionary, add them as
    // global transformations and then push them down into the model.
    final TransformationDictionary upstreamDict = TransformationDictionary.Factory.newInstance();
    upstreamDict.setDerivedFieldArray(upstream.getDerivedFields());
    m_pmmlPort.addGlobalTransformations(upstreamDict);
    m_pmmlPort.moveGlobalTransformationsToModel();
    return new PortObject[] {m_pmmlPort};
}
Use of org.dmg.pmml.TransformationDictionaryDocument.TransformationDictionary in project knime-core by knime.
The class PMMLNormalizeTranslator, method exportToTransDict.
/**
 * {@inheritDoc}
 */
@Override
public TransformationDictionary exportToTransDict() {
    // Start from an empty dictionary and fill it with the derived fields
    // produced by createDerivedFields().
    final TransformationDictionary exported = TransformationDictionary.Factory.newInstance();
    exported.setDerivedFieldArray(createDerivedFields());
    return exported;
}
Use of org.dmg.pmml.TransformationDictionaryDocument.TransformationDictionary in project knime-core by knime.
The class PMMLPortObject, method moveGlobalTransformationsToModel.
/**
 * Moves the derived fields of the transformation dictionary to the local
 * transformations of the model, if a model exists.
 *
 * <p>The target is the first model of the first model type that is present
 * in the document; the types are checked in this order: tree, clustering,
 * neural network, support vector machine, regression, general regression,
 * rule set. If the chosen model has no local transformations element yet,
 * one is created. The global derived fields are combined with the model's
 * existing local derived fields via {@code appendDerivedFields}, and the
 * transformation dictionary is left with an empty derived field array.</p>
 *
 * <p>The method does nothing when there is no transformation dictionary,
 * when it holds no derived fields, or when the document contains none of
 * the model types listed above.</p>
 */
public void moveGlobalTransformationsToModel() {
    final PMML pmml = m_pmmlDoc.getPMML();
    final TransformationDictionary transDict = pmml.getTransformationDictionary();
    if (transDict == null) {
        // nothing to be moved
        return;
    }
    // Look the derived fields up once and reuse the result below.
    final DerivedField[] globalDerivedFields = transDict.getDerivedFieldArray();
    if (globalDerivedFields == null || globalDerivedFields.length == 0) {
        // nothing to be moved
        return;
    }

    // Locate (or create) the local transformations of the first model.
    LocalTransformations localTrans = null;
    if (pmml.sizeOfTreeModelArray() > 0) {
        final TreeModel model = pmml.getTreeModelArray(0);
        localTrans = model.getLocalTransformations();
        if (localTrans == null) {
            localTrans = model.addNewLocalTransformations();
        }
    } else if (pmml.sizeOfClusteringModelArray() > 0) {
        final ClusteringModel model = pmml.getClusteringModelArray(0);
        localTrans = model.getLocalTransformations();
        if (localTrans == null) {
            localTrans = model.addNewLocalTransformations();
        }
    } else if (pmml.sizeOfNeuralNetworkArray() > 0) {
        final NeuralNetwork model = pmml.getNeuralNetworkArray(0);
        localTrans = model.getLocalTransformations();
        if (localTrans == null) {
            localTrans = model.addNewLocalTransformations();
        }
    } else if (pmml.sizeOfSupportVectorMachineModelArray() > 0) {
        final SupportVectorMachineModel model = pmml.getSupportVectorMachineModelArray(0);
        localTrans = model.getLocalTransformations();
        if (localTrans == null) {
            localTrans = model.addNewLocalTransformations();
        }
    } else if (pmml.sizeOfRegressionModelArray() > 0) {
        final RegressionModel model = pmml.getRegressionModelArray(0);
        localTrans = model.getLocalTransformations();
        if (localTrans == null) {
            localTrans = model.addNewLocalTransformations();
        }
    } else if (pmml.sizeOfGeneralRegressionModelArray() > 0) {
        final GeneralRegressionModel model = pmml.getGeneralRegressionModelArray(0);
        localTrans = model.getLocalTransformations();
        if (localTrans == null) {
            localTrans = model.addNewLocalTransformations();
        }
    } else if (pmml.sizeOfRuleSetModelArray() > 0) {
        final RuleSetModel model = pmml.getRuleSetModelArray(0);
        localTrans = model.getLocalTransformations();
        if (localTrans == null) {
            localTrans = model.addNewLocalTransformations();
        }
    }

    if (localTrans != null) {
        final DerivedField[] derivedFields =
            appendDerivedFields(localTrans.getDerivedFieldArray(), globalDerivedFields);
        localTrans.setDerivedFieldArray(derivedFields);
        // remove derived fields from TransformationDictionary
        transDict.setDerivedFieldArray(new DerivedField[0]);
    }
    // else do nothing as no model exists yet
}
Use of org.dmg.pmml.TransformationDictionaryDocument.TransformationDictionary in project knime-core by knime.
The class PMMLPortObject, method addGlobalTransformations.
/**
 * Adds global transformations to the PMML document. Only DerivedField
 * elements are supported so far. If no global transformations are set so
 * far the dictionary is set as new transformation dictionary, otherwise
 * all contained transformations are appended to the existing one.
 *
 * <p>The derived field names of the resulting dictionary are validated
 * before the document is touched, so a rejected call leaves the document
 * unchanged. After the dictionary is updated, data fields whose names
 * match a derived field are removed from the data dictionary, field
 * references inside an existing model are remapped, and the port object
 * spec is recreated with the derived fields' column names added as
 * preprocessing columns.</p>
 *
 * @param dictionary the transformation dictionary that contains the
 *            transformations to be added
 * @throws IllegalArgumentException if the dictionary contains
 *             DefineFunction elements, or if the resulting set of derived
 *             fields would contain a duplicate name
 */
public void addGlobalTransformations(final TransformationDictionary dictionary) {
    if (dictionary.getDefineFunctionArray().length > 0) {
        throw new IllegalArgumentException("DefineFunctions are not " + "supported so far. Only derived fields are allowed.");
    }

    final PMML pmml = m_pmmlDoc.getPMML();
    TransformationDictionary dict = pmml.getTransformationDictionary();

    // Work out what the dictionary will contain WITHOUT modifying the
    // document yet, so that validation failures have no side effects.
    final DerivedField[] merged;
    if (dict == null) {
        merged = dictionary.getDerivedFieldArray();
    } else {
        merged = appendDerivedFields(dict.getDerivedFieldArray(), dictionary.getDerivedFieldArray());
    }

    // Validate uniqueness of the derived field names and collect the
    // column names (display name if present, otherwise the field name).
    final List<String> colNames = new ArrayList<String>(merged.length);
    final Set<String> dfNames = new HashSet<String>();
    for (final DerivedField field : merged) {
        final String derivedName = field.getName();
        if (!dfNames.add(derivedName)) {
            throw new IllegalArgumentException("Derived field name \"" + derivedName + "\" is not unique.");
        }
        final String displayName = field.getDisplayName();
        colNames.add(displayName == null ? derivedName : displayName);
    }

    // Validation passed: now add the transformations to the document.
    if (dict == null) {
        pmml.setTransformationDictionary(dictionary);
        dict = pmml.getTransformationDictionary();
    } else {
        // append the transformations to the existing dictionary
        dict.setDerivedFieldArray(merged);
    }
    // Re-read the derived fields from the document's own dictionary so the
    // mapper below works on the elements that are part of the document.
    final DerivedField[] df = dict.getDerivedFieldArray();

    /* Remove data fields from data dictionary that were created as a
     * derived field. In KNIME the origin of columns is not distinguished
     * and all columns are added to the data dictionary. But in PMML this
     * results in duplicate entries. Those columns should only appear once
     * as derived field in the transformation dictionary or local
     * transformations. */
    final DataDictionary dataDict = pmml.getDataDictionary();
    final List<DataField> dataFields = new ArrayList<DataField>();
    for (final DataField dataField : dataDict.getDataFieldArray()) {
        if (!dfNames.contains(dataField.getName())) {
            dataFields.add(dataField);
        }
    }
    dataDict.setDataFieldArray(dataFields.toArray(new DataField[0]));
    // update the number of fields
    dataDict.setNumberOfFields(BigInteger.valueOf(dataFields.size()));

    // -------------------------------------------------
    // update field names in the model if applicable
    final DerivedFieldMapper dfm = new DerivedFieldMapper(df);
    final Map<String, String> derivedFieldMap = dfm.getDerivedFieldMap();
    /* Use XPATH to update field names in the model and move the derived
     * fields to local transformations. */
    // NOTE(review): moveGlobalTransformationsToModel also handles
    // RuleSetModel, but there is no rule set branch here - confirm whether
    // rule set field references need to be remapped as well.
    if (pmml.getTreeModelArray().length > 0) {
        fixAttributeAtPath(pmml, TREE_PATH, FIELD, derivedFieldMap);
    } else if (pmml.getClusteringModelArray().length > 0) {
        fixAttributeAtPath(pmml, CLUSTERING_PATH, FIELD, derivedFieldMap);
    } else if (pmml.getNeuralNetworkArray().length > 0) {
        fixAttributeAtPath(pmml, NN_PATH, FIELD, derivedFieldMap);
    } else if (pmml.getSupportVectorMachineModelArray().length > 0) {
        fixAttributeAtPath(pmml, SVM_PATH, FIELD, derivedFieldMap);
    } else if (pmml.getRegressionModelArray().length > 0) {
        fixAttributeAtPath(pmml, REGRESSION_PATH_1, FIELD, derivedFieldMap);
        fixAttributeAtPath(pmml, REGRESSION_PATH_2, NAME, derivedFieldMap);
    } else if (pmml.getGeneralRegressionModelArray().length > 0) {
        fixAttributeAtPath(pmml, GR_PATH_1, NAME, derivedFieldMap);
        fixAttributeAtPath(pmml, GR_PATH_2, LABEL, derivedFieldMap);
        fixAttributeAtPath(pmml, GR_PATH_3, PREDICTOR_NAME, derivedFieldMap);
    }
    // else do nothing as no model exists yet
    // --------------------------------------------------

    // Recreate the spec with the derived fields registered as
    // preprocessing columns.
    final PMMLPortObjectSpecCreator creator = new PMMLPortObjectSpecCreator(this, m_spec.getDataTableSpec());
    creator.addPreprocColNames(colNames);
    m_spec = creator.createSpec();
}
Use of org.dmg.pmml.TransformationDictionaryDocument.TransformationDictionary in project knime-core by knime.
The class AutoBinnerUtility, method translate.
/**
 * Translates a {@link PMMLPreprocDiscretize} object into a {@link PMMLPortObject}.
 *
 * <p>For every name listed in the discretize configuration, the bins are
 * converted to {@link NumericBin}s and registered under the original
 * (input) column name. When the configured name differs from the original
 * column name it is recorded as the name of the column to append;
 * otherwise {@code null} is recorded for that column.</p>
 *
 * @param pmmlDiscretize {@link PMMLPreprocDiscretize} object
 * @param dataTableSpec {@link DataTableSpec} of the incoming {@link BufferedDataTable}
 * @return a {@link PMMLPortObject} containing required parameters for binning operation
 */
public static PMMLPortObject translate(final PMMLPreprocDiscretize pmmlDiscretize, final DataTableSpec dataTableSpec) {
    final Map<String, Bin[]> binsByColumn = new HashMap<>();
    final Map<String, String> appendedNameByColumn = new HashMap<>();

    for (final String targetName : pmmlDiscretize.getConfiguration().getNames()) {
        final PMMLDiscretize discretize = pmmlDiscretize.getConfiguration().getDiscretize(targetName);
        final List<PMMLDiscretizeBin> pmmlBins = discretize.getBins();
        final String sourceName = discretize.getField();

        // A null entry is stored when the target name equals the source column name.
        if (targetName.equals(sourceName)) {
            appendedNameByColumn.put(sourceName, null);
        } else {
            appendedNameByColumn.put(sourceName, targetName);
        }

        final NumericBin[] numericBins = pmmlBins.stream()
            .map(AutoBinnerUtility::getNumericBin)
            .toArray(NumericBin[]::new);
        binsByColumn.put(sourceName, numericBins);
    }

    // Build the port object on top of the spec derived from the column mapping.
    final DataTableSpec outSpec = createNewDataTableSpec(dataTableSpec, appendedNameByColumn);
    final PMMLPortObjectSpecCreator specCreator = new PMMLPortObjectSpecCreator(outSpec);
    final PMMLPortObject result = new PMMLPortObject(specCreator.createSpec(), null, outSpec);

    // Export the binning as a transformation dictionary and attach it to the port object.
    final PMMLBinningTranslator translator =
        new PMMLBinningTranslator(binsByColumn, appendedNameByColumn, new DerivedFieldMapper(result));
    result.addGlobalTransformations(translator.exportToTransDict());
    return result;
}
Aggregations