Search in sources :

Example 6 with TransformationDictionary

use of org.dmg.pmml.TransformationDictionaryDocument.TransformationDictionary in project knime-core by knime.

the class PMMLReaderNodeModel method execute.

/**
 * {@inheritDoc}
 * <p>
 * When this node has a PMML input ({@code m_hasPMMLIn}) and that input is
 * present, its derived fields are added to {@code m_pmmlPort} as global
 * transformations and afterwards moved into the model. The single output
 * is always {@code m_pmmlPort}.
 */
@Override
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    final PMMLPortObject upstream;
    if (m_hasPMMLIn) {
        upstream = (PMMLPortObject) inData[0];
    } else {
        upstream = null;
    }
    if (upstream == null) {
        // nothing to merge: hand out the port object as it is
        return new PortObject[] { m_pmmlPort };
    }
    // wrap the upstream derived fields in a fresh dictionary so they can be
    // added through the regular global-transformation path
    final TransformationDictionary upstreamTransformations = TransformationDictionary.Factory.newInstance();
    upstreamTransformations.setDerivedFieldArray(upstream.getDerivedFields());
    m_pmmlPort.addGlobalTransformations(upstreamTransformations);
    m_pmmlPort.moveGlobalTransformationsToModel();
    return new PortObject[] { m_pmmlPort };
}
Also used : PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) TransformationDictionary(org.dmg.pmml.TransformationDictionaryDocument.TransformationDictionary) PortObject(org.knime.core.node.port.PortObject)

Example 7 with TransformationDictionary

use of org.dmg.pmml.TransformationDictionaryDocument.TransformationDictionary in project knime-core by knime.

the class PMMLNormalizeTranslator method exportToTransDict.

/**
 * {@inheritDoc}
 * <p>
 * The returned dictionary is newly created on every call and carries the
 * derived fields produced by {@code createDerivedFields()}.
 */
@Override
public TransformationDictionary exportToTransDict() {
    final TransformationDictionary exported = TransformationDictionary.Factory.newInstance();
    // the derived fields are the only content placed into the dictionary
    exported.setDerivedFieldArray(createDerivedFields());
    return exported;
}
Also used : TransformationDictionary(org.dmg.pmml.TransformationDictionaryDocument.TransformationDictionary)

Example 8 with TransformationDictionary

use of org.dmg.pmml.TransformationDictionaryDocument.TransformationDictionary in project knime-core by knime.

the class PMMLPortObject method moveGlobalTransformationsToModel.

/**
 * Moves the content of the transformation dictionary to the local
 * transformations of the model if a model exists.
 * <p>
 * Only the first model of the first model type found is considered. The
 * model types are probed in this order: tree, clustering, neural network,
 * support vector machine, regression, general regression, rule set. If the
 * chosen model has no local transformations yet, they are created. The
 * global derived fields are appended to the model's existing local derived
 * fields and the transformation dictionary is emptied afterwards.
 * <p>
 * If there is no transformation dictionary, it holds no derived fields, or
 * none of the model types above is present, the document is left unchanged.
 */
public void moveGlobalTransformationsToModel() {
    PMML pmml = m_pmmlDoc.getPMML();
    TransformationDictionary transDict = pmml.getTransformationDictionary();
    if (transDict == null) {
        // nothing to be moved
        return;
    }
    // read the array once; it is needed for the emptiness check and the move
    DerivedField[] globalDerivedFields = transDict.getDerivedFieldArray();
    if (globalDerivedFields == null || globalDerivedFields.length == 0) {
        // nothing to be moved
        return;
    }
    // The presence checks use sizeOf...Array() throughout so that no element
    // array has to be built just to find out whether a model exists.
    LocalTransformations localTrans = null;
    if (pmml.sizeOfTreeModelArray() > 0) {
        TreeModel model = pmml.getTreeModelArray(0);
        localTrans = model.getLocalTransformations();
        if (localTrans == null) {
            localTrans = model.addNewLocalTransformations();
        }
    } else if (pmml.sizeOfClusteringModelArray() > 0) {
        ClusteringModel model = pmml.getClusteringModelArray(0);
        localTrans = model.getLocalTransformations();
        if (localTrans == null) {
            localTrans = model.addNewLocalTransformations();
        }
    } else if (pmml.sizeOfNeuralNetworkArray() > 0) {
        NeuralNetwork model = pmml.getNeuralNetworkArray(0);
        localTrans = model.getLocalTransformations();
        if (localTrans == null) {
            localTrans = model.addNewLocalTransformations();
        }
    } else if (pmml.sizeOfSupportVectorMachineModelArray() > 0) {
        SupportVectorMachineModel model = pmml.getSupportVectorMachineModelArray(0);
        localTrans = model.getLocalTransformations();
        if (localTrans == null) {
            localTrans = model.addNewLocalTransformations();
        }
    } else if (pmml.sizeOfRegressionModelArray() > 0) {
        RegressionModel model = pmml.getRegressionModelArray(0);
        localTrans = model.getLocalTransformations();
        if (localTrans == null) {
            localTrans = model.addNewLocalTransformations();
        }
    } else if (pmml.sizeOfGeneralRegressionModelArray() > 0) {
        GeneralRegressionModel model = pmml.getGeneralRegressionModelArray(0);
        localTrans = model.getLocalTransformations();
        if (localTrans == null) {
            localTrans = model.addNewLocalTransformations();
        }
    } else if (pmml.sizeOfRuleSetModelArray() > 0) {
        RuleSetModel model = pmml.getRuleSetModelArray(0);
        localTrans = model.getLocalTransformations();
        if (localTrans == null) {
            localTrans = model.addNewLocalTransformations();
        }
    }
    if (localTrans != null) {
        // existing local fields first, then the former global ones
        DerivedField[] derivedFields = appendDerivedFields(localTrans.getDerivedFieldArray(), globalDerivedFields);
        localTrans.setDerivedFieldArray(derivedFields);
        // remove derived fields from TransformationDictionary
        transDict.setDerivedFieldArray(new DerivedField[0]);
    }
    // else do nothing as no model exists yet
}
Also used : TreeModel(org.dmg.pmml.TreeModelDocument.TreeModel) RuleSetModel(org.dmg.pmml.RuleSetModelDocument.RuleSetModel) LocalTransformations(org.dmg.pmml.LocalTransformationsDocument.LocalTransformations) TransformationDictionary(org.dmg.pmml.TransformationDictionaryDocument.TransformationDictionary) GeneralRegressionModel(org.dmg.pmml.GeneralRegressionModelDocument.GeneralRegressionModel) PMML(org.dmg.pmml.PMMLDocument.PMML) NeuralNetwork(org.dmg.pmml.NeuralNetworkDocument.NeuralNetwork) SupportVectorMachineModel(org.dmg.pmml.SupportVectorMachineModelDocument.SupportVectorMachineModel) DerivedField(org.dmg.pmml.DerivedFieldDocument.DerivedField) ClusteringModel(org.dmg.pmml.ClusteringModelDocument.ClusteringModel) RegressionModel(org.dmg.pmml.RegressionModelDocument.RegressionModel)

Example 9 with TransformationDictionary

use of org.dmg.pmml.TransformationDictionaryDocument.TransformationDictionary in project knime-core by knime.

the class PMMLPortObject method addGlobalTransformations.

/**
 * Adds global transformations to the PMML document. Only DerivedField
 * elements are supported so far. If no global transformations are set so
 * far the dictionary is set as new transformation dictionary, otherwise
 * all contained transformations are appended to the existing one.
 * <p>
 * Afterwards the method
 * <ul>
 * <li>verifies that all derived field names in the resulting dictionary
 * are unique,</li>
 * <li>removes every data field from the data dictionary whose name equals
 * the name of a derived field and updates the field count,</li>
 * <li>rewrites field references in the first model found (tree,
 * clustering, neural network, support vector machine, regression or
 * general regression) using the derived field mapping, and</li>
 * <li>recreates the port object spec with the derived columns registered
 * as preprocessing columns (display name if set, otherwise name).</li>
 * </ul>
 *
 * @param dictionary the transformation dictionary that contains the
 *      transformations to be added
 * @throws IllegalArgumentException if the dictionary contains
 *      DefineFunction elements or if the resulting derived field names
 *      are not unique
 */
public void addGlobalTransformations(final TransformationDictionary dictionary) {
    if (dictionary.sizeOfDefineFunctionArray() > 0) {
        throw new IllegalArgumentException("DefineFunctions are not supported so far. Only derived fields are allowed.");
    }
    final PMML pmml = m_pmmlDoc.getPMML();

    // add the transformations to the TransformationDictionary
    TransformationDictionary dict = pmml.getTransformationDictionary();
    if (dict == null) {
        pmml.setTransformationDictionary(dictionary);
        // re-read so that the following steps work on the dictionary that is
        // stored in the document (presumably a copy of the argument - TODO confirm)
        dict = pmml.getTransformationDictionary();
    } else {
        // append the transformations to the existing dictionary
        DerivedField[] existingFields = dict.getDerivedFieldArray();
        DerivedField[] result = appendDerivedFields(existingFields, dictionary.getDerivedFieldArray());
        dict.setDerivedFieldArray(result);
    }

    // NOTE(review): uniqueness is only checked after the dictionary in the
    // document has been updated, so a failure below leaves the new fields in
    // the document - confirm whether callers rely on that.
    DerivedField[] df = dict.getDerivedFieldArray();
    List<String> colNames = new ArrayList<String>(df.length);
    Set<String> dfNames = new HashSet<String>();
    for (DerivedField derivedField : df) {
        String derivedName = derivedField.getName();
        // Set.add reports false when the name has been seen before
        if (!dfNames.add(derivedName)) {
            throw new IllegalArgumentException("Derived field name \"" + derivedName + "\" is not unique.");
        }
        String displayName = derivedField.getDisplayName();
        colNames.add(displayName == null ? derivedName : displayName);
    }

    /* Remove data fields from data dictionary that were created as a
     * derived field. In KNIME the origin of columns is not distinguished
     * and all columns are added to the data dictionary. But in PMML this
     * results in duplicate entries. Those columns should only appear once
     * as derived field in the transformation dictionary or local
     * transformations. */
    DataDictionary dataDict = pmml.getDataDictionary();
    DataField[] dataFieldArray = dataDict.getDataFieldArray();
    // single pass: keep, in their original order, the fields that are not derived
    List<DataField> dataFields = new ArrayList<DataField>(dataFieldArray.length);
    for (DataField dataField : dataFieldArray) {
        if (!dfNames.contains(dataField.getName())) {
            dataFields.add(dataField);
        }
    }
    dataDict.setDataFieldArray(dataFields.toArray(new DataField[0]));
    // update the number of fields
    dataDict.setNumberOfFields(BigInteger.valueOf(dataFields.size()));

    // -------------------------------------------------
    // update field names in the model if applicable
    DerivedFieldMapper dfm = new DerivedFieldMapper(df);
    Map<String, String> derivedFieldMap = dfm.getDerivedFieldMap();
    /* Use XPATH to update field names in the model. The derived fields
     * themselves stay in the transformation dictionary here. */
    // NOTE(review): rule set models are not handled here although
    // moveGlobalTransformationsToModel() supports them - confirm whether
    // that is intended.
    if (pmml.sizeOfTreeModelArray() > 0) {
        fixAttributeAtPath(pmml, TREE_PATH, FIELD, derivedFieldMap);
    } else if (pmml.sizeOfClusteringModelArray() > 0) {
        fixAttributeAtPath(pmml, CLUSTERING_PATH, FIELD, derivedFieldMap);
    } else if (pmml.sizeOfNeuralNetworkArray() > 0) {
        fixAttributeAtPath(pmml, NN_PATH, FIELD, derivedFieldMap);
    } else if (pmml.sizeOfSupportVectorMachineModelArray() > 0) {
        fixAttributeAtPath(pmml, SVM_PATH, FIELD, derivedFieldMap);
    } else if (pmml.sizeOfRegressionModelArray() > 0) {
        fixAttributeAtPath(pmml, REGRESSION_PATH_1, FIELD, derivedFieldMap);
        fixAttributeAtPath(pmml, REGRESSION_PATH_2, NAME, derivedFieldMap);
    } else if (pmml.sizeOfGeneralRegressionModelArray() > 0) {
        fixAttributeAtPath(pmml, GR_PATH_1, NAME, derivedFieldMap);
        fixAttributeAtPath(pmml, GR_PATH_2, LABEL, derivedFieldMap);
        fixAttributeAtPath(pmml, GR_PATH_3, PREDICTOR_NAME, derivedFieldMap);
    }
    // else do nothing as no model exists yet
    // --------------------------------------------------

    // rebuild the spec so that the derived columns are known as preprocessing columns
    PMMLPortObjectSpecCreator creator = new PMMLPortObjectSpecCreator(this, m_spec.getDataTableSpec());
    creator.addPreprocColNames(colNames);
    m_spec = creator.createSpec();
}
Also used : TransformationDictionary(org.dmg.pmml.TransformationDictionaryDocument.TransformationDictionary) ArrayList(java.util.ArrayList) DataDictionary(org.dmg.pmml.DataDictionaryDocument.DataDictionary) DerivedFieldMapper(org.knime.core.node.port.pmml.preproc.DerivedFieldMapper) DataField(org.dmg.pmml.DataFieldDocument.DataField) PMML(org.dmg.pmml.PMMLDocument.PMML) DerivedField(org.dmg.pmml.DerivedFieldDocument.DerivedField) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet)

Example 10 with TransformationDictionary

use of org.dmg.pmml.TransformationDictionaryDocument.TransformationDictionary in project knime-core by knime.

the class AutoBinnerUtility method translate.

/**
 * Translates a {@link PMMLPreprocDiscretize} object into a {@link PMMLPortObject}.
 * <p>
 * For every configured name the discretization's bins are converted to
 * {@link NumericBin}s and stored under the name of the discretized field.
 * When the configured name differs from that field name it is recorded as
 * the name of the column to append; otherwise {@code null} is recorded.
 *
 * @param pmmlDiscretize {@link PMMLPreprocDiscretize} object
 * @param dataTableSpec {@link DataTableSpec} of the incoming {@link BufferedDataTable}
 * @return a {@link PMMLPortObject} containing required parameters for binning operation
 */
public static PMMLPortObject translate(final PMMLPreprocDiscretize pmmlDiscretize, final DataTableSpec dataTableSpec) {
    final Map<String, Bin[]> binsByColumn = new HashMap<>();
    final Map<String, String> appendNameByColumn = new HashMap<>();
    for (final String targetName : pmmlDiscretize.getConfiguration().getNames()) {
        final PMMLDiscretize discretize = pmmlDiscretize.getConfiguration().getDiscretize(targetName);
        final List<PMMLDiscretizeBin> rawBins = discretize.getBins();
        final String sourceName = discretize.getField();
        if (targetName.equals(sourceName)) {
            // same name on both sides: no separate column name to append
            appendNameByColumn.put(sourceName, null);
        } else {
            appendNameByColumn.put(sourceName, targetName);
        }
        final NumericBin[] converted = rawBins.stream().map(AutoBinnerUtility::getNumericBin).toArray(NumericBin[]::new);
        binsByColumn.put(sourceName, converted);
    }

    // build the port object on the spec derived from the input spec and the append map
    final DataTableSpec outSpec = createNewDataTableSpec(dataTableSpec, appendNameByColumn);
    final PMMLPortObject result = new PMMLPortObject(new PMMLPortObjectSpecCreator(outSpec).createSpec(), null, outSpec);

    // export the binning as a transformation dictionary and register it globally
    final PMMLBinningTranslator translator = new PMMLBinningTranslator(binsByColumn, appendNameByColumn, new DerivedFieldMapper(result));
    result.addGlobalTransformations(translator.exportToTransDict());
    return result;
}
Also used : NumericBin(org.knime.core.util.binning.numeric.NumericBin) DataTableSpec(org.knime.core.data.DataTableSpec) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) TransformationDictionary(org.dmg.pmml.TransformationDictionaryDocument.TransformationDictionary) PMMLDiscretizeBin(org.knime.core.util.binning.auto.pmml.PMMLDiscretizeBin) DerivedFieldMapper(org.knime.core.node.port.pmml.preproc.DerivedFieldMapper) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) PMMLBinningTranslator(org.knime.core.util.binning.numeric.PMMLBinningTranslator) PMMLDiscretize(org.knime.core.util.binning.auto.pmml.PMMLDiscretize) PMMLPortObjectSpecCreator(org.knime.core.node.port.pmml.PMMLPortObjectSpecCreator)

Aggregations

TransformationDictionary (org.dmg.pmml.TransformationDictionaryDocument.TransformationDictionary)19 DerivedField (org.dmg.pmml.DerivedFieldDocument.DerivedField)4 DerivedFieldMapper (org.knime.core.node.port.pmml.preproc.DerivedFieldMapper)4 LocalTransformations (org.dmg.pmml.LocalTransformationsDocument.LocalTransformations)3 PMML (org.dmg.pmml.PMMLDocument.PMML)3 PMMLPortObject (org.knime.core.node.port.pmml.PMMLPortObject)3 ArrayList (java.util.ArrayList)2 HashMap (java.util.HashMap)2 LinkedHashMap (java.util.LinkedHashMap)2 ClusteringModel (org.dmg.pmml.ClusteringModelDocument.ClusteringModel)2 GeneralRegressionModel (org.dmg.pmml.GeneralRegressionModelDocument.GeneralRegressionModel)2 NeuralNetwork (org.dmg.pmml.NeuralNetworkDocument.NeuralNetwork)2 RegressionModel (org.dmg.pmml.RegressionModelDocument.RegressionModel)2 RuleSetModel (org.dmg.pmml.RuleSetModelDocument.RuleSetModel)2 SupportVectorMachineModel (org.dmg.pmml.SupportVectorMachineModelDocument.SupportVectorMachineModel)2 TreeModel (org.dmg.pmml.TreeModelDocument.TreeModel)2 DataTableSpec (org.knime.core.data.DataTableSpec)2 PMMLPortObjectSpecCreator (org.knime.core.node.port.pmml.PMMLPortObjectSpecCreator)2 HashSet (java.util.HashSet)1 LinkedHashSet (java.util.LinkedHashSet)1