Search in sources :

Example 11 with DerivedField

use of org.dmg.pmml.DerivedFieldDocument.DerivedField in project knime-core by knime.

the class PMMLMissingValueReplacementTranslator method exportTo.

/**
 * {@inheritDoc}
 * <p>
 * Appends the derived field of every handler in {@code m_handlers} that provides one to the transformation
 * dictionary of the given document, creating the dictionary first if the document has none. Each appended field
 * keeps its original name as display name and receives a new name from a {@link DerivedFieldMapper}.
 *
 * @return always {@code null}
 */
@Override
public SchemaType exportTo(final PMMLDocument pmmlDoc, final PMMLPortObjectSpec spec) {
    TransformationDictionary dictionary = pmmlDoc.getPMML().getTransformationDictionary();
    if (dictionary == null) {
        dictionary = pmmlDoc.getPMML().addNewTransformationDictionary();
    }
    final DerivedFieldMapper nameMapper = new DerivedFieldMapper(pmmlDoc);
    for (final MissingCellHandler handler : m_handlers) {
        final DerivedField field = handler.getPMMLDerivedField();
        if (field == null) {
            // this handler contributes nothing to the PMML document
            continue;
        }
        // remember the original name before it is overwritten by the mapped one
        final String originalName = field.getName();
        field.setDisplayName(originalName);
        field.setName(nameMapper.createDerivedFieldName(originalName));
        dictionary.getDerivedFieldList().add(field);
    }
    return null;
}
Also used : DerivedFieldMapper(org.knime.core.node.port.pmml.preproc.DerivedFieldMapper) TransformationDictionary(org.dmg.pmml.TransformationDictionaryDocument.TransformationDictionary) DerivedField(org.dmg.pmml.DerivedFieldDocument.DerivedField)

Example 12 with DerivedField

use of org.dmg.pmml.DerivedFieldDocument.DerivedField in project knime-core by knime.

the class DBAutoBinner method intoBinnerMaps.

/**
 * This method translates a {@link PMMLPortObject} into a {@link DBBinnerMaps} object which holds several Maps
 * needed to create a binner statement in {@link StatementManipulator}.
 * <p>
 * For every derived field of the port object the discretize bins are read and, keyed by the name of the
 * discretized (original) column, the following is collected:
 * <ul>
 * <li>the (left, right) margins of each bin; an unset margin becomes negative resp. positive infinity,</li>
 * <li>whether the left resp. right margin of each bin is open,</li>
 * <li>the bin values (names),</li>
 * <li>the name of the column to append, or {@code null} if the unique name derived from the field name
 * contains the original column name directly followed by an asterisk.</li>
 * </ul>
 *
 * @param pmmlPortObject A {@link PMMLPortObject} containing all necessary information about binning operation
 * @param dataTableSpec Incoming {@link DataTableSpec}
 * @return a {@link DBBinnerMaps} object containing required parameters for {@link StatementManipulator}
 */
public static DBBinnerMaps intoBinnerMaps(final PMMLPortObject pmmlPortObject, final DataTableSpec dataTableSpec) {
    Map<String, List<Pair<Double, Double>>> boundariesMap = new LinkedHashMap<>();
    Map<String, List<Pair<Boolean, Boolean>>> boundariesOpenMap = new LinkedHashMap<>();
    Map<String, List<String>> namingMap = new LinkedHashMap<>();
    Map<String, String> appendMap = new LinkedHashMap<>();
    for (DerivedField derivedField : pmmlPortObject.getDerivedFields()) {
        // each column has its own derived field
        List<Pair<Double, Double>> boundaries = new ArrayList<>();
        List<String> names = new ArrayList<>();
        List<Pair<Boolean, Boolean>> boundariesOpen = new ArrayList<>();
        List<DiscretizeBin> discretizeBinList = derivedField.getDiscretize().getDiscretizeBinList();
        String replacedColumnName = DataTableSpec.getUniqueColumnName(dataTableSpec, derivedField.getName());
        String originalColumnName = derivedField.getDiscretize().getField();
        for (DiscretizeBin discBin : discretizeBinList) {
            Interval interval = discBin.getInterval();
            double left = interval.isSetLeftMargin() ? interval.getLeftMargin() : Double.NEGATIVE_INFINITY;
            double right = interval.isSetRightMargin() ? interval.getRightMargin() : Double.POSITIVE_INFINITY;
            boundaries.add(new Pair<>(left, right));
            names.add(discBin.getBinValue());
            boolean leftOpen;
            boolean rightOpen;
            int closure = interval.xgetClosure().enumValue().intValue();
            /*
             * Values of the closure enumeration:
             * static final int INT_OPEN_CLOSED = 1;
             * static final int INT_OPEN_OPEN = 2;
             * static final int INT_CLOSED_OPEN = 3;
             * static final int INT_CLOSED_CLOSED = 4;
             */
            switch (closure) {
                case 2:
                    leftOpen = true;
                    rightOpen = true;
                    break;
                case 3:
                    leftOpen = false;
                    rightOpen = true;
                    break;
                case 4:
                    leftOpen = false;
                    rightOpen = false;
                    break;
                case 1:
                default:
                    // unknown values are treated like open-closed
                    leftOpen = true;
                    rightOpen = false;
                    break;
            }
            boundariesOpen.add(new Pair<>(leftOpen, rightOpen));
        }
        boundariesMap.put(originalColumnName, boundaries);
        namingMap.put(originalColumnName, names);
        boundariesOpenMap.put(originalColumnName, boundariesOpen);
        /* Literal substring test instead of a regular expression built from the column name: column names may
         * contain regex meta characters, which would otherwise be interpreted as pattern syntax or make the
         * pattern invalid. */
        if (replacedColumnName.contains(originalColumnName + "*")) {
            appendMap.put(originalColumnName, null);
        } else {
            appendMap.put(originalColumnName, replacedColumnName);
        }
    }
    return new DBBinnerMaps(boundariesMap, boundariesOpenMap, namingMap, appendMap);
}
Also used : ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) List(java.util.List) PMMLDiscretizeBin(org.knime.base.node.preproc.autobinner.pmml.PMMLDiscretizeBin) DiscretizeBin(org.dmg.pmml.DiscretizeBinDocument.DiscretizeBin) DerivedField(org.dmg.pmml.DerivedFieldDocument.DerivedField) Pair(org.knime.core.util.Pair) PMMLInterval(org.knime.base.node.preproc.autobinner.pmml.PMMLInterval) Interval(org.dmg.pmml.IntervalDocument.Interval)

Example 13 with DerivedField

use of org.dmg.pmml.DerivedFieldDocument.DerivedField in project knime-core by knime.

the class PMMLNormalizeTranslator method createDerivedFields.

/**
 * Creates one continuous, double-typed derived field per name of the affine transformation.
 * <p>
 * Each field carries a NormContinuous element with two LinearNorm entries, (0, translation) and
 * (1, scale + translation), i.e. two points of the line {@code norm = scale * orig + translation}.
 *
 * @return the derived fields, in the order of the names of the affine transformation
 */
private DerivedField[] createDerivedFields() {
    final String[] columnNames = m_affineTrans.getNames();
    final DerivedField[] result = new DerivedField[columnNames.length];
    for (int col = 0; col < result.length; col++) {
        final String columnName = columnNames[col];
        final DerivedField derived = DerivedField.Factory.newInstance();
        derived.setExtensionArray(createSummaryExtension());
        derived.setDisplayName(columnName);
        /* Look up the currently mapped name first: creating a new derived
         * name replaces the mapping, and the map only holds the current one. */
        final String sourceField = m_mapper.getDerivedFieldName(columnName);
        derived.setName(m_mapper.createDerivedFieldName(columnName));
        derived.setOptype(OPTYPE.CONTINUOUS);
        derived.setDataType(DATATYPE.DOUBLE);

        final NormContinuous normalization = derived.addNewNormContinuous();
        normalization.setField(sourceField);
        final double offset = m_affineTrans.getTranslations()[col];
        final double factor = m_affineTrans.getScales()[col];

        // first supporting point: orig = 0
        final LinearNorm atZero = normalization.addNewLinearNorm();
        atZero.setOrig(0.0);
        atZero.setNorm(offset);
        // second supporting point: orig = 1
        final LinearNorm atOne = normalization.addNewLinearNorm();
        atOne.setOrig(1.0);
        atOne.setNorm(factor + offset);

        result[col] = derived;
    }
    return result;
}
Also used : NormContinuous(org.dmg.pmml.NormContinuousDocument.NormContinuous) LinearNorm(org.dmg.pmml.LinearNormDocument.LinearNorm) DerivedField(org.dmg.pmml.DerivedFieldDocument.DerivedField)

Example 14 with DerivedField

use of org.dmg.pmml.DerivedFieldDocument.DerivedField in project knime-core by knime.

the class PMMLPortObject method addModelTranslater.

/**
 * Adds the model of the content translater to the PMML document.
 * <p>
 * After the model has been exported, its derived fields are moved and the first mining schema of the exported
 * model is cleaned up: mining fields that are also derived fields are removed, and every field referenced by a
 * derived field of the local transformations that is neither a derived field nor already a mining field is
 * added. If no mining schema is found the method returns after the export without touching any mining fields.
 *
 * @param modelTranslator the model translator containing the model to be
 *      added
 */
public void addModelTranslater(final PMMLTranslator modelTranslator) {
    SchemaType type = modelTranslator.exportTo(m_pmmlDoc, m_spec);
    LocalTransformations localTransformations = moveDerivedFields(type);
    /* Remove mining fields from mining schema that were created as a
     * derived field. In KNIME the origin of columns is not distinguished
     * and all columns are added to the mining schema. But in PMML this
     * results in duplicate entries. Those columns should only appear once
     * as derived field in the transformation dictionary or local
     * transformations. */
    Set<String> derivedFields = new HashSet<String>();
    for (DerivedField derivedField : getDerivedFields()) {
        derivedFields.add(derivedField.getName());
    }
    MiningSchema miningSchema = PMMLUtils.getFirstMiningSchema(m_pmmlDoc, type);
    if (miningSchema == null) {
        LOGGER.info("No mining schema found.");
        return;
    }
    MiningField[] miningFieldArray = miningSchema.getMiningFieldArray();
    List<MiningField> miningFields = new ArrayList<MiningField>(Arrays.asList(miningFieldArray));
    Set<String> miningFieldNames = new HashSet<String>();
    for (MiningField miningField : miningFieldArray) {
        String miningFieldName = miningField.getName();
        if (derivedFields.contains(miningFieldName)) {
            LOGGER.debug("Removing field \"" + miningFieldName + "\" from MiningFields as it is a DerivedField.");
            miningFields.remove(miningField);
        } else {
            miningFieldNames.add(miningFieldName);
        }
    }
    /* According to the PMML Spec DerivedFields must ultimately refer back
     * to active MiningFields of the model's MiningSchema. Therefore we
     * have to add all referred DataFields to the MiningSchema. */
    String fullPath = NAMESPACE_DECLARATION + "$this/pmml:DerivedField/*/@field" + "| $this/pmml:DerivedField//pmml:FieldColumnPair/@field";
    XmlObject[] xmlDescendants = localTransformations.selectPath(fullPath);
    // insertion-ordered set: duplicates are dropped, first occurrence order is kept
    Set<String> referencedFields = new LinkedHashSet<String>();
    // collect all referred field names
    for (XmlObject xo : xmlDescendants) {
        XmlCursor xmlCursor = xo.newCursor();
        try {
            referencedFields.add(xmlCursor.getTextValue());
        } finally {
            // release the cursor even if reading the text value fails
            xmlCursor.dispose();
        }
    }
    for (String referencedField : referencedFields) {
        if (!derivedFields.contains(referencedField) && !miningFieldNames.contains(referencedField)) {
            /* Add them to the mining schema if they are not already
             * contained there and if they don't refer to derived fields. */
            MiningField miningField = MiningField.Factory.newInstance();
            miningField.setName(referencedField);
            miningField.setInvalidValueTreatment(INVALIDVALUETREATMENTMETHOD.AS_IS);
            LOGGER.debug("Adding field \"" + referencedField + "\" to MiningSchema because it is referenced in " + "LocalTransformations.");
            miningFields.add(miningField);
        }
    }
    miningSchema.setMiningFieldArray(miningFields.toArray(new MiningField[0]));
}
Also used : LinkedHashSet(java.util.LinkedHashSet) MiningField(org.dmg.pmml.MiningFieldDocument.MiningField) ArrayList(java.util.ArrayList) SchemaType(org.apache.xmlbeans.SchemaType) XmlCursor(org.apache.xmlbeans.XmlCursor) LocalTransformations(org.dmg.pmml.LocalTransformationsDocument.LocalTransformations) MiningSchema(org.dmg.pmml.MiningSchemaDocument.MiningSchema) XmlObject(org.apache.xmlbeans.XmlObject) DerivedField(org.dmg.pmml.DerivedFieldDocument.DerivedField) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet)

Example 15 with DerivedField

use of org.dmg.pmml.DerivedFieldDocument.DerivedField in project knime-core by knime.

the class PMMLPortObject method moveGlobalTransformationsToModel.

/**
 * Moves the content of the transformation dictionary to local
 * transformations of the model if a model exists.
 * <p>
 * Only the first model of the first non-empty model list is used. The lists are checked in this order: tree,
 * clustering, neural network, support vector machine, regression, general regression, rule set. The derived
 * fields of the dictionary are appended to those of the model's local transformations (which are created if
 * missing) and the dictionary is emptied afterwards. Nothing happens if the dictionary is missing or empty,
 * or if none of these model types is present.
 */
public void moveGlobalTransformationsToModel() {
    final PMML pmml = m_pmmlDoc.getPMML();
    final TransformationDictionary dictionary = pmml.getTransformationDictionary();
    if (dictionary == null) {
        // nothing to be moved
        return;
    }
    final DerivedField[] globalFields = dictionary.getDerivedFieldArray();
    if (globalFields == null || globalFields.length == 0) {
        // nothing to be moved
        return;
    }

    LocalTransformations target = null;
    if (pmml.getTreeModelArray().length > 0) {
        final TreeModel tree = pmml.getTreeModelArray(0);
        final LocalTransformations existing = tree.getLocalTransformations();
        target = existing != null ? existing : tree.addNewLocalTransformations();
    } else if (pmml.getClusteringModelArray().length > 0) {
        final ClusteringModel clustering = pmml.getClusteringModelArray(0);
        final LocalTransformations existing = clustering.getLocalTransformations();
        target = existing != null ? existing : clustering.addNewLocalTransformations();
    } else if (pmml.getNeuralNetworkArray().length > 0) {
        final NeuralNetwork network = pmml.getNeuralNetworkArray(0);
        final LocalTransformations existing = network.getLocalTransformations();
        target = existing != null ? existing : network.addNewLocalTransformations();
    } else if (pmml.getSupportVectorMachineModelArray().length > 0) {
        final SupportVectorMachineModel svm = pmml.getSupportVectorMachineModelArray(0);
        final LocalTransformations existing = svm.getLocalTransformations();
        target = existing != null ? existing : svm.addNewLocalTransformations();
    } else if (pmml.getRegressionModelArray().length > 0) {
        final RegressionModel regression = pmml.getRegressionModelArray(0);
        final LocalTransformations existing = regression.getLocalTransformations();
        target = existing != null ? existing : regression.addNewLocalTransformations();
    } else if (pmml.getGeneralRegressionModelArray().length > 0) {
        final GeneralRegressionModel generalRegression = pmml.getGeneralRegressionModelArray(0);
        final LocalTransformations existing = generalRegression.getLocalTransformations();
        target = existing != null ? existing : generalRegression.addNewLocalTransformations();
    } else if (pmml.sizeOfRuleSetModelArray() > 0) {
        final RuleSetModel ruleSet = pmml.getRuleSetModelArray(0);
        final LocalTransformations existing = ruleSet.getLocalTransformations();
        target = existing != null ? existing : ruleSet.addNewLocalTransformations();
    }

    if (target == null) {
        // do nothing as no model exists yet
        return;
    }
    target.setDerivedFieldArray(appendDerivedFields(target.getDerivedFieldArray(), globalFields));
    // remove derived fields from TransformationDictionary
    dictionary.setDerivedFieldArray(new DerivedField[0]);
}
Also used : TreeModel(org.dmg.pmml.TreeModelDocument.TreeModel) RuleSetModel(org.dmg.pmml.RuleSetModelDocument.RuleSetModel) LocalTransformations(org.dmg.pmml.LocalTransformationsDocument.LocalTransformations) TransformationDictionary(org.dmg.pmml.TransformationDictionaryDocument.TransformationDictionary) GeneralRegressionModel(org.dmg.pmml.GeneralRegressionModelDocument.GeneralRegressionModel) PMML(org.dmg.pmml.PMMLDocument.PMML) NeuralNetwork(org.dmg.pmml.NeuralNetworkDocument.NeuralNetwork) SupportVectorMachineModel(org.dmg.pmml.SupportVectorMachineModelDocument.SupportVectorMachineModel) DerivedField(org.dmg.pmml.DerivedFieldDocument.DerivedField) ClusteringModel(org.dmg.pmml.ClusteringModelDocument.ClusteringModel) GeneralRegressionModel(org.dmg.pmml.GeneralRegressionModelDocument.GeneralRegressionModel) RegressionModel(org.dmg.pmml.RegressionModelDocument.RegressionModel)

Aggregations

DerivedField (org.dmg.pmml.DerivedFieldDocument.DerivedField)41 ArrayList (java.util.ArrayList)12 FieldRef (org.dmg.pmml.FieldRefDocument.FieldRef)11 BigInteger (java.math.BigInteger)9 DerivedFieldMapper (org.knime.core.node.port.pmml.preproc.DerivedFieldMapper)8 MapValues (org.dmg.pmml.MapValuesDocument.MapValues)7 DataColumnSpec (org.knime.core.data.DataColumnSpec)6 Apply (org.dmg.pmml.ApplyDocument.Apply)5 DiscretizeBin (org.dmg.pmml.DiscretizeBinDocument.DiscretizeBin)5 Discretize (org.dmg.pmml.DiscretizeDocument.Discretize)5 Interval (org.dmg.pmml.IntervalDocument.Interval)5 NormDiscrete (org.dmg.pmml.NormDiscreteDocument.NormDiscrete)5 DataCell (org.knime.core.data.DataCell)5 DataType (org.knime.core.data.DataType)5 LinkedHashMap (java.util.LinkedHashMap)4 LinkedHashSet (java.util.LinkedHashSet)4 Map (java.util.Map)4 LocalTransformations (org.dmg.pmml.LocalTransformationsDocument.LocalTransformations)4 NeuralLayer (org.dmg.pmml.NeuralLayerDocument.NeuralLayer)4 NeuralOutput (org.dmg.pmml.NeuralOutputDocument.NeuralOutput)4