Search in sources :

Example 21 with DerivedField

use of org.dmg.pmml.DerivedFieldDocument.DerivedField in project knime-core by knime.

the class PMMLBinningTranslator method initializeFrom.

/**
 * {@inheritDoc}
 * <p>
 * Only derived fields that carry a {@code Discretize} element are read. Each
 * of them is converted into an array of {@link NumericBin}s and registered
 * under the KNIME column it applies to. All other derived fields are skipped.
 *
 * @param derivedFields the derived fields to read the binning from
 * @return the indices (into {@code derivedFields}) of the entries that
 *         contained a {@code Discretize} element and were therefore consumed
 */
@Override
public List<Integer> initializeFrom(final DerivedField[] derivedFields) {
    m_mapper = new DerivedFieldMapper(derivedFields);
    List<Integer> consumed = new ArrayList<Integer>(derivedFields.length);
    for (int i = 0; i < derivedFields.length; i++) {
        DerivedField df = derivedFields[i];
        if (!df.isSetDiscretize()) {
            // Only discretize entries are read; all other entries are skipped.
            continue;
        }
        consumed.add(i);
        Discretize discretize = df.getDiscretize();
        DiscretizeBin[] pmmlBins = discretize.getDiscretizeBinArray();
        NumericBin[] knimeBins = new NumericBin[pmmlBins.length];
        for (int j = 0; j < pmmlBins.length; j++) {
            DiscretizeBin bin = pmmlBins[j];
            String binName = bin.getBinValue();
            Interval interval = bin.getInterval();
            // The export (createDerivedFields) omits a margin when the bound
            // is infinite, so a missing margin has to be read back as an
            // unbounded side instead of the 0.0 a plain getter would yield.
            double leftValue = interval.isSetLeftMargin()
                    ? interval.getLeftMargin() : Double.NEGATIVE_INFINITY;
            double rightValue = interval.isSetRightMargin()
                    ? interval.getRightMargin() : Double.POSITIVE_INFINITY;
            Closure.Enum closure = interval.getClosure();
            // Both sides start out open; every other closure value closes
            // the corresponding side(s).
            boolean leftOpen = true;
            boolean rightOpen = true;
            if (Closure.OPEN_CLOSED == closure) {
                rightOpen = false;
            } else if (Closure.CLOSED_OPEN == closure) {
                leftOpen = false;
            } else if (Closure.CLOSED_CLOSED == closure) {
                leftOpen = false;
                rightOpen = false;
            }
            knimeBins[j] = new NumericBin(binName, leftOpen, leftValue, rightOpen, rightValue);
        }
        /*
         * The display name contains the name of the column in KNIME that
         * corresponds to the derived field in PMML. This is necessary if
         * derived fields are defined on other derived fields and the
         * columns in KNIME are replaced with the preprocessed values.
         * In this case KNIME has to know the original names (e.g. A) while
         * PMML refers to A', A'' etc.
         */
        String displayName = df.getDisplayName();
        if (displayName != null) {
            // The binned values replace the column: nothing is appended.
            m_columnToBins.put(displayName, knimeBins);
            m_columnToAppend.put(displayName, null);
        } else if (df.getName() != null) {
            // The binned values go into a column named after the derived field.
            String field = m_mapper.getColumnName(discretize.getField());
            m_columnToBins.put(field, knimeBins);
            m_columnToAppend.put(field, df.getName());
        }
    }
    return consumed;
}
Also used : Closure(org.dmg.pmml.IntervalDocument.Interval.Closure) ArrayList(java.util.ArrayList) DerivedFieldMapper(org.knime.core.node.port.pmml.preproc.DerivedFieldMapper) Discretize(org.dmg.pmml.DiscretizeDocument.Discretize) DiscretizeBin(org.dmg.pmml.DiscretizeBinDocument.DiscretizeBin) DerivedField(org.dmg.pmml.DerivedFieldDocument.DerivedField) Interval(org.dmg.pmml.IntervalDocument.Interval)

Example 22 with DerivedField

use of org.dmg.pmml.DerivedFieldDocument.DerivedField in project knime-core by knime.

the class PMMLBinningTranslator method createDerivedFields.

/**
 * Builds one PMML derived field per entry of {@code m_columnToBins}. Every
 * derived field holds a {@code Discretize} element with one
 * {@code DiscretizeBin} per configured bin.
 *
 * @return the derived fields, in the iteration order of {@code m_columnToBins}
 */
private DerivedField[] createDerivedFields() {
    DerivedField[] result = new DerivedField[m_columnToBins.size()];
    int next = 0;
    for (Map.Entry<String, Bin[]> binning : m_columnToBins.entrySet()) {
        String column = binning.getKey();
        DerivedField derived = DerivedField.Factory.newInstance();
        // Look up the referenced field name first: the mapper only holds the
        // current mapping, which changes once a new derived name is created
        // for this column below.
        String sourceField = m_mapper.getDerivedFieldName(column);
        Discretize discretize = derived.addNewDiscretize();
        discretize.setField(sourceField);
        String appendedName = m_columnToAppend.get(column);
        if (appendedName == null) {
            // No explicit target name: generate a derived name and keep the
            // column name as display name.
            derived.setName(m_mapper.createDerivedFieldName(column));
            derived.setDisplayName(column);
        } else {
            derived.setName(appendedName);
        }
        derived.setOptype(OPTYPE.CATEGORICAL);
        derived.setDataType(DATATYPE.STRING);
        for (Bin bin : binning.getValue()) {
            NumericBin numericBin = (NumericBin) bin;
            boolean openLeft = numericBin.isLeftOpen();
            boolean openRight = numericBin.isRightOpen();
            double lower = numericBin.getLeftValue();
            double upper = numericBin.getRightValue();
            DiscretizeBin target = discretize.addNewDiscretizeBin();
            target.setBinValue(numericBin.getBinName());
            Interval range = target.addNewInterval();
            // Infinite bounds are expressed by leaving the margin unset.
            if (!Double.isInfinite(lower)) {
                range.setLeftMargin(lower);
            }
            if (!Double.isInfinite(upper)) {
                range.setRightMargin(upper);
            }
            if (openLeft) {
                if (openRight) {
                    range.setClosure(Closure.OPEN_OPEN);
                } else {
                    range.setClosure(Closure.OPEN_CLOSED);
                }
            } else {
                if (openRight) {
                    range.setClosure(Closure.CLOSED_OPEN);
                } else {
                    range.setClosure(Closure.CLOSED_CLOSED);
                }
            }
        }
        result[next++] = derived;
    }
    return result;
}
Also used : DiscretizeBin(org.dmg.pmml.DiscretizeBinDocument.DiscretizeBin) Bin(org.knime.base.node.preproc.pmml.binner.BinnerColumnFactory.Bin) Discretize(org.dmg.pmml.DiscretizeDocument.Discretize) DiscretizeBin(org.dmg.pmml.DiscretizeBinDocument.DiscretizeBin) DerivedField(org.dmg.pmml.DerivedFieldDocument.DerivedField) TreeMap(java.util.TreeMap) Map(java.util.Map) Interval(org.dmg.pmml.IntervalDocument.Interval)

Example 23 with DerivedField

use of org.dmg.pmml.DerivedFieldDocument.DerivedField in project knime-core by knime.

the class PMMLMany2OneTranslator method createDerivedField.

/**
 * Builds the derived field for the appended column as a chain of nested
 * {@code if} applies, one per source column. Each {@code if} compares the
 * source column either with the max/min over all source columns or with the
 * constant "1" and yields the column name; a new {@code if} is added to the
 * previous one for the next column. The innermost {@code if} finally
 * receives the constant "missing".
 *
 * @return the derived field named after {@code m_appendedCol}
 */
private DerivedField createDerivedField() {
    final DerivedField result = DerivedField.Factory.newInstance();
    result.setName(m_appendedCol);
    result.setDataType(DATATYPE.STRING);
    result.setOptype(OPTYPE.CATEGORICAL);

    final boolean maximum = IncludeMethod.Maximum == m_method;
    final boolean extremum = maximum || m_method == IncludeMethod.Minimum;

    Apply enclosing = null;
    for (String column : m_sourceCols) {
        // The first "if" hangs directly below the derived field, every
        // further one is nested into its predecessor.
        Apply branch = (enclosing == null) ? result.addNewApply() : enclosing.addNewApply();
        branch.setFunction("if");

        Apply condition = branch.addNewApply();
        condition.setFunction("equal");
        condition.addNewFieldRef().setField(column);
        if (extremum) {
            Apply aggregate = condition.addNewApply();
            aggregate.setFunction(maximum ? "max" : "min");
            for (String other : m_sourceCols) {
                aggregate.addNewFieldRef().setField(other);
            }
        } else {
            // Every other include method (i.e. Binary) compares with "1".
            condition.addNewConstant().setStringValue("1");
        }

        branch.addNewConstant().setStringValue(column);
        enclosing = branch;
    }
    if (enclosing != null) {
        enclosing.addNewConstant().setStringValue("missing");
    }
    return result;
}
Also used : Apply(org.dmg.pmml.ApplyDocument.Apply) DerivedField(org.dmg.pmml.DerivedFieldDocument.DerivedField)

Example 24 with DerivedField

use of org.dmg.pmml.DerivedFieldDocument.DerivedField in project knime-core by knime.

the class MissingCellHandler method createExtensionDerivedField.

/**
 * Creates a derived field that contains an extension which
 * contains the name of the factory to use for the replacement.
 * The settings of this handler (as written by {@code saveSettingsTo}) are
 * serialized to XML and appended as a child of the extension element.
 * The result may be adjusted to contain necessary information for the handler.
 * @param dataType the data type of the derived field
 * @param factoryID the id of the factory
 * @return the derived field, or {@code null} if the settings could not be
 *         written into the extension (the error is logged)
 */
protected DerivedField createExtensionDerivedField(final DATATYPE.Enum dataType, final String factoryID) {
    DerivedField field = DerivedField.Factory.newInstance();
    // Strings and booleans are categorical, everything else is continuous.
    if (dataType == org.dmg.pmml.DATATYPE.STRING || dataType == org.dmg.pmml.DATATYPE.BOOLEAN) {
        field.setOptype(org.dmg.pmml.OPTYPE.CATEGORICAL);
    } else {
        field.setOptype(org.dmg.pmml.OPTYPE.CONTINUOUS);
    }
    Extension e = field.addNewExtension();
    e.setName(CUSTOM_HANDLER_EXTENSION_NAME);
    e.setValue(factoryID);
    field.setDataType(dataType);
    field.setName(m_col.getName());
    field.setDisplayName(m_col.getName());
    // Insert settings
    NodeSettings nodeSettings = new NodeSettings("");
    saveSettingsTo(nodeSettings);
    try {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        nodeSettings.saveToXML(baos);
        Document doc = javax.xml.parsers.DocumentBuilderFactory.newInstance().newDocumentBuilder()
                .parse(new ByteArrayInputStream(baos.toByteArray()));
        // Import the root element explicitly: the first child of a document
        // is not guaranteed to be the root element (a DOCTYPE or a comment
        // may precede it).
        Node copy = e.getDomNode().getOwnerDocument().importNode(doc.getDocumentElement(), true);
        e.getDomNode().appendChild(copy);
    } catch (Exception ex) {
        // Pass the exception along so that the cause and stack trace are not lost.
        LOGGER.error("An error occurred while writing settings to PMML.\n" + ex.getMessage(), ex);
        return null;
    }
    return field;
}
Also used : Extension(org.dmg.pmml.ExtensionDocument.Extension) NodeSettings(org.knime.core.node.NodeSettings) ByteArrayInputStream(java.io.ByteArrayInputStream) Node(org.w3c.dom.Node) ByteArrayOutputStream(java.io.ByteArrayOutputStream) Document(org.w3c.dom.Document) DerivedField(org.dmg.pmml.DerivedFieldDocument.DerivedField) InvalidSettingsException(org.knime.core.node.InvalidSettingsException)

Example 25 with DerivedField

use of org.dmg.pmml.DerivedFieldDocument.DerivedField in project knime-core by knime.

the class MissingCellHandler method createValueReplacingDerivedField.

/**
 * Creates a derived field that substitutes a fixed value for this handler's
 * column whenever the column value is missing.
 * @param dataType the data type of the field; strings and booleans yield a
 *        categorical field, all other types a continuous one
 * @param value the replacement value for the field
 * @return the derived field
 */
protected DerivedField createValueReplacingDerivedField(final DATATYPE.Enum dataType, final String value) {
    final String columnName = m_col.getName();
    final boolean categorical =
        dataType == org.dmg.pmml.DATATYPE.STRING || dataType == org.dmg.pmml.DATATYPE.BOOLEAN;

    DerivedField result = DerivedField.Factory.newInstance();
    result.setOptype(categorical ? org.dmg.pmml.OPTYPE.CATEGORICAL : org.dmg.pmml.OPTYPE.CONTINUOUS);

    /*
     * The expression built below is the PMML form of
     * "if column is missing then value else column":
     *
     * <Apply function="if">
     *    <Apply function="isMissing">
     *        <FieldRef field="column"/>
     *    </Apply>
     *    <Constant dataType="___" value="value"/>
     *    <FieldRef field="column"/>
     * </Apply>
     */

    // Reference to the column; used by the condition and as the else-branch.
    FieldRef columnRef = FieldRef.Factory.newInstance();
    columnRef.setField(columnName);

    // Condition: isMissing(column)
    Apply missingCheck = Apply.Factory.newInstance();
    missingCheck.setFieldRefArray(new FieldRef[] { columnRef });
    missingCheck.setFunction(IS_MISSING_FUNCTION_NAME);

    // Then-branch: the fixed replacement value
    Constant fallback = Constant.Factory.newInstance();
    fallback.setDataType(dataType);
    fallback.setStringValue(value);

    Apply conditional = result.addNewApply();
    conditional.setFunction(IF_FUNCTION_NAME);
    conditional.setApplyArray(new Apply[] { missingCheck });
    conditional.setConstantArray(new Constant[] { fallback });
    conditional.setFieldRefArray(new FieldRef[] { columnRef });

    result.setDataType(dataType);
    result.setName(columnName);
    result.setDisplayName(columnName);
    return result;
}
Also used : FieldRef(org.dmg.pmml.FieldRefDocument.FieldRef) Apply(org.dmg.pmml.ApplyDocument.Apply) Constant(org.dmg.pmml.ConstantDocument.Constant) DerivedField(org.dmg.pmml.DerivedFieldDocument.DerivedField)

Aggregations

DerivedField (org.dmg.pmml.DerivedFieldDocument.DerivedField): 41; ArrayList (java.util.ArrayList): 12; FieldRef (org.dmg.pmml.FieldRefDocument.FieldRef): 11; BigInteger (java.math.BigInteger): 9; DerivedFieldMapper (org.knime.core.node.port.pmml.preproc.DerivedFieldMapper): 8; MapValues (org.dmg.pmml.MapValuesDocument.MapValues): 7; DataColumnSpec (org.knime.core.data.DataColumnSpec): 6; Apply (org.dmg.pmml.ApplyDocument.Apply): 5; DiscretizeBin (org.dmg.pmml.DiscretizeBinDocument.DiscretizeBin): 5; Discretize (org.dmg.pmml.DiscretizeDocument.Discretize): 5; Interval (org.dmg.pmml.IntervalDocument.Interval): 5; NormDiscrete (org.dmg.pmml.NormDiscreteDocument.NormDiscrete): 5; DataCell (org.knime.core.data.DataCell): 5; DataType (org.knime.core.data.DataType): 5; LinkedHashMap (java.util.LinkedHashMap): 4; LinkedHashSet (java.util.LinkedHashSet): 4; Map (java.util.Map): 4; LocalTransformations (org.dmg.pmml.LocalTransformationsDocument.LocalTransformations): 4; NeuralLayer (org.dmg.pmml.NeuralLayerDocument.NeuralLayer): 4; NeuralOutput (org.dmg.pmml.NeuralOutputDocument.NeuralOutput): 4