Search in sources :

Example 6 with DerivedField

use of org.dmg.pmml.DerivedFieldDocument.DerivedField in project knime-core by knime.

the class CategoryToNumberApplyNodeModel method createRearranger.

/**
 * Builds a {@link ColumnRearranger} on the given spec that applies every
 * MapValues derived field of the model to its input column.
 *
 * <p>The columns of the spec are split into string-compatible columns and
 * all other columns. A derived field is skipped, with a logged warning,
 * when its input column is either not string-compatible or not part of the
 * spec. Depending on the node settings, the generated column is appended
 * or replaces the input column.
 *
 * @param spec the spec of the input table
 * @param model the PMML model providing the derived fields; {@code null}
 *            during configure, in which case no cell factory is added to
 *            the returned rearranger
 * @return the rearranger for the given spec
 */
private ColumnRearranger createRearranger(final DataTableSpec spec, final PMMLPortObject model) {
    final boolean hasModel = null != model;
    // Partition the input columns by compatibility with StringValue
    Set<String> stringColumns = new LinkedHashSet<String>();
    Set<String> nonStringColumns = new LinkedHashSet<String>();
    for (DataColumnSpec columnSpec : spec) {
        boolean isString = columnSpec.getType().isCompatible(StringValue.class);
        (isString ? stringColumns : nonStringColumns).add(columnSpec.getName());
    }
    if (stringColumns.isEmpty()) {
        // no model means configure, a model means execute
        setWarningMessage(hasModel ? "No columns to process, returning input." : "No columns to process.");
    }
    // The MapValues derived fields of the model, keyed by input column
    Map<String, DerivedField> fieldsByColumn;
    if (hasModel) {
        fieldsByColumn = getMapValues(model);
    } else {
        fieldsByColumn = Collections.<String, DerivedField>emptyMap();
    }
    ColumnRearranger rearranger = new ColumnRearranger(spec);
    for (Map.Entry<String, DerivedField> entry : fieldsByColumn.entrySet()) {
        String col = entry.getKey();
        DerivedField derivedField = entry.getValue();
        MapValues map = derivedField.getMapValues();
        // The input column exists but is not a string column ...
        boolean wrongType = nonStringColumns.contains(col);
        // ... or it is not a usable string column of the input at all.
        if (hasModel && (wrongType || !stringColumns.contains(col))) {
            // Prefer the display name for the message, fall back to the name
            String displayName = derivedField.getDisplayName();
            String outColumn;
            if (null == displayName || displayName.trim().isEmpty()) {
                outColumn = derivedField.getName();
            } else {
                outColumn = displayName;
            }
            if (wrongType) {
                LOGGER.warn("Cannot create column \"" + outColumn + "\" since the input column \"" + col + "\" is not of type StringValue.");
            } else {
                LOGGER.warn("Cannot create column \"" + outColumn + "\" since the column \"" + col + "\" is not in the input.");
            }
            continue;
        }
        CategoryToNumberApplyCellFactory factory = new CategoryToNumberApplyCellFactory(spec, col, m_settings, map);
        if (m_settings.getAppendColumns()) {
            rearranger.append(factory);
        } else {
            rearranger.replace(factory, col);
        }
    }
    return rearranger;
}
Also used : LinkedHashSet(java.util.LinkedHashSet) DataColumnSpec(org.knime.core.data.DataColumnSpec) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) MapValues(org.dmg.pmml.MapValuesDocument.MapValues) DerivedField(org.dmg.pmml.DerivedFieldDocument.DerivedField)

Example 7 with DerivedField

use of org.dmg.pmml.DerivedFieldDocument.DerivedField in project knime-core by knime.

the class PMMLMapValuesTranslator method createDerivedFields.

/**
 * Creates the single derived field that describes the configured value
 * mapping as a PMML MapValues element with an InlineTable.
 *
 * <p>If input and output column are the same, the column name becomes the
 * display name and a newly created derived field name is used as the
 * name; otherwise the output column is used as the name. Every configured
 * entry becomes one row of the InlineTable with an "in" and an "out"
 * element.
 *
 * @return an array containing exactly one derived field
 */
private DerivedField[] createDerivedFields() {
    DerivedField df = DerivedField.Factory.newInstance();
    df.setExtensionArray(createSummaryExtension());
    /* The field name must be retrieved before creating a new derived
         * name for this derived field as the map only contains the
         * current mapping. */
    String fieldName = m_mapper.getDerivedFieldName(m_config.getInColumn());
    if (m_config.getInColumn().equals(m_config.getOutColumn())) {
        String name = m_config.getInColumn();
        df.setDisplayName(name);
        df.setName(m_mapper.createDerivedFieldName(name));
    } else {
        df.setName(m_config.getOutColumn());
    }
    df.setOptype(m_config.getOpType());
    df.setDataType(m_config.getOutDataType());
    MapValues mapValues = df.addNewMapValues();
    // the element in the InlineTable representing the output column
    // Use dummy name instead of m_config.getOutColumn() since the
    // output column could contain characters that are not allowed in XML
    final QName xmlOut = new QName("http://www.dmg.org/PMML-4_0", "out");
    mapValues.setOutputColumn(xmlOut.getLocalPart());
    mapValues.setDataType(m_config.getOutDataType());
    if (!m_config.getDefaultValue().isMissing()) {
        mapValues.setDefaultValue(m_config.getDefaultValue().toString());
    }
    if (!m_config.getMapMissingTo().isMissing()) {
        mapValues.setMapMissingTo(m_config.getMapMissingTo().toString());
    }
    // the mapping of input field <-> element in the InlineTable
    FieldColumnPair fieldColPair = mapValues.addNewFieldColumnPair();
    fieldColPair.setField(fieldName);
    // Use dummy name instead of m_config.getInColumn() since the
    // input column could contain characters that are not allowed in XML
    final QName xmlIn = new QName("http://www.dmg.org/PMML-4_0", "in");
    fieldColPair.setColumn(xmlIn.getLocalPart());
    InlineTable table = mapValues.addNewInlineTable();
    for (Entry<DataCell, ? extends DataCell> entry : m_config.getEntries().entrySet()) {
        Row row = table.addNewRow();
        XmlCursor cursor = row.newCursor();
        try {
            // step into the row so the elements are inserted as its children
            cursor.toNextToken();
            cursor.insertElementWithText(xmlIn, entry.getKey().toString());
            cursor.insertElementWithText(xmlOut, entry.getValue().toString());
        } finally {
            // release the cursor even if inserting an element fails
            cursor.dispose();
        }
    }
    return new DerivedField[] { df };
}
Also used : InlineTable(org.dmg.pmml.InlineTableDocument.InlineTable) MapValues(org.dmg.pmml.MapValuesDocument.MapValues) QName(javax.xml.namespace.QName) DataCell(org.knime.core.data.DataCell) FieldColumnPair(org.dmg.pmml.FieldColumnPairDocument.FieldColumnPair) Row(org.dmg.pmml.RowDocument.Row) DerivedField(org.dmg.pmml.DerivedFieldDocument.DerivedField) XmlCursor(org.apache.xmlbeans.XmlCursor)

Example 8 with DerivedField

use of org.dmg.pmml.DerivedFieldDocument.DerivedField in project knime-core by knime.

the class NumberToCategoryApplyNodeModel method getMapValues.

/**
 * Collects the derived fields of the model that contain a MapValues
 * element.
 *
 * @param model the PMML model
 * @return an insertion-ordered map of the derived fields that have a
 *         MapValues element, keyed by the display name of the derived
 *         field or, if no display name is set, by its name
 */
private Map<String, DerivedField> getMapValues(final PMMLPortObject model) {
    Map<String, DerivedField> result = new LinkedHashMap<String, DerivedField>();
    for (DerivedField field : model.getDerivedFields()) {
        if (null == field.getMapValues()) {
            // not a MapValues transformation, nothing to collect
            continue;
        }
        // NOTE(review): the key is presumably the name of the field the
        // MapValues is based on - confirm against the code writing the model
        String displayName = field.getDisplayName();
        String key = displayName != null ? displayName : field.getName();
        result.put(key, field);
    }
    return result;
}
Also used : MapValues(org.dmg.pmml.MapValuesDocument.MapValues) DerivedField(org.dmg.pmml.DerivedFieldDocument.DerivedField) LinkedHashMap(java.util.LinkedHashMap)

Example 9 with DerivedField

use of org.dmg.pmml.DerivedFieldDocument.DerivedField in project knime-core by knime.

the class PMMLOne2ManyTranslator method createDerivedFields.

/**
 * Creates one derived field with a NormDiscrete element for every
 * (name, value) pair of every entry in the column mapping.
 *
 * @return the derived fields in iteration order of the column mapping
 */
private DerivedField[] createDerivedFields() {
    List<DerivedField> result = new ArrayList<DerivedField>();
    for (Map.Entry<String, List<Pair<String, String>>> mapping : m_columnMapping.entrySet()) {
        // the field all NormDiscrete elements of this column refer to
        String sourceField = m_mapper.getDerivedFieldName(mapping.getKey());
        for (Pair<String, String> nameAndValue : mapping.getValue()) {
            DerivedField field = DerivedField.Factory.newInstance();
            field.setName(nameAndValue.getFirst());
            field.setOptype(OPTYPE.ORDINAL);
            field.setDataType(DATATYPE.INTEGER);
            NormDiscrete indicator = field.addNewNormDiscrete();
            indicator.setField(sourceField);
            indicator.setValue(nameAndValue.getSecond());
            indicator.setMapMissingTo(0);
            result.add(field);
        }
    }
    return result.toArray(new DerivedField[result.size()]);
}
Also used : NormDiscrete(org.dmg.pmml.NormDiscreteDocument.NormDiscrete) ArrayList(java.util.ArrayList) List(java.util.List) ArrayList(java.util.ArrayList) DerivedField(org.dmg.pmml.DerivedFieldDocument.DerivedField) Map(java.util.Map)

Example 10 with DerivedField

use of org.dmg.pmml.DerivedFieldDocument.DerivedField in project knime-core by knime.

the class MissingCellReplacingDataTable method prepareHandlers.

/**
 * Creates one missing cell handler per column of the input table.
 *
 * <p>If the PMML document has no transformation dictionary or the
 * dictionary has no derived fields, every column gets a handler from the
 * {@code DoNothingMissingCellHandlerFactory}. Otherwise the derived fields
 * are indexed by the column name the mapper resolves for them and each
 * column's handler is created from its derived field, which is
 * {@code null} for columns without one.
 *
 * @param inTableSpec the spec of the input table
 * @param pmmlDoc the PMML document whose transformation dictionary is read
 * @return the handlers, indexed by column position in the spec
 * @throws InvalidSettingsException declared by the signature; presumably
 *             raised when a handler cannot be created - confirm against
 *             the handler creation code
 */
private MissingCellHandler[] prepareHandlers(final DataTableSpec inTableSpec, final PMMLDocument pmmlDoc) throws InvalidSettingsException {
    final int numColumns = inTableSpec.getNumColumns();
    MissingCellHandler[] result = new MissingCellHandler[numColumns];
    boolean hasDerivedFields = pmmlDoc.getPMML().getTransformationDictionary() != null
        && !pmmlDoc.getPMML().getTransformationDictionary().getDerivedFieldList().isEmpty();
    if (!hasDerivedFields) {
        // nothing to apply, leave every column untouched
        for (int col = 0; col < numColumns; col++) {
            result[col] = DoNothingMissingCellHandlerFactory.getInstance().createHandler(inTableSpec.getColumnSpec(col));
        }
        return result;
    }
    // index the derived fields by the column name the mapper resolves
    DerivedFieldMapper mapper = new DerivedFieldMapper(pmmlDoc);
    Map<String, DerivedField> fieldsByColumn = new HashMap<>();
    for (DerivedField field : pmmlDoc.getPMML().getTransformationDictionary().getDerivedFieldList()) {
        fieldsByColumn.put(mapper.getColumnName(field.getName()), field);
    }
    for (int col = 0; col < numColumns; col++) {
        DataColumnSpec columnSpec = inTableSpec.getColumnSpec(col);
        result[col] = createHandlerForColumn(columnSpec, fieldsByColumn.get(columnSpec.getName()));
    }
    return result;
}
Also used : DerivedFieldMapper(org.knime.core.node.port.pmml.preproc.DerivedFieldMapper) DataColumnSpec(org.knime.core.data.DataColumnSpec) HashMap(java.util.HashMap) DerivedField(org.dmg.pmml.DerivedFieldDocument.DerivedField)

Aggregations

DerivedField (org.dmg.pmml.DerivedFieldDocument.DerivedField): 41; ArrayList (java.util.ArrayList): 12; FieldRef (org.dmg.pmml.FieldRefDocument.FieldRef): 11; BigInteger (java.math.BigInteger): 9; DerivedFieldMapper (org.knime.core.node.port.pmml.preproc.DerivedFieldMapper): 8; MapValues (org.dmg.pmml.MapValuesDocument.MapValues): 7; DataColumnSpec (org.knime.core.data.DataColumnSpec): 6; Apply (org.dmg.pmml.ApplyDocument.Apply): 5; DiscretizeBin (org.dmg.pmml.DiscretizeBinDocument.DiscretizeBin): 5; Discretize (org.dmg.pmml.DiscretizeDocument.Discretize): 5; Interval (org.dmg.pmml.IntervalDocument.Interval): 5; NormDiscrete (org.dmg.pmml.NormDiscreteDocument.NormDiscrete): 5; DataCell (org.knime.core.data.DataCell): 5; DataType (org.knime.core.data.DataType): 5; LinkedHashMap (java.util.LinkedHashMap): 4; LinkedHashSet (java.util.LinkedHashSet): 4; Map (java.util.Map): 4; LocalTransformations (org.dmg.pmml.LocalTransformationsDocument.LocalTransformations): 4; NeuralLayer (org.dmg.pmml.NeuralLayerDocument.NeuralLayer): 4; NeuralOutput (org.dmg.pmml.NeuralOutputDocument.NeuralOutput): 4