Search in sources :

Example 16 with DerivedField

use of org.dmg.pmml.DerivedFieldDocument.DerivedField in project knime-core by knime.

the class PMMLPortObject method addGlobalTransformations.

/**
 * Adds global transformations to the PMML document. Only DerivedField
 * elements are supported so far. If no global transformations are set so
 * far the dictionary is set as new transformation dictionary, otherwise
 * all contained transformations are appended to the existing one.
 * <p>
 * The derived field names of the resulting dictionary are validated before
 * the document is modified, so a rejected dictionary leaves the transformation
 * dictionary of the PMML document untouched. After the dictionary has been
 * updated, data fields whose names match a derived field are removed from the
 * data dictionary, field references in an existing model are rewritten and
 * the port object spec is recreated with the preprocessing column names.
 *
 * @param dictionary the transformation dictionary that contains the
 *      transformations to be added
 * @throws IllegalArgumentException if the dictionary contains
 *      DefineFunction elements or if the resulting derived field names are
 *      not unique
 */
public void addGlobalTransformations(final TransformationDictionary dictionary) {
    if (dictionary.getDefineFunctionArray().length > 0) {
        throw new IllegalArgumentException("DefineFunctions are not " + "supported so far. Only derived fields are allowed.");
    }
    TransformationDictionary dict = m_pmmlDoc.getPMML().getTransformationDictionary();
    // Determine the derived fields the document will contain after the update
    // and validate them BEFORE anything is written to the document.
    final DerivedField[] merged;
    if (dict == null) {
        merged = dictionary.getDerivedFieldArray();
    } else {
        merged = appendDerivedFields(dict.getDerivedFieldArray(), dictionary.getDerivedFieldArray());
    }
    Set<String> dfNames = new HashSet<String>();
    for (DerivedField candidate : merged) {
        String derivedName = candidate.getName();
        if (!dfNames.add(derivedName)) {
            throw new IllegalArgumentException("Derived field name \"" + derivedName + "\" is not unique.");
        }
    }
    // add the transformations to the TransformationDictionary
    if (dict == null) {
        m_pmmlDoc.getPMML().setTransformationDictionary(dictionary);
        dict = m_pmmlDoc.getPMML().getTransformationDictionary();
    } else {
        // append the transformations to the existing dictionary
        dict.setDerivedFieldArray(merged);
    }
    DerivedField[] df = dict.getDerivedFieldArray();
    // the display name takes precedence over the derived name as column name
    List<String> colNames = new ArrayList<String>(df.length);
    for (DerivedField field : df) {
        String displayName = field.getDisplayName();
        colNames.add(displayName == null ? field.getName() : displayName);
    }
    /* Remove data fields from data dictionary that were created as a
     * derived field. In KNIME the origin of columns is not distinguished
     * and all columns are added to the data dictionary. But in PMML this
     * results in duplicate entries. Those columns should only appear once
     * as derived field in the transformation dictionary or local
     * transformations. */
    DataDictionary dataDict = m_pmmlDoc.getPMML().getDataDictionary();
    DataField[] dataFieldArray = dataDict.getDataFieldArray();
    List<DataField> dataFields = new ArrayList<DataField>(dataFieldArray.length);
    for (DataField dataField : dataFieldArray) {
        if (!dfNames.contains(dataField.getName())) {
            dataFields.add(dataField);
        }
    }
    dataDict.setDataFieldArray(dataFields.toArray(new DataField[0]));
    // update the number of fields
    dataDict.setNumberOfFields(BigInteger.valueOf(dataFields.size()));
    // -------------------------------------------------
    // update field names in the model if applicable
    DerivedFieldMapper dfm = new DerivedFieldMapper(df);
    Map<String, String> derivedFieldMap = dfm.getDerivedFieldMap();
    /* Use XPATH to update field names in the model and move the derived
     * fields to local transformations. Only the first model kind that is
     * present in the document is processed. */
    PMML pmml = m_pmmlDoc.getPMML();
    if (pmml.getTreeModelArray().length > 0) {
        fixAttributeAtPath(pmml, TREE_PATH, FIELD, derivedFieldMap);
    } else if (pmml.getClusteringModelArray().length > 0) {
        fixAttributeAtPath(pmml, CLUSTERING_PATH, FIELD, derivedFieldMap);
    } else if (pmml.getNeuralNetworkArray().length > 0) {
        fixAttributeAtPath(pmml, NN_PATH, FIELD, derivedFieldMap);
    } else if (pmml.getSupportVectorMachineModelArray().length > 0) {
        fixAttributeAtPath(pmml, SVM_PATH, FIELD, derivedFieldMap);
    } else if (pmml.getRegressionModelArray().length > 0) {
        fixAttributeAtPath(pmml, REGRESSION_PATH_1, FIELD, derivedFieldMap);
        fixAttributeAtPath(pmml, REGRESSION_PATH_2, NAME, derivedFieldMap);
    } else if (pmml.getGeneralRegressionModelArray().length > 0) {
        fixAttributeAtPath(pmml, GR_PATH_1, NAME, derivedFieldMap);
        fixAttributeAtPath(pmml, GR_PATH_2, LABEL, derivedFieldMap);
        fixAttributeAtPath(pmml, GR_PATH_3, PREDICTOR_NAME, derivedFieldMap);
    }
    // else do nothing as no model exists yet
    // --------------------------------------------------
    PMMLPortObjectSpecCreator creator = new PMMLPortObjectSpecCreator(this, m_spec.getDataTableSpec());
    creator.addPreprocColNames(colNames);
    m_spec = creator.createSpec();
}
Also used : TransformationDictionary(org.dmg.pmml.TransformationDictionaryDocument.TransformationDictionary) ArrayList(java.util.ArrayList) DataDictionary(org.dmg.pmml.DataDictionaryDocument.DataDictionary) DerivedFieldMapper(org.knime.core.node.port.pmml.preproc.DerivedFieldMapper) DataField(org.dmg.pmml.DataFieldDocument.DataField) PMML(org.dmg.pmml.PMMLDocument.PMML) DerivedField(org.dmg.pmml.DerivedFieldDocument.DerivedField) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet)

Example 17 with DerivedField

use of org.dmg.pmml.DerivedFieldDocument.DerivedField in project knime-core by knime.

the class PMMLBinningTranslator method createDerivedFields.

/**
 * Creates one derived field with a Discretize element for every entry of
 * the column-to-bins map. Each bin of a column becomes a DiscretizeBin with
 * an interval; infinite boundaries are left unset.
 *
 * @return the derived fields, in the iteration order of the map
 */
private DerivedField[] createDerivedFields() {
    final DerivedField[] result = new DerivedField[m_columnToBins.size()];
    int pos = 0;
    for (Map.Entry<String, Bin[]> columnBins : m_columnToBins.entrySet()) {
        final String column = columnBins.getKey();
        final DerivedField derived = DerivedField.Factory.newInstance();
        /* Look up the currently mapped field name first: creating a new
         * derived name below replaces the mapping for this column. */
        final String sourceField = m_mapper.getDerivedFieldName(column);
        final Discretize discretize = derived.addNewDiscretize();
        discretize.setField(sourceField);
        final String appendedName = m_columnToAppend.get(column);
        if (appendedName == null) {
            // column is replaced: generate a derived name, keep the column name for display
            derived.setName(m_mapper.createDerivedFieldName(column));
            derived.setDisplayName(column);
        } else {
            derived.setName(appendedName);
        }
        derived.setOptype(OPTYPE.CATEGORICAL);
        derived.setDataType(DATATYPE.STRING);
        for (Bin bin : columnBins.getValue()) {
            final NumericBin numericBin = (NumericBin) bin;
            final boolean leftOpen = numericBin.isLeftOpen();
            final boolean rightOpen = numericBin.isRightOpen();
            final double lower = numericBin.getLeftValue();
            final double upper = numericBin.getRightValue();
            final DiscretizeBin discretizeBin = discretize.addNewDiscretizeBin();
            discretizeBin.setBinValue(numericBin.getBinName());
            final Interval range = discretizeBin.addNewInterval();
            if (!Double.isInfinite(lower)) {
                range.setLeftMargin(lower);
            }
            if (!Double.isInfinite(upper)) {
                range.setRightMargin(upper);
            }
            if (leftOpen) {
                range.setClosure(rightOpen ? Closure.OPEN_OPEN : Closure.OPEN_CLOSED);
            } else {
                range.setClosure(rightOpen ? Closure.CLOSED_OPEN : Closure.CLOSED_CLOSED);
            }
        }
        result[pos] = derived;
        pos++;
    }
    return result;
}
Also used : DiscretizeBin(org.dmg.pmml.DiscretizeBinDocument.DiscretizeBin) Discretize(org.dmg.pmml.DiscretizeDocument.Discretize) DiscretizeBin(org.dmg.pmml.DiscretizeBinDocument.DiscretizeBin) DerivedField(org.dmg.pmml.DerivedFieldDocument.DerivedField) TreeMap(java.util.TreeMap) Map(java.util.Map) Interval(org.dmg.pmml.IntervalDocument.Interval)

Example 18 with DerivedField

use of org.dmg.pmml.DerivedFieldDocument.DerivedField in project knime-core by knime.

the class PMMLStringConversionTranslator method createDerivedFields.

/**
 * Creates one derived field per included column. Each field carries the
 * PMML data type and optype looked up for the configured parse type and
 * references the field name currently mapped to the column.
 *
 * @return the derived fields, in the order of the included columns
 */
private DerivedField[] createDerivedFields() {
    final DATATYPE.Enum pmmlType = PMMLDataDictionaryTranslator.getPMMLDataType(m_parseType);
    final OPTYPE.Enum pmmlOptype = PMMLDataDictionaryTranslator.getOptype(m_parseType);
    final DerivedField[] converted = new DerivedField[m_includeCols.size()];
    for (int pos = 0; pos < converted.length; pos++) {
        final DerivedField field = DerivedField.Factory.newInstance();
        final String column = m_includeCols.get(pos);
        field.setDisplayName(column);
        /* Look up the currently mapped field name first: creating a new
         * derived name below replaces the mapping for this column. */
        final String sourceField = m_mapper.getDerivedFieldName(column);
        field.setName(m_mapper.createDerivedFieldName(column));
        field.setDataType(pmmlType);
        field.setOptype(pmmlOptype);
        final FieldRef reference = field.addNewFieldRef();
        reference.setField(sourceField);
        converted[pos] = field;
    }
    return converted;
}
Also used : DATATYPE(org.dmg.pmml.DATATYPE) FieldRef(org.dmg.pmml.FieldRefDocument.FieldRef) OPTYPE(org.dmg.pmml.OPTYPE) DerivedField(org.dmg.pmml.DerivedFieldDocument.DerivedField)

Example 19 with DerivedField

use of org.dmg.pmml.DerivedFieldDocument.DerivedField in project knime-core by knime.

the class PMMLStringConversionTranslator method initializeFrom.

/**
 * {@inheritDoc}
 * <p>
 * Only derived fields that contain a field reference and whose data type
 * maps to an int, double or string type are read. For each such field the
 * parse type is set (so the last processed field determines it) and the
 * corresponding column name is added to the included columns.
 *
 * @return the indices of the derived fields that were read; an empty list
 *      if {@code derivedFields} is {@code null}
 */
@Override
public List<Integer> initializeFrom(final DerivedField[] derivedFields) {
    if (derivedFields == null) {
        // type-safe replacement for the raw Collections.EMPTY_LIST
        return Collections.emptyList();
    }
    int num = derivedFields.length;
    List<Integer> consumed = new ArrayList<Integer>(num);
    for (int i = 0; i < num; i++) {
        DerivedField df = derivedFields[i];
        /*
         * This field contains the name of the column in KNIME that
         * corresponds to the derived field in PMML. This is necessary if
         * derived fields are defined on other derived fields and the
         * columns in KNIME are replaced with the preprocessed values.
         * In this case KNIME has to know the original names (e.g. A) while
         * PMML references to A*, A** etc.
         */
        String displayName = df.getDisplayName();
        if (!df.isSetFieldRef()) {
            // only reading field references
            continue;
        }
        DataType dataType = PMMLDataDictionaryTranslator.getKNIMEDataType(df.getDataType());
        if (dataType.isCompatible(IntValue.class)) {
            m_parseType = IntCell.TYPE;
        } else if (dataType.isCompatible(DoubleValue.class)) {
            m_parseType = DoubleCell.TYPE;
        } else if (dataType == StringCell.TYPE) {
            m_parseType = StringCell.TYPE;
        } else {
            // only processing int, double and string conversions
            continue;
        }
        FieldRef fieldRef = df.getFieldRef();
        if (displayName != null) {
            m_includeCols.add(displayName);
        } else {
            // no display name: resolve the column via the referenced field
            m_includeCols.add(m_mapper.getColumnName(fieldRef.getField()));
        }
        consumed.add(i);
    }
    return consumed;
}
Also used : FieldRef(org.dmg.pmml.FieldRefDocument.FieldRef) DoubleValue(org.knime.core.data.DoubleValue) ArrayList(java.util.ArrayList) DataType(org.knime.core.data.DataType) DerivedField(org.dmg.pmml.DerivedFieldDocument.DerivedField)

Example 20 with DerivedField

use of org.dmg.pmml.DerivedFieldDocument.DerivedField in project knime-core by knime.

the class DataColumnSpecFilterPMMLNodeModel method createPMMLOut.

/**
 * Creates the PMML output for the filtered table spec. Without an input
 * PMML a port object holding only the spec is returned. Otherwise the input
 * document is parsed, all data fields excluded by the filter are removed
 * from the data dictionary and a warning is set for every mining field and
 * every transformation that still refers to an excluded column.
 *
 * @param pmmlIn the incoming PMML port object, may be {@code null}
 * @param outSpec the spec of the filtered output table
 * @param res the filter result that determines the excluded columns
 * @return the PMML port object for the output
 * @throws XmlException declared for parsing the incoming PMML document
 * @throws IllegalArgumentException if the port object cannot be created
 *      from the filtered document
 */
private PMMLPortObject createPMMLOut(final PMMLPortObject pmmlIn, final DataTableSpec outSpec, final FilterResult res) throws XmlException {
    if (pmmlIn == null) {
        return new PMMLPortObject(createPMMLSpec(null, outSpec, res));
    }
    // method-local buffer: no need for the synchronized StringBuffer
    StringBuilder warningBuffer = new StringBuilder();
    PMMLDocument pmmldoc;
    try (LockedSupplier<Document> supplier = pmmlIn.getPMMLValue().getDocumentSupplier()) {
        pmmldoc = PMMLDocument.Factory.parse(supplier.get());
    }
    // Inspect models to check if they use any excluded columns
    List<PMMLModelWrapper> models = PMMLModelWrapper.getModelListFromPMMLDocument(pmmldoc);
    for (PMMLModelWrapper model : models) {
        MiningSchema ms = model.getMiningSchema();
        for (MiningField mf : ms.getMiningFieldList()) {
            if (isExcluded(mf.getName(), res)) {
                if (warningBuffer.length() != 0) {
                    warningBuffer.append("\n");
                }
                warningBuffer.append(model.getModelType().name()).append(" uses excluded column ").append(mf.getName());
            }
        }
    }
    List<String> warningFields = new ArrayList<String>();
    PMML pmml = pmmldoc.getPMML();
    // Now check the transformations if they exist
    if (pmml.getTransformationDictionary() != null) {
        for (DerivedField df : pmml.getTransformationDictionary().getDerivedFieldList()) {
            FieldRef fr = df.getFieldRef();
            if (fr != null && isExcluded(fr.getField(), res)) {
                warningFields.add(fr.getField());
            }
            Aggregate a = df.getAggregate();
            if (a != null && isExcluded(a.getField(), res)) {
                warningFields.add(a.getField());
            }
            Apply ap = df.getApply();
            if (ap != null) {
                // only the first excluded field of an Apply is reported
                for (FieldRef fieldRef : ap.getFieldRefList()) {
                    if (isExcluded(fieldRef.getField(), res)) {
                        warningFields.add(fieldRef.getField());
                        break;
                    }
                }
            }
            Discretize d = df.getDiscretize();
            if (d != null && isExcluded(d.getField(), res)) {
                warningFields.add(d.getField());
            }
            MapValues mv = df.getMapValues();
            if (mv != null) {
                for (FieldColumnPair fcp : mv.getFieldColumnPairList()) {
                    if (isExcluded(fcp.getField(), res)) {
                        warningFields.add(fcp.getField());
                    }
                }
            }
            NormContinuous nc = df.getNormContinuous();
            if (nc != null && isExcluded(nc.getField(), res)) {
                warningFields.add(nc.getField());
            }
            NormDiscrete nd = df.getNormDiscrete();
            if (nd != null && isExcluded(nd.getField(), res)) {
                warningFields.add(nd.getField());
            }
        }
    }
    DataDictionary dict = pmml.getDataDictionary();
    List<DataField> fields = dict.getDataFieldList();
    // Apply filter to spec; iterate backwards so removals do not shift
    // the indices that are still to be visited
    int numFields = 0;
    for (int i = fields.size() - 1; i >= 0; i--) {
        if (isExcluded(fields.get(i).getName(), res)) {
            dict.removeDataField(i);
        } else {
            numFields++;
        }
    }
    dict.setNumberOfFields(BigInteger.valueOf(numFields));
    pmml.setDataDictionary(dict);
    pmmldoc.setPMML(pmml);
    // generate warnings and set as warning message
    for (String s : warningFields) {
        if (warningBuffer.length() != 0) {
            warningBuffer.append("\n");
        }
        warningBuffer.append("Transformation dictionary uses excluded column ").append(s);
    }
    if (warningBuffer.length() > 0) {
        setWarningMessage(warningBuffer.toString().trim());
    }
    try {
        return new PMMLPortObject(createPMMLSpec(pmmlIn.getSpec(), outSpec, res), pmmldoc);
    } catch (IllegalArgumentException e) {
        if (res.getIncludes().length == 0) {
            // give a more helpful message for the most likely cause
            throw new IllegalArgumentException("Excluding all columns produces invalid PMML", e);
        }
        throw e;
    }
}
Also used : MiningField(org.dmg.pmml.MiningFieldDocument.MiningField) NormContinuous(org.dmg.pmml.NormContinuousDocument.NormContinuous) Apply(org.dmg.pmml.ApplyDocument.Apply) ArrayList(java.util.ArrayList) FieldColumnPair(org.dmg.pmml.FieldColumnPairDocument.FieldColumnPair) PMMLDocument(org.dmg.pmml.PMMLDocument) Document(org.w3c.dom.Document) MapValues(org.dmg.pmml.MapValuesDocument.MapValues) Discretize(org.dmg.pmml.DiscretizeDocument.Discretize) FieldRef(org.dmg.pmml.FieldRefDocument.FieldRef) DataDictionary(org.dmg.pmml.DataDictionaryDocument.DataDictionary) PMMLModelWrapper(org.knime.core.node.port.pmml.PMMLModelWrapper) NormDiscrete(org.dmg.pmml.NormDiscreteDocument.NormDiscrete) MiningSchema(org.dmg.pmml.MiningSchemaDocument.MiningSchema) DataField(org.dmg.pmml.DataFieldDocument.DataField) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) PMML(org.dmg.pmml.PMMLDocument.PMML) BigInteger(java.math.BigInteger) PMMLDocument(org.dmg.pmml.PMMLDocument) Aggregate(org.dmg.pmml.AggregateDocument.Aggregate) DerivedField(org.dmg.pmml.DerivedFieldDocument.DerivedField)

Aggregations

DerivedField (org.dmg.pmml.DerivedFieldDocument.DerivedField)41 ArrayList (java.util.ArrayList)12 FieldRef (org.dmg.pmml.FieldRefDocument.FieldRef)11 BigInteger (java.math.BigInteger)9 DerivedFieldMapper (org.knime.core.node.port.pmml.preproc.DerivedFieldMapper)8 MapValues (org.dmg.pmml.MapValuesDocument.MapValues)7 DataColumnSpec (org.knime.core.data.DataColumnSpec)6 Apply (org.dmg.pmml.ApplyDocument.Apply)5 DiscretizeBin (org.dmg.pmml.DiscretizeBinDocument.DiscretizeBin)5 Discretize (org.dmg.pmml.DiscretizeDocument.Discretize)5 Interval (org.dmg.pmml.IntervalDocument.Interval)5 NormDiscrete (org.dmg.pmml.NormDiscreteDocument.NormDiscrete)5 DataCell (org.knime.core.data.DataCell)5 DataType (org.knime.core.data.DataType)5 LinkedHashMap (java.util.LinkedHashMap)4 LinkedHashSet (java.util.LinkedHashSet)4 Map (java.util.Map)4 LocalTransformations (org.dmg.pmml.LocalTransformationsDocument.LocalTransformations)4 NeuralLayer (org.dmg.pmml.NeuralLayerDocument.NeuralLayer)4 NeuralOutput (org.dmg.pmml.NeuralOutputDocument.NeuralOutput)4