Search in sources :

Example 1 with Value

use of org.dmg.pmml.ValueDocument.Value in project knime-core by knime.

the class PMMLRuleTranslator method initDataDictionary.

/**
 * Populates {@link #m_dataDictionary} from the data dictionary of the given PMML document.
 * <p>
 * Each data field name is mapped to the list of its declared values, in the order the fields and
 * values are returned by the document. Both the map and the per-field lists are wrapped as
 * unmodifiable. A document without a data dictionary results in an empty map.
 *
 * @param pmmlDoc the {@link PMMLDocument} whose data dictionary is read
 */
private void initDataDictionary(final PMMLDocument pmmlDoc) {
    final DataDictionary dictionary = pmmlDoc.getPMML().getDataDictionary();
    if (dictionary != null) {
        // Insertion-ordered map so the fields keep the order in which the document lists them.
        final Map<String, List<String>> valuesByField =
            new LinkedHashMap<String, List<String>>(dictionary.sizeOfDataFieldArray() * 2);
        for (final DataField field : dictionary.getDataFieldList()) {
            final List<String> fieldValues = new ArrayList<String>(field.sizeOfValueArray());
            for (final Value declared : field.getValueList()) {
                fieldValues.add(declared.getValue());
            }
            valuesByField.put(field.getName(), Collections.unmodifiableList(fieldValues));
        }
        m_dataDictionary = Collections.unmodifiableMap(valuesByField);
    } else {
        // No dictionary in the document: expose an empty mapping.
        m_dataDictionary = Collections.emptyMap();
    }
}
Also used : DataField(org.dmg.pmml.DataFieldDocument.DataField) ArrayList(java.util.ArrayList) Value(org.dmg.pmml.ValueDocument.Value) List(java.util.List) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) DataDictionary(org.dmg.pmml.DataDictionaryDocument.DataDictionary) LinkedHashMap(java.util.LinkedHashMap)

Example 2 with Value

use of org.dmg.pmml.ValueDocument.Value in project knime-core by knime.

the class PMMLDataDictionaryTranslator method addColSpecsForDataFields.

/**
 * Creates one {@link DataColumnSpec} for every data field of the PMML data dictionary and appends
 * it to {@code colSpecs}. The name of each processed field is also added to {@code m_dictFields}.
 * <p>
 * The column domain is derived as follows:
 * <ul>
 * <li>Type compatible with {@link NominalValue}: only if the type is exactly the
 * {@link StringCell} type, the field's {@code Value} entries become the possible values of the
 * domain; other nominal types get no derived domain.</li>
 * <li>Otherwise, type compatible with {@link DoubleValue}: the bounds are taken from the first
 * {@code Interval} of the field or, if there is none, from the minimum and maximum of the
 * {@code Value} entries parsed as doubles. Bound cells are only created for the {@link IntCell}
 * and {@link DoubleCell} types.</li>
 * <li>Any other type: no domain is derived and {@code null} is handed to the spec creator.</li>
 * </ul>
 *
 * @param pmmlDoc the PMML document to analyze
 * @param colSpecs the list to add the data column specs to
 * @throws IllegalArgumentException if a string field declares intervals, or if a value of a
 *             numeric field without intervals cannot be converted to a double
 */
private void addColSpecsForDataFields(final PMMLDocument pmmlDoc, final List<DataColumnSpec> colSpecs) {
    // NOTE(review): the data dictionary is dereferenced without a null check - confirm that callers
    // only pass documents that contain one.
    DataDictionary dict = pmmlDoc.getPMML().getDataDictionary();
    for (DataField dataField : dict.getDataFieldArray()) {
        String name = dataField.getName();
        DataType dataType = getKNIMEDataType(dataField.getDataType());
        DataColumnSpecCreator specCreator = new DataColumnSpecCreator(name, dataType);
        DataColumnDomain domain = null;
        if (dataType.isCompatible(NominalValue.class)) {
            if (DataType.getType(StringCell.class).equals(dataType)) {
                if (dataField.getIntervalArray().length > 0) {
                    throw new IllegalArgumentException("Intervals cannot be defined for Strings.");
                }
                // every declared value becomes a possible value of the string column
                Value[] valueArray = dataField.getValueArray();
                DataCell[] cells = new StringCell[valueArray.length];
                for (int j = 0; j < cells.length; j++) {
                    cells[j] = new StringCell(valueArray[j].getValue());
                }
                domain = new DataColumnDomainCreator(cells).createDomain();
            }
        } else if (dataType.isCompatible(DoubleValue.class)) {
            Double leftMargin = null;
            Double rightMargin = null;
            Interval[] intervalArray = dataField.getIntervalArray();
            if (intervalArray != null && intervalArray.length > 0) {
                // only the first interval is taken into account
                Interval interval = intervalArray[0];
                leftMargin = interval.getLeftMargin();
                rightMargin = interval.getRightMargin();
            } else {
                // fetch the values once instead of re-reading the array for every check
                Value[] valueArray = dataField.getValueArray();
                if (valueArray != null && valueArray.length > 0) {
                    // try to derive the bounds from the values
                    List<Double> values = new ArrayList<Double>(valueArray.length);
                    for (int j = 0; j < valueArray.length; j++) {
                        String value = "";
                        try {
                            value = valueArray[j].getValue();
                            values.add(Double.parseDouble(value));
                        } catch (RuntimeException e) {
                            // keep the original failure as cause so its stack trace is not lost
                            throw new IllegalArgumentException("Skipping domain calculation. " + "Value \"" + value + "\" cannot be cast to double.", e);
                        }
                    }
                    leftMargin = Collections.min(values);
                    rightMargin = Collections.max(values);
                }
            }
            if (leftMargin != null && rightMargin != null) {
                // set the bounds of the domain if available; the bound cells stay null for
                // numeric types other than int and double
                DataCell lowerBound = null;
                DataCell upperBound = null;
                if (DataType.getType(IntCell.class).equals(dataType)) {
                    // intValue() drops any fractional part of the margins
                    lowerBound = new IntCell(leftMargin.intValue());
                    upperBound = new IntCell(rightMargin.intValue());
                } else if (DataType.getType(DoubleCell.class).equals(dataType)) {
                    lowerBound = new DoubleCell(leftMargin);
                    upperBound = new DoubleCell(rightMargin);
                }
                domain = new DataColumnDomainCreator(lowerBound, upperBound).createDomain();
            } else {
                domain = new DataColumnDomainCreator().createDomain();
            }
        }
        specCreator.setDomain(domain);
        colSpecs.add(specCreator.createSpec());
        m_dictFields.add(name);
    }
}
Also used : DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) DoubleCell(org.knime.core.data.def.DoubleCell) ArrayList(java.util.ArrayList) DataColumnDomainCreator(org.knime.core.data.DataColumnDomainCreator) DataDictionary(org.dmg.pmml.DataDictionaryDocument.DataDictionary) IntCell(org.knime.core.data.def.IntCell) DataColumnDomain(org.knime.core.data.DataColumnDomain) DataField(org.dmg.pmml.DataFieldDocument.DataField) StringCell(org.knime.core.data.def.StringCell) DoubleValue(org.knime.core.data.DoubleValue) NominalValue(org.knime.core.data.NominalValue) BooleanValue(org.knime.core.data.BooleanValue) IntValue(org.knime.core.data.IntValue) Value(org.dmg.pmml.ValueDocument.Value) DoubleValue(org.knime.core.data.DoubleValue) DataType(org.knime.core.data.DataType) DataCell(org.knime.core.data.DataCell) Interval(org.dmg.pmml.IntervalDocument.Interval)

Example 3 with Value

use of org.dmg.pmml.ValueDocument.Value in project knime-core by knime.

the class PMMLDataDictionaryTranslator method exportTo.

/**
 * Builds a PMML data dictionary from the given {@link DataTableSpec} and sets it on the PMML
 * document.
 * <p>
 * One data field is created per column, carrying the column name, its optype and its PMML data
 * type. For columns compatible with {@link NominalValue} whose domain has values, each possible
 * value is exported as a {@code Value} element. Otherwise, for columns compatible with
 * {@link DoubleValue} whose domain has bounds, a single closed interval spanning the lower and
 * upper bound is exported.
 *
 * @param pmmlDoc the PMML document to export to
 * @param dts the data table spec
 * @return the schema type of the exported schema if applicable, otherwise
 *         null; this implementation always returns null
 * @see #exportTo(PMMLDocument, PMMLPortObjectSpec)
 */
public SchemaType exportTo(final PMMLDocument pmmlDoc, final DataTableSpec dts) {
    final DataDictionary dictionary = DataDictionary.Factory.newInstance();
    dictionary.setNumberOfFields(BigInteger.valueOf(dts.getNumColumns()));
    for (final DataColumnSpec column : dts) {
        final DataField field = dictionary.addNewDataField();
        field.setName(column.getName());
        final DataType type = column.getType();
        field.setOptype(getOptype(type));
        field.setDataType(getPMMLDataType(type));

        final DataColumnDomain domain = column.getDomain();
        final boolean nominalWithValues = type.isCompatible(NominalValue.class) && domain.hasValues();
        if (nominalWithValues) {
            // export every possible value of the nominal column
            for (final DataCell possibleValue : domain.getValues()) {
                field.addNewValue().setValue(possibleValue.toString());
            }
        } else if (type.isCompatible(DoubleValue.class) && domain.hasBounds()) {
            // export the numeric bounds as one closed interval
            final double lower = ((DoubleValue) domain.getLowerBound()).getDoubleValue();
            final double upper = ((DoubleValue) domain.getUpperBound()).getDoubleValue();
            final Interval range = field.addNewInterval();
            range.setClosure(Interval.Closure.CLOSED_CLOSED);
            range.setLeftMargin(lower);
            range.setRightMargin(upper);
        }
    }
    pmmlDoc.getPMML().setDataDictionary(dictionary);
    // no schematype available yet
    return null;
}
Also used : DataColumnSpec(org.knime.core.data.DataColumnSpec) DataField(org.dmg.pmml.DataFieldDocument.DataField) DoubleValue(org.knime.core.data.DoubleValue) NominalValue(org.knime.core.data.NominalValue) NominalValue(org.knime.core.data.NominalValue) BooleanValue(org.knime.core.data.BooleanValue) IntValue(org.knime.core.data.IntValue) Value(org.dmg.pmml.ValueDocument.Value) DoubleValue(org.knime.core.data.DoubleValue) DataType(org.knime.core.data.DataType) DataCell(org.knime.core.data.DataCell) DataDictionary(org.dmg.pmml.DataDictionaryDocument.DataDictionary) Interval(org.dmg.pmml.IntervalDocument.Interval)

Aggregations

DataDictionary (org.dmg.pmml.DataDictionaryDocument.DataDictionary)3 DataField (org.dmg.pmml.DataFieldDocument.DataField)3 Value (org.dmg.pmml.ValueDocument.Value)3 ArrayList (java.util.ArrayList)2 Interval (org.dmg.pmml.IntervalDocument.Interval)2 BooleanValue (org.knime.core.data.BooleanValue)2 DataCell (org.knime.core.data.DataCell)2 DataType (org.knime.core.data.DataType)2 DoubleValue (org.knime.core.data.DoubleValue)2 IntValue (org.knime.core.data.IntValue)2 NominalValue (org.knime.core.data.NominalValue)2 LinkedHashMap (java.util.LinkedHashMap)1 LinkedList (java.util.LinkedList)1 List (java.util.List)1 DataColumnDomain (org.knime.core.data.DataColumnDomain)1 DataColumnDomainCreator (org.knime.core.data.DataColumnDomainCreator)1 DataColumnSpec (org.knime.core.data.DataColumnSpec)1 DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator)1 DoubleCell (org.knime.core.data.def.DoubleCell)1 IntCell (org.knime.core.data.def.IntCell)1