Search in sources :

Example 11 with DataColumnProperties

Use of org.knime.core.data.DataColumnProperties in project knime-core by knime.

The class LogRegLearner, method init.

/**
 * Initialize instance and check if settings are consistent.
 *
 * <p>Resolves the target column and the regressor columns from the settings and the
 * given input spec, then assigns {@code m_specialColumns}, {@code m_pmmlOutSpec} and
 * {@code m_learner}. If no target column is configured and the column filter excludes
 * nothing, a nominal column of the input is picked as target and stored in the settings.
 *
 * @param inSpec the spec of the input table
 * @param exclude names of columns that must not be used as regressors
 * @throws InvalidSettingsException if no column is included, if no nominal column is
 *             available for auto configuration, if the target column is missing from the
 *             input or not nominal, if an included column has an unsupported type, or if
 *             the target domain lists fewer than two values
 */
private void init(final DataTableSpec inSpec, final Set<String> exclude) throws InvalidSettingsException {
    final FilterResult includedColumns = m_settings.getIncludedColumns().applyTo(inSpec);
    final List<String> inputCols = new ArrayList<String>();
    for (String column : includedColumns.getIncludes()) {
        inputCols.add(column);
    }
    inputCols.remove(m_settings.getTargetColumn());
    if (inputCols.isEmpty()) {
        throw new InvalidSettingsException("At least one column must " + "be included.");
    }
    // Auto configuration when target is not set
    if (null == m_settings.getTargetColumn() && includedColumns.getExcludes().length == 0) {
        for (int i = 0; i < inSpec.getNumColumns(); i++) {
            final DataColumnSpec colSpec = inSpec.getColumnSpec(i);
            final String colName = colSpec.getName();
            // NOTE(review): this drops every input column from the regressor candidates,
            // not only the one chosen as target - confirm that this is intended.
            inputCols.remove(colName);
            if (colSpec.getType().isCompatible(NominalValue.class)) {
                // no break: the last nominal column of the spec ends up as target
                m_settings.setTargetColumn(colName);
            }
        }
        // when there is no column with nominal data
        if (null == m_settings.getTargetColumn()) {
            throw new InvalidSettingsException("No column in " + "spec compatible to \"NominalValue\".");
        }
    }
    // remove all columns that should not be used
    inputCols.removeAll(exclude);

    // May still be null here (no target configured but the filter has excludes); the
    // comparison below is null-safe so that case ends in an InvalidSettingsException
    // instead of a NullPointerException.
    final String targetColumn = m_settings.getTargetColumn();
    DataColumnSpec targetColSpec = null;
    final List<DataColumnSpec> regressorColSpecs = new ArrayList<DataColumnSpec>();
    m_specialColumns = new LinkedList<>();
    for (int i = 0; i < inSpec.getNumColumns(); i++) {
        final DataColumnSpec colSpec = inSpec.getColumnSpec(i);
        final String colName = colSpec.getName();
        final DataType type = colSpec.getType();
        if (colName.equals(targetColumn)) {
            if (!type.isCompatible(NominalValue.class)) {
                throw new InvalidSettingsException("Type of column \"" + colName + "\" is not nominal.");
            }
            targetColSpec = colSpec;
        } else if (inputCols.contains(colName)) {
            if (type.isCompatible(DoubleValue.class) || type.isCompatible(NominalValue.class)) {
                regressorColSpecs.add(colSpec);
            } else if (type.isCompatible(BitVectorValue.class) || type.isCompatible(ByteVectorValue.class) || (type.isCollectionType() && type.getCollectionElementType().isCompatible(DoubleValue.class))) {
                m_specialColumns.add(colSpec);
                // We change the table spec later to encode it as a string.
                regressorColSpecs.add(new DataColumnSpecCreator(colSpec.getName(), StringCell.TYPE).createSpec());
            } else {
                throw new InvalidSettingsException("Type of column \"" + colName + "\" is not one of the allowed types, " + "which are numeric or nomial.");
            }
        }
    }
    if (null == targetColSpec) {
        throw new InvalidSettingsException("The target is " + "not in the input.");
    }
    // Check if target has at least two categories (skipped when the domain lists no values).
    final Set<DataCell> targetValues = targetColSpec.getDomain().getValues();
    if (targetValues != null && targetValues.size() < 2) {
        throw new InvalidSettingsException("The target column \"" + targetColSpec.getName() + "\" has one value, only. " + "At least two target categories are expected.");
    }
    // Build the spec handed to the PMML spec creator: bit/byte vector columns are replaced
    // by string columns that carry the original kind in the "realType" property.
    final DataColumnSpec[] updatedSpecs = new DataColumnSpec[inSpec.getNumColumns()];
    for (int i = 0; i < updatedSpecs.length; i++) {
        final DataColumnSpec columnSpec = inSpec.getColumnSpec(i);
        final DataType type = columnSpec.getType();
        final boolean isBitVector = type.isCompatible(BitVectorValue.class);
        if (isBitVector || type.isCompatible(ByteVectorValue.class)) {
            final DataColumnSpecCreator colSpecCreator = new DataColumnSpecCreator(columnSpec.getName(), StringCell.TYPE);
            colSpecCreator.setProperties(new DataColumnProperties(Collections.singletonMap("realType", isBitVector ? "BitVector" : "ByteVector")));
            updatedSpecs[i] = colSpecCreator.createSpec();
        } else {
            updatedSpecs[i] = columnSpec;
        }
    }
    final DataTableSpec updated = new DataTableSpec(updatedSpecs);
    final PMMLPortObjectSpecCreator creator = new PMMLPortObjectSpecCreator(updated);
    creator.setTargetCols(Arrays.asList(targetColSpec));
    creator.setLearningCols(regressorColSpecs);
    m_pmmlOutSpec = creator.createSpec();
    m_learner = new Learner(m_pmmlOutSpec, m_specialColumns, m_settings.getTargetReferenceCategory(), m_settings.getSortTargetCategories(), m_settings.getSortIncludesCategories());
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) ArrayList(java.util.ArrayList) ByteVectorValue(org.knime.core.data.vector.bytevector.ByteVectorValue) DataColumnSpec(org.knime.core.data.DataColumnSpec) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) DoubleValue(org.knime.core.data.DoubleValue) DataType(org.knime.core.data.DataType) DataCell(org.knime.core.data.DataCell) FilterResult(org.knime.core.node.util.filter.NameFilterConfiguration.FilterResult) BitVectorValue(org.knime.core.data.vector.bitvector.BitVectorValue) DataColumnProperties(org.knime.core.data.DataColumnProperties) PMMLPortObjectSpecCreator(org.knime.core.node.port.pmml.PMMLPortObjectSpecCreator)

Aggregations

DataColumnProperties (org.knime.core.data.DataColumnProperties)11 DataColumnSpec (org.knime.core.data.DataColumnSpec)9 DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator)9 DataType (org.knime.core.data.DataType)4 DataTableSpec (org.knime.core.data.DataTableSpec)3 BitVectorValue (org.knime.core.data.vector.bitvector.BitVectorValue)3 ByteVectorValue (org.knime.core.data.vector.bytevector.ByteVectorValue)3 FilterResult (org.knime.core.node.util.filter.NameFilterConfiguration.FilterResult)3 ArrayList (java.util.ArrayList)2 DataCell (org.knime.core.data.DataCell)2 DoubleValue (org.knime.core.data.DoubleValue)2 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)2 PMMLPortObjectSpecCreator (org.knime.core.node.port.pmml.PMMLPortObjectSpecCreator)2 BigInteger (java.math.BigInteger)1 HashMap (java.util.HashMap)1 Hashtable (java.util.Hashtable)1 LinkedHashMap (java.util.LinkedHashMap)1 JsonObjectBuilder (javax.json.JsonObjectBuilder)1 Apply (org.dmg.pmml.ApplyDocument.Apply)1 Constant (org.dmg.pmml.ConstantDocument.Constant)1