Search in sources :

Example 6 with DataColumnProperties

use of org.knime.core.data.DataColumnProperties in project knime-core by knime.

the class MissingValueHandling2NodeDialogPane method createAsIncompatibleMarkedColumnSpec.

/**
 * Creates a copy of the given column spec whose properties are overlaid with the entries returned by
 * {@code createIncompatiblePropertiesMap()}.
 *
 * @param originalSpec the column spec to copy; all of its settings are taken over by the copy
 * @return a new column spec based on {@code originalSpec}; its properties are the original properties
 *         overwritten with the incompatibility entries, or only those entries if the original spec
 *         reported no properties
 */
private static DataColumnSpec createAsIncompatibleMarkedColumnSpec(final DataColumnSpec originalSpec) {
    final DataColumnSpecCreator specCreator = new DataColumnSpecCreator(originalSpec);
    final DataColumnProperties existingProps = originalSpec.getProperties();
    final Map<String, String> incompatibleEntries = createIncompatiblePropertiesMap();
    // Keep whatever properties already exist and let the marker entries win on key clashes.
    final DataColumnProperties markedProps = existingProps == null
        ? new DataColumnProperties(incompatibleEntries)
        : existingProps.cloneAndOverwrite(incompatibleEntries);
    specCreator.setProperties(markedProps);
    return specCreator.createSpec();
}
Also used : DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) DataColumnSpec(org.knime.core.data.DataColumnSpec) DataColumnProperties(org.knime.core.data.DataColumnProperties)

Example 7 with DataColumnProperties

use of org.knime.core.data.DataColumnProperties in project knime-core by knime.

the class LogRegCoordinator method init.

/**
 * Initialize instance and check if settings are consistent.
 * <p>
 * Determines the target column and the learning (regressor) columns from the settings and the input spec,
 * records vector/collection columns in {@code m_specialColumns}, and builds {@code m_pmmlOutSpec}.
 * If no target column is configured and the column filter excludes nothing, the target is auto-selected:
 * the last column compatible with {@code NominalValue} wins.
 *
 * @param inSpec the spec of the input table
 * @param exclude names of columns that must not be used as learning columns
 * @throws InvalidSettingsException if no input column remains, no nominal target can be found, the target
 *             is missing from the input or not nominal, the target's domain lists fewer than two values,
 *             or an included column has an unsupported type
 */
private void init(final DataTableSpec inSpec, final Set<String> exclude) throws InvalidSettingsException {
    final FilterResult includedColumns = m_settings.getIncludedColumns().applyTo(inSpec);
    final List<String> inputCols = new ArrayList<String>(Arrays.asList(includedColumns.getIncludes()));
    inputCols.remove(m_settings.getTargetColumn());
    if (inputCols.isEmpty()) {
        throw new InvalidSettingsException("At least one column must be included.");
    }
    // Auto configuration when target is not set
    if (null == m_settings.getTargetColumn() && includedColumns.getExcludes().length == 0) {
        for (int i = 0; i < inSpec.getNumColumns(); i++) {
            DataColumnSpec colSpec = inSpec.getColumnSpec(i);
            String colName = colSpec.getName();
            inputCols.remove(colName);
            if (colSpec.getType().isCompatible(NominalValue.class)) {
                m_settings.setTargetColumn(colName);
            }
        }
        // when there is no column with nominal data
        if (null == m_settings.getTargetColumn()) {
            throw new InvalidSettingsException("No column in spec compatible to \"NominalValue\".");
        }
    }
    // May still be null here: no target configured and auto configuration skipped because the filter
    // has excludes. The comparison below is written null-safe so that this case ends in the
    // "target is not in the input" error instead of a NullPointerException.
    final String targetColumn = m_settings.getTargetColumn();
    // remove all columns that should not be used
    inputCols.removeAll(exclude);
    m_specialColumns = new LinkedList<>();
    DataColumnSpec targetColSpec = null;
    final List<DataColumnSpec> regressorColSpecs = new ArrayList<DataColumnSpec>();
    for (int i = 0; i < inSpec.getNumColumns(); i++) {
        DataColumnSpec colSpec = inSpec.getColumnSpec(i);
        String colName = colSpec.getName();
        final DataType type = colSpec.getType();
        if (colName.equals(targetColumn)) {
            if (type.isCompatible(NominalValue.class)) {
                targetColSpec = colSpec;
            } else {
                throw new InvalidSettingsException("Type of column \"" + colName + "\" is not nominal.");
            }
        } else if (inputCols.contains(colName)) {
            if (type.isCompatible(DoubleValue.class) || type.isCompatible(NominalValue.class)) {
                regressorColSpecs.add(colSpec);
            } else if (type.isCompatible(BitVectorValue.class) || type.isCompatible(ByteVectorValue.class) || (type.isCollectionType() && type.getCollectionElementType().isCompatible(DoubleValue.class))) {
                m_specialColumns.add(colSpec);
                // We change the table spec later to encode it as a string.
                regressorColSpecs.add(new DataColumnSpecCreator(colSpec.getName(), StringCell.TYPE).createSpec());
            } else {
                throw new InvalidSettingsException("Type of column \"" + colName + "\" is not one of the allowed types, " + "which are numeric or nomial.");
            }
        }
    }
    if (null == targetColSpec) {
        throw new InvalidSettingsException("The target is not in the input.");
    }
    // Check if target has at least two categories.
    final Set<DataCell> targetValues = targetColSpec.getDomain().getValues();
    if (targetValues != null && targetValues.size() < 2) {
        throw new InvalidSettingsException("The target column \"" + targetColSpec.getName() + "\" has one value, only. " + "At least two target categories are expected.");
    }
    // Bit/byte vector columns are announced as string columns in the PMML spec; the original kind is
    // kept in the "realType" column property.
    final DataColumnSpec[] updatedSpecs = new DataColumnSpec[inSpec.getNumColumns()];
    for (int i = updatedSpecs.length; i-- > 0; ) {
        final DataColumnSpec columnSpec = inSpec.getColumnSpec(i);
        final DataType type = columnSpec.getType();
        if (type.isCompatible(BitVectorValue.class) || type.isCompatible(ByteVectorValue.class)) {
            final DataColumnSpecCreator colSpecCreator = new DataColumnSpecCreator(columnSpec.getName(), StringCell.TYPE);
            colSpecCreator.setProperties(new DataColumnProperties(Collections.singletonMap("realType", type.isCompatible(BitVectorValue.class) ? "BitVector" : "ByteVector")));
            updatedSpecs[i] = colSpecCreator.createSpec();
        } else {
            updatedSpecs[i] = columnSpec;
        }
    }
    DataTableSpec updated = new DataTableSpec(updatedSpecs);
    PMMLPortObjectSpecCreator creator = new PMMLPortObjectSpecCreator(updated);
    creator.setTargetCols(Arrays.asList(targetColSpec));
    creator.setLearningCols(regressorColSpecs);
    // creator.addPreprocColNames(m_specialColumns.stream().flatMap(spec -> ));
    m_pmmlOutSpec = creator.createSpec();
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) ArrayList(java.util.ArrayList) ByteVectorValue(org.knime.core.data.vector.bytevector.ByteVectorValue) DataColumnSpec(org.knime.core.data.DataColumnSpec) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) DoubleValue(org.knime.core.data.DoubleValue) DataType(org.knime.core.data.DataType) DataCell(org.knime.core.data.DataCell) FilterResult(org.knime.core.node.util.filter.NameFilterConfiguration.FilterResult) BitVectorValue(org.knime.core.data.vector.bitvector.BitVectorValue) DataColumnProperties(org.knime.core.data.DataColumnProperties) PMMLPortObjectSpecCreator(org.knime.core.node.port.pmml.PMMLPortObjectSpecCreator)

Example 8 with DataColumnProperties

use of org.knime.core.data.DataColumnProperties in project knime-core by knime.

the class SourceColumnsAsProperties method toProperties.

/**
 * Creates {@link DataColumnProperties} containing a single entry: the key
 * {@link #PROPKEY_SOURCE_COLUMN_INDICES} mapped to the string that {@code indicesAsString} produces for the
 * result of applying the column filter to the input spec.
 *
 * @param selection The model for the selected columns.
 * @param input The input {@link DataTableSpec}.
 * @return The properties holding the encoded source column indices under
 *         {@link #PROPKEY_SOURCE_COLUMN_INDICES}.
 */
public static DataColumnProperties toProperties(final SettingsModelColumnFilter2 selection, final DataTableSpec input) {
    final FilterResult selectedColumns = selection.applyTo(input);
    final String encodedIndices = indicesAsString(selectedColumns, input);
    final Map<String, String> entries = new HashMap<String, String>();
    entries.put(PROPKEY_SOURCE_COLUMN_INDICES, encodedIndices);
    return new DataColumnProperties(entries);
}
Also used : HashMap(java.util.HashMap) FilterResult(org.knime.core.node.util.filter.NameFilterConfiguration.FilterResult) DataColumnProperties(org.knime.core.data.DataColumnProperties)

Example 9 with DataColumnProperties

use of org.knime.core.data.DataColumnProperties in project knime-core by knime.

the class PMMLGeneralRegressionTranslator method exportTo.

/**
 * {@inheritDoc}
 * <p>
 * Adds a new GeneralRegressionModel element to the PMML document and fills it from {@code m_content}:
 * derived fields for vector columns (local transformations), the mining schema, model type and function
 * name, the optional algorithm name, model name, target reference category and offset value, followed by
 * the parameter list, factor list, covariate list, PPMatrix, PCovMatrix (only if non-empty) and
 * ParamMatrix.
 *
 * @param pmmlDoc the PMML document the model is added to
 * @param spec the PMML port object spec; used to look up vector columns and to write the mining schema
 * @return {@code GeneralRegressionModel.type}
 */
@Override
public SchemaType exportTo(final PMMLDocument pmmlDoc, final PMMLPortObjectSpec spec) {
    m_nameMapper = new DerivedFieldMapper(pmmlDoc);
    GeneralRegressionModel reg = pmmlDoc.getPMML().addNewGeneralRegressionModel();
    // NOTE(review): entries are added to this builder below, but the only code that reads it (the
    // mining schema extension further down) is commented out, so its content is currently unused here.
    final JsonObjectBuilder jsonBuilder = Json.createObjectBuilder();
    if (!m_content.getVectorLengths().isEmpty()) {
        // One derived field per vector position, each defined as a substring of the (string encoded)
        // vector column.
        LocalTransformations localTransformations = reg.addNewLocalTransformations();
        for (final Entry<? extends String, ? extends Integer> entry : m_content.getVectorLengths().entrySet()) {
            DataColumnSpec columnSpec = spec.getDataTableSpec().getColumnSpec(entry.getKey());
            // Columns not present in the spec get no derived fields but are still added to jsonBuilder.
            if (columnSpec != null) {
                final DataType type = columnSpec.getType();
                final DataColumnProperties props = columnSpec.getProperties();
                // A column counts as bit/byte vector either by its type or when it is a string column
                // whose "realType" property names the vector kind.
                final boolean bitVector = type.isCompatible(BitVectorValue.class) || (type.isCompatible(StringValue.class) && props.containsProperty("realType") && "BitVector".equals(props.getProperty("realType")));
                final boolean byteVector = type.isCompatible(ByteVectorValue.class) || (type.isCompatible(StringValue.class) && props.containsProperty("realType") && "ByteVector".equals(props.getProperty("realType")));
                final String lengthAsString;
                final int width;
                // Byte vector is checked first; it uses substrings of length 3 whose start positions
                // advance by 4. NOTE(review): presumably 3 digits plus one separator character per
                // value - confirm against the code that encodes the vector as a string.
                if (byteVector) {
                    lengthAsString = "3";
                    width = 4;
                } else if (bitVector) {
                    lengthAsString = "1";
                    width = 1;
                } else {
                    throw new UnsupportedOperationException("Not supported type: " + type + " for column: " + columnSpec);
                }
                for (int i = 0; i < entry.getValue().intValue(); ++i) {
                    final DerivedField derivedField = localTransformations.addNewDerivedField();
                    derivedField.setOptype(OPTYPE.CONTINUOUS);
                    derivedField.setDataType(DATATYPE.INTEGER);
                    // Derived field name: <column>[<index>]
                    derivedField.setName(entry.getKey() + "[" + i + "]");
                    Apply apply = derivedField.addNewApply();
                    apply.setFunction("substring");
                    apply.addNewFieldRef().setField(entry.getKey());
                    Constant from = apply.addNewConstant();
                    from.setDataType(DATATYPE.INTEGER);
                    // Start position: for bit vectors it counts down from the vector length
                    // (length - i), otherwise it is i * width + 1.
                    from.setStringValue(bitVector ? Long.toString(entry.getValue().longValue() - i) : Long.toString(i * width + 1L));
                    Constant length = apply.addNewConstant();
                    length.setDataType(DATATYPE.INTEGER);
                    length.setStringValue(lengthAsString);
                }
            }
            jsonBuilder.add(entry.getKey(), entry.getValue().intValue());
        }
    }
    // PMMLPortObjectSpecCreator newSpecCreator = new PMMLPortObjectSpecCreator(spec);
    // newSpecCreator.addPreprocColNames(m_content.getVectorLengths().entrySet().stream()
    // .flatMap(
    // e -> IntStream.iterate(0, o -> o + 1).limit(e.getValue()).mapToObj(i -> e.getKey() + "[" + i + "]"))
    // .collect(Collectors.toList()));
    PMMLMiningSchemaTranslator.writeMiningSchema(spec, reg);
    // if (!m_content.getVectorLengths().isEmpty()) {
    // Extension miningExtension = reg.getMiningSchema().addNewExtension();
    // miningExtension.setExtender(EXTENDER);
    // miningExtension.setName(VECTOR_COLUMNS_WITH_LENGTH);
    // miningExtension.setValue(jsonBuilder.build().toString());
    // }
    reg.setModelType(getPMMLRegModelType(m_content.getModelType()));
    reg.setFunctionName(getPMMLMiningFunction(m_content.getFunctionName()));
    // The following attributes are optional: they are only written when set (and, for strings, non-empty).
    String algorithmName = m_content.getAlgorithmName();
    if (algorithmName != null && !algorithmName.isEmpty()) {
        reg.setAlgorithmName(algorithmName);
    }
    String modelName = m_content.getModelName();
    if (modelName != null && !modelName.isEmpty()) {
        reg.setModelName(modelName);
    }
    String targetReferenceCategory = m_content.getTargetReferenceCategory();
    if (targetReferenceCategory != null && !targetReferenceCategory.isEmpty()) {
        reg.setTargetReferenceCategory(targetReferenceCategory);
    }
    if (m_content.getOffsetValue() != null) {
        reg.setOffsetValue(m_content.getOffsetValue());
    }
    // add parameter list
    ParameterList paramList = reg.addNewParameterList();
    for (PMMLParameter p : m_content.getParameterList()) {
        Parameter param = paramList.addNewParameter();
        param.setName(p.getName());
        String label = p.getLabel();
        if (label != null) {
            // Labels are passed through the derived field name mapper before being written.
            param.setLabel(m_nameMapper.getDerivedFieldName(label));
        }
    }
    // add factor list
    FactorList factorList = reg.addNewFactorList();
    for (PMMLPredictor p : m_content.getFactorList()) {
        Predictor predictor = factorList.addNewPredictor();
        predictor.setName(m_nameMapper.getDerivedFieldName(p.getName()));
    }
    // add covariate list
    CovariateList covariateList = reg.addNewCovariateList();
    for (PMMLPredictor p : m_content.getCovariateList()) {
        Predictor predictor = covariateList.addNewPredictor();
        predictor.setName(m_nameMapper.getDerivedFieldName(p.getName()));
    }
    // add PPMatrix
    PPMatrix ppMatrix = reg.addNewPPMatrix();
    for (PMMLPPCell p : m_content.getPPMatrix()) {
        PPCell cell = ppMatrix.addNewPPCell();
        cell.setValue(p.getValue());
        cell.setPredictorName(m_nameMapper.getDerivedFieldName(p.getPredictorName()));
        cell.setParameterName(p.getParameterName());
        String targetCategory = p.getTargetCategory();
        if (targetCategory != null && !targetCategory.isEmpty()) {
            cell.setTargetCategory(targetCategory);
        }
    }
    // add CovMatrix
    if (m_content.getPCovMatrix().length > 0) {
        PCovMatrix pCovMatrix = reg.addNewPCovMatrix();
        for (PMMLPCovCell p : m_content.getPCovMatrix()) {
            PCovCell covCell = pCovMatrix.addNewPCovCell();
            covCell.setPRow(p.getPRow());
            covCell.setPCol(p.getPCol());
            String tCol = p.getTCol();
            String tRow = p.getTRow();
            // Both setters are called as soon as either value is non-null, so one of them may be
            // handed a null value.
            if (tRow != null || tCol != null) {
                covCell.setTRow(tRow);
                covCell.setTCol(tCol);
            }
            covCell.setValue(p.getValue());
            String targetCategory = p.getTargetCategory();
            if (targetCategory != null && !targetCategory.isEmpty()) {
                covCell.setTargetCategory(targetCategory);
            }
        }
    }
    // add ParamMatrix
    ParamMatrix paramMatrix = reg.addNewParamMatrix();
    for (PMMLPCell p : m_content.getParamMatrix()) {
        PCell pCell = paramMatrix.addNewPCell();
        String targetCategory = p.getTargetCategory();
        // Unlike the matrices above, an empty (non-null) target category is written here.
        if (targetCategory != null) {
            pCell.setTargetCategory(targetCategory);
        }
        pCell.setParameterName(p.getParameterName());
        pCell.setBeta(p.getBeta());
        Integer df = p.getDf();
        if (df != null) {
            pCell.setDf(BigInteger.valueOf(df));
        }
    }
    return GeneralRegressionModel.type;
}
Also used : Predictor(org.dmg.pmml.PredictorDocument.Predictor) Apply(org.dmg.pmml.ApplyDocument.Apply) Constant(org.dmg.pmml.ConstantDocument.Constant) PPCell(org.dmg.pmml.PPCellDocument.PPCell) ByteVectorValue(org.knime.core.data.vector.bytevector.ByteVectorValue) DerivedFieldMapper(org.knime.core.node.port.pmml.preproc.DerivedFieldMapper) DataColumnSpec(org.knime.core.data.DataColumnSpec) FactorList(org.dmg.pmml.FactorListDocument.FactorList) PPCell(org.dmg.pmml.PPCellDocument.PPCell) PCell(org.dmg.pmml.PCellDocument.PCell) DataType(org.knime.core.data.DataType) JsonObjectBuilder(javax.json.JsonObjectBuilder) DataColumnProperties(org.knime.core.data.DataColumnProperties) ParamMatrix(org.dmg.pmml.ParamMatrixDocument.ParamMatrix) PPMatrix(org.dmg.pmml.PPMatrixDocument.PPMatrix) CovariateList(org.dmg.pmml.CovariateListDocument.CovariateList) PCovMatrix(org.dmg.pmml.PCovMatrixDocument.PCovMatrix) BigInteger(java.math.BigInteger) LocalTransformations(org.dmg.pmml.LocalTransformationsDocument.LocalTransformations) PCovCell(org.dmg.pmml.PCovCellDocument.PCovCell) GeneralRegressionModel(org.dmg.pmml.GeneralRegressionModelDocument.GeneralRegressionModel) ParameterList(org.dmg.pmml.ParameterListDocument.ParameterList) Parameter(org.dmg.pmml.ParameterDocument.Parameter) BitVectorValue(org.knime.core.data.vector.bitvector.BitVectorValue) DerivedField(org.dmg.pmml.DerivedFieldDocument.DerivedField)

Example 10 with DataColumnProperties

use of org.knime.core.data.DataColumnProperties in project knime-core by knime.

the class ClusterMembershipFactory method getColumnSpecs.

/**
 * {@inheritDoc}
 * <p>
 * Produces {@code m_nrClusters} double columns followed by one string column named "Winner Cluster".
 * Each double column has the domain [0.0, 1.0] and prefers the {@code DoubleBarRenderer}. The columns are
 * named {@code FuzzyClusterNodeModel.CLUSTER_KEY + index}; if {@code m_noise} is set, the last double
 * column is named {@code FuzzyClusterNodeModel.NOISESPEC_KEY} instead.
 */
public DataColumnSpec[] getColumnSpecs() {
    final int clusterCount = m_nrClusters;
    final DataColumnSpec[] specs = new DataColumnSpec[clusterCount + 1];
    // Position of the noise column (if any): the slot right before the trailing winner column.
    final int noiseIndex = specs.length - 2;
    for (int col = 0; col < clusterCount; col++) {
        final String columnName;
        if (m_noise && col == noiseIndex) {
            columnName = FuzzyClusterNodeModel.NOISESPEC_KEY;
        } else {
            columnName = FuzzyClusterNodeModel.CLUSTER_KEY + col;
        }
        final DataColumnSpecCreator membershipCreator = new DataColumnSpecCreator(columnName, DoubleCell.TYPE);
        membershipCreator.setProperties(new DataColumnProperties(Collections.singletonMap(DataValueRenderer.PROPERTY_PREFERRED_RENDERER, DoubleBarRenderer.DESCRIPTION)));
        membershipCreator.setDomain(new DataColumnDomainCreator(new DoubleCell(0.0), new DoubleCell(1.0)).createDomain());
        specs[col] = membershipCreator.createSpec();
    }
    specs[specs.length - 1] = new DataColumnSpecCreator("Winner Cluster", StringCell.TYPE).createSpec();
    return specs;
}
Also used : DataColumnSpec(org.knime.core.data.DataColumnSpec) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) DoubleCell(org.knime.core.data.def.DoubleCell) DataColumnDomainCreator(org.knime.core.data.DataColumnDomainCreator) DataColumnProperties(org.knime.core.data.DataColumnProperties)

Aggregations

DataColumnProperties (org.knime.core.data.DataColumnProperties)11 DataColumnSpec (org.knime.core.data.DataColumnSpec)9 DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator)9 DataType (org.knime.core.data.DataType)4 DataTableSpec (org.knime.core.data.DataTableSpec)3 BitVectorValue (org.knime.core.data.vector.bitvector.BitVectorValue)3 ByteVectorValue (org.knime.core.data.vector.bytevector.ByteVectorValue)3 FilterResult (org.knime.core.node.util.filter.NameFilterConfiguration.FilterResult)3 ArrayList (java.util.ArrayList)2 DataCell (org.knime.core.data.DataCell)2 DoubleValue (org.knime.core.data.DoubleValue)2 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)2 PMMLPortObjectSpecCreator (org.knime.core.node.port.pmml.PMMLPortObjectSpecCreator)2 BigInteger (java.math.BigInteger)1 HashMap (java.util.HashMap)1 Hashtable (java.util.Hashtable)1 LinkedHashMap (java.util.LinkedHashMap)1 JsonObjectBuilder (javax.json.JsonObjectBuilder)1 Apply (org.dmg.pmml.ApplyDocument.Apply)1 Constant (org.dmg.pmml.ConstantDocument.Constant)1