
Example 16 with DataColumnDomain

Use of org.knime.core.data.DataColumnDomain in project knime-core by knime.

The class ShapeManagerNodeDialogPane, method loadSettingsFrom.

/**
 * Updates this dialog by refreshing all components in the shape settings
 * tab. Initializes the column name combo box and sets the values for the
 * default selected column.
 *
 * @param settings the settings to load
 * @param specs the input table specs
 * @throws NotConfigurableException if no column contains domain values
 * @see NodeDialogPane#loadSettingsFrom(NodeSettingsRO, DataTableSpec[])
 */
@Override
protected void loadSettingsFrom(final NodeSettingsRO settings, final DataTableSpec[] specs) throws NotConfigurableException {
    // remove all columns and column value to shape mappings
    m_columns.removeAllItems();
    m_map.clear();
    // read settings and write into the map
    String target = settings.getString(ShapeManagerNodeModel.SELECTED_COLUMN, null);
    // add columns and domain value mapping
    int cols = specs[0].getNumColumns();
    Shape[] shapes = ShapeFactory.getShapes().toArray(new Shape[] {});
    for (int i = 0; i < cols; i++) {
        DataColumnSpec cspec = specs[0].getColumnSpec(i);
        DataColumnDomain domain = cspec.getDomain();
        if (domain.hasValues()) {
            LinkedHashMap<DataCell, Shape> domMap = new LinkedHashMap<DataCell, Shape>();
            int j = 0;
            for (DataCell value : domain.getValues()) {
                if (value != null) {
                    // no settings -> assign different shapes
                    String shape = settings.getString(value.toString(), null);
                    if (shape == null) {
                        // bugfix 1283
                        domMap.put(value, shapes[j++ % shapes.length]);
                    } else {
                        domMap.put(value, ShapeFactory.getShape(shape));
                    }
                }
            }
            m_map.put(cspec.getName(), domMap);
        } else {
            continue;
        }
        m_columns.addItem(cspec);
        if (cspec.getName().equals(target)) {
            m_columns.setSelectedItem(cspec);
        }
    }
    if (m_map.size() == 0) {
        throw new NotConfigurableException("No column in data contains" + " domain values.");
    }
    columnChanged(getSelectedColumn());
    m_columns.addItemListener(this);
}
Also used : NotConfigurableException(org.knime.core.node.NotConfigurableException) Shape(org.knime.core.data.property.ShapeFactory.Shape) DataColumnSpec(org.knime.core.data.DataColumnSpec) DataColumnDomain(org.knime.core.data.DataColumnDomain) DataCell(org.knime.core.data.DataCell) LinkedHashMap(java.util.LinkedHashMap)
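
A minimal sketch of the same domain-to-shape lookup in isolation, assuming spec is a DataColumnSpec whose domain carries nominal values (the variable names are placeholders, not part of the node code):

DataColumnDomain domain = spec.getDomain();
if (domain.hasValues()) {
    // one shape per domain value, cycling through the available shapes
    Shape[] shapes = ShapeFactory.getShapes().toArray(new Shape[0]);
    Map<DataCell, Shape> mapping = new LinkedHashMap<>();
    int idx = 0;
    for (DataCell value : domain.getValues()) {
        mapping.put(value, shapes[idx++ % shapes.length]);
    }
}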

Example 17 with DataColumnDomain

Use of org.knime.core.data.DataColumnDomain in project knime-core by knime.

The class IntervalBinCreator, method createBins.

/**
 * @param colSpec the column specification
 * @param numberOfBins the number of bins to create
 * @return the created bins
 */
public List<E> createBins(final DataColumnSpec colSpec, final int numberOfBins) {
    // set the bounds for binning
    final DataColumnDomain domain = colSpec.getDomain();
    final DataCell lowerBoundCell = domain.getLowerBound();
    if (lowerBoundCell == null || lowerBoundCell.isMissing() || !lowerBoundCell.getType().isCompatible(DoubleValue.class)) {
        throw new IllegalArgumentException("The lower bound of the binning column domain " + "should be defined");
    }
    final double lowerBound = ((DoubleValue) lowerBoundCell).getDoubleValue();
    final DataCell upperBoundCell = domain.getUpperBound();
    if (upperBoundCell == null || upperBoundCell.isMissing() || !upperBoundCell.getType().isCompatible(DoubleValue.class)) {
        throw new IllegalArgumentException("The upper bound of the binning column domain " + "should be defined");
    }
    final double upperBound = ((DoubleValue) upperBoundCell).getDoubleValue();
    int noOfBins = numberOfBins;
    // start the binning
    if (noOfBins < 1) {
        noOfBins = AbstractHistogramVizModel.DEFAULT_NO_OF_BINS;
    }
    if ((lowerBound - upperBound) == 0) {
        noOfBins = 1;
    }
    final boolean isInteger = colSpec.getType().isCompatible(LongValue.class);
    double binInterval = BinningUtil.createBinInterval(upperBound, lowerBound, noOfBins, isInteger);
    final double calculatedLowerBound = BinningUtil.createBinStart(lowerBound, binInterval, isInteger);
    if (calculatedLowerBound != lowerBound) {
        binInterval = BinningUtil.createBinInterval(upperBound, calculatedLowerBound, noOfBins, isInteger);
    }
    double leftBoundary = calculatedLowerBound;
    final double lastBoundary = BinningUtil.myRoundedBorders(upperBound, binInterval, AbstractHistogramVizModel.INTERVAL_DIGITS, isInteger);
    // increase bin interval if we have rounding problems
    while (leftBoundary + (binInterval * noOfBins) < lastBoundary) {
        binInterval = binInterval + binInterval * 0.001;
    }
    boolean firstBar = true;
    createList(noOfBins);
    for (int i = 0; i < noOfBins; i++) {
        // I have to use this rounding method to avoid problems with very
        // small intervals. If the interval is very small it could happen
        // that we get the same boundaries for several bars by rounding the
        // borders
        double rightBoundary;
        if (isInteger && binInterval == 1) {
            rightBoundary = leftBoundary;
        } else {
            rightBoundary = BinningUtil.myRoundedBorders(leftBoundary + binInterval, binInterval, AbstractHistogramVizModel.INTERVAL_DIGITS, isInteger);
        }
        final String binCaption = BinningUtil.createBarName(firstBar, leftBoundary, rightBoundary);
        firstBar = false;
        addBin(binCaption, leftBoundary, rightBoundary);
        // set the left boundary of the next bin to the current right boundary
        if (isInteger && binInterval == 1) {
            leftBoundary = rightBoundary + binInterval;
        } else {
            leftBoundary = rightBoundary;
        }
    }
    return getBins();
}
Also used : DataColumnDomain(org.knime.core.data.DataColumnDomain) DoubleValue(org.knime.core.data.DoubleValue) DataCell(org.knime.core.data.DataCell)
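
Both domain bounds must be present and numeric before the binning can start. A minimal sketch of that precondition on its own, assuming colSpec is a numeric DataColumnSpec:

DataColumnDomain domain = colSpec.getDomain();
DataCell lower = domain.getLowerBound();
DataCell upper = domain.getUpperBound();
if (lower == null || upper == null
        || !lower.getType().isCompatible(DoubleValue.class)
        || !upper.getType().isCompatible(DoubleValue.class)) {
    throw new IllegalArgumentException("Domain bounds of column '"
            + colSpec.getName() + "' must be defined and numeric");
}
double min = ((DoubleValue) lower).getDoubleValue();
double max = ((DoubleValue) upper).getDoubleValue();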

Example 18 with DataColumnDomain

Use of org.knime.core.data.DataColumnDomain in project knime-core by knime.

The class AppendedRowsTable, method generateDataTableSpec.

/**
 * Factory method that determines the final {@link DataTableSpec} given the
 * tables.
 *
 * @param tableSpecs the table specs as in the constructor
 * @return the resulting {@link DataTableSpec}
 * @see #AppendedRowsTable(DataTable[])
 */
public static final DataTableSpec generateDataTableSpec(final DataTableSpec... tableSpecs) {
    // memorize the first column spec in the argument array for
    // each column name, we use it later on to initialize the column
    // spec creator.
    LinkedHashMap<String, DataColumnSpec> columnSet = new LinkedHashMap<String, DataColumnSpec>();
    LinkedHashMap<String, DataType> typeSet = new LinkedHashMap<String, DataType>();
    LinkedHashMap<String, DataColumnDomain> domainSet = new LinkedHashMap<String, DataColumnDomain>();
    // create final data table spec
    for (int i = 0; i < tableSpecs.length; i++) {
        DataTableSpec cur = tableSpecs[i];
        for (int c = 0; c < cur.getNumColumns(); c++) {
            DataColumnSpec colSpec = cur.getColumnSpec(c);
            String colName = colSpec.getName();
            // set the spec for this column if not yet done
            if (!columnSet.containsKey(colName)) {
                columnSet.put(colName, colSpec);
            }
            DataType colType = colSpec.getType();
            DataColumnDomain colDomain = colSpec.getDomain();
            // duplicates are welcome - but only if they match the type
            if (typeSet.containsKey(colName)) {
                DataType oldType = typeSet.get(colName);
                DataColumnDomain oldDomain = domainSet.get(colName);
                // the base type they share
                DataType type = DataType.getCommonSuperType(oldType, colType);
                assert type.isASuperTypeOf(oldType);
                assert type.isASuperTypeOf(colType);
                // that shouldn't happen though, eh: shit happens.
                if (!oldType.equals(type)) {
                    LOGGER.info("Confusing data types for column \"" + colName + "\": " + oldType.toString() + " vs. " + colType.toString() + "\n" + "Using common base type " + type.toString());
                    // that must not change the order.
                    typeSet.put(colName, type);
                }
                DataColumnDomain newDomain = merge(oldDomain, colDomain, type.getComparator());
                domainSet.put(colName, newDomain);
            } else {
                // doesn't contain the key
                typeSet.put(colName, colType);
                domainSet.put(colName, colDomain);
            }
        } // for all columns in the current table spec
    } // for all tables
    DataColumnSpec[] colSpecs = new DataColumnSpec[typeSet.size()];
    int i = 0;
    for (Map.Entry<String, DataType> entry : typeSet.entrySet()) {
        String name = entry.getKey();
        DataType type = entry.getValue();
        // domain is null, if we did not remember it (e.g. "keepDomain" was
        // false)
        DataColumnDomain domain = domainSet.get(name);
        DataColumnSpec initSpec = columnSet.get(name);
        DataColumnSpecCreator specCreator = new DataColumnSpecCreator(initSpec);
        specCreator.setDomain(domain);
        specCreator.setType(type);
        colSpecs[i++] = specCreator.createSpec();
    }
    return new DataTableSpec(colSpecs);
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) LinkedHashMap(java.util.LinkedHashMap) DataColumnSpec(org.knime.core.data.DataColumnSpec) DataColumnDomain(org.knime.core.data.DataColumnDomain) DataType(org.knime.core.data.DataType) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map)
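
The final loop re-applies the merged type and domain through a DataColumnSpecCreator. A minimal sketch of that last step in isolation, assuming name, commonType and mergedDomain have already been resolved (placeholder names):

DataColumnSpecCreator creator = new DataColumnSpecCreator(name, commonType);
// a null domain simply leaves the column without domain information
creator.setDomain(mergedDomain);
DataColumnSpec mergedSpec = creator.createSpec();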

Example 19 with DataColumnDomain

Use of org.knime.core.data.DataColumnDomain in project knime-core by knime.

The class PredictorHelper, method createOutTableSpec.

/**
 * Computes the output table's specification based on common node settings.
 *
 * @param dataSpec The input table {@link DataTableSpec}.
 * @param modelSpec The model {@link PMMLPortObjectSpec}.
 * @param addProbs Add the probability columns?
 * @param predictionCol Custom name of the prediction column.
 * @param shouldOverride Should we use that name?
 * @param suffix Suffix for probability columns.
 * @return The output table {@link DataTableSpec}.
 * @throws InvalidSettingsException Invalid settings for the prediction column name.
 */
public DataTableSpec createOutTableSpec(final PortObjectSpec dataSpec, final PortObjectSpec modelSpec, final boolean addProbs, final String predictionCol, final boolean shouldOverride, final String suffix) throws InvalidSettingsException {
    CheckUtils.checkSettingNotNull(predictionCol, "Prediction column name cannot be null");
    CheckUtils.checkSetting(!predictionCol.trim().isEmpty(), "Prediction column name cannot be empty");
    List<DataCell> predValues = null;
    if (addProbs) {
        predValues = getPredictionValues((PMMLPortObjectSpec) modelSpec);
        if (predValues == null) {
            // no out spec can be determined
            return null;
        }
    }
    int numCols = (predValues == null ? 0 : predValues.size()) + 1;
    DataTableSpec inSpec = (DataTableSpec) dataSpec;
    DataColumnSpec[] newCols = new DataColumnSpec[numCols];
    /* Set bar renderer and domain [0,1] as default for the double cells
     * containing the distribution */
    // DataColumnProperties propsRendering = new DataColumnProperties(
    // Collections.singletonMap(
    // DataValueRenderer.PROPERTY_PREFERRED_RENDERER,
    // DoubleBarRenderer.DESCRIPTION));
    DataColumnDomain domain = new DataColumnDomainCreator(new DoubleCell(0.0), new DoubleCell(1.0)).createDomain();
    String trainingColumnName = ((PMMLPortObjectSpec) modelSpec).getTargetFields().iterator().next();
    // add all distribution columns
    for (int i = 0; i < numCols - 1; i++) {
        assert predValues != null;
        DataColumnSpecCreator colSpecCreator = new DataColumnSpecCreator(probabilityColumnName(trainingColumnName, predValues.get(i).toString(), suffix), DoubleCell.TYPE);
        // colSpecCreator.setProperties(propsRendering);
        colSpecCreator.setDomain(domain);
        newCols[i] = colSpecCreator.createSpec();
    }
    // add the prediction column
    String predictionColumnName = computePredictionColumnName(predictionCol, shouldOverride, trainingColumnName);
    newCols[numCols - 1] = new DataColumnSpecCreator(predictionColumnName, StringCell.TYPE).createSpec();
    DataTableSpec newColSpec = new DataTableSpec(newCols);
    return new DataTableSpec(inSpec, newColSpec);
}
Also used : PMMLPortObjectSpec(org.knime.core.node.port.pmml.PMMLPortObjectSpec) DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpec(org.knime.core.data.DataColumnSpec) DataColumnDomain(org.knime.core.data.DataColumnDomain) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) DoubleCell(org.knime.core.data.def.DoubleCell) DataCell(org.knime.core.data.DataCell) DataColumnDomainCreator(org.knime.core.data.DataColumnDomainCreator) SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString) DialogComponentString(org.knime.core.node.defaultnodesettings.DialogComponentString)
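
Each probability column is given a fixed [0,1] domain so that renderers know the expected value range. A minimal sketch of building one such column spec, using a hypothetical column name:

DataColumnDomain probabilityDomain =
        new DataColumnDomainCreator(new DoubleCell(0.0), new DoubleCell(1.0)).createDomain();
// "P (Class=yes)" is a hypothetical probability column name
DataColumnSpecCreator creator = new DataColumnSpecCreator("P (Class=yes)", DoubleCell.TYPE);
creator.setDomain(probabilityDomain);
DataColumnSpec probabilitySpec = creator.createSpec();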

Example 20 with DataColumnDomain

Use of org.knime.core.data.DataColumnDomain in project knime-core by knime.

The class NaiveBayesLearnerNodeModel2, method configure.

/**
 * {@inheritDoc}
 */
@Override
protected PortObjectSpec[] configure(final PortObjectSpec[] inSpecs) throws InvalidSettingsException {
    // check the internal variables if they are valid
    final PortObjectSpec inSpec = inSpecs[TRAINING_DATA_PORT];
    if (!(inSpec instanceof DataTableSpec)) {
        throw new IllegalArgumentException("Invalid input data");
    }
    final DataTableSpec tableSpec = (DataTableSpec) inSpec;
    if (m_classifyColumnName.getStringValue() == null) {
        String predictedClassName = null;
        for (DataColumnSpec colSpec : tableSpec) {
            if (colSpec.getType().isCompatible(NominalValue.class)) {
                if (predictedClassName == null) {
                    predictedClassName = colSpec.getName();
                } else {
                    throw new InvalidSettingsException("Please define the classification column");
                }
            }
        }
        m_classifyColumnName.setStringValue(predictedClassName);
        setWarningMessage("Classification column preset to " + predictedClassName);
    }
    final String classColumn = m_classifyColumnName.getStringValue();
    final DataColumnSpec classColSpec = tableSpec.getColumnSpec(classColumn);
    if (classColSpec == null) {
        throw new InvalidSettingsException("Classification column not found in input table");
    }
    if (tableSpec.getNumColumns() < 2) {
        throw new InvalidSettingsException("Input table should contain at least 2 columns");
    }
    final int maxNoOfNominalVals = m_maxNoOfNominalVals.getIntValue();
    // and check each nominal column with a valid domain if it contains more values than allowed
    // this needs to be in sync with the NaiveBayesModel.createModelMap method!!!
    final List<String> ignoredColumns = new LinkedList<>();
    final List<String> toBigNominalColumns = new LinkedList<>();
    final List<String> learnCols = new LinkedList<>();
    for (final DataColumnSpec colSpec : tableSpec) {
        final AttributeModel model = NaiveBayesModel.getCompatibleModel(colSpec, classColumn, maxNoOfNominalVals, m_ignoreMissingVals.getBooleanValue(), m_pmmlCompatible.getBooleanValue());
        if (model == null) {
            // the column type is not supported by Naive Bayes
            ignoredColumns.add(colSpec.getName());
            continue;
        }
        final DataType colType = colSpec.getType();
        if (colType.isCompatible(NominalValue.class)) {
            final DataColumnDomain domain = colSpec.getDomain();
            if (domain != null && domain.getValues() != null) {
                if (domain.getValues().size() > maxNoOfNominalVals) {
                    // the column domain contains too many unique values
                    if (colSpec.getName().equals(classColumn)) {
                        // the class column contains too many unique values
                        throw new InvalidSettingsException("Class column domain contains too many unique values" + " (count: " + domain.getValues().size() + ")");
                    }
                    toBigNominalColumns.add(colSpec.getName() + " (count: " + domain.getValues().size() + ")");
                }
            }
            learnCols.add(model.getAttributeName());
        }
    }
    warningMessage("The following columns will possibly be skipped due to too many values: ", toBigNominalColumns);
    warningMessage("The following columns are not supported and thus will be ignored: ", ignoredColumns);
    if (learnCols.size() < 1) {
        throw new InvalidSettingsException("Not enough valid columns");
    }
    final PMMLPortObjectSpec modelSpec = m_pmmlInEnabled ? (PMMLPortObjectSpec) inSpecs[MODEL_INPORT] : null;
    final PMMLPortObjectSpec pmmlSpec = createPMMLSpec(tableSpec, modelSpec, learnCols, classColumn);
    return new PortObjectSpec[] { pmmlSpec, NaiveBayesModel.createStatisticsTableSpec(classColSpec.getType(), m_ignoreMissingVals.getBooleanValue()) };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) PMMLPortObjectSpec(org.knime.core.node.port.pmml.PMMLPortObjectSpec) SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString) LinkedList(java.util.LinkedList) DataColumnSpec(org.knime.core.data.DataColumnSpec) AttributeModel(org.knime.base.node.mine.bayes.naivebayes.datamodel2.AttributeModel) DataColumnDomain(org.knime.core.data.DataColumnDomain) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) PMMLPortObjectSpec(org.knime.core.node.port.pmml.PMMLPortObjectSpec) PortObjectSpec(org.knime.core.node.port.PortObjectSpec) DataType(org.knime.core.data.DataType)
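
The configure method only trusts the nominal domain when it is actually populated, since a column may carry no domain values at all. A minimal sketch of that guard, assuming colSpec is a nominal column, maxValues is the configured limit and skipped collects the affected column names (all placeholders):

DataColumnDomain domain = colSpec.getDomain();
if (domain != null && domain.getValues() != null
        && domain.getValues().size() > maxValues) {
    // too many distinct values for this learner: remember the column and skip it
    skipped.add(colSpec.getName());
}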

Aggregations

DataColumnDomain (org.knime.core.data.DataColumnDomain) 46
DataColumnSpec (org.knime.core.data.DataColumnSpec) 34
DataCell (org.knime.core.data.DataCell) 32
DataTableSpec (org.knime.core.data.DataTableSpec) 20
DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator) 16
DoubleValue (org.knime.core.data.DoubleValue) 13
DataColumnDomainCreator (org.knime.core.data.DataColumnDomainCreator) 12
DataType (org.knime.core.data.DataType) 11
InvalidSettingsException (org.knime.core.node.InvalidSettingsException) 8
SettingsModelString (org.knime.core.node.defaultnodesettings.SettingsModelString) 7
PMMLPortObjectSpec (org.knime.core.node.port.pmml.PMMLPortObjectSpec) 6
ArrayList (java.util.ArrayList) 5
HashSet (java.util.HashSet) 5
LinkedHashSet (java.util.LinkedHashSet) 5
DoubleCell (org.knime.core.data.def.DoubleCell) 5
HashMap (java.util.HashMap) 3
LinkedHashMap (java.util.LinkedHashMap) 3
LinkedList (java.util.LinkedList) 3
Set (java.util.Set) 3
DataRow (org.knime.core.data.DataRow) 3