Search in sources :

Example 1 with DataColumnDomain

use of org.knime.core.data.DataColumnDomain in project knime-core by knime.

the class TwoSampleTTestNodeDialog method initGroupComboBoxes.

private void initGroupComboBoxes(final JComboBox groupOne) {
    Object selected = groupOne.getSelectedItem();
    groupOne.removeAllItems();
    String col = m_groupingColumn.getSelectedColumn();
    if (col != null && m_spec.containsName(col)) {
        DataColumnSpec colSpec = m_spec.getColumnSpec(col);
        DataColumnDomain domain = colSpec.getDomain();
        if (domain.hasValues()) {
            for (DataCell cell : domain.getValues()) {
                groupOne.addItem(cell.toString());
            }
        } else if (domain.hasBounds()) {
            groupOne.addItem(domain.getLowerBound().toString());
            groupOne.addItem(domain.getUpperBound().toString());
        }
    }
    groupOne.setSelectedItem(selected);
}
Also used : DataColumnSpec(org.knime.core.data.DataColumnSpec) DataColumnDomain(org.knime.core.data.DataColumnDomain) DataCell(org.knime.core.data.DataCell)

Example 2 with DataColumnDomain

use of org.knime.core.data.DataColumnDomain in project knime-core by knime.

the class StatisticsTable method calculateAllMoments.

/**
 * Calculates <b>all the statistical moments in one pass </b>. After the
 * call of this operation, the statistical moments can be obtained very fast
 * from all the other methods.
 *
 * @param rowCount Row count of table for progress, may be NaN if unknown.
 * @param exec object to check with if user canceled the operation
 * @throws CanceledExecutionException if user canceled
 * @throws IllegalArgumentException if rowCount argument < 0
 */
protected void calculateAllMoments(final double rowCount, final ExecutionMonitor exec) throws CanceledExecutionException {
    if (rowCount < 0.0) {
        throw new IllegalArgumentException("rowCount argument must not < 0: " + rowCount);
    }
    DataTableSpec origSpec = m_table.getDataTableSpec();
    int numOfCols = origSpec.getNumColumns();
    // the number of non-missing cells in each column
    int[] validCount = new int[numOfCols];
    double[] sumsquare = new double[numOfCols];
    final DataValueComparator[] comp = new DataValueComparator[numOfCols];
    for (int i = 0; i < numOfCols; i++) {
        sumsquare[i] = 0.0;
        validCount[i] = 0;
        comp[i] = origSpec.getColumnSpec(i).getType().getComparator();
        assert comp[i] != null;
    }
    int nrRows = 0;
    for (RowIterator rowIt = m_table.iterator(); rowIt.hasNext(); nrRows++) {
        DataRow row = rowIt.next();
        if (exec != null) {
            double prog = Double.isNaN(rowCount) ? 0.0 : nrRows / rowCount;
            exec.setProgress(prog, "Calculating statistics, processing row " + (nrRows + 1) + " (\"" + row.getKey() + "\")");
            // throws exception if user canceled
            exec.checkCanceled();
        }
        for (int c = 0; c < numOfCols; c++) {
            final DataCell cell = row.getCell(c);
            if (!(cell.isMissing())) {
                // keep the min and max for each column
                if ((m_minValues[c] == null) || (comp[c].compare(cell, m_minValues[c]) < 0)) {
                    m_minValues[c] = cell;
                }
                if ((m_maxValues[c] == null) || (comp[c].compare(m_maxValues[c], cell) < 0)) {
                    m_maxValues[c] = cell;
                }
                // for double columns we calc the sum (for the mean calc)
                DataType type = origSpec.getColumnSpec(c).getType();
                if (type.isCompatible(DoubleValue.class)) {
                    double d = ((DoubleValue) cell).getDoubleValue();
                    if (Double.isNaN(m_sum[c])) {
                        m_sum[c] = d;
                    } else {
                        m_sum[c] += d;
                    }
                    sumsquare[c] += d * d;
                    validCount[c]++;
                }
            } else {
                m_missingValueCnt[c]++;
            }
        }
        calculateMomentInSubClass(row);
    }
    m_nrRows = nrRows;
    for (int j = 0; j < numOfCols; j++) {
        // missing values
        if (validCount[j] == 0 || m_minValues[j] == null) {
            DataCell mc = DataType.getMissingCell();
            m_minValues[j] = mc;
            m_maxValues[j] = mc;
            m_meanValues[j] = Double.NaN;
            m_varianceValues[j] = Double.NaN;
        } else {
            m_meanValues[j] = m_sum[j] / validCount[j];
            if (validCount[j] > 1) {
                m_varianceValues[j] = (sumsquare[j] - ((m_sum[j] * m_sum[j]) / validCount[j])) / (validCount[j] - 1);
            } else {
                m_varianceValues[j] = 0.0;
            }
            // round-off errors resulting in negative variance values
            if (m_varianceValues[j] < 0.0 && m_varianceValues[j] > -1.0E8) {
                m_varianceValues[j] = 0.0;
            }
            assert m_varianceValues[j] >= 0.0 : "Variance cannot be negative (column \"" + origSpec.getColumnSpec(j).getName() + "\": " + m_varianceValues[j];
        }
    }
    // compute resulting table spec
    int nrCols = m_table.getDataTableSpec().getNumColumns();
    DataColumnSpec[] cSpec = new DataColumnSpec[nrCols];
    for (int c = 0; c < nrCols; c++) {
        DataColumnSpec s = m_table.getDataTableSpec().getColumnSpec(c);
        // we create domains with our bounds.
        Set<DataCell> values = (s.getDomain() == null ? null : s.getDomain().getValues());
        DataColumnDomain newDomain = new DataColumnDomainCreator(values, (m_minValues[c] == null || m_minValues[c].isMissing()) ? null : m_minValues[c], (m_maxValues[c] == null || m_maxValues[c].isMissing()) ? null : m_maxValues[c]).createDomain();
        DataColumnSpecCreator creator = new DataColumnSpecCreator(s);
        creator.setDomain(newDomain);
        cSpec[c] = creator.createSpec();
    }
    m_tSpec = new DataTableSpec(cSpec);
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) DataColumnDomainCreator(org.knime.core.data.DataColumnDomainCreator) DataValueComparator(org.knime.core.data.DataValueComparator) DataRow(org.knime.core.data.DataRow) DataColumnSpec(org.knime.core.data.DataColumnSpec) DataColumnDomain(org.knime.core.data.DataColumnDomain) DoubleValue(org.knime.core.data.DoubleValue) RowIterator(org.knime.core.data.RowIterator) DataCell(org.knime.core.data.DataCell) DataType(org.knime.core.data.DataType)

Example 3 with DataColumnDomain

use of org.knime.core.data.DataColumnDomain in project knime-core by knime.

the class NaiveBayesLearnerNodeModel method configure.

/**
 * {@inheritDoc}
 */
@Override
protected PortObjectSpec[] configure(final PortObjectSpec[] inSpecs) throws InvalidSettingsException {
    // check the internal variables if they are valid
    final String classColumn = m_classifyColumnName.getStringValue();
    if (classColumn == null || classColumn.length() < 1) {
        throw new InvalidSettingsException("Please define the classification column");
    }
    final PortObjectSpec inSpec = inSpecs[TRAINING_DATA_PORT];
    if (!(inSpec instanceof DataTableSpec)) {
        throw new IllegalArgumentException("Invalid input data");
    }
    final DataTableSpec tableSpec = (DataTableSpec) inSpec;
    if (tableSpec.findColumnIndex(classColumn) < 0) {
        throw new InvalidSettingsException("Please define the classification column");
    }
    if (tableSpec.getNumColumns() < 2) {
        throw new InvalidSettingsException("Input table should contain at least 2 columns");
    }
    final int maxNoOfNominalVals = m_maxNoOfNominalVals.getIntValue();
    // check if the table contains at least one nominal column
    // and check each nominal column with a valid domain
    // if it contains more values than allowed
    boolean containsNominalCol = false;
    final List<String> toBigNominalColumns = new ArrayList<>();
    for (int i = 0, length = tableSpec.getNumColumns(); i < length; i++) {
        final DataColumnSpec colSpec = tableSpec.getColumnSpec(i);
        if (colSpec.getType().isCompatible(NominalValue.class)) {
            containsNominalCol = true;
            final DataColumnDomain domain = colSpec.getDomain();
            if (domain != null && domain.getValues() != null) {
                if (domain.getValues().size() > maxNoOfNominalVals) {
                    // unique values
                    if (colSpec.getName().equals(classColumn)) {
                        // contains too many unique values
                        throw new InvalidSettingsException("Class column domain contains too many unique values" + " (" + domain.getValues().size() + ")");
                    }
                    toBigNominalColumns.add(colSpec.getName() + " (" + domain.getValues().size() + ")");
                }
            }
        }
    }
    if (!containsNominalCol) {
        throw new InvalidSettingsException("No possible class attribute found in input table");
    }
    if (toBigNominalColumns.size() == 1) {
        setWarningMessage("Column " + toBigNominalColumns.get(0) + " will possibly be skipped.");
    } else if (toBigNominalColumns.size() > 1) {
        final StringBuilder buf = new StringBuilder();
        buf.append("The following columns will possibly be skipped: ");
        for (int i = 0, length = toBigNominalColumns.size(); i < length; i++) {
            if (i != 0) {
                buf.append(", ");
            }
            if (i > 3) {
                buf.append("...");
                break;
            }
            buf.append(toBigNominalColumns.get(i));
        }
        setWarningMessage(buf.toString());
    }
    if (tableSpec.getNumColumns() - toBigNominalColumns.size() < 1) {
        throw new InvalidSettingsException("Not enough valid columns");
    }
    return new PortObjectSpec[] { new NaiveBayesPortObjectSpec(tableSpec, tableSpec.getColumnSpec(classColumn)) };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) ArrayList(java.util.ArrayList) NaiveBayesPortObjectSpec(org.knime.base.node.mine.bayes.naivebayes.port.NaiveBayesPortObjectSpec) SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString) DataColumnSpec(org.knime.core.data.DataColumnSpec) DataColumnDomain(org.knime.core.data.DataColumnDomain) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) NaiveBayesPortObjectSpec(org.knime.base.node.mine.bayes.naivebayes.port.NaiveBayesPortObjectSpec) PortObjectSpec(org.knime.core.node.port.PortObjectSpec)

Example 4 with DataColumnDomain

use of org.knime.core.data.DataColumnDomain in project knime-core by knime.

the class DecTreePredictorNodeModel method createOutTableSpec.

private DataTableSpec createOutTableSpec(final PortObjectSpec[] inSpecs) {
    LinkedList<DataCell> predValues = null;
    if (m_showDistribution.getBooleanValue()) {
        predValues = getPredictionValues((PMMLPortObjectSpec) inSpecs[INMODELPORT]);
        if (predValues == null) {
            // no out spec can be determined
            return null;
        }
    }
    int numCols = (predValues == null ? 0 : predValues.size()) + 1;
    DataTableSpec inSpec = (DataTableSpec) inSpecs[INDATAPORT];
    UniqueNameGenerator nameGenerator = new UniqueNameGenerator(inSpec);
    DataColumnSpec[] newCols = new DataColumnSpec[numCols];
    /* Set bar renderer and domain [0,1] as default for the double cells
         * containing the distribution */
    // DataColumnProperties propsRendering = new DataColumnProperties(
    // Collections.singletonMap(
    // DataValueRenderer.PROPERTY_PREFERRED_RENDERER,
    // DoubleBarRenderer.DESCRIPTION));
    DataColumnDomain domain = new DataColumnDomainCreator(new DoubleCell(0.0), new DoubleCell(1.0)).createDomain();
    // add all distribution columns
    for (int i = 0; i < numCols - 1; i++) {
        DataColumnSpecCreator colSpecCreator = nameGenerator.newCreator(predValues.get(i).toString(), DoubleCell.TYPE);
        // colSpecCreator.setProperties(propsRendering);
        colSpecCreator.setDomain(domain);
        newCols[i] = colSpecCreator.createSpec();
    }
    // add the prediction column
    newCols[numCols - 1] = nameGenerator.newColumn("Prediction (DecTree)", StringCell.TYPE);
    DataTableSpec newColSpec = new DataTableSpec(newCols);
    return new DataTableSpec(inSpec, newColSpec);
}
Also used : PMMLPortObjectSpec(org.knime.core.node.port.pmml.PMMLPortObjectSpec) DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpec(org.knime.core.data.DataColumnSpec) DataColumnDomain(org.knime.core.data.DataColumnDomain) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) DoubleCell(org.knime.core.data.def.DoubleCell) DataCell(org.knime.core.data.DataCell) DataColumnDomainCreator(org.knime.core.data.DataColumnDomainCreator) UniqueNameGenerator(org.knime.core.util.UniqueNameGenerator)

Example 5 with DataColumnDomain

use of org.knime.core.data.DataColumnDomain in project knime-core by knime.

the class CollectionSplitNodeModel method refineTypes.

/**
 * Retype the argument table to use the types as determined by the
 * cell factory.
 */
private BufferedDataTable refineTypes(final BufferedDataTable table, final SplitCellFactory fac, final ExecutionContext exec) {
    HashMap<String, Integer> colMap = new HashMap<String, Integer>();
    DataTableSpec spec = table.getDataTableSpec();
    DataColumnSpec[] newColSpecs = new DataColumnSpec[spec.getNumColumns()];
    for (int i = 0; i < spec.getNumColumns(); i++) {
        colMap.put(spec.getColumnSpec(i).getName(), i);
        newColSpecs[i] = spec.getColumnSpec(i);
    }
    DataColumnSpec[] oldReplacedSpecs = fac.getColumnSpecs();
    DataType[] mostSpecificTypes = fac.getCommonTypes();
    DataColumnDomain[] domains = fac.getDomains();
    for (int i = 0; i < oldReplacedSpecs.length; i++) {
        DataColumnSpec s = oldReplacedSpecs[i];
        Integer index = colMap.get(s.getName());
        DataColumnSpecCreator creator = new DataColumnSpecCreator(newColSpecs[index]);
        creator.setType(mostSpecificTypes[i]);
        creator.setDomain(domains[i]);
        newColSpecs[index] = creator.createSpec();
    }
    DataTableSpec newSpec = new DataTableSpec(spec.getName(), newColSpecs);
    return exec.createSpecReplacerTable(table, newSpec);
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) HashMap(java.util.HashMap) DataColumnSpec(org.knime.core.data.DataColumnSpec) DataColumnDomain(org.knime.core.data.DataColumnDomain) DataType(org.knime.core.data.DataType)

Aggregations

DataColumnDomain (org.knime.core.data.DataColumnDomain)46 DataColumnSpec (org.knime.core.data.DataColumnSpec)34 DataCell (org.knime.core.data.DataCell)32 DataTableSpec (org.knime.core.data.DataTableSpec)20 DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator)16 DoubleValue (org.knime.core.data.DoubleValue)13 DataColumnDomainCreator (org.knime.core.data.DataColumnDomainCreator)12 DataType (org.knime.core.data.DataType)11 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)8 SettingsModelString (org.knime.core.node.defaultnodesettings.SettingsModelString)7 PMMLPortObjectSpec (org.knime.core.node.port.pmml.PMMLPortObjectSpec)6 ArrayList (java.util.ArrayList)5 HashSet (java.util.HashSet)5 LinkedHashSet (java.util.LinkedHashSet)5 DoubleCell (org.knime.core.data.def.DoubleCell)5 HashMap (java.util.HashMap)3 LinkedHashMap (java.util.LinkedHashMap)3 LinkedList (java.util.LinkedList)3 Set (java.util.Set)3 DataRow (org.knime.core.data.DataRow)3