Search in sources :

Example 11 with DoubleValue

use of org.knime.core.data.DoubleValue in project knime-core by knime.

the class StatisticsTable method calculateAllMoments.

/**
 * Calculates <b>all the statistical moments in one pass </b>. After the
 * call of this operation, the statistical moments can be obtained very fast
 * from all the other methods.
 *
 * @param rowCount Row count of table for progress, may be NaN if unknown.
 * @param exec object to check with if user canceled the operation
 * @throws CanceledExecutionException if user canceled
 * @throws IllegalArgumentException if rowCount argument < 0
 */
protected void calculateAllMoments(final double rowCount, final ExecutionMonitor exec) throws CanceledExecutionException {
    if (rowCount < 0.0) {
        throw new IllegalArgumentException("rowCount argument must not < 0: " + rowCount);
    }
    DataTableSpec origSpec = m_table.getDataTableSpec();
    int numOfCols = origSpec.getNumColumns();
    // the number of non-missing cells in each column
    int[] validCount = new int[numOfCols];
    double[] sumsquare = new double[numOfCols];
    final DataValueComparator[] comp = new DataValueComparator[numOfCols];
    for (int i = 0; i < numOfCols; i++) {
        sumsquare[i] = 0.0;
        validCount[i] = 0;
        comp[i] = origSpec.getColumnSpec(i).getType().getComparator();
        assert comp[i] != null;
    }
    int nrRows = 0;
    for (RowIterator rowIt = m_table.iterator(); rowIt.hasNext(); nrRows++) {
        DataRow row = rowIt.next();
        if (exec != null) {
            double prog = Double.isNaN(rowCount) ? 0.0 : nrRows / rowCount;
            exec.setProgress(prog, "Calculating statistics, processing row " + (nrRows + 1) + " (\"" + row.getKey() + "\")");
            // throws exception if user canceled
            exec.checkCanceled();
        }
        for (int c = 0; c < numOfCols; c++) {
            final DataCell cell = row.getCell(c);
            if (!(cell.isMissing())) {
                // keep the min and max for each column
                if ((m_minValues[c] == null) || (comp[c].compare(cell, m_minValues[c]) < 0)) {
                    m_minValues[c] = cell;
                }
                if ((m_maxValues[c] == null) || (comp[c].compare(m_maxValues[c], cell) < 0)) {
                    m_maxValues[c] = cell;
                }
                // for double columns we calc the sum (for the mean calc)
                DataType type = origSpec.getColumnSpec(c).getType();
                if (type.isCompatible(DoubleValue.class)) {
                    double d = ((DoubleValue) cell).getDoubleValue();
                    if (Double.isNaN(m_sum[c])) {
                        m_sum[c] = d;
                    } else {
                        m_sum[c] += d;
                    }
                    sumsquare[c] += d * d;
                    validCount[c]++;
                }
            } else {
                m_missingValueCnt[c]++;
            }
        }
        calculateMomentInSubClass(row);
    }
    m_nrRows = nrRows;
    for (int j = 0; j < numOfCols; j++) {
        // missing values
        if (validCount[j] == 0 || m_minValues[j] == null) {
            DataCell mc = DataType.getMissingCell();
            m_minValues[j] = mc;
            m_maxValues[j] = mc;
            m_meanValues[j] = Double.NaN;
            m_varianceValues[j] = Double.NaN;
        } else {
            m_meanValues[j] = m_sum[j] / validCount[j];
            if (validCount[j] > 1) {
                m_varianceValues[j] = (sumsquare[j] - ((m_sum[j] * m_sum[j]) / validCount[j])) / (validCount[j] - 1);
            } else {
                m_varianceValues[j] = 0.0;
            }
            // round-off errors resulting in negative variance values
            if (m_varianceValues[j] < 0.0 && m_varianceValues[j] > -1.0E8) {
                m_varianceValues[j] = 0.0;
            }
            assert m_varianceValues[j] >= 0.0 : "Variance cannot be negative (column \"" + origSpec.getColumnSpec(j).getName() + "\": " + m_varianceValues[j];
        }
    }
    // compute resulting table spec
    int nrCols = m_table.getDataTableSpec().getNumColumns();
    DataColumnSpec[] cSpec = new DataColumnSpec[nrCols];
    for (int c = 0; c < nrCols; c++) {
        DataColumnSpec s = m_table.getDataTableSpec().getColumnSpec(c);
        // we create domains with our bounds.
        Set<DataCell> values = (s.getDomain() == null ? null : s.getDomain().getValues());
        DataColumnDomain newDomain = new DataColumnDomainCreator(values, (m_minValues[c] == null || m_minValues[c].isMissing()) ? null : m_minValues[c], (m_maxValues[c] == null || m_maxValues[c].isMissing()) ? null : m_maxValues[c]).createDomain();
        DataColumnSpecCreator creator = new DataColumnSpecCreator(s);
        creator.setDomain(newDomain);
        cSpec[c] = creator.createSpec();
    }
    m_tSpec = new DataTableSpec(cSpec);
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) DataColumnDomainCreator(org.knime.core.data.DataColumnDomainCreator) DataValueComparator(org.knime.core.data.DataValueComparator) DataRow(org.knime.core.data.DataRow) DataColumnSpec(org.knime.core.data.DataColumnSpec) DataColumnDomain(org.knime.core.data.DataColumnDomain) DoubleValue(org.knime.core.data.DoubleValue) RowIterator(org.knime.core.data.RowIterator) DataCell(org.knime.core.data.DataCell) DataType(org.knime.core.data.DataType)

Example 12 with DoubleValue

use of org.knime.core.data.DoubleValue in project knime-core by knime.

the class SVMPredictor method getCells.

/**
 * {@inheritDoc}
 */
@Override
public DataCell[] getCells(final DataRow row) {
    ArrayList<Double> values = new ArrayList<Double>();
    for (int i = 0; i < m_colindices.length; i++) {
        if (row.getCell(m_colindices[i]).isMissing()) {
            return new DataCell[] { DataType.getMissingCell() };
        }
        DoubleValue dv = (DoubleValue) row.getCell(m_colindices[i]);
        values.add(dv.getDoubleValue());
    }
    String classvalue = doPredict(values);
    return new DataCell[] { new StringCell(classvalue) };
}
Also used : DoubleValue(org.knime.core.data.DoubleValue) StringCell(org.knime.core.data.def.StringCell) ArrayList(java.util.ArrayList) DataCell(org.knime.core.data.DataCell)

Example 13 with DoubleValue

use of org.knime.core.data.DoubleValue in project knime-core by knime.

the class AutoBinner method calcDomainBoundsIfNeccessary.

/**
 * Determines the per column min/max values of the given data if not already
 * present in the domain.
 * @param data the data
 * @param exec the execution context
 * @param recalcValuesFor The columns
 * @return The data with extended domain information
 * @throws InvalidSettingsException
 * @throws CanceledExecutionException
 */
public BufferedDataTable calcDomainBoundsIfNeccessary(final BufferedDataTable data, final ExecutionContext exec, final List<String> recalcValuesFor) throws InvalidSettingsException, CanceledExecutionException {
    if (null == recalcValuesFor || recalcValuesFor.isEmpty()) {
        return data;
    }
    List<Integer> valuesI = new ArrayList<Integer>();
    for (String colName : recalcValuesFor) {
        DataColumnSpec colSpec = data.getDataTableSpec().getColumnSpec(colName);
        if (!colSpec.getType().isCompatible(DoubleValue.class)) {
            throw new InvalidSettingsException("Can only process numeric " + "data. The column \"" + colSpec.getName() + "\" is not numeric.");
        }
        if (recalcValuesFor.contains(colName) && !colSpec.getDomain().hasBounds()) {
            valuesI.add(data.getDataTableSpec().findColumnIndex(colName));
        }
    }
    if (valuesI.isEmpty()) {
        return data;
    }
    Map<Integer, Double> min = new HashMap<Integer, Double>();
    Map<Integer, Double> max = new HashMap<Integer, Double>();
    for (int col : valuesI) {
        min.put(col, Double.MAX_VALUE);
        max.put(col, Double.MIN_VALUE);
    }
    int c = 0;
    for (DataRow row : data) {
        c++;
        exec.checkCanceled();
        exec.setProgress(c / (double) data.getRowCount());
        for (int col : valuesI) {
            double val = ((DoubleValue) row.getCell(col)).getDoubleValue();
            if (min.get(col) > val) {
                min.put(col, val);
            }
            if (max.get(col) < val) {
                min.put(col, val);
            }
        }
    }
    List<DataColumnSpec> newColSpecList = new ArrayList<DataColumnSpec>();
    int cc = 0;
    for (DataColumnSpec columnSpec : data.getDataTableSpec()) {
        if (recalcValuesFor.contains(columnSpec.getName())) {
            DataColumnSpecCreator specCreator = new DataColumnSpecCreator(columnSpec);
            DataColumnDomainCreator domainCreator = new DataColumnDomainCreator(new DoubleCell(min.get(cc)), new DoubleCell(max.get(cc)));
            specCreator.setDomain(domainCreator.createDomain());
            DataColumnSpec newColSpec = specCreator.createSpec();
            newColSpecList.add(newColSpec);
        } else {
            newColSpecList.add(columnSpec);
        }
        cc++;
    }
    DataTableSpec spec = new DataTableSpec(newColSpecList.toArray(new DataColumnSpec[0]));
    BufferedDataTable newDataTable = exec.createSpecReplacerTable(data, spec);
    return newDataTable;
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) DoubleCell(org.knime.core.data.def.DoubleCell) ArrayList(java.util.ArrayList) DataColumnDomainCreator(org.knime.core.data.DataColumnDomainCreator) DataRow(org.knime.core.data.DataRow) DataColumnSpec(org.knime.core.data.DataColumnSpec) DoubleValue(org.knime.core.data.DoubleValue) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) BufferedDataTable(org.knime.core.node.BufferedDataTable)

Example 14 with DoubleValue

use of org.knime.core.data.DoubleValue in project knime-core by knime.

the class AutoBinner method createEdgesFromQuantiles.

private static double[] createEdgesFromQuantiles(final BufferedDataTable data, final ExecutionContext exec, final double[] sampleQuantiles) throws CanceledExecutionException {
    double[] edges = new double[sampleQuantiles.length];
    long n = data.size();
    long c = 0;
    int cc = 0;
    RowIterator iter = data.iterator();
    DataRow rowQ = null;
    DataRow rowQ1 = null;
    if (iter.hasNext()) {
        rowQ1 = iter.next();
        rowQ = rowQ1;
    }
    for (double p : sampleQuantiles) {
        double h = (n - 1) * p + 1;
        int q = (int) Math.floor(h);
        while ((1.0 == p || c < q) && iter.hasNext()) {
            rowQ = rowQ1;
            rowQ1 = iter.next();
            c++;
            exec.setProgress(c / (double) n);
            exec.checkCanceled();
        }
        rowQ = 1.0 != p ? rowQ : rowQ1;
        final DataCell xqCell = rowQ.getCell(0);
        final DataCell xq1Cell = rowQ1.getCell(0);
        // data first?)
        if (xqCell.isMissing() || xq1Cell.isMissing()) {
            throw new RuntimeException("Missing values not support for " + "quantile calculation (error in row \"" + rowQ1.getKey() + "\")");
        }
        // for quantile calculation see also
        // http://en.wikipedia.org/wiki/
        // Quantile#Estimating_the_quantiles_of_a_population.
        // this implements R-7
        double xq = ((DoubleValue) xqCell).getDoubleValue();
        double xq1 = ((DoubleValue) xq1Cell).getDoubleValue();
        double quantile = xq + (h - q) * (xq1 - xq);
        edges[cc] = quantile;
        cc++;
    }
    return edges;
}
Also used : DoubleValue(org.knime.core.data.DoubleValue) RowIterator(org.knime.core.data.RowIterator) DataCell(org.knime.core.data.DataCell) DataRow(org.knime.core.data.DataRow)

Example 15 with DoubleValue

use of org.knime.core.data.DoubleValue in project knime-core by knime.

the class AutoBinner method createEdgesFromQuantiles.

@SuppressWarnings("null")
private static double[] createEdgesFromQuantiles(final BufferedDataTable data, final ExecutionContext exec, final double[] sampleQuantiles) throws CanceledExecutionException {
    double[] edges = new double[sampleQuantiles.length];
    long n = data.size();
    long c = 0;
    int cc = 0;
    RowIterator iter = data.iterator();
    DataRow rowQ = null;
    DataRow rowQ1 = null;
    if (iter.hasNext()) {
        rowQ1 = iter.next();
        rowQ = rowQ1;
    }
    for (double p : sampleQuantiles) {
        double h = (n - 1) * p + 1;
        int q = (int) Math.floor(h);
        while ((1.0 == p || c < q) && iter.hasNext()) {
            rowQ = rowQ1;
            rowQ1 = iter.next();
            c++;
            exec.setProgress(c / (double) n);
            exec.checkCanceled();
        }
        rowQ = 1.0 != p ? rowQ : rowQ1;
        final DataCell xqCell = rowQ.getCell(0);
        final DataCell xq1Cell = rowQ1.getCell(0);
        // data first?)
        if (xqCell.isMissing() || xq1Cell.isMissing()) {
            throw new RuntimeException("Missing values not support for " + "quantile calculation (error in row \"" + rowQ1.getKey() + "\")");
        }
        // for quantile calculation see also
        // http://en.wikipedia.org/wiki/
        // Quantile#Estimating_the_quantiles_of_a_population.
        // this implements R-7
        double xq = ((DoubleValue) xqCell).getDoubleValue();
        double xq1 = ((DoubleValue) xq1Cell).getDoubleValue();
        double quantile = xq + (h - q) * (xq1 - xq);
        edges[cc] = quantile;
        cc++;
    }
    return edges;
}
Also used : DoubleValue(org.knime.core.data.DoubleValue) RowIterator(org.knime.core.data.RowIterator) DataCell(org.knime.core.data.DataCell) DataRow(org.knime.core.data.DataRow)

Aggregations

DoubleValue (org.knime.core.data.DoubleValue)154 DataCell (org.knime.core.data.DataCell)103 DataRow (org.knime.core.data.DataRow)71 DataColumnSpec (org.knime.core.data.DataColumnSpec)38 DataTableSpec (org.knime.core.data.DataTableSpec)38 DoubleCell (org.knime.core.data.def.DoubleCell)32 ArrayList (java.util.ArrayList)26 BufferedDataTable (org.knime.core.node.BufferedDataTable)26 DataType (org.knime.core.data.DataType)23 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)21 LinkedHashMap (java.util.LinkedHashMap)18 IntValue (org.knime.core.data.IntValue)15 HashMap (java.util.HashMap)14 RowIterator (org.knime.core.data.RowIterator)14 RowKey (org.knime.core.data.RowKey)13 DefaultRow (org.knime.core.data.def.DefaultRow)13 DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator)12 LongValue (org.knime.core.data.LongValue)10 StringValue (org.knime.core.data.StringValue)10 DateAndTimeValue (org.knime.core.data.date.DateAndTimeValue)10