Search in sources :

Example 1 with RowIterator

use of org.knime.core.data.RowIterator in project knime-core by knime.

the class StatisticsTable method calculateAllMoments.

/**
 * Calculates <b>all the statistical moments in one pass</b>. After the
 * call of this operation, the statistical moments can be obtained very fast
 * from all the other methods.
 *
 * <p>The table is iterated exactly once. Per column the smallest and largest
 * non-missing cell (according to the column type's comparator) and the number
 * of missing cells are recorded. For columns whose type is compatible with
 * {@code DoubleValue} the sum and the sum of squares are accumulated as well,
 * from which the mean and the sample variance are derived afterwards. Finally
 * a new table spec is created whose column domains carry the observed bounds.
 *
 * @param rowCount Row count of table for progress, may be NaN if unknown.
 * @param exec object to check with if user canceled the operation; may be
 *            {@code null}, in which case no progress is reported
 * @throws CanceledExecutionException if user canceled
 * @throws IllegalArgumentException if rowCount argument is negative
 */
protected void calculateAllMoments(final double rowCount, final ExecutionMonitor exec) throws CanceledExecutionException {
    if (rowCount < 0.0) {
        throw new IllegalArgumentException("rowCount argument must not < 0: " + rowCount);
    }
    final DataTableSpec origSpec = m_table.getDataTableSpec();
    final int numOfCols = origSpec.getNumColumns();
    // number of non-missing cells per double-compatible column (it is only
    // incremented for those columns and stays 0 for all others); new arrays
    // are zero-initialized by the JVM, so no explicit reset is needed
    final int[] validCount = new int[numOfCols];
    final double[] sumsquare = new double[numOfCols];
    final DataValueComparator[] comp = new DataValueComparator[numOfCols];
    // whether a column takes part in the sum/variance computation; resolved
    // once per column instead of once per cell
    final boolean[] isDoubleColumn = new boolean[numOfCols];
    for (int i = 0; i < numOfCols; i++) {
        final DataType type = origSpec.getColumnSpec(i).getType();
        comp[i] = type.getComparator();
        assert comp[i] != null;
        isDoubleColumn[i] = type.isCompatible(DoubleValue.class);
    }
    int nrRows = 0;
    for (RowIterator rowIt = m_table.iterator(); rowIt.hasNext(); nrRows++) {
        DataRow row = rowIt.next();
        if (exec != null) {
            // without a known row count the progress stays at 0
            double prog = Double.isNaN(rowCount) ? 0.0 : nrRows / rowCount;
            exec.setProgress(prog, "Calculating statistics, processing row " + (nrRows + 1) + " (\"" + row.getKey() + "\")");
            // throws exception if user canceled
            exec.checkCanceled();
        }
        for (int c = 0; c < numOfCols; c++) {
            final DataCell cell = row.getCell(c);
            if (cell.isMissing()) {
                m_missingValueCnt[c]++;
                continue;
            }
            // keep the min and max for each column
            if ((m_minValues[c] == null) || (comp[c].compare(cell, m_minValues[c]) < 0)) {
                m_minValues[c] = cell;
            }
            if ((m_maxValues[c] == null) || (comp[c].compare(m_maxValues[c], cell) < 0)) {
                m_maxValues[c] = cell;
            }
            // for double columns we calc the sum (for the mean calc)
            if (isDoubleColumn[c]) {
                double d = ((DoubleValue) cell).getDoubleValue();
                // a NaN sum marks "nothing accumulated yet" and is replaced
                // by the first value instead of being added to
                if (Double.isNaN(m_sum[c])) {
                    m_sum[c] = d;
                } else {
                    m_sum[c] += d;
                }
                sumsquare[c] += d * d;
                validCount[c]++;
            }
        }
        calculateMomentInSubClass(row);
    }
    m_nrRows = nrRows;
    for (int j = 0; j < numOfCols; j++) {
        // No usable value: min/max become missing cells, mean/variance NaN.
        // NOTE(review): validCount is 0 for every non-double column, so the
        // min/max collected above are discarded for those columns as well -
        // confirm that this is intended.
        if (validCount[j] == 0 || m_minValues[j] == null) {
            DataCell mc = DataType.getMissingCell();
            m_minValues[j] = mc;
            m_maxValues[j] = mc;
            m_meanValues[j] = Double.NaN;
            m_varianceValues[j] = Double.NaN;
        } else {
            m_meanValues[j] = m_sum[j] / validCount[j];
            if (validCount[j] > 1) {
                // sample variance computed from sum and sum of squares
                m_varianceValues[j] = (sumsquare[j] - ((m_sum[j] * m_sum[j]) / validCount[j])) / (validCount[j] - 1);
            } else {
                m_varianceValues[j] = 0.0;
            }
            // round-off errors resulting in negative variance values
            // NOTE(review): the lower bound is -1.0E8, not a small epsilon
            // such as -1.0E-8; kept unchanged - confirm the intended value.
            if (m_varianceValues[j] < 0.0 && m_varianceValues[j] > -1.0E8) {
                m_varianceValues[j] = 0.0;
            }
            assert m_varianceValues[j] >= 0.0 : "Variance cannot be negative (column \"" + origSpec.getColumnSpec(j).getName() + "\"): " + m_varianceValues[j];
        }
    }
    // compute resulting table spec
    final DataColumnSpec[] cSpec = new DataColumnSpec[numOfCols];
    for (int c = 0; c < numOfCols; c++) {
        final DataColumnSpec s = origSpec.getColumnSpec(c);
        // we create domains with our bounds; possible values are taken over
        // from the existing domain, missing bounds are passed as null
        final Set<DataCell> values = (s.getDomain() == null ? null : s.getDomain().getValues());
        final DataCell lower = (m_minValues[c] == null || m_minValues[c].isMissing()) ? null : m_minValues[c];
        final DataCell upper = (m_maxValues[c] == null || m_maxValues[c].isMissing()) ? null : m_maxValues[c];
        final DataColumnDomain newDomain = new DataColumnDomainCreator(values, lower, upper).createDomain();
        final DataColumnSpecCreator creator = new DataColumnSpecCreator(s);
        creator.setDomain(newDomain);
        cSpec[c] = creator.createSpec();
    }
    m_tSpec = new DataTableSpec(cSpec);
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) DataColumnDomainCreator(org.knime.core.data.DataColumnDomainCreator) DataValueComparator(org.knime.core.data.DataValueComparator) DataRow(org.knime.core.data.DataRow) DataColumnSpec(org.knime.core.data.DataColumnSpec) DataColumnDomain(org.knime.core.data.DataColumnDomain) DoubleValue(org.knime.core.data.DoubleValue) RowIterator(org.knime.core.data.RowIterator) DataCell(org.knime.core.data.DataCell) DataType(org.knime.core.data.DataType)

Example 2 with RowIterator

use of org.knime.core.data.RowIterator in project knime-core by knime.

the class AutoBinner method createEdgesFromQuantiles.

/**
 * Computes one bin edge per requested sample quantile from the first column
 * of the given table, using linear interpolation between neighbouring rows
 * (quantile definition R-7).
 *
 * <p>The table is read front to back with a single forward iterator, so the
 * entries of {@code sampleQuantiles} have to be in ascending order.
 * NOTE(review): presumably the rows are expected to be sorted ascending by
 * the first column - confirm against the caller.
 *
 * @param data the table whose first column holds the values
 * @param exec used for progress reporting and cancellation checks
 * @param sampleQuantiles the quantiles to compute; 1.0 selects the last row
 * @return the computed edges, in the order of {@code sampleQuantiles}
 * @throws CanceledExecutionException if the user canceled
 * @throws IllegalArgumentException if quantiles are requested for an empty table
 * @throws RuntimeException if one of the involved cells is missing
 */
private static double[] createEdgesFromQuantiles(final BufferedDataTable data, final ExecutionContext exec, final double[] sampleQuantiles) throws CanceledExecutionException {
    double[] edges = new double[sampleQuantiles.length];
    long n = data.size();
    // 1-based position of the row currently held in rowQ1 minus one
    long c = 0;
    int cc = 0;
    // NOTE(review): the iterator is not exhausted when no quantile equals
    // 1.0; if it holds resources it may need to be closed - confirm.
    RowIterator iter = data.iterator();
    DataRow rowQ = null;
    DataRow rowQ1 = null;
    if (iter.hasNext()) {
        rowQ1 = iter.next();
        rowQ = rowQ1;
    }
    for (double p : sampleQuantiles) {
        if (rowQ1 == null) {
            // fail with a clear message instead of a NullPointerException
            throw new IllegalArgumentException("Cannot calculate quantiles on an empty table");
        }
        double h = (n - 1) * p + 1;
        // long, not int: n and c are long and an int cast would saturate at
        // Integer.MAX_VALUE for very large tables
        long q = (long) Math.floor(h);
        // advance until rowQ is row q and rowQ1 is row q + 1; for p == 1.0
        // run to the very end of the table
        while ((1.0 == p || c < q) && iter.hasNext()) {
            rowQ = rowQ1;
            rowQ1 = iter.next();
            c++;
            exec.setProgress(c / (double) n);
            exec.checkCanceled();
        }
        // for p == 1.0 both interpolation points are the last row
        rowQ = 1.0 != p ? rowQ : rowQ1;
        final DataCell xqCell = rowQ.getCell(0);
        final DataCell xq1Cell = rowQ1.getCell(0);
        // missing cells cannot be interpolated
        if (xqCell.isMissing() || xq1Cell.isMissing()) {
            throw new RuntimeException("Missing values not support for " + "quantile calculation (error in row \"" + rowQ1.getKey() + "\")");
        }
        // for quantile calculation see also
        // http://en.wikipedia.org/wiki/
        // Quantile#Estimating_the_quantiles_of_a_population.
        // this implements R-7
        double xq = ((DoubleValue) xqCell).getDoubleValue();
        double xq1 = ((DoubleValue) xq1Cell).getDoubleValue();
        double quantile = xq + (h - q) * (xq1 - xq);
        edges[cc] = quantile;
        cc++;
    }
    return edges;
}
Also used : DoubleValue(org.knime.core.data.DoubleValue) RowIterator(org.knime.core.data.RowIterator) DataCell(org.knime.core.data.DataCell) DataRow(org.knime.core.data.DataRow)

Example 3 with RowIterator

use of org.knime.core.data.RowIterator in project knime-core by knime.

the class AutoBinner method createEdgesFromQuantiles.

/**
 * Computes one bin edge per requested sample quantile from the first column
 * of the given table, using linear interpolation between neighbouring rows
 * (quantile definition R-7).
 *
 * <p>The table is read front to back with a single forward iterator, so the
 * entries of {@code sampleQuantiles} have to be in ascending order.
 * NOTE(review): presumably the rows are expected to be sorted ascending by
 * the first column - confirm against the caller.
 *
 * @param data the table whose first column holds the values
 * @param exec used for progress reporting and cancellation checks
 * @param sampleQuantiles the quantiles to compute; 1.0 selects the last row
 * @return the computed edges, in the order of {@code sampleQuantiles}
 * @throws CanceledExecutionException if the user canceled
 * @throws IllegalArgumentException if quantiles are requested for an empty table
 * @throws RuntimeException if one of the involved cells is missing
 */
@SuppressWarnings("null")
private static double[] createEdgesFromQuantiles(final BufferedDataTable data, final ExecutionContext exec, final double[] sampleQuantiles) throws CanceledExecutionException {
    double[] edges = new double[sampleQuantiles.length];
    long n = data.size();
    // number of rows consumed after the first one
    long c = 0;
    int cc = 0;
    // NOTE(review): the iterator is not exhausted when no quantile equals
    // 1.0; if it holds resources it may need to be closed - confirm.
    RowIterator iter = data.iterator();
    DataRow rowQ = null;
    DataRow rowQ1 = null;
    if (iter.hasNext()) {
        rowQ1 = iter.next();
        rowQ = rowQ1;
    }
    for (double p : sampleQuantiles) {
        if (rowQ1 == null) {
            // fail with a clear message instead of a NullPointerException
            throw new IllegalArgumentException("Cannot calculate quantiles on an empty table");
        }
        double h = (n - 1) * p + 1;
        // long, not int: n and c are long and an int cast would saturate at
        // Integer.MAX_VALUE for very large tables
        long q = (long) Math.floor(h);
        // advance until rowQ is row q and rowQ1 is row q + 1; for p == 1.0
        // run to the very end of the table
        while ((1.0 == p || c < q) && iter.hasNext()) {
            rowQ = rowQ1;
            rowQ1 = iter.next();
            c++;
            exec.setProgress(c / (double) n);
            exec.checkCanceled();
        }
        // for p == 1.0 both interpolation points are the last row
        rowQ = 1.0 != p ? rowQ : rowQ1;
        final DataCell xqCell = rowQ.getCell(0);
        final DataCell xq1Cell = rowQ1.getCell(0);
        // missing cells cannot be interpolated
        if (xqCell.isMissing() || xq1Cell.isMissing()) {
            throw new RuntimeException("Missing values not support for " + "quantile calculation (error in row \"" + rowQ1.getKey() + "\")");
        }
        // for quantile calculation see also
        // http://en.wikipedia.org/wiki/
        // Quantile#Estimating_the_quantiles_of_a_population.
        // this implements R-7
        double xq = ((DoubleValue) xqCell).getDoubleValue();
        double xq1 = ((DoubleValue) xq1Cell).getDoubleValue();
        double quantile = xq + (h - q) * (xq1 - xq);
        edges[cc] = quantile;
        cc++;
    }
    return edges;
}
Also used : DoubleValue(org.knime.core.data.DoubleValue) RowIterator(org.knime.core.data.RowIterator) DataCell(org.knime.core.data.DataCell) DataRow(org.knime.core.data.DataRow)

Example 4 with RowIterator

use of org.knime.core.data.RowIterator in project knime-core by knime.

the class JoinerTest method compareTables.

/**
 * Asserts that the test table has the same content as the reference table:
 * the same number of rows and, row by row in iteration order, the same row
 * key and the same sequence of cells.
 *
 * @param reference the table holding the expected content
 * @param test the table to check
 */
private void compareTables(final BufferedDataTable reference, final BufferedDataTable test) {
    // Check if it has the same results as defaultResult; size() is used
    // because it reports the row count as long
    assertThat("Unequal number of rows in result table", test.size(), is(reference.size()));
    RowIterator referenceIter = reference.iterator();
    RowIterator testIter = test.iterator();
    // equal sizes were asserted above, so both iterators end together
    while (referenceIter.hasNext()) {
        DataRow refRow = referenceIter.next();
        DataRow testRow = testIter.next();
        assertThat("Unexpected row key", testRow.getKey(), is(refRow.getKey()));
        Iterator<DataCell> refCell = refRow.iterator();
        Iterator<DataCell> testCell = testRow.iterator();
        while (refCell.hasNext()) {
            // report a too short test row as assertion failure rather than
            // as NoSuchElementException
            assertThat("Missing cell in row " + refRow.getKey(), testCell.hasNext(), is(true));
            assertThat("Unexpected cell in row " + refRow.getKey(), testCell.next(), is(refCell.next()));
        }
        // the test row must not carry cells beyond those of the reference
        assertThat("Additional cell in row " + refRow.getKey(), testCell.hasNext(), is(false));
    }
}
Also used : RowIterator(org.knime.core.data.RowIterator) DataCell(org.knime.core.data.DataCell) DataRow(org.knime.core.data.DataRow)

Example 5 with RowIterator

use of org.knime.core.data.RowIterator in project knime-core by knime.

the class FilterColumnTableTest method tableTest.

/**
 * Invoked on each testXXX() method to test all rows and cells on equality
 * by iterating through the entire table, that is, the filter as well as the
 * original data table. Both tables must also yield the same number of rows.
 *
 * @param f The filter table to test equality on.
 */
private void tableTest(final FilterColumnTable f) {
    final int[] columns = f.getColumnIndices();
    RowIterator fIt = f.iterator();
    RowIterator tIt = m_table.iterator();
    while (fIt.hasNext() && tIt.hasNext()) {
        DataRow rf = fIt.next();
        DataRow rt = tIt.next();
        // check also if the same rows are compared
        assertTrue(rf.getKey().equals(rt.getKey()));
        for (int i = 0; i < columns.length; i++) {
            // check cell from original with the mapped one
            assertTrue(rf.getCell(i).equals(rt.getCell(columns[i])));
        }
    }
    // the loop ends as soon as one iterator is exhausted; make sure neither
    // table has rows left that were never compared
    assertTrue(!fIt.hasNext() && !tIt.hasNext());
}
Also used : RowIterator(org.knime.core.data.RowIterator) DataRow(org.knime.core.data.DataRow)

Aggregations

RowIterator (org.knime.core.data.RowIterator): 79; DataRow (org.knime.core.data.DataRow): 62; DataCell (org.knime.core.data.DataCell): 28; DataTableSpec (org.knime.core.data.DataTableSpec): 19; RowKey (org.knime.core.data.RowKey): 15; DoubleValue (org.knime.core.data.DoubleValue): 14; BufferedDataTable (org.knime.core.node.BufferedDataTable): 14; DataColumnSpec (org.knime.core.data.DataColumnSpec): 11; ArrayList (java.util.ArrayList): 9; DefaultRow (org.knime.core.data.def.DefaultRow): 8; PreparedStatement (java.sql.PreparedStatement): 7; BufferedDataContainer (org.knime.core.node.BufferedDataContainer): 7; DataType (org.knime.core.data.DataType): 6; HashSet (java.util.HashSet): 5; Random (java.util.Random): 5; TimeZone (java.util.TimeZone): 5; DataTable (org.knime.core.data.DataTable): 5; DoubleCell (org.knime.core.data.def.DoubleCell): 5; StringCell (org.knime.core.data.def.StringCell): 5; CanceledExecutionException (org.knime.core.node.CanceledExecutionException): 5