Search in sources :

Example 1 with RowIterator

Use of RowIterator in project knime-core by knime.

the class StatisticsTable method calculateAllMoments.

/**
 * Calculates <b>all the statistical moments in one pass</b>. After the
 * call of this operation, the statistical moments can be obtained very fast
 * from all the other methods.
 *
 * @param rowCount Row count of table for progress, may be NaN if unknown.
 * @param exec object to check with if user canceled the operation
 * @throws CanceledExecutionException if user canceled
 * @throws IllegalArgumentException if {@code rowCount < 0}
 */
// Makes one pass over m_table, tracking per-column minimum, maximum and sum (plus a
// local sum of squares), then derives mean and variance per column and assigns m_tSpec.
// NOTE(review): this listing looks damaged by extraction. Closing braces are absent, the
// right-hand side of `DataRow row =;` is gone, and some whole statements seem to have
// been dropped (see the notes below). Restore them from the upstream knime-core source
// before relying on this text.
protected void calculateAllMoments(final double rowCount, final ExecutionMonitor exec) throws CanceledExecutionException {
    // a NaN rowCount passes this check because every comparison with NaN is false
    if (rowCount < 0.0) {
        throw new IllegalArgumentException("rowCount argument must not < 0: " + rowCount);
    DataTableSpec origSpec = m_table.getDataTableSpec();
    int numOfCols = origSpec.getNumColumns();
    // the number of non-missing cells in each column
    // NOTE(review): validCount is never incremented anywhere in this listing, although the
    // mean/variance code below divides by it; an increment was presumably lost. Confirm.
    int[] validCount = new int[numOfCols];
    double[] sumsquare = new double[numOfCols];
    // one comparator per column, taken from the column's type; used for min/max tracking
    final DataValueComparator[] comp = new DataValueComparator[numOfCols];
    for (int i = 0; i < numOfCols; i++) {
        sumsquare[i] = 0.0;
        validCount[i] = 0;
        comp[i] = origSpec.getColumnSpec(i).getType().getComparator();
        assert comp[i] != null;
    int nrRows = 0;
    for (RowIterator rowIt = m_table.iterator(); rowIt.hasNext(); nrRows++) {
        // NOTE(review): right-hand side missing; presumably rowIt.next(). Confirm upstream.
        DataRow row =;
        // progress reporting is optional: a null exec is tolerated
        if (exec != null) {
            // reported progress stays at 0 when the row count is unknown (NaN)
            double prog = Double.isNaN(rowCount) ? 0.0 : nrRows / rowCount;
            exec.setProgress(prog, "Calculating statistics, processing row " + (nrRows + 1) + " (\"" + row.getKey() + "\")");
            // throws exception if user canceled
            // NOTE(review): the call this comment described is not present in the listing;
            // presumably a cancellation check on exec. Confirm upstream.
        for (int c = 0; c < numOfCols; c++) {
            final DataCell cell = row.getCell(c);
            if (!(cell.isMissing())) {
                // keep the min and max for each column
                if ((m_minValues[c] == null) || (comp[c].compare(cell, m_minValues[c]) < 0)) {
                    m_minValues[c] = cell;
                if ((m_maxValues[c] == null) || (comp[c].compare(m_maxValues[c], cell) < 0)) {
                    m_maxValues[c] = cell;
                // for double columns we calc the sum (for the mean calc)
                DataType type = origSpec.getColumnSpec(c).getType();
                if (type.isCompatible(DoubleValue.class)) {
                    double d = ((DoubleValue) cell).getDoubleValue();
                    // a NaN sum is overwritten by the first value instead of being added to
                    if (Double.isNaN(m_sum[c])) {
                        m_sum[c] = d;
                    } else {
                        m_sum[c] += d;
                    sumsquare[c] += d * d;
            // NOTE(review): the body of this else branch (missing-cell case) is absent from
            // the listing.
            } else {
    m_nrRows = nrRows;
    for (int j = 0; j < numOfCols; j++) {
        // missing values
        // columns without any usable value get missing-cell bounds and NaN mean/variance
        if (validCount[j] == 0 || m_minValues[j] == null) {
            DataCell mc = DataType.getMissingCell();
            m_minValues[j] = mc;
            m_maxValues[j] = mc;
            m_meanValues[j] = Double.NaN;
            m_varianceValues[j] = Double.NaN;
        } else {
            m_meanValues[j] = m_sum[j] / validCount[j];
            if (validCount[j] > 1) {
                // (sum of squares - sum^2 / n) / (n - 1)
                m_varianceValues[j] = (sumsquare[j] - ((m_sum[j] * m_sum[j]) / validCount[j])) / (validCount[j] - 1);
            } else {
                m_varianceValues[j] = 0.0;
            // round-off errors resulting in negative variance values
            // NOTE(review): -1.0E8 is -100,000,000, so almost any negative variance is
            // clamped to 0 here; -1.0E-8 may have been intended. Confirm.
            if (m_varianceValues[j] < 0.0 && m_varianceValues[j] > -1.0E8) {
                m_varianceValues[j] = 0.0;
            assert m_varianceValues[j] >= 0.0 : "Variance cannot be negative (column \"" + origSpec.getColumnSpec(j).getName() + "\": " + m_varianceValues[j];
    // compute resulting table spec
    int nrCols = m_table.getDataTableSpec().getNumColumns();
    DataColumnSpec[] cSpec = new DataColumnSpec[nrCols];
    for (int c = 0; c < nrCols; c++) {
        DataColumnSpec s = m_table.getDataTableSpec().getColumnSpec(c);
        // we create domains with our bounds.
        // existing possible values are carried over; null or missing min/max cells become null bounds
        Set<DataCell> values = (s.getDomain() == null ? null : s.getDomain().getValues());
        DataColumnDomain newDomain = new DataColumnDomainCreator(values, (m_minValues[c] == null || m_minValues[c].isMissing()) ? null : m_minValues[c], (m_maxValues[c] == null || m_maxValues[c].isMissing()) ? null : m_maxValues[c]).createDomain();
        DataColumnSpecCreator creator = new DataColumnSpecCreator(s);
        // NOTE(review): newDomain is never handed to creator in this listing; a call that
        // sets the domain on the creator was presumably dropped. Confirm upstream.
        cSpec[c] = creator.createSpec();
    m_tSpec = new DataTableSpec(cSpec);
Also used : DataTableSpec, DataColumnSpecCreator, DataColumnDomainCreator, DataValueComparator, DataRow, DataColumnSpec, DataColumnDomain, DoubleValue, RowIterator, DataCell, DataType

Example 2 with RowIterator

Use of RowIterator in project knime-core by knime.

the class AutoBinner method createEdgesFromQuantiles.

/**
 * Computes one bin edge per entry of {@code sampleQuantiles} by linearly interpolating
 * between the values in column 0 of two neighbouring rows of {@code data}.
 *
 * NOTE(review): this listing looks damaged by extraction. Closing braces are absent, the
 * right-hand sides of the {@code rowQ1 =;} assignments are gone, and the counters
 * {@code c} and {@code cc} are never advanced. Restore the lost code from the upstream
 * source before relying on this text.
 * NOTE(review): a single forward iterator is shared by all quantiles, so the rows are
 * presumably sorted on column 0 and {@code sampleQuantiles} ascending. Verify with callers.
 *
 * @param data table whose column 0 is read and cast to DoubleValue
 * @param exec receives progress updates while rows are consumed
 * @param sampleQuantiles the quantile fractions to turn into edges
 * @return an array with the same length as {@code sampleQuantiles}
 * @throws CanceledExecutionException declared by the signature; no call that throws it
 *             is visible in this listing
 */
private static double[] createEdgesFromQuantiles(final BufferedDataTable data, final ExecutionContext exec, final double[] sampleQuantiles) throws CanceledExecutionException {
    double[] edges = new double[sampleQuantiles.length];
    long n = data.size();
    // NOTE(review): c (rows consumed) and cc (index into edges) are never incremented in
    // this listing; the increments were presumably lost. Confirm upstream.
    long c = 0;
    int cc = 0;
    RowIterator iter = data.iterator();
    // rowQ and rowQ1 hold the two rows that are interpolated between
    DataRow rowQ = null;
    DataRow rowQ1 = null;
    if (iter.hasNext()) {
        // NOTE(review): right-hand side missing; presumably iter.next(). Confirm upstream.
        rowQ1 =;
        rowQ = rowQ1;
    for (double p : sampleQuantiles) {
        // h is the 1-based fractional row position of quantile p; q is its integer part
        double h = (n - 1) * p + 1;
        int q = (int) Math.floor(h);
        // for p == 1.0 the iterator is drained completely
        while ((1.0 == p || c < q) && iter.hasNext()) {
            rowQ = rowQ1;
            // NOTE(review): right-hand side missing; presumably iter.next(). Confirm upstream.
            rowQ1 =;
            exec.setProgress(c / (double) n);
        // for p == 1.0 both rows are the last row read, so the edge is that row's value
        rowQ = 1.0 != p ? rowQ : rowQ1;
        // NOTE(review): if data has no rows, rowQ and rowQ1 are still null here
        final DataCell xqCell = rowQ.getCell(0);
        final DataCell xq1Cell = rowQ1.getCell(0);
        // data first?)
        // NOTE(review): the line above is the tail of a comment whose start is missing
        if (xqCell.isMissing() || xq1Cell.isMissing()) {
            throw new RuntimeException("Missing values not support for " + "quantile calculation (error in row \"" + rowQ1.getKey() + "\")");
        // for quantile calculation see also
        // Quantile#Estimating_the_quantiles_of_a_population.
        // this implements R-7
        // unchecked casts: column 0 is assumed to hold DoubleValue cells
        double xq = ((DoubleValue) xqCell).getDoubleValue();
        double xq1 = ((DoubleValue) xq1Cell).getDoubleValue();
        // linear interpolation using the fractional part of h
        double quantile = xq + (h - q) * (xq1 - xq);
        edges[cc] = quantile;
    return edges;
Also used : DoubleValue, RowIterator, DataCell, DataRow

Example 3 with RowIterator

Use of RowIterator in project knime-core by knime.

the class AutoBinner method createEdgesFromQuantiles.

/**
 * Computes one bin edge per entry of {@code sampleQuantiles} by linearly interpolating
 * between the values in column 0 of two neighbouring rows of {@code data}.
 *
 * NOTE(review): this listing looks damaged by extraction. Closing braces are absent, the
 * right-hand sides of the {@code rowQ1 =;} assignments are gone, and the counters
 * {@code c} and {@code cc} are never advanced. Restore the lost code from the upstream
 * source before relying on this text.
 * NOTE(review): a single forward iterator is shared by all quantiles, so the rows are
 * presumably sorted on column 0 and {@code sampleQuantiles} ascending. Verify with callers.
 *
 * @param data table whose column 0 is read and cast to DoubleValue
 * @param exec receives progress updates while rows are consumed
 * @param sampleQuantiles the quantile fractions to turn into edges
 * @return an array with the same length as {@code sampleQuantiles}
 * @throws CanceledExecutionException declared by the signature; no call that throws it
 *             is visible in this listing
 */
private static double[] createEdgesFromQuantiles(final BufferedDataTable data, final ExecutionContext exec, final double[] sampleQuantiles) throws CanceledExecutionException {
    double[] edges = new double[sampleQuantiles.length];
    long n = data.size();
    // NOTE(review): c (rows consumed) and cc (index into edges) are never incremented in
    // this listing; the increments were presumably lost. Confirm upstream.
    long c = 0;
    int cc = 0;
    RowIterator iter = data.iterator();
    // rowQ and rowQ1 hold the two rows that are interpolated between
    DataRow rowQ = null;
    DataRow rowQ1 = null;
    if (iter.hasNext()) {
        // NOTE(review): right-hand side missing; presumably iter.next(). Confirm upstream.
        rowQ1 =;
        rowQ = rowQ1;
    for (double p : sampleQuantiles) {
        // h is the 1-based fractional row position of quantile p; q is its integer part
        double h = (n - 1) * p + 1;
        int q = (int) Math.floor(h);
        // for p == 1.0 the iterator is drained completely
        while ((1.0 == p || c < q) && iter.hasNext()) {
            rowQ = rowQ1;
            // NOTE(review): right-hand side missing; presumably iter.next(). Confirm upstream.
            rowQ1 =;
            exec.setProgress(c / (double) n);
        // for p == 1.0 both rows are the last row read, so the edge is that row's value
        rowQ = 1.0 != p ? rowQ : rowQ1;
        // NOTE(review): if data has no rows, rowQ and rowQ1 are still null here
        final DataCell xqCell = rowQ.getCell(0);
        final DataCell xq1Cell = rowQ1.getCell(0);
        // data first?)
        // NOTE(review): the line above is the tail of a comment whose start is missing
        if (xqCell.isMissing() || xq1Cell.isMissing()) {
            throw new RuntimeException("Missing values not support for " + "quantile calculation (error in row \"" + rowQ1.getKey() + "\")");
        // for quantile calculation see also
        // Quantile#Estimating_the_quantiles_of_a_population.
        // this implements R-7
        // unchecked casts: column 0 is assumed to hold DoubleValue cells
        double xq = ((DoubleValue) xqCell).getDoubleValue();
        double xq1 = ((DoubleValue) xq1Cell).getDoubleValue();
        // linear interpolation using the fractional part of h
        double quantile = xq + (h - q) * (xq1 - xq);
        edges[cc] = quantile;
    return edges;
Also used : DoubleValue, RowIterator, DataCell, DataRow

Example 4 with RowIterator

Use of RowIterator in project knime-core by knime.

the class JoinerTest method compareTables.

/**
 * Asserts that {@code test} has the same number of rows as {@code reference} and then
 * walks both tables in parallel, comparing row keys and cells.
 *
 * NOTE(review): this listing looks damaged by extraction. Closing braces are absent, the
 * right-hand sides of the {@code DataRow ... =;} assignments are gone, and the final
 * {@code assertThat} has lost its arguments. Restore them from the upstream source.
 *
 * @param reference table holding the expected rows
 * @param test table under test
 */
private void compareTables(final BufferedDataTable reference, final BufferedDataTable test) {
    // Check if it has the same results as defaultResult
    assertThat("Unequal number of rows in result table", test.getRowCount(), is(reference.getRowCount()));
    RowIterator referenceIter = reference.iterator();
    RowIterator testIter = test.iterator();
    // only the reference iterator drives the loop
    while (referenceIter.hasNext()) {
        // NOTE(review): right-hand sides missing; presumably referenceIter.next() and
        // testIter.next(). Confirm upstream.
        DataRow refRow =;
        DataRow testRow =;
        assertThat("Unexpected row key", testRow.getKey(), is(refRow.getKey()));
        Iterator<DataCell> refCell = refRow.iterator();
        Iterator<DataCell> testCell = testRow.iterator();
        while (refCell.hasNext()) {
            // NOTE(review): the actual and expected arguments are missing; presumably the
            // next cell of each iterator. Confirm upstream.
            assertThat("Unexpected cell in row " + refRow.getKey(),, is(;
Also used : RowIterator, DataCell, DataRow

Example 5 with RowIterator

Use of RowIterator in project knime-core by knime.

the class FilterColumnTableTest method tableTest.

/**
 * Invoked on each testXXX() method to test all rows and cells on equality
 * by iterating through the entire table, that is, the filter as well as the
 * original data table.
 *
 * @param f The filter table to test equality on.
 */
// Walks the filter table f and the original m_table in parallel, row by row.
// NOTE(review): this listing looks damaged by extraction and stops inside the inner loop.
// Closing braces, the right-hand sides of the `DataRow ... =;` assignments and the
// checks that the remaining comments describe are all missing. Restore them from the
// upstream source.
private void tableTest(final FilterColumnTable f) {
    // the column indices reported by the filter table
    final int[] columns = f.getColumnIndices();
    RowIterator fIt = f.iterator();
    RowIterator tIt = m_table.iterator();
    // the loop ends as soon as either table runs out of rows
    for (; fIt.hasNext() && tIt.hasNext(); ) {
        // NOTE(review): right-hand sides missing; presumably fIt.next() and tIt.next().
        // Confirm upstream.
        DataRow rf =;
        DataRow rt =;
        // check also if the same rows are compared
        for (int i = 0; i < columns.length; i++) {
            // check cell from original with the mapped one
Also used : RowIterator, DataRow


RowIterator ( DataRow ( DataCell ( DataTableSpec ( RowKey ( DoubleValue ( BufferedDataTable (org.knime.core.node.BufferedDataTable)14 DataColumnSpec ( ArrayList (java.util.ArrayList)9 DefaultRow ( PreparedStatement (java.sql.PreparedStatement)7 BufferedDataContainer (org.knime.core.node.BufferedDataContainer)7 DataType ( HashSet (java.util.HashSet)5 Random (java.util.Random)5 TimeZone (java.util.TimeZone)5 DataTable ( DoubleCell ( StringCell ( CanceledExecutionException (org.knime.core.node.CanceledExecutionException)5