Search in sources :

Example 41 with RowIterator

use of org.knime.core.data.RowIterator in project knime-core by knime.

the class Sampler method countRows.

/*
 * Determines the number of rows in the given table.
 *
 * For a {@link org.knime.core.node.BufferedDataTable} the size is read
 * directly from the table. Any other table is iterated once; if an
 * execution monitor is given, every visited row is reported to it and
 * cancellation is checked.
 */
private static final long countRows(final DataTable table, final ExecutionMonitor exec) throws CanceledExecutionException {
    // shortcut: a buffered table can report its size without iterating
    if (table instanceof BufferedDataTable) {
        final BufferedDataTable buffered = (BufferedDataTable) table;
        return buffered.size();
    }
    // otherwise walk over all rows and count them
    final boolean reportProgress = exec != null;
    final RowIterator rows = table.iterator();
    long counted = 0;
    while (rows.hasNext()) {
        final DataRow current = rows.next();
        if (reportProgress) {
            // the message shows the number of rows counted before this one
            exec.setMessage("Counting Rows... " + counted + " (\"" + current.getKey() + "\")");
            exec.checkCanceled();
        }
        counted++;
    }
    return counted;
}
Also used : RowIterator(org.knime.core.data.RowIterator) BufferedDataTable(org.knime.core.node.BufferedDataTable) DataRow(org.knime.core.data.DataRow)

Example 42 with RowIterator

use of org.knime.core.data.RowIterator in project knime-core by knime.

the class CacheNodeModel method execute.

/**
 * {@inheritDoc}
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] data, final ExecutionContext exec) throws Exception {
    // Only the cells that are "visible" in the input table get written.
    // Consider a wrapper such as the column filter hiding 90% of the
    // columns: any iterator would still instantiate the cells of the
    // hidden columns as well, which makes iterating slow.
    final BufferedDataTable input = data[0];
    final BufferedDataContainer container = exec.createDataContainer(input.getDataTableSpec());
    final long totalCount = input.size();
    try {
        final RowIterator rows = input.iterator();
        // 1-based position of the row currently being copied
        long position = 1;
        while (rows.hasNext()) {
            final DataRow current = rows.next();
            exec.setProgress(position / (double) totalCount,
                "Caching row " + position + "/" + totalCount + " (\"" + current.getKey() + "\")");
            exec.checkCanceled();
            container.addRowToTable(current);
            position++;
        }
    } finally {
        // close the container even if copying was canceled or failed
        container.close();
    }
    return new BufferedDataTable[] { container.getTable() };
}
Also used : BufferedDataContainer(org.knime.core.node.BufferedDataContainer) RowIterator(org.knime.core.data.RowIterator) BufferedDataTable(org.knime.core.node.BufferedDataTable) DataRow(org.knime.core.data.DataRow)

Example 43 with RowIterator

use of org.knime.core.data.RowIterator in project knime-core by knime.

the class CrosstabNodeModel method execute.

/**
 * {@inheritDoc}
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    final BufferedDataTable input = inData[0];
    // group the input by the row variable and the column variable
    final List<String> groupCols = Arrays.asList(m_settings.getRowVarColumn(), m_settings.getColVarColumn());
    final GroupByTable grouped = createGroupByTable(exec.createSubExecutionContext(0.6), input, groupCols);
    final BufferedDataTable freqTable = grouped.getBufferedTable();
    // column positions inside the group table:
    // row variable, column variable and frequency
    final int rowVarIdx = 0;
    final int colVarIdx = 1;
    final int freqIdx = 2;
    final CrosstabTotals totals = computeTotals(freqTable, rowVarIdx, colVarIdx, freqIdx);
    final CrosstabProperties naming = CrosstabProperties.create(m_settings.getNamingVersion());
    final CrosstabStatisticsCalculator stats =
        new CrosstabStatisticsCalculator(freqTable, rowVarIdx, colVarIdx, freqIdx, totals, naming);
    stats.run(exec.createSubExecutionContext(0.1));
    final BufferedDataTable propsTable = stats.getTable();
    final int chiSquareIdx = propsTable.getDataTableSpec().findColumnIndex(naming.getCellChiSquareName());

    // set up the output container and the two iterators that are advanced
    // in lock-step (one frequency row and one statistics row per output row)
    final BufferedDataContainer out = exec.createDataContainer(createOutSpec(input.getSpec()));
    final RowIterator freqRows = freqTable.iterator();
    final RowIterator statsRows = propsTable.iterator();

    // property name -> output cell index; the first two output cells are
    // reserved for the row variable and the column variable
    final Map<String, Integer> propIndex = new LinkedHashMap<String, Integer>();
    for (int p = 0; p < m_settings.getProperties().size(); p++) {
        final String propName = m_settings.getProperties().get(p);
        propIndex.put(propName, p + 2);
    }
    final int cellCount = propIndex.size() + 2;

    final long rowCount = freqTable.size();
    long rowIndex = 0;
    while (rowIndex < rowCount) {
        final DataCell[] outCells = new DataCell[cellCount];
        final DataRow freqRow = freqRows.next();

        // row variable and column variable go first
        final DataCell rowVar = freqRow.getCell(rowVarIdx);
        outCells[0] = rowVar;
        final DataCell colVar = freqRow.getCell(colVarIdx);
        outCells[1] = colVar;

        // frequency; a missing cell counts as zero
        final DataCell freqCell = freqRow.getCell(freqIdx);
        final double freq;
        if (freqCell.isMissing()) {
            freq = 0.0;
        } else {
            freq = ((DoubleValue) freqCell).getDoubleValue();
        }
        addToCells(outCells, propIndex, naming.getFrequencyName(), new DoubleCell(freq));

        // cell chi-square, taken from the matching statistics row
        final DataRow statsRow = statsRows.next();
        addToCells(outCells, propIndex, naming.getCellChiSquareName(), statsRow.getCell(chiSquareIdx));

        // overall total
        final double total = totals.getTotal();
        addToCells(outCells, propIndex, naming.getTotalCountName(), new DoubleCell(total));

        // total of the current row variable
        final double rowTotal = totals.getRowTotal().get(rowVar);
        addToCells(outCells, propIndex, naming.getTotalRowCountName(), new DoubleCell(rowTotal));

        // total of the current column variable
        final double colTotal = totals.getColTotal().get(colVar);
        addToCells(outCells, propIndex, naming.getTotalColCountName(), new DoubleCell(colTotal));

        // percent = frequency / total
        final double percent = 100 * (freq / total);
        addToCells(outCells, propIndex, naming.getPercentName(), new DoubleCell(percent));

        // row percent; zero frequency short-circuits to zero
        final double rowPercent;
        if (freq == 0.0) {
            rowPercent = 0.0;
        } else {
            rowPercent = 100.0 * (freq / rowTotal);
        }
        addToCells(outCells, propIndex, naming.getRowPercentName(), new DoubleCell(rowPercent));

        // column percent; zero frequency short-circuits to zero
        final double colPercent;
        if (freq == 0.0) {
            colPercent = 0.0;
        } else {
            colPercent = 100.0 * (freq / colTotal);
        }
        addToCells(outCells, propIndex, naming.getColPercentName(), new DoubleCell(colPercent));

        // expected frequency; zero when the overall total is zero
        final double expected;
        if (total == 0.0) {
            expected = 0.0;
        } else {
            expected = colTotal / total * rowTotal;
        }
        addToCells(outCells, propIndex, naming.getExpectedFrequencyName(), new DoubleCell(expected));

        // deviation: difference of the frequency to the expected frequency
        final double deviation = freq - expected;
        addToCells(outCells, propIndex, naming.getDeviationName(), new DoubleCell(deviation));

        out.addRowToTable(new DefaultRow(RowKey.createRowKey(rowIndex), outCells));
        rowIndex++;
    }
    out.close();

    // keep the results in the node model fields
    m_outTable = out.getTable();
    m_statistics = stats.getStatistics();
    m_statOutTable = stats.getStatistics().getTable();
    m_totals = totals;
    return new BufferedDataTable[] { m_outTable, m_statOutTable };
}
Also used : BufferedDataContainer(org.knime.core.node.BufferedDataContainer) DoubleCell(org.knime.core.data.def.DoubleCell) DataRow(org.knime.core.data.DataRow) LinkedHashMap(java.util.LinkedHashMap) RowIterator(org.knime.core.data.RowIterator) BufferedDataTable(org.knime.core.node.BufferedDataTable) BigGroupByTable(org.knime.base.node.preproc.groupby.BigGroupByTable) GroupByTable(org.knime.base.node.preproc.groupby.GroupByTable) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow)

Example 44 with RowIterator

use of org.knime.core.data.RowIterator in project knime-core by knime.

the class EntropyCalculator method getClusterMap.

/**
 * Groups the row keys of a table by the value found in one of its columns.
 *
 * For every row, the string representation of the cell at {@code colIndex}
 * is wrapped in a {@link RowKey} and used as the cluster identifier; the
 * row's own key is added to the set of members of that cluster.
 *
 * @param table the table to scan
 * @param colIndex index of the column holding the cluster assignment
 * @param ex monitor used to report progress and to check for cancellation
 * @return map from cluster identifier to the keys of its member rows, in
 *         order of first occurrence of each cluster
 * @throws CanceledExecutionException if the monitor reports cancellation
 */
private static HashMap<RowKey, Set<RowKey>> getClusterMap(final DataTable table, final int colIndex, final ExecutionMonitor ex) throws CanceledExecutionException {
    HashMap<RowKey, Set<RowKey>> result = new LinkedHashMap<RowKey, Set<RowKey>>();
    // The total is only known for buffered tables; -1 means "unknown".
    // Use the long-based size() (as the other table scans do) instead of
    // the int-based getRowCount() so very large tables are handled.
    long rowCount = -1;
    if (table instanceof BufferedDataTable) {
        rowCount = ((BufferedDataTable) table).size();
    }
    // 1-based row counter; long so it cannot overflow on large tables
    long i = 1;
    final String name = table.getDataTableSpec().getName();
    for (RowIterator it = table.iterator(); it.hasNext(); i++) {
        DataRow row = it.next();
        String m = "Scanning row " + i + " of table \"" + name + "\".";
        if (rowCount >= 0) {
            // total known: report fractional progress along with the message
            ex.setProgress(i / (double) rowCount, m);
        } else {
            // total unknown: only the message can be updated
            ex.setMessage(m);
        }
        ex.checkCanceled();
        RowKey id = row.getKey();
        RowKey clusterMember = new RowKey(row.getCell(colIndex).toString());
        Set<RowKey> members = result.get(clusterMember);
        if (members == null) {
            // first row seen for this cluster
            members = new HashSet<RowKey>();
            result.put(clusterMember, members);
        }
        members.add(id);
    }
    return result;
}
Also used : HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet) Set(java.util.Set) RowKey(org.knime.core.data.RowKey) RowIterator(org.knime.core.data.RowIterator) BufferedDataTable(org.knime.core.node.BufferedDataTable) DataRow(org.knime.core.data.DataRow) LinkedHashMap(java.util.LinkedHashMap)

Example 45 with RowIterator

use of org.knime.core.data.RowIterator in project knime-core by knime.

the class AppendedRowsNodeModel method createDuplicateMap.

/**
 * Builds a map from collision-free row keys to the original row keys of
 * the given table.
 *
 * A key that is already present in the map is extended by {@code suffix}
 * (repeatedly, if necessary) until it is not contained in the map yet.
 *
 * @param table the table whose row keys are collected
 * @param exec context used to check for cancellation while resolving
 *            key collisions
 * @param suffix text appended to a key that is already taken
 * @return map from the (possibly suffixed) key to the original key
 * @throws CanceledExecutionException if execution was canceled
 */
private Map<RowKey, RowKey> createDuplicateMap(final DataTable table, final ExecutionContext exec, final String suffix) throws CanceledExecutionException {
    final Map<RowKey, RowKey> uniqueToOriginal = new HashMap<RowKey, RowKey>();
    for (final RowIterator rows = table.iterator(); rows.hasNext();) {
        final RowKey original = rows.next().getKey();
        RowKey candidate = original;
        // keep appending the suffix until the key is not taken yet
        while (uniqueToOriginal.containsKey(candidate)) {
            exec.checkCanceled();
            candidate = new RowKey(candidate.toString() + suffix);
        }
        uniqueToOriginal.put(candidate, original);
    }
    return uniqueToOriginal;
}
Also used : RowKey(org.knime.core.data.RowKey) HashMap(java.util.HashMap) RowIterator(org.knime.core.data.RowIterator) DataRow(org.knime.core.data.DataRow)

Aggregations

RowIterator (org.knime.core.data.RowIterator)77 DataRow (org.knime.core.data.DataRow)62 DataCell (org.knime.core.data.DataCell)28 DataTableSpec (org.knime.core.data.DataTableSpec)20 RowKey (org.knime.core.data.RowKey)16 DoubleValue (org.knime.core.data.DoubleValue)14 BufferedDataTable (org.knime.core.node.BufferedDataTable)13 DataColumnSpec (org.knime.core.data.DataColumnSpec)11 ArrayList (java.util.ArrayList)9 DefaultRow (org.knime.core.data.def.DefaultRow)8 PreparedStatement (java.sql.PreparedStatement)7 DataType (org.knime.core.data.DataType)6 BufferedDataContainer (org.knime.core.node.BufferedDataContainer)6 HashSet (java.util.HashSet)5 Random (java.util.Random)5 TimeZone (java.util.TimeZone)5 DataTable (org.knime.core.data.DataTable)5 DoubleCell (org.knime.core.data.def.DoubleCell)5 StringCell (org.knime.core.data.def.StringCell)5 CanceledExecutionException (org.knime.core.node.CanceledExecutionException)5