use of org.knime.core.data.RowIterator in project knime-core by knime.
the class Sampler method countRows.
/**
 * Determines the number of rows in the given table.
 *
 * <p>If the table is a {@link org.knime.core.node.BufferedDataTable}, the
 * row count is retrieved directly from it. Any other table is iterated once
 * from start to end and the visited rows are counted.
 *
 * @param table the table whose rows are counted
 * @param exec monitor used for status messages and cancellation checks while
 *            iterating; may be {@code null}, in which case neither happens
 * @return the number of rows in {@code table}
 * @throws CanceledExecutionException propagated from
 *             {@code exec.checkCanceled()} during the iteration
 */
private static final long countRows(final DataTable table, final ExecutionMonitor exec) throws CanceledExecutionException {
    if (table instanceof BufferedDataTable) {
        // shortcut: a buffered table reports its size without iterating
        final BufferedDataTable buffered = (BufferedDataTable) table;
        return buffered.size();
    }
    // fall back to a full scan
    long counted = 0;
    final RowIterator rows = table.iterator();
    while (rows.hasNext()) {
        final DataRow current = rows.next();
        if (exec != null) {
            // the message shows the number of rows counted *before* this one
            exec.setMessage("Counting Rows... " + counted + " (\"" + current.getKey() + "\")");
            exec.checkCanceled();
        }
        counted++;
    }
    return counted;
}
use of org.knime.core.data.RowIterator in project knime-core by knime.
the class CacheNodeModel method execute.
/**
 * {@inheritDoc}
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] data, final ExecutionContext exec) throws Exception {
    // Only the cells that are "visible" in the input table get written.
    // Think of a wrapper such as the column filter hiding 90% of the
    // columns: any iterator would still instantiate the cells of the hidden
    // columns and thus make iterating slow. Copying the rows into a fresh
    // container avoids that for downstream nodes.
    final BufferedDataTable input = data[0];
    final BufferedDataContainer container = exec.createDataContainer(input.getDataTableSpec());
    final long rowTotal = input.size();
    try {
        // 1-based number of the row currently being copied
        long rowNumber = 1;
        final RowIterator rows = input.iterator();
        while (rows.hasNext()) {
            final DataRow current = rows.next();
            final String progressText = "Caching row " + rowNumber + "/" + rowTotal + " (\"" + current.getKey() + "\")";
            exec.setProgress(rowNumber / (double) rowTotal, progressText);
            exec.checkCanceled();
            container.addRowToTable(current);
            rowNumber++;
        }
    } finally {
        // close the container on every path, including cancellation
        container.close();
    }
    return new BufferedDataTable[] { container.getTable() };
}
use of org.knime.core.data.RowIterator in project knime-core by knime.
the class CrosstabNodeModel method execute.
/**
 * {@inheritDoc}
 *
 * <p>Builds the crosstab output in three steps: a group-by table holding the
 * frequency of every (row variable, column variable) combination, the
 * statistics computed by {@code CrosstabStatisticsCalculator}, and finally
 * one output row per frequency row that combines both. The first output is
 * the combined table, the second the statistics table.
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    final BufferedDataTable table = inData[0];
    final List<String> cols = Arrays.asList(m_settings.getRowVarColumn(), m_settings.getColVarColumn());
    final GroupByTable groupTable = createGroupByTable(exec.createSubExecutionContext(0.6), table, cols);
    final BufferedDataTable freqTable = groupTable.getBufferedTable();
    // the index of the row variable in the group table
    final int rowVarI = 0;
    // the index of the column variable in the group table
    final int colVarI = 1;
    // the index of the frequency in the group table
    final int freqI = 2;
    final CrosstabTotals totals = computeTotals(freqTable, rowVarI, colVarI, freqI);
    final CrosstabProperties naming = CrosstabProperties.create(m_settings.getNamingVersion());
    final CrosstabStatisticsCalculator stats = new CrosstabStatisticsCalculator(freqTable, rowVarI, colVarI, freqI, totals, naming);
    stats.run(exec.createSubExecutionContext(0.1));
    final BufferedDataTable propsTable = stats.getTable();
    final int cellChiSquareI = propsTable.getDataTableSpec().findColumnIndex(naming.getCellChiSquareName());
    // Map each selected property to its position in the output row; the
    // first two positions are taken by the row and the column variable.
    final List<String> selectedProps = m_settings.getProperties();
    final Map<String, Integer> props = new LinkedHashMap<String, Integer>();
    for (int i = 0; i < selectedProps.size(); i++) {
        props.put(selectedProps.get(i), i + 2);
    }
    // The two sub contexts above consume 0.6 and 0.1 of the progress, the
    // remaining 0.3 is used for writing the output table.
    final ExecutionContext outExec = exec.createSubExecutionContext(0.3);
    final long rowCount = freqTable.size();
    // create output table
    final BufferedDataContainer cont = exec.createDataContainer(createOutSpec(table.getSpec()));
    try {
        // Both tables are walked in lock step: row i of the statistics
        // table is combined with row i of the frequency table.
        final RowIterator freqIter = freqTable.iterator();
        final RowIterator statsIter = propsTable.iterator();
        for (long i = 0; i < rowCount; i++) {
            // keep the node responsive to cancellation on large tables
            outExec.checkCanceled();
            outExec.setProgress((i + 1) / (double) rowCount, "Writing row " + (i + 1) + "/" + rowCount);
            final DataCell[] cells = new DataCell[props.size() + 2];
            final DataRow freqRow = freqIter.next();
            // add the row variable
            final DataCell rowVar = freqRow.getCell(rowVarI);
            cells[0] = rowVar;
            // add the column variable
            final DataCell colVar = freqRow.getCell(colVarI);
            cells[1] = colVar;
            // the frequency; a missing frequency counts as zero
            final DataCell freqCell = freqRow.getCell(freqI);
            final double freq = freqCell.isMissing() ? 0.0 : ((DoubleValue) freqCell).getDoubleValue();
            addToCells(cells, props, naming.getFrequencyName(), new DoubleCell(freq));
            // the cell chi-square
            final DataRow statsRow = statsIter.next();
            addToCells(cells, props, naming.getCellChiSquareName(), statsRow.getCell(cellChiSquareI));
            // the total
            final double total = totals.getTotal();
            addToCells(cells, props, naming.getTotalCountName(), new DoubleCell(total));
            // the rowTotal
            final double rowTotal = totals.getRowTotal().get(rowVar);
            addToCells(cells, props, naming.getTotalRowCountName(), new DoubleCell(rowTotal));
            // the column Total
            final double colTotal = totals.getColTotal().get(colVar);
            addToCells(cells, props, naming.getTotalColCountName(), new DoubleCell(colTotal));
            // the percent = frequency / total
            // NOTE(review): unlike the expected frequency below, this is
            // not guarded against total == 0 and then yields NaN or
            // infinity - confirm whether that is intended.
            final double percent = 100 * (freq / total);
            addToCells(cells, props, naming.getPercentName(), new DoubleCell(percent));
            // the row percent
            final double rowPercent = 0.0 == freq ? 0.0 : 100.0 * (freq / rowTotal);
            addToCells(cells, props, naming.getRowPercentName(), new DoubleCell(rowPercent));
            // the col percent
            final double colPercent = 0.0 == freq ? 0.0 : 100.0 * (freq / colTotal);
            addToCells(cells, props, naming.getColPercentName(), new DoubleCell(colPercent));
            // the expected frequency
            final double expected = 0.0 == total ? 0.0 : colTotal / total * rowTotal;
            addToCells(cells, props, naming.getExpectedFrequencyName(), new DoubleCell(expected));
            // the deviation (the difference of the frequency to the
            // expected frequency)
            final double deviation = freq - expected;
            addToCells(cells, props, naming.getDeviationName(), new DoubleCell(deviation));
            final DefaultRow row = new DefaultRow(RowKey.createRowKey(i), cells);
            cont.addRowToTable(row);
        }
    } finally {
        // close the container even if the loop is canceled or fails
        cont.close();
    }
    m_outTable = cont.getTable();
    m_statistics = stats.getStatistics();
    m_statOutTable = stats.getStatistics().getTable();
    m_totals = totals;
    return new BufferedDataTable[] { m_outTable, m_statOutTable };
}
use of org.knime.core.data.RowIterator in project knime-core by knime.
the class EntropyCalculator method getClusterMap.
/**
 * Groups the row keys of a table by the value found in one of its columns.
 *
 * <p>For every row the string representation of the cell at
 * {@code colIndex} is turned into a {@link RowKey} identifying the cluster,
 * and the row's own key is added to the member set of that cluster.
 *
 * @param table the table to scan
 * @param colIndex index of the column holding the cluster assignment
 * @param ex monitor used for progress reporting and cancellation checks
 * @return map from cluster key to the keys of the rows assigned to it, in
 *         order of first occurrence of each cluster
 * @throws CanceledExecutionException propagated from
 *             {@code ex.checkCanceled()} during the scan
 */
private static HashMap<RowKey, Set<RowKey>> getClusterMap(final DataTable table, final int colIndex, final ExecutionMonitor ex) throws CanceledExecutionException {
    final HashMap<RowKey, Set<RowKey>> result = new LinkedHashMap<RowKey, Set<RowKey>>();
    // The row count is only known for buffered tables; -1 means "unknown".
    // It is kept as long (via size()) so large tables are not forced
    // through an int.
    long rowCount = -1;
    if (table instanceof BufferedDataTable) {
        rowCount = ((BufferedDataTable) table).size();
    }
    // 1-based row counter, long for the same reason as rowCount
    long i = 1;
    final String name = table.getDataTableSpec().getName();
    for (RowIterator it = table.iterator(); it.hasNext(); i++) {
        final DataRow row = it.next();
        final String m = "Scanning row " + i + " of table \"" + name + "\".";
        if (rowCount >= 0) {
            ex.setProgress(i / (double) rowCount, m);
        } else {
            // without a known total only a message can be shown
            ex.setMessage(m);
        }
        ex.checkCanceled();
        final RowKey id = row.getKey();
        final RowKey clusterMember = new RowKey(row.getCell(colIndex).toString());
        Set<RowKey> members = result.get(clusterMember);
        if (members == null) {
            // first row seen for this cluster
            members = new HashSet<RowKey>();
            result.put(clusterMember, members);
        }
        members.add(id);
    }
    return result;
}
use of org.knime.core.data.RowIterator in project knime-core by knime.
the class AppendedRowsNodeModel method createDuplicateMap.
/**
 * Builds a map from the (made unique) row key to the original row key for
 * every row of the table.
 *
 * <p>A key that is already in the map gets {@code suffix} appended until it
 * no longer collides with a key recorded earlier.
 *
 * @param table the table whose row keys are scanned
 * @param exec used to check for cancellation
 * @param suffix text appended to a colliding key to make it unique
 * @return map whose keys are the unique row keys and whose values are the
 *         keys as they appear in {@code table}
 * @throws CanceledExecutionException propagated from
 *             {@code exec.checkCanceled()}
 */
private Map<RowKey, RowKey> createDuplicateMap(final DataTable table, final ExecutionContext exec, final String suffix) throws CanceledExecutionException {
    final Map<RowKey, RowKey> duplicateMap = new HashMap<RowKey, RowKey>();
    for (final RowIterator it = table.iterator(); it.hasNext();) {
        // check once per row so that a table without any duplicate keys
        // can be canceled as well
        exec.checkCanceled();
        final RowKey origKey = it.next().getKey();
        RowKey key = origKey;
        while (duplicateMap.containsKey(key)) {
            // also check here: a long chain of collisions is resolved
            // inside this loop without advancing to the next row
            exec.checkCanceled();
            key = new RowKey(key.toString() + suffix);
        }
        duplicateMap.put(key, origKey);
    }
    return duplicateMap;
}
Aggregations