Use of org.knime.core.node.BufferedDataContainer in project knime-core by knime.
Class JoinerNodeModel, method execute.
/**
 * Joins the two input tables on their row keys and returns the joined table.
 *
 * <p>Outline of what this method does:
 * <ol>
 * <li>Creates the output container from the joined spec of both inputs.</li>
 * <li>If the join method is {@code JoinedTable.METHOD_FILTER}, restricts the right table to
 * the columns whose names do not occur in the left table.</li>
 * <li>Repeatedly calls {@code readLeftChunk} to populate {@code map} (row key to helper) and
 * scans the right table for rows with matching keys; the remaining cases are delegated to the
 * {@code processRemaining...} helpers.</li>
 * </ol>
 *
 * <p>The transient join state kept in member fields ({@code m_exec}, {@code m_leftIt},
 * {@code m_rightIt}, {@code m_firstMapHelper}) is released in a {@code finally} block, so it is
 * also cleared when the join is canceled or fails.
 *
 * {@inheritDoc}
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    BufferedDataContainer dc = exec.createDataContainer(JoinedTable.createSpec(inData[0].getDataTableSpec(), inData[1].getDataTableSpec(), m_method, m_suffix));
    DataTable leftTable = inData[0];
    DataTable rightTable = inData[1];
    // "Filter" method: keep only those right-hand columns whose names do not
    // already occur in the left table.
    if (JoinedTable.METHOD_FILTER.equals(m_method)) {
        DataTableSpec leftTableSpec = leftTable.getDataTableSpec();
        DataTableSpec rightTableSpec = rightTable.getDataTableSpec();
        LinkedHashSet<String> leftHash = new LinkedHashSet<String>();
        for (DataColumnSpec c : leftTableSpec) {
            leftHash.add(c.getName());
        }
        LinkedHashSet<String> rightHash = new LinkedHashSet<String>();
        for (DataColumnSpec c : rightTableSpec) {
            rightHash.add(c.getName());
        }
        rightHash.removeAll(leftHash);
        String[] survivors = rightHash.toArray(new String[rightHash.size()]);
        // Only wrap the right table if at least one column was actually removed.
        if (survivors.length < rightTableSpec.getNumColumns()) {
            rightTable = new FilterColumnTable(rightTable, survivors);
        }
    }
    // One bit per right-table row; a set bit marks a row that has been written to the output.
    final BitSet rightRows = new BitSet(inData[1].getRowCount());
    // Left rows waiting for their right partner; the helpers are softly referenced and may
    // therefore have been cleared by the garbage collector when they are looked up.
    final LinkedHashMap<RowKey, SoftReference<Helper>> map = new LinkedHashMap<RowKey, SoftReference<Helper>>(1024);
    m_leftRows = 0;
    m_outputRows = 0;
    m_leftIt = null;
    m_rightIt = null;
    m_firstMapHelper = null;
    m_exec = exec;
    try {
        if (m_ignoreMissingRows) {
            m_max = Math.min(inData[0].getRowCount(), inData[1].getRowCount());
        } else {
            m_max = Math.max(inData[0].getRowCount(), inData[1].getRowCount());
        }
        while (true) {
            if (!readLeftChunk(leftTable, map)) {
                // No further left rows could be read: flush unmatched right rows (unless
                // rows without a partner are ignored) and stop.
                if (!m_ignoreMissingRows) {
                    processRemainingRightRows(dc, leftTable, rightTable, rightRows);
                }
                break;
            }
            // Restart the right iterator if there is none yet, it is exhausted, or the first
            // right row not yet written lies at or before the iterator's current index.
            if ((m_rightIt == null) || (!m_rightIt.hasNext()) || (rightRows.nextClearBit(0) <= m_rightIt.getIndex())) {
                m_rightIt = new CounterRowIterator(rightTable.iterator());
            }
            while (m_rightIt.hasNext() && (map.size() > 0)) {
                m_exec.checkCanceled();
                DataRow rightRow = m_rightIt.next();
                SoftReference<Helper> sr = map.get(rightRow.getKey());
                if (sr != null) {
                    Helper h = sr.get();
                    if (h == null) {
                        // The helper has been garbage collected; drop the stale map entry.
                        map.remove(rightRow.getKey());
                    } else {
                        h.m_rightRow = rightRow;
                        h.m_rightIndex = m_rightIt.getIndex();
                        // The joined row is written right away only if this helper's left
                        // index equals the current left-row counter.
                        if (h.m_leftIndex == m_leftRows) {
                            assert h.m_predecessor == null || !map.containsKey(h.m_predecessor.m_leftRow.getKey());
                            h.m_predecessor = null;
                            DataRow joinedRow = new JoinedRow(h.m_leftRow, h.m_rightRow);
                            dc.addRowToTable(joinedRow);
                            map.remove(rightRow.getKey());
                            rightRows.set(m_rightIt.getIndex());
                            m_leftRows++;
                            m_outputRows++;
                            printProgress(rightRow.getKey());
                        }
                    }
                }
            }
            processRemainingLeftRowsInMap(dc, rightTable, map, rightRows);
            if (!m_ignoreMissingRows) {
                // Every right row has been written: the rest of the left table is handled
                // by the dedicated helper.
                if (rightRows.cardinality() == inData[1].getRowCount()) {
                    processRemainingLeftRowsInTable(dc, leftTable, rightTable);
                }
            } else {
                // Whatever is still in the map is counted as processed and discarded.
                m_leftRows += map.size();
                map.clear();
                if (rightRows.cardinality() == inData[1].getRowCount()) {
                    break;
                }
            }
        }
    } finally {
        // Release the per-execution state even if the join was canceled or failed, so the
        // node does not keep the execution context and the iterators alive.
        m_leftIt = null;
        m_rightIt = null;
        m_exec = null;
        m_firstMapHelper = null;
    }
    dc.close();
    return new BufferedDataTable[] { dc.getTable() };
}
Use of org.knime.core.node.BufferedDataContainer in project knime-core by knime.
Class PivotNodeModel, method execute.
/**
 * Builds the pivot table in two passes: the first pass aggregates one value per
 * (group, pivot) pair over all input rows, the second pass writes one output row per
 * group value with one cell per pivot value.
 *
 * {@inheritDoc}
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    final BufferedDataTable input = inData[0];
    final DataTableSpec inSpec = input.getDataTableSpec();
    final int groupIdx = inSpec.findColumnIndex(m_group.getStringValue());
    final int pivotIdx = inSpec.findColumnIndex(m_pivot.getStringValue());
    // A negative aggregation index means "no aggregation column", i.e. plain counting.
    final int aggIdx;
    if (m_makeAgg.getStringValue().equals(PivotNodeDialogPane.MAKE_AGGREGATION[1])) {
        aggIdx = inSpec.findColumnIndex(m_agg.getStringValue());
    } else {
        aggIdx = -1;
    }
    final PivotAggregationMethod method;
    if (aggIdx >= 0) {
        method = PivotAggregationMethod.METHODS.get(m_aggMethod.getStringValue());
    } else {
        method = PivotAggregationMethod.COUNT;
    }

    // (group, pivot) -> running aggregation state
    final Map<Pair<String, String>, Double[]> aggregates = new LinkedHashMap<Pair<String, String>, Double[]>();
    // pivot values in encounter order, pre-seeded with the domain values if available
    final Set<String> pivotValues = new LinkedHashSet<String>();
    final DataColumnSpec pivotColSpec = inSpec.getColumnSpec(pivotIdx);
    if (pivotColSpec.getDomain().hasValues()) {
        for (DataCell domainValue : pivotColSpec.getDomain().getValues()) {
            pivotValues.add(domainValue.toString());
        }
    }
    // group values in encounter order
    final Set<String> groupValues = new LinkedHashSet<String>();
    // output row key (group) -> input row keys, only filled when hiliting is enabled
    final LinkedHashMap<RowKey, Set<RowKey>> hiliteMapping = new LinkedHashMap<RowKey, Set<RowKey>>();

    // First pass (75% of the progress): aggregate per (group, pivot) pair.
    final double totalRows = input.getRowCount();
    int processedRows = 0;
    ExecutionContext phaseExec = exec.createSubExecutionContext(0.75);
    for (final DataRow row : input) {
        phaseExec.checkCanceled();
        processedRows++;
        phaseExec.setProgress(processedRows / totalRows, "Aggregating row: \"" + row.getKey().getString() + "\" (" + processedRows + "\\" + (int) totalRows + ")");
        final String groupValue = row.getCell(groupIdx).toString();
        groupValues.add(groupValue);
        final DataCell pivotCell = row.getCell(pivotIdx);
        // Rows with a missing pivot value are skipped only if the user asked for it.
        if (pivotCell.isMissing() && m_ignoreMissValues.getBooleanValue()) {
            continue;
        }
        final String pivotValue = pivotCell.toString();
        pivotValues.add(pivotValue);
        final Pair<String, String> key = new Pair<String, String>(groupValue, pivotValue);
        Double[] state = aggregates.get(key);
        if (state == null) {
            state = method.init();
            aggregates.put(key, state);
        }
        final DataCell aggCell = aggIdx < 0 ? null : row.getCell(aggIdx);
        method.compute(state, aggCell);
        if (m_hiliting.getBooleanValue()) {
            final RowKey groupKey = new RowKey(groupValue);
            Set<RowKey> members = hiliteMapping.get(groupKey);
            if (members == null) {
                members = new LinkedHashSet<RowKey>();
                hiliteMapping.put(groupKey, members);
            }
            members.add(row.getKey());
        }
    }

    // Second pass (25% of the progress): emit one row per group.
    final DataTableSpec outSpec = initSpec(pivotValues);
    final BufferedDataContainer container = exec.createDataContainer(outSpec);
    final double totalGroups = groupValues.size();
    int processedGroups = 0;
    phaseExec = exec.createSubExecutionContext(0.25);
    for (final String groupValue : groupValues) {
        phaseExec.checkCanceled();
        processedGroups++;
        phaseExec.setProgress(processedGroups / totalGroups, "Computing aggregation of group \"" + groupValue + "\" (" + processedGroups + "\\" + (int) totalGroups + ")");
        final DataCell[] rowCells = new DataCell[pivotValues.size()];
        int column = 0;
        for (final String pivotValue : pivotValues) {
            final Double[] state = aggregates.get(new Pair<String, String>(groupValue, pivotValue));
            rowCells[column++] = method.done(state);
        }
        container.addRowToTable(new DefaultRow(groupValue, rowCells));
    }
    container.close();
    if (m_hiliting.getBooleanValue()) {
        m_translator.setMapper(new DefaultHiLiteMapper(hiliteMapping));
    }
    return new BufferedDataTable[] { container.getTable() };
}
Use of org.knime.core.node.BufferedDataContainer in project knime-core by knime.
Class DatabaseLoopingNodeModel, method execute.
/**
 * Runs the configured query once per batch of values taken from the selected input column
 * and collects all query results in a single output table.
 *
 * <p>The values of a batch are joined with {@code ','} and substituted for
 * {@code IN_PLACE_HOLDER} in the original query. A batch is flushed when it holds
 * {@code m_noValues} values or the input is exhausted. If no query was executed at all, an
 * empty table is created from the spec derived via {@code createDummyValueQuery} and
 * {@code getResultSpec}. The original query is always restored in the {@code finally} block.
 *
 * @throws InvalidSettingsException if the selected column does not exist in the input table
 * {@inheritDoc}
 */
@Override
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    final BufferedDataTable inputTable = (BufferedDataTable) inData[0];
    final long rowCount = inputTable.size();
    final String column = m_columnModel.getStringValue();
    final DataTableSpec spec = inputTable.getDataTableSpec();
    final int colIdx = spec.findColumnIndex(column);
    if (colIdx < 0) {
        throw new InvalidSettingsException("Column " + column + " not found in input table.");
    }
    // all values seen so far
    final Set<DataCell> values = new HashSet<>();
    BufferedDataContainer buf = null;
    final String oQuery = getQuery();
    // values of the batch currently being assembled (insertion-ordered, no duplicates)
    final Collection<DataCell> curSet = new LinkedHashSet<>();
    final DBReader load = loadConnectionSettings(inData[getNrInPorts() - 1]);
    try {
        final int noValues = m_noValues.getIntValue();
        MutableInteger rowCnt = new MutableInteger(0);
        for (Iterator<DataRow> it = inputTable.iterator(); it.hasNext(); ) {
            exec.checkCanceled();
            DataCell cell = it.next().getCell(colIdx);
            // Skip the last row if its value was already seen and there is no pending batch.
            if (values.contains(cell) && !it.hasNext() && curSet.isEmpty()) {
                continue;
            }
            values.add(cell);
            curSet.add(cell);
            if (curSet.size() == noValues || !it.hasNext()) {
                // NOTE(review): the values are inserted into the SQL text verbatim (no
                // escaping of quotes) - confirm that the input column is trusted.
                StringBuilder queryValues = new StringBuilder();
                for (DataCell v : curSet) {
                    if (queryValues.length() > 0) {
                        queryValues.append("','");
                    }
                    queryValues.append(v.toString());
                }
                // String#replaceAll interprets '$' and '\' in the replacement as group
                // references/escapes; quote the replacement so that values containing these
                // characters are inserted literally instead of failing or being mangled.
                String newQuery = parseQuery(oQuery.replaceAll(IN_PLACE_HOLDER, java.util.regex.Matcher.quoteReplacement(queryValues.toString())));
                load.updateQuery(newQuery);
                exec.setProgress(values.size() * (double) noValues / rowCount, "Selecting all values \"" + queryValues + "\"...");
                final BufferedDataTable table = getResultTable(exec, inData, load);
                if (buf == null) {
                    // The output spec is only known once the first result has arrived.
                    DataTableSpec resSpec = table.getDataTableSpec();
                    buf = exec.createDataContainer(createSpec(resSpec, spec.getColumnSpec(column)));
                }
                if (m_aggByRow.getBooleanValue()) {
                    aggregate(table, rowCnt, buf, CollectionCellFactory.createListCell(curSet));
                } else {
                    notAggregate(table, rowCnt, buf, CollectionCellFactory.createListCell(curSet));
                }
                curSet.clear();
            }
        }
        if (buf == null) {
            // create empty dummy container with spec generated during #configure
            final PortObjectSpec[] inSpec;
            if ((inData.length > 1) && (inData[1] instanceof DatabaseConnectionPortObject)) {
                DatabaseConnectionPortObject dbPort = (DatabaseConnectionPortObject) inData[1];
                inSpec = new PortObjectSpec[] { inputTable.getSpec(), dbPort.getSpec() };
            } else {
                inSpec = new PortObjectSpec[] { inputTable.getSpec() };
            }
            final String newQuery = createDummyValueQuery(spec, colIdx, oQuery);
            setQuery(newQuery);
            final DataTableSpec resultSpec = getResultSpec(inSpec);
            final DataTableSpec outSpec = createSpec(resultSpec, spec.getColumnSpec(column));
            buf = exec.createDataContainer(outSpec);
        }
        buf.close();
    } catch (CanceledExecutionException cee) {
        // Cancellation is passed on as is, without touching the last spec.
        throw cee;
    } catch (Exception e) {
        setLastSpec(null);
        throw e;
    } finally {
        // reset query to original
        setQuery(oQuery);
    }
    final BufferedDataTable resultTable = buf.getTable();
    setLastSpec(resultTable.getDataTableSpec());
    return new BufferedDataTable[] { resultTable };
}
Use of org.knime.core.node.BufferedDataContainer in project knime-core by knime.
Class Normalizer2NodeModel, method calculate.
/**
 * Creates the normalized table for the configured mode and copies it into a
 * {@code BufferedDataTable}.
 *
 * <p>In the "no normalization" mode the input table is returned unchanged together with an
 * empty model spec and an empty configuration. In min/max mode the domain bounds of the
 * selected columns are overwritten with the configured {@code m_min}/{@code m_max}.
 *
 * @param inData The input data.
 * @param exec For BufferedDataTable creation and progress.
 * @return the result of the calculation
 * @throws Exception If the node calculation fails for any reason.
 */
protected CalculationResult calculate(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    final BufferedDataTable input = (BufferedDataTable) inData[0];
    final DataTableSpec inputSpec = input.getSpec();
    // extract selected numeric columns
    updateNumericColumnSelection(inputSpec);
    final Normalizer2 normalizer = new Normalizer2(input, m_columns);
    final long totalRows = input.size();
    // 30% of the progress is reserved for preparing the transformation
    final ExecutionContext prepareExec = exec.createSubExecutionContext(0.3);

    final AffineTransTable normalized;
    boolean overwriteDomainBounds = false;
    switch (m_mode) {
        case NONORM_MODE:
            return new CalculationResult(input, new DataTableSpec(), new AffineTransConfiguration());
        case MINMAX_MODE:
            overwriteDomainBounds = true;
            normalized = normalizer.doMinMaxNorm(m_max, m_min, prepareExec);
            break;
        case ZSCORE_MODE:
            normalized = normalizer.doZScoreNorm(prepareExec);
            break;
        case DECIMALSCALING_MODE:
            normalized = normalizer.doDecimalScaling(prepareExec);
            break;
        default:
            throw new Exception("No mode set");
    }

    // an error of the transformation itself is fatal
    final String transformError = normalized.getErrorMessage();
    if (transformError != null) {
        throw new Exception(transformError);
    }
    // an error during initialization is only reported as a warning
    final String initError = normalizer.getErrorMessage();
    if (initError != null) {
        setWarningMessage(initError);
    }

    final DataTableSpec modelSpec = FilterColumnTable.createFilterTableSpec(inputSpec, m_columns);
    final AffineTransConfiguration configuration = normalized.getConfiguration();
    DataTableSpec outSpec = normalized.getDataTableSpec();
    if (overwriteDomainBounds) {
        // Min/max mode: set the domain bounds of every selected column to m_min/m_max.
        final int columnCount = outSpec.getNumColumns();
        final DataColumnSpec[] columnSpecs = new DataColumnSpec[columnCount];
        for (int c = 0; c < columnCount; c++) {
            columnSpecs[c] = outSpec.getColumnSpec(c);
        }
        for (final String columnName : m_columns) {
            final int position = outSpec.findColumnIndex(columnName);
            final DataColumnSpec current = columnSpecs[position];
            final DataColumnSpecCreator specCreator = new DataColumnSpecCreator(current);
            final DataColumnDomainCreator domainCreator = new DataColumnDomainCreator(current.getDomain());
            domainCreator.setLowerBound(new DoubleCell(m_min));
            domainCreator.setUpperBound(new DoubleCell(m_max));
            specCreator.setDomain(domainCreator.createDomain());
            columnSpecs[position] = specCreator.createSpec();
        }
        outSpec = new DataTableSpec(outSpec.getName(), columnSpecs);
    }

    // the remaining 70% of the progress: copy the normalized rows into the output container
    final ExecutionMonitor normExec = exec.createSubProgress(.7);
    final BufferedDataContainer container = exec.createDataContainer(outSpec);
    long rowNumber = 1;
    for (final DataRow row : normalized) {
        normExec.checkCanceled();
        normExec.setProgress(rowNumber / (double) totalRows, "Normalizing row no. " + rowNumber + " of " + totalRows + " (\"" + row.getKey() + "\")");
        container.addRowToTable(row);
        rowNumber++;
    }
    container.close();
    return new CalculationResult(container.getTable(), modelSpec, configuration);
}
Use of org.knime.core.node.BufferedDataContainer in project knime-core by knime.
Class ColumnToGridNodeModel, method execute.
/**
 * Arranges the cells of the included columns in a grid: up to {@code getColCount()}
 * consecutive input rows are written side by side into one output row.
 *
 * <p>If a group column is configured, the input is first reduced to the relevant columns and
 * sorted by that column; an output row then never mixes rows of different groups and carries
 * the group value in its last cell. Unfilled grid positions hold missing cells.
 *
 * {@inheritDoc}
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    final String[] includes = m_configuration.getIncludes();
    final String groupColumn = m_configuration.getGroupColumn();
    final boolean grouped = groupColumn != null;
    final ExecutionMonitor mainExec;
    final BufferedDataTable inputTable;
    if (!grouped) {
        inputTable = inData[0];
        mainExec = exec;
    } else {
        exec.setMessage("Sorting input table");
        final BufferedDataTable unsorted = inData[0];
        // sorting takes the first half of the progress, assembling the second half
        final ExecutionContext sortExec = exec.createSubExecutionContext(0.5);
        final ColumnRearranger rearranger = new ColumnRearranger(unsorted.getDataTableSpec());
        // keep the included columns plus the group column (appended last)
        final String[] relevantCols = Arrays.copyOf(includes, includes.length + 1);
        relevantCols[includes.length] = groupColumn;
        rearranger.keepOnly(relevantCols);
        final BufferedDataTable reduced = exec.createColumnRearrangeTable(unsorted, rearranger, exec.createSubProgress(0.0));
        final SortedTable sorter = new SortedTable(reduced, Collections.singletonList(groupColumn), new boolean[] { true }, sortExec);
        inputTable = sorter.getBufferedDataTable();
        mainExec = exec.createSubProgress(0.5);
    }

    exec.setMessage("Assembling output");
    final DataTableSpec spec = inputTable.getDataTableSpec();
    final DataTableSpec outSpec = createOutputSpec(spec);
    final BufferedDataContainer cont = exec.createDataContainer(outSpec);
    final int includeCount = includes.length;
    final int[] includeIndices = new int[includeCount];
    for (int i = 0; i < includeCount; i++) {
        includeIndices[i] = spec.findColumnIndex(includes[i]);
    }
    final int gridCount = m_configuration.getColCount();
    // one extra trailing cell for the group value when grouping is enabled
    final int cellCount = grouped ? includeCount * gridCount + 1 : includeCount * gridCount;
    final int groupColIndex = grouped ? spec.findColumnIndex(groupColumn) : -1;

    final DataCell[] cells = new DataCell[cellCount];
    final PushBackRowIterator it = new PushBackRowIterator(inputTable.iterator());
    final long totalRows = inputTable.size();
    long currentRow = 0;
    long currentOutRow = 0;
    DataCell curGroupValue = null;
    while (it.hasNext()) {
        // every output row starts out with missing cells only
        Arrays.fill(cells, DataType.getMissingCell());
        if (groupColIndex >= 0) {
            // peek at the next row to learn the group of the output row being built
            final DataRow peeked = it.next();
            curGroupValue = peeked.getCell(groupColIndex);
            cells[cells.length - 1] = curGroupValue;
            it.pushBack(peeked);
        }
        for (int grid = 0; grid < gridCount && it.hasNext(); grid++) {
            final DataRow inRow = it.next();
            final DataCell groupValue = groupColIndex < 0 ? null : inRow.getCell(groupColIndex);
            if (!ConvenienceMethods.areEqual(curGroupValue, groupValue)) {
                // start new group, i.e. new row
                it.pushBack(inRow);
                break;
            }
            mainExec.setProgress(currentRow / (double) totalRows, "Processing row " + currentRow + "/" + totalRows + ": " + inRow.getKey());
            currentRow += 1;
            mainExec.checkCanceled();
            final int offset = grid * includeIndices.length;
            for (int i = 0; i < includeIndices.length; i++) {
                cells[offset + i] = inRow.getCell(includeIndices[i]);
            }
        }
        final RowKey key = RowKey.createRowKey(currentOutRow++);
        cont.addRowToTable(new DefaultRow(key, cells));
    }
    cont.close();
    return new BufferedDataTable[] { cont.getTable() };
}
Aggregations