use of org.knime.core.data.container.SingleCellFactory in project knime-core by knime.
the class RowKeyUtil2 method createColumnRearranger.
/**
 * Builds a {@link ColumnRearranger} for the given table spec that appends one
 * additional column whose cells hold the row id of the respective row.
 *
 * @param inSpec the <code>DataTableSpec</code> of the table to which the
 *            column should be appended
 * @param newColName the name of the appended column
 * @param type the <code>DataType</code> declared in the spec of the appended
 *            column
 * @return the {@link ColumnRearranger} to use
 */
public static ColumnRearranger createColumnRearranger(final DataTableSpec inSpec, final String newColName, final DataType type) {
    final ColumnRearranger rearranger = new ColumnRearranger(inSpec);
    // spec describing the column that carries the row ids
    final DataColumnSpec rowIdColSpec = new DataColumnSpecCreator(newColName, type).createSpec();
    // for every row, emit a string cell containing the string form of its key
    final CellFactory rowIdFactory = new SingleCellFactory(rowIdColSpec) {
        @Override
        public DataCell getCell(final DataRow row) {
            final String rowId = row.getKey().getString();
            return new StringCell(rowId);
        }
    };
    rearranger.append(rowIdFactory);
    return rearranger;
}
use of org.knime.core.data.container.SingleCellFactory in project knime-core by knime.
the class TargetShufflingNodeModel method execute.
/**
 * {@inheritDoc}
 * <p>
 * Shuffles the values of the configured column of the first input table:
 * the column is paired with a column of random numbers, the pair is sorted
 * by the random numbers, and the resulting value order is written back into
 * the original table in place of the selected column.
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    final int colIndex = inData[0].getDataTableSpec().findColumnIndex(m_settings.columnName());
    final String colName = inData[0].getDataTableSpec().getColumnSpec(colIndex).getName();
    // create a new column rearranger from the input table
    ColumnRearranger colRe = new ColumnRearranger(inData[0].getDataTableSpec());
    for (DataColumnSpec c : inData[0].getDataTableSpec()) {
        if (!c.getName().equals(colName)) {
            // remove all columns except the selected one
            colRe.remove(c.getName());
        }
    }
    // append a new column with a random number for each cell
    String uniqueColumnName = DataTableSpec.getUniqueColumnName(inData[0].getDataTableSpec(), "random_col");
    colRe.append(new SingleCellFactory(new DataColumnSpecCreator(uniqueColumnName, LongCell.TYPE).createSpec()) {
        @Override
        public DataCell getCell(final DataRow row) {
            return new LongCell(m_random.nextLong());
        }
    });
    BufferedDataTable toSort = exec.createColumnRearrangeTable(exec.createBufferedDataTable(inData[0], exec), colRe, exec.createSilentSubProgress(.2));
    // sort the random numbers ---> shuffles the sorted column
    // (column 0 is the selected column, column 1 the appended random column)
    List<String> include = new ArrayList<String>();
    include.add(toSort.getDataTableSpec().getColumnSpec(1).getName());
    SortedTable sort = new SortedTable(toSort, include, new boolean[] { true }, exec.createSubExecutionContext(.6));
    final BufferedDataTable sorted = sort.getBufferedDataTable();
    // replace the selected column with the shuffled one
    final DataColumnSpec colSpec = inData[0].getDataTableSpec().getColumnSpec(colIndex);
    ColumnRearranger crea = new ColumnRearranger(inData[0].getDataTableSpec());
    // The iterator over the shuffled values is held in a local (instead of a
    // field of the anonymous factory) so that it can be released reliably.
    final CloseableRowIterator shuffledRows = sorted.iterator();
    try {
        crea.replace(new SingleCellFactory(colSpec) {
            @Override
            public DataCell getCell(final DataRow row) {
                // rows are consumed in order: the i-th input row receives the
                // value of the i-th row of the shuffled table
                return shuffledRows.next().getCell(0);
            }
        }, colName);
        // NOTE(review): assumes the rearranged table is fully computed inside
        // createColumnRearrangeTable, so the iterator is no longer needed once
        // the call returns -- confirm against the ExecutionContext contract.
        return new BufferedDataTable[] { exec.createColumnRearrangeTable(inData[0], crea, exec.createSubProgress(0.2)) };
    } finally {
        // always release the iterator, also on cancellation or failure
        shuffledRows.close();
    }
}
use of org.knime.core.data.container.SingleCellFactory in project knime-core by knime.
the class ThreadedColAppenderNodeModel method createOutputSpec.
/**
 * Derives the output table spec that results from applying the column
 * destinations of the given cell factory to the input spec.
 *
 * @param inSpec the input spec
 * @param cellFactory the cell factory used
 * @return the output spec
 */
protected static DataTableSpec createOutputSpec(final DataTableSpec inSpec, final ExtendedCellFactory cellFactory) {
    final ColumnRearranger rearranger = new ColumnRearranger(inSpec);
    final ColumnDestination[] destinations = cellFactory.getColumnDestinations();
    int pos = 0;
    while (pos < destinations.length) {
        final ColumnDestination target = destinations[pos];
        // placeholder factory: it only contributes its column spec here,
        // its getCell implementation just returns null
        final CellFactory placeholder = new SingleCellFactory(cellFactory.getColumnSpecs()[pos]) {
            @Override
            public DataCell getCell(final DataRow row) {
                return null;
            }
        };
        if (target instanceof AppendColumn) {
            rearranger.append(placeholder);
        } else if (target instanceof InsertColumn) {
            final InsertColumn insertion = (InsertColumn) target;
            rearranger.insertAt(insertion.getIndex(), placeholder);
        } else {
            // every remaining destination is treated as a replacement
            final ReplaceColumn replacement = (ReplaceColumn) target;
            rearranger.replace(placeholder, replacement.getIndex());
        }
        pos++;
    }
    return rearranger.createSpec();
}
use of org.knime.core.data.container.SingleCellFactory in project knime-core by knime.
the class Pivot2NodeModel method execute.
/**
 * {@inheritDoc}
 * <p>
 * Produces three output tables from the table at input port 0: the pivot
 * table, the group totals, and the pivot totals (optionally extended by an
 * overall "Totals" row when total aggregation is enabled).
 */
@Override
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
final BufferedDataTable table = (BufferedDataTable) inData[0];
final List<String> groupAndPivotCols = createAllColumns();
final BufferedDataTable groupTable;
final String orderPivotColumnName;
// split the overall progress: 50% for the main pivot table, 25% each for
// the group totals and the pivot totals
ExecutionContext groupAndPivotExec = exec.createSubExecutionContext(0.5);
ExecutionContext groupExec = exec.createSubExecutionContext(0.25);
ExecutionContext pivotExec = exec.createSubExecutionContext(0.25);
// relative weights of the steps that build the main pivot table; a step
// that is skipped gets weight 0, and each executed step is later given
// (weight / progMainTotal) of groupAndPivotExec
double progMainTotal = 0.0;
double progMainTableAppendIndexForSort = isProcessInMemory() || isRetainOrder() ? 1.0 : 0.0;
progMainTotal += progMainTableAppendIndexForSort;
double progMainTableGroup = 5.0;
progMainTotal += progMainTableGroup;
double progMainTableInMemSort = isProcessInMemory() ? 3.0 : 0.0;
progMainTotal += progMainTableInMemSort;
double progMainTableGetPivots = 1.0;
progMainTotal += progMainTableGetPivots;
double progMainTableFillPivots = 1.0;
progMainTotal += progMainTableFillPivots;
double progMainTableRestoreSort = isProcessInMemory() || isRetainOrder() ? 1.0 : 0.0;
progMainTotal += progMainTableRestoreSort;
double progMainTableReplaceRowKey = isProcessInMemory() ? 1.0 : 0.0;
progMainTotal += progMainTableReplaceRowKey;
if (isProcessInMemory() || isRetainOrder()) {
exec.setMessage("Keeping row order");
final String retainOrderCol = DataTableSpec.getUniqueColumnName(table.getDataTableSpec(), "#pivot_order#");
// append temp. id column with minimum-aggregation method
final ColumnAggregator[] colAggregators = getColumnAggregators().toArray(new ColumnAggregator[0]);
// columns handed to appendOrderColumn: group & pivot columns, all
// aggregated columns and the temporary order column itself
final Set<String> workingCols = new LinkedHashSet<String>();
workingCols.addAll(groupAndPivotCols);
for (final ColumnAggregator ca : colAggregators) {
workingCols.add(ca.getOriginalColName());
}
workingCols.add(retainOrderCol);
final BufferedDataTable appTable = GroupByTable.appendOrderColumn(groupAndPivotExec.createSubExecutionContext(progMainTableAppendIndexForSort / progMainTotal), table, workingCols, retainOrderCol);
final DataColumnSpec retainOrderColSpec = appTable.getSpec().getColumnSpec(retainOrderCol);
// configured aggregators plus one extra aggregator (last slot) on the
// temporary order column, using the row-order aggregation method
final ColumnAggregator[] aggrs = new ColumnAggregator[colAggregators.length + 1];
System.arraycopy(colAggregators, 0, aggrs, 0, colAggregators.length);
aggrs[colAggregators.length] = new ColumnAggregator(retainOrderColSpec, AggregationMethods.getRowOrderMethod(), true);
// name under which the aggregated order column appears after grouping
orderPivotColumnName = getColumnNamePolicy().createColumName(aggrs[colAggregators.length]);
exec.setMessage("Grouping main table");
final GroupByTable groupByTable = createGroupByTable(groupAndPivotExec.createSubExecutionContext(progMainTableGroup / progMainTotal), appTable, groupAndPivotCols, isProcessInMemory(), false, /* retain order always false; handled by pivoting */
Arrays.asList(aggrs));
// when processing in memory, sort the grouped table by the group & pivot columns
if (isProcessInMemory()) {
exec.setMessage("Sorting group table");
final boolean[] sortDirection = new boolean[groupAndPivotCols.size()];
// ensure that missing values are at the end by sorting in ascending order
Arrays.fill(sortDirection, true);
final SortedTable sortedGroupByTable = new SortedTable(groupByTable.getBufferedTable(), groupAndPivotCols, sortDirection, groupAndPivotExec.createSubExecutionContext(progMainTableInMemSort / progMainTotal));
groupTable = sortedGroupByTable.getBufferedDataTable();
} else {
groupTable = groupByTable.getBufferedTable();
}
} else {
// no order bookkeeping needed: group the input table directly
exec.setMessage("Grouping main table");
final GroupByTable groupByTable = createGroupByTable(groupAndPivotExec.createSubExecutionContext(progMainTableGroup / progMainTotal), table, groupAndPivotCols, isProcessInMemory(), false, getColumnAggregators());
groupTable = groupByTable.getBufferedTable();
// null signals to the code below that there is no order column to restore/remove
orderPivotColumnName = null;
}
// resolve the positions of the pivot columns within the grouped table
final List<String> pivotCols = m_pivotCols.getIncludeList();
final int[] pivotIdx = new int[pivotCols.size()];
final DataTableSpec groupSpec = groupTable.getSpec();
// one set of pivot values per pivot column; entries may be null and are
// then created lazily in the loop below
final Set<String>[] combPivots = createCombinedPivots(groupSpec, pivotCols);
for (int i = 0; i < pivotIdx.length; i++) {
pivotIdx[i] = groupSpec.findColumnIndex(pivotCols.get(i));
}
exec.setProgress("Determining pivots...");
ExecutionContext fillExec = groupAndPivotExec.createSubExecutionContext(progMainTableGetPivots / progMainTotal);
final long groupTableSize = groupTable.size();
long groupIndex = 0;
// scan the grouped table and collect, per pivot column, the string
// representation of every value that occurs
for (final DataRow row : groupTable) {
for (int i = 0; i < pivotIdx.length; i++) {
if (combPivots[i] == null) {
combPivots[i] = new LinkedHashSet<String>();
}
final DataCell cell = row.getCell(pivotIdx[i]);
// missing values only become a pivot when they are not ignored
if (cell.isMissing()) {
if (!m_ignoreMissValues.getBooleanValue()) {
combPivots[i].add(cell.toString());
}
} else {
combPivots[i].add(cell.toString());
}
}
// groupIndex is post-incremented for the fraction, so the message shows the 1-based index
fillExec.setProgress(groupIndex++ / (double) groupTableSize, String.format("Group \"%s\" (%d/%d)", row.getKey(), groupIndex, groupTableSize));
fillExec.checkCanceled();
}
// pivotStarts is handed to createOutSpec and afterwards to fillPivotTable;
// presumably createOutSpec populates it -- confirm in createOutSpec
final Map<String, Integer> pivotStarts = new LinkedHashMap<String, Integer>();
final DataTableSpec outSpec = createOutSpec(groupSpec, combPivots, pivotStarts, orderPivotColumnName);
exec.setProgress("Filling pivot table");
BufferedDataTable pivotTable = fillPivotTable(groupTable, outSpec, pivotStarts, groupAndPivotExec.createSubExecutionContext(progMainTableFillPivots / progMainTotal), orderPivotColumnName);
if (orderPivotColumnName != null) {
// sort ascending by the temporary order column, then drop that column
exec.setMessage("Restoring row order");
final SortedTable sortedPivotTable = new SortedTable(pivotTable, Arrays.asList(new String[] { orderPivotColumnName }), new boolean[] { true }, groupAndPivotExec.createSubExecutionContext(progMainTableRestoreSort / progMainTotal));
pivotTable = sortedPivotTable.getBufferedDataTable();
final ColumnRearranger colre = new ColumnRearranger(pivotTable.getSpec());
colre.remove(orderPivotColumnName);
pivotTable = exec.createColumnRearrangeTable(pivotTable, colre, exec.createSilentSubProgress(0.0));
}
// temp fix for bug 3286
if (isProcessInMemory()) {
// if process in memory is true, RowKey's needs to be re-computed
// (rows are copied into a new container with keys generated from a running index)
final BufferedDataContainer rowkeyBuf = groupAndPivotExec.createSubExecutionContext(progMainTableReplaceRowKey / progMainTotal).createDataContainer(pivotTable.getSpec());
long rowIndex = 0;
for (DataRow row : pivotTable) {
rowkeyBuf.addRowToTable(new DefaultRow(RowKey.createRowKey(rowIndex++), row));
}
rowkeyBuf.close();
pivotTable = rowkeyBuf.getTable();
}
groupAndPivotExec.setProgress(1.0);
/* Fill the 3rd port */
exec.setMessage("Determining pivot totals");
// relative weights of the steps for the pivot totals, used with pivotExec
// in the same way as the progMain* weights above
double progPivotTotal = 0.0;
double progPivotGroup = 5.0;
progPivotTotal += progPivotGroup;
double progPivotFillMissing = 1.0;
progPivotTotal += progPivotFillMissing;
double progPivotFillPivots = 1.0;
progPivotTotal += progPivotFillPivots;
double progPivotOverallTotals = m_totalAggregation.getBooleanValue() ? 5.0 : 0.0;
progPivotTotal += progPivotOverallTotals;
// create pivot table only on pivot columns (for grouping)
// perform pivoting: result in single line
final GroupByTable rowGroup = createGroupByTable(pivotExec.createSubExecutionContext(progPivotGroup / progPivotTotal), table, m_pivotCols.getIncludeList(), isProcessInMemory(), isRetainOrder(), getColumnAggregators());
final BufferedDataTable rowGroupTable = rowGroup.getBufferedTable();
// fill group columns with missing cells
// (one column per group-by column is inserted at the front, using the
// spec found at the same position in outSpec)
final ColumnRearranger colre = new ColumnRearranger(rowGroupTable.getDataTableSpec());
for (int i = 0; i < getGroupByColumns().size(); i++) {
final DataColumnSpec cspec = outSpec.getColumnSpec(i);
final CellFactory factory = new SingleCellFactory(cspec) {
/**
* {@inheritDoc}
*/
@Override
public DataCell getCell(final DataRow row) {
return DataType.getMissingCell();
}
};
colre.insertAt(i, factory);
}
final BufferedDataTable groupedRowTable = exec.createColumnRearrangeTable(rowGroupTable, colre, pivotExec.createSubExecutionContext(progPivotFillMissing / progPivotTotal));
BufferedDataTable pivotRowsTable = fillPivotTable(groupedRowTable, outSpec, pivotStarts, pivotExec.createSubExecutionContext(progPivotFillPivots / progPivotTotal), null);
// outSpec was created with the order column name, so remove that column
// from the pivot totals as well when it was used
if (orderPivotColumnName != null) {
final ColumnRearranger colre2 = new ColumnRearranger(pivotRowsTable.getSpec());
colre2.remove(orderPivotColumnName);
pivotRowsTable = exec.createColumnRearrangeTable(pivotRowsTable, colre2, exec.createSilentSubProgress(0.0));
}
// total aggregation without grouping
if (m_totalAggregation.getBooleanValue()) {
@SuppressWarnings("unchecked") final GroupByTable totalGroup = createGroupByTable(pivotExec.createSubExecutionContext(progPivotOverallTotals / progPivotTotal), table, Collections.EMPTY_LIST, isProcessInMemory(), isRetainOrder(), getColumnAggregators());
final BufferedDataTable totalGroupTable = totalGroup.getBufferedTable();
final DataTableSpec pivotsRowsSpec = pivotRowsTable.getSpec();
final DataTableSpec totalGroupSpec = totalGroupTable.getSpec();
// resulting spec: columns of the pivot totals followed by the columns of the overall totals
final DataTableSpec overallTotalSpec = new DataTableSpec(pivotsRowsSpec, totalGroupSpec);
final BufferedDataContainer buf = exec.createDataContainer(overallTotalSpec);
// only the first row of each table is used; when the pivot totals are
// empty the resulting table stays empty
if (pivotRowsTable.size() > 0) {
final List<DataCell> pivotTotalsCells = new ArrayList<DataCell>();
final DataRow pivotsRow = pivotRowsTable.iterator().next();
for (final DataCell cell : pivotsRow) {
pivotTotalsCells.add(cell);
}
final DataRow totalGroupRow = totalGroupTable.iterator().next();
for (final DataCell cell : totalGroupRow) {
pivotTotalsCells.add(cell);
}
buf.addRowToTable(new DefaultRow(new RowKey("Totals"), pivotTotalsCells));
}
buf.close();
pivotRowsTable = buf.getTable();
}
pivotExec.setProgress(1.0);
/* Fill the 2nd port: important to create this last since it will create
* the final hilite handler (mapping) for port #1 AND #2 (bug 3270) */
exec.setMessage("Creating group totals");
// create group table only on group columns; no pivoting
final BufferedDataTable columnGroupTable = createGroupByTable(groupExec, table, getGroupByColumns()).getBufferedTable();
return new PortObject[] { // pivot table
pivotTable, // group totals
columnGroupTable, // pivot and overall totals
pivotRowsTable };
}
use of org.knime.core.data.container.SingleCellFactory in project knime-core by knime.
the class BinByDictionaryNodeModel method createColumnRearranger.
/**
 * Creates the rearranger that appends the label column to the first input.
 * The label of a row is looked up in a rule set built from the rows of the
 * dictionary table (second input).
 *
 * @param ins the specs of both inputs: index 0 is the data table, index 1
 *            the dictionary table
 * @param port1Table the dictionary table, or <code>null</code> if no rules
 *            should be indexed
 * @param exec monitor for progress messages and cancellation while the rules
 *            are indexed; only used when <code>port1Table</code> is not
 *            <code>null</code>
 * @return the rearranger appending the label column
 * @throws InvalidSettingsException if no configuration is set or one of the
 *             configured columns is not present in the respective input
 * @throws CanceledExecutionException if canceled while indexing the rules
 */
private ColumnRearranger createColumnRearranger(final DataTableSpec[] ins, final BufferedDataTable port1Table, final ExecutionMonitor exec) throws InvalidSettingsException, CanceledExecutionException {
    final BinByDictionaryConfiguration config = m_configuration;
    if (config == null) {
        throw new InvalidSettingsException("No configuration set");
    }
    final String lowerName = config.getLowerBoundColumnPort1();
    final String upperName = config.getUpperBoundColumnPort1();
    final String labelName = config.getLabelColumnPort1();
    final String valueName = config.getValueColumnPort0();

    // the value column must be present in the first input
    final int valueIdx = ins[0].findColumnIndex(valueName);
    if (valueIdx < 0) {
        throw new InvalidSettingsException("No such column in 1st input: " + valueName);
    }
    final DataType valueType = ins[0].getColumnSpec(valueIdx).getType();

    final boolean lowerInclusive = config.isLowerBoundInclusive();
    final boolean upperInclusive = config.isUpperBoundInclusive();
    // warning issued when a bound column has a different type than the value column
    final String typeMismatchWarning = "The types of the comparison and value " + "columns are not equal, comparison might be done " + "based on lexicographical string representation!";

    // lower bound: optional column in the second input
    final int lowerIdx;
    final DataValueComparator lowerComparator;
    if (lowerName != null) {
        lowerIdx = ins[1].findColumnIndex(lowerName);
        if (lowerIdx < 0) {
            throw new InvalidSettingsException("No such column in 2nd input: " + lowerName);
        }
        final DataType lowerType = ins[1].getColumnSpec(lowerIdx).getType();
        if (!valueType.equals(lowerType)) {
            setWarningMessage(typeMismatchWarning);
            lowerComparator = DataType.getCommonSuperType(valueType, lowerType).getComparator();
        } else {
            lowerComparator = valueType.getComparator();
        }
    } else {
        // no lower bound specified
        lowerIdx = -1;
        lowerComparator = null;
    }

    // upper bound: optional column in the second input
    final int upperIdx;
    final DataValueComparator upperComparator;
    if (upperName != null) {
        upperIdx = ins[1].findColumnIndex(upperName);
        if (upperIdx < 0) {
            throw new InvalidSettingsException("No such column in 2nd input: " + upperName);
        }
        final DataType upperType = ins[1].getColumnSpec(upperIdx).getType();
        if (!valueType.equals(upperType)) {
            setWarningMessage(typeMismatchWarning);
            upperComparator = DataType.getCommonSuperType(valueType, upperType).getComparator();
        } else {
            upperComparator = valueType.getComparator();
        }
    } else {
        // no upper bound specified
        upperIdx = -1;
        upperComparator = null;
    }

    // the label column is mandatory
    final int labelIdx = ins[1].findColumnIndex(labelName);
    if (labelIdx < 0) {
        throw new InvalidSettingsException("No such column in 2nd input: " + labelName);
    }
    // output column: copy of the label column spec without handlers, renamed
    // so that it is unique within the first input
    final DataColumnSpecCreator outColCreator = new DataColumnSpecCreator(ins[1].getColumnSpec(labelIdx));
    outColCreator.removeAllHandlers();
    outColCreator.setName(DataTableSpec.getUniqueColumnName(ins[0], labelName));
    final DataColumnSpec outColSpec = outColCreator.createSpec();

    final BinByDictionaryRuleSet rules = new BinByDictionaryRuleSet(lowerComparator, lowerInclusive, upperComparator, upperInclusive, config.isUseBinarySearch());
    if (port1Table != null) {
        // in execute: turn every dictionary row into one rule
        final long total = port1Table.size();
        long processed = 0;
        for (final DataRow ruleRow : port1Table) {
            final DataCell lower = lowerIdx >= 0 ? ruleRow.getCell(lowerIdx) : null;
            final DataCell upper = upperIdx >= 0 ? ruleRow.getCell(upperIdx) : null;
            final DataCell label = ruleRow.getCell(labelIdx);
            rules.addRule(lower, upper, label);
            processed++;
            // message only, the progress value itself is not advanced
            exec.setProgress(0.0, "Indexing rule table " + processed + "/" + total + " (\"" + ruleRow.getKey() + "\")");
            exec.checkCanceled();
        }
    }
    rules.close();

    final SingleCellFactory labelFactory = new SingleCellFactory(rules.getSize() > 100, outColSpec) {
        @Override
        public DataCell getCell(final DataRow row) {
            final DataCell value = row.getCell(valueIdx);
            if (!value.isMissing()) {
                final DataCell match = rules.search(value);
                if (match != null) {
                    return match;
                }
                if (config.isFailIfNoRuleMatches()) {
                    throw new RuntimeException("No rule matched for row \"" + row.getKey() + "\", value: \"" + value + "\"");
                }
            }
            // missing input value or no matching rule
            return DataType.getMissingCell();
        }
    };
    final ColumnRearranger result = new ColumnRearranger(ins[0]);
    result.append(labelFactory);
    return result;
}
Aggregations