Use of org.knime.core.node.BufferedDataContainer in project knime-core by knime.
The class ColumnToGrid2NodeModel, method execute.
/**
 * {@inheritDoc}
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    String groupColumn = m_configuration.getGroupColumn();
    final ExecutionMonitor mainExec;
    final BufferedDataTable inputTable;
    if (groupColumn != null) {
        exec.setMessage("Sorting input table");
        BufferedDataTable in = inData[0];
        ExecutionContext sortExec = exec.createSubExecutionContext(0.5);
        ColumnRearranger sortFilterRearranger = new ColumnRearranger(in.getDataTableSpec());
        String[] relevantCols = new String[m_included.length + 1];
        System.arraycopy(m_included, 0, relevantCols, 0, m_included.length);
        relevantCols[relevantCols.length - 1] = groupColumn;
        sortFilterRearranger.keepOnly(relevantCols);
        BufferedDataTable toBeSortedTable = exec.createColumnRearrangeTable(in, sortFilterRearranger, exec.createSubProgress(0.0));
        SortedTable sorter = new SortedTable(toBeSortedTable, Collections.singletonList(groupColumn), new boolean[] { true }, sortExec);
        inputTable = sorter.getBufferedDataTable();
        mainExec = exec.createSubProgress(0.5);
    } else {
        inputTable = inData[0];
        mainExec = exec;
    }
    exec.setMessage("Assembling output");
    DataTableSpec spec = inputTable.getDataTableSpec();
    DataTableSpec outSpec = createOutputSpec(spec);
    BufferedDataContainer cont = exec.createDataContainer(outSpec);
    int[] includeIndices = new int[m_included.length];
    for (int i = 0; i < m_included.length; i++) {
        int index = spec.findColumnIndex(m_included[i]);
        includeIndices[i] = index;
    }
    int gridCount = m_configuration.getColCount();
    final int cellCount;
    final int groupColIndex;
    if (groupColumn != null) {
        cellCount = includeIndices.length * gridCount + 1;
        groupColIndex = spec.findColumnIndex(groupColumn);
    } else {
        cellCount = includeIndices.length * gridCount;
        groupColIndex = -1;
    }
    final DataCell[] cells = new DataCell[cellCount];
    PushBackRowIterator it = new PushBackRowIterator(inputTable.iterator());
    long currentRow = 0;
    long totalRows = inputTable.size();
    long currentOutRow = 0;
    DataCell curGroupValue = null;
    while (it.hasNext()) {
        Arrays.fill(cells, DataType.getMissingCell());
        // assign group column (if enabled)
        if (groupColIndex >= 0) {
            DataRow row = it.next();
            curGroupValue = row.getCell(groupColIndex);
            cells[cells.length - 1] = curGroupValue;
            it.pushBack(row);
        }
        for (int grid = 0; grid < gridCount; grid++) {
            if (!it.hasNext()) {
                break;
            }
            DataRow inRow = it.next();
            DataCell groupValue = groupColIndex < 0 ? null : inRow.getCell(groupColIndex);
            if (ConvenienceMethods.areEqual(curGroupValue, groupValue)) {
                mainExec.setProgress(currentRow / (double) totalRows, "Processing row " + currentRow + "/" + totalRows + ": " + inRow.getKey());
                currentRow += 1;
                mainExec.checkCanceled();
                for (int i = 0; i < includeIndices.length; i++) {
                    cells[grid * includeIndices.length + i] = inRow.getCell(includeIndices[i]);
                }
            } else {
                // start new group, i.e. new row
                it.pushBack(inRow);
                break;
            }
        }
        RowKey key = RowKey.createRowKey(currentOutRow++);
        cont.addRowToTable(new DefaultRow(key, cells));
    }
    cont.close();
    return new BufferedDataTable[] { cont.getTable() };
}
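The grouping logic above depends on a single row of look-ahead: when a row turns out to belong to a new group, it is pushed back so the next output row starts with it. The PushBackRowIterator implementation is not part of this listing; a minimal sketch of such a wrapper (an illustrative reconstruction, not the actual KNIME class) could look like this:

import org.knime.core.data.DataRow;
import org.knime.core.data.RowIterator;

/** Illustrative sketch: a RowIterator that can take back the last row. */
final class PushBackRowIterator extends RowIterator {

    private final RowIterator m_delegate;
    private DataRow m_pushedBack; // at most one row of look-ahead

    PushBackRowIterator(final RowIterator delegate) {
        m_delegate = delegate;
    }

    @Override
    public boolean hasNext() {
        return m_pushedBack != null || m_delegate.hasNext();
    }

    @Override
    public DataRow next() {
        if (m_pushedBack != null) {
            DataRow result = m_pushedBack;
            m_pushedBack = null;
            return result;
        }
        return m_delegate.next();
    }

    /** Makes the given row the next row returned by next(). */
    void pushBack(final DataRow row) {
        m_pushedBack = row;
    }
}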
Use of org.knime.core.node.BufferedDataContainer in project knime-core by knime.
The class TableCreator2NodeModel, method execute.
/**
 * {@inheritDoc}
 */
@Override
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    DataTableSpec outSpec = createSpec();
    BufferedDataContainer cont = exec.createDataContainer(outSpec, true);
    int numColProps = m_settings.getColumnProperties().size();
    ColProperty[] colProps = new ColProperty[numColProps];
    for (int i = 0; i < numColProps; i++) {
        colProps[i] = m_settings.getColumnProperties().get(i);
    }
    int cc = 0;
    int[] notSkippedMap = new int[numColProps];
    for (int i = 0; i < numColProps; i++) {
        notSkippedMap[i] = cc;
        if (!colProps[i].getSkipThisColumn()) {
            cc++;
        }
    }
    int numRows = max(m_settings.getRowIndices()) + 1;
    String rowIdPrefix = m_settings.getRowIdPrefix();
    String rowIdSuffix = m_settings.getRowIdSuffix();
    int rowIdStartWidth = m_settings.getRowIdStartValue();
    int c = 0;
    // fix for bug #2969
    Set<Integer> toRemove = new HashSet<Integer>();
    DataCellFactory cellFactory = new DataCellFactory();
    for (int i = 0; i < numRows; i++) {
        DataCell[] cells = new DataCell[outSpec.getNumColumns()];
        for (int k = 0; k < numColProps; k++) {
            // fix for bug #2969
            while (c < m_settings.getRowIndices().length && (m_settings.getRowIndices()[c] < 0 || m_settings.getColumnIndices()[c] < 0)) {
                toRemove.add(c);
                c++;
            }
            String value = "";
            if (c < m_settings.getRowIndices().length && m_settings.getRowIndices()[c] == i && m_settings.getColumnIndices()[c] == k) {
                value = m_settings.getValues()[c];
                c++;
            }
            if (colProps[k].getSkipThisColumn()) {
                continue;
            }
            String missValPattern = colProps[k].getMissingValuePattern();
            cellFactory.setMissingValuePattern(missValPattern);
            cellFactory.setFormatParameter(colProps[k].getFormatParameter().orElse(null));
            DataCell result = cellFactory.createDataCellOfType(colProps[k].getColumnSpec().getType(), value);
            if (null != result) {
                cells[notSkippedMap[k]] = result;
            } else {
                throw new InvalidSettingsException(cellFactory.getErrorMessage());
            }
        }
        StringBuilder rowId = new StringBuilder();
        rowId.append(rowIdPrefix);
        rowId.append(Integer.toString(i + rowIdStartWidth));
        rowId.append(rowIdSuffix);
        DataRow row = new DefaultRow(rowId.toString(), cells);
        cont.addRowToTable(row);
    }
    cont.close();
    BufferedDataTable out = cont.getTable();
    return new BufferedDataTable[] { out };
}
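The settings object stores only the cells the user actually entered, as parallel arrays of row indices, column indices, and values ordered by row and then column; the cursor c walks that sparse representation exactly once while the two nested loops emit dense rows. The same cursor pattern in isolation, using hypothetical plain arrays instead of the settings object:

/** Illustrative sketch of the sparse-to-dense cursor pattern used above. */
final class SparseToDense {

    /** rows, cols and values are parallel arrays sorted by (row, col). */
    static String[][] toDense(final int numRows, final int numCols,
            final int[] rows, final int[] cols, final String[] values) {
        String[][] dense = new String[numRows][numCols];
        int c = 0; // single cursor over the sparse triples
        for (int i = 0; i < numRows; i++) {
            for (int k = 0; k < numCols; k++) {
                String value = ""; // default for cells never entered
                if (c < rows.length && rows[c] == i && cols[c] == k) {
                    value = values[c];
                    c++;
                }
                dense[i][k] = value;
            }
        }
        return dense;
    }
}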
Use of org.knime.core.node.BufferedDataContainer in project knime-core by knime.
The class XValidatePartitionModel, method execute.
/**
 * {@inheritDoc}
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    boolean inLoop = (m_partNumbers != null);
    if (!inLoop) {
        if (m_settings.leaveOneOut()) {
            m_nrIterations = inData[0].getRowCount();
            m_currIteration = 0;
            m_partNumbers = new short[0];
        } else {
            m_partNumbers = new short[inData[0].getRowCount()];
            final double partSize = m_partNumbers.length / (double) m_settings.validations();
            if (m_settings.stratifiedSampling()) {
                ExecutionMonitor subExec = exec.createSubProgress(0.0);
                subExec.setMessage("Preparing stratified sampling");
                Map<DataCell, List<Integer>> valueCounts = countValues(inData[0], subExec, m_settings.classColumn());
                int part = 0;
                for (Map.Entry<DataCell, List<Integer>> e : valueCounts.entrySet()) {
                    List<Integer> l = e.getValue();
                    for (Integer i : l) {
                        m_partNumbers[i] = (short) part++;
                        part %= m_settings.validations();
                    }
                }
            } else {
                for (int i = 0; i < m_partNumbers.length; i++) {
                    m_partNumbers[i] = (short) Math.min(i / partSize, m_partNumbers.length);
                }
                if (m_settings.randomSampling()) {
                    long seed = m_settings.useRandomSeed() ? m_settings.randomSeed() : System.currentTimeMillis();
                    Random rand = new Random(seed);
                    for (int i = 0; i < m_partNumbers.length; i++) {
                        int pos = rand.nextInt(m_partNumbers.length);
                        short x = m_partNumbers[pos];
                        m_partNumbers[pos] = m_partNumbers[i];
                        m_partNumbers[i] = x;
                    }
                }
            }
            m_nrIterations = m_settings.validations();
            m_currIteration = 0;
        }
    }
    BufferedDataContainer test = exec.createDataContainer(inData[0].getDataTableSpec());
    BufferedDataContainer train = exec.createDataContainer(inData[0].getDataTableSpec());
    int count = 0;
    final double max = inData[0].getRowCount();
    for (DataRow row : inData[0]) {
        exec.checkCanceled();
        exec.setProgress(count / max);
        if (m_settings.leaveOneOut() && (count == m_currIteration)) {
            test.addRowToTable(row);
        } else if (!m_settings.leaveOneOut() && (m_partNumbers[count] == m_currIteration)) {
            test.addRowToTable(row);
        } else {
            train.addRowToTable(row);
        }
        count++;
    }
    test.close();
    train.close();
    // we need to put the counts on the stack for the loop's tail to see:
    pushFlowVariableInt("currentIteration", m_currIteration);
    pushFlowVariableInt("maxIterations", m_nrIterations);
    m_currIteration++;
    return new BufferedDataTable[] { train.getTable(), test.getTable() };
}
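In the stratified branch, countValues presumably returns the row indices grouped by class value; handing out partition numbers round-robin while iterating over those groups keeps each fold's class distribution close to that of the full table. The assignment idea reduced to a standalone sketch (a hypothetical helper, not part of the KNIME API):

import java.util.List;
import java.util.Map;

/** Illustrative sketch: round-robin fold assignment per class value. */
final class StratifiedFolds {

    static short[] assign(final Map<String, List<Integer>> rowsByClass,
            final int numRows, final int numFolds) {
        short[] folds = new short[numRows];
        int next = 0; // rotates 0..numFolds-1, carried over across classes
        for (List<Integer> rowsOfOneClass : rowsByClass.values()) {
            for (int rowIndex : rowsOfOneClass) {
                folds[rowIndex] = (short) next++;
                next %= numFolds;
            }
        }
        return folds;
    }
}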
Use of org.knime.core.node.BufferedDataContainer in project knime-core by knime.
The class ConcatenateTableFactory, method copyTablesIntoOneTable.
/**
 * Copies all tables, except the last (still open) one, into an entirely new table.
 */
private void copyTablesIntoOneTable(final ExecutionContext exec) throws CanceledExecutionException {
    BufferedDataTable[] tables = new BufferedDataTable[m_tables.size() - 1];
    for (int i = 0; i < tables.length; i++) {
        tables[i] = m_tables.get(i).getTable();
    }
    AppendedRowsTable wrapper = new AppendedRowsTable(org.knime.core.data.append.AppendedRowsTable.DuplicatePolicy.Fail, null, tables);
    BufferedDataContainer con = exec.createDataContainer(wrapper.getDataTableSpec());
    RowIterator rowIt = wrapper.iterator();
    exec.setProgress("Too many tables. Copy tables into one table.");
    while (rowIt.hasNext()) {
        exec.checkCanceled();
        con.addRowToTable(rowIt.next());
    }
    con.close();
    BufferedDataContainer last = m_tables.get(m_tables.size() - 1);
    m_tables.clear();
    m_tables.add(con);
    m_tables.add(last);
    exec.setProgress("Tables copied into one.");
}
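AppendedRowsTable wraps the finished tables as one lazily concatenated view (with DuplicatePolicy.Fail, so clashing row keys abort the copy), and the loop then materializes that view into a single container. Reduced to its essentials, the consolidation step looks roughly like the following sketch, which assumes all tables share one spec and already have unique row keys:

// Minimal sketch: materialize several tables with a shared spec into one.
// Unlike the AppendedRowsTable above, this performs no duplicate-key check.
static BufferedDataTable concatenate(final ExecutionContext exec,
        final BufferedDataTable... tables) throws CanceledExecutionException {
    BufferedDataContainer con = exec.createDataContainer(tables[0].getDataTableSpec());
    for (BufferedDataTable table : tables) {
        for (DataRow row : table) {
            exec.checkCanceled();
            con.addRowToTable(row);
        }
    }
    con.close();
    return con.getTable();
}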
Use of org.knime.core.node.BufferedDataContainer in project knime-core by knime.
The class ConcatenateTableFactory, method addTable.
/**
 * All rows of the given row input are added to a new data container. Creates a new data
 * container if this data table spec differs from the previous table. This method checks
 * for duplicate row keys and throws a {@link DuplicateKeyException}.
 *
 * @param table the table to be added
 * @param exec the execution context to possibly create a new data container
 * @throws InterruptedException
 * @throws IOException
 * @throws DuplicateKeyException
 * @throws CanceledExecutionException
 */
void addTable(final RowInput table, final ExecutionContext exec) throws InterruptedException, DuplicateKeyException, IOException, CanceledExecutionException {
    // check if the last container has been closed (i.e. createTable was called)
    if (m_tables.size() > 0) {
        if (m_tables.get(m_tables.size() - 1).isClosed()) {
            throw new IllegalStateException("No more tables can be added! ConcatenateTable has already been created.");
        }
    }
    // poll the first row in order to check whether the incoming table is empty
    DataRow row = table.poll();
    if (row == null) {
        // table is empty
        if (m_ignoreEmptyTables && m_tables.size() > 0) {
            m_iterationCount++;
            return;
        } else if (m_tables.size() == 0) {
            // if this is the first table we receive and it's empty, create an empty container and keep it
            m_emptyTable = exec.createDataContainer(createSpec(table.getDataTableSpec(), m_addIterationColumn, false));
            m_iterationCount++;
            return;
        }
    }
    // compare the spec of the current table with the spec of the first table if changing specs are not tolerated
    if (!m_tolerateChangingSpecs && (m_tables.size() > 0 || m_emptyTable != null)) {
        if (!(m_ignoreEmptyTables && (row == null || m_emptyTable != null))) {
            // don't fail if the table is empty and to be ignored
            // create specs for comparison -> use the most common column type for both specs
            // if altered column types are to be tolerated
            DataTableSpec tmpSpec1;
            if (m_tables.size() == 0 && m_emptyTable != null) {
                tmpSpec1 = createSpec(m_emptyTable.getTableSpec(), false, m_tolerateColumnTypes);
            } else {
                tmpSpec1 = createSpec(m_tables.get(0).getTableSpec(), false, m_tolerateColumnTypes);
            }
            DataTableSpec tmpSpec2 = createSpec(table.getDataTableSpec(), m_addIterationColumn, m_tolerateColumnTypes);
            // fail if the specs have changed
            compareSpecsAndFail(tmpSpec1, tmpSpec2);
        }
    }
    // if the table is empty and empty tables are not to be ignored, there is nothing else to do -> return now
    if (row == null) {
        m_iterationCount++;
        return;
    }
    // if there are too many tables -> create a new one and copy all the data
    if (m_tables.size() > MAX_NUM_TABLES) {
        copyTablesIntoOneTable(exec);
    }
    // create a new data container unless the previously added one has the same data table spec
    // -> problem: if a table with a new spec arrives in each iteration, we end up with quite a lot of data containers
    BufferedDataContainer con;
    DataTableSpec newTableSpec = createSpec(table.getDataTableSpec(), m_addIterationColumn, false);
    if (m_tables.size() == 0) {
        con = exec.createDataContainer(newTableSpec);
        m_tables.add(con);
    } else if (m_tables.size() > 0 && !newTableSpec.equalStructure(m_tables.get(m_tables.size() - 1).getTableSpec())) {
        con = m_tables.get(m_tables.size() - 1);
        con.close();
        con = exec.createDataContainer(newTableSpec);
        m_tables.add(con);
    } else {
        con = m_tables.get(m_tables.size() - 1);
    }
    // add the rows of the table to the newly created data container
    do {
        exec.checkCanceled();
        // change the row key if desired
        if (m_rowKeyCreator != null) {
            // change row key
            row = new BlobSupportDataRow(m_rowKeyCreator.apply(row.getKey()), row);
        }
        m_duplicateChecker.addKey(row.getKey().toString());
        // add the additional iteration column if desired
        if (m_addIterationColumn) {
            IntCell currIterCell = new IntCell(m_iterationCount);
            row = new org.knime.core.data.append.AppendedColumnRow(row, currIterCell);
        }
        con.addRowToTable(row);
    } while ((row = table.poll()) != null);
    m_iterationCount++;
}
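The do/while at the end is the usual streaming pattern for a RowInput: the first row was already polled for the empty-table check, so the body runs once before the next poll(). Stripped of the key rewriting and the iteration column, the consume loop amounts to this sketch (a hypothetical helper, assuming the container's spec matches the input):

// Illustrative sketch: drain a RowInput into an open BufferedDataContainer.
static void drainInto(final RowInput input, final BufferedDataContainer con,
        final ExecutionContext exec) throws InterruptedException, CanceledExecutionException {
    DataRow row;
    // poll() blocks until a row is available and returns null at the end of the input
    while ((row = input.poll()) != null) {
        exec.checkCanceled();
        con.addRowToTable(row);
    }
}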