use of org.knime.core.node.BufferedDataContainer in project knime-core by knime.
the class ColumnAutoTypeCasterNodeModel method execute.
/**
 * Scans the included columns of the first input table, guesses for each of them the data type
 * that fits all inspected cells and converts the columns to the guessed types.
 * <p>
 * The second output table lists, per included column, the column name, the final column type and
 * the row that determined this type.
 *
 * {@inheritDoc}
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    double progress = 0;
    final BufferedDataTable data = inData[0];
    // an empty input table is passed through unchanged
    BufferedDataTable outTable = inData[0];
    final DataTableSpec inSpec = data.getDataTableSpec();
    final String[] incls = m_conf.applyTo(inSpec).getIncludes();
    final DataType[] types = new DataType[incls.length];
    final double max = incls.length + data.size();
    final String[] colNames = { "Column name", "Final column type", "Row determining final column type" };
    final DataType[] colTypes = new DataType[] { StringCell.TYPE, StringCell.TYPE, StringCell.TYPE };
    BufferedDataContainer reasonsCon = exec.createDataContainer(new DataTableSpec(colNames, colTypes));
    setReasons(new String[incls.length][3]);
    if (data.size() > 0) {
        // the column positions do not change while scanning, so resolve them only once
        final int[] colIndices = new int[incls.length];
        for (int i = 0; i < incls.length; i++) {
            colIndices[i] = inSpec.findColumnIndex(incls[i]);
        }
        SimpleDateFormat dateFormat = new SimpleDateFormat(m_dateFormat);
        long rowsToScan = m_quickScan ? Math.min(m_numberOfRows, data.size()) : data.size();
        // iterate explicitly so that the very iterator that is used gets closed, also when the scan
        // stops early (quick scan) or is aborted by an exception such as a cancellation
        final org.knime.core.data.container.CloseableRowIterator rowIt = data.iterator();
        try {
            while (rowsToScan > 0 && rowIt.hasNext()) {
                rowsToScan--;
                final DataRow row = rowIt.next();
                for (int i = 0; i < incls.length; i++) {
                    // guess for each cell in each column the best matching datatype
                    DataCell c = row.getCell(colIndices[i]);
                    if (!c.isMissing() && c.toString().equals(m_missValPat)) {
                        // cells equal to the missing value pattern do not influence the type
                        continue;
                    }
                    DataType newType = typeGuesser(c, dateFormat);
                    if (types[i] != null) {
                        DataType toSet = setType(types[i], newType);
                        if (!toSet.equals(types[i])) {
                            // this row changed the column type, remember it as the reason
                            m_reasons[i][2] = row.getKey().getString();
                            m_reasons[i][1] = toSet.toString();
                            m_reasons[i][0] = incls[i];
                        }
                        types[i] = toSet;
                    } else {
                        // first evaluated cell of this column
                        types[i] = newType;
                        String r = row.getKey().toString();
                        r += m_quickScan ? (" based on a quickscan.") : "";
                        m_reasons[i][2] = r;
                        m_reasons[i][1] = newType.toString();
                        m_reasons[i][0] = incls[i];
                    }
                    exec.checkCanceled();
                }
                exec.checkCanceled();
                progress++;
                exec.setProgress(progress / max);
            }
        } finally {
            rowIt.close();
        }
        for (int i = 0; i < types.length; i++) {
            if (types[i] == null) {
                // no cell of this column was evaluated (e.g. every scanned cell matched the missing
                // value pattern): fall back to a string column instead of failing with an NPE
                types[i] = StringCell.TYPE;
                m_reasons[i][0] = incls[i];
                m_reasons[i][1] = StringCell.TYPE.toString();
            } else if (types[i].equals(DataType.getMissingCell().getType())) {
                // if one column only contains missing cells then set column type to StringCell
                types[i] = StringCell.TYPE;
            }
        }
        ColumnRearranger arrange = new ColumnRearranger(inSpec);
        for (int i = 0; i < incls.length; i++) {
            final int colIdx = colIndices[i];
            final DataType type = types[i];
            DataColumnSpecCreator colSpecCreator = new DataColumnSpecCreator(incls[i], types[i]);
            DataColumnSpec colSpec = colSpecCreator.createSpec();
            if (type.equals(DateAndTimeCell.TYPE)) {
                arrange.replace(createDateAndTimeConverter(colIdx, colSpec), colIdx);
            } else if (type.equals(LongCell.TYPE)) {
                arrange.replace(createLongConverter(colIdx, colSpec), colIdx);
            } else {
                arrange.replace(createNumberConverter(colIdx, type, colSpec), colIdx);
            }
            progress++;
            exec.setProgress(progress / max);
            exec.checkCanceled();
        }
        outTable = exec.createColumnRearrangeTable(data, arrange, exec);
        for (int i = 0; i < m_reasons.length; i++) {
            DataCell[] row = new DataCell[m_reasons[i].length];
            for (int j = 0; j < m_reasons[i].length; j++) {
                final String reason = m_reasons[i][j];
                // entries that were never filled are reported as missing values
                row[j] = reason == null ? DataType.getMissingCell() : new StringCell(reason);
            }
            reasonsCon.addRowToTable(new DefaultRow(RowKey.createRowKey((long) i), row));
        }
    }
    reasonsCon.close();
    BufferedDataTable outReasons = reasonsCon.getTable();
    return new BufferedDataTable[] { outTable, outReasons };
}
use of org.knime.core.node.BufferedDataContainer in project knime-core by knime.
the class BootstrapNodeModel method execute.
/**
 * Draws a bootstrap sample from the first input table. The rows are processed in chunks of at most
 * {@code MAX_CHUNK_SIZE} rows; the first output table receives the sampled rows, the second one
 * the hold-out rows.
 *
 * {@inheritDoc}
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    // Init random object
    long seed = m_configuration.getUseSeed() ? m_configuration.getSeed() : System.currentTimeMillis();
    Random random = new Random(seed);
    // Create containers for output tables
    BufferedDataContainer bootstrap = exec.createDataContainer(getSamplesSpec(inData[0].getDataTableSpec()));
    BufferedDataContainer holdout = exec.createDataContainer(inData[0].getDataTableSpec());
    int numberOfRows = inData[0].getRowCount();
    // Init unprocessed rows with amount of all rows
    int unprocessedRows = numberOfRows;
    // Create progress object with amount of all rows
    Progress progress = new Progress(numberOfRows, exec);
    // Calculate number of samples
    int numberOfSamples;
    if (m_configuration.getInPercent()) {
        numberOfSamples = Math.round(numberOfRows * (m_configuration.getPercent() / 100));
    } else {
        numberOfSamples = m_configuration.getSize();
    }
    // Create iterator for all rows
    CloseableRowIterator iterator = inData[0].iterator();
    try {
        // Execute while until every row has been processed
        while (unprocessedRows > 0) {
            int chunkSize;
            int numberOfChunkSamples;
            // Every chunk but the last one has the maximum size; the last chunk takes the
            // remaining rows and will take care of fixing rounding issues
            if (unprocessedRows > MAX_CHUNK_SIZE) {
                // Set to biggest allowed size
                chunkSize = MAX_CHUNK_SIZE;
                // Calculate amount of samples relative to the size of this chunk
                numberOfChunkSamples = Math.round((chunkSize / (float) numberOfRows) * numberOfSamples);
            } else {
                // Make this chunk as big as there are rows left
                chunkSize = unprocessedRows;
                // Generate the rest of the samples
                // (this will take care of rounding errors that may occur in the relative calculation)
                // we never put more than 2^31 rows in the bootstrap container, therefore it's safe to cast to int
                // If the rounding of the previous chunks already produced more samples than requested,
                // the difference would be negative; request no further samples in that case.
                numberOfChunkSamples = Math.max(0, numberOfSamples - (int) bootstrap.size());
            }
            // Sample this chunk
            sampleChunk(iterator, chunkSize, numberOfChunkSamples, bootstrap, holdout, random, progress);
            // Mark chunked rows as processed
            unprocessedRows -= chunkSize;
        }
    } finally {
        // release the iterator even if sampling is aborted by an exception
        iterator.close();
    }
    bootstrap.close();
    holdout.close();
    return new BufferedDataTable[] { bootstrap.getTable(), holdout.getTable() };
}
use of org.knime.core.node.BufferedDataContainer in project knime-core by knime.
the class CreateTableStructureNodeModel method execute.
/**
 * Creates an output table without any rows whose structure is given by {@code createSpec()}.
 *
 * {@inheritDoc}
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    final BufferedDataContainer emptyContainer = exec.createDataContainer(createSpec());
    // no rows are added: the container is closed right away
    emptyContainer.close();
    final BufferedDataTable structureOnly = emptyContainer.getTable();
    return new BufferedDataTable[] { structureOnly };
}
use of org.knime.core.node.BufferedDataContainer in project knime-core by knime.
the class CAIMDiscretizationNodeModel method createResultTable.
/**
 * Applies the {@link DiscretizationScheme}s of a {@link DiscretizationModel} to an input table
 * and returns the result as a new {@link BufferedDataTable}. Columns that are included in the
 * model are replaced by string columns; all other columns are copied unchanged.
 *
 * @param exec the context used to create the {@link BufferedDataTable} and to report progress
 * @param table the input data table
 * @param discretizationModel the {@link DiscretizationModel} providing the included column names
 *            and the schemes that map numerical values to nominal String values
 * @return the discretized input data
 */
public static BufferedDataTable createResultTable(final ExecutionContext exec, final BufferedDataTable table, final DiscretizationModel discretizationModel) {
    final String[] includedColumnNames = discretizationModel.getIncludedColumnNames();
    // keep only the schemes of columns that are also present in the table
    final DiscretizationScheme[] dSchemes = filterNotKnownSchemes(discretizationModel.getSchemes(), includedColumnNames, table.getDataTableSpec());
    final DataTableSpec originalTableSpec = table.getDataTableSpec();
    final int numColumns = originalTableSpec.getNumColumns();
    final DataColumnSpec[] newColumnSpecs = new DataColumnSpec[numColumns];
    // flags the column positions that get discretized
    final boolean[] included = new boolean[numColumns];
    for (int col = 0; col < numColumns; col++) {
        final DataColumnSpec originalColumnSpec = originalTableSpec.getColumnSpec(col);
        included[col] = isIncluded(originalColumnSpec, includedColumnNames) > -1;
        if (included[col]) {
            // discretized columns become nominal string columns
            newColumnSpecs[col] = new DataColumnSpecCreator(originalColumnSpec.getName(), StringCell.TYPE).createSpec();
        } else {
            // all other columns keep their spec
            newColumnSpecs[col] = originalColumnSpec;
        }
    }
    final BufferedDataContainer container = exec.createDataContainer(new DataTableSpec(newColumnSpecs));
    final double numRows = table.size();
    long rowIndex = 0;
    for (DataRow row : table) {
        // report the progress only every 200 rows
        if (rowIndex % 200 == 0) {
            exec.setProgress(rowIndex / numRows);
        }
        final DataCell[] newCells = new DataCell[row.getNumCells()];
        int col = 0;
        // position of the scheme belonging to the next included column
        int schemeIndex = 0;
        for (DataCell cell : row) {
            if (!included[col] || cell.isMissing()) {
                // untouched columns and missing values are copied as they are
                newCells[col] = cell;
            } else {
                // replace the numeric value by its discretized counterpart
                final double value = ((DoubleValue) cell).getDoubleValue();
                newCells[col] = new StringCell(dSchemes[schemeIndex].getDiscreteValue(value));
            }
            if (included[col]) {
                schemeIndex++;
            }
            col++;
        }
        container.addRowToTable(new DefaultRow(row.getKey(), newCells));
        rowIndex++;
    }
    container.close();
    return container.getTable();
}
use of org.knime.core.node.BufferedDataContainer in project knime-core by knime.
the class HiliteFilterNodeModel method execute.
/**
 * Splits the first input table into two tables: the first output contains all rows whose key is
 * currently hilited according to {@code m_inHdl}, the second output contains all remaining rows.
 * After the split this node model is added as a hilite listener to {@code m_inHdl}.
 *
 * {@inheritDoc}
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    DataTableSpec inSpec = inData[0].getDataTableSpec();
    BufferedDataContainer bufIn = exec.createDataContainer(inSpec);
    BufferedDataContainer bufOut = exec.createDataContainer(inSpec);
    synchronized (m_inHdl) {
        double rowCnt = inData[0].size();
        CloseableRowIterator it = inData[0].iterator();
        try {
            for (long i = 0; i < rowCnt; i++) {
                DataRow row = it.next();
                if (m_inHdl.isHiLit(row.getKey())) {
                    bufIn.addRowToTable(row);
                } else {
                    bufOut.addRowToTable(row);
                }
                exec.checkCanceled();
                exec.setProgress((i + 1) / rowCnt);
            }
        } finally {
            // release the iterator, also when the execution is canceled in the middle of the table
            it.close();
        }
    }
    bufIn.close();
    bufOut.close();
    m_inHdl.addHiLiteListener(this);
    return new BufferedDataTable[] { bufIn.getTable(), bufOut.getTable() };
}
Aggregations