Use of org.knime.core.data.def.DefaultRow in project knime-core by knime.
Class ColumnAutoTypeCasterNodeModel, method execute.
/**
 * {@inheritDoc}
 *
 * <p>
 * Scans the included columns of the first input table, determines one target type per column via
 * {@code typeGuesser}/{@code setType}, and replaces each included column with a converted column.
 * The second output lists, per included column, the column name, the final type and the row that
 * determined that type. For an empty input table the input is passed through unchanged and the
 * second output is empty.
 *
 * @param inData the input tables; only index 0 is read
 * @param exec context used for progress, cancellation and table creation
 * @return the converted table at index 0 and the table of type decisions at index 1
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    double progress = 0;
    final BufferedDataTable data = inData[0];
    BufferedDataTable outTable = inData[0];
    final DataTableSpec inSpec = data.getDataTableSpec();
    final String[] incls = m_conf.applyTo(inSpec).getIncludes();
    final DataType[] types = new DataType[incls.length];
    final double max = incls.length + data.size();
    final String[] colNames = { "Column name", "Final column type", "Row determining final column type" };
    final DataType[] colTypes = new DataType[] { StringCell.TYPE, StringCell.TYPE, StringCell.TYPE };
    BufferedDataContainer reasonsCon = exec.createDataContainer(new DataTableSpec(colNames, colTypes));
    setReasons(new String[incls.length][3]);
    if (data.size() > 0) {
        // empty table check
        // Resolve the column positions once instead of once per cell.
        final int[] colIndices = new int[incls.length];
        for (int i = 0; i < incls.length; i++) {
            colIndices[i] = inSpec.findColumnIndex(incls[i]);
        }
        SimpleDateFormat dateFormat = new SimpleDateFormat(m_dateFormat);
        long numberOfRows = m_quickScan ? Math.min(m_numberOfRows, data.size()) : data.size();
        // Keep a handle on the iterator that is actually used so that it can be closed when a
        // quick scan stops early; calling data.iterator().close() would only open and close a
        // second, unrelated iterator.
        final org.knime.core.data.container.CloseableRowIterator rowIt = data.iterator();
        try {
            while (numberOfRows > 0 && rowIt.hasNext()) {
                numberOfRows--;
                final DataRow row = rowIt.next();
                for (int i = 0; i < incls.length; i++) {
                    // guess for each cell in each column the best matching datatype
                    DataCell c = row.getCell(colIndices[i]);
                    if (!c.isMissing() && c.toString().equals(m_missValPat)) {
                        // values equal to the missing value pattern do not influence the type
                        continue;
                    }
                    DataType newType = typeGuesser(c, dateFormat);
                    if (types[i] != null) {
                        DataType toSet = setType(types[i], newType);
                        if (!toSet.equals(types[i])) {
                            // this row changed the column type, so it becomes the reported reason
                            m_reasons[i][2] = row.getKey().getString();
                            m_reasons[i][1] = toSet.toString();
                            m_reasons[i][0] = incls[i];
                        }
                        types[i] = toSet;
                    } else {
                        types[i] = newType;
                        String r = row.getKey().toString();
                        r += m_quickScan ? (" based on a quickscan.") : "";
                        m_reasons[i][2] = r;
                        m_reasons[i][1] = newType.toString();
                        m_reasons[i][0] = incls[i];
                    }
                    exec.checkCanceled();
                }
                exec.checkCanceled();
                progress++;
                exec.setProgress(progress / max);
            }
        } finally {
            rowIt.close();
        }
        for (int i = 0; i < types.length; i++) {
            if (types[i] == null) {
                // No scanned cell contributed a type (every scanned value matched the missing
                // value pattern, or no row was scanned). Fall back to String instead of failing
                // with a NullPointerException below.
                types[i] = StringCell.TYPE;
                m_reasons[i][0] = incls[i];
                m_reasons[i][1] = StringCell.TYPE.toString();
            } else if (types[i].equals(DataType.getMissingCell().getType())) {
                // if one column only contains missingCells than set column type to StringCell
                types[i] = StringCell.TYPE;
            }
        }
        ColumnRearranger arrange = new ColumnRearranger(inSpec);
        for (int i = 0; i < incls.length; i++) {
            final int colIdx = colIndices[i];
            final DataType type = types[i];
            DataColumnSpecCreator colSpecCreator = new DataColumnSpecCreator(incls[i], type);
            DataColumnSpec colSpec = colSpecCreator.createSpec();
            if (type.equals(DateAndTimeCell.TYPE)) {
                arrange.replace(createDateAndTimeConverter(colIdx, colSpec), colIdx);
            } else if (type.equals(LongCell.TYPE)) {
                arrange.replace(createLongConverter(colIdx, colSpec), colIdx);
            } else {
                arrange.replace(createNumberConverter(colIdx, type, colSpec), colIdx);
            }
            progress++;
            exec.setProgress(progress / max);
            exec.checkCanceled();
        }
        outTable = exec.createColumnRearrangeTable(data, arrange, exec);
        for (int i = 0; i < m_reasons.length; i++) {
            DataCell[] row = new DataCell[m_reasons[i].length];
            for (int j = 0; j < m_reasons[i].length; j++) {
                // An entry is null when no row determined the type; report it as a missing value
                // rather than constructing a StringCell from null.
                final String reason = m_reasons[i][j];
                row[j] = reason == null ? DataType.getMissingCell() : new StringCell(reason);
            }
            reasonsCon.addRowToTable(new DefaultRow(RowKey.createRowKey((long) i), row));
        }
    }
    reasonsCon.close();
    BufferedDataTable outReasons = reasonsCon.getTable();
    return new BufferedDataTable[] { outTable, outReasons };
}
Use of org.knime.core.data.def.DefaultRow in project knime-core by knime.
Class ColumnAppenderNodeModel, method compute.
/*
 * Combines the rows of both inputs, pairwise, into the rows of a newly created table.
 *
 * Rows are paired for as long as both iterators deliver rows. Left-over rows of one input are
 * padded with missing cells only if the row key setting takes the keys from that input or
 * generates new keys; otherwise the left-over rows are dropped and a warning is set. With the
 * "wrap" option enabled, differing row keys or differing padded row counts are reported through
 * errorDifferingRowKeys / errorDifferingTableSize.
 */
private void compute(final CustomRowIterator rowIt1, final CustomRowIterator rowIt2, final int numColsTotal, final RowConsumer output, final ExecutionContext exec, final long numRowsTab1, final long numRowsTab2) throws InterruptedException, CanceledExecutionException {
    final String keyMode = m_rowKeySelect.getStringValue();
    final boolean keysFromFirst = keyMode.equals(ROW_KEY_SELECT_OPTIONS[0]);
    final boolean keysFromSecond = keyMode.equals(ROW_KEY_SELECT_OPTIONS[1]);
    final boolean keysGenerated = keyMode.equals(ROW_KEY_SELECT_OPTIONS[2]);

    // Expected number of output rows; -1 means unknown, in which case no progress is reported.
    final long expectedRows;
    if (numRowsTab1 == -1) {
        expectedRows = -1;
    } else if (keysFromFirst) {
        expectedRows = numRowsTab1;
    } else if (keysFromSecond) {
        expectedRows = numRowsTab2;
    } else {
        expectedRows = Math.max(numRowsTab1, numRowsTab2);
    }
    final boolean reportProgress = expectedRows != -1;

    // Phase 1: rows that exist in both inputs.
    long pairedRows = 0;
    while (rowIt1.hasNext() && rowIt2.hasNext()) {
        if (reportProgress) {
            final long current = pairedRows;
            exec.setProgress(current / (double) expectedRows);
            exec.setMessage(() -> "Appending columns (row " + current + "/" + expectedRows + ")");
        }
        exec.checkCanceled();
        final DataRow first = rowIt1.next();
        final DataRow second = rowIt2.next();
        if (m_wrapTable.getBooleanValue() && !first.getKey().equals(second.getKey())) {
            errorDifferingRowKeys(pairedRows, first.getKey(), second.getKey());
        }
        final ArrayList<DataCell> merged = new ArrayList<>(numColsTotal);
        for (DataCell cell : first) {
            merged.add(cell);
        }
        for (DataCell cell : second) {
            merged.add(cell);
        }
        final DefaultRow joined;
        if (keysFromFirst) {
            joined = new DefaultRow(first.getKey(), merged);
        } else if (keysFromSecond) {
            joined = new DefaultRow(second.getKey(), merged);
        } else {
            joined = new DefaultRow("Row" + (pairedRows), merged);
        }
        output.consume(joined);
        pairedRows++;
    }

    // Phase 2: left-over rows of the first input, padded on the right with missing cells.
    long paddedFromFirst = 0;
    while (rowIt1.hasNext() && (keysFromFirst || keysGenerated) && !rowIt2.hasNext()) {
        final long position = pairedRows + paddedFromFirst;
        if (reportProgress) {
            exec.setProgress(position / (double) expectedRows);
            exec.setMessage(() -> "Appending columns (row " + position + "/" + expectedRows + ")");
        }
        exec.checkCanceled();
        final DataRow leftover = rowIt1.next();
        final ArrayList<DataCell> padded = new ArrayList<>(numColsTotal);
        for (DataCell cell : leftover) {
            padded.add(cell);
        }
        final int missingCount = numColsTotal - leftover.getNumCells();
        for (int k = 0; k < missingCount; k++) {
            padded.add(DataType.getMissingCell());
        }
        final DefaultRow result;
        if (keysGenerated) {
            result = new DefaultRow("Row" + position, padded);
        } else {
            result = new DefaultRow(leftover.getKey(), padded);
        }
        output.consume(result);
        paddedFromFirst++;
    }

    // Phase 3: left-over rows of the second input, padded on the left with missing cells.
    long paddedFromSecond = 0;
    while (rowIt2.hasNext() && (keysFromSecond || keysGenerated) && !rowIt1.hasNext()) {
        final long position = pairedRows + paddedFromSecond;
        if (reportProgress) {
            exec.setProgress(position / (double) expectedRows);
            exec.setMessage(() -> "Appending columns (row " + position + "/" + expectedRows + ")");
        }
        exec.checkCanceled();
        final DataRow leftover = rowIt2.next();
        final ArrayList<DataCell> padded = new ArrayList<>(numColsTotal);
        final int missingCount = numColsTotal - leftover.getNumCells();
        for (int k = 0; k < missingCount; k++) {
            padded.add(DataType.getMissingCell());
        }
        for (DataCell cell : leftover) {
            padded.add(cell);
        }
        final DefaultRow result;
        if (keysGenerated) {
            result = new DefaultRow("Row" + position, padded);
        } else {
            result = new DefaultRow(leftover.getKey(), padded);
        }
        output.consume(result);
        paddedFromSecond++;
    }

    // Warn if missing values have been inserted or one table was truncated.
    if (keysFromFirst) {
        if (paddedFromFirst > 0) {
            setWarningMessage("First table is longer than the second table! Missing values have been added to the second table.");
        } else if (rowIt2.hasNext()) {
            setWarningMessage("First table is shorter than the second table! Second table has been truncated.");
        }
    } else if (keysFromSecond) {
        if (paddedFromSecond > 0) {
            setWarningMessage("Second table is longer than the first table! Missing values have been added to the first table.");
        } else if (rowIt1.hasNext()) {
            setWarningMessage("Second table is shorter than the first table! First table has been truncated.");
        }
    } else if (paddedFromFirst > 0 || paddedFromSecond > 0) {
        setWarningMessage("Both tables differ in length! Missing values have been added accordingly.");
    }

    // With the "wrap" option set, tables of differing size are an error.
    if (m_wrapTable.getBooleanValue() && paddedFromFirst != paddedFromSecond) {
        errorDifferingTableSize(pairedRows + paddedFromFirst, pairedRows + paddedFromSecond);
    }
}
Use of org.knime.core.data.def.DefaultRow in project knime-core by knime.
Class LagColumnStreamableOperator, method runFinal.
/**
 * {@inheritDoc}
 *
 * <p>
 * Polls rows from the first input, appends the lagged cells taken from a ring buffer and pushes
 * the result to the first output. Depending on the configuration, the initial rows (for which the
 * buffer is not yet filled) are skipped, and trailing "overflow" rows consisting of missing cells
 * plus the remaining buffered values are appended.
 */
@Override
public void runFinal(final PortInput[] inputs, final PortOutput[] outputs, final ExecutionContext exec) throws Exception {
    final int maxLag = m_configuration.getLagInterval() * m_configuration.getLag();
    final RingBuffer lagBuffer = new RingBuffer(maxLag);
    final RowInput in = (RowInput) inputs[0];
    final RowOutput out = (RowOutput) outputs[0];
    // Index of the first row that is emitted; -1 emits every row.
    final int firstEmittedIndex = m_configuration.isSkipInitialIncompleteRows() ? maxLag : -1;

    long rowIndex = 0;
    for (DataRow row = in.poll(); row != null; row = in.poll()) {
        if (rowIndex >= firstEmittedIndex) {
            final DataCell[] laggedCells = getAdditionalCells(lagBuffer);
            out.push(copyWithNewCells(row, laggedCells));
        }
        // A negative column index means the row key itself is the lagged value.
        final DataCell cached;
        if (m_columnIndex < 0) {
            cached = new StringCell(row.getKey().toString());
        } else {
            cached = row.getCell(m_columnIndex);
        }
        lagBuffer.add(cached);
        setProgress(exec, rowIndex, row);
        rowIndex++;
    }

    if (m_configuration.isSkipLastIncompleteRows()) {
        out.close();
        return;
    }

    // Flush the values still held in the buffer as rows whose original columns are all missing.
    final DataCell[] allMissing = new DataCell[in.getDataTableSpec().getNumColumns()];
    Arrays.fill(allMissing, DataType.getMissingCell());
    for (int k = 0; k < maxLag; k++) {
        final DataRow overflowRow = new DefaultRow("overflow-" + k, allMissing);
        final DataCell[] laggedCells = getAdditionalCells(lagBuffer);
        out.push(copyWithNewCells(overflowRow, laggedCells));
        lagBuffer.add(DataType.getMissingCell());
    }
    out.close();
}
Use of org.knime.core.data.def.DefaultRow in project knime-core by knime.
Class CrossJoinerNodeModel, method joinRow.
/**
 * Joins the two rows into one.
 *
 * <p>
 * The resulting row holds all cells of {@code left}, followed by all cells of {@code right},
 * optionally followed by the left row key and then the right row key as string cells.
 *
 * @param left the first data row (put at the beginning of the new one)
 * @param right the second data row (put after the cells of the first one)
 * @param showLeft if true, a cell containing the row key of the left row is appended
 * @param showRight if true, a cell containing the row key of the right row is appended
 * @param seperator string which is put between the two row keys to generate the new one
 * @return a DataRow containing the cells of both rows and, if selected, the row keys as
 *         additional cells
 * @since 2.9.1
 */
private DataRow joinRow(final DataRow left, final DataRow right, final boolean showLeft, final boolean showRight, final String seperator) {
    final int leftCount = left.getNumCells();
    final int rightCount = right.getNumCells();

    int totalCells = leftCount + rightCount;
    if (showLeft) {
        totalCells++;
    }
    if (showRight) {
        totalCells++;
    }

    final DataCell[] joined = new DataCell[totalCells];
    int pos = 0;
    for (int i = 0; i < leftCount; i++) {
        joined[pos++] = left.getCell(i);
    }
    for (int i = 0; i < rightCount; i++) {
        joined[pos++] = right.getCell(i);
    }
    if (showLeft) {
        joined[pos++] = new StringCell(left.getKey().toString());
    }
    if (showRight) {
        joined[pos] = new StringCell(right.getKey().toString());
    }

    final String joinedKey = left.getKey().getString() + seperator + right.getKey().getString();
    return new DefaultRow(joinedKey, joined);
}
Use of org.knime.core.data.def.DefaultRow in project knime-core by knime.
Class CAIMDiscretizationNodeModel, method createResultTable.
/**
 * Builds a {@link BufferedDataTable} from the given input table by applying the
 * {@link DiscretizationScheme}s of the given model. Every column that is included for
 * discretization is replaced by a String column holding the discrete values; all other columns
 * and all missing cells are copied unchanged.
 *
 * @param exec the context used to create the {@link BufferedDataTable} and to report progress
 * @param table the input data table
 * @param discretizationModel the {@link DiscretizationModel} that contains the mapping from
 *            numerical intervals to nominal String values for the included columns
 * @return the discretized input data
 */
public static BufferedDataTable createResultTable(final ExecutionContext exec, final BufferedDataTable table, final DiscretizationModel discretizationModel) {
    final DiscretizationScheme[] allSchemes = discretizationModel.getSchemes();
    final String[] includedColumnNames = discretizationModel.getIncludedColumnNames();
    final DataTableSpec inputSpec = table.getDataTableSpec();
    // keep only the schemes whose columns are also present in the table
    final DiscretizationScheme[] schemes = filterNotKnownSchemes(allSchemes, includedColumnNames, inputSpec);

    // Derive the output spec: discretized columns become String columns, the rest is kept.
    final int numColumns = inputSpec.getNumColumns();
    final DataColumnSpec[] outputSpecs = new DataColumnSpec[numColumns];
    final boolean[] discretize = new boolean[numColumns];
    for (int col = 0; col < numColumns; col++) {
        final DataColumnSpec inColSpec = inputSpec.getColumnSpec(col);
        discretize[col] = isIncluded(inColSpec, includedColumnNames) > -1;
        if (discretize[col]) {
            outputSpecs[col] = new DataColumnSpecCreator(inColSpec.getName(), StringCell.TYPE).createSpec();
        } else {
            outputSpecs[col] = inColSpec;
        }
    }

    final BufferedDataContainer container = exec.createDataContainer(new DataTableSpec(outputSpecs));

    final double totalRows = table.size();
    long processedRows = 0;
    for (DataRow row : table) {
        // report progress only every 200 rows
        if (processedRows % 200 == 0) {
            exec.setProgress(processedRows / totalRows);
        }
        final DataCell[] outCells = new DataCell[row.getNumCells()];
        int col = 0;
        // NOTE(review): schemes are consumed in table column order; this presumably relies on
        // filterNotKnownSchemes returning them in that order - confirm.
        int schemeIdx = 0;
        for (DataCell cell : row) {
            if (discretize[col] && !cell.isMissing()) {
                // transform the numeric value to its discretized String representation
                final double value = ((DoubleValue) cell).getDoubleValue();
                outCells[col] = new StringCell(schemes[schemeIdx].getDiscreteValue(value));
            } else {
                // not included, or a missing value: copy as is
                outCells[col] = cell;
            }
            if (discretize[col]) {
                schemeIdx++;
            }
            col++;
        }
        container.addRowToTable(new DefaultRow(row.getKey(), outCells));
        processedRows++;
    }
    container.close();
    return container.getTable();
}
Aggregations