Search in sources :

Example 71 with DefaultRow

use of org.knime.core.data.def.DefaultRow in project knime-core by knime.

the class ColumnAutoTypeCasterNodeModel method execute.

/**
 * {@inheritDoc}
 *
 * <p>
 * Scans the included columns of the first input table (all rows, or only the first
 * {@code m_numberOfRows} rows when quick scan is enabled), guesses the narrowest matching
 * type per column and replaces each included column with a converted one. The second
 * output table lists, per included column, its name, the final type and the row that
 * determined that type.
 *
 * @param inData the input tables; only {@code inData[0]} is read
 * @param exec context used for progress reporting, cancellation checks and table creation
 * @return a two element array: the converted table (the unchanged input if it is empty)
 *         and the table of reasons
 * @throws Exception if the execution is canceled or a conversion fails
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    double progress = 0;
    final BufferedDataTable data = inData[0];
    BufferedDataTable outTable = inData[0];
    final DataTableSpec inSpec = data.getDataTableSpec();
    final String[] incls = m_conf.applyTo(inSpec).getIncludes();
    final DataType[] types = new DataType[incls.length];
    final double max = incls.length + data.size();
    final String[] colNames = { "Column name", "Final column type", "Row determining final column type" };
    final DataType[] colTypes = new DataType[] { StringCell.TYPE, StringCell.TYPE, StringCell.TYPE };
    BufferedDataContainer reasonsCon = exec.createDataContainer(new DataTableSpec(colNames, colTypes));
    setReasons(new String[incls.length][3]);
    if (data.size() > 0) {
        // the column positions do not change while scanning, so resolve them only once
        final int[] colIndices = new int[incls.length];
        for (int i = 0; i < incls.length; i++) {
            colIndices[i] = inSpec.findColumnIndex(incls[i]);
        }
        SimpleDateFormat dateFormat = new SimpleDateFormat(m_dateFormat);
        long numberOfRows = m_quickScan ? Math.min(m_numberOfRows, data.size()) : data.size();
        for (DataRow row : data) {
            if (!(0 < numberOfRows--)) {
                // NOTE(review): this appears to close a freshly created iterator rather than the
                // one driving this loop - confirm and close the loop's own iterator instead.
                data.iterator().close();
                break;
            }
            for (int i = 0; i < incls.length; i++) {
                // guess for each cell in each column the best matching datatype
                DataCell c = row.getCell(colIndices[i]);
                if (!c.isMissing() && c.toString().equals(m_missValPat)) {
                    // cells equal to the missing value pattern do not contribute to the guess
                    continue;
                }
                DataType newType = typeGuesser(c, dateFormat);
                if (types[i] != null) {
                    DataType toSet = setType(types[i], newType);
                    if (!toSet.equals(types[i])) {
                        // this row changed the column type, remember it as the reason
                        m_reasons[i][2] = row.getKey().getString();
                        m_reasons[i][1] = toSet.toString();
                        m_reasons[i][0] = incls[i];
                    }
                    types[i] = toSet;
                } else {
                    types[i] = newType;
                    String r = row.getKey().toString();
                    r += m_quickScan ? (" based on a quickscan.") : "";
                    m_reasons[i][2] = r;
                    m_reasons[i][1] = newType.toString();
                    m_reasons[i][0] = incls[i];
                }
                exec.checkCanceled();
            }
            exec.checkCanceled();
            progress++;
            exec.setProgress(progress / max);
        }
        for (int i = 0; i < types.length; i++) {
            if (types[i] == null) {
                // no scanned cell contributed a guess (all of them matched the missing value
                // pattern); fall back to StringCell instead of failing with a NullPointerException
                types[i] = StringCell.TYPE;
                m_reasons[i][0] = incls[i];
                m_reasons[i][1] = StringCell.TYPE.toString();
            } else if (types[i].equals(DataType.getMissingCell().getType())) {
                // if one column only contains missing cells then set column type to StringCell
                types[i] = StringCell.TYPE;
            }
        }
        ColumnRearranger arrange = new ColumnRearranger(inSpec);
        for (int i = 0; i < incls.length; i++) {
            final int colIdx = colIndices[i];
            final DataType type = types[i];
            DataColumnSpecCreator colSpecCreator = new DataColumnSpecCreator(incls[i], type);
            DataColumnSpec colSpec = colSpecCreator.createSpec();
            if (type.equals(DateAndTimeCell.TYPE)) {
                arrange.replace(createDateAndTimeConverter(colIdx, colSpec), colIdx);
            } else if (type.equals(LongCell.TYPE)) {
                arrange.replace(createLongConverter(colIdx, colSpec), colIdx);
            } else {
                arrange.replace(createNumberConverter(colIdx, type, colSpec), colIdx);
            }
            progress++;
            exec.setProgress(progress / max);
            exec.checkCanceled();
        }
        outTable = exec.createColumnRearrangeTable(data, arrange, exec);
        for (int i = 0; i < m_reasons.length; i++) {
            DataCell[] row = new DataCell[m_reasons[i].length];
            for (int j = 0; j < m_reasons[i].length; j++) {
                // a reason entry that was never filled in is reported as a missing cell
                final String reason = m_reasons[i][j];
                row[j] = reason == null ? DataType.getMissingCell() : new StringCell(reason);
            }
            reasonsCon.addRowToTable(new DefaultRow(RowKey.createRowKey((long) i), row));
        }
    }
    reasonsCon.close();
    BufferedDataTable outReasons = reasonsCon.getTable();
    return new BufferedDataTable[] { outTable, outReasons };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) DataRow(org.knime.core.data.DataRow) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) DataColumnSpec(org.knime.core.data.DataColumnSpec) StringCell(org.knime.core.data.def.StringCell) BufferedDataTable(org.knime.core.node.BufferedDataTable) DataType(org.knime.core.data.DataType) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow) SimpleDateFormat(java.text.SimpleDateFormat)

Example 72 with DefaultRow

use of org.knime.core.data.def.DefaultRow in project knime-core by knime.

the class ColumnAppenderNodeModel method compute.

/*
 * Combines the rows of both inputs into new rows and hands them to the given consumer.
 * Rows are paired by position. Depending on the selected row key option the surplus rows
 * of the longer input are either padded with missing cells or dropped, and a warning
 * describing the mismatch is set afterwards.
 */
private void compute(final CustomRowIterator rowIt1, final CustomRowIterator rowIt2, final int numColsTotal, final RowConsumer output, final ExecutionContext exec, final long numRowsTab1, final long numRowsTab2) throws InterruptedException, CanceledExecutionException {
    final boolean useRowKeysFromFirstTable = m_rowKeySelect.getStringValue().equals(ROW_KEY_SELECT_OPTIONS[0]);
    final boolean useRowKeysFromSecondTable = m_rowKeySelect.getStringValue().equals(ROW_KEY_SELECT_OPTIONS[1]);
    final boolean generateRowKeys = m_rowKeySelect.getStringValue().equals(ROW_KEY_SELECT_OPTIONS[2]);

    // expected number of output rows, or -1 if the size of the first table is unknown
    final long numRows;
    if (numRowsTab1 == -1) {
        numRows = -1;
    } else if (useRowKeysFromFirstTable) {
        numRows = numRowsTab1;
    } else if (useRowKeysFromSecondTable) {
        numRows = numRowsTab2;
    } else {
        numRows = Math.max(numRowsTab1, numRowsTab2);
    }
    final boolean reportProgress = numRows != -1;

    // --- rows that exist in both inputs ---
    long rowCount = 0;
    while (rowIt1.hasNext() && rowIt2.hasNext()) {
        if (reportProgress) {
            exec.setProgress(rowCount / (double) numRows);
            final long current = rowCount;
            exec.setMessage(() -> "Appending columns (row " + current + "/" + numRows + ")");
        }
        exec.checkCanceled();
        final DataRow first = rowIt1.next();
        final DataRow second = rowIt2.next();
        if (m_wrapTable.getBooleanValue() && !first.getKey().equals(second.getKey())) {
            errorDifferingRowKeys(rowCount, first.getKey(), second.getKey());
        }
        final ArrayList<DataCell> combined = new ArrayList<>(numColsTotal);
        for (DataCell c : first) {
            combined.add(c);
        }
        for (DataCell c : second) {
            combined.add(c);
        }
        final DefaultRow merged;
        if (useRowKeysFromFirstTable) {
            merged = new DefaultRow(first.getKey(), combined);
        } else if (useRowKeysFromSecondTable) {
            merged = new DefaultRow(second.getKey(), combined);
        } else {
            merged = new DefaultRow("Row" + (rowCount), combined);
        }
        output.consume(merged);
        rowCount++;
    }

    // --- surplus rows of the first input: cells of the second input become missing ---
    final boolean keepSurplusOfFirst = useRowKeysFromFirstTable || generateRowKeys;
    long extraRowsTab1 = 0;
    while (rowIt1.hasNext() && keepSurplusOfFirst && !rowIt2.hasNext()) {
        final long position = rowCount + extraRowsTab1;
        if (reportProgress) {
            exec.setProgress(position / (double) numRows);
            exec.setMessage(() -> "Appending columns (row " + position + "/" + numRows + ")");
        }
        exec.checkCanceled();
        final DataRow surplus = rowIt1.next();
        final ArrayList<DataCell> padded = new ArrayList<>(numColsTotal);
        for (DataCell c : surplus) {
            padded.add(c);
        }
        final int missingCount = numColsTotal - surplus.getNumCells();
        for (int k = 0; k < missingCount; k++) {
            padded.add(DataType.getMissingCell());
        }
        final DefaultRow merged = generateRowKeys
            ? new DefaultRow("Row" + position, padded)
            : new DefaultRow(surplus.getKey(), padded);
        output.consume(merged);
        extraRowsTab1++;
    }

    // --- surplus rows of the second input: cells of the first input become missing ---
    final boolean keepSurplusOfSecond = useRowKeysFromSecondTable || generateRowKeys;
    long extraRowsTab2 = 0;
    while (rowIt2.hasNext() && keepSurplusOfSecond && !rowIt1.hasNext()) {
        final long position = rowCount + extraRowsTab2;
        if (reportProgress) {
            exec.setProgress(position / (double) numRows);
            exec.setMessage(() -> "Appending columns (row " + position + "/" + numRows + ")");
        }
        exec.checkCanceled();
        final DataRow surplus = rowIt2.next();
        final ArrayList<DataCell> padded = new ArrayList<>(numColsTotal);
        final int missingCount = numColsTotal - surplus.getNumCells();
        for (int k = 0; k < missingCount; k++) {
            padded.add(DataType.getMissingCell());
        }
        for (DataCell c : surplus) {
            padded.add(c);
        }
        final DefaultRow merged = generateRowKeys
            ? new DefaultRow("Row" + position, padded)
            : new DefaultRow(surplus.getKey(), padded);
        output.consume(merged);
        extraRowsTab2++;
    }

    // warn if missing values have been inserted or one table was truncated
    if (useRowKeysFromFirstTable) {
        if (extraRowsTab1 > 0) {
            setWarningMessage("First table is longer than the second table! Missing values have been added to the second table.");
        } else if (rowIt2.hasNext()) {
            setWarningMessage("First table is shorter than the second table! Second table has been truncated.");
        }
    } else if (useRowKeysFromSecondTable) {
        if (extraRowsTab2 > 0) {
            setWarningMessage("Second table is longer than the first table! Missing values have been added to the first table.");
        } else if (rowIt1.hasNext()) {
            setWarningMessage("Second table is shorter than the first table! First table has been truncated.");
        }
    } else if (extraRowsTab1 > 0 || extraRowsTab2 > 0) {
        setWarningMessage("Both tables differ in length! Missing values have been added accordingly.");
    }

    // with the "wrap" option set, tables of different size are reported as an error
    if (m_wrapTable.getBooleanValue() && extraRowsTab1 != extraRowsTab2) {
        errorDifferingTableSize(rowCount + extraRowsTab1, rowCount + extraRowsTab2);
    }
}
Also used : ArrayList(java.util.ArrayList) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow) DataRow(org.knime.core.data.DataRow)

Example 73 with DefaultRow

use of org.knime.core.data.def.DefaultRow in project knime-core by knime.

the class LagColumnStreamableOperator method runFinal.

/**
 * {@inheritDoc}
 *
 * <p>
 * Polls the rows of the first input one by one, appends the cells obtained from the
 * buffer of previously seen values and pushes the result to the first output. Unless
 * trailing incomplete rows are skipped, additional "overflow-" rows consisting of
 * missing cells are pushed after the input is exhausted.
 */
@Override
public void runFinal(final PortInput[] inputs, final PortOutput[] outputs, final ExecutionContext exec) throws Exception {
    final int maxLag = m_configuration.getLagInterval() * m_configuration.getLag();
    final RingBuffer lagBuffer = new RingBuffer(maxLag);
    final RowInput rowInput = (RowInput) inputs[0];
    final RowOutput rowOutput = (RowOutput) outputs[0];

    // index of the first input row that is emitted; -1 emits every row
    final int firstEmitted;
    if (m_configuration.isSkipInitialIncompleteRows()) {
        firstEmitted = maxLag;
    } else {
        firstEmitted = -1;
    }

    long seen = 0;
    for (DataRow current = rowInput.poll(); current != null; current = rowInput.poll()) {
        if (seen >= firstEmitted) {
            final DataCell[] laggedCells = getAdditionalCells(lagBuffer);
            rowOutput.push(copyWithNewCells(current, laggedCells));
        }
        // a negative column index means the row key is lagged instead of a column value
        final DataCell cached;
        if (m_columnIndex < 0) {
            cached = new StringCell(current.getKey().toString());
        } else {
            cached = current.getCell(m_columnIndex);
        }
        lagBuffer.add(cached);
        setProgress(exec, seen, current);
        seen += 1;
    }

    if (!m_configuration.isSkipLastIncompleteRows()) {
        final DataCell[] allMissing = new DataCell[rowInput.getDataTableSpec().getNumColumns()];
        Arrays.fill(allMissing, DataType.getMissingCell());
        for (int overflow = 0; overflow < maxLag; overflow++) {
            final DataRow filler = new DefaultRow("overflow-" + overflow, allMissing);
            final DataCell[] laggedCells = getAdditionalCells(lagBuffer);
            rowOutput.push(copyWithNewCells(filler, laggedCells));
            lagBuffer.add(DataType.getMissingCell());
        }
    }
    rowOutput.close();
}
Also used : RowOutput(org.knime.core.node.streamable.RowOutput) StringCell(org.knime.core.data.def.StringCell) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow) RowInput(org.knime.core.node.streamable.RowInput) DataTableRowInput(org.knime.core.node.streamable.DataTableRowInput) BlobSupportDataRow(org.knime.core.data.container.BlobSupportDataRow) DataRow(org.knime.core.data.DataRow)

Example 74 with DefaultRow

use of org.knime.core.data.def.DefaultRow in project knime-core by knime.

the class CrossJoinerNodeModel method joinRow.

/**
 * Concatenates two rows into a single new row.
 *
 * @param left the row whose cells come first in the result
 * @param right the row whose cells follow those of {@code left}
 * @param showLeft if true, a cell holding the row key of {@code left} is appended
 * @param showRight if true, a cell holding the row key of {@code right} is appended
 *            (after the cell for {@code left}, if present)
 * @param seperator string placed between the two row keys to build the new row key
 * @return a row with the cells of both inputs, followed by the requested row key cells
 * @since 2.9.1
 */
private DataRow joinRow(final DataRow left, final DataRow right, final boolean showLeft, final boolean showRight, final String seperator) {
    final int leftCount = left.getNumCells();
    final int rightCount = right.getNumCells();
    int total = leftCount + rightCount;
    if (showLeft) {
        total++;
    }
    if (showRight) {
        total++;
    }
    final DataCell[] joined = new DataCell[total];
    // running write position into the joined cell array
    int pos = 0;
    for (int l = 0; l < leftCount; l++) {
        joined[pos++] = left.getCell(l);
    }
    for (int r = 0; r < rightCount; r++) {
        joined[pos++] = right.getCell(r);
    }
    if (showLeft) {
        joined[pos++] = new StringCell(left.getKey().toString());
    }
    if (showRight) {
        joined[pos] = new StringCell(right.getKey().toString());
    }
    return new DefaultRow(left.getKey().getString() + seperator + right.getKey().getString(), joined);
}
Also used : StringCell(org.knime.core.data.def.StringCell) DataCell(org.knime.core.data.DataCell) SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString) DefaultRow(org.knime.core.data.def.DefaultRow)

Example 75 with DefaultRow

use of org.knime.core.data.def.DefaultRow in project knime-core by knime.

the class CAIMDiscretizationNodeModel method createResultTable.

/**
 * Builds a new {@link BufferedDataTable} in which the columns covered by the given
 * {@link DiscretizationModel} are replaced by string columns holding the discretized
 * values. All other columns are copied unchanged; missing cells stay missing.
 *
 * @param exec the context used to create the result container and to report progress
 * @param table the input data table
 * @param discretizationModel the {@link DiscretizationModel} providing the included column
 *            names and one {@link DiscretizationScheme} per included column
 * @return the discretized input data
 */
public static BufferedDataTable createResultTable(final ExecutionContext exec, final BufferedDataTable table, final DiscretizationModel discretizationModel) {
    final DiscretizationScheme[] allSchemes = discretizationModel.getSchemes();
    final String[] includedColumnNames = discretizationModel.getIncludedColumnNames();
    // keep only the schemes of columns that are also present in the table
    final DiscretizationScheme[] schemes = filterNotKnownSchemes(allSchemes, includedColumnNames, table.getDataTableSpec());

    final DataTableSpec originalTableSpec = table.getDataTableSpec();
    final int numColumns = originalTableSpec.getNumColumns();
    final DataColumnSpec[] newColumnSpecs = new DataColumnSpec[numColumns];
    // marks, per column index, whether the column gets discretized
    final boolean[] included = new boolean[numColumns];
    int colIdx = 0;
    for (DataColumnSpec originalColumnSpec : originalTableSpec) {
        final boolean discretize = isIncluded(originalColumnSpec, includedColumnNames) > -1;
        included[colIdx] = discretize;
        // discretized columns become nominal string columns, the rest keep their spec
        newColumnSpecs[colIdx] = discretize
            ? new DataColumnSpecCreator(originalColumnSpec.getName(), StringCell.TYPE).createSpec()
            : originalColumnSpec;
        colIdx++;
    }

    final BufferedDataContainer container = exec.createDataContainer(new DataTableSpec(newColumnSpecs));

    final double numRows = table.size();
    long processed = 0;
    for (DataRow row : table) {
        // report progress only every 200 rows
        if (processed % 200 == 0) {
            exec.setProgress(processed / numRows);
        }
        final DataCell[] newCells = new DataCell[row.getNumCells()];
        int cellIdx = 0;
        // position of the scheme belonging to the current included column
        int schemeIdx = 0;
        for (DataCell cell : row) {
            if (!included[cellIdx] || cell.isMissing()) {
                // untouched columns and missing values are passed through as is
                newCells[cellIdx] = cell;
            } else {
                final double value = ((DoubleValue) cell).getDoubleValue();
                newCells[cellIdx] = new StringCell(schemes[schemeIdx].getDiscreteValue(value));
            }
            if (included[cellIdx]) {
                schemeIdx++;
            }
            cellIdx++;
        }
        container.addRowToTable(new DefaultRow(row.getKey(), newCells));
        processed++;
    }
    container.close();
    return container.getTable();
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) DiscretizationScheme(org.knime.base.node.preproc.discretization.caim2.DiscretizationScheme) SettingsModelFilterString(org.knime.core.node.defaultnodesettings.SettingsModelFilterString) SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString) DataRow(org.knime.core.data.DataRow) DataColumnSpec(org.knime.core.data.DataColumnSpec) DoubleValue(org.knime.core.data.DoubleValue) StringCell(org.knime.core.data.def.StringCell) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow)

Aggregations

DefaultRow (org.knime.core.data.def.DefaultRow)207 DataCell (org.knime.core.data.DataCell)165 BufferedDataContainer (org.knime.core.node.BufferedDataContainer)94 DataTableSpec (org.knime.core.data.DataTableSpec)92 DataRow (org.knime.core.data.DataRow)88 RowKey (org.knime.core.data.RowKey)80 DoubleCell (org.knime.core.data.def.DoubleCell)66 StringCell (org.knime.core.data.def.StringCell)65 BufferedDataTable (org.knime.core.node.BufferedDataTable)56 IntCell (org.knime.core.data.def.IntCell)46 ArrayList (java.util.ArrayList)26 DataType (org.knime.core.data.DataType)26 DataColumnSpec (org.knime.core.data.DataColumnSpec)22 DataContainer (org.knime.core.data.container.DataContainer)21 HashSet (java.util.HashSet)18 LinkedHashMap (java.util.LinkedHashMap)17 DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator)16 LinkedHashSet (java.util.LinkedHashSet)14 DoubleValue (org.knime.core.data.DoubleValue)14 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)14