Search in sources :

Example 71 with BufferedDataContainer

use of org.knime.core.node.BufferedDataContainer in project knime-core by knime.

the class ColumnToGrid2NodeModel method execute.

/**
 * {@inheritDoc}
 *
 * <p>Lays the included columns of consecutive input rows side by side, placing up to
 * {@code m_configuration.getColCount()} input rows into one output row. When a group column is
 * configured, the input is first reduced to the relevant columns and sorted by that column; an
 * output row then never mixes rows of different group values, and the group value is written to the
 * last cell of the output row. Unused grid positions are left as missing cells.
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    final String groupColumn = m_configuration.getGroupColumn();
    final boolean grouped = groupColumn != null;
    final ExecutionMonitor mainExec;
    final BufferedDataTable inputTable;
    if (!grouped) {
        // no grouping: work directly on the input, all progress goes to the main loop
        inputTable = inData[0];
        mainExec = exec;
    } else {
        exec.setMessage("Sorting input table");
        final BufferedDataTable unsorted = inData[0];
        final ExecutionContext sortExec = exec.createSubExecutionContext(0.5);
        // keep only the included columns plus the group column (appended as last entry)
        final ColumnRearranger keepRelevant = new ColumnRearranger(unsorted.getDataTableSpec());
        final String[] relevantCols = Arrays.copyOf(m_included, m_included.length + 1);
        relevantCols[m_included.length] = groupColumn;
        keepRelevant.keepOnly(relevantCols);
        final BufferedDataTable filtered =
            exec.createColumnRearrangeTable(unsorted, keepRelevant, exec.createSubProgress(0.0));
        final SortedTable sorted =
            new SortedTable(filtered, Collections.singletonList(groupColumn), new boolean[] { true }, sortExec);
        inputTable = sorted.getBufferedDataTable();
        mainExec = exec.createSubProgress(0.5);
    }

    exec.setMessage("Assembling output");
    final DataTableSpec spec = inputTable.getDataTableSpec();
    final DataTableSpec outSpec = createOutputSpec(spec);
    final BufferedDataContainer cont = exec.createDataContainer(outSpec);

    // positions of the included columns within the (possibly filtered and sorted) input
    final int[] includeIndices = new int[m_included.length];
    for (int col = 0; col < includeIndices.length; col++) {
        includeIndices[col] = spec.findColumnIndex(m_included[col]);
    }

    final int gridCount = m_configuration.getColCount();
    // one extra trailing cell carries the group value when grouping is enabled
    final int cellCount = includeIndices.length * gridCount + (grouped ? 1 : 0);
    final int groupColIndex = grouped ? spec.findColumnIndex(groupColumn) : -1;

    final DataCell[] cells = new DataCell[cellCount];
    final PushBackRowIterator it = new PushBackRowIterator(inputTable.iterator());
    final long totalRows = inputTable.size();
    long currentRow = 0;
    long currentOutRow = 0;
    DataCell curGroupValue = null;
    while (it.hasNext()) {
        Arrays.fill(cells, DataType.getMissingCell());
        if (groupColIndex >= 0) {
            // peek at the next row to learn the group of the output row being assembled
            final DataRow head = it.next();
            curGroupValue = head.getCell(groupColIndex);
            cells[cells.length - 1] = curGroupValue;
            it.pushBack(head);
        }
        for (int grid = 0; grid < gridCount && it.hasNext(); grid++) {
            final DataRow inRow = it.next();
            final DataCell groupValue;
            if (groupColIndex < 0) {
                groupValue = null;
            } else {
                groupValue = inRow.getCell(groupColIndex);
            }
            if (!ConvenienceMethods.areEqual(curGroupValue, groupValue)) {
                // group changed: this row opens the next output row, so hand it back
                it.pushBack(inRow);
                break;
            }
            mainExec.setProgress(currentRow / (double) totalRows, "Processing row " + currentRow + "/" + totalRows + ": " + inRow.getKey());
            currentRow++;
            mainExec.checkCanceled();
            final int offset = grid * includeIndices.length;
            for (int col = 0; col < includeIndices.length; col++) {
                cells[offset + col] = inRow.getCell(includeIndices[col]);
            }
        }
        final RowKey key = RowKey.createRowKey(currentOutRow);
        currentOutRow++;
        cont.addRowToTable(new DefaultRow(key, cells));
    }
    cont.close();
    return new BufferedDataTable[] { cont.getTable() };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) RowKey(org.knime.core.data.RowKey) SettingsModelFilterString(org.knime.core.node.defaultnodesettings.SettingsModelFilterString) DataRow(org.knime.core.data.DataRow) ExecutionContext(org.knime.core.node.ExecutionContext) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) SortedTable(org.knime.base.data.sort.SortedTable) BufferedDataTable(org.knime.core.node.BufferedDataTable) DataCell(org.knime.core.data.DataCell) ExecutionMonitor(org.knime.core.node.ExecutionMonitor) DefaultRow(org.knime.core.data.def.DefaultRow)

Example 72 with BufferedDataContainer

use of org.knime.core.node.BufferedDataContainer in project knime-core by knime.

the class TableCreator2NodeModel method execute.

/**
 * {@inheritDoc}
 *
 * <p>Materializes the table described by {@code m_settings}: the settings hold parallel arrays of
 * row indices, column indices and string values, one entry per configured cell. Cells without an
 * entry are created from the empty string. Columns flagged as skipped are left out of the output.
 *
 * @throws InvalidSettingsException if a value cannot be converted to its column's type
 */
@Override
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    final DataTableSpec outSpec = createSpec();
    final BufferedDataContainer cont = exec.createDataContainer(outSpec, true);

    final int numColProps = m_settings.getColumnProperties().size();
    final ColProperty[] colProps = new ColProperty[numColProps];
    for (int i = 0; i < numColProps; i++) {
        colProps[i] = m_settings.getColumnProperties().get(i);
    }

    // maps a configured column index to its index in the output (skipped columns take no slot)
    final int[] notSkippedMap = new int[numColProps];
    int outColCount = 0;
    for (int i = 0; i < numColProps; i++) {
        notSkippedMap[i] = outColCount;
        if (!colProps[i].getSkipThisColumn()) {
            outColCount++;
        }
    }

    // read the sparse cell arrays once instead of on every inner-loop pass
    final int[] rowIndices = m_settings.getRowIndices();
    final int[] columnIndices = m_settings.getColumnIndices();
    final String[] values = m_settings.getValues();

    final int numRows = max(rowIndices) + 1;
    final String rowIdPrefix = m_settings.getRowIdPrefix();
    final String rowIdSuffix = m_settings.getRowIdSuffix();
    final int rowIdStartValue = m_settings.getRowIdStartValue();

    // cursor into the sparse cell arrays; it only moves forward, so entries are presumably stored
    // in row-major order - TODO confirm against the settings class
    int c = 0;
    final DataCellFactory cellFactory = new DataCellFactory();
    for (int i = 0; i < numRows; i++) {
        final DataCell[] cells = new DataCell[outSpec.getNumColumns()];
        for (int k = 0; k < numColProps; k++) {
            // fix for bug #2969: step over entries carrying a negative row or column index
            while (c < rowIndices.length && (rowIndices[c] < 0 || columnIndices[c] < 0)) {
                c++;
            }
            String value = "";
            if (c < rowIndices.length && rowIndices[c] == i && columnIndices[c] == k) {
                value = values[c];
                c++;
            }
            // the entry of a skipped column is still consumed above, but produces no cell
            if (colProps[k].getSkipThisColumn()) {
                continue;
            }
            cellFactory.setMissingValuePattern(colProps[k].getMissingValuePattern());
            cellFactory.setFormatParameter(colProps[k].getFormatParameter().orElse(null));
            final DataCell result = cellFactory.createDataCellOfType(colProps[k].getColumnSpec().getType(), value);
            if (result == null) {
                throw new InvalidSettingsException(cellFactory.getErrorMessage());
            }
            cells[notSkippedMap[k]] = result;
        }
        final String rowId = rowIdPrefix + (i + rowIdStartValue) + rowIdSuffix;
        cont.addRowToTable(new DefaultRow(rowId, cells));
    }
    cont.close();
    final BufferedDataTable out = cont.getTable();
    return new BufferedDataTable[] { out };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) DataCellFactory(org.knime.base.node.io.filereader.DataCellFactory) ColProperty(org.knime.base.node.io.filereader.ColProperty) DataRow(org.knime.core.data.DataRow) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) BufferedDataTable(org.knime.core.node.BufferedDataTable) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow) HashSet(java.util.HashSet)

Example 73 with BufferedDataContainer

use of org.knime.core.node.BufferedDataContainer in project knime-core by knime.

the class XValidatePartitionModel method execute.

/**
 * {@inheritDoc}
 *
 * <p>On the first call of a loop run (recognized by {@code m_partNumbers} being {@code null}) the
 * partition number of every input row is determined according to the settings: leave-one-out,
 * stratified, linear or randomly shuffled. Every call then splits the input into a training table
 * (first output) and a test table (second output) for the current iteration, publishes the
 * iteration counters as flow variables and advances the iteration counter.
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    final BufferedDataTable input = inData[0];
    if (m_partNumbers == null) {
        // first iteration: decide which row belongs to which partition
        if (m_settings.leaveOneOut()) {
            // one iteration per row; no partition array needed, the row index is the partition
            m_nrIterations = input.getRowCount();
            m_partNumbers = new short[0];
        } else {
            m_partNumbers = new short[input.getRowCount()];
            if (m_settings.stratifiedSampling()) {
                final ExecutionMonitor subExec = exec.createSubProgress(0.0);
                subExec.setMessage("Preparing stratified sampling");
                final Map<DataCell, List<Integer>> valueCounts =
                    countValues(input, subExec, m_settings.classColumn());
                // deal the rows of each class value round-robin onto the partitions
                int part = 0;
                for (List<Integer> rowIndices : valueCounts.values()) {
                    for (Integer rowIndex : rowIndices) {
                        m_partNumbers[rowIndex] = (short) part;
                        part = (part + 1) % m_settings.validations();
                    }
                }
            } else {
                // contiguous blocks of (roughly) equal size
                final double partSize = m_partNumbers.length / (double) m_settings.validations();
                for (int i = 0; i < m_partNumbers.length; i++) {
                    m_partNumbers[i] = (short) Math.min(i / partSize, m_partNumbers.length);
                }
                if (m_settings.randomSampling()) {
                    final long seed;
                    if (m_settings.useRandomSeed()) {
                        seed = m_settings.randomSeed();
                    } else {
                        seed = System.currentTimeMillis();
                    }
                    // shuffle the block assignment by swapping each slot with a random one
                    final Random rand = new Random(seed);
                    for (int i = 0; i < m_partNumbers.length; i++) {
                        final int pos = rand.nextInt(m_partNumbers.length);
                        final short swapped = m_partNumbers[pos];
                        m_partNumbers[pos] = m_partNumbers[i];
                        m_partNumbers[i] = swapped;
                    }
                }
            }
            m_nrIterations = m_settings.validations();
        }
        m_currIteration = 0;
    }

    final BufferedDataContainer test = exec.createDataContainer(input.getDataTableSpec());
    final BufferedDataContainer train = exec.createDataContainer(input.getDataTableSpec());
    final double rowTotal = input.getRowCount();
    int rowIndex = 0;
    for (DataRow row : input) {
        exec.checkCanceled();
        exec.setProgress(rowIndex / rowTotal);
        // a row is a test row if its index (leave-one-out) or its partition matches the iteration
        final boolean isTestRow = m_settings.leaveOneOut()
            ? rowIndex == m_currIteration
            : m_partNumbers[rowIndex] == m_currIteration;
        if (isTestRow) {
            test.addRowToTable(row);
        } else {
            train.addRowToTable(row);
        }
        rowIndex++;
    }
    test.close();
    train.close();
    // we need to put the counts on the stack for the loop's tail to see:
    pushFlowVariableInt("currentIteration", m_currIteration);
    pushFlowVariableInt("maxIterations", m_nrIterations);
    m_currIteration++;
    return new BufferedDataTable[] { train.getTable(), test.getTable() };
}
Also used : BufferedDataContainer(org.knime.core.node.BufferedDataContainer) DataRow(org.knime.core.data.DataRow) Random(java.util.Random) BufferedDataTable(org.knime.core.node.BufferedDataTable) DataCell(org.knime.core.data.DataCell) ArrayList(java.util.ArrayList) List(java.util.List) ExecutionMonitor(org.knime.core.node.ExecutionMonitor) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map)

Example 74 with BufferedDataContainer

use of org.knime.core.node.BufferedDataContainer in project knime-core by knime.

the class ConcatenateTableFactory method copyTablesIntoOneTable.

/**
 * Merges every container in {@code m_tables} except the last one into a single new container and
 * replaces the list content by that merged container followed by the untouched last container.
 *
 * @param exec used to create the merged container, to report progress and to check for cancellation
 * @throws CanceledExecutionException if the execution is canceled while rows are being copied
 */
private void copyTablesIntoOneTable(final ExecutionContext exec) throws CanceledExecutionException {
    // collect the tables of all containers but the last
    final BufferedDataTable[] toMerge = new BufferedDataTable[m_tables.size() - 1];
    for (int idx = 0; idx < toMerge.length; idx++) {
        toMerge[idx] = m_tables.get(idx).getTable();
    }
    final AppendedRowsTable appended = new AppendedRowsTable(org.knime.core.data.append.AppendedRowsTable.DuplicatePolicy.Fail, null, toMerge);
    final BufferedDataContainer merged = exec.createDataContainer(appended.getDataTableSpec());
    final RowIterator rows = appended.iterator();
    exec.setProgress("Too many tables. Copy tables into one table.");
    for (; rows.hasNext();) {
        exec.checkCanceled();
        merged.addRowToTable(rows.next());
    }
    merged.close();

    // rebuild the list: merged container first, then the container that was last before
    final BufferedDataContainer stillLast = m_tables.get(m_tables.size() - 1);
    m_tables.clear();
    m_tables.add(merged);
    m_tables.add(stillLast);
    exec.setProgress("Tables copied into one.");
}
Also used : BufferedDataContainer(org.knime.core.node.BufferedDataContainer) AppendedRowsTable(org.knime.core.data.append.AppendedRowsTable) RowIterator(org.knime.core.data.RowIterator) BufferedDataTable(org.knime.core.node.BufferedDataTable)

Example 75 with BufferedDataContainer

use of org.knime.core.node.BufferedDataContainer in project knime-core by knime.

the class ConcatenateTableFactory method addTable.

/**
 * Appends all rows of the given row input to the most recent data container, or to a newly created
 * one if there is none yet or the structure of the incoming spec differs from that of the most
 * recent container. Row keys are registered with the duplicate checker, optionally rewritten by the
 * row key creator, and an iteration column is appended if configured. Unless changing specs are
 * tolerated, the incoming spec is compared against the spec of the first table.
 *
 * @param table the table to be added
 * @param exec the execution context to possibly create a new data container
 * @throws IllegalStateException if the most recent container has already been closed
 * @throws InterruptedException declared by this method; presumably raised while polling rows from
 *             {@code table}
 * @throws IOException declared by this method; presumably raised by the duplicate key check
 * @throws DuplicateKeyException if a row key occurs more than once
 * @throws CanceledExecutionException if the execution is canceled while rows are being added
 */
void addTable(final RowInput table, final ExecutionContext exec) throws InterruptedException, DuplicateKeyException, IOException, CanceledExecutionException {
    // a closed last container means createTable was already called
    if (!m_tables.isEmpty() && m_tables.get(m_tables.size() - 1).isClosed()) {
        throw new IllegalStateException("No more tables can be added! ConcatenateTable has already been created.");
    }

    // poll first row in order to check whether the incoming table is empty
    final DataRow firstRow = table.poll();
    final boolean incomingIsEmpty = firstRow == null;
    if (incomingIsEmpty) {
        if (m_ignoreEmptyTables && !m_tables.isEmpty()) {
            m_iterationCount++;
            return;
        }
        if (m_tables.isEmpty()) {
            // very first table and it is empty: remember an empty container carrying its spec
            m_emptyTable = exec.createDataContainer(createSpec(table.getDataTableSpec(), m_addIterationColumn, false));
            m_iterationCount++;
            return;
        }
    }

    // the spec check needs something to compare against and is skipped for ignorable empty tables
    final boolean haveReferenceSpec = !m_tables.isEmpty() || m_emptyTable != null;
    final boolean emptyAndIgnored = m_ignoreEmptyTables && (incomingIsEmpty || m_emptyTable != null);
    if (!m_tolerateChangingSpecs && haveReferenceSpec && !emptyAndIgnored) {
        // build both specs for comparison; column types are generalized if altered types are tolerated
        final DataTableSpec referenceSpec;
        if (m_tables.isEmpty() && m_emptyTable != null) {
            referenceSpec = createSpec(m_emptyTable.getTableSpec(), false, m_tolerateColumnTypes);
        } else {
            referenceSpec = createSpec(m_tables.get(0).getTableSpec(), false, m_tolerateColumnTypes);
        }
        final DataTableSpec incomingSpec =
            createSpec(table.getDataTableSpec(), m_addIterationColumn, m_tolerateColumnTypes);
        // fails if the specs differ
        compareSpecsAndFail(referenceSpec, incomingSpec);
    }

    // an empty table that is not ignored only takes part in the spec check
    if (incomingIsEmpty) {
        m_iterationCount++;
        return;
    }

    // too many containers: collapse the closed ones into a single container first
    if (m_tables.size() > MAX_NUM_TABLES) {
        copyTablesIntoOneTable(exec);
    }

    // reuse the most recent container when its structure matches, otherwise close it and open a new
    // one; reusing avoids piling up one container per iteration
    final DataTableSpec newTableSpec = createSpec(table.getDataTableSpec(), m_addIterationColumn, false);
    final BufferedDataContainer con;
    if (m_tables.isEmpty()) {
        con = exec.createDataContainer(newTableSpec);
        m_tables.add(con);
    } else {
        final BufferedDataContainer previous = m_tables.get(m_tables.size() - 1);
        if (newTableSpec.equalStructure(previous.getTableSpec())) {
            con = previous;
        } else {
            previous.close();
            con = exec.createDataContainer(newTableSpec);
            m_tables.add(con);
        }
    }

    // copy the rows, starting with the one that was polled above
    for (DataRow polled = firstRow; polled != null; polled = table.poll()) {
        exec.checkCanceled();
        DataRow outRow = polled;
        if (m_rowKeyCreator != null) {
            // replace the row key while keeping the cells
            outRow = new BlobSupportDataRow(m_rowKeyCreator.apply(outRow.getKey()), outRow);
        }
        m_duplicateChecker.addKey(outRow.getKey().toString());
        if (m_addIterationColumn) {
            final IntCell currIterCell = new IntCell(m_iterationCount);
            outRow = new org.knime.core.data.append.AppendedColumnRow(outRow, currIterCell);
        }
        con.addRowToTable(outRow);
    }
    m_iterationCount++;
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) BlobSupportDataRow(org.knime.core.data.container.BlobSupportDataRow) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) BlobSupportDataRow(org.knime.core.data.container.BlobSupportDataRow) DataRow(org.knime.core.data.DataRow) IntCell(org.knime.core.data.def.IntCell)

Aggregations

BufferedDataContainer (org.knime.core.node.BufferedDataContainer)157 BufferedDataTable (org.knime.core.node.BufferedDataTable)96 DefaultRow (org.knime.core.data.def.DefaultRow)93 DataCell (org.knime.core.data.DataCell)88 DataTableSpec (org.knime.core.data.DataTableSpec)88 DataRow (org.knime.core.data.DataRow)80 RowKey (org.knime.core.data.RowKey)38 DoubleCell (org.knime.core.data.def.DoubleCell)37 StringCell (org.knime.core.data.def.StringCell)26 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)24 ArrayList (java.util.ArrayList)23 DataColumnSpec (org.knime.core.data.DataColumnSpec)21 CanceledExecutionException (org.knime.core.node.CanceledExecutionException)21 ExecutionMonitor (org.knime.core.node.ExecutionMonitor)17 DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator)16 IOException (java.io.IOException)15 ExecutionContext (org.knime.core.node.ExecutionContext)15 LinkedHashMap (java.util.LinkedHashMap)14 HashSet (java.util.HashSet)13 IntCell (org.knime.core.data.def.IntCell)13