Search in sources :

Example 56 with BufferedDataContainer

use of org.knime.core.node.BufferedDataContainer in project knime-core by knime.

the class ColumnAutoTypeCasterNodeModel method execute.

/**
 * Guesses the best matching type for every included column by scanning the input rows and
 * replaces those columns with converted ones.
 *
 * <p>Two tables are returned: the (possibly converted) data table and a three-column string
 * table that records, per included column, the final type and the row that determined it.
 * For an empty input the data table is passed through unchanged and the second table is empty.
 *
 * {@inheritDoc}
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    double progress = 0;
    final BufferedDataTable data = inData[0];
    BufferedDataTable outTable = inData[0];
    final DataTableSpec inSpec = data.getDataTableSpec();
    final String[] incls = m_conf.applyTo(inSpec).getIncludes();
    final DataType[] types = new DataType[incls.length];
    final double max = incls.length + data.size();
    final String[] colNames = { "Column name", "Final column type", "Row determining final column type" };
    final DataType[] colTypes = new DataType[] { StringCell.TYPE, StringCell.TYPE, StringCell.TYPE };
    BufferedDataContainer reasonsCon = exec.createDataContainer(new DataTableSpec(colNames, colTypes));
    setReasons(new String[incls.length][3]);
    if (data.size() > 0) {
        // the column positions do not change while scanning, so resolve them once
        final int[] colIndices = new int[incls.length];
        for (int i = 0; i < incls.length; i++) {
            colIndices[i] = inSpec.findColumnIndex(incls[i]);
        }
        SimpleDateFormat dateFormat = new SimpleDateFormat(m_dateFormat);
        // in quick scan mode only the first m_numberOfRows rows are inspected
        long numberOfRows = m_quickScan ? Math.min(m_numberOfRows, data.size()) : data.size();
        // Iterate explicitly so that the very iterator that is being consumed gets closed,
        // both when the scan stops early and when the execution is canceled.
        final org.knime.core.data.container.CloseableRowIterator rowIt = data.iterator();
        try {
            while (numberOfRows > 0 && rowIt.hasNext()) {
                numberOfRows--;
                final DataRow row = rowIt.next();
                for (int i = 0; i < incls.length; i++) {
                    // guess for each cell in each column the best matching datatype
                    DataCell c = row.getCell(colIndices[i]);
                    if (!c.isMissing() && c.toString().equals(m_missValPat)) {
                        // cells equal to the missing value pattern do not influence the guess
                        continue;
                    }
                    DataType newType = typeGuesser(c, dateFormat);
                    if (types[i] != null) {
                        DataType toSet = setType(types[i], newType);
                        if (!toSet.equals(types[i])) {
                            // this row changed the column type, remember it as the reason
                            m_reasons[i][2] = row.getKey().getString();
                            m_reasons[i][1] = toSet.toString();
                            m_reasons[i][0] = incls[i];
                        }
                        types[i] = toSet;
                    } else {
                        // first relevant cell of this column
                        types[i] = newType;
                        String r = row.getKey().toString();
                        r += m_quickScan ? (" based on a quickscan.") : "";
                        m_reasons[i][2] = r;
                        m_reasons[i][1] = newType.toString();
                        m_reasons[i][0] = incls[i];
                    }
                    exec.checkCanceled();
                }
                exec.checkCanceled();
                progress++;
                exec.setProgress(progress / max);
            }
        } finally {
            rowIt.close();
        }
        // NOTE(review): types[i] is still null here if every scanned cell of a column matched
        // the missing value pattern (or no row was scanned); the equals call below then throws
        // a NullPointerException - confirm whether that case needs explicit handling.
        for (int i = 0; i < types.length; i++) {
            // if a column only contains missing cells then set the column type to StringCell
            if (types[i].equals(DataType.getMissingCell().getType())) {
                types[i] = StringCell.TYPE;
            }
        }
        ColumnRearranger arrange = new ColumnRearranger(inSpec);
        for (int i = 0; i < incls.length; i++) {
            final int colIdx = colIndices[i];
            final DataType type = types[i];
            DataColumnSpecCreator colSpecCreator = new DataColumnSpecCreator(incls[i], types[i]);
            DataColumnSpec colSpec = colSpecCreator.createSpec();
            // pick the converter that matches the guessed target type
            if (type.equals(DateAndTimeCell.TYPE)) {
                arrange.replace(createDateAndTimeConverter(colIdx, colSpec), colIdx);
            } else if (type.equals(LongCell.TYPE)) {
                arrange.replace(createLongConverter(colIdx, colSpec), colIdx);
            } else {
                arrange.replace(createNumberConverter(colIdx, type, colSpec), colIdx);
            }
            progress++;
            exec.setProgress(progress / max);
            exec.checkCanceled();
        }
        outTable = exec.createColumnRearrangeTable(data, arrange, exec);
        // write one row per included column into the reasons table
        for (int i = 0; i < m_reasons.length; i++) {
            DataCell[] row = new DataCell[m_reasons[i].length];
            for (int j = 0; j < m_reasons[i].length; j++) {
                row[j] = new StringCell(m_reasons[i][j]);
            }
            reasonsCon.addRowToTable(new DefaultRow(RowKey.createRowKey((long) i), row));
        }
    }
    reasonsCon.close();
    BufferedDataTable outReasons = reasonsCon.getTable();
    return new BufferedDataTable[] { outTable, outReasons };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) DataRow(org.knime.core.data.DataRow) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) DataColumnSpec(org.knime.core.data.DataColumnSpec) StringCell(org.knime.core.data.def.StringCell) BufferedDataTable(org.knime.core.node.BufferedDataTable) DataType(org.knime.core.data.DataType) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow) SimpleDateFormat(java.text.SimpleDateFormat)

Example 57 with BufferedDataContainer

use of org.knime.core.node.BufferedDataContainer in project knime-core by knime.

the class BootstrapNodeModel method execute.

/**
 * Draws bootstrap samples from the input table chunk by chunk.
 *
 * <p>Returns two tables: the container filled as {@code bootstrap} and the container filled
 * as {@code holdout}; both are populated by {@code sampleChunk}.
 *
 * {@inheritDoc}
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    // Init random object
    long seed = m_configuration.getUseSeed() ? m_configuration.getSeed() : System.currentTimeMillis();
    Random random = new Random(seed);
    // Create containers for output tables
    BufferedDataContainer bootstrap = exec.createDataContainer(getSamplesSpec(inData[0].getDataTableSpec()));
    BufferedDataContainer holdout = exec.createDataContainer(inData[0].getDataTableSpec());
    // Create iterator for all rows
    CloseableRowIterator iterator = inData[0].iterator();
    try {
        int numberOfRows = inData[0].getRowCount();
        // Init unprocessed rows with amount of all rows
        int unprocessedRows = numberOfRows;
        // Create progress object with amount of all rows
        Progress progress = new Progress(numberOfRows, exec);
        // Calculate number of samples
        int numberOfSamples;
        if (m_configuration.getInPercent()) {
            numberOfSamples = Math.round(numberOfRows * (m_configuration.getPercent() / 100));
        } else {
            numberOfSamples = m_configuration.getSize();
        }
        // Execute while until every row has been processed
        while (unprocessedRows > 0) {
            int chunkSize;
            int numberOfChunkSamples;
            // Full chunks get a proportional share of the samples; the last chunk takes
            // whatever is still missing and thereby fixes rounding issues
            if (unprocessedRows > MAX_CHUNK_SIZE) {
                // Set to biggest allowed size
                chunkSize = MAX_CHUNK_SIZE;
                // Calculate amount of samples relative to the size of this chunk
                numberOfChunkSamples = Math.round((chunkSize / (float) numberOfRows) * numberOfSamples);
            } else {
                // Make this chunk as big as there are rows left
                chunkSize = unprocessedRows;
                // Generate the rest of the samples
                // (this will take care of rounding errors that may occur in the relative calculation)
                // we never put more than 2^31 rows in the bootstrap container, therefore it's safe to cast to int
                numberOfChunkSamples = numberOfSamples - (int) bootstrap.size();
            }
            // Sample this chunk
            sampleChunk(iterator, chunkSize, numberOfChunkSamples, bootstrap, holdout, random, progress);
            // Mark chunked rows as processed
            unprocessedRows -= chunkSize;
        }
    } finally {
        // release the iterator even if sampling fails or is aborted half way through
        iterator.close();
    }
    bootstrap.close();
    holdout.close();
    return new BufferedDataTable[] { bootstrap.getTable(), holdout.getTable() };
}
Also used : Random(java.util.Random) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) BufferedDataTable(org.knime.core.node.BufferedDataTable) CloseableRowIterator(org.knime.core.data.container.CloseableRowIterator)

Example 58 with BufferedDataContainer

use of org.knime.core.node.BufferedDataContainer in project knime-core by knime.

the class CreateTableStructureNodeModel method execute.

/**
 * Produces a single output table that carries the structure returned by
 * {@code createSpec()} and contains no rows.
 *
 * {@inheritDoc}
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    final BufferedDataContainer emptyContainer = exec.createDataContainer(createSpec());
    // nothing is ever added, so the container is closed right away
    emptyContainer.close();
    final BufferedDataTable structureOnly = emptyContainer.getTable();
    return new BufferedDataTable[] { structureOnly };
}
Also used : BufferedDataContainer(org.knime.core.node.BufferedDataContainer) BufferedDataTable(org.knime.core.node.BufferedDataTable)

Example 59 with BufferedDataContainer

use of org.knime.core.node.BufferedDataContainer in project knime-core by knime.

the class CAIMDiscretizationNodeModel method createResultTable.

/**
 * Builds the discretized version of an input table. Every column that is
 * included in the given {@link DiscretizationModel} is replaced by a String
 * column holding the discrete value assigned by the corresponding
 * {@link DiscretizationScheme}; all other columns are copied unchanged.
 * Missing cells in included columns are kept as they are.
 *
 * @param exec the context used to create the {@link BufferedDataTable} and
 *            to report progress
 * @param table the input data table
 * @param discretizationModel the {@link DiscretizationModel} providing the
 *            included column names and the schemes that map numerical
 *            intervals to nominal String values
 * @return the discretized input data
 */
public static BufferedDataTable createResultTable(final ExecutionContext exec, final BufferedDataTable table, final DiscretizationModel discretizationModel) {
    final DiscretizationScheme[] allSchemes = discretizationModel.getSchemes();
    final String[] includedColumnNames = discretizationModel.getIncludedColumnNames();
    final DataTableSpec inputSpec = table.getDataTableSpec();
    // keep only the schemes whose columns are actually present in the table
    final DiscretizationScheme[] schemes = filterNotKnownSchemes(allSchemes, includedColumnNames, inputSpec);

    final int columnCount = inputSpec.getNumColumns();
    final DataColumnSpec[] outputColumns = new DataColumnSpec[columnCount];
    // discretize[k] tells whether the k-th column gets replaced
    final boolean[] discretize = new boolean[columnCount];
    int columnPos = 0;
    for (DataColumnSpec inputColumn : inputSpec) {
        final boolean replace = isIncluded(inputColumn, includedColumnNames) > -1;
        discretize[columnPos] = replace;
        // included columns become nominal String columns, the rest is taken over as is
        outputColumns[columnPos] = replace
            ? new DataColumnSpecCreator(inputColumn.getName(), StringCell.TYPE).createSpec()
            : inputColumn;
        columnPos++;
    }

    final BufferedDataContainer container = exec.createDataContainer(new DataTableSpec(outputColumns));
    final double totalRows = table.size();
    long processedRows = 0;
    for (DataRow row : table) {
        // report progress every 200 rows only
        if (processedRows % 200 == 0) {
            exec.setProgress(processedRows / totalRows);
        }
        final DataCell[] outputCells = new DataCell[row.getNumCells()];
        int cellPos = 0;
        // index of the scheme that belongs to the next included column
        int schemePos = 0;
        for (DataCell cell : row) {
            final boolean replace = discretize[cellPos];
            if (replace && !cell.isMissing()) {
                // map the numeric value to its discrete String representation
                final double numericValue = ((DoubleValue) cell).getDoubleValue();
                outputCells[cellPos] = new StringCell(schemes[schemePos].getDiscreteValue(numericValue));
            } else {
                // untouched column or missing value: keep the original cell
                outputCells[cellPos] = cell;
            }
            if (replace) {
                schemePos++;
            }
            cellPos++;
        }
        container.addRowToTable(new DefaultRow(row.getKey(), outputCells));
        processedRows++;
    }
    container.close();
    return container.getTable();
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) DiscretizationScheme(org.knime.base.node.preproc.discretization.caim2.DiscretizationScheme) SettingsModelFilterString(org.knime.core.node.defaultnodesettings.SettingsModelFilterString) SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString) DataRow(org.knime.core.data.DataRow) DataColumnSpec(org.knime.core.data.DataColumnSpec) DoubleValue(org.knime.core.data.DoubleValue) StringCell(org.knime.core.data.def.StringCell) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow)

Example 60 with BufferedDataContainer

use of org.knime.core.node.BufferedDataContainer in project knime-core by knime.

the class HiliteFilterNodeModel method execute.

/**
 * Splits the input table into two tables: rows whose key is reported as hilited by
 * {@code m_inHdl} go to the first output, all other rows to the second output.
 * After the split this node model is added as a hilite listener to {@code m_inHdl}.
 *
 * {@inheritDoc}
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    DataTableSpec inSpec = inData[0].getDataTableSpec();
    BufferedDataContainer bufIn = exec.createDataContainer(inSpec);
    BufferedDataContainer bufOut = exec.createDataContainer(inSpec);
    // the whole split runs while holding the handler's monitor
    synchronized (m_inHdl) {
        double rowCnt = inData[0].size();
        CloseableRowIterator it = inData[0].iterator();
        try {
            for (long i = 0; i < rowCnt; i++) {
                DataRow row = it.next();
                if (m_inHdl.isHiLit(row.getKey())) {
                    bufIn.addRowToTable(row);
                } else {
                    bufOut.addRowToTable(row);
                }
                exec.checkCanceled();
                exec.setProgress((i + 1) / rowCnt);
            }
        } finally {
            // checkCanceled() may abort the loop before the iterator is exhausted
            it.close();
        }
    }
    bufIn.close();
    bufOut.close();
    m_inHdl.addHiLiteListener(this);
    return new BufferedDataTable[] { bufIn.getTable(), bufOut.getTable() };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) BufferedDataTable(org.knime.core.node.BufferedDataTable) CloseableRowIterator(org.knime.core.data.container.CloseableRowIterator) DataRow(org.knime.core.data.DataRow)

Aggregations

BufferedDataContainer (org.knime.core.node.BufferedDataContainer)157 BufferedDataTable (org.knime.core.node.BufferedDataTable)96 DefaultRow (org.knime.core.data.def.DefaultRow)93 DataCell (org.knime.core.data.DataCell)88 DataTableSpec (org.knime.core.data.DataTableSpec)88 DataRow (org.knime.core.data.DataRow)80 RowKey (org.knime.core.data.RowKey)38 DoubleCell (org.knime.core.data.def.DoubleCell)37 StringCell (org.knime.core.data.def.StringCell)26 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)24 ArrayList (java.util.ArrayList)23 DataColumnSpec (org.knime.core.data.DataColumnSpec)21 CanceledExecutionException (org.knime.core.node.CanceledExecutionException)21 ExecutionMonitor (org.knime.core.node.ExecutionMonitor)17 DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator)16 IOException (java.io.IOException)15 ExecutionContext (org.knime.core.node.ExecutionContext)15 LinkedHashMap (java.util.LinkedHashMap)14 HashSet (java.util.HashSet)13 IntCell (org.knime.core.data.def.IntCell)13