Search in sources :

Example 56 with PortObject

use of org.knime.core.node.port.PortObject in project knime-core by knime.

In the class DataValidatorNodeModel, the method execute:

/**
 * Validates the table on input port 0 and emits either the rearranged table
 * or a table describing the detected conflicts.
 *
 * <p>Conflicts are collected twice: once while the column rearranger is
 * created, and again after the rearranged table has been built. What happens
 * when conflicts exist is decided by {@code m_config.getFailingBehavior()}.
 *
 * @param inData the input port objects; element 0 is cast to {@link BufferedDataTable}
 * @param exec the execution context used to build the output tables
 * @return a two-element array: the rearranged table plus an inactive second
 *         port, or an inactive first port plus the conflicts table
 * @throws Exception an {@link InvalidSettingsException} if conflicts exist and
 *         the failing behavior is {@code FAIL_NODE}; anything thrown by the
 *         called helpers is propagated
 */
@Override
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    final BufferedDataTable inputTable = (BufferedDataTable) inData[0];
    final DataTableSpec inputSpec = inputTable.getDataTableSpec();
    checkSettingNotNull(m_config, "Configuration is missing.");

    final DataValidatorColConflicts conflicts = new DataValidatorColConflicts();
    final ColumnRearranger rearranger = createRearranger(inputSpec, conflicts);

    // Conflicts already known after building the rearranger: fail early if configured to do so.
    if (!conflicts.isEmpty()) {
        switch (m_config.getFailingBehavior()) {
            case FAIL_NODE:
                throw new InvalidSettingsException("Validation failed:\n" + conflicts);
            default:
                break;
        }
    }

    final ExecutionContext rearrangeExec = exec.createSubExecutionContext(0.9);
    final BufferedDataTable validatedTable = exec.createColumnRearrangeTable(inputTable, rearranger, rearrangeExec);

    // Nothing to report: pass the rearranged table on and deactivate the conflicts port.
    if (conflicts.isEmpty()) {
        return new PortObject[] { validatedTable, InactiveBranchPortObject.INSTANCE };
    }

    switch (m_config.getFailingBehavior()) {
        case OUTPUT_TO_PORT_CHECK_DATA:
            final ExecutionContext conflictExec = exec.createSubExecutionContext(0.1);
            return new PortObject[] { InactiveBranchPortObject.INSTANCE, createConflictsTable(conflicts, conflictExec) };
        case FAIL_NODE:
            throw new InvalidSettingsException("Validation failed:\n" + conflicts);
        default:
            // every other behavior still outputs the rearranged table
            return new PortObject[] { validatedTable, InactiveBranchPortObject.INSTANCE };
    }
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) BufferedDataTable(org.knime.core.node.BufferedDataTable) InactiveBranchPortObject(org.knime.core.node.port.inactive.InactiveBranchPortObject) PortObject(org.knime.core.node.port.PortObject)

Example 57 with PortObject

use of org.knime.core.node.port.PortObject in project knime-core by knime.

In the class DiscretizationApplyNodeModel, the method execute:

/**
 * Applies the discretization model from port {@code MODEL_INPORT} to the
 * table from port {@code DATA_INPORT}.
 *
 * @param inData the input port objects (model and data table)
 * @param exec the execution context handed to the result-table builder
 * @return a single-element array: the unchanged data port object if the model
 *         contains no schemes, otherwise the discretized table
 * @throws Exception whatever the result-table creation throws
 */
@Override
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    final DiscretizationModel model = (DiscretizationModel) inData[MODEL_INPORT];
    final boolean modelHasSchemes = model.getSchemes().length != 0;

    if (modelHasSchemes) {
        // replace the included columns by the interval values defined in the model
        final BufferedDataTable input = (BufferedDataTable) inData[DATA_INPORT];
        final BufferedDataTable discretized = CAIMDiscretizationNodeModel.createResultTable(exec, input, model);
        return new BufferedDataTable[] { discretized };
    }

    // an empty model leaves nothing to apply: hand the input data through untouched
    return new PortObject[] { inData[DATA_INPORT] };
}
Also used : DiscretizationModel(org.knime.base.node.preproc.discretization.caim2.DiscretizationModel) BufferedDataTable(org.knime.core.node.BufferedDataTable) PortObject(org.knime.core.node.port.PortObject)

Example 58 with PortObject

use of org.knime.core.node.port.PortObject in project knime-core by knime.

In the class CAIMDiscretizationNodeModel, the method execute:

/**
 * Learns one discretization scheme per included column and returns the
 * discretized table together with the resulting model.
 *
 * <p>For every included column the method starts with a single interval
 * spanning the column domain's lower and upper bound. It then repeatedly tries
 * every remaining candidate boundary, scores the tentative scheme with
 * {@code QuantaMatrix2D.calculateCaim()} and inserts the best-scoring boundary,
 * until the loop condition no longer holds or no candidate is left.
 *
 * @param inData the input port objects; element 0 is cast to {@link BufferedDataTable}
 * @param exec the execution context used for progress, cancellation and table creation
 * @return a two-element array holding the result table and the discretization
 *         model; if the include list is null or empty, the unchanged input
 *         and a new empty {@link DiscretizationModel}
 * @throws Exception propagated from cancellation checks and the called helpers;
 *         an {@link IllegalStateException} if inserting the chosen boundary
 *         did not increase the number of intervals
 */
@Override
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    // measure the time
    long startTime = System.currentTimeMillis();
    // empty model: nothing to discretize, pass the input through
    if (m_includedColumnNames.getIncludeList() == null || m_includedColumnNames.getIncludeList().size() == 0) {
        return new PortObject[] { inData[0], new DiscretizationModel() };
    }
    LOGGER.debug("Start discretizing.");
    // as the algorithm is for binary class problems only
    // (positive, negative) the algorithm is performed for each class value
    // labeled as positive class and the rest as negative
    exec.setProgress(0.0, "Preparing...");
    // check input data
    BufferedDataTable data = (BufferedDataTable) inData[0];
    // get class column index
    m_classifyColumnIndex = data.getDataTableSpec().findColumnIndex(m_classColumnName.getStringValue());
    // only verified when assertions are enabled
    assert m_classifyColumnIndex > -1;
    // create the class - index mapping
    createClassFromToIndexMaps(data.getDataTableSpec());
    // create the array with the result discretization schemes for
    // each included column
    DiscretizationScheme[] resultSchemes = new DiscretizationScheme[m_includedColumnNames.getIncludeList().size()];
    // for all included columns do the discretization
    int currentColumn = 0;
    for (String includedColumnName : m_includedColumnNames.getIncludeList()) {
        LOGGER.debug("Process column: " + includedColumnName);
        exec.setProgress("Discretizing column '" + includedColumnName + "'");
        // every column receives an equal share of the overall progress
        ExecutionContext subExecPerColumn = exec.createSubExecutionContext(1.0D / m_includedColumnNames.getIncludeList().size());
        subExecPerColumn.checkCanceled();
        // never discretize the class column (should never happen)
        // NOTE(review): skipping does not advance currentColumn, so in that case
        // the last slot of resultSchemes stays null - confirm the model tolerates it
        if (m_classColumnName.getStringValue().equals(includedColumnName)) {
            continue;
        }
        // determine the column index of the current column
        int columnIndex = data.getDataTableSpec().findColumnIndex(includedColumnName);
        DataColumnDomain domain = data.getDataTableSpec().getColumnSpec(columnIndex).getDomain();
        // NOTE(review): assumes both domain bounds are set and are DoubleValues - TODO confirm
        double minValue = ((DoubleValue) domain.getLowerBound()).getDoubleValue();
        double maxValue = ((DoubleValue) domain.getUpperBound()).getDoubleValue();
        // find all distinct values of the column and create
        // a table with all possible interval boundaries (midpoint value of
        // adjacent values)
        subExecPerColumn.setProgress("Find possible boundaries.");
        BoundaryScheme boundaryScheme = null;
        // create subExec for sorting (10% of this column's progress)
        ExecutionContext subExecSort = subExecPerColumn.createSubExecutionContext(0.1);
        // long t1 = System.currentTimeMillis();
        // the flag selects one of two boundary-creation implementations
        if (m_classOptimizedVersion) {
            boundaryScheme = createAllIntervalBoundaries(data, columnIndex, subExecSort);
        } else {
            boundaryScheme = createAllIntervalBoundaries2(data, columnIndex, subExecSort);
        }
        subExecSort.setProgress(1.0D);
        // long t2 = System.currentTimeMillis() - t1;
        // LOGGER.error("Create boundaries time: " + (t2 / 1000.0)
        // + " optimized: " + m_classOptimizedVersion);
        // LOGGER.error("Boundaries: " + boundaryScheme.getHead());
        // head of the linked list of candidate boundaries; iteration below starts at head.m_next
        LinkedDouble allIntervalBoundaries = boundaryScheme.getHead();
        // create the initial discretization scheme: one interval from min to max
        DiscretizationScheme discretizationScheme = new DiscretizationScheme(new Interval(minValue, maxValue, true, true));
        // CAIM value of the scheme accepted so far
        double globalCAIM = 0;
        // perform the iterative search for the best intervals
        int numInsertedBounds = 0;
        // best CAIM value found in the most recent pass over the candidates
        double currentCAIM = 0;
        // create subExec for inserted bounds (remaining 90% of this column's progress)
        ExecutionContext subExecBounds = subExecPerColumn.createSubExecutionContext(0.9);
        // NOTE(review): this loop uses the limit m_classValues.length - 1, while the
        // acceptance check further down uses m_classValues.length - confirm both are intended
        while (currentCAIM > globalCAIM || numInsertedBounds < m_classValues.length - 1) {
            subExecPerColumn.checkCanceled();
            // create subExec for counting
            ExecutionContext subExecCount = subExecBounds.createSubExecutionContext(1.0D / m_classValues.length);
            // LOGGER.debug("Inserted bounds: " + numInsertedBounds);
            // LOGGER.debug("interval boundaries: " +
            // allIntervalBoundaries);
            // for all possible interval boundaries
            // insert each one, calculate the caim value and add
            // the one with the biggest caim
            LinkedDouble intervalBoundary = allIntervalBoundaries.m_next;
            // reset the per-pass maximum before scanning the candidates
            currentCAIM = 0;
            LinkedDouble bestBoundary = null;
            long currentCountedBoundaries = 0;
            while (intervalBoundary != null) {
                subExecPerColumn.checkCanceled();
                // set progress
                currentCountedBoundaries++;
                subExecCount.setProgress((double) currentCountedBoundaries / (double) boundaryScheme.getNumBoundaries(), "Count for possible boundary " + currentCountedBoundaries + " of " + boundaryScheme.getNumBoundaries());
                // LOGGER.debug("current caim: " + currentCAIM);
                // evaluate the candidate on a copy so the accepted scheme stays untouched
                DiscretizationScheme tentativeDS = new DiscretizationScheme(discretizationScheme);
                tentativeDS.insertBound(intervalBoundary.m_value);
                // create the quanta matrix
                QuantaMatrix2D quantaMatrix = new QuantaMatrix2D(tentativeDS, m_classValueToIndexMap);
                // pass the data for filling the matrix
                quantaMatrix.countData(data, columnIndex, m_classifyColumnIndex);
                // calculate the caim
                double caim = quantaMatrix.calculateCaim();
                // strict comparison: the first candidate reaching the maximum wins,
                // and a candidate whose caim is not above 0 is never selected
                if (caim > currentCAIM) {
                    currentCAIM = caim;
                    bestBoundary = intervalBoundary;
                }
                intervalBoundary = intervalBoundary.m_next;
            }
            // if there is no best boundary, break the first while loop
            if (bestBoundary == null) {
                break;
            }
            // in this case accept the best discretization scheme
            if (currentCAIM > globalCAIM || numInsertedBounds < m_classValues.length) {
                // remember the interval count to verify the insertion actually split an interval
                int numIntervals = discretizationScheme.getNumIntervals();
                discretizationScheme.insertBound(bestBoundary.m_value);
                // remove the linked list element from the list
                bestBoundary.remove();
                globalCAIM = currentCAIM;
                if (numIntervals < discretizationScheme.getNumIntervals()) {
                    numInsertedBounds++;
                    subExecPerColumn.setProgress("Inserted bound " + numInsertedBounds);
                // LOGGER.debug("Inserted boundary: "
                // + bestBoundary.m_value);
                } else {
                    throw new IllegalStateException("Only usefull bounds should be inserted: " + bestBoundary.m_value);
                }
            }
            subExecCount.setProgress(1.0D);
        }
        resultSchemes[currentColumn] = discretizationScheme;
        subExecBounds.setProgress(1.0D);
        // ensure the full progress is set for this iteration
        subExecPerColumn.setProgress(1.0D);
        currentColumn++;
    }
    // set the model
    DataTableSpec modelSpec = createModelSpec(m_includedColumnNames, data.getDataTableSpec());
    m_discretizationModel = new DiscretizationModel(resultSchemes, modelSpec);
    // create an output table that replaces the included columns by
    // interval values
    BufferedDataTable resultTable = createResultTable(exec, data, m_discretizationModel);
    // log the runtime of the execute method
    long runtime = System.currentTimeMillis() - startTime;
    LOGGER.debug("Binning runtime: " + (runtime / 1000.0) + " sec.");
    return new PortObject[] { resultTable, m_discretizationModel };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DiscretizationScheme(org.knime.base.node.preproc.discretization.caim2.DiscretizationScheme) SettingsModelFilterString(org.knime.core.node.defaultnodesettings.SettingsModelFilterString) SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString) ExecutionContext(org.knime.core.node.ExecutionContext) DataColumnDomain(org.knime.core.data.DataColumnDomain) DoubleValue(org.knime.core.data.DoubleValue) DiscretizationModel(org.knime.base.node.preproc.discretization.caim2.DiscretizationModel) BufferedDataTable(org.knime.core.node.BufferedDataTable) PortObject(org.knime.core.node.port.PortObject) Interval(org.knime.base.node.preproc.discretization.caim2.Interval)

Example 59 with PortObject

use of org.knime.core.node.port.PortObject in project knime-core by knime.

In the class InteractiveHiLiteCollectorNodeModel, the method execute:

/**
 * Appends the collected annotations as additional string columns to the
 * input table.
 *
 * <p>The input port object is stored in {@code m_data}. If no annotations
 * have been collected, it is returned unchanged. Otherwise one cell per index
 * {@code 0..m_lastIndex} is appended to every row: the annotation string
 * registered for the row key and index, or a missing cell if there is none.
 *
 * @param inData the input port objects; element 0 is the table to annotate
 * @param exec the execution context used to build the rearranged table
 * @return a single-element array with the unchanged input or the annotated table
 * @throws Exception whatever building the rearranged table throws
 */
@Override
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    m_data = inData[0];
    if (m_annotationMap.isEmpty()) {
        return new PortObject[] { m_data };
    }
    DataTableSpec inSpec = (DataTableSpec) m_data.getSpec();
    final DataColumnSpec[] cspecs = createSpecs(inSpec);
    ColumnRearranger cr = new ColumnRearranger(inSpec);
    cr.append(new CellFactory() {

        /**
         * {@inheritDoc}
         */
        @Override
        public DataCell[] getCells(final DataRow row) {
            if (m_annotationMap.isEmpty()) {
                return new DataCell[0];
            }
            DataCell[] cells = new DataCell[m_lastIndex + 1];
            // The annotations depend only on the row key, so look them up
            // once per row instead of once per generated cell.
            final Map<Integer, String> rowAnnotations = m_annotationMap.get(row.getKey());
            for (int i = 0; i < cells.length; i++) {
                final String annotation = (rowAnnotations == null) ? null : rowAnnotations.get(i);
                if (annotation == null) {
                    cells[i] = DataType.getMissingCell();
                } else {
                    cells[i] = new StringCell(annotation);
                }
            }
            return cells;
        }

        /**
         * {@inheritDoc}
         */
        @Override
        public DataColumnSpec[] getColumnSpecs() {
            return cspecs;
        }

        /**
         * {@inheritDoc}
         */
        @Override
        public void setProgress(final int curRowNr, final int rowCount, final RowKey lastKey, final ExecutionMonitor em) {
            em.setProgress((double) curRowNr / rowCount);
        }
    });
    return new BufferedDataTable[] { exec.createColumnRearrangeTable((BufferedDataTable) m_data, cr, exec) };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) RowKey(org.knime.core.data.RowKey) DataRow(org.knime.core.data.DataRow) DataColumnSpec(org.knime.core.data.DataColumnSpec) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) StringCell(org.knime.core.data.def.StringCell) BufferedDataTable(org.knime.core.node.BufferedDataTable) ExecutionMonitor(org.knime.core.node.ExecutionMonitor) PortObject(org.knime.core.node.port.PortObject) CellFactory(org.knime.core.data.container.CellFactory) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map)

Example 60 with PortObject

use of org.knime.core.node.port.PortObject in project knime-core by knime.

In the class EndcaseNodeModel, the method execute:

/**
 * Concatenates the tables of all connected, active input ports into a single
 * output table.
 *
 * <p>If no port delivers an active table, the object on port 0 is passed
 * through. Otherwise all active tables must share the structure of the first
 * one; their rows are appended, optionally using {@code m_suffix} when
 * {@code m_isAppendSuffix} is set. A warning is set if the iterator reports
 * skipped rows, and the hilite mapping is rebuilt when hiliting is enabled.
 *
 * @param inData the input port objects; entries may be null or inactive
 * @param exec the execution context used for iteration and the output container
 * @return a single-element array with the passed-through port object or the
 *         concatenated table
 * @throws Exception if the active tables have incompatible structures, or the
 *         cause of a {@link RuntimeCanceledExecutionException} raised while
 *         iterating
 */
@Override
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    // keep every port that is connected and not part of an inactive branch
    Vector<BufferedDataTable> activeTables = new Vector<BufferedDataTable>();
    for (int port = 0; port < getNrInPorts(); port++) {
        PortObject candidate = inData[port];
        if (candidate == null || candidate instanceof InactiveBranchPortObject) {
            continue;
        }
        activeTables.add((BufferedDataTable) candidate);
    }

    if (activeTables.size() == 0) {
        // no active table on any port: port 0 is expected to carry the inactive marker
        assert inData[0] instanceof InactiveBranchPortObject;
        if (m_enableHiliting) {
            // install an empty hilite translation map so the internals are handled correctly
            m_hiliteTranslator.setMapper(new DefaultHiLiteMapper(new HashMap<RowKey, Set<RowKey>>()));
        }
        return new PortObject[] { inData[0] };
    }
    assert activeTables.size() > 0;

    // every further table must match the structure of the first active table
    BufferedDataTable reference = activeTables.get(0);
    for (int idx = 1; idx < activeTables.size(); idx++) {
        boolean sameStructure = reference.getSpec().equalStructure(activeTables.get(idx).getSpec());
        if (!sameStructure) {
            // incompatible - refuse to execute
            throw new Exception("The data table structures of the active " + "ports are not compatible.");
        }
    }

    int totalRowCount = 0;
    for (BufferedDataTable table : activeTables) {
        totalRowCount += table.getRowCount();
    }
    DataTable[] dtables = activeTables.toArray(new DataTable[activeTables.size()]);

    AppendedRowsTable out = new AppendedRowsTable((m_isAppendSuffix ? m_suffix : null), dtables);
    // note, this iterator throws runtime exceptions when canceled.
    AppendedRowsIterator it = out.iterator(exec, totalRowCount);
    BufferedDataContainer c = exec.createDataContainer(out.getDataTableSpec());
    try {
        // next() may throw and also reports progress
        while (it.hasNext()) {
            c.addRowToTable(it.next());
        }
    } catch (RuntimeCanceledExecutionException rcee) {
        // unwrap the cancellation carried by the runtime exception
        throw rcee.getCause();
    } finally {
        c.close();
    }

    if (it.getNrRowsSkipped() > 0) {
        setWarningMessage("Filtered out " + it.getNrRowsSkipped() + " duplicate row id(s).");
    }

    if (m_enableHiliting) {
        // build the hilite translation from the iterator's duplicate-name map
        Map<RowKey, Set<RowKey>> map = new HashMap<RowKey, Set<RowKey>>();
        Map<RowKey, RowKey> dupMap = it.getDuplicateNameMap();
        for (Map.Entry<RowKey, RowKey> entry : dupMap.entrySet()) {
            RowKey key = entry.getKey();
            RowKey value = entry.getValue();
            // entries whose key differs from its value are always mapped; identical
            // pairs only if no entry exists for the key with the suffix appended
            boolean keyDiffers = !key.equals(value);
            if (keyDiffers || !dupMap.containsKey(new RowKey(key.getString() + m_suffix))) {
                map.put(key, Collections.singleton(value));
            }
        }
        m_hiliteTranslator.setMapper(new DefaultHiLiteMapper(map));
    }
    return new BufferedDataTable[] { c.getTable() };
}
Also used : DataTable(org.knime.core.data.DataTable) BufferedDataTable(org.knime.core.node.BufferedDataTable) InactiveBranchPortObject(org.knime.core.node.port.inactive.InactiveBranchPortObject) Set(java.util.Set) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) RowKey(org.knime.core.data.RowKey) HashMap(java.util.HashMap) RuntimeCanceledExecutionException(org.knime.base.data.append.row.AppendedRowsIterator.RuntimeCanceledExecutionException) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) CanceledExecutionException(org.knime.core.node.CanceledExecutionException) IOException(java.io.IOException) AppendedRowsIterator(org.knime.core.data.append.AppendedRowsIterator) AppendedRowsTable(org.knime.core.data.append.AppendedRowsTable) BufferedDataTable(org.knime.core.node.BufferedDataTable) RuntimeCanceledExecutionException(org.knime.base.data.append.row.AppendedRowsIterator.RuntimeCanceledExecutionException) Vector(java.util.Vector) DefaultHiLiteMapper(org.knime.core.node.property.hilite.DefaultHiLiteMapper) InactiveBranchPortObject(org.knime.core.node.port.inactive.InactiveBranchPortObject) PortObject(org.knime.core.node.port.PortObject) HashMap(java.util.HashMap) Map(java.util.Map)

Aggregations

PortObject (org.knime.core.node.port.PortObject)173 BufferedDataTable (org.knime.core.node.BufferedDataTable)97 DataTableSpec (org.knime.core.data.DataTableSpec)68 PMMLPortObject (org.knime.core.node.port.pmml.PMMLPortObject)59 ColumnRearranger (org.knime.core.data.container.ColumnRearranger)39 InactiveBranchPortObject (org.knime.core.node.port.inactive.InactiveBranchPortObject)37 FlowVariablePortObject (org.knime.core.node.port.flowvariable.FlowVariablePortObject)35 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)34 IOException (java.io.IOException)33 DataRow (org.knime.core.data.DataRow)25 ExecutionMonitor (org.knime.core.node.ExecutionMonitor)24 SettingsModelString (org.knime.core.node.defaultnodesettings.SettingsModelString)23 DataCell (org.knime.core.data.DataCell)20 FileStorePortObject (org.knime.core.data.filestore.FileStorePortObject)19 CanceledExecutionException (org.knime.core.node.CanceledExecutionException)19 DatabasePortObject (org.knime.core.node.port.database.DatabasePortObject)18 PortObjectSpec (org.knime.core.node.port.PortObjectSpec)16 ExecutionContext (org.knime.core.node.ExecutionContext)15 DataColumnSpec (org.knime.core.data.DataColumnSpec)14 InactiveBranchPortObjectSpec (org.knime.core.node.port.inactive.InactiveBranchPortObjectSpec)13