Example 31 with ExecutionContext

Use of org.knime.core.node.ExecutionContext in project knime-core by knime.

Source: class CategoryToNumberNodeModel, method createStreamableOperator.

/**
 * {@inheritDoc}
 */
@Override
public StreamableOperator createStreamableOperator(final PartitionInfo partitionInfo, final PortObjectSpec[] inSpecs) throws InvalidSettingsException {
    return new StreamableOperator() {

        @Override
        public void runFinal(final PortInput[] inputs, final PortOutput[] outputs, final ExecutionContext exec) throws Exception {
            ColumnRearranger cr = createRearranger((DataTableSpec) inSpecs[0]);
            cr.createStreamableFunction(0, 0).runFinal(inputs, outputs, exec);
            // the optional PMML in port (can be null)
            PMMLPortObject inPMMLPort = null;
            if (m_pmmlInEnabled && inputs[1] != null) {
                inPMMLPort = (PMMLPortObject) ((PortObjectInput) inputs[1]).getPortObject();
            }
            PMMLPortObjectSpecCreator creator = new PMMLPortObjectSpecCreator(inPMMLPort, cr.createSpec());
            PMMLPortObject outPMMLPort = new PMMLPortObject(creator.createSpec(), inPMMLPort);
            for (CategoryToNumberCellFactory factory : m_factories) {
                PMMLMapValuesTranslator trans = new PMMLMapValuesTranslator(factory.getConfig(), new DerivedFieldMapper(inPMMLPort));
                outPMMLPort.addGlobalTransformations(trans.exportToTransDict());
            }
            PortObjectOutput portObjectOutput = (PortObjectOutput) outputs[1];
            portObjectOutput.setPortObject(outPMMLPort);
        }
    };
}
Also used: DerivedFieldMapper(org.knime.core.node.port.pmml.preproc.DerivedFieldMapper), ExecutionContext(org.knime.core.node.ExecutionContext), ColumnRearranger(org.knime.core.data.container.ColumnRearranger), PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject), StreamableOperator(org.knime.core.node.streamable.StreamableOperator), PortObjectInput(org.knime.core.node.streamable.PortObjectInput), PMMLPortObjectSpecCreator(org.knime.core.node.port.pmml.PMMLPortObjectSpecCreator), PortObjectOutput(org.knime.core.node.streamable.PortObjectOutput)
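
The PMML input above arrives as a PortObjectInput rather than a RowInput because the node declares that port non-streamable. A minimal sketch of the matching role declarations from the KNIME streaming API; the concrete roles shown are an assumption for illustration, not copied from the node:

import org.knime.core.node.streamable.InputPortRole;
import org.knime.core.node.streamable.OutputPortRole;

// Sketch: the data port is streamed row by row, while the optional PMML
// port is handed over as a whole PortObject (assumed roles, for illustration).
@Override
public InputPortRole[] getInputPortRoles() {
    return new InputPortRole[]{InputPortRole.DISTRIBUTED_STREAMABLE,
        InputPortRole.NONDISTRIBUTED_NONSTREAMABLE};
}

@Override
public OutputPortRole[] getOutputPortRoles() {
    return new OutputPortRole[]{OutputPortRole.DISTRIBUTED,
        OutputPortRole.NONDISTRIBUTED};
}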

Example 32 with ExecutionContext

Use of org.knime.core.node.ExecutionContext in project knime-core by knime.

Source: class ColumnAppenderNodeModel, method createStreamableOperator.

// ////////////// STREAMING FUNCTIONS ////////////////
/**
 * {@inheritDoc}
 */
@Override
public StreamableOperator createStreamableOperator(final PartitionInfo partitionInfo, final PortObjectSpec[] inSpecs) throws InvalidSettingsException {
    return new StreamableOperator() {

        @Override
        public void runFinal(final PortInput[] inputs, final PortOutput[] outputs, final ExecutionContext exec) throws Exception {
            RowInput in1 = (RowInput) inputs[0];
            RowInput in2 = (RowInput) inputs[1];
            RowOutput out = (RowOutput) outputs[0];
            CustomRowIterator tableIt1 = new CustomRowIteratorImpl2(in1);
            CustomRowIterator tableIt2 = new CustomRowIteratorImpl2(in2);
            compute(tableIt1, tableIt2, in1.getDataTableSpec().getNumColumns() + in2.getDataTableSpec().getNumColumns(), row -> {
                out.push(row);
            }, exec, -1, -1);
            // drain any remaining rows from both inputs, but discard them
            while (tableIt1.hasNext()) {
                tableIt1.next();
            }
            while (tableIt2.hasNext()) {
                tableIt2.next();
            }
            in1.close();
            in2.close();
            out.close();
        }
    };
}
Also used: RowOutput(org.knime.core.node.streamable.RowOutput), ExecutionContext(org.knime.core.node.ExecutionContext), StreamableOperator(org.knime.core.node.streamable.StreamableOperator), RowInput(org.knime.core.node.streamable.RowInput)
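
CustomRowIteratorImpl2 is not shown in this snippet; it wraps the RowInput's blocking poll() call behind an iterator-style interface. A minimal sketch of such a wrapper, as an assumption about its shape rather than the actual KNIME implementation:

import java.util.NoSuchElementException;

import org.knime.core.data.DataRow;
import org.knime.core.node.streamable.RowInput;

// Sketch of a RowInput-backed iterator (hypothetical, not KNIME source).
final class RowInputIterator {
    private final RowInput m_input;
    // row read ahead by hasNext(), consumed by next()
    private DataRow m_next;

    RowInputIterator(final RowInput input) {
        m_input = input;
    }

    boolean hasNext() throws InterruptedException {
        if (m_next == null) {
            // poll() blocks until a row arrives; null signals end of input
            m_next = m_input.poll();
        }
        return m_next != null;
    }

    DataRow next() throws InterruptedException {
        if (!hasNext()) {
            throw new NoSuchElementException();
        }
        final DataRow row = m_next;
        m_next = null;
        return row;
    }
}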

Example 33 with ExecutionContext

Use of org.knime.core.node.ExecutionContext in project knime-core by knime.

Source: class CAIMDiscretizationNodeModel, method execute.

/**
 * {@inheritDoc}
 */
@Override
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    // measure the time
    long startTime = System.currentTimeMillis();
    // empty model
    if (m_includedColumnNames.getIncludeList() == null || m_includedColumnNames.getIncludeList().size() == 0) {
        return new PortObject[] { inData[0], new DiscretizationModel() };
    }
    LOGGER.debug("Start discretizing.");
    // the algorithm is designed for binary class problems only
    // (positive, negative), so it is run once for each class value,
    // treating that value as the positive class and all others as negative
    exec.setProgress(0.0, "Preparing...");
    // check input data
    BufferedDataTable data = (BufferedDataTable) inData[0];
    // get class column index
    m_classifyColumnIndex = data.getDataTableSpec().findColumnIndex(m_classColumnName.getStringValue());
    assert m_classifyColumnIndex > -1;
    // create the class - index mapping
    createClassFromToIndexMaps(data.getDataTableSpec());
    // create the array with the result discretization schemes for
    // each included column
    DiscretizationScheme[] resultSchemes = new DiscretizationScheme[m_includedColumnNames.getIncludeList().size()];
    // for all included columns do the discretization
    int currentColumn = 0;
    for (String includedColumnName : m_includedColumnNames.getIncludeList()) {
        LOGGER.debug("Process column: " + includedColumnName);
        exec.setProgress("Discretizing column '" + includedColumnName + "'");
        ExecutionContext subExecPerColumn = exec.createSubExecutionContext(1.0D / m_includedColumnNames.getIncludeList().size());
        subExecPerColumn.checkCanceled();
        // never discretize the class column itself (should never happen,
        // as the class column should not be in the include list)
        if (m_classColumnName.getStringValue().equals(includedColumnName)) {
            continue;
        }
        // determine the column index of the current column
        int columnIndex = data.getDataTableSpec().findColumnIndex(includedColumnName);
        DataColumnDomain domain = data.getDataTableSpec().getColumnSpec(columnIndex).getDomain();
        double minValue = ((DoubleValue) domain.getLowerBound()).getDoubleValue();
        double maxValue = ((DoubleValue) domain.getUpperBound()).getDoubleValue();
        // find all distinct values of the column and create
        // a table with all possible interval boundaries (midpoint value of
        // adjacent values)
        subExecPerColumn.setProgress("Find possible boundaries.");
        BoundaryScheme boundaryScheme = null;
        // create subExec for sorting
        ExecutionContext subExecSort = subExecPerColumn.createSubExecutionContext(0.1);
        // long t1 = System.currentTimeMillis();
        if (m_classOptimizedVersion) {
            boundaryScheme = createAllIntervalBoundaries(data, columnIndex, subExecSort);
        } else {
            boundaryScheme = createAllIntervalBoundaries2(data, columnIndex, subExecSort);
        }
        subExecSort.setProgress(1.0D);
        // long t2 = System.currentTimeMillis() - t1;
        // LOGGER.error("Create boundaries time: " + (t2 / 1000.0)
        // + " optimized: " + m_classOptimizedVersion);
        // LOGGER.error("Boundaries: " + boundaryScheme.getHead());
        LinkedDouble allIntervalBoundaries = boundaryScheme.getHead();
        // create the initial discretization scheme
        DiscretizationScheme discretizationScheme = new DiscretizationScheme(new Interval(minValue, maxValue, true, true));
        double globalCAIM = 0;
        // perform the iterative search for the best interval boundaries
        int numInsertedBounds = 0;
        double currentCAIM = 0;
        // create subExec for inserted bounds
        ExecutionContext subExecBounds = subExecPerColumn.createSubExecutionContext(0.9);
        while (currentCAIM > globalCAIM || numInsertedBounds < m_classValues.length - 1) {
            subExecPerColumn.checkCanceled();
            // create subExec for counting
            ExecutionContext subExecCount = subExecBounds.createSubExecutionContext(1.0D / m_classValues.length);
            // LOGGER.debug("Inserted bounds: " + numInsertedBounds);
            // LOGGER.debug("interval boundaries: " + allIntervalBoundaries);
            // for all possible interval boundaries
            // insert each one, calculate the caim value and add
            // the one with the biggest caim
            LinkedDouble intervalBoundary = allIntervalBoundaries.m_next;
            currentCAIM = 0;
            LinkedDouble bestBoundary = null;
            long currentCountedBoundaries = 0;
            while (intervalBoundary != null) {
                subExecPerColumn.checkCanceled();
                // set progress
                currentCountedBoundaries++;
                subExecCount.setProgress((double) currentCountedBoundaries / (double) boundaryScheme.getNumBoundaries(), "Count for possible boundary " + currentCountedBoundaries + " of " + boundaryScheme.getNumBoundaries());
                // LOGGER.debug("current caim: " + currentCAIM);
                DiscretizationScheme tentativeDS = new DiscretizationScheme(discretizationScheme);
                tentativeDS.insertBound(intervalBoundary.m_value);
                // create the quanta matrix
                QuantaMatrix2D quantaMatrix = new QuantaMatrix2D(tentativeDS, m_classValueToIndexMap);
                // pass the data for filling the matrix
                quantaMatrix.countData(data, columnIndex, m_classifyColumnIndex);
                // calculate the caim
                double caim = quantaMatrix.calculateCaim();
                if (caim > currentCAIM) {
                    currentCAIM = caim;
                    bestBoundary = intervalBoundary;
                }
                intervalBoundary = intervalBoundary.m_next;
            }
            // if no boundary improved the CAIM value, leave the outer loop
            if (bestBoundary == null) {
                break;
            }
            // in this case accept the best discretization scheme
            if (currentCAIM > globalCAIM || numInsertedBounds < m_classValues.length) {
                int numIntervals = discretizationScheme.getNumIntervals();
                discretizationScheme.insertBound(bestBoundary.m_value);
                // remove the linked list element from the list
                bestBoundary.remove();
                globalCAIM = currentCAIM;
                if (numIntervals < discretizationScheme.getNumIntervals()) {
                    numInsertedBounds++;
                    subExecPerColumn.setProgress("Inserted bound " + numInsertedBounds);
                // LOGGER.debug("Inserted boundary: "
                // + bestBoundary.m_value);
                } else {
                    throw new IllegalStateException("Only useful bounds should be inserted: " + bestBoundary.m_value);
                }
            }
            subExecCount.setProgress(1.0D);
        }
        resultSchemes[currentColumn] = discretizationScheme;
        subExecBounds.setProgress(1.0D);
        // ensure the full progress is set for this iteration
        subExecPerColumn.setProgress(1.0D);
        currentColumn++;
    }
    // set the model
    DataTableSpec modelSpec = createModelSpec(m_includedColumnNames, data.getDataTableSpec());
    m_discretizationModel = new DiscretizationModel(resultSchemes, modelSpec);
    // create an output table that replaces the included columns by
    // interval values
    BufferedDataTable resultTable = createResultTable(exec, data, m_discretizationModel);
    // log the runtime of the execute method
    long runtime = System.currentTimeMillis() - startTime;
    LOGGER.debug("Binning runtime: " + (runtime / 1000.0) + " sec.");
    return new PortObject[] { resultTable, m_discretizationModel };
}
Also used: DataTableSpec(org.knime.core.data.DataTableSpec), DiscretizationScheme(org.knime.base.node.preproc.discretization.caim2.DiscretizationScheme), SettingsModelFilterString(org.knime.core.node.defaultnodesettings.SettingsModelFilterString), SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString), ExecutionContext(org.knime.core.node.ExecutionContext), DataColumnDomain(org.knime.core.data.DataColumnDomain), DoubleValue(org.knime.core.data.DoubleValue), DiscretizationModel(org.knime.base.node.preproc.discretization.caim2.DiscretizationModel), BufferedDataTable(org.knime.core.node.BufferedDataTable), PortObject(org.knime.core.node.port.PortObject), Interval(org.knime.base.node.preproc.discretization.caim2.Interval)
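
The progress handling above follows a common KNIME pattern: each column receives an equal share of the parent progress bar via createSubExecutionContext, and nested sub-contexts (0.1 for sorting, 0.9 for the bound search) split that share further. A standalone sketch of the idiom, reduced to a generic loop over equally weighted work items (the per-item work is elided):

import java.util.List;

import org.knime.core.node.CanceledExecutionException;
import org.knime.core.node.ExecutionContext;

// Sketch: each sub-context reports progress in [0, 1] locally but moves
// the parent progress bar only by its 1/n share.
void processAll(final List<String> items, final ExecutionContext exec)
        throws CanceledExecutionException {
    final double share = 1.0 / items.size();
    for (final String item : items) {
        final ExecutionContext sub = exec.createSubExecutionContext(share);
        sub.checkCanceled(); // propagate user cancellation promptly
        sub.setProgress(0.0, "Processing " + item);
        // ... per-item work goes here, reporting via sub.setProgress(fraction) ...
        sub.setProgress(1.0); // this item's share of the parent bar is done
    }
}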

Example 34 with ExecutionContext

Use of org.knime.core.node.ExecutionContext in project knime-core by knime.

Source: class RowFilterNodeModel, method createStreamableOperator.

/**
 * {@inheritDoc}
 */
@Override
public StreamableOperator createStreamableOperator(final PartitionInfo partitionInfo, final PortObjectSpec[] inSpecs) throws InvalidSettingsException {
    return new StreamableOperator() {

        @Override
        public StreamableOperatorInternals saveInternals() {
            return null;
        }

        @Override
        public void runFinal(final PortInput[] inputs, final PortOutput[] outputs, final ExecutionContext ctx) throws Exception {
            RowInput in = (RowInput) inputs[0];
            RowOutput out = (RowOutput) outputs[0];
            RowFilterNodeModel.this.execute(in, out, ctx);
        }
    };
}
Also used: RowOutput(org.knime.core.node.streamable.RowOutput), ExecutionContext(org.knime.core.node.ExecutionContext), StreamableOperator(org.knime.core.node.streamable.StreamableOperator), RowInput(org.knime.core.node.streamable.RowInput)
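
The operator above delegates to the node's execute(RowInput, RowOutput, ExecutionContext) overload, which is not part of this snippet. Such a method typically runs a poll/push loop; a minimal sketch under that assumption, with matches(...) as a hypothetical stand-in for the configured filter criteria:

import org.knime.core.data.DataRow;
import org.knime.core.node.ExecutionContext;
import org.knime.core.node.streamable.RowInput;
import org.knime.core.node.streamable.RowOutput;

// Sketch of a streaming row-filter loop (not the actual RowFilter code).
void filterRows(final RowInput in, final RowOutput out, final ExecutionContext exec)
        throws Exception {
    DataRow row;
    while ((row = in.poll()) != null) { // poll() blocks; null signals end of input
        exec.checkCanceled();
        if (matches(row)) {
            out.push(row);
        }
    }
    in.close();
    out.close();
}

// Hypothetical placeholder: a real node would apply its configured
// filter criteria here.
boolean matches(final DataRow row) {
    return true;
}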

Example 35 with ExecutionContext

Use of org.knime.core.node.ExecutionContext in project knime-core by knime.

Source: class BigGroupByTable, method createGroupByTable.

/**
 * {@inheritDoc}
 */
@Override
protected BufferedDataTable createGroupByTable(final ExecutionContext exec, final BufferedDataTable table, final DataTableSpec resultSpec, final int[] groupColIdx) throws CanceledExecutionException {
    LOGGER.debug("Entering createGroupByTable(exec, table) " + "of class BigGroupByTable.");
    final DataTableSpec origSpec = table.getDataTableSpec();
    // sort the data table in order to process the input table chunk wise
    final BufferedDataTable sortedTable;
    final ExecutionContext groupExec;
    final DataValueComparator[] comparators;
    if (groupColIdx.length < 1) {
        sortedTable = table;
        groupExec = exec;
        comparators = new DataValueComparator[0];
    } else {
        final ExecutionContext sortExec = exec.createSubExecutionContext(0.6);
        exec.setMessage("Sorting input table...");
        sortedTable = sortTable(sortExec, table, getGroupCols());
        sortExec.setProgress(1.0);
        groupExec = exec.createSubExecutionContext(0.4);
        comparators = new DataValueComparator[groupColIdx.length];
        for (int i = 0, length = groupColIdx.length; i < length; i++) {
            final DataColumnSpec colSpec = origSpec.getColumnSpec(groupColIdx[i]);
            comparators[i] = colSpec.getType().getComparator();
        }
    }
    final BufferedDataContainer dc = exec.createDataContainer(resultSpec);
    exec.setMessage("Creating groups");
    final DataCell[] previousGroup = new DataCell[groupColIdx.length];
    final DataCell[] currentGroup = new DataCell[groupColIdx.length];
    final MutableInteger groupCounter = new MutableInteger(0);
    boolean firstRow = true;
    final double numOfRows = sortedTable.size();
    long rowCounter = 0;
    // In the rare case that the DataCell comparator returns 0 for two
    // data cells that are not equal, we have to maintain a map with all
    // rows with equal cells in the group columns per chunk.
    // This variable stores these members for each chunk. A chunk consists
    // of rows which return 0 for the pairwise group value comparison.
    // Usually only equal data cells return 0 when compared with each other,
    // but on rare occasions data cells that are NOT equal also return 0
    // when compared to each other
    // (such as cells that contain chemical structures).
    // In this rare case this map will contain a separate entry for each
    // group of data cells that are pairwise equal in the chunk.
    final Map<GroupKey, Pair<ColumnAggregator[], Set<RowKey>>> chunkMembers = new LinkedHashMap<>(3);
    boolean logUnusualCells = true;
    String groupLabel = "";
    // this initialization cannot go into the constructor, as the super()
    // constructor calls this method directly
    initMissingValuesMap();
    for (final DataRow row : sortedTable) {
        // fetch the current group column values
        for (int i = 0, length = groupColIdx.length; i < length; i++) {
            currentGroup[i] = row.getCell(groupColIdx[i]);
        }
        if (firstRow) {
            groupLabel = createGroupLabelForProgress(currentGroup);
            System.arraycopy(currentGroup, 0, previousGroup, 0, currentGroup.length);
            firstRow = false;
        }
        // check whether the current row still belongs to the same chunk
        // by comparing the group column data cells
        if (!sameChunk(comparators, previousGroup, currentGroup)) {
            groupLabel = createGroupLabelForProgress(currentGroup);
            createTableRows(dc, chunkMembers, groupCounter);
            // set the current group as previous group
            System.arraycopy(currentGroup, 0, previousGroup, 0, currentGroup.length);
            if (logUnusualCells && chunkMembers.size() > 1) {
                // log the cell classes that cause the multi-member chunk
                if (LOGGER.isEnabledFor(LEVEL.INFO)) {
                    final StringBuilder buf = new StringBuilder();
                    buf.append("Data chunk with ");
                    buf.append(chunkMembers.size());
                    buf.append(" members occurred in the GroupBy node. " + "Involved classes are: ");
                    final GroupKey key = chunkMembers.keySet().iterator().next();
                    for (final DataCell cell : key.getGroupVals()) {
                        buf.append(cell.getClass().getCanonicalName());
                        buf.append(", ");
                    }
                    LOGGER.info(buf.toString());
                }
                logUnusualCells = false;
            }
            // reset the chunk members map
            chunkMembers.clear();
        }
        // process the row as one of the members of the current chunk
        Pair<ColumnAggregator[], Set<RowKey>> member = chunkMembers.get(new GroupKey(currentGroup));
        if (member == null) {
            Set<RowKey> rowKeys;
            if (isEnableHilite()) {
                rowKeys = new HashSet<>();
            } else {
                rowKeys = Collections.emptySet();
            }
            member = new Pair<>(cloneColumnAggregators(), rowKeys);
            final DataCell[] groupKeys = new DataCell[currentGroup.length];
            System.arraycopy(currentGroup, 0, groupKeys, 0, currentGroup.length);
            chunkMembers.put(new GroupKey(groupKeys), member);
        }
        // compute the current row values
        for (final ColumnAggregator colAggr : member.getFirst()) {
            final int colIdx = origSpec.findColumnIndex(colAggr.getOriginalColName());
            colAggr.getOperator(getGlobalSettings()).compute(row, colIdx);
        }
        if (isEnableHilite()) {
            member.getSecond().add(row.getKey());
        }
        groupExec.checkCanceled();
        groupExec.setProgress(++rowCounter / numOfRows, groupLabel);
    }
    // create the final row for the last chunk after processing the last
    // table row
    createTableRows(dc, chunkMembers, groupCounter);
    dc.close();
    return dc.getTable();
}
Also used: DataTableSpec(org.knime.core.data.DataTableSpec), HashSet(java.util.HashSet), Set(java.util.Set), RowKey(org.knime.core.data.RowKey), DataValueComparator(org.knime.core.data.DataValueComparator), DataRow(org.knime.core.data.DataRow), LinkedHashMap(java.util.LinkedHashMap), DataColumnSpec(org.knime.core.data.DataColumnSpec), BufferedDataTable(org.knime.core.node.BufferedDataTable), Pair(org.knime.core.util.Pair), BufferedDataContainer(org.knime.core.node.BufferedDataContainer), MutableInteger(org.knime.core.util.MutableInteger), ExecutionContext(org.knime.core.node.ExecutionContext), ColumnAggregator(org.knime.base.data.aggregation.ColumnAggregator), DataCell(org.knime.core.data.DataCell)
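
Stripped of the grouping logic, the table-building skeleton above is the standard BufferedDataContainer idiom: create a container from the execution context, add rows while checking for cancellation and reporting progress, then close the container before fetching the result. A minimal sketch of just that skeleton:

import org.knime.core.data.DataRow;
import org.knime.core.node.BufferedDataContainer;
import org.knime.core.node.BufferedDataTable;
import org.knime.core.node.CanceledExecutionException;
import org.knime.core.node.ExecutionContext;

// Sketch: copy a table row by row, reporting progress along the way.
BufferedDataTable copyWithProgress(final BufferedDataTable table, final ExecutionContext exec)
        throws CanceledExecutionException {
    final BufferedDataContainer dc = exec.createDataContainer(table.getDataTableSpec());
    final double numRows = table.size();
    long count = 0;
    for (final DataRow row : table) {
        exec.checkCanceled(); // abort cleanly if the user cancels
        dc.addRowToTable(row);
        exec.setProgress(++count / numRows, "Row " + count);
    }
    dc.close(); // the container must be closed before getTable()
    return dc.getTable();
}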

Aggregations

ExecutionContext (org.knime.core.node.ExecutionContext): 107
DataTableSpec (org.knime.core.data.DataTableSpec): 61
StreamableOperator (org.knime.core.node.streamable.StreamableOperator): 57
ColumnRearranger (org.knime.core.data.container.ColumnRearranger): 45
BufferedDataTable (org.knime.core.node.BufferedDataTable): 44
DataRow (org.knime.core.data.DataRow): 35
RowInput (org.knime.core.node.streamable.RowInput): 26
RowOutput (org.knime.core.node.streamable.RowOutput): 24
StreamableFunction (org.knime.core.node.streamable.StreamableFunction): 23
ExecutionMonitor (org.knime.core.node.ExecutionMonitor): 20
InvalidSettingsException (org.knime.core.node.InvalidSettingsException): 20
DataColumnSpec (org.knime.core.data.DataColumnSpec): 19
DataCell (org.knime.core.data.DataCell): 18
BufferedDataContainer (org.knime.core.node.BufferedDataContainer): 15
NodeModel (org.knime.core.node.NodeModel): 14
PortObject (org.knime.core.node.port.PortObject): 14
RowKey (org.knime.core.data.RowKey): 13
CanceledExecutionException (org.knime.core.node.CanceledExecutionException): 13
PMMLPortObject (org.knime.core.node.port.pmml.PMMLPortObject): 13
SettingsModelString (org.knime.core.node.defaultnodesettings.SettingsModelString): 12