Search in sources :

Example 76 with PMMLPortObject

use of org.knime.core.node.port.pmml.PMMLPortObject in project knime-core by knime.

the class AbstractNormalizerPMMLNodeModel method execute.

/**
 * {@inheritDoc}
 *
 * <p>Returns two port objects: the data table taken from the result of {@code calculate}, and a
 * PMML port object that is created from the optional incoming PMML port and to which the
 * normalization is added as global transformations.
 */
@Override
protected PortObject[] execute(final PortObject[] inObjects, final ExecutionContext exec) throws Exception {
    final CalculationResult calcResult = calculate(inObjects, exec);
    final BufferedDataTable normalizedTable = calcResult.getDataTable();

    // The PMML in-port is optional; stay with null when the node has no model input.
    PMMLPortObject pmmlIn = null;
    if (m_hasModelIn) {
        pmmlIn = (PMMLPortObject) inObjects[1];
    }

    final PMMLNormalizeTranslator translator =
        new PMMLNormalizeTranslator(calcResult.getConfig(), new DerivedFieldMapper(pmmlIn));

    // The outgoing PMML spec is created from the incoming PMML (if any) and the input table spec.
    final DataTableSpec inTableSpec = (DataTableSpec) inObjects[0].getSpec();
    final PMMLPortObject pmmlOut =
        new PMMLPortObject(new PMMLPortObjectSpecCreator(pmmlIn, inTableSpec).createSpec(), pmmlIn);
    pmmlOut.addGlobalTransformations(translator.exportToTransDict());

    return new PortObject[] { normalizedTable, pmmlOut };
}
Also used : DerivedFieldMapper(org.knime.core.node.port.pmml.preproc.DerivedFieldMapper) DataTableSpec(org.knime.core.data.DataTableSpec) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) BufferedDataTable(org.knime.core.node.BufferedDataTable) PMMLNormalizeTranslator(org.knime.base.data.normalize.PMMLNormalizeTranslator) PortObject(org.knime.core.node.port.PortObject) PMMLPortObjectSpecCreator(org.knime.core.node.port.pmml.PMMLPortObjectSpecCreator)

Example 77 with PMMLPortObject

use of org.knime.core.node.port.pmml.PMMLPortObject in project knime-core by knime.

the class LinearRegressionContent method createPortObject.

/**
 * Builds a PMML regression port object that describes this linear
 * regression model.
 *
 * @param inPMMLPort the incoming PMML port object, or {@code null} if there
 *      is none
 * @param dts the full data table spec with which the regression model was
 *      created
 * @param learningSpec a data table spec containing only the learning columns
 * @return the new PMML port object with the regression translator added
 * @throws InvalidSettingsException if the settings are invalid
 */
public PMMLPortObject createPortObject(final PMMLPortObject inPMMLPort, final DataTableSpec dts, final DataTableSpec learningSpec) throws InvalidSettingsException {
    final PMMLPortObjectSpec incomingSpec = (inPMMLPort == null) ? null : inPMMLPort.getSpec();
    final PMMLPortObjectSpec outSpec = createPortObjectSpec(incomingSpec, dts, learningSpec);
    final PMMLPortObject result = new PMMLPortObject(outSpec, inPMMLPort);

    // One numeric predictor per multiplier, named after the column of
    // m_spec at the same index.
    final int predictorCount = m_multipliers.length;
    final NumericPredictor[] predictors = new NumericPredictor[predictorCount];
    for (int col = 0; col < predictorCount; col++) {
        final String columnName = m_spec.getColumnSpec(col).getName();
        predictors[col] = new NumericPredictor(columnName, 1, m_multipliers[col]);
    }
    final RegressionTable table = new RegressionTable(m_offset, predictors);

    // "Response" is the fallback target name, kept to maintain compatibility
    // with the previous SAX-based implementation; the first target field of
    // the spec takes precedence when there is one.
    final List<String> targets = outSpec.getTargetFields();
    final String targetName = targets.isEmpty() ? "Response" : targets.get(0);

    result.addModelTranslater(new PMMLRegressionTranslator(MODEL_NAME, ALGORITHM_NAME, table, targetName));
    return result;
}
Also used : PMMLRegressionTranslator(org.knime.base.node.mine.regression.PMMLRegressionTranslator) PMMLPortObjectSpec(org.knime.core.node.port.pmml.PMMLPortObjectSpec) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) NumericPredictor(org.knime.base.node.mine.regression.PMMLRegressionTranslator.NumericPredictor) RegressionTable(org.knime.base.node.mine.regression.PMMLRegressionTranslator.RegressionTable)

Example 78 with PMMLPortObject

use of org.knime.core.node.port.pmml.PMMLPortObject in project knime-core by knime.

the class LinRegLearnerNodeModel method execute.

/**
 * {@inheritDoc}
 *
 * <p>Learns a linear regression on the table at {@code inData[0]} in up to three passes over the
 * data:
 * <ol>
 * <li>accumulate the matrix {@code A^T x A} and the column means, remembering which rows contain
 * missing values;</li>
 * <li>multiply the inverse of that matrix with {@code A^T x b} to obtain the multipliers;</li>
 * <li>only if {@code m_isCalcError} is set: sum the squared residuals into {@code m_error}.</li>
 * </ol>
 * Rows with a missing target cell, or for which {@code readIntoBuffer} reports missing values,
 * are skipped in every pass and counted in {@code m_nrRowsSkipped}.
 *
 * <p>Besides returning the PMML port object, the method updates the fields {@code m_nrRows},
 * {@code m_nrRowsSkipped}, {@code m_error}, {@code m_params}, {@code m_rowContainer} and
 * {@code m_actualUsedColumns}.
 *
 * @throws Exception if the table has no more rows than unknowns, if execution is canceled via
 *             {@code exec.checkCanceled()}, or if one of the called helpers fails
 */
@Override
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    /*
     * What comes next is the matrix calculation, solving A \times w = b
     * where A is the matrix having the training data (as many rows as there
     * are rows in inData[0]), w is the vector of weights to learn (number of
     * variables) and b is the target output.
     */
    // NOTE(review): the original comment here read "reset was called, must be cleared", but
    // nothing is cleared at this point — presumably the member state is expected to have been
    // reset before execute runs; confirm.
    final BufferedDataTable data = (BufferedDataTable) inData[0];
    final DataTableSpec spec = data.getDataTableSpec();
    final String[] includes = computeIncludes(spec);
    // one unknown per included column plus one more (see the "column fixed to one" note below)
    final int nrUnknown = includes.length + 1;
    double[] means = new double[includes.length];
    // indices of the included columns within the input spec
    final int[] colIndizes = new int[includes.length];
    for (int i = 0; i < includes.length; i++) {
        colIndizes[i] = spec.findColumnIndex(includes[i]);
    }
    // index of the target column (m_target)
    final int target = spec.findColumnIndex(m_target);
    // this is the matrix (A^T x A) where A is the training data including
    // one column fixed to one.
    // (we do it here manually in order to avoid to get all the data in
    // double[][])
    double[][] ata = new double[nrUnknown][nrUnknown];
    // per-row scratch space filled by readIntoBuffer
    double[] buffer = new double[nrUnknown];
    // we memorize for each row if it contains missing values.
    BitSet missingSet = new BitSet();
    m_nrRows = data.getRowCount();
    int myProgress = 0;
    // we need 2 or 3 scans on the data (first run was done already);
    // the third scan only happens when the error is calculated
    final double totalProgress = (2 + (m_isCalcError ? 1 : 0)) * m_nrRows;
    int rowCount = 0;
    // only the first skipped row is logged as a warning, later ones on debug level
    boolean hasPrintedWarning = false;
    // first pass: accumulate A^T x A and the means
    for (RowIterator it = data.iterator(); it.hasNext(); rowCount++) {
        DataRow row = it.next();
        myProgress++;
        exec.setProgress(myProgress / totalProgress, "Calculating matrix " + (rowCount + 1) + " (\"" + row.getKey().getString() + "\")");
        exec.checkCanceled();
        DataCell targetValue = row.getCell(target);
        // read data from row into buffer, skip missing value rows
        // (readIntoBuffer is not called at all when the target cell is already missing)
        boolean containsMissing = targetValue.isMissing() || readIntoBuffer(row, buffer, colIndizes);
        missingSet.set(rowCount, containsMissing);
        if (containsMissing) {
            String errorMessage = "Row \"" + row.getKey().getString() + "\" contains missing values, skipping it.";
            if (!hasPrintedWarning) {
                LOGGER.warn(errorMessage + " Suppress further warnings.");
                hasPrintedWarning = true;
            } else {
                LOGGER.debug(errorMessage);
            }
            m_nrRowsSkipped++;
            // with next row (rowCount is still incremented by the loop header)
            continue;
        }
        updateMean(buffer, means);
        // the matrix is symmetric (both halves are nevertheless filled explicitly)
        for (int i = 0; i < nrUnknown; i++) {
            for (int j = 0; j < nrUnknown; j++) {
                ata[i][j] += buffer[i] * buffer[j];
            }
        }
    }
    assert (m_nrRows == rowCount);
    normalizeMean(means);
    // no unique solution when there are less rows than unknown variables
    // NOTE(review): rowCount includes the rows skipped above because of missing values, so this
    // check does not look at the number of rows that actually entered the matrix — confirm
    // whether that is intended.
    if (rowCount <= nrUnknown) {
        throw new Exception("Too few rows to perform regression (" + rowCount + " rows, but degree of freedom of " + nrUnknown + ")");
    }
    exec.setMessage("Calculating pseudo inverse...");
    double[][] ataInverse = MathUtils.inverse(ata);
    checkForNaN(ataInverse);
    // second pass: multiply with A^T and b, i.e. (A^T x A)^-1 x A^T x b
    double[] multipliers = new double[nrUnknown];
    rowCount = 0;
    for (RowIterator it = data.iterator(); it.hasNext(); rowCount++) {
        DataRow row = it.next();
        exec.setMessage("Determining output " + (rowCount + 1) + " (\"" + row.getKey().getString() + "\")");
        myProgress++;
        exec.setProgress(myProgress / totalProgress);
        exec.checkCanceled();
        // does the row contain missing values (as recorded in the first pass)?
        if (missingSet.get(rowCount)) {
            // the message was logged in the first pass, silently ignore here.
            continue;
        }
        boolean containsMissing = readIntoBuffer(row, buffer, colIndizes);
        assert !containsMissing;
        DataCell targetValue = row.getCell(target);
        double b = ((DoubleValue) targetValue).getDoubleValue();
        // add this row's contribution: (row i of the inverse) . buffer, scaled by the target
        for (int i = 0; i < nrUnknown; i++) {
            double buf = 0.0;
            for (int j = 0; j < nrUnknown; j++) {
                buf += ataInverse[i][j] * buffer[j];
            }
            multipliers[i] += buf * b;
        }
    }
    // optional third pass: sum of squared differences between target and model output
    if (m_isCalcError) {
        assert m_error == 0.0;
        rowCount = 0;
        for (RowIterator it = data.iterator(); it.hasNext(); rowCount++) {
            DataRow row = it.next();
            exec.setMessage("Calculating error " + (rowCount + 1) + " (\"" + row.getKey().getString() + "\")");
            myProgress++;
            exec.setProgress(myProgress / totalProgress);
            exec.checkCanceled();
            // does the row contain missing values (as recorded in the first pass)?
            if (missingSet.get(rowCount)) {
                // the message was logged in the first pass, silently ignore here.
                continue;
            }
            boolean hasMissing = readIntoBuffer(row, buffer, colIndizes);
            assert !hasMissing;
            DataCell targetValue = row.getCell(target);
            double b = ((DoubleValue) targetValue).getDoubleValue();
            // model output for this row
            double out = 0.0;
            for (int i = 0; i < nrUnknown; i++) {
                out += multipliers[i] * buffer[i];
            }
            m_error += (b - out) * (b - out);
        }
    }
    // handle the optional PMML input
    // NOTE(review): presumably inData[1] is null when the optional port is not connected — confirm
    PMMLPortObject inPMMLPort = (PMMLPortObject) inData[1];
    DataTableSpec outSpec = getLearningSpec(spec);
    // the first multiplier is used as the offset, the remaining ones as the per-column multipliers
    double offset = multipliers[0];
    multipliers = Arrays.copyOfRange(multipliers, 1, multipliers.length);
    m_params = new LinearRegressionContent(outSpec, offset, multipliers, means);
    // cache the entire table as otherwise the color information
    // may be lost (filtering out the "colored" column)
    m_rowContainer = new DefaultDataArray(data, m_firstRowPaint, m_rowCountPaint);
    m_actualUsedColumns = includes;
    return new PortObject[] { m_params.createPortObject(inPMMLPort, spec, outSpec) };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DefaultDataArray(org.knime.base.node.util.DefaultDataArray) BitSet(java.util.BitSet) DataRow(org.knime.core.data.DataRow) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) CanceledExecutionException(org.knime.core.node.CanceledExecutionException) IOException(java.io.IOException) DoubleValue(org.knime.core.data.DoubleValue) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) RowIterator(org.knime.core.data.RowIterator) LinearRegressionContent(org.knime.base.node.mine.regression.linear.LinearRegressionContent) BufferedDataTable(org.knime.core.node.BufferedDataTable) DataCell(org.knime.core.data.DataCell) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) PortObject(org.knime.core.node.port.PortObject)

Example 79 with PMMLPortObject

use of org.knime.core.node.port.pmml.PMMLPortObject in project knime-core by knime.

the class GradientBoostingPMMLPredictorNodeModel method createStreamableOperator.

/**
 * {@inheritDoc}
 *
 * <p>The returned operator imports the gradient boosting model from the PMML port object at
 * input 0, builds a predictor for the data spec at {@code inSpecs[1]} and delegates the actual
 * work to the streamable function of the predictor's column rearranger.
 */
@Override
public StreamableOperator createStreamableOperator(final PartitionInfo partitionInfo, final PortObjectSpec[] inSpecs) throws InvalidSettingsException {
    return new StreamableOperator() {

        @Override
        public void runFinal(final PortInput[] inputs, final PortOutput[] outputs, final ExecutionContext exec) throws Exception {
            // input 0 carries the PMML model
            final PortObjectInput modelInput = (PortObjectInput) inputs[0];
            final PMMLPortObject pmml = (PMMLPortObject) modelInput.getPortObject();
            final DataTableSpec tableSpec = (DataTableSpec) inSpecs[1];

            final GradientBoostingModelPortObject boostingModel = importModel(pmml);
            final GradientBoostingPredictor<?> predictor = new GradientBoostingPredictor<>(
                boostingModel.getEnsembleModel(), boostingModel.getSpec(), tableSpec, m_configuration);

            // (1, 0) presumably selects data in-port 1 and out-port 0 — TODO confirm
            predictor.getPredictionRearranger().createStreamableFunction(1, 0).runFinal(inputs, outputs, exec);
        }
    };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) ExecutionContext(org.knime.core.node.ExecutionContext) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) GradientBoostingModelPortObject(org.knime.base.node.mine.treeensemble2.model.GradientBoostingModelPortObject) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) StreamableOperator(org.knime.core.node.streamable.StreamableOperator) GradientBoostingPredictor(org.knime.base.node.mine.treeensemble2.node.gradientboosting.predictor.GradientBoostingPredictor) StreamableFunction(org.knime.core.node.streamable.StreamableFunction)

Example 80 with PMMLPortObject

use of org.knime.core.node.port.pmml.PMMLPortObject in project knime-core by knime.

the class RegressionTreePMMLPredictorNodeModel method execute.

/**
 * {@inheritDoc}
 *
 * <p>Imports the regression tree model from the PMML port object at input 0 and appends its
 * predictions to the table at input 1 by means of a column rearranger.
 */
@Override
public PortObject[] execute(final PortObject[] inObjects, final ExecutionContext exec) throws Exception {
    final PMMLPortObject pmmlModel = (PMMLPortObject) inObjects[0];
    final Pair<RegressionTreeModel, RegressionTreeModelPortObjectSpec> imported = importModel(pmmlModel);
    final BufferedDataTable inTable = (BufferedDataTable) inObjects[1];
    final DataTableSpec inTableSpec = inTable.getDataTableSpec();

    // Can only happen if configure was not called before execute, e.g. in the generic PMML
    // Predictor: fall back to a default configuration for the model's target column.
    if (m_configuration == null) {
        final String targetName = translateSpec(pmmlModel.getSpec()).getTargetColumn().getName();
        m_configuration = RegressionTreePredictorConfiguration.createDefault(targetName);
    }

    final RegressionTreePredictor predictor =
        new RegressionTreePredictor(imported.getFirst(), imported.getSecond(), inTableSpec, m_configuration);
    final ColumnRearranger predictionColumns = predictor.getPredictionRearranger();
    return new BufferedDataTable[] { exec.createColumnRearrangeTable(inTable, predictionColumns, exec) };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) RegressionTreeModel(org.knime.base.node.mine.treeensemble2.model.RegressionTreeModel) BufferedDataTable(org.knime.core.node.BufferedDataTable) RegressionTreeModelPortObjectSpec(org.knime.base.node.mine.treeensemble2.model.RegressionTreeModelPortObjectSpec)

Aggregations

PMMLPortObject (org.knime.core.node.port.pmml.PMMLPortObject)90 BufferedDataTable (org.knime.core.node.BufferedDataTable)53 DataTableSpec (org.knime.core.data.DataTableSpec)50 PortObject (org.knime.core.node.port.PortObject)47 ColumnRearranger (org.knime.core.data.container.ColumnRearranger)35 PMMLPortObjectSpec (org.knime.core.node.port.pmml.PMMLPortObjectSpec)25 PMMLPortObjectSpecCreator (org.knime.core.node.port.pmml.PMMLPortObjectSpecCreator)21 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)18 ExecutionContext (org.knime.core.node.ExecutionContext)13 DataColumnSpec (org.knime.core.data.DataColumnSpec)12 DataRow (org.knime.core.data.DataRow)12 DerivedFieldMapper (org.knime.core.node.port.pmml.preproc.DerivedFieldMapper)11 StreamableOperator (org.knime.core.node.streamable.StreamableOperator)11 Node (org.w3c.dom.Node)10 SettingsModelString (org.knime.core.node.defaultnodesettings.SettingsModelString)9 IOException (java.io.IOException)8 DataCell (org.knime.core.data.DataCell)8 PMMLDocument (org.dmg.pmml.PMMLDocument)7 PMMLGeneralRegressionTranslator (org.knime.base.node.mine.regression.pmmlgreg.PMMLGeneralRegressionTranslator)7 StreamableFunction (org.knime.core.node.streamable.StreamableFunction)7