Search in sources :

Example 31 with PMMLPortObject

use of org.knime.core.node.port.pmml.PMMLPortObject in project knime-core by knime.

the class DecisionTreeLearnerNodeModel2 method execute.

/**
 * Start of decision tree induction.
 *
 * @param exec the execution context for this run
 * @param data the input data to build the decision tree from
 * @return an empty data table array, as just a model is provided
 * @throws Exception any type of exception, e.g. for cancellation,
 *         invalid input,...
 * @see NodeModel#execute(BufferedDataTable[],ExecutionContext)
 */
@Override
protected PortObject[] execute(final PortObject[] data, final ExecutionContext exec) throws Exception {
    // holds the warning message displayed after execution
    m_warningMessageSb = new StringBuilder();
    ParallelProcessing parallelProcessing = new ParallelProcessing(m_parallelProcessing.getIntValue());
    if (LOGGER.isDebugEnabled()) {
        LOGGER.debug("Number available threads: " + parallelProcessing.getMaxNumberThreads() + " used threads: " + parallelProcessing.getCurrentThreadsInUse());
    }
    exec.setProgress("Preparing...");
    // check input data
    assert (data != null && data[DATA_INPORT] != null);
    BufferedDataTable inData = (BufferedDataTable) data[DATA_INPORT];
    // get column with color information
    String colorColumn = null;
    for (DataColumnSpec s : inData.getDataTableSpec()) {
        if (s.getColorHandler() != null) {
            colorColumn = s.getName();
            break;
        }
    }
    // the data table must have more than 2 records
    if (inData.size() <= 1) {
        throw new IllegalArgumentException("Input data table must have at least 2 records!");
    }
    // get class column index
    int classColumnIndex = inData.getDataTableSpec().findColumnIndex(m_classifyColumn.getStringValue());
    assert classColumnIndex > -1;
    // create initial In-Memory table
    exec.setProgress("Create initial In-Memory table...");
    InMemoryTableCreator tableCreator = new InMemoryTableCreator(inData, classColumnIndex, m_minNumberRecordsPerNode.getIntValue(), m_skipColumns.getBooleanValue());
    InMemoryTable initialTable = tableCreator.createInMemoryTable(exec.createSubExecutionContext(0.05));
    int removedRows = tableCreator.getRemovedRowsDueToMissingClassValue();
    if (removedRows == inData.size()) {
        throw new IllegalArgumentException("Class column contains only " + "missing values");
    }
    if (removedRows > 0) {
        m_warningMessageSb.append(removedRows);
        m_warningMessageSb.append(" rows removed due to missing class value;");
    }
    // the all over row count is used to report progress
    m_alloverRowCount = initialTable.getSumOfWeights();
    // set the finishing counter
    // this counter will always be incremented when a leaf node is
    // created, as this determines the recursion end and can thus
    // be used for progress indication
    m_finishedCounter = new AtomicDouble(0);
    // get the number of attributes
    m_numberAttributes = initialTable.getNumAttributes();
    // create the quality measure
    final SplitQualityMeasure splitQualityMeasure;
    if (m_splitQualityMeasureType.getStringValue().equals(SPLIT_QUALITY_GINI)) {
        splitQualityMeasure = new SplitQualityGini();
    } else {
        splitQualityMeasure = new SplitQualityGainRatio();
    }
    // build the tree
    // before this set the node counter to 0
    m_counter.set(0);
    exec.setMessage("Building tree...");
    final int firstSplitColIdx = initialTable.getAttributeIndex(m_firstSplitCol.getStringValue());
    DecisionTreeNode root = null;
    root = buildTree(initialTable, exec, 0, splitQualityMeasure, parallelProcessing, firstSplitColIdx);
    boolean isBinaryNominal = m_binaryNominalSplitMode.getBooleanValue();
    boolean isFilterInvalidAttributeValues = m_filterNominalValuesFromParent.getBooleanValue();
    if (isBinaryNominal && isFilterInvalidAttributeValues) {
        // traverse tree nodes and remove from the children the attribute
        // values that were filtered out further up in the tree. "Bug" 3124
        root.filterIllegalAttributes(Collections.<String, Set<String>>emptyMap());
    }
    // the decision tree model saved as PMML at the second out-port
    DecisionTree decisionTree = new DecisionTree(root, m_classifyColumn.getStringValue(), /* strategy has to be set explicitly as the default in PMML is
                    none, which means rows with missing values are not
                    classified. */
    PMMLMissingValueStrategy.get(m_missingValues.getStringValue()), PMMLNoTrueChildStrategy.get(m_noTrueChild.getStringValue()));
    decisionTree.setColorColumn(colorColumn);
    // prune the tree
    exec.setMessage("Prune tree with " + m_pruningMethod.getStringValue() + "...");
    pruneTree(decisionTree);
    // add highlight patterns and color information
    exec.setMessage("Adding hilite and color info to tree...");
    addHiliteAndColorInfo(inData, decisionTree);
    LOGGER.info("Decision tree consisting of " + decisionTree.getNumberNodes() + " nodes created with pruning method " + m_pruningMethod.getStringValue());
    // set the warning message if available
    if (m_warningMessageSb.length() > 0) {
        setWarningMessage(m_warningMessageSb.toString());
    }
    // reset the number available threads
    parallelProcessing.reset();
    parallelProcessing = null;
    // no data out table is created -> return an empty table array
    exec.setMessage("Creating PMML decision tree model...");
    // handle the optional PMML input
    PMMLPortObject inPMMLPort = m_pmmlInEnabled ? (PMMLPortObject) data[1] : null;
    DataTableSpec inSpec = inData.getSpec();
    PMMLPortObjectSpec outPortSpec = createPMMLPortObjectSpec(inPMMLPort == null ? null : inPMMLPort.getSpec(), inSpec);
    PMMLPortObject outPMMLPort = new PMMLPortObject(outPortSpec, inPMMLPort, inData.getSpec());
    outPMMLPort.addModelTranslater(new PMMLDecisionTreeTranslator(decisionTree));
    m_decisionTree = decisionTree;
    return new PortObject[] { outPMMLPort };
}
Also used : DecisionTree(org.knime.base.node.mine.decisiontree2.model.DecisionTree) DataTableSpec(org.knime.core.data.DataTableSpec) PMMLPortObjectSpec(org.knime.core.node.port.pmml.PMMLPortObjectSpec) PMMLDecisionTreeTranslator(org.knime.base.node.mine.decisiontree2.PMMLDecisionTreeTranslator) SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString) DataColumnSpec(org.knime.core.data.DataColumnSpec) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) BufferedDataTable(org.knime.core.node.BufferedDataTable) PortObject(org.knime.core.node.port.PortObject) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) DecisionTreeNode(org.knime.base.node.mine.decisiontree2.model.DecisionTreeNode)

Example 32 with PMMLPortObject

use of org.knime.core.node.port.pmml.PMMLPortObject in project knime-core by knime.

the class DecTreePredictorNodeModel method execute.

/**
 * {@inheritDoc}
 */
@Override
public PortObject[] execute(final PortObject[] inPorts, final ExecutionContext exec) throws CanceledExecutionException, Exception {
    exec.setMessage("Decision Tree Predictor: Loading predictor...");
    PMMLPortObject port = (PMMLPortObject) inPorts[INMODELPORT];
    List<Node> models = port.getPMMLValue().getModels(PMMLModelType.TreeModel);
    if (models.isEmpty()) {
        String msg = "Decision Tree evaluation failed: " + "No tree model found.";
        LOGGER.error(msg);
        throw new RuntimeException(msg);
    }
    PMMLDecisionTreeTranslator trans = new PMMLDecisionTreeTranslator();
    port.initializeModelTranslator(trans);
    DecisionTree decTree = trans.getDecisionTree();
    decTree.resetColorInformation();
    BufferedDataTable inData = (BufferedDataTable) inPorts[INDATAPORT];
    // get column with color information
    String colorColumn = null;
    for (DataColumnSpec s : inData.getDataTableSpec()) {
        if (s.getColorHandler() != null) {
            colorColumn = s.getName();
            break;
        }
    }
    decTree.setColorColumn(colorColumn);
    exec.setMessage("Decision Tree Predictor: start execution.");
    PortObjectSpec[] inSpecs = new PortObjectSpec[] { inPorts[0].getSpec(), inPorts[1].getSpec() };
    DataTableSpec outSpec = createOutTableSpec(inSpecs);
    BufferedDataContainer outData = exec.createDataContainer(outSpec);
    long coveredPattern = 0;
    long nrPattern = 0;
    long rowCount = 0;
    final long numberRows = inData.size();
    exec.setMessage("Classifying...");
    List<String> predictionValues = getPredictionStrings((PMMLPortObjectSpec) inPorts[INMODELPORT].getSpec());
    for (DataRow thisRow : inData) {
        DataCell cl = null;
        LinkedHashMap<String, Double> classDistrib = null;
        try {
            Pair<DataCell, LinkedHashMap<DataCell, Double>> pair = decTree.getWinnerAndClasscounts(thisRow, inData.getDataTableSpec());
            cl = pair.getFirst();
            LinkedHashMap<DataCell, Double> classCounts = pair.getSecond();
            classDistrib = getDistribution(classCounts);
            if (coveredPattern < m_maxNumCoveredPattern.getIntValue()) {
                // remember this one for HiLite support
                decTree.addCoveredPattern(thisRow, inData.getDataTableSpec());
                coveredPattern++;
            } else {
                // too many patterns for HiLite - at least remember color
                decTree.addCoveredColor(thisRow, inData.getDataTableSpec());
            }
            nrPattern++;
        } catch (Exception e) {
            LOGGER.error("Decision Tree evaluation failed: " + e.getMessage());
            throw e;
        }
        if (cl == null) {
            LOGGER.error("Decision Tree evaluation failed: result empty");
            throw new Exception("Decision Tree evaluation failed.");
        }
        DataCell[] newCells = new DataCell[outSpec.getNumColumns()];
        int numInCells = thisRow.getNumCells();
        for (int i = 0; i < numInCells; i++) {
            newCells[i] = thisRow.getCell(i);
        }
        if (m_showDistribution.getBooleanValue()) {
            assert predictionValues.size() >= newCells.length - 1 - numInCells : "Could not determine the prediction values: " + newCells.length + "; " + numInCells + "; " + predictionValues;
            for (int i = numInCells; i < newCells.length - 1; i++) {
                String predClass = predictionValues.get(i - numInCells);
                if (classDistrib != null && classDistrib.get(predClass) != null) {
                    newCells[i] = new DoubleCell(classDistrib.get(predClass));
                } else {
                    newCells[i] = new DoubleCell(0.0);
                }
            }
        }
        newCells[newCells.length - 1] = cl;
        outData.addRowToTable(new DefaultRow(thisRow.getKey(), newCells));
        rowCount++;
        if (rowCount % 100 == 0) {
            exec.setProgress(rowCount / (double) numberRows, "Classifying... Row " + rowCount + " of " + numberRows);
        }
        exec.checkCanceled();
    }
    if (coveredPattern < nrPattern) {
        // let the user know that we did not store all available pattern
        // for HiLiting.
        this.setWarningMessage("Tree only stored first " + m_maxNumCoveredPattern.getIntValue() + " (of " + nrPattern + ") rows for HiLiting!");
    }
    outData.close();
    m_decTree = decTree;
    exec.setMessage("Decision Tree Predictor: end execution.");
    return new BufferedDataTable[] { outData.getTable() };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) PMMLDecisionTreeTranslator(org.knime.base.node.mine.decisiontree2.PMMLDecisionTreeTranslator) DoubleCell(org.knime.core.data.def.DoubleCell) Node(org.w3c.dom.Node) SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString) DataRow(org.knime.core.data.DataRow) LinkedHashMap(java.util.LinkedHashMap) DataColumnSpec(org.knime.core.data.DataColumnSpec) BufferedDataTable(org.knime.core.node.BufferedDataTable) PMMLPortObjectSpec(org.knime.core.node.port.pmml.PMMLPortObjectSpec) PortObjectSpec(org.knime.core.node.port.PortObjectSpec) DecisionTree(org.knime.base.node.mine.decisiontree2.model.DecisionTree) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) CanceledExecutionException(org.knime.core.node.CanceledExecutionException) IOException(java.io.IOException) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow)

Example 33 with PMMLPortObject

use of org.knime.core.node.port.pmml.PMMLPortObject in project knime-core by knime.

the class PMMLKNNNodeModel method execute.

/**
 * {@inheritDoc}
 */
@Override
protected PortObject[] execute(final PortObject[] inPorts, final ExecutionContext exec) throws Exception {
    BufferedDataTable inTable = (BufferedDataTable) inPorts[0];
    if (m_limitRows.getBooleanValue() && inTable.size() > m_maxNumRows.getIntValue()) {
        setWarningMessage("The number of records in the table (" + inTable.size() + ") exceeds the maximum record number of " + m_maxNumRows.getIntValue());
    }
    PMMLPortObject outPort = new PMMLPortObject(createSpec(inTable.getDataTableSpec()));
    // -1 means that rows are not limited at all
    int maxRows = -1;
    if (m_limitRows.getBooleanValue()) {
        maxRows = m_maxNumRows.getIntValue();
    }
    String[] includes = m_learningColumns.applyTo(inTable.getDataTableSpec()).getIncludes();
    if (includes.length == 0) {
        throw new InvalidSettingsException("No learning columns are selected.");
    }
    outPort.addModelTranslater(new PMMLKNNTranslator(inTable, maxRows, m_numNeighbors.getIntValue(), includes));
    return new PortObject[] { outPort };
}
Also used : PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) BufferedDataTable(org.knime.core.node.BufferedDataTable) SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) PortObject(org.knime.core.node.port.PortObject)

Example 34 with PMMLPortObject

use of org.knime.core.node.port.pmml.PMMLPortObject in project knime-core by knime.

the class MLPPredictorNodeModel method execute.

/**
 * {@inheritDoc}
 */
@Override
public PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    BufferedDataTable testdata = (BufferedDataTable) inData[1];
    PMMLPortObject pmmlPort = (PMMLPortObject) inData[0];
    ColumnRearranger colre = createColumnRearranger(pmmlPort, testdata.getDataTableSpec());
    BufferedDataTable bdt = exec.createColumnRearrangeTable(testdata, colre, exec);
    return new BufferedDataTable[] { bdt };
}
Also used : ColumnRearranger(org.knime.core.data.container.ColumnRearranger) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) BufferedDataTable(org.knime.core.node.BufferedDataTable)

Example 35 with PMMLPortObject

use of org.knime.core.node.port.pmml.PMMLPortObject in project knime-core by knime.

the class CategoryToNumberNodeModel method execute.

/**
 * {@inheritDoc}
 */
@Override
protected PortObject[] execute(final PortObject[] inObjects, final ExecutionContext exec) throws Exception {
    BufferedDataTable inData = (BufferedDataTable) inObjects[0];
    DataTableSpec inSpec = (DataTableSpec) inObjects[0].getSpec();
    ColumnRearranger rearranger = createRearranger(inSpec);
    BufferedDataTable outTable = exec.createColumnRearrangeTable(inData, rearranger, exec);
    // the optional PMML in port (can be null)
    PMMLPortObject inPMMLPort = m_pmmlInEnabled ? (PMMLPortObject) inObjects[1] : null;
    PMMLPortObjectSpecCreator creator = new PMMLPortObjectSpecCreator(inPMMLPort, rearranger.createSpec());
    PMMLPortObject outPMMLPort = new PMMLPortObject(creator.createSpec(), inPMMLPort);
    for (CategoryToNumberCellFactory factory : m_factories) {
        PMMLMapValuesTranslator trans = new PMMLMapValuesTranslator(factory.getConfig(), new DerivedFieldMapper(inPMMLPort));
        outPMMLPort.addGlobalTransformations(trans.exportToTransDict());
    }
    return new PortObject[] { outTable, outPMMLPort };
}
Also used : DerivedFieldMapper(org.knime.core.node.port.pmml.preproc.DerivedFieldMapper) DataTableSpec(org.knime.core.data.DataTableSpec) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) BufferedDataTable(org.knime.core.node.BufferedDataTable) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) PortObject(org.knime.core.node.port.PortObject) PMMLPortObjectSpecCreator(org.knime.core.node.port.pmml.PMMLPortObjectSpecCreator)

Aggregations

PMMLPortObject (org.knime.core.node.port.pmml.PMMLPortObject)90 BufferedDataTable (org.knime.core.node.BufferedDataTable)53 DataTableSpec (org.knime.core.data.DataTableSpec)50 PortObject (org.knime.core.node.port.PortObject)47 ColumnRearranger (org.knime.core.data.container.ColumnRearranger)35 PMMLPortObjectSpec (org.knime.core.node.port.pmml.PMMLPortObjectSpec)25 PMMLPortObjectSpecCreator (org.knime.core.node.port.pmml.PMMLPortObjectSpecCreator)21 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)18 ExecutionContext (org.knime.core.node.ExecutionContext)13 DataColumnSpec (org.knime.core.data.DataColumnSpec)12 DataRow (org.knime.core.data.DataRow)12 DerivedFieldMapper (org.knime.core.node.port.pmml.preproc.DerivedFieldMapper)11 StreamableOperator (org.knime.core.node.streamable.StreamableOperator)11 Node (org.w3c.dom.Node)10 SettingsModelString (org.knime.core.node.defaultnodesettings.SettingsModelString)9 IOException (java.io.IOException)8 DataCell (org.knime.core.data.DataCell)8 PMMLDocument (org.dmg.pmml.PMMLDocument)7 PMMLGeneralRegressionTranslator (org.knime.base.node.mine.regression.pmmlgreg.PMMLGeneralRegressionTranslator)7 StreamableFunction (org.knime.core.node.streamable.StreamableFunction)7