Search in sources :

Example 56 with PMMLPortObjectSpec

use of org.knime.core.node.port.pmml.PMMLPortObjectSpec in project knime-core by knime.

the class SVMLearnerNodeModel method configure.

/**
 * Builds the PMML output spec from the rearranged training table spec and, if the optional PMML
 * input port is enabled, from the incoming PMML spec on the second port.
 *
 * {@inheritDoc}
 */
@Override
protected PortObjectSpec[] configure(final PortObjectSpec[] inSpecs) throws InvalidSettingsException {
    final DataTableSpec tableSpec = (DataTableSpec) inSpecs[0];
    // The second port is only read (and cast) when the optional PMML input is enabled.
    PMMLPortObjectSpec incomingPmmlSpec = null;
    if (m_pmmlInEnabled) {
        incomingPmmlSpec = (PMMLPortObjectSpec) inSpecs[1];
    }
    final LearnColumnsAndColumnRearrangerTuple learnInfo = createTrainTableColumnRearranger(tableSpec);
    final DataTableSpec rearrangedSpec = learnInfo.getTrainingRearranger().createSpec();
    final PMMLPortObjectSpecCreator specCreator = new PMMLPortObjectSpecCreator(incomingPmmlSpec, rearrangedSpec);
    specCreator.setTargetCol(learnInfo.getTargetColumn());
    specCreator.setLearningCols(learnInfo.getLearningColumns());
    final PortObjectSpec outSpec = specCreator.createSpec();
    return new PortObjectSpec[] { outSpec };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) PMMLPortObjectSpec(org.knime.core.node.port.pmml.PMMLPortObjectSpec) PMMLPortObjectSpec(org.knime.core.node.port.pmml.PMMLPortObjectSpec) PortObjectSpec(org.knime.core.node.port.PortObjectSpec) PMMLPortObjectSpecCreator(org.knime.core.node.port.pmml.PMMLPortObjectSpecCreator)

Example 57 with PMMLPortObjectSpec

use of org.knime.core.node.port.pmml.PMMLPortObjectSpec in project knime-core by knime.

the class SVMPredictorNodeModel method configure.

/**
 * Verifies that every double-compatible learning column of the PMML model (port 0) exists in the
 * test table (port 1) and creates the output table spec via {@link PredictorHelper}.
 *
 * {@inheritDoc}
 *
 * @throws InvalidSettingsException if a double-compatible learning column is not found in the test data
 */
@Override
protected PortObjectSpec[] configure(final PortObjectSpec[] inSpecs) throws InvalidSettingsException {
    DataTableSpec testSpec = (DataTableSpec) inSpecs[1];
    PMMLPortObjectSpec trainingSpec = (PMMLPortObjectSpec) inSpecs[0];
    // Only the presence of the columns matters here; their indices are not needed for the spec.
    for (DataColumnSpec colspec : trainingSpec.getLearningCols()) {
        if (colspec.getType().isCompatible(DoubleValue.class)
                && testSpec.findColumnIndex(colspec.getName()) < 0) {
            throw new InvalidSettingsException("Column '" + colspec.getName() + "' not found in test data");
        }
    }
    final PredictorHelper predictorHelper = PredictorHelper.getInstance();
    return new DataTableSpec[] { predictorHelper.createOutTableSpec(testSpec, trainingSpec, m_addProbabilities.getBooleanValue(), m_predictionColumn.getStringValue(), m_overridePrediction.getBooleanValue(), m_suffix.getStringValue()) };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) PMMLPortObjectSpec(org.knime.core.node.port.pmml.PMMLPortObjectSpec) DataColumnSpec(org.knime.core.data.DataColumnSpec) PredictorHelper(org.knime.base.node.mine.util.PredictorHelper) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) Vector(java.util.Vector)

Example 58 with PMMLPortObjectSpec

use of org.knime.core.node.port.pmml.PMMLPortObjectSpec in project knime-core by knime.

the class PredictorNodeDialog method extractTargetColumn.

/**
 * Refreshes the last target column from the model port's spec. This default implementation handles
 * a {@link PMMLPortObjectSpec} on the model port by taking the first of its target columns, and a
 * missing ({@code null}) spec by clearing the last target column. Subclasses whose model port
 * carries a different kind of spec have to override this method.
 *
 * @param specs The {@link PortObjectSpec}s containing the model specific info.
 * @throws IllegalStateException if the model port spec is neither {@code null} nor a
 *             {@link PMMLPortObjectSpec}
 * @see #setLastTargetColumn(DataColumnSpec)
 */
protected void extractTargetColumn(final PortObjectSpec[] specs) {
    final PortObjectSpec modelSpec = specs[MODEL_PORT];
    if (modelSpec == null) {
        // No model connected/configured yet: nothing to remember.
        setLastTargetColumn(null);
        return;
    }
    if (!(modelSpec instanceof PMMLPortObjectSpec)) {
        throw new IllegalStateException("Please implement this method properly for the class:\n" + this.getClass());
    }
    final PMMLPortObjectSpec pmmlSpec = (PMMLPortObjectSpec) modelSpec;
    setLastTargetColumn(pmmlSpec.getTargetCols().iterator().next());
}
Also used : PMMLPortObjectSpec(org.knime.core.node.port.pmml.PMMLPortObjectSpec)

Example 59 with PMMLPortObjectSpec

use of org.knime.core.node.port.pmml.PMMLPortObjectSpec in project knime-core by knime.

the class RPropNodeModel method execute.

/**
 * Trains a multi-layer perceptron with RProp on the input table and returns it as PMML.
 * The execution consists of three steps:
 * <ol>
 * <li>A neural network is built with the inputs and outputs according to
 * the input datatable, number of hidden layers as specified.</li>
 * <li>Input DataTables are converted into double-arrays so they can be
 * attached to the neural net.</li>
 * <li>The neural net is trained.</li>
 * </ol>
 *
 * {@inheritDoc}
 *
 * @throws InvalidSettingsException if a numeric class column has domain bounds outside [0,1]
 * @throws IllegalStateException if a row carries a class value that is not part of the class column's domain
 * @throws Exception if the nominal class column has no domain values, an input column is not
 *             double-compatible, or a missing value is encountered while missing values are not ignored
 */
@Override
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    // If class column is not set, it is the last column.
    DataTableSpec posSpec = (DataTableSpec) inData[INDATA].getSpec();
    if (m_classcol.getStringValue() == null) {
        m_classcol.setStringValue(posSpec.getColumnSpec(posSpec.getNumColumns() - 1).getName());
    }
    List<String> learningCols = new LinkedList<String>();
    List<String> targetCols = new LinkedList<String>();
    // Determine the number of inputs and the number of outputs. Make also
    // sure that the inputs are double values.
    int nrInputs = 0;
    int nrOutputs = 0;
    HashMap<String, Integer> inputmap = new HashMap<String, Integer>();
    HashMap<DataCell, Integer> classMap = new HashMap<DataCell, Integer>();
    for (DataColumnSpec colspec : posSpec) {
        // check for class column
        if (colspec.getName().equals(m_classcol.getStringValue())) {
            targetCols.add(colspec.getName());
            if (colspec.getType().isCompatible(DoubleValue.class)) {
                // check if the values are in range [0,1]
                DataColumnDomain domain = colspec.getDomain();
                if (domain.hasBounds()) {
                    double lower = ((DoubleValue) domain.getLowerBound()).getDoubleValue();
                    double upper = ((DoubleValue) domain.getUpperBound()).getDoubleValue();
                    if (lower < 0 || upper > 1) {
                        throw new InvalidSettingsException("Domain range for regression in column " + colspec.getName() + " not in range [0,1]");
                    }
                }
                // Regression: a single output neuron, keyed by the column name.
                nrOutputs = 1;
                classMap = new HashMap<DataCell, Integer>();
                classMap.put(new StringCell(colspec.getName()), 0);
                m_regression = true;
            } else {
                m_regression = false;
                DataColumnDomain domain = colspec.getDomain();
                if (domain.hasValues()) {
                    // Classification: one output neuron per possible class value.
                    Set<DataCell> allvalues = domain.getValues();
                    int outputneuron = 0;
                    classMap = new HashMap<DataCell, Integer>();
                    for (DataCell value : allvalues) {
                        classMap.put(value, outputneuron);
                        outputneuron++;
                    }
                    nrOutputs = allvalues.size();
                } else {
                    throw new Exception("Could not find domain values in nominal column " + colspec.getName());
                }
            }
        } else {
            if (!colspec.getType().isCompatible(DoubleValue.class)) {
                throw new Exception("Only double columns for input");
            }
            inputmap.put(colspec.getName(), nrInputs);
            learningCols.add(colspec.getName());
            nrInputs++;
        }
    }
    assert targetCols.size() == 1 : "Only one class column allowed.";
    m_architecture.setNrInputNeurons(nrInputs);
    m_architecture.setNrHiddenLayers(m_nrHiddenLayers.getIntValue());
    m_architecture.setNrHiddenNeurons(m_nrHiddenNeuronsperLayer.getIntValue());
    m_architecture.setNrOutputNeurons(nrOutputs);
    Random random = new Random();
    if (m_useRandomSeed.getBooleanValue()) {
        random.setSeed(m_randomSeed.getIntValue());
    }
    m_mlp = new MultiLayerPerceptron(m_architecture, random);
    if (m_regression) {
        m_mlp.setMode(MultiLayerPerceptron.REGRESSION_MODE);
    } else {
        m_mlp.setMode(MultiLayerPerceptron.CLASSIFICATION_MODE);
    }
    // Convert inputs to double arrays. Values from the class column are
    // encoded as bitvectors.
    int classColNr = posSpec.findColumnIndex(m_classcol.getStringValue());
    List<Double[]> samples = new ArrayList<Double[]>();
    List<Double[]> outputs = new ArrayList<Double[]>();
    final RowIterator rowIt = ((BufferedDataTable) inData[INDATA]).iterator();
    while (rowIt.hasNext()) {
        // Set to false when the row contains a missing value and has to be skipped.
        boolean add = true;
        Double[] output = new Double[nrOutputs];
        Double[] sample = new Double[nrInputs];
        DataRow row = rowIt.next();
        int nrCells = row.getNumCells();
        int index = 0;
        for (int i = 0; i < nrCells; i++) {
            DataCell cell = row.getCell(i);
            if (i != classColNr) {
                if (!cell.isMissing()) {
                    sample[index] = ((DoubleValue) cell).getDoubleValue();
                    index++;
                } else if (m_ignoreMV.getBooleanValue()) {
                    add = false;
                    break;
                } else {
                    throw new Exception("Missing values in input" + " datatable");
                }
            } else {
                if (cell.isMissing()) {
                    if (!m_ignoreMV.getBooleanValue()) {
                        throw new Exception("Missing value in class" + " column");
                    }
                    add = false;
                    break;
                }
                if (m_regression) {
                    output[0] = ((DoubleValue) cell).getDoubleValue();
                } else {
                    // Look the class index up once; a value outside the domain would otherwise
                    // surface as a bare NullPointerException from unboxing.
                    Integer classIndex = classMap.get(cell);
                    if (classIndex == null) {
                        throw new IllegalStateException("Class value '" + cell + "' is not contained in the domain of column '" + m_classcol.getStringValue() + "'");
                    }
                    for (int j = 0; j < nrOutputs; j++) {
                        output[j] = (j == classIndex.intValue()) ? 1.0 : 0.0;
                    }
                }
            }
        }
        if (add) {
            samples.add(sample);
            outputs.add(output);
        }
    }
    // One row per accepted sample; inner arrays have nrInputs resp. nrOutputs entries.
    Double[][] samplesarr = samples.toArray(new Double[samples.size()][]);
    Double[][] outputsarr = outputs.toArray(new Double[outputs.size()][]);
    // Now finally train the network.
    m_mlp.setClassMapping(classMap);
    m_mlp.setInputMapping(inputmap);
    RProp myrprop = new RProp();
    final int nrIterations = m_nrIterations.getIntValue();
    m_errors = new double[nrIterations];
    for (int iteration = 0; iteration < nrIterations; iteration++) {
        exec.setProgress((double) iteration / (double) nrIterations, "Iteration " + iteration);
        myrprop.train(m_mlp, samplesarr, outputsarr);
        // Sum of squared differences between network output and target over all samples.
        double error = 0;
        for (int j = 0; j < outputsarr.length; j++) {
            double[] myoutput = m_mlp.output(samplesarr[j]);
            for (int o = 0; o < outputsarr[j].length; o++) {
                double diff = myoutput[o] - outputsarr[j][o];
                error += diff * diff;
            }
        }
        m_errors[iteration] = error;
        exec.checkCanceled();
    }
    // handle the optional PMML input
    PMMLPortObject inPMMLPort = m_pmmlInEnabled ? (PMMLPortObject) inData[INMODEL] : null;
    PMMLPortObjectSpec inPMMLSpec = null;
    if (inPMMLPort != null) {
        inPMMLSpec = inPMMLPort.getSpec();
    }
    PMMLPortObjectSpec outPortSpec = createPMMLPortObjectSpec(inPMMLSpec, posSpec, learningCols, targetCols);
    PMMLPortObject outPMMLPort = new PMMLPortObject(outPortSpec, inPMMLPort, posSpec);
    outPMMLPort.addModelTranslater(new PMMLNeuralNetworkTranslator(m_mlp));
    return new PortObject[] { outPMMLPort };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) PMMLPortObjectSpec(org.knime.core.node.port.pmml.PMMLPortObjectSpec) HashMap(java.util.HashMap) PMMLNeuralNetworkTranslator(org.knime.base.node.mine.neural.mlp2.PMMLNeuralNetworkTranslator) ArrayList(java.util.ArrayList) SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString) DataRow(org.knime.core.data.DataRow) DataColumnSpec(org.knime.core.data.DataColumnSpec) Random(java.util.Random) BufferedDataTable(org.knime.core.node.BufferedDataTable) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) PortObject(org.knime.core.node.port.PortObject) LinkedList(java.util.LinkedList) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) IOException(java.io.IOException) MultiLayerPerceptron(org.knime.base.data.neural.MultiLayerPerceptron) SettingsModelInteger(org.knime.core.node.defaultnodesettings.SettingsModelInteger) DataColumnDomain(org.knime.core.data.DataColumnDomain) DoubleValue(org.knime.core.data.DoubleValue) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) StringCell(org.knime.core.data.def.StringCell) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) RowIterator(org.knime.core.data.RowIterator) DataCell(org.knime.core.data.DataCell) RProp(org.knime.base.data.neural.methods.RProp)

Example 60 with PMMLPortObjectSpec

use of org.knime.core.node.port.pmml.PMMLPortObjectSpec in project knime-core by knime.

the class ClusterNodeModel method execute.

/**
 * Generate new clustering based on InputDataTable and specified number of
 * clusters. Currently the objective function only looks for cluster centers
 * that are extremely similar to the first n patterns...
 *
 * {@inheritDoc}
 *
 * @param data the input port objects; index 0 is the table to cluster, index 1 is read as the
 *            optional PMML input when {@code m_pmmlInEnabled} is set
 * @param exec used for progress reporting, cancellation checks and creating the output containers
 * @return the input table with an appended cluster column, followed by the cluster centers table
 *         when {@code m_outputCenters} is set, followed by the PMML port object
 * @throws Exception if a non-ignored cell is missing; an {@link IllegalStateException} is thrown
 *             when no closest prototype is found for a row
 */
@Override
protected PortObject[] execute(final PortObject[] data, final ExecutionContext exec) throws Exception {
    // FIXME actually do something useful with missing values!
    BufferedDataTable inData = (BufferedDataTable) data[0];
    DataTableSpec spec = inData.getDataTableSpec();
    // get dimension of feature space
    m_dimension = inData.getDataTableSpec().getNumColumns();
    // cluster key -> keys of the rows assigned to that cluster; only filled when hiliting is enabled
    HashMap<RowKey, Set<RowKey>> mapping = new HashMap<RowKey, Set<RowKey>>();
    addExcludeColumnsToIgnoreList(spec);
    double[][] clusters = initializeClusters(inData);
    // also keep counts of how many patterns fall in a specific cluster
    int[] clusterCoverage = new int[m_nrOfClusters.getIntValue()];
    // --------- create clusters --------------
    // reserve space for cluster center updates (do batch update!)
    double[][] delta = new double[m_nrOfClusters.getIntValue()][];
    for (int c = 0; c < m_nrOfClusters.getIntValue(); c++) {
        delta[c] = new double[m_dimension - m_nrIgnoredColumns];
    }
    // main loop - until clusters stop changing or maxNrIterations reached
    int currentIteration = 0;
    boolean finished = false;
    while ((!finished) && (currentIteration < m_nrMaxIterations.getIntValue())) {
        exec.checkCanceled();
        exec.setProgress((double) currentIteration / (double) m_nrMaxIterations.getIntValue(), "Iteration " + currentIteration);
        // initialize counts and cluster-deltas
        for (int c = 0; c < m_nrOfClusters.getIntValue(); c++) {
            clusterCoverage[c] = 0;
            delta[c] = new double[m_dimension - m_nrIgnoredColumns];
            // explicit reset of the non-ignored positions (a freshly allocated double[] is already all 0.0)
            int deltaPos = 0;
            for (int i = 0; i < m_dimension; i++) {
                if (!m_ignoreColumn[i]) {
                    delta[c][deltaPos++] = 0.0;
                }
            }
        }
        // assume that we are done (i.e. clusters have stopped changing);
        // the value is overwritten by the result of updateClusterCenters below
        finished = true;
        // first training example
        RowIterator rowIt = inData.iterator();
        while (rowIt.hasNext()) {
            DataRow currentRow = rowIt.next();
            int winner = findClosestPrototypeFor(currentRow, clusters);
            if (winner >= 0) {
                // update winning cluster centers delta
                // deltaPos indexes the compacted (non-ignored) columns, i indexes all columns
                int deltaPos = 0;
                for (int i = 0; i < m_dimension; i++) {
                    DataCell currentCell = currentRow.getCell(i);
                    if (!m_ignoreColumn[i]) {
                        if (!currentCell.isMissing()) {
                            delta[winner][deltaPos] += ((DoubleValue) (currentCell)).getDoubleValue();
                        } else {
                            throw new Exception("Missing Values not (yet) allowed in k-Means.");
                        }
                        deltaPos++;
                    }
                }
                clusterCoverage[winner]++;
            } else {
                // with assertions enabled (-ea) this fails here with an AssertionError
                assert (winner >= 0);
                // with assertions disabled, fail with an exception instead of producing a result
                throw new IllegalStateException("No winner found: " + winner);
            }
        }
        // update cluster centers
        finished = updateClusterCenters(clusterCoverage, clusters, delta);
        currentIteration++;
    }
    // while(!finished & nrIt<maxNrIt)
    // create list of feature names
    // index of not-ignored columns
    int k = 0;
    // index of column
    int j = 0;
    // sized m_dimension although only non-ignored columns are filled in; when columns are
    // ignored the trailing entries stay null
    String[] featureNames = new String[m_dimension];
    // NOTE(review): the do-while reads m_ignoreColumn[0] before testing j < m_dimension, so this
    // presumably relies on the table having at least one column - confirm
    do {
        if (!m_ignoreColumn[j]) {
            featureNames[k] = spec.getColumnSpec(j).getName();
            k++;
        }
        j++;
    } while (j < m_dimension);
    // create output container and also mapping for HiLiteing
    BufferedDataContainer labeledInput = exec.createDataContainer(createAppendedSpec(spec));
    for (DataRow row : inData) {
        // assign every row to its closest cluster and append the cluster name as a new cell
        int winner = findClosestPrototypeFor(row, clusters);
        DataCell cell = new StringCell(CLUSTER + winner);
        labeledInput.addRowToTable(new AppendedColumnRow(row, cell));
        if (m_enableHilite.getBooleanValue()) {
            // collect the row keys per cluster key for the hilite mapper
            RowKey key = new RowKey(CLUSTER + winner);
            if (mapping.get(key) == null) {
                Set<RowKey> set = new HashSet<RowKey>();
                set.add(row.getKey());
                mapping.put(key, set);
            } else {
                mapping.get(key).add(row.getKey());
            }
        }
    }
    labeledInput.close();
    if (m_enableHilite.getBooleanValue()) {
        m_translator.setMapper(new DefaultHiLiteMapper(mapping));
    }
    BufferedDataTable outData = labeledInput.getTable();
    // handle the optional PMML input
    PMMLPortObject inPMMLPort = m_pmmlInEnabled ? (PMMLPortObject) data[1] : null;
    PMMLPortObjectSpec inPMMLSpec = null;
    if (inPMMLPort != null) {
        inPMMLSpec = inPMMLPort.getSpec();
    }
    PMMLPortObjectSpec pmmlOutSpec = createPMMLSpec(inPMMLSpec, spec);
    PMMLPortObject outPMMLPort = new PMMLPortObject(pmmlOutSpec, inPMMLPort, spec);
    // copy the learning field names into an insertion-ordered set for the cluster translator
    Set<String> columns = new LinkedHashSet<String>();
    for (String s : pmmlOutSpec.getLearningFields()) {
        columns.add(s);
    }
    outPMMLPort.addModelTranslater(new PMMLClusterTranslator(ComparisonMeasure.squaredEuclidean, m_nrOfClusters.getIntValue(), clusters, clusterCoverage, columns));
    // keep the clustering result in the model field for later use
    m_viewData = new ClusterViewData(clusters, clusterCoverage, m_dimension - m_nrIgnoredColumns, featureNames);
    if (m_outputCenters) {
        // additionally emit one row per cluster holding its center coordinates
        DataContainer clusterCenterContainer = exec.createDataContainer(createClusterCentersSpec(spec));
        int i = 0;
        for (double[] cluster : clusters) {
            List<DataCell> cells = new ArrayList<>();
            for (double d : cluster) {
                cells.add(new DoubleCell(d));
            }
            clusterCenterContainer.addRowToTable(new DefaultRow(new RowKey(PMMLClusterTranslator.CLUSTER_NAME_PREFIX + i++), cells));
        }
        clusterCenterContainer.close();
        return new PortObject[] { outData, (BufferedDataTable) clusterCenterContainer.getTable(), outPMMLPort };
    } else {
        return new PortObject[] { outData, outPMMLPort };
    }
}
Also used : LinkedHashSet(java.util.LinkedHashSet) DataTableSpec(org.knime.core.data.DataTableSpec) PMMLPortObjectSpec(org.knime.core.node.port.pmml.PMMLPortObjectSpec) Set(java.util.Set) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet) RowKey(org.knime.core.data.RowKey) HashMap(java.util.HashMap) DoubleCell(org.knime.core.data.def.DoubleCell) ArrayList(java.util.ArrayList) SettingsModelFilterString(org.knime.core.node.defaultnodesettings.SettingsModelFilterString) DataRow(org.knime.core.data.DataRow) DataContainer(org.knime.core.data.container.DataContainer) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) PMMLClusterTranslator(org.knime.base.node.mine.cluster.PMMLClusterTranslator) BufferedDataTable(org.knime.core.node.BufferedDataTable) DefaultHiLiteMapper(org.knime.core.node.property.hilite.DefaultHiLiteMapper) PortObject(org.knime.core.node.port.PortObject) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) AppendedColumnRow(org.knime.core.data.append.AppendedColumnRow) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) CanceledExecutionException(org.knime.core.node.CanceledExecutionException) IOException(java.io.IOException) StringCell(org.knime.core.data.def.StringCell) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) RowIterator(org.knime.core.data.RowIterator) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow)

Aggregations

PMMLPortObjectSpec (org.knime.core.node.port.pmml.PMMLPortObjectSpec)77 DataTableSpec (org.knime.core.data.DataTableSpec)57 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)40 DataColumnSpec (org.knime.core.data.DataColumnSpec)31 PortObjectSpec (org.knime.core.node.port.PortObjectSpec)30 PMMLPortObject (org.knime.core.node.port.pmml.PMMLPortObject)23 PMMLPortObjectSpecCreator (org.knime.core.node.port.pmml.PMMLPortObjectSpecCreator)23 ColumnRearranger (org.knime.core.data.container.ColumnRearranger)22 SettingsModelString (org.knime.core.node.defaultnodesettings.SettingsModelString)20 BufferedDataTable (org.knime.core.node.BufferedDataTable)15 PortObject (org.knime.core.node.port.PortObject)12 DataCell (org.knime.core.data.DataCell)10 DoubleValue (org.knime.core.data.DoubleValue)10 DataRow (org.knime.core.data.DataRow)8 DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator)7 ArrayList (java.util.ArrayList)6 LinkedList (java.util.LinkedList)6 DataColumnDomain (org.knime.core.data.DataColumnDomain)6 DoubleCell (org.knime.core.data.def.DoubleCell)6 IOException (java.io.IOException)4