Search in sources :

Example 31 with PMMLPortObjectSpec

use of org.knime.core.node.port.pmml.PMMLPortObjectSpec in project knime-core by knime.

the class PolyRegLearnerNodeModel method configure.

/**
 * Checks the current settings against the incoming table spec and derives the specs of the three outputs.
 * <p>
 * Every selected column must exist in the input table and be compatible with {@link DoubleValue}; a target
 * column must be set and present in the input table. The selected column names are stored in
 * {@code m_columnNames} before they are validated.
 *
 * {@inheritDoc}
 */
@Override
protected PortObjectSpec[] configure(final PortObjectSpec[] inSpecs) throws InvalidSettingsException {
    final DataTableSpec inTableSpec = (DataTableSpec) inSpecs[0];
    // The second input is only read when the optional PMML in-port is enabled.
    PMMLPortObjectSpec inModelSpec = null;
    if (m_pmmlInEnabled) {
        inModelSpec = (PMMLPortObjectSpec) inSpecs[1];
    }

    final String[] learningColumns = computeSelectedColumns(inTableSpec);
    m_columnNames = learningColumns;
    for (int i = 0; i < learningColumns.length; i++) {
        final String name = learningColumns[i];
        final DataColumnSpec columnSpec = inTableSpec.getColumnSpec(name);
        if (columnSpec == null) {
            throw new InvalidSettingsException("Selected column '" + name + "' does not exist in input table");
        }
        final boolean numeric = columnSpec.getType().isCompatible(DoubleValue.class);
        if (!numeric) {
            throw new InvalidSettingsException(
                "Selected column '" + columnSpec.getName() + "' from the input table is not a numeric column.");
        }
    }

    final String target = m_settings.getTargetColumn();
    if (target == null) {
        throw new InvalidSettingsException("No target column selected");
    }
    if (inTableSpec.findColumnIndex(target) < 0) {
        throw new InvalidSettingsException("Target column '" + target + "' does not exist.");
    }

    // Two double columns are appended to the input table spec for the second output.
    final DataColumnSpec predictionCol = new DataColumnSpecCreator("PolyReg prediction", DoubleCell.TYPE).createSpec();
    final DataColumnSpec errorCol = new DataColumnSpecCreator("Prediction Error", DoubleCell.TYPE).createSpec();

    final PortObjectSpec[] outSpecs = new PortObjectSpec[3];
    outSpecs[0] = createModelSpec(inModelSpec, inTableSpec);
    outSpecs[1] = AppendedColumnTable.getTableSpec(inTableSpec, predictionCol, errorCol);
    outSpecs[2] = STATS_SPEC;
    return outSpecs;
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) PMMLPortObjectSpec(org.knime.core.node.port.pmml.PMMLPortObjectSpec) DataColumnSpec(org.knime.core.data.DataColumnSpec) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) DoubleValue(org.knime.core.data.DoubleValue) PMMLPortObjectSpec(org.knime.core.node.port.pmml.PMMLPortObjectSpec) PortObjectSpec(org.knime.core.node.port.PortObjectSpec)

Example 32 with PMMLPortObjectSpec

use of org.knime.core.node.port.pmml.PMMLPortObjectSpec in project knime-core by knime.

the class PolyRegLearnerNodeModel method execute.

/**
 * Runs the polynomial regression learner on the first input and returns three port objects: the PMML model,
 * the input table with the columns produced by {@code getCellFactory(...)} appended, and a statistics table.
 * <p>
 * If the learner throws a {@link ModelSpecificationException}, the node does not fail. Instead the exception
 * message is appended to the node's warning, a placeholder statistics table (coefficient 0.0, missing cells for
 * the remaining statistics) is produced, and the view data is filled with NaN statistics.
 *
 * {@inheritDoc}
 */
@Override
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    BufferedDataTable inTable = (BufferedDataTable) inData[0];
    DataTableSpec inSpec = inTable.getDataTableSpec();
    final int colCount = inSpec.getNumColumns();
    String[] selectedCols = computeSelectedColumns(inSpec);
    Set<String> hash = new HashSet<String>(Arrays.asList(selectedCols));
    // Flag, per input column index, whether the column is one of the selected learning columns.
    m_colSelected = new boolean[colCount];
    for (int i = 0; i < colCount; i++) {
        m_colSelected[i] = hash.contains(inSpec.getColumnSpec(i).getName());
    }
    final int rowCount = inTable.getRowCount();
    // Learning columns followed by the target column: the columns kept for the view's row container.
    String[] temp = new String[m_columnNames.length + 1];
    System.arraycopy(m_columnNames, 0, temp, 0, m_columnNames.length);
    temp[temp.length - 1] = m_settings.getTargetColumn();
    FilterColumnTable filteredTable = new FilterColumnTable(inTable, temp);
    final DataArray rowContainer = new DefaultDataArray(filteredTable, 1, m_settings.getMaxRowsForView());
    // handle the optional PMML input
    PMMLPortObject inPMMLPort = m_pmmlInEnabled ? (PMMLPortObject) inData[1] : null;
    // Re-run configure on the actual input specs to obtain the model spec handed to the learner.
    PortObjectSpec[] outputSpec = configure((inPMMLPort == null)
        ? new PortObjectSpec[] { inData[0].getSpec(), null }
        : new PortObjectSpec[] { inData[0].getSpec(), inPMMLPort.getSpec() });
    Learner learner = new Learner((PMMLPortObjectSpec) outputSpec[0], 0d,
        m_settings.getMissingValueHandling() == MissingValueHandling.fail, m_settings.getDegree());
    try {
        PolyRegContent polyRegContent = learner.perform(inTable, exec);
        m_betas = fillBeta(polyRegContent);
        m_meanValues = polyRegContent.getMeans();
        ColumnRearranger crea = new ColumnRearranger(inSpec);
        crea.append(getCellFactory(inSpec.findColumnIndex(m_settings.getTargetColumn())));
        PortObject[] bdt = new PortObject[] {
            createPMMLModel(inPMMLPort, inSpec),
            exec.createColumnRearrangeTable(inTable, crea, exec.createSilentSubExecutionContext(.2)),
            polyRegContent.createTablePortObject(exec.createSubExecutionContext(0.2)) };
        // NOTE(review): m_squaredError is presumably accumulated while the rearranged table above is
        // created; here it is turned into a per-row mean - confirm against the cell factory.
        m_squaredError /= rowCount;
        if (polyRegContent.getWarningMessage() != null) {
            setWarningMessage(polyRegContent.getWarningMessage());
        }
        double[] stdErrors = PolyRegViewData.mapToArray(polyRegContent.getStandardErrors(), m_columnNames,
            m_settings.getDegree(), polyRegContent.getInterceptStdErr());
        double[] tValues = PolyRegViewData.mapToArray(polyRegContent.getTValues(), m_columnNames,
            m_settings.getDegree(), polyRegContent.getInterceptTValue());
        double[] pValues = PolyRegViewData.mapToArray(polyRegContent.getPValues(), m_columnNames,
            m_settings.getDegree(), polyRegContent.getInterceptPValue());
        m_viewData = new PolyRegViewData(m_meanValues, m_betas, stdErrors, tValues, pValues, m_squaredError,
            polyRegContent.getAdjustedRSquared(), m_columnNames, m_settings.getDegree(),
            m_settings.getTargetColumn(), rowContainer);
        return bdt;
    } catch (ModelSpecificationException e) {
        // Append the exception message to any existing warning. The prefix is computed on its own because
        // '+' binds tighter than '?:'; written as a single expression the message would only be used when
        // there was no previous warning.
        final String origWarning = getWarningMessage();
        final String prefix = (origWarning != null && !origWarning.isEmpty()) ? (origWarning + "\n") : "";
        final String warning = prefix + e.getMessage();
        setWarningMessage(warning);
        final ExecutionContext subExec = exec.createSubExecutionContext(.1);
        final BufferedDataContainer empty = subExec.createDataContainer(STATS_SPEC);
        // One placeholder row per (column, degree) pair, followed by a row for the intercept.
        int rowIdx = 1;
        for (final String column : m_columnNames) {
            for (int d = 1; d <= m_settings.getDegree(); ++d) {
                empty.addRowToTable(new DefaultRow("Row" + rowIdx++, new StringCell(column), new IntCell(d),
                    new DoubleCell(0.0d), DataType.getMissingCell(), DataType.getMissingCell(),
                    DataType.getMissingCell()));
            }
        }
        empty.addRowToTable(new DefaultRow("Row" + rowIdx, new StringCell("Intercept"), new IntCell(0),
            new DoubleCell(0.0d), DataType.getMissingCell(), DataType.getMissingCell(),
            DataType.getMissingCell()));
        double[] nans = new double[m_columnNames.length * m_settings.getDegree() + 1];
        Arrays.fill(nans, Double.NaN);
        m_betas = new double[nans.length];
        // Mean only for the linear tags
        // NOTE(review): integer division gives one entry per column for degree > 1, but
        // columns + 1 entries for degree == 1 - confirm this is intended.
        m_meanValues = new double[nans.length / m_settings.getDegree()];
        m_viewData = new PolyRegViewData(m_meanValues, m_betas, nans, nans, nans, m_squaredError, Double.NaN,
            m_columnNames, m_settings.getDegree(), m_settings.getTargetColumn(), rowContainer);
        empty.close();
        ColumnRearranger crea = new ColumnRearranger(inSpec);
        crea.append(getCellFactory(inSpec.findColumnIndex(m_settings.getTargetColumn())));
        BufferedDataTable rearrangerTable =
            exec.createColumnRearrangeTable(inTable, crea, exec.createSubProgress(0.6));
        PMMLPortObject model = createPMMLModel(inPMMLPort, inSpec);
        PortObject[] bdt = new PortObject[] { model, rearrangerTable, empty.getTable() };
        return bdt;
    }
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DefaultDataArray(org.knime.base.node.util.DefaultDataArray) DoubleCell(org.knime.core.data.def.DoubleCell) FilterColumnTable(org.knime.base.data.filter.column.FilterColumnTable) DataArray(org.knime.base.node.util.DataArray) DefaultDataArray(org.knime.base.node.util.DefaultDataArray) ModelSpecificationException(org.apache.commons.math3.stat.regression.ModelSpecificationException) IntCell(org.knime.core.data.def.IntCell) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) BufferedDataTable(org.knime.core.node.BufferedDataTable) PMMLPortObjectSpec(org.knime.core.node.port.pmml.PMMLPortObjectSpec) PortObjectSpec(org.knime.core.node.port.PortObjectSpec) PortObject(org.knime.core.node.port.PortObject) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) HashSet(java.util.HashSet) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) ExecutionContext(org.knime.core.node.ExecutionContext) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) StringCell(org.knime.core.data.def.StringCell) DefaultRow(org.knime.core.data.def.DefaultRow)

Example 33 with PMMLPortObjectSpec

use of org.knime.core.node.port.pmml.PMMLPortObjectSpec in project knime-core by knime.

the class ClusterAssignerNodeModel method configure.

/**
 * Derives the output table spec by appending a {@code ClusterAssignFactory} to a rearranger built on the
 * data input spec. The factory is given the new output column spec and the indices of the PMML model's
 * learning fields within the data spec.
 *
 * {@inheritDoc}
 */
@Override
protected PortObjectSpec[] configure(final PortObjectSpec[] inSpecs) throws InvalidSettingsException {
    final PMMLPortObjectSpec modelSpec = (PMMLPortObjectSpec) inSpecs[PMML_PORT];
    final DataTableSpec tableSpec = (DataTableSpec) inSpecs[DATA_PORT];

    final ColumnRearranger rearranger = new ColumnRearranger(tableSpec);
    // No prototypes or labels are available at configure time, hence the two null arguments.
    final ClusterAssignFactory factory = new ClusterAssignFactory(
        null,
        null,
        createNewOutSpec(tableSpec),
        findLearnedColumnIndices(tableSpec, new HashSet<String>(modelSpec.getLearningFields())));
    rearranger.append(factory);

    return new DataTableSpec[] { rearranger.createSpec() };
}
Also used : PMMLPortObjectSpec(org.knime.core.node.port.pmml.PMMLPortObjectSpec) DataTableSpec(org.knime.core.data.DataTableSpec) ColumnRearranger(org.knime.core.data.container.ColumnRearranger)

Example 34 with PMMLPortObjectSpec

use of org.knime.core.node.port.pmml.PMMLPortObjectSpec in project knime-core by knime.

the class ClusterNodeModel method configure.

/**
 * Synchronises the stored include/exclude column lists with the incoming table spec, checks that usable
 * columns are available and derives the output specs.
 * <p>
 * Columns of the incoming table that appear in neither list are added to the include list; list entries
 * that no longer exist in the incoming table are dropped. If both lists end up empty (and "keep all" is not
 * selected), all {@link DoubleValue}-compatible columns are included and all others excluded.
 *
 * @param inSpecs the specifications of the input port(s); index 0 is the data table spec, index 1 is read
 *            as the PMML spec when the PMML input is enabled
 * @return the appended table spec, the cluster centers spec if centers are output, and the PMML spec
 * @throws InvalidSettingsException if "keep all" is selected and the input has no numeric column
 */
@Override
protected PortObjectSpec[] configure(final PortObjectSpec[] inSpecs) throws InvalidSettingsException {
    final DataTableSpec inSpec = (DataTableSpec) inSpecs[0];
    // The full dimensionality of the input is kept; unusable columns are flagged as ignored further below.
    m_dimension = inSpec.getNumColumns();
    m_ignoreColumn = new boolean[m_dimension];
    m_nrIgnoredColumns = 0;

    // Working copies that become the new lists, plus copies used to detect entries missing from the input.
    final LinkedList<String> newIncludes = new LinkedList<String>(m_usedColumns.getIncludeList());
    final LinkedList<String> newExcludes = new LinkedList<String>(m_usedColumns.getExcludeList());
    final LinkedList<String> missingIncludes = new LinkedList<String>(m_usedColumns.getIncludeList());
    final LinkedList<String> missingExcludes = new LinkedList<String>(m_usedColumns.getExcludeList());

    for (String name : inSpec.getColumnNames()) {
        if (m_usedColumns.getIncludeList().contains(name)) {
            // known include that is present in the input
            missingIncludes.remove(name);
            continue;
        }
        if (m_usedColumns.getExcludeList().contains(name)) {
            // known exclude that is present in the input
            missingExcludes.remove(name);
            continue;
        }
        // column in neither list: include it
        newIncludes.add(name);
    }
    // Whatever is left in the "missing" lists does not exist in the incoming table any more.
    for (String gone : missingIncludes) {
        newIncludes.remove(gone);
    }
    for (String gone : missingExcludes) {
        newExcludes.remove(gone);
    }
    m_usedColumns.setExcludeList(newExcludes);
    m_usedColumns.setIncludeList(newIncludes);

    if (!m_usedColumns.isKeepAllSelected()) {
        // Nothing configured at all: default to all double compatible columns.
        final boolean nothingConfigured =
            m_usedColumns.getIncludeList().isEmpty() && m_usedColumns.getExcludeList().isEmpty();
        if (nothingConfigured) {
            final List<String> numericNames = new ArrayList<String>();
            final List<String> otherNames = new ArrayList<String>();
            for (DataColumnSpec column : inSpec) {
                final List<String> bucket =
                    column.getType().isCompatible(DoubleValue.class) ? numericNames : otherNames;
                bucket.add(column.getName());
            }
            m_usedColumns.setIncludeList(numericNames);
            m_usedColumns.setExcludeList(otherNames);
        }
        // An empty include list is allowed but only worth a warning.
        if (m_usedColumns.getIncludeList().isEmpty()) {
            setWarningMessage("No column in include list! Produces one huge cluster");
        }
    } else {
        // "Keep all": at least one numeric column has to be present.
        boolean numericFound = false;
        for (int i = 0; i < inSpec.getNumColumns() && !numericFound; i++) {
            numericFound = inSpec.getColumnSpec(i).getType().isCompatible(DoubleValue.class);
        }
        if (!numericFound) {
            throw new InvalidSettingsException("No numeric columns in input");
        }
    }

    addExcludeColumnsToIgnoreList(inSpec);
    final DataTableSpec appendedSpec = createAppendedSpec(inSpec);

    // Use the incoming PMML spec if the PMML in-port is enabled, otherwise start from a fresh one.
    final PMMLPortObjectSpec pmmlSpec = m_pmmlInEnabled
        ? (PMMLPortObjectSpec) inSpecs[1]
        : new PMMLPortObjectSpecCreator(inSpec).createSpec();

    if (!m_outputCenters) {
        return new PortObjectSpec[] { appendedSpec, createPMMLSpec(pmmlSpec, inSpec) };
    }
    return new PortObjectSpec[] { appendedSpec, createClusterCentersSpec(inSpec), createPMMLSpec(pmmlSpec, inSpec) };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) PMMLPortObjectSpec(org.knime.core.node.port.pmml.PMMLPortObjectSpec) ArrayList(java.util.ArrayList) SettingsModelFilterString(org.knime.core.node.defaultnodesettings.SettingsModelFilterString) LinkedList(java.util.LinkedList) DataColumnSpec(org.knime.core.data.DataColumnSpec) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) PMMLPortObjectSpec(org.knime.core.node.port.pmml.PMMLPortObjectSpec) PortObjectSpec(org.knime.core.node.port.PortObjectSpec) PMMLPortObjectSpecCreator(org.knime.core.node.port.pmml.PMMLPortObjectSpecCreator)

Example 35 with PMMLPortObjectSpec

use of org.knime.core.node.port.pmml.PMMLPortObjectSpec in project knime-core by knime.

the class FuzzyClusterNodeModel method execute.

/**
 * Generate new clustering based on InputDataTable and specified number of
 * clusters. In the output table, you will find the datarow with
 * supplementary information about the membership to each cluster center.
 * OUTPORT = original datarows with cluster membership information
 * <p>
 * The second returned port object is a PMML cluster model built from the
 * final cluster centres (without the noise cluster, if noise clustering is
 * enabled).
 *
 * {@inheritDoc}
 */
@Override
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    BufferedDataTable indata = (BufferedDataTable) inData[0];
    m_clusters = null;
    m_betweenClusterVariation = Double.NaN;
    m_withinClusterVariation = null;
    // Choose the algorithm variant: with/without noise cluster, in-memory or not.
    if (m_noise) {
        if (m_calculateDelta) {
            if (m_memory) {
                m_fcmAlgo = new FCMAlgorithmMemory(m_nrClusters, m_fuzzifier, m_calculateDelta, m_lambda);
            } else {
                m_fcmAlgo = new FCMAlgorithm(m_nrClusters, m_fuzzifier, m_calculateDelta, m_lambda);
            }
        } else {
            if (m_memory) {
                m_fcmAlgo = new FCMAlgorithmMemory(m_nrClusters, m_fuzzifier, m_calculateDelta, m_delta);
            } else {
                m_fcmAlgo = new FCMAlgorithm(m_nrClusters, m_fuzzifier, m_calculateDelta, m_delta);
            }
        }
    } else {
        if (m_memory) {
            m_fcmAlgo = new FCMAlgorithmMemory(m_nrClusters, m_fuzzifier);
        } else {
            m_fcmAlgo = new FCMAlgorithm(m_nrClusters, m_fuzzifier);
        }
    }
    int nrRows = indata.getRowCount();
    DataTableSpec spec = indata.getDataTableSpec();
    int nrCols = spec.getNumColumns();
    List<String> learningCols = new LinkedList<String>();
    // counter for included columns
    int z = 0;
    // Sized by the table width: at most every input column can be a learning column.
    final int[] matchedColumns = new int[nrCols];
    for (int i = 0; i < nrCols; i++) {
        // if include does contain current column name
        String colname = spec.getColumnSpec(i).getName();
        if (m_list.contains(colname)) {
            matchedColumns[z] = i;
            z++;
            learningCols.add(colname);
        }
    }
    // Only pass the indices that were actually found. Unfilled array slots would hold 0 and thereby
    // keep the first input column even if it is not part of the include list.
    final int[] columns = new int[z];
    System.arraycopy(matchedColumns, 0, columns, 0, z);
    ColumnRearranger colre = new ColumnRearranger(spec);
    colre.keepOnly(columns);
    BufferedDataTable filteredtable = exec.createColumnRearrangeTable(indata, colre, exec);
    // get dimension of feature space
    int dimension = filteredtable.getDataTableSpec().getNumColumns();
    Random random = new Random();
    if (m_useRandomSeed) {
        random.setSeed(m_randomSeed);
    }
    m_fcmAlgo.init(nrRows, dimension, filteredtable, random);
    // main loop - until clusters stop changing or maxNrIterations reached
    int currentIteration = 0;
    double totalchange = Double.MAX_VALUE;
    while ((totalchange > 1e-7) && (currentIteration < m_maxNrIterations)) {
        exec.checkCanceled();
        exec.setProgress((double) currentIteration / (double) m_maxNrIterations,
            "Iteration " + currentIteration + " Total change of prototypes: " + totalchange);
        totalchange = m_fcmAlgo.doOneIteration(exec);
        currentIteration++;
    }
    if (m_measures) {
        // The quality measures need the learning data as a plain double matrix.
        double[][] data = null;
        if (m_fcmAlgo instanceof FCMAlgorithmMemory) {
            data = ((FCMAlgorithmMemory) m_fcmAlgo).getConvertedData();
        } else {
            data = new double[nrRows][m_fcmAlgo.getDimension()];
            int curRow = 0;
            for (DataRow dRow : filteredtable) {
                for (int j = 0; j < dRow.getNumCells(); j++) {
                    if (!(dRow.getCell(j).isMissing())) {
                        DoubleValue dv = (DoubleValue) dRow.getCell(j);
                        data[curRow][j] = dv.getDoubleValue();
                    } else {
                        // missing cells are represented as 0
                        data[curRow][j] = 0;
                    }
                }
                curRow++;
            }
        }
        m_fcmmeasures = new FCMQualityMeasures(m_fcmAlgo.getClusterCentres(), m_fcmAlgo.getweightMatrix(), data,
            m_fuzzifier);
    }
    // Append the membership information to the original (unfiltered) input table.
    ColumnRearranger colRearranger = new ColumnRearranger(spec);
    CellFactory membershipFac = new ClusterMembershipFactory(m_fcmAlgo);
    colRearranger.append(membershipFac);
    BufferedDataTable result = exec.createColumnRearrangeTable(indata, colRearranger, exec);
    // don't write out the noise cluster!
    double[][] clustercentres = m_fcmAlgo.getClusterCentres();
    if (m_noise) {
        // copy all centres but the last one
        double[][] cleaned = new double[clustercentres.length - 1][];
        for (int i = 0; i < cleaned.length; i++) {
            cleaned[i] = new double[clustercentres[i].length];
            System.arraycopy(clustercentres[i], 0, cleaned[i], 0, clustercentres[i].length);
        }
        clustercentres = cleaned;
    }
    exec.setMessage("Creating PMML cluster model...");
    // handle the optional PMML input
    PMMLPortObject inPMMLPort = m_enablePMMLInput ? (PMMLPortObject) inData[1] : null;
    PMMLPortObjectSpec inPMMLSpec = null;
    if (inPMMLPort != null) {
        inPMMLSpec = inPMMLPort.getSpec();
    }
    PMMLPortObjectSpec pmmlOutSpec = createPMMLPortObjectSpec(inPMMLSpec, spec, learningCols);
    PMMLPortObject outPMMLPort = new PMMLPortObject(pmmlOutSpec, inPMMLPort, spec);
    outPMMLPort.addModelTranslater(new PMMLClusterTranslator(ComparisonMeasure.squaredEuclidean, m_nrClusters,
        clustercentres, null, new LinkedHashSet<String>(pmmlOutSpec.getLearningFields())));
    return new PortObject[] { result, outPMMLPort };
}
Also used : LinkedHashSet(java.util.LinkedHashSet) DataTableSpec(org.knime.core.data.DataTableSpec) PMMLPortObjectSpec(org.knime.core.node.port.pmml.PMMLPortObjectSpec) DataRow(org.knime.core.data.DataRow) LinkedList(java.util.LinkedList) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) Random(java.util.Random) PMMLClusterTranslator(org.knime.base.node.mine.cluster.PMMLClusterTranslator) DoubleValue(org.knime.core.data.DoubleValue) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) BufferedDataTable(org.knime.core.node.BufferedDataTable) CellFactory(org.knime.core.data.container.CellFactory) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) PortObject(org.knime.core.node.port.PortObject)

Aggregations

PMMLPortObjectSpec (org.knime.core.node.port.pmml.PMMLPortObjectSpec)77 DataTableSpec (org.knime.core.data.DataTableSpec)57 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)40 DataColumnSpec (org.knime.core.data.DataColumnSpec)31 PortObjectSpec (org.knime.core.node.port.PortObjectSpec)30 PMMLPortObject (org.knime.core.node.port.pmml.PMMLPortObject)23 PMMLPortObjectSpecCreator (org.knime.core.node.port.pmml.PMMLPortObjectSpecCreator)23 ColumnRearranger (org.knime.core.data.container.ColumnRearranger)22 SettingsModelString (org.knime.core.node.defaultnodesettings.SettingsModelString)20 BufferedDataTable (org.knime.core.node.BufferedDataTable)15 PortObject (org.knime.core.node.port.PortObject)12 DataCell (org.knime.core.data.DataCell)10 DoubleValue (org.knime.core.data.DoubleValue)10 DataRow (org.knime.core.data.DataRow)8 DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator)7 ArrayList (java.util.ArrayList)6 LinkedList (java.util.LinkedList)6 DataColumnDomain (org.knime.core.data.DataColumnDomain)6 DoubleCell (org.knime.core.data.def.DoubleCell)6 IOException (java.io.IOException)4