Search in sources:

Example 1 with PMMLPPCell

use of org.knime.base.node.mine.regression.pmmlgreg.PMMLPPCell in project knime-core by knime.

the class RegressionContent method createGeneralRegressionContent.

/**
 * Builds the PMML General Regression representation of this linear regression model.
 *
 * <p>Parameters are named {@code p0, p1, ...} in emission order: the intercept first (only when a
 * constant is included), followed by the learning fields of the output spec. A factor column
 * yields one parameter for each of its domain values except the first one; every other column
 * yields exactly one parameter. The offset value is only set when no constant is included.
 *
 * @return the PMMLGeneralRegressionContent
 */
public PMMLGeneralRegressionContent createGeneralRegressionContent() {
    // Wrap the factor and covariate names into PMML predictors.
    List<PMMLPredictor> factorPredictors = new ArrayList<PMMLPredictor>();
    for (String name : m_factorList) {
        factorPredictors.add(new PMMLPredictor(name));
    }
    List<PMMLPredictor> covariatePredictors = new ArrayList<PMMLPredictor>();
    for (String name : m_covariateList) {
        covariatePredictors.add(new PMMLPredictor(name));
    }

    // Collected in lock step: ParameterList, PPMatrix and ParamMatrix.
    List<PMMLParameter> parameters = new ArrayList<PMMLParameter>();
    List<PMMLPPCell> ppCells = new ArrayList<PMMLPPCell>();
    List<PMMLPCell> paramCells = new ArrayList<PMMLPCell>();

    // Running parameter index; also used as the column index into m_beta.
    int paramIdx = 0;
    if (m_includeConstant) {
        // The intercept only appears in the ParameterList and the ParamMatrix.
        final String interceptName = "p" + paramIdx;
        parameters.add(new PMMLParameter(interceptName, "Intercept"));
        paramCells.add(new PMMLPCell(interceptName, m_beta.getEntry(0, 0), 1));
        paramIdx++;
    }

    for (String field : m_outSpec.getLearningFields()) {
        if (!m_factorList.contains(field)) {
            // Non-factor column: a single parameter.
            final String name = "p" + paramIdx;
            parameters.add(new PMMLParameter(name, field));
            ppCells.add(new PMMLPPCell("1", field, name));
            paramCells.add(new PMMLPCell(name, m_beta.getEntry(0, paramIdx), 1));
            paramIdx++;
            continue;
        }
        // Factor column: one parameter per domain value, the first value being omitted.
        Iterator<DataCell> levels = m_factorDomainValues.get(field).iterator();
        if (levels.hasNext()) {
            levels.next();
            while (levels.hasNext()) {
                final DataCell level = levels.next();
                final String name = "p" + paramIdx;
                parameters.add(new PMMLParameter(name, "[" + field + "=" + level + "]"));
                ppCells.add(new PMMLPPCell(level.toString(), field, name));
                paramCells.add(new PMMLPCell(name, m_beta.getEntry(0, paramIdx), 1));
                paramIdx++;
            }
        }
    }

    // No covariance cells are produced for this model.
    final PMMLPCovCell[] covCells = new ArrayList<PMMLPCovCell>().toArray(new PMMLPCovCell[0]);
    final PMMLParameter[] parameterArray = parameters.toArray(new PMMLParameter[0]);
    final PMMLPredictor[] factorArray = factorPredictors.toArray(new PMMLPredictor[0]);
    final PMMLPredictor[] covariateArray = covariatePredictors.toArray(new PMMLPredictor[0]);
    final PMMLPPCell[] ppArray = ppCells.toArray(new PMMLPPCell[0]);
    final PMMLPCell[] paramArray = paramCells.toArray(new PMMLPCell[0]);

    PMMLGeneralRegressionContent result = new PMMLGeneralRegressionContent(
        ModelType.generalLinear, "KNIME Linear Regression", FunctionName.regression, "LinearRegression",
        parameterArray, factorArray, covariateArray, ppArray, covCells, paramArray);
    if (!m_includeConstant) {
        result.setOffsetValue(m_offsetValue);
    }
    return result;
}
Also used : PMMLPCell(org.knime.base.node.mine.regression.pmmlgreg.PMMLPCell) PMMLParameter(org.knime.base.node.mine.regression.pmmlgreg.PMMLParameter) PMMLPCovCell(org.knime.base.node.mine.regression.pmmlgreg.PMMLPCovCell) ArrayList(java.util.ArrayList) PMMLPredictor(org.knime.base.node.mine.regression.pmmlgreg.PMMLPredictor) PMMLGeneralRegressionContent(org.knime.base.node.mine.regression.pmmlgreg.PMMLGeneralRegressionContent) PMMLPPCell(org.knime.base.node.mine.regression.pmmlgreg.PMMLPPCell) DataCell(org.knime.core.data.DataCell)

Example 2 with PMMLPPCell

use of org.knime.base.node.mine.regression.pmmlgreg.PMMLPPCell in project knime-core by knime.

the class PolyRegContent method createGeneralRegressionContent.

/**
 * Creates a new PMML General Regression Content from this polynomial regression model.
 *
 * <p>Parameters are named {@code p0, p1, ...} in the order they are emitted: the optional
 * intercept first, then the learning fields of the output spec. A factor column contributes one
 * parameter per domain value except the first one, which is omitted; any other column
 * contributes a single parameter. A factor without any domain values contributes no parameters.
 * The offset value is only set when no constant is included.
 *
 * @return the PMMLGeneralRegressionContent
 */
@Override
public PMMLGeneralRegressionContent createGeneralRegressionContent() {
    List<PMMLPredictor> factors = new ArrayList<PMMLPredictor>();
    for (String factor : m_factorList) {
        factors.add(new PMMLPredictor(factor));
    }
    List<PMMLPredictor> covariates = new ArrayList<PMMLPredictor>();
    for (String covariate : m_covariateList) {
        covariates.add(new PMMLPredictor(covariate));
    }
    // the ParameterList, the PPMatrix and the ParamMatrix
    List<PMMLParameter> parameterList = new ArrayList<PMMLParameter>();
    List<PMMLPPCell> ppMatrix = new ArrayList<PMMLPPCell>();
    List<PMMLPCell> paramMatrix = new ArrayList<PMMLPCell>();
    // running parameter index, also used as the column index into m_beta
    int p = 0;
    if (m_includeConstant) {
        // Define the intercept
        parameterList.add(new PMMLParameter("p" + p, "Intercept"));
        paramMatrix.add(new PMMLPCell("p" + p, m_beta.getEntry(0, 0), 1));
        p++;
    }
    for (String colName : m_outSpec.getLearningFields()) {
        if (m_factorList.contains(colName)) {
            Iterator<DataCell> designIter = m_factorDomainValues.get(colName).iterator();
            if (!designIter.hasNext()) {
                // A factor without domain values has nothing to omit and nothing to encode;
                // calling next() here would throw a NoSuchElementException.
                continue;
            }
            // Omit first
            designIter.next();
            while (designIter.hasNext()) {
                DataCell dvValue = designIter.next();
                String pName = "p" + p;
                parameterList.add(new PMMLParameter(pName, "[" + colName + "=" + dvValue + "]"));
                ppMatrix.add(new PMMLPPCell(dvValue.toString(), colName, pName));
                paramMatrix.add(new PMMLPCell(pName, m_beta.getEntry(0, p), 1));
                p++;
            }
        } else {
            String pName = "p" + p;
            parameterList.add(new PMMLParameter(pName, colName));
            ppMatrix.add(new PMMLPPCell("1", colName, pName));
            paramMatrix.add(new PMMLPCell(pName, m_beta.getEntry(0, p), 1));
            p++;
        }
    }
    // no covariance cells are produced, the PCovMatrix stays empty
    List<PMMLPCovCell> pCovMatrix = new ArrayList<PMMLPCovCell>();
    PMMLGeneralRegressionContent content = new PMMLGeneralRegressionContent(ModelType.generalLinear, "KNIME Polynomial Regression", FunctionName.regression, "LinearRegression", parameterList.toArray(new PMMLParameter[0]), factors.toArray(new PMMLPredictor[0]), covariates.toArray(new PMMLPredictor[0]), ppMatrix.toArray(new PMMLPPCell[0]), pCovMatrix.toArray(new PMMLPCovCell[0]), paramMatrix.toArray(new PMMLPCell[0]));
    if (!m_includeConstant) {
        content.setOffsetValue(m_offsetValue);
    }
    return content;
}
Also used : PMMLPCell(org.knime.base.node.mine.regression.pmmlgreg.PMMLPCell) PMMLParameter(org.knime.base.node.mine.regression.pmmlgreg.PMMLParameter) PMMLPCovCell(org.knime.base.node.mine.regression.pmmlgreg.PMMLPCovCell) ArrayList(java.util.ArrayList) PMMLPredictor(org.knime.base.node.mine.regression.pmmlgreg.PMMLPredictor) PMMLGeneralRegressionContent(org.knime.base.node.mine.regression.pmmlgreg.PMMLGeneralRegressionContent) PMMLPPCell(org.knime.base.node.mine.regression.pmmlgreg.PMMLPPCell) DataCell(org.knime.core.data.DataCell)

Example 3 with PMMLPPCell

use of org.knime.base.node.mine.regression.pmmlgreg.PMMLPPCell in project knime-core by knime.

the class LogisticRegressionContent method createGeneralRegressionContent.

/**
 * Creates a new PMML General Regression Content from this logistic
 * regression model.
 *
 * <p>Parameters are emitted in this order: the intercept first, then the learning fields of the
 * output spec. A factor column contributes one parameter per domain value except the first one,
 * which is omitted; a factor without any domain values contributes no parameters. A column
 * registered in {@code m_vectorLengths} contributes one parameter per vector position, and any
 * other column contributes a single parameter. For every parameter one ParamMatrix cell is
 * written per target category except the last one, which is set as the target reference category.
 *
 * @return the PMMLGeneralRegressionContent
 */
public PMMLGeneralRegressionContent createGeneralRegressionContent() {
    List<PMMLPredictor> factors = new ArrayList<PMMLPredictor>();
    for (String factor : m_factorList) {
        factors.add(new PMMLPredictor(factor));
    }
    List<PMMLPredictor> covariates = new ArrayList<PMMLPredictor>();
    for (String covariate : m_covariateList) {
        covariates.add(new PMMLPredictor(covariate));
    }
    // the ParameterList, the PPMatrix and the ParamMatrix
    List<PMMLParameter> parameterList = new ArrayList<PMMLParameter>();
    List<PMMLPPCell> ppMatrix = new ArrayList<PMMLPPCell>();
    List<PMMLPCell> paramMatrix = new ArrayList<PMMLPCell>();
    // number of m_beta columns belonging to one target category
    int pCount = m_beta.getColumnDimension() / (m_targetCategories.size() - 1);
    // running parameter index within one target category's slice of m_beta
    int p = 0;
    parameterList.add(new PMMLParameter("p" + p, "Intercept"));
    addParameterCells(paramMatrix, "p" + p, p, pCount);
    p++;
    final List<String> learningFields = new ArrayList<>(m_outSpec.getLearningFields());
    // learningFields.addAll(m_vectorLengths.keySet());
    for (String colName : learningFields) {
        if (m_factorList.contains(colName)) {
            Iterator<DataCell> designIter = m_factorDomainValues.get(colName).iterator();
            if (!designIter.hasNext()) {
                // A factor without domain values has nothing to omit and nothing to encode;
                // calling next() here would throw a NoSuchElementException.
                continue;
            }
            // Omit first
            designIter.next();
            while (designIter.hasNext()) {
                DataCell dvValue = designIter.next();
                String pName = "p" + p;
                parameterList.add(new PMMLParameter(pName, "[" + colName + "=" + dvValue + "]"));
                ppMatrix.add(new PMMLPPCell(dvValue.toString(), colName, pName));
                addParameterCells(paramMatrix, pName, p, pCount);
                p++;
            }
        } else if (m_vectorLengths.containsKey(colName)) {
            final int length = m_vectorLengths.get(colName);
            // the parameter names of all vector positions share the index of the first position
            final int pFrozen = p;
            for (int idx = 0; idx < length; ++idx) {
                final String pName = "p" + pFrozen + "_" + idx;
                final String predictorName = VectorHandling.valueAt(colName, idx);
                parameterList.add(new PMMLParameter(pName, predictorName));
                ppMatrix.add(new PMMLPPCell("1", predictorName, pName));
                addParameterCells(paramMatrix, pName, p, pCount);
                p++;
            }
        } else {
            String pName = "p" + p;
            parameterList.add(new PMMLParameter(pName, colName));
            ppMatrix.add(new PMMLPPCell("1", colName, pName));
            addParameterCells(paramMatrix, pName, p, pCount);
            p++;
        }
    }
    // TODO PCovMatrix
    List<PMMLPCovCell> pCovMatrix = new ArrayList<PMMLPCovCell>();
    PMMLGeneralRegressionContent content = new PMMLGeneralRegressionContent(ModelType.multinomialLogistic, "KNIME Logistic Regression", FunctionName.classification, "LogisticRegression", parameterList.toArray(new PMMLParameter[0]), factors.toArray(new PMMLPredictor[0]), covariates.toArray(new PMMLPredictor[0]), m_vectorLengths, ppMatrix.toArray(new PMMLPPCell[0]), pCovMatrix.toArray(new PMMLPCovCell[0]), paramMatrix.toArray(new PMMLPCell[0]));
    content.setTargetReferenceCategory(m_targetCategories.get(m_targetCategories.size() - 1).toString());
    return content;
}

/**
 * Appends the ParamMatrix cells of one parameter: one cell for every target category except the
 * last one.
 *
 * @param paramMatrix the list the cells are appended to
 * @param pName the name of the parameter
 * @param p the index of the parameter within one target category's slice of m_beta
 * @param pCount the number of m_beta columns per target category
 */
private void addParameterCells(final List<PMMLPCell> paramMatrix, final String pName, final int p, final int pCount) {
    for (int k = 0; k < m_targetCategories.size() - 1; k++) {
        paramMatrix.add(new PMMLPCell(pName, m_beta.getEntry(0, p + (k * pCount)), 1, m_targetCategories.get(k).toString()));
    }
}
Also used : PMMLPCell(org.knime.base.node.mine.regression.pmmlgreg.PMMLPCell) PMMLParameter(org.knime.base.node.mine.regression.pmmlgreg.PMMLParameter) PMMLPCovCell(org.knime.base.node.mine.regression.pmmlgreg.PMMLPCovCell) ArrayList(java.util.ArrayList) PMMLPredictor(org.knime.base.node.mine.regression.pmmlgreg.PMMLPredictor) PMMLGeneralRegressionContent(org.knime.base.node.mine.regression.pmmlgreg.PMMLGeneralRegressionContent) PMMLPPCell(org.knime.base.node.mine.regression.pmmlgreg.PMMLPPCell) DataCell(org.knime.core.data.DataCell)

Example 4 with PMMLPPCell

use of org.knime.base.node.mine.regression.pmmlgreg.PMMLPPCell in project knime-core by knime.

the class RegressionPredictorCellFactory method determineFactorValues.

/**
 * Determines, for every factor of the regression content, the list of its values.
 *
 * <p>The list starts from the domain values of the factor's column in the training spec. Every
 * value referenced for that factor in the PPMatrix is then moved to the end, so the referenced
 * values end up in PPMatrix order behind the values the PPMatrix does not mention.
 *
 * @param content the content
 * @param trainingSpec the table spec of the training set
 * @return the factors name mapped to its values
 * @throws InvalidSettingsException If the PMML data dictionary contains more elements for a nominal column
 *                                  than represented in the data
 */
protected static Map<String, List<DataCell>> determineFactorValues(final PMMLGeneralRegressionContent content, final DataTableSpec trainingSpec) throws InvalidSettingsException {
    HashMap<String, List<DataCell>> result = new HashMap<String, List<DataCell>>();
    for (PMMLPredictor factor : content.getFactorList()) {
        final String factorName = factor.getName();

        // Lookup from a value's string form to its domain cell.
        Map<String, DataCell> cellByText = new HashMap<String, DataCell>();
        for (DataCell domainCell : trainingSpec.getColumnSpec(factorName).getDomain().getValues()) {
            cellByText.put(domainCell.toString(), domainCell);
        }

        // Start with all domain values, since a PMMLGeneralRegression model need not specify
        // all values in its PPMatrix.
        Set<DataCell> ordered = new LinkedHashSet<DataCell>(trainingSpec.getColumnSpec(factorName).getDomain().getValues());

        int referenced = 0;
        for (PMMLPPCell ppCell : content.getPPMatrix()) {
            if (!ppCell.getPredictorName().equals(factorName)) {
                continue;
            }
            DataCell match = cellByText.get(ppCell.getValue());
            // Remove and re-add to move the cell to the end of the set; in the end this gives
            // the same ordering as in the PPMatrix of the PMMLGeneralRegression model.
            ordered.remove(match);
            ordered.add(match);
            referenced++;
        }

        // The base line category may not be in the PPMatrix of the PMMLGeneralRegression model;
        // in this case the count is lower than the number of domain values. But if the count is
        // even less than that, the base line category is ambiguous.
        final int valuesDataDictionary = trainingSpec.getColumnSpec(factorName).getDomain().getValues().size();
        if (valuesDataDictionary - 1 > referenced) {
            throw new InvalidSettingsException("The data dictionary to column \"" + factorName + "\" contains more elements than represented in the regression model " + "(unable to decode dummy variables as reference is unknown: " + valuesDataDictionary + " > " + referenced + " + 1)");
        }

        result.put(factorName, new ArrayList<DataCell>(ordered));
    }
    return result;
}
Also used : LinkedHashSet(java.util.LinkedHashSet) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) PMMLPredictor(org.knime.base.node.mine.regression.pmmlgreg.PMMLPredictor) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) DataCell(org.knime.core.data.DataCell) PMMLPPCell(org.knime.base.node.mine.regression.pmmlgreg.PMMLPPCell) ArrayList(java.util.ArrayList) List(java.util.List)

Example 5 with PMMLPPCell

use of org.knime.base.node.mine.regression.pmmlgreg.PMMLPPCell in project knime-core by knime.

the class LogisticRegressionContent method createGeneralRegressionContent.

/**
 * Creates a new PMML General Regression Content from this logistic
 * regression model.
 *
 * <p>Parameters are named {@code p0, p1, ...} in the order they are emitted: the intercept first,
 * then the learning fields of the output spec. A factor column contributes one parameter per
 * domain value except the first one, which is omitted; a factor without any domain values
 * contributes no parameters. Any other column contributes a single parameter. For every
 * parameter one ParamMatrix cell is written per target category except the last one, which is
 * set as the target reference category.
 *
 * @return the PMMLGeneralRegressionContent
 */
public PMMLGeneralRegressionContent createGeneralRegressionContent() {
    List<PMMLPredictor> factors = new ArrayList<PMMLPredictor>();
    for (String factor : m_factorList) {
        factors.add(new PMMLPredictor(factor));
    }
    List<PMMLPredictor> covariates = new ArrayList<PMMLPredictor>();
    for (String covariate : m_covariateList) {
        covariates.add(new PMMLPredictor(covariate));
    }
    // the ParameterList, the PPMatrix and the ParamMatrix
    List<PMMLParameter> parameterList = new ArrayList<PMMLParameter>();
    List<PMMLPPCell> ppMatrix = new ArrayList<PMMLPPCell>();
    List<PMMLPCell> paramMatrix = new ArrayList<PMMLPCell>();
    // number of m_beta columns belonging to one target category
    int pCount = m_beta.getColumnDimension() / (m_targetCategories.size() - 1);
    // running parameter index within one target category's slice of m_beta
    int p = 0;
    parameterList.add(new PMMLParameter("p" + p, "Intercept"));
    addParameterCells(paramMatrix, "p" + p, p, pCount);
    p++;
    for (String colName : m_outSpec.getLearningFields()) {
        if (m_factorList.contains(colName)) {
            Iterator<DataCell> designIter = m_factorDomainValues.get(colName).iterator();
            if (!designIter.hasNext()) {
                // A factor without domain values has nothing to omit and nothing to encode;
                // calling next() here would throw a NoSuchElementException.
                continue;
            }
            // Omit first
            designIter.next();
            while (designIter.hasNext()) {
                DataCell dvValue = designIter.next();
                String pName = "p" + p;
                parameterList.add(new PMMLParameter(pName, "[" + colName + "=" + dvValue + "]"));
                ppMatrix.add(new PMMLPPCell(dvValue.toString(), colName, pName));
                addParameterCells(paramMatrix, pName, p, pCount);
                p++;
            }
        } else {
            String pName = "p" + p;
            parameterList.add(new PMMLParameter(pName, colName));
            ppMatrix.add(new PMMLPPCell("1", colName, pName));
            addParameterCells(paramMatrix, pName, p, pCount);
            p++;
        }
    }
    // TODO PCovMatrix
    List<PMMLPCovCell> pCovMatrix = new ArrayList<PMMLPCovCell>();
    PMMLGeneralRegressionContent content = new PMMLGeneralRegressionContent(ModelType.multinomialLogistic, "KNIME Logistic Regression", FunctionName.classification, "LogisticRegression", parameterList.toArray(new PMMLParameter[0]), factors.toArray(new PMMLPredictor[0]), covariates.toArray(new PMMLPredictor[0]), ppMatrix.toArray(new PMMLPPCell[0]), pCovMatrix.toArray(new PMMLPCovCell[0]), paramMatrix.toArray(new PMMLPCell[0]));
    content.setTargetReferenceCategory(m_targetCategories.get(m_targetCategories.size() - 1).toString());
    return content;
}

/**
 * Appends the ParamMatrix cells of one parameter: one cell for every target category except the
 * last one.
 *
 * @param paramMatrix the list the cells are appended to
 * @param pName the name of the parameter
 * @param p the index of the parameter within one target category's slice of m_beta
 * @param pCount the number of m_beta columns per target category
 */
private void addParameterCells(final List<PMMLPCell> paramMatrix, final String pName, final int p, final int pCount) {
    for (int k = 0; k < m_targetCategories.size() - 1; k++) {
        paramMatrix.add(new PMMLPCell(pName, m_beta.get(0, p + (k * pCount)), 1, m_targetCategories.get(k).toString()));
    }
}
Also used : PMMLPCell(org.knime.base.node.mine.regression.pmmlgreg.PMMLPCell) PMMLParameter(org.knime.base.node.mine.regression.pmmlgreg.PMMLParameter) PMMLPCovCell(org.knime.base.node.mine.regression.pmmlgreg.PMMLPCovCell) ArrayList(java.util.ArrayList) PMMLPredictor(org.knime.base.node.mine.regression.pmmlgreg.PMMLPredictor) PMMLGeneralRegressionContent(org.knime.base.node.mine.regression.pmmlgreg.PMMLGeneralRegressionContent) PMMLPPCell(org.knime.base.node.mine.regression.pmmlgreg.PMMLPPCell) DataCell(org.knime.core.data.DataCell)

Aggregations

ArrayList (java.util.ArrayList)7 PMMLPPCell (org.knime.base.node.mine.regression.pmmlgreg.PMMLPPCell)7 PMMLPredictor (org.knime.base.node.mine.regression.pmmlgreg.PMMLPredictor)7 DataCell (org.knime.core.data.DataCell)7 PMMLGeneralRegressionContent (org.knime.base.node.mine.regression.pmmlgreg.PMMLGeneralRegressionContent)5 PMMLPCell (org.knime.base.node.mine.regression.pmmlgreg.PMMLPCell)5 PMMLPCovCell (org.knime.base.node.mine.regression.pmmlgreg.PMMLPCovCell)5 PMMLParameter (org.knime.base.node.mine.regression.pmmlgreg.PMMLParameter)5 HashMap (java.util.HashMap)2 LinkedHashSet (java.util.LinkedHashSet)2 List (java.util.List)2 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)2