Search in sources :

Example 1 with PMMLPredictor

use of org.knime.base.node.mine.regression.pmmlgreg.PMMLPredictor in project knime-core by knime.

the class RegressionPredictorNodeModel method createRearranger.

/**
 * Validates the given regression model against the input table spec and creates a rearranger that appends
 * the matching predictor to the input columns.
 *
 * @param content the regression model; must be a general linear (regression) or multinomial logistic
 *            (classification) model
 * @param pmmlSpec the PMML spec; its first target field is handed to the predictor
 * @param inDataSpec the spec of the table to predict on
 * @return a rearranger over {@code inDataSpec} with the predictor appended
 * @throws InvalidSettingsException if the model is missing or unsupported, or if a factor/covariate column is
 *             missing from the table or has an incompatible type
 */
private ColumnRearranger createRearranger(final PMMLGeneralRegressionContent content, final PMMLPortObjectSpec pmmlSpec, final DataTableSpec inDataSpec) throws InvalidSettingsException {
    if (content == null) {
        throw new InvalidSettingsException("No input");
    }
    // the predictor can only handle general linear and multinomial logistic regression models
    if (!(content.getModelType().equals(ModelType.multinomialLogistic) || content.getModelType().equals(ModelType.generalLinear))) {
        throw new InvalidSettingsException("Model Type: " + content.getModelType() + " is not supported.");
    }
    if (content.getModelType().equals(ModelType.generalLinear) && !content.getFunctionName().equals(FunctionName.regression)) {
        throw new InvalidSettingsException("Function Name: " + content.getFunctionName() + " is not supported for linear regression.");
    }
    if (content.getModelType().equals(ModelType.multinomialLogistic) && !content.getFunctionName().equals(FunctionName.classification)) {
        throw new InvalidSettingsException("Function Name: " + content.getFunctionName() + " is not supported for logistic regression.");
    }
    // check if all factors are in the given data table and that they
    // are nominal values
    for (PMMLPredictor factor : content.getFactorList()) {
        DataColumnSpec columnSpec = inDataSpec.getColumnSpec(factor.getName());
        if (null == columnSpec) {
            throw new InvalidSettingsException("The column \"" + factor.getName() + "\" is in the model but not in given table.");
        }
        if (!columnSpec.getType().isCompatible(NominalValue.class)) {
            throw new InvalidSettingsException("The column \"" + factor.getName() + "\" is supposed to be nominal.");
        }
    }
    // check if all covariates are in the given data table and that they
    // are numeric values
    Pattern pattern = Pattern.compile("(.*)\\[\\d+\\]");
    for (PMMLPredictor covariate : content.getCovariateList()) {
        DataColumnSpec columnSpec = inDataSpec.getColumnSpec(covariate.getName());
        if (null == columnSpec) {
            // A covariate named "<column>[<index>]" refers to an element of <column>; look up the base column.
            Matcher matcher = pattern.matcher(covariate.getName());
            // group(1) may only be queried after a successful match, otherwise it throws IllegalStateException
            if (matcher.matches()) {
                columnSpec = inDataSpec.getColumnSpec(matcher.group(1));
            }
            if (null == columnSpec) {
                throw new InvalidSettingsException("The column \"" + covariate.getName() + "\" is in the model but not in given table.");
            }
        }
        // Accept plain numeric columns, or columns registered in the model's vector lengths that are
        // numeric collections, bit vectors or byte vectors.
        if (!columnSpec.getType().isCompatible(DoubleValue.class)
            && !(content.getVectorLengths().containsKey(columnSpec.getName())
                && ((columnSpec.getType().isCollectionType()
                    && columnSpec.getType().getCollectionElementType().isCompatible(DoubleValue.class))
                    || columnSpec.getType().isCompatible(BitVectorValue.class)
                    || columnSpec.getType().isCompatible(ByteVectorValue.class)))) {
            throw new InvalidSettingsException("The column \"" + covariate.getName() + "\" is supposed to be numeric.");
        }
    }
    ColumnRearranger c = new ColumnRearranger(inDataSpec);
    if (content.getModelType().equals(ModelType.generalLinear)) {
        c.append(new LinReg2Predictor(content, inDataSpec, pmmlSpec, pmmlSpec.getTargetFields().get(0), m_settings));
    } else {
        c.append(new LogRegPredictor(content, inDataSpec, pmmlSpec, pmmlSpec.getTargetFields().get(0), m_settings));
    }
    return c;
}
Also used : PMMLPredictor(org.knime.base.node.mine.regression.pmmlgreg.PMMLPredictor) Pattern(java.util.regex.Pattern) DataColumnSpec(org.knime.core.data.DataColumnSpec) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) Matcher(java.util.regex.Matcher) DoubleValue(org.knime.core.data.DoubleValue) NominalValue(org.knime.core.data.NominalValue) BitVectorValue(org.knime.core.data.vector.bitvector.BitVectorValue)

Example 2 with PMMLPredictor

use of org.knime.base.node.mine.regression.pmmlgreg.PMMLPredictor in project knime-core by knime.

the class RegressionPredictorNodeModel method createRearranger.

/**
 * Validates the given regression model against the input table spec and creates a rearranger that appends
 * the matching predictor to the input columns.
 *
 * @param content the regression model; must be a general linear (regression) or multinomial logistic
 *            (classification) model
 * @param pmmlSpec the PMML spec; its first target field is handed to the predictor
 * @param inDataSpec the spec of the table to predict on
 * @return a rearranger over {@code inDataSpec} with the predictor appended
 * @throws InvalidSettingsException if the model is missing or unsupported, or if a factor/covariate column is
 *             missing from the table or has an incompatible type
 */
private ColumnRearranger createRearranger(final PMMLGeneralRegressionContent content, final PMMLPortObjectSpec pmmlSpec, final DataTableSpec inDataSpec) throws InvalidSettingsException {
    if (content == null) {
        throw new InvalidSettingsException("No input");
    }
    // the predictor can only handle general linear and multinomial logistic regression models
    if (!(content.getModelType().equals(ModelType.multinomialLogistic) || content.getModelType().equals(ModelType.generalLinear))) {
        throw new InvalidSettingsException("Model Type: " + content.getModelType() + " is not supported.");
    }
    if (content.getModelType().equals(ModelType.generalLinear) && !content.getFunctionName().equals(FunctionName.regression)) {
        throw new InvalidSettingsException("Function Name: " + content.getFunctionName() + " is not supported for linear regression.");
    }
    if (content.getModelType().equals(ModelType.multinomialLogistic) && !content.getFunctionName().equals(FunctionName.classification)) {
        throw new InvalidSettingsException("Function Name: " + content.getFunctionName() + " is not supported for logistic regression.");
    }
    // check if all factors are in the given data table and that they
    // are nominal values
    for (PMMLPredictor factor : content.getFactorList()) {
        DataColumnSpec columnSpec = inDataSpec.getColumnSpec(factor.getName());
        if (null == columnSpec) {
            throw new InvalidSettingsException("The column \"" + factor.getName() + "\" is in the model but not in given table.");
        }
        if (!columnSpec.getType().isCompatible(NominalValue.class)) {
            throw new InvalidSettingsException("The column \"" + factor.getName() + "\" is supposed to be nominal.");
        }
    }
    // check if all covariates are in the given data table and that they
    // are numeric values
    Pattern pattern = Pattern.compile("(.*)\\[\\d+\\]");
    for (PMMLPredictor covariate : content.getCovariateList()) {
        DataColumnSpec columnSpec = inDataSpec.getColumnSpec(covariate.getName());
        if (null == columnSpec) {
            // A covariate named "<column>[<index>]" refers to an element of <column>; look up the base column.
            Matcher matcher = pattern.matcher(covariate.getName());
            // group(1) may only be queried after a successful match, otherwise it throws IllegalStateException
            if (matcher.matches()) {
                columnSpec = inDataSpec.getColumnSpec(matcher.group(1));
            }
            if (null == columnSpec) {
                throw new InvalidSettingsException("The column \"" + covariate.getName() + "\" is in the model but not in given table.");
            }
        }
        // Accept plain numeric columns, or columns registered in the model's vector lengths that are
        // numeric collections, bit vectors or byte vectors.
        if (!columnSpec.getType().isCompatible(DoubleValue.class)
            && !(content.getVectorLengths().containsKey(columnSpec.getName())
                && ((columnSpec.getType().isCollectionType()
                    && columnSpec.getType().getCollectionElementType().isCompatible(DoubleValue.class))
                    || columnSpec.getType().isCompatible(BitVectorValue.class)
                    || columnSpec.getType().isCompatible(ByteVectorValue.class)))) {
            throw new InvalidSettingsException("The column \"" + covariate.getName() + "\" is supposed to be numeric.");
        }
    }
    ColumnRearranger c = new ColumnRearranger(inDataSpec);
    if (content.getModelType().equals(ModelType.generalLinear)) {
        c.append(new LinReg2Predictor(content, inDataSpec, pmmlSpec, pmmlSpec.getTargetFields().get(0), m_settings));
    } else {
        c.append(new LogRegPredictor(content, inDataSpec, pmmlSpec, pmmlSpec.getTargetFields().get(0), m_settings));
    }
    return c;
}
Also used : PMMLPredictor(org.knime.base.node.mine.regression.pmmlgreg.PMMLPredictor) Pattern(java.util.regex.Pattern) DataColumnSpec(org.knime.core.data.DataColumnSpec) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) Matcher(java.util.regex.Matcher) DoubleValue(org.knime.core.data.DoubleValue) NominalValue(org.knime.core.data.NominalValue) BitVectorValue(org.knime.core.data.vector.bitvector.BitVectorValue)

Example 3 with PMMLPredictor

use of org.knime.base.node.mine.regression.pmmlgreg.PMMLPredictor in project knime-core by knime.

the class RegressionContent method createGeneralRegressionContent.

/**
 * Builds the PMML General Regression representation of this linear regression model.
 *
 * <p>Parameters are named {@code p0, p1, ...} in the order they are emitted: the optional intercept first,
 * followed by one parameter per learning field (covariates) or one per non-first domain value (factors).
 *
 * @return the PMMLGeneralRegressionContent
 */
public PMMLGeneralRegressionContent createGeneralRegressionContent() {
    final List<PMMLPredictor> factorPredictors = new ArrayList<PMMLPredictor>();
    for (final String factorName : m_factorList) {
        factorPredictors.add(new PMMLPredictor(factorName));
    }
    final List<PMMLPredictor> covariatePredictors = new ArrayList<PMMLPredictor>();
    for (final String covariateName : m_covariateList) {
        covariatePredictors.add(new PMMLPredictor(covariateName));
    }

    // Collect the ParameterList, the PPMatrix and the ParamMatrix while walking the learning fields.
    final List<PMMLParameter> parameters = new ArrayList<PMMLParameter>();
    final List<PMMLPPCell> ppCells = new ArrayList<PMMLPPCell>();
    final List<PMMLPCell> paramCells = new ArrayList<PMMLPCell>();
    int index = 0;

    if (m_includeConstant) {
        // The intercept occupies the first parameter slot and the first beta entry.
        final String interceptName = "p" + index;
        parameters.add(new PMMLParameter(interceptName, "Intercept"));
        paramCells.add(new PMMLPCell(interceptName, m_beta.getEntry(0, 0), 1));
        index++;
    }

    for (final String column : m_outSpec.getLearningFields()) {
        if (!m_factorList.contains(column)) {
            // Covariate: a single parameter with exponent "1".
            final String name = "p" + index;
            parameters.add(new PMMLParameter(name, column));
            ppCells.add(new PMMLPPCell("1", column, name));
            paramCells.add(new PMMLPCell(name, m_beta.getEntry(0, index), 1));
            index++;
            continue;
        }

        // Factor: one parameter per domain value, except the first one which is skipped.
        final Iterator<DataCell> domainValues = m_factorDomainValues.get(column).iterator();
        if (!domainValues.hasNext()) {
            continue;
        }
        domainValues.next();
        while (domainValues.hasNext()) {
            final DataCell value = domainValues.next();
            final String name = "p" + index;
            parameters.add(new PMMLParameter(name, "[" + column + "=" + value + "]"));
            ppCells.add(new PMMLPPCell(value.toString(), column, name));
            paramCells.add(new PMMLPCell(name, m_beta.getEntry(0, index), 1));
            index++;
        }
    }

    // No covariance information is exported; the PCovMatrix stays empty.
    final List<PMMLPCovCell> covCells = new ArrayList<PMMLPCovCell>();
    final PMMLGeneralRegressionContent result = new PMMLGeneralRegressionContent(
        ModelType.generalLinear,
        "KNIME Linear Regression",
        FunctionName.regression,
        "LinearRegression",
        parameters.toArray(new PMMLParameter[0]),
        factorPredictors.toArray(new PMMLPredictor[0]),
        covariatePredictors.toArray(new PMMLPredictor[0]),
        ppCells.toArray(new PMMLPPCell[0]),
        covCells.toArray(new PMMLPCovCell[0]),
        paramCells.toArray(new PMMLPCell[0]));
    if (!m_includeConstant) {
        // Without an intercept the configured offset value is stored on the content instead.
        result.setOffsetValue(m_offsetValue);
    }
    return result;
}
Also used : PMMLPCell(org.knime.base.node.mine.regression.pmmlgreg.PMMLPCell) PMMLParameter(org.knime.base.node.mine.regression.pmmlgreg.PMMLParameter) PMMLPCovCell(org.knime.base.node.mine.regression.pmmlgreg.PMMLPCovCell) ArrayList(java.util.ArrayList) PMMLPredictor(org.knime.base.node.mine.regression.pmmlgreg.PMMLPredictor) PMMLGeneralRegressionContent(org.knime.base.node.mine.regression.pmmlgreg.PMMLGeneralRegressionContent) PMMLPPCell(org.knime.base.node.mine.regression.pmmlgreg.PMMLPPCell) DataCell(org.knime.core.data.DataCell)

Example 4 with PMMLPredictor

use of org.knime.base.node.mine.regression.pmmlgreg.PMMLPredictor in project knime-core by knime.

the class PolyRegContent method createGeneralRegressionContent.

/**
 * Creates a new PMML General Regression Content from this polynomial regression model.
 *
 * <p>Parameters are named {@code p0, p1, ...} in the order they are emitted: the optional intercept first,
 * followed by one parameter per learning field (covariates) or one per non-first domain value (factors).
 *
 * @return the PMMLGeneralRegressionContent
 */
@Override
public PMMLGeneralRegressionContent createGeneralRegressionContent() {
    List<PMMLPredictor> factors = new ArrayList<PMMLPredictor>();
    for (String factor : m_factorList) {
        factors.add(new PMMLPredictor(factor));
    }
    List<PMMLPredictor> covariates = new ArrayList<PMMLPredictor>();
    for (String covariate : m_covariateList) {
        covariates.add(new PMMLPredictor(covariate));
    }
    // build the ParameterList, the PPMatrix and the ParamMatrix
    List<PMMLParameter> parameterList = new ArrayList<PMMLParameter>();
    List<PMMLPPCell> ppMatrix = new ArrayList<PMMLPPCell>();
    List<PMMLPCell> paramMatrix = new ArrayList<PMMLPCell>();
    int p = 0;
    if (m_includeConstant) {
        // Define the intercept
        parameterList.add(new PMMLParameter("p" + p, "Intercept"));
        paramMatrix.add(new PMMLPCell("p" + p, m_beta.getEntry(0, 0), 1));
        p++;
    }
    for (String colName : m_outSpec.getLearningFields()) {
        if (m_factorList.contains(colName)) {
            Iterator<DataCell> designIter = m_factorDomainValues.get(colName).iterator();
            // A factor without any domain values contributes no parameters; without this guard the
            // unconditional next() below would throw NoSuchElementException.
            if (!designIter.hasNext()) {
                continue;
            }
            // Omit first
            designIter.next();
            while (designIter.hasNext()) {
                DataCell dvValue = designIter.next();
                String pName = "p" + p;
                parameterList.add(new PMMLParameter(pName, "[" + colName + "=" + dvValue + "]"));
                ppMatrix.add(new PMMLPPCell(dvValue.toString(), colName, pName));
                paramMatrix.add(new PMMLPCell(pName, m_beta.getEntry(0, p), 1));
                p++;
            }
        } else {
            String pName = "p" + p;
            parameterList.add(new PMMLParameter(pName, colName));
            ppMatrix.add(new PMMLPPCell("1", colName, pName));
            paramMatrix.add(new PMMLPCell(pName, m_beta.getEntry(0, p), 1));
            p++;
        }
    }
    // no covariance information is exported; the PCovMatrix stays empty
    List<PMMLPCovCell> pCovMatrix = new ArrayList<PMMLPCovCell>();
    PMMLGeneralRegressionContent content = new PMMLGeneralRegressionContent(ModelType.generalLinear, "KNIME Polynomial Regression", FunctionName.regression, "LinearRegression", parameterList.toArray(new PMMLParameter[0]), factors.toArray(new PMMLPredictor[0]), covariates.toArray(new PMMLPredictor[0]), ppMatrix.toArray(new PMMLPPCell[0]), pCovMatrix.toArray(new PMMLPCovCell[0]), paramMatrix.toArray(new PMMLPCell[0]));
    if (!m_includeConstant) {
        // without an intercept the offset value is stored on the content instead
        content.setOffsetValue(m_offsetValue);
    }
    return content;
}
Also used : PMMLPCell(org.knime.base.node.mine.regression.pmmlgreg.PMMLPCell) PMMLParameter(org.knime.base.node.mine.regression.pmmlgreg.PMMLParameter) PMMLPCovCell(org.knime.base.node.mine.regression.pmmlgreg.PMMLPCovCell) ArrayList(java.util.ArrayList) PMMLPredictor(org.knime.base.node.mine.regression.pmmlgreg.PMMLPredictor) PMMLGeneralRegressionContent(org.knime.base.node.mine.regression.pmmlgreg.PMMLGeneralRegressionContent) PMMLPPCell(org.knime.base.node.mine.regression.pmmlgreg.PMMLPPCell) DataCell(org.knime.core.data.DataCell)

Example 5 with PMMLPredictor

use of org.knime.base.node.mine.regression.pmmlgreg.PMMLPredictor in project knime-core by knime.

the class LogisticRegressionContent method createGeneralRegressionContent.

/**
 * Creates a new PMML General Regression Content from this logistic
 * regression model.
 *
 * <p>The last entry of the target categories is set as the target reference category; every other target
 * category gets its own cell per parameter. The coefficient for parameter {@code p} and the {@code k}-th
 * target category is read from {@code m_beta} at column {@code p + k * pCount}.
 *
 * @return the PMMLGeneralRegressionContent
 */
public PMMLGeneralRegressionContent createGeneralRegressionContent() {
    List<PMMLPredictor> factors = new ArrayList<PMMLPredictor>();
    for (String factor : m_factorList) {
        factors.add(new PMMLPredictor(factor));
    }
    List<PMMLPredictor> covariates = new ArrayList<PMMLPredictor>();
    for (String covariate : m_covariateList) {
        covariates.add(new PMMLPredictor(covariate));
    }
    // build the ParameterList, the PPMatrix and the ParamMatrix
    List<PMMLParameter> parameterList = new ArrayList<PMMLParameter>();
    List<PMMLPPCell> ppMatrix = new ArrayList<PMMLPPCell>();
    List<PMMLPCell> paramMatrix = new ArrayList<PMMLPCell>();
    // number of target categories excluding the reference category (the last one)
    // NOTE(review): with a single target category this is 0 and the division below fails - confirm callers
    // never reach this method in that state.
    final int nonReferenceCategories = m_targetCategories.size() - 1;
    int pCount = m_beta.getColumnDimension() / nonReferenceCategories;
    int p = 0;
    // the intercept always occupies the first parameter slot
    parameterList.add(new PMMLParameter("p" + p, "Intercept"));
    for (int k = 0; k < nonReferenceCategories; k++) {
        paramMatrix.add(new PMMLPCell("p" + p, m_beta.getEntry(0, p + (k * pCount)), 1, m_targetCategories.get(k).toString()));
    }
    p++;
    final List<String> learningFields = new ArrayList<>(m_outSpec.getLearningFields());
    for (String colName : learningFields) {
        if (m_factorList.contains(colName)) {
            Iterator<DataCell> designIter = m_factorDomainValues.get(colName).iterator();
            // A factor without any domain values contributes no parameters; without this guard the
            // unconditional next() below would throw NoSuchElementException.
            if (!designIter.hasNext()) {
                continue;
            }
            // Omit first
            designIter.next();
            while (designIter.hasNext()) {
                DataCell dvValue = designIter.next();
                String pName = "p" + p;
                parameterList.add(new PMMLParameter(pName, "[" + colName + "=" + dvValue + "]"));
                ppMatrix.add(new PMMLPPCell(dvValue.toString(), colName, pName));
                for (int k = 0; k < nonReferenceCategories; k++) {
                    paramMatrix.add(new PMMLPCell(pName, m_beta.getEntry(0, p + (k * pCount)), 1, m_targetCategories.get(k).toString()));
                }
                p++;
            }
        } else {
            if (m_vectorLengths.containsKey(colName)) {
                // Vector column: one parameter per element. The parameter name is built from the index of
                // the first element plus the element position, while p keeps advancing through m_beta.
                final int length = m_vectorLengths.get(colName);
                final int pFrozen = p;
                for (int idx = 0; idx < length; ++idx) {
                    final String pName = "p" + pFrozen + "_" + idx;
                    final String predictorName = VectorHandling.valueAt(colName, idx);
                    parameterList.add(new PMMLParameter(pName, predictorName));
                    ppMatrix.add(new PMMLPPCell("1", predictorName, pName));
                    for (int k = 0; k < nonReferenceCategories; k++) {
                        paramMatrix.add(new PMMLPCell(pName, m_beta.getEntry(0, p + (k * pCount)), 1, m_targetCategories.get(k).toString()));
                    }
                    p++;
                }
            } else {
                // plain covariate: a single parameter with exponent "1"
                String pName = "p" + p;
                parameterList.add(new PMMLParameter(pName, colName));
                ppMatrix.add(new PMMLPPCell("1", colName, pName));
                for (int k = 0; k < nonReferenceCategories; k++) {
                    paramMatrix.add(new PMMLPCell(pName, m_beta.getEntry(0, p + (k * pCount)), 1, m_targetCategories.get(k).toString()));
                }
                p++;
            }
        }
    }
    // TODO PCovMatrix
    List<PMMLPCovCell> pCovMatrix = new ArrayList<PMMLPCovCell>();
    PMMLGeneralRegressionContent content = new PMMLGeneralRegressionContent(ModelType.multinomialLogistic, "KNIME Logistic Regression", FunctionName.classification, "LogisticRegression", parameterList.toArray(new PMMLParameter[0]), factors.toArray(new PMMLPredictor[0]), covariates.toArray(new PMMLPredictor[0]), m_vectorLengths, ppMatrix.toArray(new PMMLPPCell[0]), pCovMatrix.toArray(new PMMLPCovCell[0]), paramMatrix.toArray(new PMMLPCell[0]));
    // the last target category serves as the reference category
    content.setTargetReferenceCategory(m_targetCategories.get(nonReferenceCategories).toString());
    return content;
}
Also used : PMMLPCell(org.knime.base.node.mine.regression.pmmlgreg.PMMLPCell) PMMLParameter(org.knime.base.node.mine.regression.pmmlgreg.PMMLParameter) PMMLPCovCell(org.knime.base.node.mine.regression.pmmlgreg.PMMLPCovCell) ArrayList(java.util.ArrayList) PMMLPredictor(org.knime.base.node.mine.regression.pmmlgreg.PMMLPredictor) PMMLGeneralRegressionContent(org.knime.base.node.mine.regression.pmmlgreg.PMMLGeneralRegressionContent) PMMLPPCell(org.knime.base.node.mine.regression.pmmlgreg.PMMLPPCell) DataCell(org.knime.core.data.DataCell)

Aggregations

PMMLPredictor (org.knime.base.node.mine.regression.pmmlgreg.PMMLPredictor)10 ArrayList (java.util.ArrayList)7 PMMLPPCell (org.knime.base.node.mine.regression.pmmlgreg.PMMLPPCell)7 DataCell (org.knime.core.data.DataCell)7 PMMLGeneralRegressionContent (org.knime.base.node.mine.regression.pmmlgreg.PMMLGeneralRegressionContent)5 PMMLPCell (org.knime.base.node.mine.regression.pmmlgreg.PMMLPCell)5 PMMLPCovCell (org.knime.base.node.mine.regression.pmmlgreg.PMMLPCovCell)5 PMMLParameter (org.knime.base.node.mine.regression.pmmlgreg.PMMLParameter)5 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)5 DataColumnSpec (org.knime.core.data.DataColumnSpec)3 DoubleValue (org.knime.core.data.DoubleValue)3 NominalValue (org.knime.core.data.NominalValue)3 ColumnRearranger (org.knime.core.data.container.ColumnRearranger)3 HashMap (java.util.HashMap)2 LinkedHashSet (java.util.LinkedHashSet)2 List (java.util.List)2 Matcher (java.util.regex.Matcher)2 Pattern (java.util.regex.Pattern)2 BitVectorValue (org.knime.core.data.vector.bitvector.BitVectorValue)2 LogRegPredictor (org.knime.base.node.mine.regression.predict2.LogRegPredictor)1