use of org.knime.base.node.mine.regression.pmmlgreg.PMMLPredictor in project knime-core by knime.
the class RegressionPredictorNodeModel method createRearranger.
/**
 * Validates the given regression model against the input table spec and creates a rearranger
 * that appends the matching predictor.
 *
 * <p>Only {@code generalLinear} models with function {@code regression} and
 * {@code multinomialLogistic} models with function {@code classification} are accepted. Every
 * factor must be a nominal column of the table. Every covariate must be a numeric column, or
 * carry a name of the form {@code column[index]} whose {@code column} part is a column that is
 * listed in the model's vector lengths and is a numeric collection, bit vector or byte vector.
 *
 * @param content the general regression model content, must not be {@code null}
 * @param pmmlSpec the PMML spec; its first target field is passed on to the predictor
 * @param inDataSpec the spec of the table the model is applied to
 * @return a rearranger over {@code inDataSpec} with a {@code LinReg2Predictor} (linear model) or
 *         a {@code LogRegPredictor} (logistic model) appended
 * @throws InvalidSettingsException if {@code content} is {@code null}, the model type or
 *             function name is not supported, or a factor/covariate column is missing or has an
 *             incompatible type
 */
private ColumnRearranger createRearranger(final PMMLGeneralRegressionContent content, final PMMLPortObjectSpec pmmlSpec, final DataTableSpec inDataSpec) throws InvalidSettingsException {
    if (content == null) {
        throw new InvalidSettingsException("No input");
    }
    // the predictor can only predict linear and (multinomial) logistic regression models
    if (!(content.getModelType().equals(ModelType.multinomialLogistic) || content.getModelType().equals(ModelType.generalLinear))) {
        throw new InvalidSettingsException("Model Type: " + content.getModelType() + " is not supported.");
    }
    if (content.getModelType().equals(ModelType.generalLinear) && !content.getFunctionName().equals(FunctionName.regression)) {
        throw new InvalidSettingsException("Function Name: " + content.getFunctionName() + " is not supported for linear regression.");
    }
    if (content.getModelType().equals(ModelType.multinomialLogistic) && !content.getFunctionName().equals(FunctionName.classification)) {
        throw new InvalidSettingsException("Function Name: " + content.getFunctionName() + " is not supported for logistic regression.");
    }
    // check if all factors are in the given data table and that they
    // are nominal values
    for (PMMLPredictor factor : content.getFactorList()) {
        DataColumnSpec columnSpec = inDataSpec.getColumnSpec(factor.getName());
        if (null == columnSpec) {
            throw new InvalidSettingsException("The column \"" + factor.getName() + "\" is in the model but not in given table.");
        }
        if (!columnSpec.getType().isCompatible(NominalValue.class)) {
            throw new InvalidSettingsException("The column \"" + factor.getName() + "\" is supposed to be nominal.");
        }
    }
    // check if all covariates are in the given data table and that they
    // are numeric values
    Pattern pattern = Pattern.compile("(.*)\\[\\d+\\]");
    for (PMMLPredictor covariate : content.getCovariateList()) {
        DataColumnSpec columnSpec = inDataSpec.getColumnSpec(covariate.getName());
        if (null == columnSpec) {
            // No column with the exact name: accept "column[index]" and look up "column" instead.
            Matcher matcher = pattern.matcher(covariate.getName());
            // group(1) may only be read after a successful match, otherwise it throws
            // IllegalStateException and hides the intended error message below.
            if (matcher.matches()) {
                columnSpec = inDataSpec.getColumnSpec(matcher.group(1));
            }
            if (null == columnSpec) {
                throw new InvalidSettingsException("The column \"" + covariate.getName() + "\" is in the model but not in given table.");
            }
        }
        // columnSpec is non-null here. The vector alternatives are only evaluated for
        // non-numeric columns (short-circuit), exactly as before.
        if (!columnSpec.getType().isCompatible(DoubleValue.class)
            && !(content.getVectorLengths().containsKey(columnSpec.getName())
                && ((columnSpec.getType().isCollectionType()
                    && columnSpec.getType().getCollectionElementType().isCompatible(DoubleValue.class))
                    || columnSpec.getType().isCompatible(BitVectorValue.class)
                    || columnSpec.getType().isCompatible(ByteVectorValue.class)))) {
            throw new InvalidSettingsException("The column \"" + covariate.getName() + "\" is supposed to be numeric.");
        }
    }
    ColumnRearranger c = new ColumnRearranger(inDataSpec);
    if (content.getModelType().equals(ModelType.generalLinear)) {
        c.append(new LinReg2Predictor(content, inDataSpec, pmmlSpec, pmmlSpec.getTargetFields().get(0), m_settings));
    } else {
        c.append(new LogRegPredictor(content, inDataSpec, pmmlSpec, pmmlSpec.getTargetFields().get(0), m_settings));
    }
    return c;
}
use of org.knime.base.node.mine.regression.pmmlgreg.PMMLPredictor in project knime-core by knime.
the class RegressionPredictorNodeModel method createRearranger.
/**
 * Validates the given regression model against the input table spec and creates a rearranger
 * that appends the matching predictor.
 *
 * <p>Only {@code generalLinear} models with function {@code regression} and
 * {@code multinomialLogistic} models with function {@code classification} are accepted. Every
 * factor must be a nominal column of the table. Every covariate must be a numeric column, or
 * carry a name of the form {@code column[index]} whose {@code column} part is a column that is
 * listed in the model's vector lengths and is a numeric collection, bit vector or byte vector.
 *
 * @param content the general regression model content, must not be {@code null}
 * @param pmmlSpec the PMML spec; its first target field is passed on to the predictor
 * @param inDataSpec the spec of the table the model is applied to
 * @return a rearranger over {@code inDataSpec} with a {@code LinReg2Predictor} (linear model) or
 *         a {@code LogRegPredictor} (logistic model) appended
 * @throws InvalidSettingsException if {@code content} is {@code null}, the model type or
 *             function name is not supported, or a factor/covariate column is missing or has an
 *             incompatible type
 */
private ColumnRearranger createRearranger(final PMMLGeneralRegressionContent content, final PMMLPortObjectSpec pmmlSpec, final DataTableSpec inDataSpec) throws InvalidSettingsException {
    if (content == null) {
        throw new InvalidSettingsException("No input");
    }
    // the predictor can only predict linear and (multinomial) logistic regression models
    if (!(content.getModelType().equals(ModelType.multinomialLogistic) || content.getModelType().equals(ModelType.generalLinear))) {
        throw new InvalidSettingsException("Model Type: " + content.getModelType() + " is not supported.");
    }
    if (content.getModelType().equals(ModelType.generalLinear) && !content.getFunctionName().equals(FunctionName.regression)) {
        throw new InvalidSettingsException("Function Name: " + content.getFunctionName() + " is not supported for linear regression.");
    }
    if (content.getModelType().equals(ModelType.multinomialLogistic) && !content.getFunctionName().equals(FunctionName.classification)) {
        throw new InvalidSettingsException("Function Name: " + content.getFunctionName() + " is not supported for logistic regression.");
    }
    // check if all factors are in the given data table and that they
    // are nominal values
    for (PMMLPredictor factor : content.getFactorList()) {
        DataColumnSpec columnSpec = inDataSpec.getColumnSpec(factor.getName());
        if (null == columnSpec) {
            throw new InvalidSettingsException("The column \"" + factor.getName() + "\" is in the model but not in given table.");
        }
        if (!columnSpec.getType().isCompatible(NominalValue.class)) {
            throw new InvalidSettingsException("The column \"" + factor.getName() + "\" is supposed to be nominal.");
        }
    }
    // check if all covariates are in the given data table and that they
    // are numeric values
    Pattern pattern = Pattern.compile("(.*)\\[\\d+\\]");
    for (PMMLPredictor covariate : content.getCovariateList()) {
        DataColumnSpec columnSpec = inDataSpec.getColumnSpec(covariate.getName());
        if (null == columnSpec) {
            // No column with the exact name: accept "column[index]" and look up "column" instead.
            Matcher matcher = pattern.matcher(covariate.getName());
            // group(1) may only be read after a successful match, otherwise it throws
            // IllegalStateException and hides the intended error message below.
            if (matcher.matches()) {
                columnSpec = inDataSpec.getColumnSpec(matcher.group(1));
            }
            if (null == columnSpec) {
                throw new InvalidSettingsException("The column \"" + covariate.getName() + "\" is in the model but not in given table.");
            }
        }
        // columnSpec is non-null here. The vector alternatives are only evaluated for
        // non-numeric columns (short-circuit), exactly as before.
        if (!columnSpec.getType().isCompatible(DoubleValue.class)
            && !(content.getVectorLengths().containsKey(columnSpec.getName())
                && ((columnSpec.getType().isCollectionType()
                    && columnSpec.getType().getCollectionElementType().isCompatible(DoubleValue.class))
                    || columnSpec.getType().isCompatible(BitVectorValue.class)
                    || columnSpec.getType().isCompatible(ByteVectorValue.class)))) {
            throw new InvalidSettingsException("The column \"" + covariate.getName() + "\" is supposed to be numeric.");
        }
    }
    ColumnRearranger c = new ColumnRearranger(inDataSpec);
    if (content.getModelType().equals(ModelType.generalLinear)) {
        c.append(new LinReg2Predictor(content, inDataSpec, pmmlSpec, pmmlSpec.getTargetFields().get(0), m_settings));
    } else {
        c.append(new LogRegPredictor(content, inDataSpec, pmmlSpec, pmmlSpec.getTargetFields().get(0), m_settings));
    }
    return c;
}
use of org.knime.base.node.mine.regression.pmmlgreg.PMMLPredictor in project knime-core by knime.
the class RegressionContent method createGeneralRegressionContent.
/**
 * Builds the PMML general regression representation of this linear regression model.
 *
 * <p>Parameters are named {@code p0, p1, ...}. If a constant is included, {@code p0} is the
 * intercept. Each factor column contributes one parameter per domain value except the first
 * one; every other learning column contributes a single parameter. If no constant is included,
 * the offset value is set on the result instead.
 *
 * @return the PMMLGeneralRegressionContent
 */
public PMMLGeneralRegressionContent createGeneralRegressionContent() {
    final List<PMMLPredictor> factorPredictors = new ArrayList<PMMLPredictor>();
    for (final String name : m_factorList) {
        factorPredictors.add(new PMMLPredictor(name));
    }
    final List<PMMLPredictor> covariatePredictors = new ArrayList<PMMLPredictor>();
    for (final String name : m_covariateList) {
        covariatePredictors.add(new PMMLPredictor(name));
    }

    // Parameter list, predictor-to-parameter matrix and coefficient matrix are filled in step.
    final List<PMMLParameter> parameters = new ArrayList<PMMLParameter>();
    final List<PMMLPPCell> ppCells = new ArrayList<PMMLPPCell>();
    final List<PMMLPCell> coefficientCells = new ArrayList<PMMLPCell>();

    // Running parameter number; also the column read from m_beta for non-intercept parameters.
    int index = 0;
    if (m_includeConstant) {
        final String interceptName = "p" + index;
        parameters.add(new PMMLParameter(interceptName, "Intercept"));
        coefficientCells.add(new PMMLPCell(interceptName, m_beta.getEntry(0, 0), 1));
        index++;
    }

    for (final String column : m_outSpec.getLearningFields()) {
        if (!m_factorList.contains(column)) {
            // Plain (non-factor) column: exactly one parameter.
            final String name = "p" + index;
            parameters.add(new PMMLParameter(name, column));
            ppCells.add(new PMMLPPCell("1", column, name));
            coefficientCells.add(new PMMLPCell(name, m_beta.getEntry(0, index), 1));
            index++;
            continue;
        }

        final Iterator<DataCell> values = m_factorDomainValues.get(column).iterator();
        if (!values.hasNext()) {
            // Factor without domain values contributes nothing.
            continue;
        }
        // The first domain value gets no parameter of its own.
        values.next();
        while (values.hasNext()) {
            final DataCell value = values.next();
            final String name = "p" + index;
            parameters.add(new PMMLParameter(name, "[" + column + "=" + value + "]"));
            ppCells.add(new PMMLPPCell(value.toString(), column, name));
            coefficientCells.add(new PMMLPCell(name, m_beta.getEntry(0, index), 1));
            index++;
        }
    }

    // No covariance information is exported.
    final List<PMMLPCovCell> covCells = new ArrayList<PMMLPCovCell>();

    final PMMLParameter[] parameterArray = parameters.toArray(new PMMLParameter[0]);
    final PMMLPredictor[] factorArray = factorPredictors.toArray(new PMMLPredictor[0]);
    final PMMLPredictor[] covariateArray = covariatePredictors.toArray(new PMMLPredictor[0]);
    final PMMLPPCell[] ppArray = ppCells.toArray(new PMMLPPCell[0]);
    final PMMLPCovCell[] covArray = covCells.toArray(new PMMLPCovCell[0]);
    final PMMLPCell[] coefficientArray = coefficientCells.toArray(new PMMLPCell[0]);

    final PMMLGeneralRegressionContent result = new PMMLGeneralRegressionContent(ModelType.generalLinear,
        "KNIME Linear Regression", FunctionName.regression, "LinearRegression", parameterArray, factorArray,
        covariateArray, ppArray, covArray, coefficientArray);
    if (!m_includeConstant) {
        result.setOffsetValue(m_offsetValue);
    }
    return result;
}
use of org.knime.base.node.mine.regression.pmmlgreg.PMMLPredictor in project knime-core by knime.
the class PolyRegContent method createGeneralRegressionContent.
/**
 * Creates a new PMML General Regression Content from this polynomial regression model.
 *
 * <p>Parameters are named {@code p0, p1, ...}. If a constant is included, {@code p0} is the
 * intercept. Each factor column contributes one parameter per domain value except the first
 * one; a factor without any domain values contributes no parameter. Every other learning column
 * contributes a single parameter. If no constant is included, the offset value is set on the
 * result instead.
 *
 * @return the PMMLGeneralRegressionContent
 */
@Override
public PMMLGeneralRegressionContent createGeneralRegressionContent() {
    List<PMMLPredictor> factors = new ArrayList<PMMLPredictor>();
    for (String factor : m_factorList) {
        factors.add(new PMMLPredictor(factor));
    }
    List<PMMLPredictor> covariates = new ArrayList<PMMLPredictor>();
    for (String covariate : m_covariateList) {
        covariates.add(new PMMLPredictor(covariate));
    }
    // the ParameterList, the PPMatrix and the ParamMatrix
    List<PMMLParameter> parameterList = new ArrayList<PMMLParameter>();
    List<PMMLPPCell> ppMatrix = new ArrayList<PMMLPPCell>();
    List<PMMLPCell> paramMatrix = new ArrayList<PMMLPCell>();
    // running parameter number; also the column read from m_beta
    int p = 0;
    if (m_includeConstant) {
        // Define the intercept
        parameterList.add(new PMMLParameter("p" + p, "Intercept"));
        paramMatrix.add(new PMMLPCell("p" + p, m_beta.getEntry(0, 0), 1));
        p++;
    }
    for (String colName : m_outSpec.getLearningFields()) {
        if (m_factorList.contains(colName)) {
            Iterator<DataCell> designIter = m_factorDomainValues.get(colName).iterator();
            // A factor without domain values has nothing to omit and no parameters to add;
            // calling next() on it would throw NoSuchElementException.
            if (!designIter.hasNext()) {
                continue;
            }
            // Omit first
            designIter.next();
            while (designIter.hasNext()) {
                DataCell dvValue = designIter.next();
                String pName = "p" + p;
                parameterList.add(new PMMLParameter(pName, "[" + colName + "=" + dvValue + "]"));
                ppMatrix.add(new PMMLPPCell(dvValue.toString(), colName, pName));
                paramMatrix.add(new PMMLPCell(pName, m_beta.getEntry(0, p), 1));
                p++;
            }
        } else {
            String pName = "p" + p;
            parameterList.add(new PMMLParameter(pName, colName));
            ppMatrix.add(new PMMLPPCell("1", colName, pName));
            paramMatrix.add(new PMMLPCell(pName, m_beta.getEntry(0, p), 1));
            p++;
        }
    }
    // no covariance information is exported
    List<PMMLPCovCell> pCovMatrix = new ArrayList<PMMLPCovCell>();
    PMMLGeneralRegressionContent content = new PMMLGeneralRegressionContent(ModelType.generalLinear, "KNIME Polynomial Regression", FunctionName.regression, "LinearRegression", parameterList.toArray(new PMMLParameter[0]), factors.toArray(new PMMLPredictor[0]), covariates.toArray(new PMMLPredictor[0]), ppMatrix.toArray(new PMMLPPCell[0]), pCovMatrix.toArray(new PMMLPCovCell[0]), paramMatrix.toArray(new PMMLPCell[0]));
    if (!m_includeConstant) {
        content.setOffsetValue(m_offsetValue);
    }
    return content;
}
use of org.knime.base.node.mine.regression.pmmlgreg.PMMLPredictor in project knime-core by knime.
the class LogisticRegressionContent method createGeneralRegressionContent.
/**
 * Creates a new PMML General Regression Content from this logistic
 * regression model.
 *
 * <p>The last target category is used as the reference category; every other target category
 * gets its own coefficient for each parameter. Parameter {@code p0} is the intercept. Each
 * factor column contributes one parameter per domain value except the first one (a factor
 * without domain values contributes none), each column listed in the vector lengths contributes
 * one parameter per vector position, and every other learning column contributes a single
 * parameter.
 *
 * @return the PMMLGeneralRegressionContent
 */
public PMMLGeneralRegressionContent createGeneralRegressionContent() {
    List<PMMLPredictor> factors = new ArrayList<PMMLPredictor>();
    for (String factor : m_factorList) {
        factors.add(new PMMLPredictor(factor));
    }
    List<PMMLPredictor> covariates = new ArrayList<PMMLPredictor>();
    for (String covariate : m_covariateList) {
        covariates.add(new PMMLPredictor(covariate));
    }
    // the ParameterList, the PPMatrix and the ParamMatrix
    List<PMMLParameter> parameterList = new ArrayList<PMMLParameter>();
    List<PMMLPPCell> ppMatrix = new ArrayList<PMMLPPCell>();
    List<PMMLPCell> paramMatrix = new ArrayList<PMMLPCell>();
    // Number of beta columns per non-reference target category.
    // NOTE(review): with exactly one target category this divides by zero - confirm that callers
    // guarantee at least two categories.
    int pCount = m_beta.getColumnDimension() / (m_targetCategories.size() - 1);
    int p = 0;
    parameterList.add(new PMMLParameter("p" + p, "Intercept"));
    addParamCells(paramMatrix, "p" + p, p, pCount);
    p++;
    final List<String> learningFields = new ArrayList<>(m_outSpec.getLearningFields());
    for (String colName : learningFields) {
        if (m_factorList.contains(colName)) {
            Iterator<DataCell> designIter = m_factorDomainValues.get(colName).iterator();
            // A factor without domain values has nothing to omit and no parameters to add;
            // calling next() on it would throw NoSuchElementException.
            if (!designIter.hasNext()) {
                continue;
            }
            // Omit first
            designIter.next();
            while (designIter.hasNext()) {
                DataCell dvValue = designIter.next();
                String pName = "p" + p;
                parameterList.add(new PMMLParameter(pName, "[" + colName + "=" + dvValue + "]"));
                ppMatrix.add(new PMMLPPCell(dvValue.toString(), colName, pName));
                addParamCells(paramMatrix, pName, p, pCount);
                p++;
            }
        } else if (m_vectorLengths.containsKey(colName)) {
            final int length = m_vectorLengths.get(colName);
            // All positions of one vector column share the parameter number the column started
            // at; the position is appended to keep the names unique.
            final int pFrozen = p;
            for (int idx = 0; idx < length; ++idx) {
                final String pName = "p" + pFrozen + "_" + idx;
                final String predictorName = VectorHandling.valueAt(colName, idx);
                parameterList.add(new PMMLParameter(pName, predictorName));
                ppMatrix.add(new PMMLPPCell("1", predictorName, pName));
                addParamCells(paramMatrix, pName, p, pCount);
                p++;
            }
        } else {
            String pName = "p" + p;
            parameterList.add(new PMMLParameter(pName, colName));
            ppMatrix.add(new PMMLPPCell("1", colName, pName));
            addParamCells(paramMatrix, pName, p, pCount);
            p++;
        }
    }
    // TODO PCovMatrix
    List<PMMLPCovCell> pCovMatrix = new ArrayList<PMMLPCovCell>();
    PMMLGeneralRegressionContent content = new PMMLGeneralRegressionContent(ModelType.multinomialLogistic, "KNIME Logistic Regression", FunctionName.classification, "LogisticRegression", parameterList.toArray(new PMMLParameter[0]), factors.toArray(new PMMLPredictor[0]), covariates.toArray(new PMMLPredictor[0]), m_vectorLengths, ppMatrix.toArray(new PMMLPPCell[0]), pCovMatrix.toArray(new PMMLPCovCell[0]), paramMatrix.toArray(new PMMLPCell[0]));
    content.setTargetReferenceCategory(m_targetCategories.get(m_targetCategories.size() - 1).toString());
    return content;
}

/**
 * Adds one coefficient cell per non-reference target category for a single parameter.
 *
 * @param paramMatrix the list the cells are appended to
 * @param pName the parameter name stored in the cells
 * @param p the parameter's column within one target category's section of {@code m_beta}
 * @param pCount the number of {@code m_beta} columns per target category
 */
private void addParamCells(final List<PMMLPCell> paramMatrix, final String pName, final int p, final int pCount) {
    // The last target category is the reference category and therefore has no coefficients.
    for (int k = 0; k < m_targetCategories.size() - 1; k++) {
        paramMatrix.add(new PMMLPCell(pName, m_beta.getEntry(0, p + (k * pCount)), 1, m_targetCategories.get(k).toString()));
    }
}
Aggregations