use of org.knime.core.node.port.pmml.PMMLPortObjectSpec in project knime-core by knime.
the class LearnerTest method testPerformChdAgeData.
/**
 * Runs the learner on the CHD/age data set and compares the estimated likelihood with a published reference
 * value. Test method for
 * {@link org.knime.base.node.mine.regression.logistic.learner.Learner#perform(BufferedDataTable, org.knime.core.node.ExecutionContext)}.
 *
 * @throws Exception if creating the table, running the learner or retrieving its result fails
 */
@Test
public final void testPerformChdAgeData() throws Exception {
    final BufferedDataTable trainingTable = m_exec.createBufferedDataTable(new ChdAgeData(), m_exec);

    // "Age" is the only learning column; the CHD indicator column is the target.
    final PMMLPortObjectSpecCreator creator = new PMMLPortObjectSpecCreator(trainingTable.getDataTableSpec());
    creator.setLearningColsNames(Arrays.asList("Age"));
    creator.setTargetColName("Evidence of Coronary Heart Disease");
    final PMMLPortObjectSpec pmmlSpec = creator.createSpec();

    // done in KNIME thread pool, expected by code
    final Callable<LogisticRegressionContent> learnTask = new Callable<LogisticRegressionContent>() {
        @Override
        public LogisticRegressionContent call() throws Exception {
            return new Learner(pmmlSpec, null, true, true).perform(trainingTable, m_exec);
        }
    };
    final Future<LogisticRegressionContent> pending = KNIMEConstants.GLOBAL_THREAD_POOL.enqueue(learnTask);
    final LogisticRegressionContent result = pending.get();

    // Reference results are published in the book:
    // Applied Logistic Regression,
    // David W. Hosmer and Stanley Lemeshow
    // Wiley, 2000 (2nd. ed)
    // The table of results are found on page 10
    final double expectedLikelihood = -53.67656;
    Assert.assertEquals(expectedLikelihood, result.getEstimatedLikelihood(), 0.001);
}
use of org.knime.core.node.port.pmml.PMMLPortObjectSpec in project knime-core by knime.
the class LogisticRegressionContent method load.
/**
 * Restores a {@link LogisticRegressionContent} from previously saved model content.
 *
 * @param parContent the content that holds the internals
 * @param spec the data table spec of the training data
 * @return an instance with the loaded values
 * @throws InvalidSettingsException when data are not well formed
 */
static LogisticRegressionContent load(final ModelContentRO parContent, final DataTableSpec spec)
    throws InvalidSettingsException {
    String target = parContent.getString(CFG_TARGET);
    String[] learningCols = parContent.getStringArray(CFG_LEARNING_COLS);
    PMMLPortObjectSpec pmmlSpec = createSpec(spec, target, learningCols);
    String[] factors = parContent.getStringArray(CFG_FACTORS);

    // Since 3.0: read with empty defaults so that content lacking these entries still loads (treated as
    // "no vector columns"), in line with how the entries introduced in 2.9 are handled below.
    final String[] vectorColumns = parContent.getStringArray(CFG_VECTOR_NAMES, new String[0]);
    final int[] vectorLengths = parContent.getIntArray(CFG_VECTOR_LENGTHS, new int[0]);
    CheckUtils.checkSetting(vectorColumns.length == vectorLengths.length,
        "The length of vector names and their length do not match: " + vectorColumns.length + " <> "
            + vectorLengths.length);
    // Insertion-ordered map: vector column name -> vector length.
    Map<String, Integer> vectorLengthMap = new LinkedHashMap<>();
    for (int i = 0; i < vectorLengths.length; i++) {
        vectorLengthMap.put(vectorColumns[i], vectorLengths[i]);
    }

    String[] covariates = parContent.getStringArray(CFG_COVARIATES);
    double[] coeff = parContent.getDoubleArray(CFG_COEFFICIENTS);
    double likelihood = parContent.getDouble(CFG_LOG_LIKELIHOOD);
    // The covariance matrix is optional; it stays null when the content flags it as absent.
    RealMatrix covMat = null;
    if (parContent.getBoolean(CFG_COVMAT_PRESENT)) {
        covMat = toMatrix(parContent.getDoubleArray(CFG_COVARIANCE_MATRIX), coeff.length);
    }
    int iter = parContent.getInt(CFG_ITER);

    // introduced in 2.9
    DataCell targetReferenceCategory = parContent.getDataCell(CFG_TARGET_REFERENCE_CATEGORY, null);
    boolean sortTargetCategories = parContent.getBoolean(CFG_SORT_TARGET_CATEGORIES, true);
    boolean sortFactorsCategories = parContent.getBoolean(CFG_SORT_FACTORS_CATEGORIES, true);

    return new LogisticRegressionContent(pmmlSpec, Arrays.asList(factors), Arrays.asList(covariates),
        vectorLengthMap, targetReferenceCategory, sortTargetCategories, sortFactorsCategories,
        toMatrix(coeff, coeff.length), likelihood, covMat, iter);
}
use of org.knime.core.node.port.pmml.PMMLPortObjectSpec in project knime-core by knime.
the class SVMPredictorNodeModel method createColumnRearranger.
/**
 * Creates the rearranger that appends the columns produced by an {@link SVMPredictor} to the input table.
 * As a side effect the fields {@code m_svms} and {@code m_colindices} are (re)initialized from the model.
 *
 * @param pmmlModel the PMML port object containing the support vector machine model
 * @param inSpec the spec of the table to predict on
 * @return a rearranger over {@code inSpec} with the predictor appended
 * @throws InvalidSettingsException if a double-compatible column of the training spec is missing in
 *             {@code inSpec}
 * @throws RuntimeException if the PMML document contains no support vector machine model
 */
private ColumnRearranger createColumnRearranger(final PMMLPortObject pmmlModel, final DataTableSpec inSpec)
    throws InvalidSettingsException {
    List<Node> models = pmmlModel.getPMMLValue().getModels(PMMLModelType.SupportVectorMachineModel);
    if (models.isEmpty()) {
        String msg = "SVM evaluation failed: " + "No support vector machine model found.";
        LOGGER.error(msg);
        throw new RuntimeException(msg);
    }
    PMMLSVMTranslator trans = new PMMLSVMTranslator();
    pmmlModel.initializeModelTranslator(trans);
    List<Svm> svms = trans.getSVMs();
    m_svms = svms.toArray(new Svm[svms.size()]);

    PMMLPortObjectSpec pmmlSpec = pmmlModel.getSpec();
    // Both conditions must hold. The previous "a == b > 0" parsed as "a == (b > 0)", which was also true
    // when probabilities were disabled and no target column existed, so get(0) failed on an empty list.
    if (m_addProbabilities.getBooleanValue() && !pmmlSpec.getTargetCols().isEmpty()) {
        adjustOrder(pmmlSpec.getTargetCols().get(0));
    }

    DataTableSpec testSpec = inSpec;
    DataTableSpec trainingSpec = pmmlSpec.getDataTableSpec();
    // Map every double-compatible training column to its index in the test table.
    // NOTE(review): the original comment says the class column is excluded; presumably it is not
    // double-compatible - confirm.
    Vector<Integer> colindices = new Vector<Integer>();
    for (DataColumnSpec colspec : trainingSpec) {
        if (colspec.getType().isCompatible(DoubleValue.class)) {
            int colindex = testSpec.findColumnIndex(colspec.getName());
            if (colindex < 0) {
                throw new InvalidSettingsException(
                    "Column " + "\'" + colspec.getName() + "\' not found" + " in test data");
            }
            colindices.add(colindex);
        }
    }
    m_colindices = new int[colindices.size()];
    for (int i = 0; i < m_colindices.length; i++) {
        m_colindices[i] = colindices.get(i);
    }

    final PredictorHelper predictorHelper = PredictorHelper.getInstance();
    final String targetCol = pmmlSpec.getTargetFields().iterator().next();
    final String predictionColumnName = predictorHelper.computePredictionColumnName(
        m_predictionColumn.getStringValue(), m_overridePrediction.getBooleanValue(), targetCol);
    SVMPredictor svmpredict = new SVMPredictor(targetCol, m_svms, m_colindices, predictionColumnName,
        m_addProbabilities.getBooleanValue(), m_suffix.getStringValue());
    ColumnRearranger colre = new ColumnRearranger(testSpec);
    colre.append(svmpredict);
    return colre;
}
use of org.knime.core.node.port.pmml.PMMLPortObjectSpec in project knime-core by knime.
the class PredictorHelper method createOutTableSpec.
/**
 * Computes the output table's specification based on common node settings: the input columns, followed by
 * one probability column per prediction value (only if requested), followed by the prediction column.
 *
 * @param dataSpec The input table spec; cast to {@link DataTableSpec}.
 * @param modelSpec The model spec; cast to {@link PMMLPortObjectSpec}.
 * @param addProbs Add the probability columns?
 * @param predictionCol Custom name of the prediction column.
 * @param shouldOverride Should we use that name?
 * @param suffix Suffix for probability columns.
 * @return The output table {@link DataTableSpec}, or {@code null} if probability columns are requested but
 *         the prediction values cannot be determined.
 * @throws InvalidSettingsException Invalid settings for the prediction column name.
 */
public DataTableSpec createOutTableSpec(final PortObjectSpec dataSpec, final PortObjectSpec modelSpec,
    final boolean addProbs, final String predictionCol, final boolean shouldOverride, final String suffix)
    throws InvalidSettingsException {
    CheckUtils.checkSettingNotNull(predictionCol, "Prediction column name cannot be null");
    CheckUtils.checkSetting(!predictionCol.trim().isEmpty(), "Prediction column name cannot be empty");

    // The prediction values are only needed when probability columns are appended.
    List<DataCell> classValues = null;
    if (addProbs) {
        classValues = getPredictionValues((PMMLPortObjectSpec) modelSpec);
        if (classValues == null) {
            // no out spec can be determined
            return null;
        }
    }

    final int probabilityCount = classValues == null ? 0 : classValues.size();
    final DataTableSpec inputSpec = (DataTableSpec) dataSpec;
    // One slot per probability column plus a trailing slot for the prediction column.
    final DataColumnSpec[] appended = new DataColumnSpec[probabilityCount + 1];

    // Domain [0,1] is the default for the double cells containing the distribution.
    final DataColumnDomain unitInterval =
        new DataColumnDomainCreator(new DoubleCell(0.0), new DoubleCell(1.0)).createDomain();
    final PMMLPortObjectSpec pmmlSpec = (PMMLPortObjectSpec) modelSpec;
    final String targetName = pmmlSpec.getTargetFields().iterator().next();

    // add all distribution columns
    if (classValues != null) {
        int position = 0;
        for (final DataCell classValue : classValues) {
            final String probabilityName = probabilityColumnName(targetName, classValue.toString(), suffix);
            final DataColumnSpecCreator probabilityCreator =
                new DataColumnSpecCreator(probabilityName, DoubleCell.TYPE);
            probabilityCreator.setDomain(unitInterval);
            appended[position] = probabilityCreator.createSpec();
            position++;
        }
    }

    // add the prediction column
    final String predictionName = computePredictionColumnName(predictionCol, shouldOverride, targetName);
    appended[probabilityCount] = new DataColumnSpecCreator(predictionName, StringCell.TYPE).createSpec();

    return new DataTableSpec(inputSpec, new DataTableSpec(appended));
}
use of org.knime.core.node.port.pmml.PMMLPortObjectSpec in project knime-core by knime.
the class RegressionPredictorNodeDialogPane method loadSettingsFrom.
/**
 * {@inheritDoc}
 * <p>
 * Populates the dialog controls from the given settings. If no custom prediction column name is stored,
 * the text field is filled with the name of the last column computed for default settings, when available.
 *
 * @throws NotConfigurableException if the model's target type check fails
 */
@Override
protected void loadSettingsFrom(final NodeSettingsRO settings, final PortObjectSpec[] specs)
    throws NotConfigurableException {
    RegressionPredictorSettings s = new RegressionPredictorSettings(m_showProbOptions);
    s.loadSettingsForDialog(settings);
    m_hasCustomPredictionName.setSelected(s.getHasCustomPredictionName());
    // Port 0 carries the model spec, port 1 the data table spec.
    PMMLPortObjectSpec portSpec = (PMMLPortObjectSpec) specs[0];
    DataTableSpec tableSpec = (DataTableSpec) specs[1];
    // check if the model has the correct target type
    try {
        RegressionPredictorNodeModel.checkModelTargetType(portSpec, m_showProbOptions);
    } catch (InvalidSettingsException e1) {
        // Keep the original exception as cause so its stack trace is not lost.
        throw new NotConfigurableException(e1.getMessage(), e1);
    }
    if (s.getCustomPredictionName() != null) {
        m_customPredictionName.setText(s.getCustomPredictionName());
    } else {
        try {
            // Fresh default settings are used here (not the loaded ones) to compute the name shown in the field.
            DataColumnSpec[] outSpec = RegressionPredictorCellFactory.createColumnSpec(portSpec, tableSpec,
                new RegressionPredictorSettings(m_showProbOptions));
            m_customPredictionName.setText(outSpec[outSpec.length - 1].getName());
        } catch (InvalidSettingsException ignored) {
            // Deliberately ignored: open the dialog anyway and give the user a chance to define the settings.
        }
    }
    if (m_showProbOptions) {
        m_includeProbs.setSelected(s.getIncludeProbabilities());
        m_probColumnSuffix.setText(s.getPropColumnSuffix());
    }
    updateEnableState();
}
Aggregations