use of org.knime.core.node.port.pmml.PMMLPortObjectSpec in project knime-core by knime.
the class PolyRegLearnerNodeModel method configure.
/**
 * Checks the current settings against the incoming table spec and derives the output specs.
 * <p>
 * The selected columns are stored in {@code m_columnNames}; each of them must exist in the input
 * table and be compatible with {@link DoubleValue}. A target column must be set and must be
 * present in the input table. The returned array holds the model spec, the input table spec with
 * the two appended double columns and {@code STATS_SPEC}, in that order.
 *
 * {@inheritDoc}
 */
@Override
protected PortObjectSpec[] configure(final PortObjectSpec[] inSpecs) throws InvalidSettingsException {
    final DataTableSpec inputSpec = (DataTableSpec) inSpecs[0];
    // The second input is only read when the PMML input is enabled
    final PMMLPortObjectSpec inputModelSpec;
    if (m_pmmlInEnabled) {
        inputModelSpec = (PMMLPortObjectSpec) inSpecs[1];
    } else {
        inputModelSpec = null;
    }

    final String[] learningColumns = computeSelectedColumns(inputSpec);
    m_columnNames = learningColumns;
    for (int i = 0; i < learningColumns.length; i++) {
        final String name = learningColumns[i];
        final DataColumnSpec columnSpec = inputSpec.getColumnSpec(name);
        if (columnSpec == null) {
            throw new InvalidSettingsException("Selected column '" + name + "' does not exist in input table");
        }
        final boolean numeric = columnSpec.getType().isCompatible(DoubleValue.class);
        if (!numeric) {
            throw new InvalidSettingsException(
                "Selected column '" + columnSpec.getName() + "' from the input table is not a numeric column.");
        }
    }

    final String target = m_settings.getTargetColumn();
    if (target == null) {
        throw new InvalidSettingsException("No target column selected");
    }
    if (inputSpec.findColumnIndex(target) == -1) {
        throw new InvalidSettingsException("Target column '" + target + "' does not exist.");
    }

    // Two double columns are appended to the input table: the prediction and its error
    final DataColumnSpec predictionColumn =
        new DataColumnSpecCreator("PolyReg prediction", DoubleCell.TYPE).createSpec();
    final DataColumnSpec errorColumn =
        new DataColumnSpecCreator("Prediction Error", DoubleCell.TYPE).createSpec();

    final PortObjectSpec modelSpec = createModelSpec(inputModelSpec, inputSpec);
    final DataTableSpec appendedSpec = AppendedColumnTable.getTableSpec(inputSpec, predictionColumn, errorColumn);
    return new PortObjectSpec[] {modelSpec, appendedSpec, STATS_SPEC};
}
use of org.knime.core.node.port.pmml.PMMLPortObjectSpec in project knime-core by knime.
the class PolyRegLearnerNodeModel method execute.
/**
 * Learns the polynomial regression on the first input table and produces the three outputs:
 * the PMML model, the input table with the columns appended by the cell factory, and the
 * statistics table.
 * <p>
 * If the learner throws a {@link ModelSpecificationException}, the node does not fail. Instead the
 * exception message is appended to the node warning, a statistics table with zero coefficients and
 * missing statistics is produced, and the view data is filled with NaN statistics.
 *
 * {@inheritDoc}
 */
@Override
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    BufferedDataTable inTable = (BufferedDataTable) inData[0];
    DataTableSpec inSpec = inTable.getDataTableSpec();
    final int colCount = inSpec.getNumColumns();
    String[] selectedCols = computeSelectedColumns(inSpec);
    Set<String> hash = new HashSet<String>(Arrays.asList(selectedCols));
    // flag for every input column whether it is one of the selected columns
    m_colSelected = new boolean[colCount];
    for (int i = 0; i < colCount; i++) {
        m_colSelected[i] = hash.contains(inSpec.getColumnSpec(i).getName());
    }
    final int rowCount = inTable.getRowCount();
    // the rows kept for the view contain the learning columns followed by the target column
    String[] temp = new String[m_columnNames.length + 1];
    System.arraycopy(m_columnNames, 0, temp, 0, m_columnNames.length);
    temp[temp.length - 1] = m_settings.getTargetColumn();
    FilterColumnTable filteredTable = new FilterColumnTable(inTable, temp);
    final DataArray rowContainer = new DefaultDataArray(filteredTable, 1, m_settings.getMaxRowsForView());
    // handle the optional PMML input
    PMMLPortObject inPMMLPort = m_pmmlInEnabled ? (PMMLPortObject) inData[1] : null;
    // re-run configure on the actual input specs to obtain the model spec for the learner
    PortObjectSpec[] outputSpec = configure((inPMMLPort == null)
        ? new PortObjectSpec[] { inData[0].getSpec(), null }
        : new PortObjectSpec[] { inData[0].getSpec(), inPMMLPort.getSpec() });
    Learner learner = new Learner((PMMLPortObjectSpec) outputSpec[0], 0d,
        m_settings.getMissingValueHandling() == MissingValueHandling.fail, m_settings.getDegree());
    try {
        PolyRegContent polyRegContent = learner.perform(inTable, exec);
        m_betas = fillBeta(polyRegContent);
        m_meanValues = polyRegContent.getMeans();
        ColumnRearranger crea = new ColumnRearranger(inSpec);
        crea.append(getCellFactory(inSpec.findColumnIndex(m_settings.getTargetColumn())));
        PortObject[] bdt = new PortObject[] {
            createPMMLModel(inPMMLPort, inSpec),
            exec.createColumnRearrangeTable(inTable, crea, exec.createSilentSubExecutionContext(.2)),
            polyRegContent.createTablePortObject(exec.createSubExecutionContext(0.2)) };
        // turn the accumulated squared error into a mean over all rows
        m_squaredError /= rowCount;
        if (polyRegContent.getWarningMessage() != null) {
            setWarningMessage(polyRegContent.getWarningMessage());
        }
        double[] stdErrors = PolyRegViewData.mapToArray(polyRegContent.getStandardErrors(), m_columnNames,
            m_settings.getDegree(), polyRegContent.getInterceptStdErr());
        double[] tValues = PolyRegViewData.mapToArray(polyRegContent.getTValues(), m_columnNames,
            m_settings.getDegree(), polyRegContent.getInterceptTValue());
        double[] pValues = PolyRegViewData.mapToArray(polyRegContent.getPValues(), m_columnNames,
            m_settings.getDegree(), polyRegContent.getInterceptPValue());
        m_viewData = new PolyRegViewData(m_meanValues, m_betas, stdErrors, tValues, pValues, m_squaredError,
            polyRegContent.getAdjustedRSquared(), m_columnNames, m_settings.getDegree(),
            m_settings.getTargetColumn(), rowContainer);
        return bdt;
    } catch (ModelSpecificationException e) {
        // Append the exception message to any existing warning. The prefix is computed
        // separately because '+' binds tighter than '?:' - written in one expression the
        // exception message would only be used when no previous warning exists.
        final String origWarning = getWarningMessage();
        final String prefix = (origWarning != null && !origWarning.isEmpty()) ? (origWarning + "\n") : "";
        final String warning = prefix + e.getMessage();
        setWarningMessage(warning);
        final ExecutionContext subExec = exec.createSubExecutionContext(.1);
        final BufferedDataContainer empty = subExec.createDataContainer(STATS_SPEC);
        // one row per column and degree with coefficient 0 and missing statistics
        int rowIdx = 1;
        for (final String column : m_columnNames) {
            for (int d = 1; d <= m_settings.getDegree(); ++d) {
                empty.addRowToTable(new DefaultRow("Row" + rowIdx++, new StringCell(column), new IntCell(d),
                    new DoubleCell(0.0d), DataType.getMissingCell(), DataType.getMissingCell(),
                    DataType.getMissingCell()));
            }
        }
        empty.addRowToTable(new DefaultRow("Row" + rowIdx, new StringCell("Intercept"), new IntCell(0),
            new DoubleCell(0.0d), DataType.getMissingCell(), DataType.getMissingCell(),
            DataType.getMissingCell()));
        // the same NaN array serves as standard errors, t-values and p-values of the view
        double[] nans = new double[m_columnNames.length * m_settings.getDegree() + 1];
        Arrays.fill(nans, Double.NaN);
        m_betas = new double[nans.length];
        // Mean only for the linear tags
        m_meanValues = new double[nans.length / m_settings.getDegree()];
        m_viewData = new PolyRegViewData(m_meanValues, m_betas, nans, nans, nans, m_squaredError, Double.NaN,
            m_columnNames, m_settings.getDegree(), m_settings.getTargetColumn(), rowContainer);
        empty.close();
        ColumnRearranger crea = new ColumnRearranger(inSpec);
        crea.append(getCellFactory(inSpec.findColumnIndex(m_settings.getTargetColumn())));
        BufferedDataTable rearrangerTable =
            exec.createColumnRearrangeTable(inTable, crea, exec.createSubProgress(0.6));
        PMMLPortObject model = createPMMLModel(inPMMLPort, inSpec);
        PortObject[] bdt = new PortObject[] { model, rearrangerTable, empty.getTable() };
        return bdt;
    }
}
use of org.knime.core.node.port.pmml.PMMLPortObjectSpec in project knime-core by knime.
the class ClusterAssignerNodeModel method configure.
/**
 * Derives the output table spec by appending the column(s) produced by a
 * {@code ClusterAssignFactory} to the data table spec found at {@code DATA_PORT}.
 * <p>
 * The learning fields of the PMML spec found at {@code PMML_PORT} are looked up in the data
 * table spec via {@code findLearnedColumnIndices}.
 *
 * {@inheritDoc}
 */
@Override
protected PortObjectSpec[] configure(final PortObjectSpec[] inSpecs) throws InvalidSettingsException {
    final PMMLPortObjectSpec modelSpec = (PMMLPortObjectSpec) inSpecs[PMML_PORT];
    final DataTableSpec tableSpec = (DataTableSpec) inSpecs[DATA_PORT];

    final ColumnRearranger rearranger = new ColumnRearranger(tableSpec);
    // The first two factory arguments are left null here; only the spec is derived
    rearranger.append(
        new ClusterAssignFactory(
            null,
            null,
            createNewOutSpec(tableSpec),
            findLearnedColumnIndices(tableSpec, new HashSet<String>(modelSpec.getLearningFields()))));

    return new DataTableSpec[] {rearranger.createSpec()};
}
use of org.knime.core.node.port.pmml.PMMLPortObjectSpec in project knime-core by knime.
the class ClusterNodeModel method configure.
/**
 * Reconciles the column filter with the incoming table and derives the output specs.
 * <p>
 * Columns of the input table that are in neither the include nor the exclude list are added to
 * the include list; entries of either list that name no column of the input table are removed.
 * If all columns are to be kept, at least one column compatible with {@link DoubleValue} is
 * required. Otherwise, when both lists are empty, the double compatible columns become the
 * include list and all remaining columns the exclude list.
 *
 * @param inSpecs the specifications of the input port(s); index 0 is the data table spec and
 *            index 1 is read as PMML spec only if the PMML input is enabled
 * @return the appended table spec, followed by the cluster centers spec if centers are output,
 *         followed by the PMML spec
 * @throws InvalidSettingsException if all columns are kept but the input has no numeric column;
 *             presumably also raised by the called helpers, e.g. for PMML incompatible types
 */
@Override
protected PortObjectSpec[] configure(final PortObjectSpec[] inSpecs) throws InvalidSettingsException {
    final DataTableSpec tableSpec = (DataTableSpec) inSpecs[0];

    // The full dimensionality is used for the prototypes; unusable columns are only flagged
    // as ignored further below.
    m_dimension = tableSpec.getNumColumns();
    m_ignoreColumn = new boolean[m_dimension];
    m_nrIgnoredColumns = 0;

    // Working copies that become the new filter lists ...
    final LinkedList<String> updatedIncludes = new LinkedList<String>(m_usedColumns.getIncludeList());
    final LinkedList<String> updatedExcludes = new LinkedList<String>(m_usedColumns.getExcludeList());
    // ... and copies from which every column present in the table is crossed off
    final LinkedList<String> staleIncludes = new LinkedList<String>(m_usedColumns.getIncludeList());
    final LinkedList<String> staleExcludes = new LinkedList<String>(m_usedColumns.getExcludeList());

    final String[] tableColumns = tableSpec.getColumnNames();
    for (int i = 0; i < tableColumns.length; i++) {
        final String name = tableColumns[i];
        if (m_usedColumns.getIncludeList().contains(name)) {
            staleIncludes.remove(name);
        } else if (m_usedColumns.getExcludeList().contains(name)) {
            staleExcludes.remove(name);
        } else {
            // unknown column: include it
            updatedIncludes.add(name);
        }
    }
    // Whatever is left names columns that are not part of the incoming table
    for (final String stale : staleIncludes) {
        updatedIncludes.remove(stale);
    }
    for (final String stale : staleExcludes) {
        updatedExcludes.remove(stale);
    }
    m_usedColumns.setExcludeList(updatedExcludes);
    m_usedColumns.setIncludeList(updatedIncludes);

    if (!m_usedColumns.isKeepAllSelected()) {
        final boolean filterIsEmpty =
            m_usedColumns.getIncludeList().isEmpty() && m_usedColumns.getExcludeList().isEmpty();
        if (filterIsEmpty) {
            // Default: include every double compatible column, exclude the rest
            final List<String> numericColumns = new ArrayList<String>();
            final List<String> otherColumns = new ArrayList<String>();
            for (final DataColumnSpec column : tableSpec) {
                final boolean numeric = column.getType().isCompatible(DoubleValue.class);
                (numeric ? numericColumns : otherColumns).add(column.getName());
            }
            m_usedColumns.setIncludeList(numericColumns);
            m_usedColumns.setExcludeList(otherColumns);
        }
        if (m_usedColumns.getIncludeList().isEmpty()) {
            setWarningMessage("No column in include list! Produces one huge cluster");
        }
    } else {
        // Keep-all needs at least one numeric column to work on
        boolean numericFound = false;
        final int columnCount = tableSpec.getNumColumns();
        for (int i = 0; i < columnCount && !numericFound; i++) {
            numericFound = tableSpec.getColumnSpec(i).getType().isCompatible(DoubleValue.class);
        }
        if (!numericFound) {
            throw new InvalidSettingsException("No numeric columns in input");
        }
    }

    addExcludeColumnsToIgnoreList(tableSpec);
    final DataTableSpec appendedSpec = createAppendedSpec(tableSpec);

    // Use the incoming PMML spec if that input is enabled, otherwise start from the table spec
    final PMMLPortObjectSpec pmmlSpec = m_pmmlInEnabled
        ? (PMMLPortObjectSpec) inSpecs[1]
        : new PMMLPortObjectSpecCreator(tableSpec).createSpec();

    if (m_outputCenters) {
        return new PortObjectSpec[] {
            appendedSpec, createClusterCentersSpec(tableSpec), createPMMLSpec(pmmlSpec, tableSpec)};
    }
    return new PortObjectSpec[] {appendedSpec, createPMMLSpec(pmmlSpec, tableSpec)};
}
use of org.knime.core.node.port.pmml.PMMLPortObjectSpec in project knime-core by knime.
the class FuzzyClusterNodeModel method execute.
/**
 * Generate new clustering based on InputDataTable and specified number of
 * clusters. In the output table, you will find the datarow with
 * supplementary information about the membership to each cluster center.
 * OUTPORT = original datarows with cluster membership information, followed
 * by a PMML port object holding the cluster centres (without the noise
 * cluster, if noise clustering is enabled).
 *
 * {@inheritDoc}
 */
@Override
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    BufferedDataTable indata = (BufferedDataTable) inData[0];
    // reset the results of a previous run
    m_clusters = null;
    m_betweenClusterVariation = Double.NaN;
    m_withinClusterVariation = null;
    // m_memory selects FCMAlgorithmMemory over FCMAlgorithm; with noise enabled the fourth
    // constructor argument is m_lambda if delta is calculated and m_delta otherwise
    if (m_noise) {
        if (m_calculateDelta) {
            if (m_memory) {
                m_fcmAlgo = new FCMAlgorithmMemory(m_nrClusters, m_fuzzifier, m_calculateDelta, m_lambda);
            } else {
                m_fcmAlgo = new FCMAlgorithm(m_nrClusters, m_fuzzifier, m_calculateDelta, m_lambda);
            }
        } else {
            if (m_memory) {
                m_fcmAlgo = new FCMAlgorithmMemory(m_nrClusters, m_fuzzifier, m_calculateDelta, m_delta);
            } else {
                m_fcmAlgo = new FCMAlgorithm(m_nrClusters, m_fuzzifier, m_calculateDelta, m_delta);
            }
        }
    } else {
        if (m_memory) {
            m_fcmAlgo = new FCMAlgorithmMemory(m_nrClusters, m_fuzzifier);
        } else {
            m_fcmAlgo = new FCMAlgorithm(m_nrClusters, m_fuzzifier);
        }
    }
    int nrRows = indata.getRowCount();
    DataTableSpec spec = indata.getDataTableSpec();
    int nrCols = spec.getNumColumns();
    List<String> learningCols = new LinkedList<String>();
    // counter for included columns
    int z = 0;
    final int[] matched = new int[m_list.size()];
    for (int i = 0; i < nrCols; i++) {
        // if include does contain current column name
        String colname = spec.getColumnSpec(i).getName();
        if (m_list.contains(colname)) {
            matched[z] = i;
            z++;
            learningCols.add(colname);
        }
    }
    // Only keep the indices that were actually found. If a name in m_list is not part of the
    // table, the unused trailing entries of 'matched' are 0 and would refer to column 0.
    final int[] columns = new int[z];
    System.arraycopy(matched, 0, columns, 0, z);
    ColumnRearranger colre = new ColumnRearranger(spec);
    colre.keepOnly(columns);
    BufferedDataTable filteredtable = exec.createColumnRearrangeTable(indata, colre, exec);
    // get dimension of feature space
    int dimension = filteredtable.getDataTableSpec().getNumColumns();
    Random random = new Random();
    if (m_useRandomSeed) {
        random.setSeed(m_randomSeed);
    }
    m_fcmAlgo.init(nrRows, dimension, filteredtable, random);
    // main loop - until clusters stop changing or maxNrIterations reached
    int currentIteration = 0;
    double totalchange = Double.MAX_VALUE;
    while ((totalchange > 1e-7) && (currentIteration < m_maxNrIterations)) {
        exec.checkCanceled();
        exec.setProgress((double) currentIteration / (double) m_maxNrIterations,
            "Iteration " + currentIteration + " Total change of prototypes: " + totalchange);
        totalchange = m_fcmAlgo.doOneIteration(exec);
        currentIteration++;
    }
    if (m_measures) {
        double[][] data = null;
        if (m_fcmAlgo instanceof FCMAlgorithmMemory) {
            // this variant already provides the data as double matrix
            data = ((FCMAlgorithmMemory) m_fcmAlgo).getConvertedData();
        } else {
            // copy the filtered table into a double matrix, missing cells are stored as 0
            data = new double[nrRows][m_fcmAlgo.getDimension()];
            int curRow = 0;
            for (DataRow dRow : filteredtable) {
                for (int j = 0; j < dRow.getNumCells(); j++) {
                    if (!(dRow.getCell(j).isMissing())) {
                        DoubleValue dv = (DoubleValue) dRow.getCell(j);
                        data[curRow][j] = dv.getDoubleValue();
                    } else {
                        data[curRow][j] = 0;
                    }
                }
                curRow++;
            }
        }
        m_fcmmeasures = new FCMQualityMeasures(m_fcmAlgo.getClusterCentres(), m_fcmAlgo.getweightMatrix(),
            data, m_fuzzifier);
    }
    // append the membership columns to the original (unfiltered) input table
    ColumnRearranger colRearranger = new ColumnRearranger(spec);
    CellFactory membershipFac = new ClusterMembershipFactory(m_fcmAlgo);
    colRearranger.append(membershipFac);
    BufferedDataTable result = exec.createColumnRearrangeTable(indata, colRearranger, exec);
    // don't write out the noise cluster!
    double[][] clustercentres = m_fcmAlgo.getClusterCentres();
    if (m_noise) {
        // copy all centres but the last one
        double[][] cleaned = new double[clustercentres.length - 1][];
        for (int i = 0; i < cleaned.length; i++) {
            cleaned[i] = new double[clustercentres[i].length];
            System.arraycopy(clustercentres[i], 0, cleaned[i], 0, clustercentres[i].length);
        }
        clustercentres = cleaned;
    }
    exec.setMessage("Creating PMML cluster model...");
    // handle the optional PMML input
    PMMLPortObject inPMMLPort = m_enablePMMLInput ? (PMMLPortObject) inData[1] : null;
    PMMLPortObjectSpec inPMMLSpec = null;
    if (inPMMLPort != null) {
        inPMMLSpec = inPMMLPort.getSpec();
    }
    PMMLPortObjectSpec pmmlOutSpec = createPMMLPortObjectSpec(inPMMLSpec, spec, learningCols);
    PMMLPortObject outPMMLPort = new PMMLPortObject(pmmlOutSpec, inPMMLPort, spec);
    outPMMLPort.addModelTranslater(new PMMLClusterTranslator(ComparisonMeasure.squaredEuclidean, m_nrClusters,
        clustercentres, null, new LinkedHashSet<String>(pmmlOutSpec.getLearningFields())));
    return new PortObject[] { result, outPMMLPort };
}
Aggregations