use of org.knime.core.data.DataColumnSpecCreator in project knime-core by knime.
the class NaiveBayesCellFactory method createPredictedClassColSpec.
/**
 * Builds the column spec of the predicted class column.
 *
 * @param classColumnName the desired column name; it is made unique with respect to {@code inSpec}
 * @param classType the KNIME data type of the class column
 * @param inSpec the input table spec the new column name must not clash with
 * @return the spec of the predicted class column
 */
private static DataColumnSpec createPredictedClassColSpec(final String classColumnName, final DataType classType, final DataTableSpec inSpec) {
    final String uniqueName = DataTableSpec.getUniqueColumnName(inSpec, classColumnName);
    // Round trip KNIME -> PMML -> KNIME: PMML converts long data cells into double, so we first ask
    // which PMML type the KNIME type maps to and then infer the matching KNIME type from that PMML type.
    final DataType pmmlConformType =
        PMMLDataDictionaryTranslator.getKNIMEDataType(PMMLDataDictionaryTranslator.getPMMLDataType(classType));
    return new DataColumnSpecCreator(uniqueName, pmmlConformType).createSpec();
}
use of org.knime.core.data.DataColumnSpecCreator in project knime-core by knime.
the class NaiveBayesCellFactory method createResultColSpecs.
/**
 * Assembles the specs of all result columns in the order in which they are
 * appended to the input table: one probability column per class value
 * (only if requested) followed by the predicted class column.
 *
 * @param model the {@link NaiveBayesModel} providing the class column and its sorted class values
 * @param predictionColName the name of the prediction column
 * @param inSpec the <code>DataTableSpec</code> of the input data, used to make the
 * prediction column name unique
 * @param inclClassProbVals <code>true</code> if a probability column per class value should be added
 * @param suffix the suffix for the probability columns
 * @return <code>DataColumnSpec[]</code> with the specs of the result columns
 */
private static DataColumnSpec[] createResultColSpecs(final NaiveBayesModel model, final String predictionColName, final DataTableSpec inSpec, final boolean inclClassProbVals, final String suffix) {
    final DataColumnSpec predictionSpec =
        createPredictedClassColSpec(predictionColName, model.getClassColumnDataType(), inSpec);
    if (inclClassProbVals) {
        final List<String> sortedClasses = model.getSortedClassValues();
        // one slot per class probability plus the trailing prediction column
        final DataColumnSpec[] resultSpecs = new DataColumnSpec[sortedClasses.size() + 1];
        // a single creator is reused; only the column name changes between probability columns
        final DataColumnSpecCreator probabilityCreator = new DataColumnSpecCreator("dummy", DoubleCell.TYPE);
        final PredictorHelper helper = PredictorHelper.getInstance();
        int pos = 0;
        for (final String classValue : sortedClasses) {
            final String probabilityName =
                helper.probabilityColumnName(model.getClassColumnName(), classValue, suffix);
            probabilityCreator.setName(probabilityName);
            resultSpecs[pos++] = probabilityCreator.createSpec();
        }
        resultSpecs[pos] = predictionSpec;
        return resultSpecs;
    }
    return new DataColumnSpec[] { predictionSpec };
}
use of org.knime.core.data.DataColumnSpecCreator in project knime-core by knime.
the class BasisFunctionLearnerNodeModel method execute.
/**
 * Starts the learning algorithm in the learner.
 *
 * <p>The input table is reduced to the numeric non-target columns, the data
 * columns reported by {@link BasisFunctionFactory#findDataColumns}, and the
 * target columns (in that order) before it is handed to the
 * {@link BasisFunctionLearnerTable}. The hilite mapper and the model info of
 * the trained table are stored in this node model.
 *
 * @param inData the input training data at index 0
 * @param exec the execution monitor
 * @return a two-element array: the learned rules as table at index 0 and
 *         the rule model port object at index 1
 * @throws CanceledExecutionException if the training was canceled
 */
@Override
public PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws CanceledExecutionException {
    BufferedDataTable data = (BufferedDataTable) inData[0];
    // find all double cell columns in the data
    DataTableSpec tSpec = data.getDataTableSpec();
    // insertion-ordered set: keeps the column order and drops duplicates added by the steps below
    LinkedHashSet<String> columns = new LinkedHashSet<String>(tSpec.getNumColumns());
    List<String> targetHash = Arrays.asList(m_targetColumns);
    for (int c = 0; c < tSpec.getNumColumns(); c++) {
        DataColumnSpec cSpec = tSpec.getColumnSpec(c);
        String name = cSpec.getName();
        // TODO only numeric columns allowed
        if (!targetHash.contains(name) && cSpec.getType().isCompatible(DoubleValue.class)) {
            columns.add(name);
        }
    }
    // get all data columns without target columns
    String[] dataCols = BasisFunctionFactory.findDataColumns(tSpec, targetHash);
    columns.addAll(Arrays.asList(dataCols));
    // add target columns at the end
    columns.addAll(Arrays.asList(m_targetColumns));
    // filter selected columns from input data
    String[] cols = columns.toArray(new String[] {});
    ColumnRearranger colRe = new ColumnRearranger(tSpec);
    colRe.keepOnly(cols);
    BufferedDataTable trainData = exec.createColumnRearrangeTable(data, colRe, exec);
    // print settings info
    LOGGER.debug("distance : " + getDistance());
    LOGGER.debug("missing : " + getMissingFct());
    LOGGER.debug("target columns: " + Arrays.toString(m_targetColumns));
    LOGGER.debug("shrink commit : " + isShrinkAfterCommit());
    LOGGER.debug("max coverage : " + isMaxClassCoverage());
    LOGGER.debug("max no. epochs: " + m_maxEpochs);
    // create factory
    BasisFunctionFactory factory = getFactory(trainData.getDataTableSpec());
    // start training
    BasisFunctionLearnerTable table = new BasisFunctionLearnerTable(trainData, dataCols, m_targetColumns, factory, BasisFunctionLearnerTable.MISSINGS[m_missing], m_shrinkAfterCommit, m_maxCoverage, m_maxEpochs, exec);
    // set translator mapping
    m_translator.setMapper(table.getHiLiteMapper());
    // keep the model info of the trained table in this node model
    ModelContent modelInfo = new ModelContent(MODEL_INFO);
    table.saveInfos(modelInfo);
    m_modelInfo = modelInfo;
    // return rules[0] and rule_model[1]
    return new PortObject[] { exec.createBufferedDataTable(table, exec), createPortObject(new BasisFunctionModelContent(table.getDataTableSpec(), table.getBasisFunctions())) };
}
use of org.knime.core.data.DataColumnSpecCreator in project knime-core by knime.
the class OneMissingValueReplacementFunction method getDataTableSpec.
/**
 * {@inheritDoc}
 * <p>
 * The result is the factory's model spec in which the column five positions
 * before the end is replaced by a copy whose domain holds the keys of
 * <code>m_bfs</code> together with any possible values the original column
 * domain already lists, sorted by the comparator of the column's type.
 */
@Override
public DataTableSpec getDataTableSpec() {
    final DataTableSpec modelSpec = getFactory().getModelSpec();
    final int colIdx = modelSpec.getNumColumns() - 5;
    final DataColumnSpec origColSpec = modelSpec.getColumnSpec(colIdx);
    final DataColumnSpecCreator specCreator = new DataColumnSpecCreator(origColSpec);
    // gather the domain values, ordered by the comparator of the column type
    final TreeSet<DataCell> possibleValues = new TreeSet<DataCell>(origColSpec.getType().getComparator());
    possibleValues.addAll(m_bfs.keySet());
    if (origColSpec.getDomain().hasValues()) {
        possibleValues.addAll(origColSpec.getDomain().getValues());
    }
    final DataColumnDomainCreator domainCreator = new DataColumnDomainCreator(possibleValues);
    specCreator.setDomain(domainCreator.createDomain());
    final ColumnRearranger rearranger = new ColumnRearranger(modelSpec);
    // the replacement factory passes the cell through unchanged; only the column spec differs
    final SingleCellFactory passThroughFactory = new SingleCellFactory(specCreator.createSpec()) {
        @Override
        public DataCell getCell(final DataRow row) {
            return row.getCell(colIdx);
        }
    };
    rearranger.replace(passThroughFactory, colIdx);
    return rearranger.createSpec();
}
use of org.knime.core.data.DataColumnSpecCreator in project knime-core by knime.
the class RegressionPredictorCellFactory method createColumnSpec.
/**
 * Derives the specs of the columns the predictor appends, if possible.
 *
 * <p>All learning columns of the model must exist in the data table with a
 * compatible type. If probabilities are requested and the target is nominal,
 * one probability column (domain 0.0 to 1.0) per target category is created,
 * followed by the prediction column.
 *
 * @param portSpec the spec of the pmml input port
 * @param tableSpec the spec of the data input port
 * @param settings settings for the predictor node
 * @return the specs of the appended columns, or <code>null</code> if
 *         probabilities are requested for a nominal target whose domain
 *         lists no possible values
 * @throws InvalidSettingsException if the model specifies no target column,
 *         or tableSpec and portSpec do not match
 */
public static DataColumnSpec[] createColumnSpec(final PMMLPortObjectSpec portSpec, final DataTableSpec tableSpec, final RegressionPredictorSettings settings) throws InvalidSettingsException {
    // the model must declare a target
    if (portSpec.getTargetCols().isEmpty()) {
        throw new InvalidSettingsException("The general regression model" + " does not specify a target column.");
    }
    // every learning column has to be present in the table with a compatible type
    for (DataColumnSpec learnSpec : portSpec.getLearningCols()) {
        final String learnName = learnSpec.getName();
        if (!tableSpec.containsName(learnName)) {
            throw new InvalidSettingsException("The table for prediction " + "does not contain the column \"" + learnName + "\".");
        }
        final DataType tableType = tableSpec.getColumnSpec(learnName).getType();
        final DataType learnType = learnSpec.getType();
        if (learnType.isCompatible(NominalValue.class)) {
            // bit vectors and byte vectors are accepted in place of a nominal column
            final boolean acceptable = tableType.isCompatible(BitVectorValue.class)
                || tableType.isCompatible(ByteVectorValue.class)
                || tableType.isCompatible(NominalValue.class);
            if (!acceptable) {
                throw new InvalidSettingsException("The column \"" + learnName + "\" in the table of prediction " + "is expected to be compatible with " + "\"NominalValue\".");
            }
        } else if (learnType.isCompatible(DoubleValue.class) && !tableType.isCompatible(DoubleValue.class)) {
            throw new InvalidSettingsException("The column \"" + learnName + "\" in the table of prediction " + "is expected to be numeric.");
        }
    }

    // specs of the appended columns, in output order
    final List<DataColumnSpec> appendedSpecs = new ArrayList<DataColumnSpec>();
    final String targetName = portSpec.getTargetFields().get(0);
    final DataColumnSpec targetSpec = portSpec.getDataTableSpec().getColumnSpec(targetName);

    if (settings.getIncludeProbabilities() && targetSpec.getType().isCompatible(NominalValue.class)) {
        if (!targetSpec.getDomain().hasValues()) {
            // without the possible values the probability columns cannot be named
            return null;
        }
        final List<DataCell> categories = new ArrayList<DataCell>(targetSpec.getDomain().getValues());
        for (DataCell category : categories) {
            final String probabilityName = "P (" + targetName + "=" + category.toString() + ")" + settings.getPropColumnSuffix();
            final String uniqueProbabilityName = DataTableSpec.getUniqueColumnName(tableSpec, probabilityName);
            final DataColumnSpecCreator probabilityCreator = new DataColumnSpecCreator(uniqueProbabilityName, DoubleCell.TYPE);
            // probability columns get the fixed domain bounds 0.0 and 1.0
            probabilityCreator.setDomain(new DataColumnDomainCreator(new DoubleCell(0.0), new DoubleCell(1.0)).createDomain());
            appendedSpecs.add(probabilityCreator.createSpec());
        }
    }

    // the prediction column comes last
    final String predictionName;
    if (settings.getHasCustomPredictionName()) {
        predictionName = settings.getCustomPredictionName();
    } else {
        predictionName = "Prediction (" + targetName + ")";
    }
    final String uniquePredictionName = DataTableSpec.getUniqueColumnName(tableSpec, predictionName);
    final boolean nominalTarget = targetSpec.getType().isCompatible(NominalValue.class);
    // nominal targets keep their own type (and domain); everything else is predicted as double
    final DataColumnSpecCreator predictionCreator =
        new DataColumnSpecCreator(uniquePredictionName, nominalTarget ? targetSpec.getType() : DoubleCell.TYPE);
    if (nominalTarget) {
        predictionCreator.setDomain(new DataColumnDomainCreator(targetSpec.getDomain()).createDomain());
    }
    appendedSpecs.add(predictionCreator.createSpec());
    return appendedSpecs.toArray(new DataColumnSpec[0]);
}
Aggregations