use of org.knime.core.data.DataColumnSpec in project knime-core by knime.
the class RegressionTreePredictorCellFactory method createFactory.
/**
 * Assembles a cell factory for the given predictor. The factory is handed a single new
 * column of type {@code DoubleCell.TYPE} whose name is requested from a
 * {@code UniqueNameGenerator} created on the predictor's data spec, using the configured
 * prediction column name.
 *
 * @param predictor source of the data spec, the model spec and the predictor configuration
 * @return factory based on RegressionTreePredictor <b>predictor</b>
 * @throws InvalidSettingsException if raised by one of the calls made while assembling the factory
 */
public static RegressionTreePredictorCellFactory createFactory(final RegressionTreePredictor predictor) throws InvalidSettingsException {
    final DataTableSpec inputSpec = predictor.getDataSpec();
    final RegressionTreeModelPortObjectSpec treeModelSpec = predictor.getModelSpec();
    final RegressionTreePredictorConfiguration predictorConfig = predictor.getConfiguration();

    // exactly one column is appended: the numeric prediction
    final DataColumnSpec predictionCol =
        new UniqueNameGenerator(inputSpec).newColumn(predictorConfig.getPredictionColumnName(), DoubleCell.TYPE);
    final DataColumnSpec[] appendedCols = new DataColumnSpec[]{predictionCol};

    final int[] learnColumnInRealDataIndices = treeModelSpec.calculateFilterIndices(inputSpec);
    return new RegressionTreePredictorCellFactory(predictor, appendedCols, learnColumnInRealDataIndices);
}
use of org.knime.core.data.DataColumnSpec in project knime-core by knime.
the class GradientBoostingPMMLPredictorNodeModel method translateSpec.
/**
 * Derives a tree ensemble model spec from a PMML port object spec. The PMML data table spec
 * is rearranged so that the single declared target column is moved to index
 * {@code getNumColumns()} before the model spec is created from it.
 *
 * @param pmmlSpec PMML spec; it is checked to declare exactly one target column
 * @return model spec built from the rearranged PMML data table spec
 */
private TreeEnsembleModelPortObjectSpec translateSpec(final PMMLPortObjectSpec pmmlSpec) {
    final DataTableSpec dataSpec = pmmlSpec.getDataTableSpec();
    final ColumnRearranger rearranger = new ColumnRearranger(dataSpec);
    final List<DataColumnSpec> targetCols = pmmlSpec.getTargetCols();

    // exactly one target is supported: reject "none" and "several" with distinct messages
    CheckUtils.checkArgument(!targetCols.isEmpty(), "The provided PMML does not declare a target field.");
    CheckUtils.checkArgument(targetCols.size() == 1,
        "The provided PMML declares multiple target. " + "This behavior is currently not supported.");

    final String targetName = targetCols.get(0).getName();
    final int newTargetPosition = dataSpec.getNumColumns();
    rearranger.move(targetName, newTargetPosition);

    final DataTableSpec rearrangedSpec = rearranger.createSpec();
    return new TreeEnsembleModelPortObjectSpec(rearrangedSpec);
}
use of org.knime.core.data.DataColumnSpec in project knime-core by knime.
the class TreeEnsembleLearnerConfiguration method checkColumnSelection.
/**
 * Sanity check of the column selection, meant to be called from the configure step of the
 * learner nodes. <br>
 * With a fingerprint column configured, that column has to exist in the incoming spec and be
 * compatible with a bit vector, byte vector or double vector value. <br>
 * Without a fingerprint column, the column filter has to include at least one column.
 *
 * @param inSpec Spec of the incoming table
 * @throws InvalidSettingsException thrown if the column selection makes no sense
 */
public void checkColumnSelection(final DataTableSpec inSpec) throws InvalidSettingsException {
    // the filter is applied up front, regardless of which mode is checked below
    final FilterResult filtered = m_columnFilterConfig.applyTo(inSpec);

    if (m_fingerprintColumn == null) {
        // ordinary attribute mode: at least one included column is required
        if (filtered.getIncludes().length == 0) {
            throw new InvalidSettingsException("No attributes are selected.");
        }
        return;
    }

    // fingerprint mode: the column must be present ...
    final DataColumnSpec fingerprintSpec = inSpec.getColumnSpec(m_fingerprintColumn);
    if (fingerprintSpec == null) {
        throw new InvalidSettingsException("The fingerprint column is not contained in the incoming table.");
    }

    // ... and of one of the supported vector types
    final DataType fingerprintType = fingerprintSpec.getType();
    if (!fingerprintType.isCompatible(BitVectorValue.class)
        && !fingerprintType.isCompatible(ByteVectorValue.class)
        && !fingerprintType.isCompatible(DoubleVectorValue.class)) {
        throw new InvalidSettingsException("The specified fingerprint column is not of a compatible vector type.");
    }
}
use of org.knime.core.data.DataColumnSpec in project knime-core by knime.
the class TreeEnsembleLearnerConfiguration method filterLearnColumns.
/**
 * Creates a rearranger that reduces <b>spec</b> to the columns used for learning plus the
 * target column, and moves the target column to index {@code getColumnCount()} of the
 * rearranger.
 * <p>
 * Without a fingerprint column, every non-target column that is not in the include list of
 * the column filter is removed. Included columns must be compatible with {@code DoubleValue}
 * or {@code NominalValue}; included columns for which {@code shouldIgnoreLearnColumn} returns
 * {@code true} are removed as well and reported through the warning set on the returned
 * rearranger. With a fingerprint column, only the target and the fingerprint column are kept.
 *
 * @param spec spec of the table to learn from
 * @return ColumnRearranger that filters out all columns not part of the learning columns.
 * @throws InvalidSettingsException if the target column is unset, missing or of the wrong
 *             type; if an included attribute is neither numeric nor nominal; if no learning
 *             column remains besides the target; if an included attribute is not contained in
 *             <b>spec</b>; or if the fingerprint column is missing or of the wrong type
 */
public FilterLearnColumnRearranger filterLearnColumns(final DataTableSpec spec) throws InvalidSettingsException {
    // (ColumnRearranger is a final class in v2.5)
    if (m_targetColumn == null) {
        throw new InvalidSettingsException("Target column not set");
    }
    DataColumnSpec targetCol = spec.getColumnSpec(m_targetColumn);
    if (targetCol == null || !targetCol.getType().isCompatible(getRequiredTargetClass())) {
        throw new InvalidSettingsException("Target column \"" + m_targetColumn + "\" does not exist or is not of the " + "correct type");
    }
    FilterResult filterResult = m_columnFilterConfig.applyTo(spec);
    List<String> noDomainColumns = new ArrayList<String>();
    FilterLearnColumnRearranger rearranger = new FilterLearnColumnRearranger(spec);
    if (m_fingerprintColumn == null) {
        // use ordinary data
        Set<String> incl = new HashSet<String>(Arrays.asList(filterResult.getIncludes()));
        // the target column can possibly show up in the include list of the filter result
        // therefore we have to remove it
        incl.remove(targetCol.getName());
        for (DataColumnSpec col : spec) {
            String colName = col.getName();
            if (colName.equals(m_targetColumn)) {
                continue;
            }
            // incl.remove doubles as the membership test; whatever is left in incl after the
            // loop was selected but is not present in the input table
            if (!incl.remove(colName)) {
                rearranger.remove(colName);
                continue;
            }
            DataType type = col.getType();
            if (!(type.isCompatible(DoubleValue.class) || type.isCompatible(NominalValue.class))) {
                throw new InvalidSettingsException("Attribute column \"" + colName + "\" is " + "not of the expected type (must be " + "numeric or nominal).");
            }
            if (shouldIgnoreLearnColumn(col)) {
                noDomainColumns.add(colName);
                rearranger.remove(colName);
            }
        }

        // the target column is never removed above, so a count of <= 1 means that no
        // learning column is left
        final boolean noLearnColumns = rearranger.getColumnCount() <= 1;
        final String noLearnColumnsMsg = "Input table has no valid " + "learning columns (need one additional numeric or " + "nominal column).";
        if (noLearnColumns && !noDomainColumns.isEmpty()) {
            StringBuilder b = new StringBuilder(noLearnColumnsMsg);
            b.append(" ").append(noDomainColumns.size());
            b.append(" column(s) were ignored due to missing domain ");
            b.append("information -- execute predecessor and/or ");
            b.append("use Domain Calculator node.");
            throw new InvalidSettingsException(b.toString());
        }
        if (!incl.isEmpty()) {
            // list at most four of the missing names; the iterator removes the listed ones so
            // that incl.size() afterwards is the number of names not listed
            StringBuilder missings = new StringBuilder();
            int i = 0;
            for (Iterator<String> it = incl.iterator(); it.hasNext() && i < 4; i++) {
                String s = it.next();
                missings.append(i > 0 ? ", " : "").append(s);
                it.remove();
            }
            if (!incl.isEmpty()) {
                missings.append(",...").append(incl.size()).append(" more");
            }
            throw new InvalidSettingsException("Some selected attributes " + "are not present in the input table: " + missings);
        }
        if (noLearnColumns) {
            // Previously this message was only thrown when columns had been ignored, so a
            // selection that left nothing but the target silently produced a target-only
            // rearranger. It is checked last so that the more specific errors above keep
            // precedence.
            throw new InvalidSettingsException(noLearnColumnsMsg);
        }
    } else {
        // use fingerprint data
        DataColumnSpec fpCol = spec.getColumnSpec(m_fingerprintColumn);
        if (fpCol == null || !(fpCol.getType().isCompatible(BitVectorValue.class) || fpCol.getType().isCompatible(ByteVectorValue.class) || fpCol.getType().isCompatible(DoubleVectorValue.class))) {
            throw new InvalidSettingsException("Fingerprint column \"" + m_fingerprintColumn + "\" does not exist or is not " + "of correct type.");
        }
        rearranger.keepOnly(m_targetColumn, m_fingerprintColumn);
    }
    rearranger.move(m_targetColumn, rearranger.getColumnCount());

    // warning text naming up to four of the ignored columns; stays null if none were ignored
    String warn = null;
    if (!noDomainColumns.isEmpty()) {
        StringBuilder b = new StringBuilder();
        b.append(noDomainColumns.size());
        b.append(" column(s) were ignored due to missing domain");
        b.append(" information: [");
        int index = 0;
        for (String s : noDomainColumns) {
            if (index > 3) {
                b.append(", ...");
                break;
            }
            if (index > 0) {
                b.append(", ");
            }
            b.append("\"").append(s).append("\"");
            index++;
        }
        b.append("] -- change the node configuration or use a");
        b.append(" Domain Calculator node to fix it");
        warn = b.toString();
    }
    rearranger.setWarning(warn);
    return rearranger;
}
use of org.knime.core.data.DataColumnSpec in project knime-core by knime.
the class AttributeSelectionPanel method loadSettingsFrom.
/**
* Load settings from config <b>cfg</b> and bring the input-dependent components of this panel
* in line with the table spec <b>inSpec</b>.
*
* @param inSpec spec of the incoming table; its column types decide which of the two attribute
*            modes (ordinary columns / fingerprint column) can be enabled
* @param cfg configuration providing the target column, fingerprint column, column filter,
*            hilite pattern count and the flags shown in this panel
* @throws NotConfigurableException not thrown directly by a statement in this method; may be
*             propagated by the component updates called here -- TODO confirm
*/
public void loadSettingsFrom(final DataTableSpec inSpec, final TreeEnsembleLearnerConfiguration cfg) throws NotConfigurableException {
// disabled automatic propagation of table specs
// (the field is set to inSpec again by the last statement of this method)
m_lastTableSpec = null;
// Count nominal and numeric columns. Because of the else-if, a column that is compatible with
// both value types is counted as nominal only.
int nrNominalCols = 0;
int nrNumericCols = 0;
for (DataColumnSpec col : inSpec) {
DataType type = col.getType();
if (type.isCompatible(NominalValue.class)) {
nrNominalCols += 1;
} else if (type.isCompatible(DoubleValue.class)) {
nrNumericCols += 1;
}
}
// Ordinary-column mode requires more than one nominal column or at least one numeric column.
// NOTE(review): the "> 1" presumably leaves one nominal column for the target -- confirm.
boolean hasOrdinaryColumnsInInput = nrNominalCols > 1 || nrNumericCols > 0;
// Fingerprint mode requires a bit vector, byte vector or double vector column in the input.
boolean hasFPColumnInInput = inSpec.containsCompatibleType(BitVectorValue.class) || inSpec.containsCompatibleType(ByteVectorValue.class) || inSpec.containsCompatibleType(DoubleVectorValue.class);
m_targetColumnBox.update(inSpec, cfg.getTargetColumn());
// Spec offered to the attribute filter: result of removeColumn for the target column that is
// selected in the box after the update above.
DataTableSpec attSpec = removeColumn(inSpec, m_targetColumnBox.getSelectedColumn());
String fpColumn = cfg.getFingerprintColumn();
// Start with both modes enabled; unavailable ones are disabled again below.
m_useOrdinaryColumnsRadio.setEnabled(true);
m_useFingerprintColumnRadio.setEnabled(true);
// default, fix later
// (the final mode is chosen by the fpColumn / hasOrdinaryColumnsInInput check further down)
m_useOrdinaryColumnsRadio.doClick();
if (hasOrdinaryColumnsInInput) {
m_includeColumnsFilterPanel2.loadConfiguration(cfg.getColumnFilterConfig(), attSpec);
} else {
// No ordinary columns: disable that mode, switch to fingerprint mode and load the filter
// against NO_VALID_INPUT_SPEC instead of the real attribute spec.
m_useOrdinaryColumnsRadio.setEnabled(false);
m_useFingerprintColumnRadio.doClick();
m_includeColumnsFilterPanel2.loadConfiguration(cfg.getColumnFilterConfig(), NO_VALID_INPUT_SPEC);
}
if (hasFPColumnInInput) {
m_fingerprintColumnBox.update(inSpec, fpColumn);
} else {
// No fingerprint columns: switch back to ordinary mode, disable fingerprint mode and drop
// the configured fingerprint column so that it does not influence the final choice below.
m_useOrdinaryColumnsRadio.doClick();
m_fingerprintColumnBox.update(NO_VALID_INPUT_SPEC, "");
m_useFingerprintColumnRadio.setEnabled(false);
fpColumn = null;
}
// Final mode selection: fingerprint mode if a fingerprint column is configured (and still
// non-null after the block above) or if there are no ordinary columns. Note that the
// fingerprint branch is also taken when both kinds of columns are missing, i.e. when the
// fingerprint radio has just been disabled.
if (fpColumn != null || !hasOrdinaryColumnsInInput) {
m_useFingerprintColumnRadio.doClick();
} else {
m_useOrdinaryColumnsRadio.doClick();
}
boolean ignoreColsNoDomain = cfg.isIgnoreColumnsWithoutDomain();
m_ignoreColumnsWithoutDomainChecker.setSelected(ignoreColsNoDomain);
// A positive pattern count selects the hilite checkbox and is shown in the spinner; otherwise
// the checkbox is deselected and the spinner is set to 2000.
int hiliteCount = cfg.getNrHilitePatterns();
if (hiliteCount > 0) {
m_enableHiliteChecker.setSelected(true);
m_hiliteCountSpinner.setValue(hiliteCount);
} else {
m_enableHiliteChecker.setSelected(false);
m_hiliteCountSpinner.setValue(2000);
}
m_saveTargetDistributionInNodesChecker.setSelected(cfg.isSaveTargetDistributionInNodes());
// Remember the spec that was used to populate the panel (the field was cleared at the top).
m_lastTableSpec = inSpec;
}
Aggregations