Example use of org.knime.core.node.util.filter.NameFilterConfiguration.FilterResult in the knime-core project: class ColumnAggregatorNodeModel, method createRearranger.
/**
 * Builds a rearranger that appends the aggregation result and optionally
 * removes the aggregated and/or retained input columns.
 *
 * @param oSpec the input table spec
 * @param cellFactory factory producing the aggregated cell(s)
 * @return the configured {@link ColumnRearranger}
 */
private ColumnRearranger createRearranger(final DataTableSpec oSpec, final CellFactory cellFactory) {
    final ColumnRearranger rearranger = new ColumnRearranger(oSpec);
    rearranger.append(cellFactory);
    // Resolve the configured aggregation column filter against the current spec.
    final FilterResult filter = m_aggregationCols.applyTo(oSpec);
    // Optionally drop the columns that fed the aggregation ...
    if (m_removeAggregationCols.getBooleanValue()) {
        rearranger.remove(filter.getIncludes());
    }
    // ... and/or the columns that were excluded from it.
    if (m_removeRetainedCols.getBooleanValue()) {
        rearranger.remove(filter.getExcludes());
    }
    return rearranger;
}
Example use of org.knime.core.node.util.filter.NameFilterConfiguration.FilterResult in the knime-core project: class CorrelationComputeNodeModel, method configure.
/**
 * {@inheritDoc}
 */
@Override
protected PortObjectSpec[] configure(final PortObjectSpec[] inSpecs) throws InvalidSettingsException {
    final DataTableSpec in = (DataTableSpec) inSpecs[0];
    // The node needs at least one numeric or nominal column to correlate.
    final boolean hasNumeric = in.containsCompatibleType(DoubleValue.class);
    final boolean hasNominal = in.containsCompatibleType(NominalValue.class);
    if (!hasNumeric && !hasNominal) {
        throw new InvalidSettingsException("No double or nominal compatible columns in input");
    }
    final String[] includes;
    if (m_columnFilterModel != null) {
        // Existing configuration: just apply the stored filter.
        includes = m_columnFilterModel.applyTo(in).getIncludes();
    } else {
        // auto-configure, no previous configuration
        m_columnFilterModel = createColumnFilterModel();
        m_columnFilterModel.loadDefaults(in);
        includes = m_columnFilterModel.applyTo(in).getIncludes();
        setWarningMessage("Auto configuration: Using all suitable " + "columns (in total " + includes.length + ")");
    }
    if (includes.length == 0) {
        throw new InvalidSettingsException("No columns selected");
    }
    return new PortObjectSpec[] { PMCCPortObjectAndSpec.createOutSpec(includes), new PMCCPortObjectAndSpec(includes) };
}
Example use of org.knime.core.node.util.filter.NameFilterConfiguration.FilterResult in the knime-core project: class RoundDoubleNodeModel, method createColumnRearranger.
/**
 * {@inheritDoc}
 */
@Override
public ColumnRearranger createColumnRearranger(final DataTableSpec dataSpec) throws InvalidSettingsException {
    //
    // / SPEC CHECKS
    //
    // Resolve the column filter against the incoming spec.
    final FilterResult filtered = m_filterDoubleColModel.applyTo(dataSpec);
    final String[] includes = filtered.getIncludes();
    // At least one double column must survive the filter.
    if (includes.length == 0) {
        throw new InvalidSettingsException("There are no columns containing double values in the input table!");
    }
    // Warn about previously selected columns that are gone from the input.
    final String[] missing = filtered.getRemovedFromIncludes();
    if (missing.length == 1) {
        setWarningMessage("Column \"" + missing[0] + "\" is not available.");
    } else if (missing.length > 1) {
        setWarningMessage("" + missing.length + " selected columns are not available anymore.");
    }
    //
    // / CREATE COLUMN REARRANGER
    //
    // Collect the dialog settings that drive the rounding.
    final int precision = m_numberPrecisionModel.getIntValue();
    final boolean append = m_appendColumnsModel.getBooleanValue();
    final RoundingMode roundingMode = RoundingMode.valueOf(m_roundingModeModel.getStringValue());
    final NumberMode numberMode = NumberMode.valueByDescription(m_numberModeModel.getStringValue());
    final RoundOutputType outputType = RoundOutputType.valueByTextLabel(m_outputTypeModel.getStringValue());
    final String colSuffix = m_columnSuffixModel.getStringValue();
    // Indices of the included columns and the specs of the output columns.
    final int[] includedColIndices = getIncludedColIndices(dataSpec, includes);
    final DataColumnSpec[] newColsSpecs = getNewColSpecs(append, colSuffix, outputType, includes, dataSpec);
    // The factory rounds the values and creates the cells that either
    // replace the originals or are appended as new columns.
    final RoundDoubleCellFactory cellFactory =
        new RoundDoubleCellFactory(precision, numberMode, roundingMode, outputType, includedColIndices, newColsSpecs);
    final ColumnRearranger rearranger = new ColumnRearranger(dataSpec);
    if (append) {
        rearranger.append(cellFactory);
    } else {
        rearranger.replace(cellFactory, includedColIndices);
    }
    return rearranger;
}
Example use of org.knime.core.node.util.filter.NameFilterConfiguration.FilterResult in the knime-core project: class TreeEnsembleLearnerConfiguration, method loadInDialog.
/**
 * Loads the settings. Intended for the use in the NodeDialog.
 * Missing or invalid values fall back to defaults guessed from the input
 * spec rather than throwing, so the dialog can always open.
 *
 * @param settings the settings to read (missing keys are replaced by defaults)
 * @param inSpec spec of the input table, used to guess default columns
 * @throws NotConfigurableException if the input offers no possible target
 *             column or no learning columns at all
 */
public void loadInDialog(final NodeSettingsRO settings, final DataTableSpec inSpec) throws NotConfigurableException {
    String defTargetColumn = null;
    String defFingerprintColumn = null;
    boolean hasAttributeColumns = false;
    // guess defaults:
    // traverse columns backwards; assign last (i.e. first-seen) appropriate
    // column as target, use any subsequent as valid learning attribute
    Class<? extends DataValue> targetClass = getRequiredTargetClass();
    for (int i = inSpec.getNumColumns() - 1; i >= 0; i--) {
        DataColumnSpec colSpec = inSpec.getColumnSpec(i);
        DataType colType = colSpec.getType();
        String colName = colSpec.getName();
        // vector-typed columns qualify as fingerprint data
        if (colType.isCompatible(BitVectorValue.class) || colType.isCompatible(ByteVectorValue.class) || colType.isCompatible(DoubleVectorValue.class)) {
            defFingerprintColumn = colName;
        } else if (colType.isCompatible(NominalValue.class) || colType.isCompatible(DoubleValue.class)) {
            if (colType.isCompatible(targetClass)) {
                if (defTargetColumn == null) {
                    // first categorical column
                    defTargetColumn = colName;
                } else {
                    hasAttributeColumns = true;
                }
            } else {
                hasAttributeColumns = true;
            }
        }
    }
    if (defTargetColumn == null) {
        throw new NotConfigurableException("No possible target in input (node not connected?) -- unable to configure.");
    }
    if (!hasAttributeColumns && defFingerprintColumn == null) {
        throw new NotConfigurableException("No appropriate learning column " + "in input (need to have at least one additional " + "numeric/categorical column, fingerprint data or byte or double vector data)");
    }
    // assign fields:
    // target column: fall back to the guessed default if the stored column
    // is gone or no longer compatible with the required target type
    m_targetColumn = settings.getString(KEY_TARGET_COLUMN, defTargetColumn);
    DataColumnSpec targetColSpec = inSpec.getColumnSpec(m_targetColumn);
    if (targetColSpec == null || !targetColSpec.getType().isCompatible(targetClass)) {
        m_targetColumn = defTargetColumn;
    }
    // hard-coded root column: only keep it if it still exists in the input
    String hardCodedRootColumn = settings.getString(KEY_ROOT_COLUMN, null);
    if (inSpec.getColumnSpec(hardCodedRootColumn) == null) {
        m_hardCodedRootColumn = null;
    } else {
        m_hardCodedRootColumn = hardCodedRootColumn;
    }
    m_fingerprintColumn = settings.getString(KEY_FINGERPRINT_COLUMN, defFingerprintColumn);
    if (m_fingerprintColumn == null) {
        // null in node settings - leave it
    } else {
        // NOTE(review): this validation accepts only BitVectorValue although
        // the default-guessing above also admits byte/double vectors — confirm
        // whether byte/double vector columns should be accepted here too
        DataColumnSpec fpColSpec = inSpec.getColumnSpec(m_fingerprintColumn);
        if (fpColSpec == null || !fpColSpec.getType().isCompatible(BitVectorValue.class)) {
            m_fingerprintColumn = defFingerprintColumn;
        }
    }
    // m_includeColumns = settings.getStringArray(KEY_INCLUDE_COLUMNS, (String[])null);
    // m_includeAllColumns = settings.getBoolean(KEY_INCLUDE_ALL_COLUMNS, true);
    m_columnFilterConfig.loadConfigurationInDialog(settings, inSpec);
    // random seed: stored as a string; an unparsable value keeps the current
    // seed, an explicit null stays null
    Long defSeed = System.currentTimeMillis();
    String seedS = settings.getString(KEY_SEED, Long.toString(defSeed));
    Long seed;
    if (seedS == null) {
        seed = null;
    } else {
        try {
            seed = Long.parseLong(seedS);
        } catch (NumberFormatException nfe) {
            seed = m_seed;
        }
    }
    m_seed = seed;
    // tree depth: anything invalid (other than the "infinite" marker) resets
    m_maxLevels = settings.getInt(KEY_MAX_LEVELS, DEF_MAX_LEVEL);
    if (m_maxLevels != MAX_LEVEL_INFINITE && m_maxLevels <= 0) {
        m_maxLevels = DEF_MAX_LEVEL;
    }
    // node/child minimum sizes are validated as a pair; reset both on failure
    int minNodeSize = settings.getInt(KEY_MIN_NODE_SIZE, MIN_NODE_SIZE_UNDEFINED);
    int minChildSize = settings.getInt(KEY_MIN_CHILD_SIZE, MIN_CHILD_SIZE_UNDEFINED);
    try {
        setMinSizes(minNodeSize, minChildSize);
    } catch (InvalidSettingsException e) {
        m_minNodeSize = MIN_NODE_SIZE_UNDEFINED;
        m_minChildSize = MIN_CHILD_SIZE_UNDEFINED;
    }
    // row sampling fraction must lie in (0, 1]
    m_dataFractionPerTree = settings.getDouble(KEY_DATA_FRACTION, DEF_DATA_FRACTION);
    if (m_dataFractionPerTree <= 0.0 || m_dataFractionPerTree > 1.0) {
        m_dataFractionPerTree = DEF_DATA_FRACTION;
    }
    m_columnAbsoluteValue = settings.getInt(KEY_COLUMN_ABSOLUTE, DEF_COLUMN_ABSOLUTE);
    if (m_columnAbsoluteValue <= 0) {
        m_columnAbsoluteValue = DEF_COLUMN_ABSOLUTE;
    }
    m_isDataSelectionWithReplacement = settings.getBoolean(KEY_IS_DATA_SELECTION_WITH_REPLACEMENT, true);
    // column sampling mode: unknown enum names silently fall back to default
    ColumnSamplingMode defColSamplingMode = DEF_COLUMN_SAMPLING_MODE;
    ColumnSamplingMode colSamplingMode = defColSamplingMode;
    String colSamplingModeS = settings.getString(KEY_COLUMN_SAMPLING_MODE, null);
    if (colSamplingModeS == null) {
        colSamplingMode = defColSamplingMode;
    } else {
        try {
            colSamplingMode = ColumnSamplingMode.valueOf(colSamplingModeS);
        } catch (Exception e) {
            colSamplingMode = defColSamplingMode;
        }
    }
    // the linear fraction is only meaningful in Linear mode; clamp to (0, 1]
    double colFracLinValue;
    switch(colSamplingMode) {
    case Linear:
        colFracLinValue = settings.getDouble(KEY_COLUMN_FRACTION_LINEAR, DEF_COLUMN_FRACTION);
        if (colFracLinValue <= 0.0 || colFracLinValue > 1.0) {
            colFracLinValue = DEF_COLUMN_FRACTION;
        }
        break;
    default:
        colFracLinValue = DEF_COLUMN_FRACTION;
    }
    m_columnSamplingMode = colSamplingMode;
    m_columnFractionLinearValue = colFracLinValue;
    m_isUseDifferentAttributesAtEachNode = settings.getBoolean(KEY_IS_USE_DIFFERENT_ATTRIBUTES_AT_EACH_NODE, true);
    m_nrModels = settings.getInt(KEY_NR_MODELS, DEF_NR_MODELS);
    if (m_nrModels <= 0) {
        m_nrModels = DEF_NR_MODELS;
    }
    // split criterion: unknown names fall back to information gain ratio
    SplitCriterion defSplitCriterion = SplitCriterion.InformationGainRatio;
    String splitCriterionS = settings.getString(KEY_SPLIT_CRITERION, defSplitCriterion.name());
    SplitCriterion splitCriterion;
    if (splitCriterionS == null) {
        splitCriterion = defSplitCriterion;
    } else {
        try {
            splitCriterion = SplitCriterion.valueOf(splitCriterionS);
        } catch (Exception e) {
            splitCriterion = defSplitCriterion;
        }
    }
    m_splitCriterion = splitCriterion;
    m_useAverageSplitPoints = settings.getBoolean(KEY_USE_AVERAGE_SPLIT_POINTS, DEF_AVERAGE_SPLIT_POINTS);
    m_useBinaryNominalSplits = settings.getBoolean(KEY_USE_BINARY_NOMINAL_SPLITS, DEF_BINARY_NOMINAL_SPLITS);
    // missing value handling: same fallback pattern as the enums above
    String missingValueHandlingS = settings.getString(KEY_MISSING_VALUE_HANDLING, DEF_MISSING_VALUE_HANDLING.name());
    MissingValueHandling missingValueHandling;
    if (missingValueHandlingS == null) {
        missingValueHandling = DEF_MISSING_VALUE_HANDLING;
    } else {
        try {
            missingValueHandling = MissingValueHandling.valueOf(missingValueHandlingS);
        } catch (Exception e) {
            missingValueHandling = DEF_MISSING_VALUE_HANDLING;
        }
    }
    m_missingValueHandling = missingValueHandling;
    // if neither a fingerprint column nor any attribute columns are set,
    // fall back to the guessed fingerprint column (when one exists)
    FilterResult filterResult = m_columnFilterConfig.applyTo(inSpec);
    if (m_fingerprintColumn != null) {
        // use fingerprint data, OK
    } else if (filterResult.getIncludes().length > 0) {
        // some attributes set, OK
        // } else if (m_includeAllColumns) {
        // use all appropriate columns, OK
    } else if (defFingerprintColumn != null) {
        // no valid columns but fingerprint column found - use it
        m_fingerprintColumn = defFingerprintColumn;
        // } else {
        // m_includeAllColumns = true;
    }
    m_ignoreColumnsWithoutDomain = settings.getBoolean(KEY_IGNORE_COLUMNS_WITHOUT_DOMAIN, true);
    m_nrHilitePatterns = settings.getInt(KEY_NR_HILITE_PATTERNS, -1);
    m_saveTargetDistributionInNodes = settings.getBoolean(KEY_SAVE_TARGET_DISTRIBUTION_IN_NODES, DEF_SAVE_TARGET_DISTRIBUTION_IN_NODES);
    setRowSamplingMode(RowSamplingMode.valueOf(settings.getString(KEY_ROW_SAMPLING_MODE, DEF_ROW_SAMPLING_MODE.name())));
}
Example use of org.knime.core.node.util.filter.NameFilterConfiguration.FilterResult in the knime-core project: class LogRegLearner, method init.
/**
 * Initialize instance and check if settings are consistent.
 *
 * @param inSpec spec of the learning table
 * @param exclude names of columns that must not be used as regressors
 * @throws InvalidSettingsException if no learning columns remain, no target
 *             can be determined, the target is not nominal or has fewer than
 *             two categories, or an included column has an unsupported type
 */
private void init(final DataTableSpec inSpec, final Set<String> exclude) throws InvalidSettingsException {
    List<String> inputCols = new ArrayList<String>();
    FilterResult includedColumns = m_settings.getIncludedColumns().applyTo(inSpec);
    for (String column : includedColumns.getIncludes()) {
        inputCols.add(column);
    }
    // the target itself is never a regressor
    inputCols.remove(m_settings.getTargetColumn());
    if (inputCols.isEmpty()) {
        throw new InvalidSettingsException("At least one column must " + "be included.");
    }
    DataColumnSpec targetColSpec = null;
    List<DataColumnSpec> regressorColSpecs = new ArrayList<DataColumnSpec>();
    // Auto configuration when target is not set
    if (null == m_settings.getTargetColumn() && m_settings.getIncludedColumns().applyTo(inSpec).getExcludes().length == 0) {
        // NOTE(review): this loop removes EVERY column from inputCols, not
        // just the chosen target — confirm whether only the target column
        // was meant to be removed here
        for (int i = 0; i < inSpec.getNumColumns(); i++) {
            DataColumnSpec colSpec = inSpec.getColumnSpec(i);
            String colName = colSpec.getName();
            inputCols.remove(colName);
            if (colSpec.getType().isCompatible(NominalValue.class)) {
                // last nominal column wins as target
                m_settings.setTargetColumn(colName);
            }
        }
        // when there is no column with nominal data
        if (null == m_settings.getTargetColumn()) {
            throw new InvalidSettingsException("No column in " + "spec compatible to \"NominalValue\".");
        }
    }
    // remove all columns that should not be used
    inputCols.removeAll(exclude);
    m_specialColumns = new LinkedList<>();
    // classify every input column: target, regular regressor, or "special"
    // (vector/collection) column that is re-encoded as a string later on
    for (int i = 0; i < inSpec.getNumColumns(); i++) {
        DataColumnSpec colSpec = inSpec.getColumnSpec(i);
        String colName = colSpec.getName();
        final DataType type = colSpec.getType();
        if (m_settings.getTargetColumn().equals(colName)) {
            if (type.isCompatible(NominalValue.class)) {
                targetColSpec = colSpec;
            } else {
                throw new InvalidSettingsException("Type of column \"" + colName + "\" is not nominal.");
            }
        } else if (inputCols.contains(colName)) {
            if (type.isCompatible(DoubleValue.class) || type.isCompatible(NominalValue.class)) {
                regressorColSpecs.add(colSpec);
            } else if (type.isCompatible(BitVectorValue.class) || type.isCompatible(ByteVectorValue.class) || (type.isCollectionType() && type.getCollectionElementType().isCompatible(DoubleValue.class))) {
                m_specialColumns.add(colSpec);
                // We change the table spec later to encode it as a string.
                regressorColSpecs.add(new DataColumnSpecCreator(colSpec.getName(), StringCell.TYPE).createSpec());
            } else {
                // fixed typo in the user-facing message: "nomial" -> "nominal"
                throw new InvalidSettingsException("Type of column \"" + colName + "\" is not one of the allowed types, " + "which are numeric or nominal.");
            }
        }
    }
    if (null != targetColSpec) {
        // Check if target has at least two categories.
        final Set<DataCell> targetValues = targetColSpec.getDomain().getValues();
        if (targetValues != null && targetValues.size() < 2) {
            throw new InvalidSettingsException("The target column \"" + targetColSpec.getName() + "\" has one value, only. " + "At least two target categories are expected.");
        }
        String[] learnerCols = new String[regressorColSpecs.size() + 1];
        for (int i = 0; i < regressorColSpecs.size(); i++) {
            learnerCols[i] = regressorColSpecs.get(i).getName();
        }
        learnerCols[learnerCols.length - 1] = targetColSpec.getName();
        // Rewrite vector columns as string columns in the spec handed to
        // PMML, tagging the original type in a column property.
        final DataColumnSpec[] updatedSpecs = new DataColumnSpec[inSpec.getNumColumns()];
        for (int i = updatedSpecs.length; i-- > 0; ) {
            final DataColumnSpec columnSpec = inSpec.getColumnSpec(i);
            final DataType type = columnSpec.getType();
            if (type.isCompatible(BitVectorValue.class) || type.isCompatible(ByteVectorValue.class)) {
                final DataColumnSpecCreator colSpecCreator = new DataColumnSpecCreator(columnSpec.getName(), StringCell.TYPE);
                colSpecCreator.setProperties(new DataColumnProperties(Collections.singletonMap("realType", type.isCompatible(BitVectorValue.class) ? "BitVector" : "ByteVector")));
                updatedSpecs[i] = colSpecCreator.createSpec();
            } else {
                updatedSpecs[i] = columnSpec;
            }
        }
        DataTableSpec updated = new DataTableSpec(updatedSpecs);
        PMMLPortObjectSpecCreator creator = new PMMLPortObjectSpecCreator(updated);
        creator.setTargetCols(Arrays.asList(targetColSpec));
        creator.setLearningCols(regressorColSpecs);
        // creator.addPreprocColNames(m_specialColumns.stream().flatMap(spec -> ));
        m_pmmlOutSpec = creator.createSpec();
        m_learner = new Learner(m_pmmlOutSpec, m_specialColumns, m_settings.getTargetReferenceCategory(), m_settings.getSortTargetCategories(), m_settings.getSortIncludesCategories());
    } else {
        throw new InvalidSettingsException("The target is " + "not in the input.");
    }
}
Aggregations