Use of org.knime.base.node.mine.bayes.naivebayes.datamodel2.AttributeModel in project knime-core by knime.
The class NaiveBayesModel, method updateModel.
/**
 * Updates the current {@link NaiveBayesModel} with the values from the
 * given {@link DataRow}.
 *
 * <p>If the class cell of the row is missing, the row is either ignored (when skipping of
 * missing values is enabled) or rejected with an exception. Otherwise every column of the
 * table spec that has a registered attribute model receives the corresponding cell of the
 * row. A non-class attribute that reports too many values is removed from the model map,
 * marked invalid and recorded in the skipped attributes instead of failing the update.</p>
 *
 * @param row DataRow with values for update
 * @param tableSpec underlying DataTableSpec
 * @param classColIdx the index of the class column
 * @throws InvalidSettingsException if the class value is missing and missing values are
 * not skipped, or if the class attribute has too many unique values
 * @throws NullPointerException if {@code row} or {@code tableSpec} is {@code null}
 */
public void updateModel(final DataRow row, final DataTableSpec tableSpec, final int classColIdx) throws InvalidSettingsException {
    if (row == null) {
        throw new NullPointerException("Row must not be null");
    }
    if (tableSpec == null) {
        throw new NullPointerException("TableSpec must not be null");
    }
    final DataCell classCell = row.getCell(classColIdx);
    if (classCell.isMissing()) {
        if (m_skipMissingVals) {
            // skipping is enabled: ignore the row entirely (the record counter is not incremented)
            return;
        }
        throw new InvalidSettingsException("Missing class value found in row " + row.getKey() + " to skip missing values tick the box in the dialog");
    }
    final String classVal = classCell.toString();
    final int numColumns = tableSpec.getNumColumns();
    for (int i = 0; i < numColumns; i++) {
        // columns without a registered model (unsupported or previously removed) are ignored
        final AttributeModel model = m_modelByAttrName.get(tableSpec.getColumnSpec(i).getName());
        if (model == null) {
            continue;
        }
        final DataCell cell = row.getCell(i);
        try {
            model.addValue(classVal, cell);
        } catch (final TooManyValuesException e) {
            if (model instanceof ClassAttributeModel) {
                // the class attribute is mandatory, so this is fatal; keep the cause for diagnostics.
                // Note: models of columns processed before this one have already consumed the row.
                throw new InvalidSettingsException("Class attribute has too many unique values. " + "To avoid this exception increase the " + "maximum number of allowed nominal " + "values in the node dialog", e);
            }
            // drop the attribute from learning if it contains too many unique values
            // and remember it as skipped
            m_modelByAttrName.remove(model.getAttributeName());
            model.setInvalidCause("Too many values");
            m_skippedAttributes.add(model);
        }
    }
    m_noOfRecs++;
}
Use of org.knime.base.node.mine.bayes.naivebayes.datamodel2.AttributeModel in project knime-core by knime.
The class NaiveBayesLearnerNodeModel2, method configure.
/**
 * {@inheritDoc}
 *
 * <p>Validates the training table spec. If no classification column is configured and the
 * table contains exactly one nominal column, that column is preset as classification column
 * and a warning is set. Columns without a compatible attribute model and nominal columns
 * whose domain exceeds the configured maximum number of nominal values are reported via
 * warning messages. Returns the PMML spec and the statistics table spec.</p>
 *
 * @throws InvalidSettingsException if the classification column cannot be determined or is
 * not part of the input table, the table has fewer than two columns, the class column domain
 * contains too many unique values, or no column is left to learn from
 */
@Override
protected PortObjectSpec[] configure(final PortObjectSpec[] inSpecs) throws InvalidSettingsException {
    // check the internal variables if they are valid
    final PortObjectSpec inSpec = inSpecs[TRAINING_DATA_PORT];
    if (!(inSpec instanceof DataTableSpec)) {
        throw new IllegalArgumentException("Invalid input data");
    }
    final DataTableSpec tableSpec = (DataTableSpec) inSpec;
    if (m_classifyColumnName.getStringValue() == null) {
        // no class column configured: preset it only if the choice is unambiguous
        String predictedClassName = null;
        for (DataColumnSpec colSpec : tableSpec) {
            if (colSpec.getType().isCompatible(NominalValue.class)) {
                if (predictedClassName == null) {
                    predictedClassName = colSpec.getName();
                } else {
                    throw new InvalidSettingsException("Please define the classification column");
                }
            }
        }
        if (predictedClassName == null) {
            // fail here with a precise message instead of storing null, setting a
            // "preset to null" warning and continuing with a null column name
            throw new InvalidSettingsException("No nominal column found in input table that could be used as classification column");
        }
        m_classifyColumnName.setStringValue(predictedClassName);
        setWarningMessage("Classification column preset to " + predictedClassName);
    }
    final String classColumn = m_classifyColumnName.getStringValue();
    final DataColumnSpec classColSpec = tableSpec.getColumnSpec(classColumn);
    if (classColSpec == null) {
        throw new InvalidSettingsException("Classification column not found in input table");
    }
    if (tableSpec.getNumColumns() < 2) {
        throw new InvalidSettingsException("Input table should contain at least 2 columns");
    }
    final int maxNoOfNominalVals = m_maxNoOfNominalVals.getIntValue();
    // check each nominal column with a valid domain if it contains more values than allowed
    // this needs to be in sync with the NaiveBayesModel.createModelMap method!!!
    final List<String> ignoredColumns = new LinkedList<>();
    final List<String> toBigNominalColumns = new LinkedList<>();
    final List<String> learnCols = new LinkedList<>();
    for (final DataColumnSpec colSpec : tableSpec) {
        final AttributeModel model = NaiveBayesModel.getCompatibleModel(colSpec, classColumn, maxNoOfNominalVals, m_ignoreMissingVals.getBooleanValue(), m_pmmlCompatible.getBooleanValue());
        if (model == null) {
            // the column type is not supported by Naive Bayes
            ignoredColumns.add(colSpec.getName());
            continue;
        }
        final DataType colType = colSpec.getType();
        if (colType.isCompatible(NominalValue.class)) {
            final DataColumnDomain domain = colSpec.getDomain();
            if (domain != null && domain.getValues() != null) {
                if (domain.getValues().size() > maxNoOfNominalVals) {
                    // the domain lists more unique values than allowed
                    if (colSpec.getName().equals(classColumn)) {
                        // the class column itself must not exceed the limit
                        throw new InvalidSettingsException("Class column domain contains too many unique values" + " (count: " + domain.getValues().size() + ")");
                    }
                    toBigNominalColumns.add(colSpec.getName() + " (count: " + domain.getValues().size() + ")");
                }
            }
            // NOTE(review): only columns compatible with NominalValue are added to learnCols;
            // supported non-nominal columns are not - confirm this matches
            // NaiveBayesModel.createModelMap
            learnCols.add(model.getAttributeName());
        }
    }
    warningMessage("The following columns will possibly be skipped due to too many values: ", toBigNominalColumns);
    warningMessage("The following columns are not supported and thus will be ignored: ", ignoredColumns);
    if (learnCols.isEmpty()) {
        throw new InvalidSettingsException("Not enough valid columns");
    }
    // the optional PMML input spec is only read when the PMML in-port is enabled
    final PMMLPortObjectSpec modelSpec = m_pmmlInEnabled ? (PMMLPortObjectSpec) inSpecs[MODEL_INPORT] : null;
    final PMMLPortObjectSpec pmmlSpec = createPMMLSpec(tableSpec, modelSpec, learnCols, classColumn);
    return new PortObjectSpec[] { pmmlSpec, NaiveBayesModel.createStatisticsTableSpec(classColSpec.getType(), m_ignoreMissingVals.getBooleanValue()) };
}
Aggregations