Search in sources :

Example 1 with AttributeModel

use of org.knime.base.node.mine.bayes.naivebayes.datamodel2.AttributeModel in project knime-core by knime.

In the class NaiveBayesModel, method updateModel.

/**
 * Updates the current {@link NaiveBayesModel} with the values from the
 * given {@link DataRow}.
 *
 * <p>For every column of {@code tableSpec} that has an attribute model registered under its
 * name, the row's cell is added to that model together with the row's class value. An
 * attribute model that reports too many values is removed from the model map, flagged with
 * an invalid cause and remembered as skipped attribute; only if that model is the class
 * attribute model the update fails. The record counter is incremented for every row that
 * was processed, i.e. not for rows skipped because of a missing class value.</p>
 *
 * @param row DataRow with values for update, must not be {@code null}
 * @param tableSpec underlying DataTableSpec, must not be {@code null}
 * @param classColIdx the index of the class column
 * @throws NullPointerException if {@code row} or {@code tableSpec} is {@code null}
 * @throws InvalidSettingsException if the class value is missing and missing values are not
 * skipped, or if the class attribute has too many values
 */
public void updateModel(final DataRow row, final DataTableSpec tableSpec, final int classColIdx) throws InvalidSettingsException {
    if (row == null) {
        throw new NullPointerException("Row must not be null");
    }
    if (tableSpec == null) {
        throw new NullPointerException("TableSpec must not be null");
    }
    final DataCell classCell = row.getCell(classColIdx);
    if (classCell.isMissing()) {
        if (m_skipMissingVals) {
            // a row without class value is ignored completely and not counted
            return;
        }
        throw new InvalidSettingsException("Missing class value found in row " + row.getKey() + " to skip missing values tick the box in the dialog");
    }
    final String classVal = classCell.toString();
    final int numColumns = tableSpec.getNumColumns();
    for (int i = 0; i < numColumns; i++) {
        // columns without a registered attribute model do not take part in the learning
        final AttributeModel model = m_modelByAttrName.get(tableSpec.getColumnSpec(i).getName());
        if (model == null) {
            continue;
        }
        try {
            model.addValue(classVal, row.getCell(i));
        } catch (final TooManyValuesException e) {
            if (model instanceof ClassAttributeModel) {
                // keep the original exception as cause so the origin stays traceable
                throw new InvalidSettingsException("Class attribute has too many unique values. "
                    + "To avoid this exception increase the maximum number of allowed nominal values in the node dialog", e);
            }
            // a regular attribute with too many unique values is dropped from the model
            // and remembered as skipped instead of failing the whole update
            m_modelByAttrName.remove(model.getAttributeName());
            model.setInvalidCause("Too many values");
            m_skippedAttributes.add(model);
        }
    }
    m_noOfRecs++;
}
Also used : InvalidSettingsException(org.knime.core.node.InvalidSettingsException) DataCell(org.knime.core.data.DataCell) TooManyValuesException(org.knime.base.node.mine.bayes.naivebayes.datamodel2.TooManyValuesException)

Example 2 with AttributeModel

use of org.knime.base.node.mine.bayes.naivebayes.datamodel2.AttributeModel in project knime-core by knime.

In the class NaiveBayesLearnerNodeModel2, method configure.

/**
 * {@inheritDoc}
 *
 * <p>Checks the training data spec against the node settings. If no classification column
 * is configured, the only nominal column of the input table is preselected; the
 * configuration fails if the table has none or more than one nominal column. Afterwards
 * each column is checked for a compatible attribute model and, for nominal columns with a
 * known domain, for the number of unique values.</p>
 *
 * @return the PMML port object spec and the spec of the statistics table
 * @throws IllegalArgumentException if the training data spec is not a {@link DataTableSpec}
 * @throws InvalidSettingsException if no classification column can be determined, the
 * classification column is not part of the input table, the table has less than two
 * columns, the class column domain contains too many unique values or no column is left
 * to learn from
 */
@Override
protected PortObjectSpec[] configure(final PortObjectSpec[] inSpecs) throws InvalidSettingsException {
    final PortObjectSpec inSpec = inSpecs[TRAINING_DATA_PORT];
    if (!(inSpec instanceof DataTableSpec)) {
        throw new IllegalArgumentException("Invalid input data");
    }
    final DataTableSpec tableSpec = (DataTableSpec) inSpec;
    if (m_classifyColumnName.getStringValue() == null) {
        // no class column configured: preselect it only if the choice is unambiguous
        String predictedClassName = null;
        for (DataColumnSpec colSpec : tableSpec) {
            if (colSpec.getType().isCompatible(NominalValue.class)) {
                if (predictedClassName == null) {
                    predictedClassName = colSpec.getName();
                } else {
                    throw new InvalidSettingsException("Please define the classification column");
                }
            }
        }
        if (predictedClassName == null) {
            // without this check null would be stored as column name and a
            // "preset to null" warning would be shown
            throw new InvalidSettingsException("No nominal column found in input table, please define the classification column");
        }
        m_classifyColumnName.setStringValue(predictedClassName);
        setWarningMessage("Classification column preset to " + predictedClassName);
    }
    final String classColumn = m_classifyColumnName.getStringValue();
    final DataColumnSpec classColSpec = tableSpec.getColumnSpec(classColumn);
    if (classColSpec == null) {
        throw new InvalidSettingsException("Classification column not found in input table");
    }
    if (tableSpec.getNumColumns() < 2) {
        throw new InvalidSettingsException("Input table should contain at least 2 columns");
    }
    final int maxNoOfNominalVals = m_maxNoOfNominalVals.getIntValue();
    final boolean ignoreMissingVals = m_ignoreMissingVals.getBooleanValue();
    final boolean pmmlCompatible = m_pmmlCompatible.getBooleanValue();
    // check each nominal column with a valid domain if it contains more values than allowed
    // this needs to be in sync with the NaiveBayesModel.createModelMap method!!!
    final List<String> ignoredColumns = new LinkedList<>();
    final List<String> toBigNominalColumns = new LinkedList<>();
    final List<String> learnCols = new LinkedList<>();
    for (final DataColumnSpec colSpec : tableSpec) {
        final AttributeModel model = NaiveBayesModel.getCompatibleModel(colSpec, classColumn, maxNoOfNominalVals, ignoreMissingVals, pmmlCompatible);
        if (model == null) {
            // the column type is not supported by Naive Bayes
            ignoredColumns.add(colSpec.getName());
            continue;
        }
        final DataType colType = colSpec.getType();
        if (colType.isCompatible(NominalValue.class)) {
            final DataColumnDomain domain = colSpec.getDomain();
            if (domain != null && domain.getValues() != null) {
                final int valueCount = domain.getValues().size();
                if (valueCount > maxNoOfNominalVals) {
                    // the column contains too many unique values
                    if (colSpec.getName().equals(classColumn)) {
                        // the class column itself must not exceed the limit
                        throw new InvalidSettingsException("Class column domain contains too many unique values" + " (count: " + valueCount + ")");
                    }
                    toBigNominalColumns.add(colSpec.getName() + " (count: " + valueCount + ")");
                }
            }
            // NOTE(review): only columns compatible with NominalValue end up in learnCols -
            // confirm that other supported column types are left out on purpose
            learnCols.add(model.getAttributeName());
        }
    }
    warningMessage("The following columns will possibly be skipped due to too many values: ", toBigNominalColumns);
    warningMessage("The following columns are not supported and thus will be ignored: ", ignoredColumns);
    if (learnCols.size() < 1) {
        throw new InvalidSettingsException("Not enough valid columns");
    }
    // the optional PMML input spec is only read if the PMML in-port is enabled
    final PMMLPortObjectSpec modelSpec = m_pmmlInEnabled ? (PMMLPortObjectSpec) inSpecs[MODEL_INPORT] : null;
    final PMMLPortObjectSpec pmmlSpec = createPMMLSpec(tableSpec, modelSpec, learnCols, classColumn);
    return new PortObjectSpec[] { pmmlSpec, NaiveBayesModel.createStatisticsTableSpec(classColSpec.getType(), ignoreMissingVals) };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) PMMLPortObjectSpec(org.knime.core.node.port.pmml.PMMLPortObjectSpec) SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString) LinkedList(java.util.LinkedList) DataColumnSpec(org.knime.core.data.DataColumnSpec) AttributeModel(org.knime.base.node.mine.bayes.naivebayes.datamodel2.AttributeModel) DataColumnDomain(org.knime.core.data.DataColumnDomain) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) PMMLPortObjectSpec(org.knime.core.node.port.pmml.PMMLPortObjectSpec) PortObjectSpec(org.knime.core.node.port.PortObjectSpec) DataType(org.knime.core.data.DataType)

Aggregations

InvalidSettingsException (org.knime.core.node.InvalidSettingsException)2 LinkedList (java.util.LinkedList)1 AttributeModel (org.knime.base.node.mine.bayes.naivebayes.datamodel2.AttributeModel)1 TooManyValuesException (org.knime.base.node.mine.bayes.naivebayes.datamodel2.TooManyValuesException)1 DataCell (org.knime.core.data.DataCell)1 DataColumnDomain (org.knime.core.data.DataColumnDomain)1 DataColumnSpec (org.knime.core.data.DataColumnSpec)1 DataTableSpec (org.knime.core.data.DataTableSpec)1 DataType (org.knime.core.data.DataType)1 SettingsModelString (org.knime.core.node.defaultnodesettings.SettingsModelString)1 PortObjectSpec (org.knime.core.node.port.PortObjectSpec)1 PMMLPortObjectSpec (org.knime.core.node.port.pmml.PMMLPortObjectSpec)1