Search in sources :

Example 96 with DataType

use of org.knime.core.data.DataType in project knime-core by knime.

the class NaiveBayesLearnerNodeModel2 method configure.

/**
 * {@inheritDoc}
 *
 * Validates the training data spec and the class column selection and derives the output specs.
 * If no class column has been selected yet, the only nominal column of the input table (if there is
 * exactly one) is preselected and a warning is set.
 *
 * @throws InvalidSettingsException if no class column can be determined, the class column is not part
 *             of the input table, the table has fewer than 2 columns, the class column domain holds more
 *             values than allowed, or no learning column is left
 */
@Override
protected PortObjectSpec[] configure(final PortObjectSpec[] inSpecs) throws InvalidSettingsException {
    // check the internal variables if they are valid
    final PortObjectSpec inSpec = inSpecs[TRAINING_DATA_PORT];
    if (!(inSpec instanceof DataTableSpec)) {
        throw new IllegalArgumentException("Invalid input data");
    }
    final DataTableSpec tableSpec = (DataTableSpec) inSpec;
    if (m_classifyColumnName.getStringValue() == null) {
        // no class column configured yet: guess it, but only if the choice is unambiguous
        String predictedClassName = null;
        for (DataColumnSpec colSpec : tableSpec) {
            if (colSpec.getType().isCompatible(NominalValue.class)) {
                if (predictedClassName == null) {
                    predictedClassName = colSpec.getName();
                } else {
                    // more than one nominal column: the user has to decide
                    throw new InvalidSettingsException("Please define the classification column");
                }
            }
        }
        if (predictedClassName == null) {
            // fail early instead of presetting the class column to null and emitting a
            // "preset to null" warning followed by a misleading "column not found" error
            throw new InvalidSettingsException("Input table contains no nominal column that could be used as classification column");
        }
        m_classifyColumnName.setStringValue(predictedClassName);
        setWarningMessage("Classification column preset to " + predictedClassName);
    }
    final String classColumn = m_classifyColumnName.getStringValue();
    final DataColumnSpec classColSpec = tableSpec.getColumnSpec(classColumn);
    if (classColSpec == null) {
        throw new InvalidSettingsException("Classification column not found in input table");
    }
    if (tableSpec.getNumColumns() < 2) {
        throw new InvalidSettingsException("Input table should contain at least 2 columns");
    }
    final int maxNoOfNominalVals = m_maxNoOfNominalVals.getIntValue();
    // and check each nominal column with a valid domain if it contains more values than allowed
    // this needs to be in sync with the NaiveBayesModel.createModelMap method!!!
    final List<String> ignoredColumns = new LinkedList<>();
    final List<String> toBigNominalColumns = new LinkedList<>();
    final List<String> learnCols = new LinkedList<>();
    for (final DataColumnSpec colSpec : tableSpec) {
        final AttributeModel model = NaiveBayesModel.getCompatibleModel(colSpec, classColumn, maxNoOfNominalVals, m_ignoreMissingVals.getBooleanValue(), m_pmmlCompatible.getBooleanValue());
        if (model == null) {
            // the column type is not supported by Naive Bayes
            ignoredColumns.add(colSpec.getName());
            continue;
        }
        final DataType colType = colSpec.getType();
        if (colType.isCompatible(NominalValue.class)) {
            final DataColumnDomain domain = colSpec.getDomain();
            if (domain != null && domain.getValues() != null) {
                if (domain.getValues().size() > maxNoOfNominalVals) {
                    // the domain lists more unique values than allowed
                    if (colSpec.getName().equals(classColumn)) {
                        // the class column itself must not exceed the limit
                        throw new InvalidSettingsException("Class column domain contains too many unique values" + " (count: " + domain.getValues().size() + ")");
                    }
                    toBigNominalColumns.add(colSpec.getName() + " (count: " + domain.getValues().size() + ")");
                }
            }
            // NOTE(review): only nominal columns are recorded as learning columns here, including
            // those just reported as too big - confirm this is intended
            learnCols.add(model.getAttributeName());
        }
    }
    warningMessage("The following columns will possibly be skipped due to too many values: ", toBigNominalColumns);
    warningMessage("The following columns are not supported and thus will be ignored: ", ignoredColumns);
    if (learnCols.size() < 1) {
        throw new InvalidSettingsException("Not enough valid columns");
    }
    // the optional PMML input spec is only read if the PMML in-port is enabled
    final PMMLPortObjectSpec modelSpec = m_pmmlInEnabled ? (PMMLPortObjectSpec) inSpecs[MODEL_INPORT] : null;
    final PMMLPortObjectSpec pmmlSpec = createPMMLSpec(tableSpec, modelSpec, learnCols, classColumn);
    return new PortObjectSpec[] { pmmlSpec, NaiveBayesModel.createStatisticsTableSpec(classColSpec.getType(), m_ignoreMissingVals.getBooleanValue()) };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) PMMLPortObjectSpec(org.knime.core.node.port.pmml.PMMLPortObjectSpec) SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString) LinkedList(java.util.LinkedList) DataColumnSpec(org.knime.core.data.DataColumnSpec) AttributeModel(org.knime.base.node.mine.bayes.naivebayes.datamodel2.AttributeModel) DataColumnDomain(org.knime.core.data.DataColumnDomain) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) PMMLPortObjectSpec(org.knime.core.node.port.pmml.PMMLPortObjectSpec) PortObjectSpec(org.knime.core.node.port.PortObjectSpec) DataType(org.knime.core.data.DataType)

Example 97 with DataType

use of org.knime.core.data.DataType in project knime-core by knime.

the class NaiveBayesCellFactory method createPredictedClassColSpec.

/**
 * Builds the spec of the predicted class column.
 *
 * @param classColumnName the requested column name; made unique with respect to inSpec
 * @param classType the KNIME type of the class column
 * @param inSpec the table spec the new column name must not clash with
 * @return the spec of the predicted class column
 */
private static DataColumnSpec createPredictedClassColSpec(final String classColumnName, final DataType classType, final DataTableSpec inSpec) {
    final String uniqueName = DataTableSpec.getUniqueColumnName(inSpec, classColumnName);
    // Round trip KNIME type -> PMML type -> KNIME type: long data cells are converted into double
    // by PMML, so we ask PMML which type it uses and infer the matching KNIME type from that.
    final DataType roundTrippedType = PMMLDataDictionaryTranslator.getKNIMEDataType(PMMLDataDictionaryTranslator.getPMMLDataType(classType));
    return new DataColumnSpecCreator(uniqueName, roundTrippedType).createSpec();
}
Also used : DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) DataColumnSpec(org.knime.core.data.DataColumnSpec) DataType(org.knime.core.data.DataType)

Example 98 with DataType

use of org.knime.core.data.DataType in project knime-core by knime.

the class RegressionPredictorCellFactory method createColumnSpec.

/**
 * Derives the specs of the columns appended by the predictor, if that is possible.
 *
 * @param portSpec the spec of the pmml input port
 * @param tableSpec the spec of the data input port
 * @param settings settings for the predictor node
 * @return the specs of the appended columns (optional probability columns followed by the prediction
 *         column), or null if probabilities are requested for a nominal target whose domain has no values
 * @throws InvalidSettingsException if the model has no target column, or a learning column is missing
 *             from tableSpec or has an incompatible type there
 */
public static DataColumnSpec[] createColumnSpec(final PMMLPortObjectSpec portSpec, final DataTableSpec tableSpec, final RegressionPredictorSettings settings) throws InvalidSettingsException {
    if (portSpec.getTargetCols().isEmpty()) {
        throw new InvalidSettingsException("The general regression model does not specify a target column.");
    }
    // every learning column of the model must exist in the table with a compatible type
    for (final DataColumnSpec modelColSpec : portSpec.getLearningCols()) {
        final String colName = modelColSpec.getName();
        if (!tableSpec.containsName(colName)) {
            throw new InvalidSettingsException("The table for prediction does not contain the column \"" + colName + "\".");
        }
        final DataType tableColType = tableSpec.getColumnSpec(colName).getType();
        final DataType modelColType = modelColSpec.getType();
        if (modelColType.isCompatible(NominalValue.class)) {
            // nominal learning columns may also be fed from bit or byte vector columns
            final boolean acceptable = tableColType.isCompatible(BitVectorValue.class)
                || tableColType.isCompatible(ByteVectorValue.class)
                || tableColType.isCompatible(NominalValue.class);
            if (!acceptable) {
                throw new InvalidSettingsException("The column \"" + colName + "\" in the table of prediction is expected to be  compatible with \"NominalValue\".");
            }
        } else if (modelColType.isCompatible(DoubleValue.class) && !tableColType.isCompatible(DoubleValue.class)) {
            throw new InvalidSettingsException("The column \"" + colName + "\" in the table of prediction is expected to be numeric.");
        }
    }

    final List<DataColumnSpec> appended = new ArrayList<DataColumnSpec>();
    final String targetCol = portSpec.getTargetFields().get(0);
    final DataColumnSpec targetColSpec = portSpec.getDataTableSpec().getColumnSpec(targetCol);

    // one probability column in [0, 1] per category of a nominal target
    if (settings.getIncludeProbabilities() && targetColSpec.getType().isCompatible(NominalValue.class)) {
        if (!targetColSpec.getDomain().hasValues()) {
            // without the categories the probability columns cannot be named
            return null;
        }
        for (final DataCell category : new ArrayList<DataCell>(targetColSpec.getDomain().getValues())) {
            final String probColName = DataTableSpec.getUniqueColumnName(tableSpec, "P (" + targetCol + "=" + category.toString() + ")" + settings.getPropColumnSuffix());
            final DataColumnSpecCreator probCreator = new DataColumnSpecCreator(probColName, DoubleCell.TYPE);
            probCreator.setDomain(new DataColumnDomainCreator(new DoubleCell(0.0), new DoubleCell(1.0)).createDomain());
            appended.add(probCreator.createSpec());
        }
    }

    // the prediction column: custom name if configured, default name otherwise
    final String requestedName;
    if (settings.getHasCustomPredictionName()) {
        requestedName = settings.getCustomPredictionName();
    } else {
        requestedName = "Prediction (" + targetCol + ")";
    }
    final String predictionColName = DataTableSpec.getUniqueColumnName(tableSpec, requestedName);
    // nominal targets keep their type and domain, everything else is predicted as double
    final boolean nominalTarget = targetColSpec.getType().isCompatible(NominalValue.class);
    final DataColumnSpecCreator predictionCreator = new DataColumnSpecCreator(predictionColName, nominalTarget ? targetColSpec.getType() : DoubleCell.TYPE);
    if (nominalTarget) {
        predictionCreator.setDomain(new DataColumnDomainCreator(targetColSpec.getDomain()).createDomain());
    }
    appended.add(predictionCreator.createSpec());
    return appended.toArray(new DataColumnSpec[0]);
}
Also used : DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) NominalValue(org.knime.core.data.NominalValue) DoubleCell(org.knime.core.data.def.DoubleCell) ArrayList(java.util.ArrayList) DataColumnDomainCreator(org.knime.core.data.DataColumnDomainCreator) ByteVectorValue(org.knime.core.data.vector.bytevector.ByteVectorValue) DataColumnSpec(org.knime.core.data.DataColumnSpec) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) DoubleValue(org.knime.core.data.DoubleValue) DataCell(org.knime.core.data.DataCell) DataType(org.knime.core.data.DataType) BitVectorValue(org.knime.core.data.vector.bitvector.BitVectorValue)

Example 99 with DataType

use of org.knime.core.data.DataType in project knime-core by knime.

the class AccuracyScorerNodeModel method sort.

/**
 * Sorts the given cells in place according to the configured sorting strategy and reverses the
 * result if reversed sorting is configured. An empty array and the unsorted strategy leave the
 * array untouched.
 *
 * @param order The cells to sort.
 */
private void sort(final DataCell[] order) {
    if (order.length == 0) {
        return;
    }
    // fold the types of all cells into their common super type
    DataType commonType = order[0].getType();
    for (int i = 0; i < order.length; i++) {
        commonType = DataType.getCommonSuperType(commonType, order[i].getType());
    }
    final Comparator<DataCell> cellComparator;
    switch(m_sortingStrategy) {
        case InsertionOrder:
            // keep the given order, optionally back to front
            if (m_sortingReversed) {
                reverse(order);
            }
            return;
        case Unsorted:
            return;
        case Lexical:
            if (StringCell.TYPE.isASuperTypeOf(commonType)) {
                final Collator collator = Collator.getInstance();
                // do not try to combine characters
                collator.setDecomposition(Collator.NO_DECOMPOSITION);
                // case and accents matter.
                collator.setStrength(Collator.IDENTICAL);
                @SuppressWarnings("unchecked") final Comparator<String> byCollation = (Comparator<String>) (Comparator<?>) collator;
                cellComparator = new StringValueComparator(byCollation);
            } else if (DoubleCell.TYPE.isASuperTypeOf(commonType)) {
                // numbers are ordered by their string representation
                cellComparator = new DataValueComparator() {

                    @Override
                    protected int compareDataValues(final DataValue v1, final DataValue v2) {
                        return v1.toString().compareTo(v2.toString());
                    }
                };
            } else {
                throw new IllegalStateException("Lexical sorting strategy is not supported.");
            }
            break;
        case Numeric:
            if (!DoubleCell.TYPE.isASuperTypeOf(commonType)) {
                throw new IllegalStateException("Numerical sorting strategy is not supported.");
            }
            cellComparator = commonType.getComparator();
            break;
        default:
            throw new IllegalStateException("Unrecognized sorting strategy: " + m_sortingStrategy);
    }
    Arrays.sort(order, cellComparator);
    if (m_sortingReversed) {
        reverse(order);
    }
}
Also used : DataValue(org.knime.core.data.DataValue) DataType(org.knime.core.data.DataType) DataCell(org.knime.core.data.DataCell) DataValueComparator(org.knime.core.data.DataValueComparator) Collator(java.text.Collator) StringValueComparator(org.knime.base.util.StringValueComparator) DataValueComparator(org.knime.core.data.DataValueComparator) Comparator(java.util.Comparator) StringValueComparator(org.knime.base.util.StringValueComparator)

Example 100 with DataType

use of org.knime.core.data.DataType in project knime-core by knime.

the class MDSNodeModel method configure.

/**
 * {@inheritDoc}
 *
 * Validates the column selection against the input spec and creates the output spec. The selected
 * columns must all exist in the input spec and must be either number columns or fuzzy interval
 * columns, but not a mix of both. All detected problems are collected and reported in a single
 * exception.
 *
 * @throws InvalidSettingsException if the column selection is not valid for the input spec
 */
@Override
protected DataTableSpec[] configure(final DataTableSpec[] inSpecs) throws InvalidSettingsException {
    assert inSpecs.length == 1;
    final DataTableSpec inSpec = inSpecs[0];
    m_includeList = m_colModel.getIncludeList();
    // purely local buffer, so the unsynchronized StringBuilder is sufficient
    final StringBuilder problems = new StringBuilder();
    // count the selected columns per supported kind
    int numberCells = 0;
    int fuzzyCells = 0;
    for (int i = 0; i < inSpec.getNumColumns(); i++) {
        if (m_includeList.contains(inSpec.getColumnSpec(i).getName())) {
            final DataType type = inSpec.getColumnSpec(i).getType();
            if (SotaUtil.isNumberType(type)) {
                numberCells++;
            } else if (SotaUtil.isFuzzyIntervalType(type)) {
                fuzzyCells++;
            }
        }
    }
    // check if selected columns are still in spec
    for (String s : m_includeList) {
        if (!inSpec.containsName(s)) {
            problems.append("Selected column are not in spec !");
            // report the problem once instead of repeating the message per missing column
            break;
        }
    }
    // check that the number of selected columns is valid and remember the kind of data
    if (numberCells <= 0 && fuzzyCells <= 0) {
        if (problems.length() > 0) {
            // keep the individual messages readable
            problems.append(' ');
        }
        problems.append("Number of columns has to be greater than zero !");
    } else if (numberCells > 0 && fuzzyCells > 0) {
        if (problems.length() > 0) {
            problems.append(' ');
        }
        problems.append("Number cells and fuzzy cells must not be mixed !");
    } else {
        // exactly one of the two counters is positive here
        m_fuzzy = fuzzyCells > 0;
    }
    // any collected problem invalidates the settings
    if (problems.length() > 0) {
        throw new InvalidSettingsException(problems.toString());
    }
    // create output table spec (input spec with the additional mds columns
    // appended).
    ColumnRearranger rearranger = createColumnRearranger(inSpec, new MDSCellFactory(null, m_outputDimModel.getIntValue()));
    return new DataTableSpec[] { rearranger.createSpec() };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) ArrayList(java.util.ArrayList) DataType(org.knime.core.data.DataType) SettingsModelFilterString(org.knime.core.node.defaultnodesettings.SettingsModelFilterString) SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString)

Aggregations

DataType (org.knime.core.data.DataType)330 DataColumnSpec (org.knime.core.data.DataColumnSpec)142 DataTableSpec (org.knime.core.data.DataTableSpec)101 DataCell (org.knime.core.data.DataCell)96 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)95 DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator)71 DoubleValue (org.knime.core.data.DoubleValue)67 DataRow (org.knime.core.data.DataRow)61 ArrayList (java.util.ArrayList)55 SettingsModelString (org.knime.core.node.defaultnodesettings.SettingsModelString)34 ColumnRearranger (org.knime.core.data.container.ColumnRearranger)32 DefaultRow (org.knime.core.data.def.DefaultRow)24 HashSet (java.util.HashSet)23 HashMap (java.util.HashMap)20 StringCell (org.knime.core.data.def.StringCell)20 NominalValue (org.knime.core.data.NominalValue)18 DoubleCell (org.knime.core.data.def.DoubleCell)18 IntCell (org.knime.core.data.def.IntCell)18 BitVectorValue (org.knime.core.data.vector.bitvector.BitVectorValue)18 ByteVectorValue (org.knime.core.data.vector.bytevector.ByteVectorValue)18