use of org.knime.core.data.DataType in project knime-core by knime.
the class NaiveBayesLearnerNodeModel2 method configure.
/**
 * {@inheritDoc}
 * <p>
 * Validates the learner settings against the training table spec and derives the output specs.
 * If no classification column is configured yet, the only nominal column of the training table is
 * pre-selected (with a warning); a table with several nominal columns requires an explicit choice.
 * Afterwards every column is checked for a compatible attribute model, and nominal columns whose
 * domain lists more values than the configured maximum are reported via a warning.
 *
 * @param inSpecs the specs of the input ports; the entry at {@code TRAINING_DATA_PORT} has to be a
 *            {@link DataTableSpec}
 * @return the PMML spec followed by the statistics table spec
 * @throws InvalidSettingsException if the classification column cannot be determined or is not part
 *             of the table, if the table has fewer than two columns, if the domain of the class column
 *             lists more values than allowed, or if no column is left to learn from
 * @throws IllegalArgumentException if the training data spec is not a {@link DataTableSpec}
 */
@Override
protected PortObjectSpec[] configure(final PortObjectSpec[] inSpecs) throws InvalidSettingsException {
    // check the internal variables if they are valid
    final PortObjectSpec inSpec = inSpecs[TRAINING_DATA_PORT];
    if (!(inSpec instanceof DataTableSpec)) {
        throw new IllegalArgumentException("Invalid input data");
    }
    final DataTableSpec tableSpec = (DataTableSpec) inSpec;
    if (m_classifyColumnName.getStringValue() == null) {
        // no class column configured yet: use the nominal column if there is exactly one
        String predictedClassName = null;
        for (DataColumnSpec colSpec : tableSpec) {
            if (colSpec.getType().isCompatible(NominalValue.class)) {
                if (predictedClassName == null) {
                    predictedClassName = colSpec.getName();
                } else {
                    // second nominal column found: the choice is ambiguous
                    throw new InvalidSettingsException("Please define the classification column");
                }
            }
        }
        if (predictedClassName == null) {
            // Without any nominal column there is nothing to preset. Fail here instead of storing
            // null in the settings model and announcing a "preset to null" warning.
            throw new InvalidSettingsException("Classification column not found in input table");
        }
        m_classifyColumnName.setStringValue(predictedClassName);
        setWarningMessage("Classification column preset to " + predictedClassName);
    }
    final String classColumn = m_classifyColumnName.getStringValue();
    final DataColumnSpec classColSpec = tableSpec.getColumnSpec(classColumn);
    if (classColSpec == null) {
        throw new InvalidSettingsException("Classification column not found in input table");
    }
    if (tableSpec.getNumColumns() < 2) {
        throw new InvalidSettingsException("Input table should contain at least 2 columns");
    }
    final int maxNoOfNominalVals = m_maxNoOfNominalVals.getIntValue();
    // and check each nominal column with a valid domain if it contains more values than allowed
    // this needs to be in sync with the NaiveBayesModel.createModelMap method!!!
    final List<String> ignoredColumns = new LinkedList<>();
    final List<String> toBigNominalColumns = new LinkedList<>();
    final List<String> learnCols = new LinkedList<>();
    for (final DataColumnSpec colSpec : tableSpec) {
        final AttributeModel model = NaiveBayesModel.getCompatibleModel(colSpec, classColumn, maxNoOfNominalVals,
            m_ignoreMissingVals.getBooleanValue(), m_pmmlCompatible.getBooleanValue());
        if (model == null) {
            // the column type is not supported by Naive Bayes
            ignoredColumns.add(colSpec.getName());
            continue;
        }
        final DataType colType = colSpec.getType();
        if (colType.isCompatible(NominalValue.class)) {
            final DataColumnDomain domain = colSpec.getDomain();
            if (domain != null && domain.getValues() != null) {
                final int valueCount = domain.getValues().size();
                if (valueCount > maxNoOfNominalVals) {
                    // the domain lists more unique values than allowed
                    if (colSpec.getName().equals(classColumn)) {
                        // for the class column this is fatal
                        throw new InvalidSettingsException(
                            "Class column domain contains too many unique values" + " (count: " + valueCount + ")");
                    }
                    // any other column is only reported in the warning below
                    toBigNominalColumns.add(colSpec.getName() + " (count: " + valueCount + ")");
                }
            }
            // NOTE(review): this statement is only reached for nominal columns, so non-nominal columns
            // that have a compatible model are never added to learnCols - confirm this is intended.
            learnCols.add(model.getAttributeName());
        }
    }
    warningMessage("The following columns will possibly be skipped due to too many values: ", toBigNominalColumns);
    warningMessage("The following columns are not supported and thus will be ignored: ", ignoredColumns);
    if (learnCols.size() < 1) {
        throw new InvalidSettingsException("Not enough valid columns");
    }
    // the optional PMML input spec is only read when the PMML in-port is enabled
    final PMMLPortObjectSpec modelSpec = m_pmmlInEnabled ? (PMMLPortObjectSpec) inSpecs[MODEL_INPORT] : null;
    final PMMLPortObjectSpec pmmlSpec = createPMMLSpec(tableSpec, modelSpec, learnCols, classColumn);
    return new PortObjectSpec[] { pmmlSpec,
        NaiveBayesModel.createStatisticsTableSpec(classColSpec.getType(), m_ignoreMissingVals.getBooleanValue()) };
}
use of org.knime.core.data.DataType in project knime-core by knime.
the class NaiveBayesCellFactory method createPredictedClassColSpec.
/**
 * Builds the spec of the column that receives the predicted class.
 *
 * @param classColumnName the preferred name of the prediction column; it is made unique with respect
 *            to {@code inSpec}
 * @param classType the KNIME type of the class column
 * @param inSpec the spec of the table the new column has to be unique in
 * @return the spec of the prediction column
 */
private static DataColumnSpec createPredictedClassColSpec(final String classColumnName, final DataType classType, final DataTableSpec inSpec) {
    final String uniqueName = DataTableSpec.getUniqueColumnName(inSpec, classColumnName);
    // Round trip KNIME type -> PMML type -> KNIME type: long data cells are converted into double by
    // PMML, so we ask PMML which type it uses and infer the matching KNIME type from that answer.
    final DataType typeAfterPmmlRoundTrip =
        PMMLDataDictionaryTranslator.getKNIMEDataType(PMMLDataDictionaryTranslator.getPMMLDataType(classType));
    return new DataColumnSpecCreator(uniqueName, typeAfterPmmlRoundTrip).createSpec();
}
use of org.knime.core.data.DataType in project knime-core by knime.
the class RegressionPredictorCellFactory method createColumnSpec.
/**
 * Derives the specs of the columns the predictor appends, if that is possible at configure time.
 * <p>
 * Every learning column of the model has to be present in the table with a compatible type. The
 * result holds one probability column per target category (only if probabilities are requested and
 * the target is nominal) followed by the prediction column.
 *
 * @param portSpec the spec of the pmml input port
 * @param tableSpec the spec of the data input port
 * @param settings settings for the predictor node
 * @return the specs of the appended columns, or {@code null} if probability columns are requested for
 *         a nominal target whose domain has no values
 * @throws InvalidSettingsException when tableSpec and portSpec do not match
 */
public static DataColumnSpec[] createColumnSpec(final PMMLPortObjectSpec portSpec, final DataTableSpec tableSpec, final RegressionPredictorSettings settings) throws InvalidSettingsException {
    // the model must name a target column
    if (portSpec.getTargetCols().isEmpty()) {
        throw new InvalidSettingsException("The general regression model does not specify a target column.");
    }

    // every learning column must exist in the prediction table with a compatible type
    for (DataColumnSpec expectedSpec : portSpec.getLearningCols()) {
        final String columnName = expectedSpec.getName();
        if (!tableSpec.containsName(columnName)) {
            throw new InvalidSettingsException(
                "The table for prediction does not contain the column \"" + columnName + "\".");
        }
        final DataType expectedType = expectedSpec.getType();
        final DataType actualType = tableSpec.getColumnSpec(columnName).getType();
        if (expectedType.isCompatible(NominalValue.class)) {
            // bit vectors and byte vectors are accepted in place of a nominal column
            final boolean acceptable = actualType.isCompatible(BitVectorValue.class)
                || actualType.isCompatible(ByteVectorValue.class)
                || actualType.isCompatible(NominalValue.class);
            if (!acceptable) {
                throw new InvalidSettingsException("The column \"" + columnName
                    + "\" in the table of prediction is expected to be compatible with \"NominalValue\".");
            }
        } else if (expectedType.isCompatible(DoubleValue.class) && !actualType.isCompatible(DoubleValue.class)) {
            throw new InvalidSettingsException(
                "The column \"" + columnName + "\" in the table of prediction is expected to be numeric.");
        }
    }

    // collect the appended columns: optional probability columns first, the prediction last
    final List<DataColumnSpec> appendedSpecs = new ArrayList<DataColumnSpec>();
    final String targetCol = portSpec.getTargetFields().get(0);
    final DataColumnSpec targetColSpec = portSpec.getDataTableSpec().getColumnSpec(targetCol);
    if (settings.getIncludeProbabilities() && targetColSpec.getType().isCompatible(NominalValue.class)) {
        if (!targetColSpec.getDomain().hasValues()) {
            // the categories are unknown, so the probability columns cannot be named yet
            return null;
        }
        for (DataCell category : targetColSpec.getDomain().getValues()) {
            final String probabilityName =
                "P (" + targetCol + "=" + category.toString() + ")" + settings.getPropColumnSuffix();
            final DataColumnSpecCreator probabilityCreator = new DataColumnSpecCreator(
                DataTableSpec.getUniqueColumnName(tableSpec, probabilityName), DoubleCell.TYPE);
            // probabilities are bounded by [0, 1]
            probabilityCreator.setDomain(
                new DataColumnDomainCreator(new DoubleCell(0.0), new DoubleCell(1.0)).createDomain());
            appendedSpecs.add(probabilityCreator.createSpec());
        }
    }

    final String predictionName;
    if (settings.getHasCustomPredictionName()) {
        predictionName = settings.getCustomPredictionName();
    } else {
        predictionName = "Prediction (" + targetCol + ")";
    }
    final String uniquePredictionName = DataTableSpec.getUniqueColumnName(tableSpec, predictionName);

    // a nominal target keeps its type and domain, anything else is predicted as a double
    final DataColumnSpecCreator predictionCreator;
    if (targetColSpec.getType().isCompatible(NominalValue.class)) {
        predictionCreator = new DataColumnSpecCreator(uniquePredictionName, targetColSpec.getType());
        predictionCreator.setDomain(new DataColumnDomainCreator(targetColSpec.getDomain()).createDomain());
    } else {
        predictionCreator = new DataColumnSpecCreator(uniquePredictionName, DoubleCell.TYPE);
    }
    appendedSpecs.add(predictionCreator.createSpec());
    return appendedSpecs.toArray(new DataColumnSpec[0]);
}
use of org.knime.core.data.DataType in project knime-core by knime.
the class AccuracyScorerNodeModel method sort.
/**
 * Sorts the given cells in place according to the configured sorting strategy and, if requested,
 * reverses the result afterwards.
 *
 * @param order The cells to sort.
 * @throws IllegalStateException if the configured strategy cannot be applied to the common type of
 *             the cells or is not recognized
 */
private void sort(final DataCell[] order) {
    if (order.length == 0) {
        return;
    }
    // the common super type of all cells decides which comparators are applicable
    DataType commonType = order[0].getType();
    for (int i = 0; i < order.length; i++) {
        commonType = DataType.getCommonSuperType(commonType, order[i].getType());
    }
    final Comparator<DataCell> cellComparator;
    switch (m_sortingStrategy) {
        case Unsorted:
            return;
        case InsertionOrder:
            // keep the given order, only the direction may be flipped
            if (m_sortingReversed) {
                reverse(order);
            }
            return;
        case Numeric:
            if (!DoubleCell.TYPE.isASuperTypeOf(commonType)) {
                throw new IllegalStateException("Numerical sorting strategy is not supported.");
            }
            cellComparator = commonType.getComparator();
            break;
        case Lexical:
            if (StringCell.TYPE.isASuperTypeOf(commonType)) {
                final Collator collator = Collator.getInstance();
                // do not try to combine characters
                collator.setDecomposition(Collator.NO_DECOMPOSITION);
                // case and accents matter.
                collator.setStrength(Collator.IDENTICAL);
                @SuppressWarnings("unchecked")
                final Comparator<String> collatorAsStringComparator = (Comparator<String>) (Comparator<?>) collator;
                cellComparator = new StringValueComparator(collatorAsStringComparator);
            } else if (DoubleCell.TYPE.isASuperTypeOf(commonType)) {
                // compare the string representations of the values
                cellComparator = new DataValueComparator() {
                    @Override
                    protected int compareDataValues(final DataValue v1, final DataValue v2) {
                        return v1.toString().compareTo(v2.toString());
                    }
                };
            } else {
                throw new IllegalStateException("Lexical sorting strategy is not supported.");
            }
            break;
        default:
            throw new IllegalStateException("Unrecognized sorting strategy: " + m_sortingStrategy);
    }
    Arrays.sort(order, cellComparator);
    if (m_sortingReversed) {
        reverse(order);
    }
}
use of org.knime.core.data.DataType in project knime-core by knime.
the class MDSNodeModel method configure.
/**
 * {@inheritDoc}
 * <p>
 * Refreshes the include list from the column model, counts the selected number and fuzzy interval
 * columns, and sets the fuzzy flag accordingly. All detected problems are collected and reported
 * together in a single exception.
 *
 * @param inSpecs the input specs; exactly one table spec is expected
 * @return the input spec with the MDS columns appended
 * @throws InvalidSettingsException if a selected column is missing from the input spec, if no number
 *             or fuzzy interval column is selected, or if both kinds are selected together
 */
@Override
protected DataTableSpec[] configure(final DataTableSpec[] inSpecs) throws InvalidSettingsException {
    assert inSpecs.length == 1;
    final DataTableSpec inSpec = inSpecs[0];
    m_includeList = m_colModel.getIncludeList();
    final StringBuilder errors = new StringBuilder();

    // count the selected columns by kind
    int numberCells = 0;
    int fuzzyCells = 0;
    for (int i = 0; i < inSpec.getNumColumns(); i++) {
        if (m_includeList.contains(inSpec.getColumnSpec(i).getName())) {
            final DataType type = inSpec.getColumnSpec(i).getType();
            if (SotaUtil.isNumberType(type)) {
                numberCells++;
            } else if (SotaUtil.isFuzzyIntervalType(type)) {
                fuzzyCells++;
            }
        }
    }

    // check if selected columns are still in spec; report the problem once, not once per column
    for (String selected : m_includeList) {
        if (!inSpec.containsName(selected)) {
            errors.append("Selected column are not in spec !");
            break;
        }
    }

    // validate the number of selected columns and derive the fuzzy flag
    if (numberCells <= 0 && fuzzyCells <= 0) {
        if (errors.length() > 0) {
            // keep independent messages readable when both problems occur
            errors.append(' ');
        }
        errors.append("Number of columns has to be " + "greater than zero !");
    } else if (numberCells > 0 && fuzzyCells > 0) {
        if (errors.length() > 0) {
            errors.append(' ');
        }
        errors.append("Number cells and fuzzy cells must not be mixed !");
    } else {
        // exactly one of the two counters is positive here
        m_fuzzy = fuzzyCells > 0;
    }

    if (errors.length() > 0) {
        throw new InvalidSettingsException(errors.toString());
    }

    // create output table spec (input spec with the additional mds columns
    // appended).
    ColumnRearranger rearranger = createColumnRearranger(inSpec, new MDSCellFactory(null, m_outputDimModel.getIntValue()));
    return new DataTableSpec[] { rearranger.createSpec() };
}
Aggregations