Use of org.knime.core.data.DataColumnDomain in project knime-core by knime.
The class ShapeManagerNodeDialogPane, method loadSettingsFrom.
/**
 * Updates this dialog by refreshing all components in the shape settings
 * tab. Initializes the column name combo box and sets the values for the
 * default selected one.
 *
 * @param settings the settings to load
 * @param specs the input table specs
 * @throws NotConfigurableException if no column contains domain values
 * @see NodeDialogPane#loadSettingsFrom(NodeSettingsRO, DataTableSpec[])
 */
@Override
protected void loadSettingsFrom(final NodeSettingsRO settings, final DataTableSpec[] specs)
        throws NotConfigurableException {
    // remove all columns and column-value-to-shape mappings
    m_columns.removeAllItems();
    m_map.clear();
    // read settings and write into the map
    String target = settings.getString(ShapeManagerNodeModel.SELECTED_COLUMN, null);
    // add columns and domain value mapping
    int cols = specs[0].getNumColumns();
    Shape[] shapes = ShapeFactory.getShapes().toArray(new Shape[] {});
    for (int i = 0; i < cols; i++) {
        DataColumnSpec cspec = specs[0].getColumnSpec(i);
        DataColumnDomain domain = cspec.getDomain();
        if (!domain.hasValues()) {
            continue;
        }
        LinkedHashMap<DataCell, Shape> domMap = new LinkedHashMap<DataCell, Shape>();
        int j = 0;
        for (DataCell value : domain.getValues()) {
            if (value != null) {
                // no settings -> assign different shapes
                String shape = settings.getString(value.toString(), null);
                if (shape == null) {
                    // bugfix 1283
                    domMap.put(value, shapes[j++ % shapes.length]);
                } else {
                    domMap.put(value, ShapeFactory.getShape(shape));
                }
            }
        }
        m_map.put(cspec.getName(), domMap);
        m_columns.addItem(cspec);
        if (cspec.getName().equals(target)) {
            m_columns.setSelectedItem(cspec);
        }
    }
    if (m_map.size() == 0) {
        throw new NotConfigurableException("No column in data contains domain values.");
    }
    columnChanged(getSelectedColumn());
    m_columns.addItemListener(this);
}
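The interesting part is the fallback branch (bugfix 1283): when the settings carry no saved shape for a value, shapes are assigned round-robin over ShapeFactory's shape list, so the mapping stays deterministic even when there are more domain values than shapes. A minimal sketch of that pattern in isolation; the helper name is hypothetical and the import paths are assumptions:

import java.util.LinkedHashMap;
import java.util.Map;
import org.knime.core.data.DataCell;
import org.knime.core.data.DataColumnDomain;
import org.knime.core.data.property.ShapeFactory;
import org.knime.core.data.property.ShapeFactory.Shape;

// Hypothetical helper: assign each domain value a default shape,
// cycling through the available shapes once they are exhausted.
static Map<DataCell, Shape> defaultShapeMapping(final DataColumnDomain domain) {
    final Shape[] shapes = ShapeFactory.getShapes().toArray(new Shape[] {});
    final Map<DataCell, Shape> mapping = new LinkedHashMap<>();
    int j = 0;
    for (DataCell value : domain.getValues()) {
        mapping.put(value, shapes[j++ % shapes.length]); // wrap around via modulo
    }
    return mapping;
}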
Use of org.knime.core.data.DataColumnDomain in project knime-core by knime.
The class IntervalBinCreator, method createBins.
/**
 * @param colSpec the column specification
 * @param numberOfBins the number of bins to create
 * @return the created bins
 */
public List<E> createBins(final DataColumnSpec colSpec, final int numberOfBins) {
    // set the bounds for binning
    final DataColumnDomain domain = colSpec.getDomain();
    final DataCell lowerBoundCell = domain.getLowerBound();
    if (lowerBoundCell == null || lowerBoundCell.isMissing()
            || !lowerBoundCell.getType().isCompatible(DoubleValue.class)) {
        throw new IllegalArgumentException(
            "The lower bound of the binning column domain should be defined");
    }
    final double lowerBound = ((DoubleValue) lowerBoundCell).getDoubleValue();
    final DataCell upperBoundCell = domain.getUpperBound();
    if (upperBoundCell == null || upperBoundCell.isMissing()
            || !upperBoundCell.getType().isCompatible(DoubleValue.class)) {
        throw new IllegalArgumentException(
            "The upper bound of the binning column domain should be defined");
    }
    final double upperBound = ((DoubleValue) upperBoundCell).getDoubleValue();
    int noOfBins = numberOfBins;
    // start the binning
    if (noOfBins < 1) {
        noOfBins = AbstractHistogramVizModel.DEFAULT_NO_OF_BINS;
    }
    if ((lowerBound - upperBound) == 0) {
        noOfBins = 1;
    }
    final boolean isInteger = colSpec.getType().isCompatible(LongValue.class);
    double binInterval = BinningUtil.createBinInterval(upperBound, lowerBound, noOfBins, isInteger);
    final double calculatedLowerBound = BinningUtil.createBinStart(lowerBound, binInterval, isInteger);
    if (calculatedLowerBound != lowerBound) {
        binInterval = BinningUtil.createBinInterval(upperBound, calculatedLowerBound, noOfBins, isInteger);
    }
    double leftBoundary = calculatedLowerBound;
    final double lastBoundary = BinningUtil.myRoundedBorders(upperBound, binInterval,
        AbstractHistogramVizModel.INTERVAL_DIGITS, isInteger);
    // increase the bin interval if we have rounding problems
    while (leftBoundary + (binInterval * noOfBins) < lastBoundary) {
        binInterval = binInterval + binInterval * 0.001;
    }
    boolean firstBar = true;
    createList(noOfBins);
    for (int i = 0; i < noOfBins; i++) {
        // We have to use this rounding method to avoid problems with very
        // small intervals. If the interval is very small it could happen
        // that we get the same boundaries for several bars by rounding the
        // borders.
        double rightBoundary;
        if (isInteger && binInterval == 1) {
            rightBoundary = leftBoundary;
        } else {
            rightBoundary = BinningUtil.myRoundedBorders(leftBoundary + binInterval,
                binInterval, AbstractHistogramVizModel.INTERVAL_DIGITS, isInteger);
        }
        final String binCaption = BinningUtil.createBarName(firstBar, leftBoundary, rightBoundary);
        firstBar = false;
        addBin(binCaption, leftBoundary, rightBoundary);
        // advance the left boundary for the next bin
        if (isInteger && binInterval == 1) {
            leftBoundary = rightBoundary + binInterval;
        } else {
            leftBoundary = rightBoundary;
        }
    }
    return getBins();
}
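Both bound checks above follow the same pattern: the domain cell must exist, must not be missing, and must be numeric before it can be narrowed to DoubleValue. A small sketch of how that guard could be factored out; the helper is hypothetical, not part of IntervalBinCreator:

import org.knime.core.data.DataCell;
import org.knime.core.data.DoubleValue;

// Hypothetical helper: validate a domain bound cell and unwrap its double value.
static double requireDoubleBound(final DataCell boundCell, final String boundName) {
    if (boundCell == null || boundCell.isMissing()
            || !boundCell.getType().isCompatible(DoubleValue.class)) {
        throw new IllegalArgumentException("The " + boundName
            + " bound of the binning column domain should be defined");
    }
    return ((DoubleValue) boundCell).getDoubleValue();
}

With it, the two checks collapse to requireDoubleBound(domain.getLowerBound(), "lower") and requireDoubleBound(domain.getUpperBound(), "upper").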
Use of org.knime.core.data.DataColumnDomain in project knime-core by knime.
The class AppendedRowsTable, method generateDataTableSpec.
/**
 * Factory method that determines the final {@link DataTableSpec} given the
 * tables.
 *
 * @param tableSpecs the table specs as in the constructor
 * @return the resulting {@link DataTableSpec}
 * @see #AppendedRowsTable(DataTable[])
 */
public static final DataTableSpec generateDataTableSpec(final DataTableSpec... tableSpecs) {
    // memorize the first column spec in the argument array for each column
    // name; we use it later on to initialize the column spec creator
    LinkedHashMap<String, DataColumnSpec> columnSet = new LinkedHashMap<String, DataColumnSpec>();
    LinkedHashMap<String, DataType> typeSet = new LinkedHashMap<String, DataType>();
    LinkedHashMap<String, DataColumnDomain> domainSet = new LinkedHashMap<String, DataColumnDomain>();
    // create final data table spec
    for (int i = 0; i < tableSpecs.length; i++) {
        DataTableSpec cur = tableSpecs[i];
        for (int c = 0; c < cur.getNumColumns(); c++) {
            DataColumnSpec colSpec = cur.getColumnSpec(c);
            String colName = colSpec.getName();
            // set the spec for this column if not yet done
            if (!columnSet.containsKey(colName)) {
                columnSet.put(colName, colSpec);
            }
            DataType colType = colSpec.getType();
            DataColumnDomain colDomain = colSpec.getDomain();
            // duplicates are welcome - but only if they match the type
            if (typeSet.containsKey(colName)) {
                DataType oldType = typeSet.get(colName);
                DataColumnDomain oldDomain = domainSet.get(colName);
                // the base type they share
                DataType type = DataType.getCommonSuperType(oldType, colType);
                assert type.isASuperTypeOf(oldType);
                assert type.isASuperTypeOf(colType);
                // that shouldn't happen, but it occasionally does
                if (!oldType.equals(type)) {
                    LOGGER.info("Conflicting data types for column \"" + colName + "\": "
                        + oldType.toString() + " vs. " + colType.toString() + "\n"
                        + "Using common base type " + type.toString());
                    // this must not change the order
                    typeSet.put(colName, type);
                }
                DataColumnDomain newDomain = merge(oldDomain, colDomain, type.getComparator());
                domainSet.put(colName, newDomain);
            } else {
                // first occurrence of this column name
                typeSet.put(colName, colType);
                domainSet.put(colName, colDomain);
            }
        } // for all columns in the current table spec
    } // for all tables
    DataColumnSpec[] colSpecs = new DataColumnSpec[typeSet.size()];
    int i = 0;
    for (Map.Entry<String, DataType> entry : typeSet.entrySet()) {
        String name = entry.getKey();
        DataType type = entry.getValue();
        // the domain is null if we did not remember it (e.g. "keepDomain" was false)
        DataColumnDomain domain = domainSet.get(name);
        DataColumnSpec initSpec = columnSet.get(name);
        DataColumnSpecCreator specCreator = new DataColumnSpecCreator(initSpec);
        specCreator.setDomain(domain);
        specCreator.setType(type);
        colSpecs[i++] = specCreator.createSpec();
    }
    return new DataTableSpec(colSpecs);
}
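The private merge(...) helper is not shown on this page. A plausible reconstruction, assuming the usual semantics: take the smaller lower bound, the larger upper bound, and the union of the possible values, and drop any piece of domain information that either input lacks. This is a sketch under those assumptions; the actual knime-core implementation may differ:

import java.util.LinkedHashSet;
import java.util.Set;
import org.knime.core.data.DataCell;
import org.knime.core.data.DataColumnDomain;
import org.knime.core.data.DataColumnDomainCreator;
import org.knime.core.data.DataValueComparator;

// Hypothetical reconstruction of the merge(...) helper used above.
static DataColumnDomain merge(final DataColumnDomain a, final DataColumnDomain b,
        final DataValueComparator comp) {
    DataColumnDomainCreator creator = new DataColumnDomainCreator();
    if (a.hasLowerBound() && b.hasLowerBound()) {
        // keep the smaller of the two lower bounds
        creator.setLowerBound(comp.compare(a.getLowerBound(), b.getLowerBound()) <= 0
            ? a.getLowerBound() : b.getLowerBound());
    }
    if (a.hasUpperBound() && b.hasUpperBound()) {
        // keep the larger of the two upper bounds
        creator.setUpperBound(comp.compare(a.getUpperBound(), b.getUpperBound()) >= 0
            ? a.getUpperBound() : b.getUpperBound());
    }
    if (a.hasValues() && b.hasValues()) {
        // union of the possible values, preserving encounter order
        Set<DataCell> union = new LinkedHashSet<>(a.getValues());
        union.addAll(b.getValues());
        creator.setValues(union);
    }
    return creator.createDomain();
}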
Use of org.knime.core.data.DataColumnDomain in project knime-core by knime.
The class PredictorHelper, method createOutTableSpec.
/**
 * Computes the output table's specification based on common node settings.
 *
 * @param dataSpec The input table {@link DataTableSpec}.
 * @param modelSpec The model {@link PMMLPortObjectSpec}.
 * @param addProbs Add the probability columns?
 * @param predictionCol Custom name of the prediction column.
 * @param shouldOverride Should we use that name?
 * @param suffix Suffix for probability columns.
 * @return The output table {@link DataTableSpec}.
 * @throws InvalidSettingsException Invalid settings for the prediction column name.
 */
public DataTableSpec createOutTableSpec(final PortObjectSpec dataSpec, final PortObjectSpec modelSpec,
        final boolean addProbs, final String predictionCol, final boolean shouldOverride,
        final String suffix) throws InvalidSettingsException {
    CheckUtils.checkSettingNotNull(predictionCol, "Prediction column name cannot be null");
    CheckUtils.checkSetting(!predictionCol.trim().isEmpty(), "Prediction column name cannot be empty");
    List<DataCell> predValues = null;
    if (addProbs) {
        predValues = getPredictionValues((PMMLPortObjectSpec) modelSpec);
        if (predValues == null) {
            // no out spec can be determined
            return null;
        }
    }
    int numCols = (predValues == null ? 0 : predValues.size()) + 1;
    DataTableSpec inSpec = (DataTableSpec) dataSpec;
    DataColumnSpec[] newCols = new DataColumnSpec[numCols];
    /* Set bar renderer and domain [0,1] as default for the double cells
     * containing the distribution. */
    // DataColumnProperties propsRendering = new DataColumnProperties(
    //     Collections.singletonMap(
    //         DataValueRenderer.PROPERTY_PREFERRED_RENDERER,
    //         DoubleBarRenderer.DESCRIPTION));
    DataColumnDomain domain = new DataColumnDomainCreator(new DoubleCell(0.0),
        new DoubleCell(1.0)).createDomain();
    String trainingColumnName = ((PMMLPortObjectSpec) modelSpec).getTargetFields().iterator().next();
    // add all distribution columns
    for (int i = 0; i < numCols - 1; i++) {
        assert predValues != null;
        DataColumnSpecCreator colSpecCreator = new DataColumnSpecCreator(
            probabilityColumnName(trainingColumnName, predValues.get(i).toString(), suffix),
            DoubleCell.TYPE);
        // colSpecCreator.setProperties(propsRendering);
        colSpecCreator.setDomain(domain);
        newCols[i] = colSpecCreator.createSpec();
    }
    // add the prediction column
    String predictionColumnName = computePredictionColumnName(predictionCol, shouldOverride, trainingColumnName);
    newCols[numCols - 1] = new DataColumnSpecCreator(predictionColumnName, StringCell.TYPE).createSpec();
    DataTableSpec newColSpec = new DataTableSpec(newCols);
    return new DataTableSpec(inSpec, newColSpec);
}
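The helpers probabilityColumnName(...) and computePredictionColumnName(...) encapsulate PredictorHelper's naming conventions and are not shown here. For illustration only: KNIME predictors conventionally name probability columns along the lines of "P (<target>=<value>)" plus the user-supplied suffix. The sketch below encodes that assumed convention, not the verified implementation; consult PredictorHelper for the exact format:

// Assumed naming convention, for illustration only.
static String probabilityColumnName(final String trainingCol, final String value,
        final String suffix) {
    return "P (" + trainingCol + "=" + value + ")" + suffix;
}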
Use of org.knime.core.data.DataColumnDomain in project knime-core by knime.
The class NaiveBayesLearnerNodeModel2, method configure.
/**
 * {@inheritDoc}
 */
@Override
protected PortObjectSpec[] configure(final PortObjectSpec[] inSpecs) throws InvalidSettingsException {
    // check the internal variables if they are valid
    final PortObjectSpec inSpec = inSpecs[TRAINING_DATA_PORT];
    if (!(inSpec instanceof DataTableSpec)) {
        throw new IllegalArgumentException("Invalid input data");
    }
    final DataTableSpec tableSpec = (DataTableSpec) inSpec;
    if (m_classifyColumnName.getStringValue() == null) {
        String predictedClassName = null;
        for (DataColumnSpec colSpec : tableSpec) {
            if (colSpec.getType().isCompatible(NominalValue.class)) {
                if (predictedClassName == null) {
                    predictedClassName = colSpec.getName();
                } else {
                    throw new InvalidSettingsException("Please define the classification column");
                }
            }
        }
        m_classifyColumnName.setStringValue(predictedClassName);
        setWarningMessage("Classification column preset to " + predictedClassName);
    }
    final String classColumn = m_classifyColumnName.getStringValue();
    final DataColumnSpec classColSpec = tableSpec.getColumnSpec(classColumn);
    if (classColSpec == null) {
        throw new InvalidSettingsException("Classification column not found in input table");
    }
    if (tableSpec.getNumColumns() < 2) {
        throw new InvalidSettingsException("Input table should contain at least 2 columns");
    }
    final int maxNoOfNominalVals = m_maxNoOfNominalVals.getIntValue();
    // check each nominal column with a valid domain for more values than allowed;
    // this needs to be in sync with the NaiveBayesModel.createModelMap method!
    final List<String> ignoredColumns = new LinkedList<>();
    final List<String> tooBigNominalColumns = new LinkedList<>();
    final List<String> learnCols = new LinkedList<>();
    for (final DataColumnSpec colSpec : tableSpec) {
        final AttributeModel model = NaiveBayesModel.getCompatibleModel(colSpec, classColumn,
            maxNoOfNominalVals, m_ignoreMissingVals.getBooleanValue(), m_pmmlCompatible.getBooleanValue());
        if (model == null) {
            // the column type is not supported by Naive Bayes
            ignoredColumns.add(colSpec.getName());
            continue;
        }
        final DataType colType = colSpec.getType();
        if (colType.isCompatible(NominalValue.class)) {
            final DataColumnDomain domain = colSpec.getDomain();
            if (domain != null && domain.getValues() != null) {
                if (domain.getValues().size() > maxNoOfNominalVals) {
                    // the domain contains too many unique values
                    if (colSpec.getName().equals(classColumn)) {
                        // the class column contains too many unique values
                        throw new InvalidSettingsException("Class column domain contains too many unique values"
                            + " (count: " + domain.getValues().size() + ")");
                    }
                    tooBigNominalColumns.add(colSpec.getName()
                        + " (count: " + domain.getValues().size() + ")");
                }
            }
            learnCols.add(model.getAttributeName());
        }
    }
    warningMessage("The following columns will possibly be skipped due to too many values: ",
        tooBigNominalColumns);
    warningMessage("The following columns are not supported and thus will be ignored: ",
        ignoredColumns);
    if (learnCols.size() < 1) {
        throw new InvalidSettingsException("Not enough valid columns");
    }
    final PMMLPortObjectSpec modelSpec = m_pmmlInEnabled ? (PMMLPortObjectSpec) inSpecs[MODEL_INPORT] : null;
    final PMMLPortObjectSpec pmmlSpec = createPMMLSpec(tableSpec, modelSpec, learnCols, classColumn);
    return new PortObjectSpec[] { pmmlSpec,
        NaiveBayesModel.createStatisticsTableSpec(classColSpec.getType(), m_ignoreMissingVals.getBooleanValue()) };
}
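The nominal-column check above boils down to one question: does the column's domain carry a value set, and is that set small enough to enumerate? Distilled as a standalone predicate, using only the DataColumnSpec/DataColumnDomain API already shown (the helper itself is hypothetical):

import org.knime.core.data.DataColumnDomain;
import org.knime.core.data.DataColumnSpec;

// Hypothetical predicate: true if the column has an enumerated domain that
// does not exceed the configured maximum number of nominal values.
static boolean fitsNominalLimit(final DataColumnSpec colSpec, final int maxNoOfNominalVals) {
    final DataColumnDomain domain = colSpec.getDomain();
    return domain != null && domain.getValues() != null
        && domain.getValues().size() <= maxNoOfNominalVals;
}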