Search in sources :

Example 6 with DataColumnDomain

use of org.knime.core.data.DataColumnDomain in project knime-core by knime.

the class LogRegLearner method checkConstantLearningFields.

/**
 * Looks for numeric learning columns whose domain lower and upper bound are equal and, if any
 * are found, logs a warning, appends it to the stored warning message and re-initializes the
 * learner with those columns excluded.
 *
 * @param data the training table; its spec is handed to the re-initialization
 * @param inPMMLSpec the incoming PMML spec, passed through to the re-initialization
 * @throws InvalidSettingsException declared by this method; presumably raised by the
 *             re-initialization - verify against {@code init}
 */
private void checkConstantLearningFields(final BufferedDataTable data, final PMMLPortObjectSpec inPMMLSpec) throws InvalidSettingsException {
    final Set<String> constantColumns = new HashSet<String>();
    for (final DataColumnSpec learningCol : m_pmmlOutSpec.getLearningCols()) {
        if (!learningCol.getType().isCompatible(DoubleValue.class)) {
            // only numeric columns are inspected
            continue;
        }
        final DataColumnDomain colDomain = learningCol.getDomain();
        final DataCell min = colDomain.getLowerBound();
        final DataCell max = colDomain.getUpperBound();
        assert min != null || data.size() == 0 : "Non empty table must have domain set at this point";
        // null-safe comparison: two absent bounds also count as "equal"
        if (ObjectUtils.equals(min, max)) {
            constantColumns.add(learningCol.getName());
        }
    }
    if (constantColumns.isEmpty()) {
        return;
    }
    final boolean single = constantColumns.size() == 1;
    final String message = (single ? "Column " : "Columns ")
        + ConvenienceMethods.getShortStringFrom(constantColumns, 5)
        + (single ? " has a constant value " : " have constant values ")
        + " - will be ignored during training";
    LOGGER.warn(message);
    final String previous = m_warningMessage == null ? "" : m_warningMessage + "\n";
    m_warningMessage = previous + message;
    // re-init learner so that it has the correct learning columns
    init(data.getDataTableSpec(), inPMMLSpec, constantColumns);
}
Also used : DataColumnSpec(org.knime.core.data.DataColumnSpec) DataColumnDomain(org.knime.core.data.DataColumnDomain) DataCell(org.knime.core.data.DataCell) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet)

Example 7 with DataColumnDomain

use of org.knime.core.data.DataColumnDomain in project knime-core by knime.

the class AbstractMetaDataMapper method checkForVectorColumn.

/**
 * Rejects <b>colSpec</b> if it looks like it was derived from a vector column, i.e. its name
 * matches the vector field naming scheme and it carries no usable domain information
 * (no possible values for string columns, no bounds for numeric columns).
 *
 * @param colSpec {@link DataColumnSpec} to check
 * @throws IllegalArgumentException if <b>colSpec</b> could originate from a vector column
 */
private static void checkForVectorColumn(final DataColumnSpec colSpec) {
    final boolean possibleVectorName = TranslationUtil.isVectorFieldName(colSpec.getName());
    final DataType colType = colSpec.getType();
    final DataColumnDomain colDomain = colSpec.getDomain();
    final boolean domainMissing;
    if (colType.isCompatible(StringValue.class)) {
        // a string column needs a non-empty set of possible values
        domainMissing = !colDomain.hasValues() || colDomain.getValues().isEmpty();
    } else if (colType.isCompatible(DoubleValue.class)) {
        // a numeric column needs bounds
        domainMissing = !colDomain.hasBounds();
    } else {
        domainMissing = false;
    }
    final boolean looksLikeVector = possibleVectorName && domainMissing;
    CheckUtils.checkArgument(!looksLikeVector, "The column %s seems to " + "originate from a vector column. A model learned on a vector can currently not be imported.", colSpec);
}
Also used : DataColumnDomain(org.knime.core.data.DataColumnDomain) DoubleValue(org.knime.core.data.DoubleValue) DataType(org.knime.core.data.DataType)

Example 8 with DataColumnDomain

use of org.knime.core.data.DataColumnDomain in project knime-core by knime.

the class MissingValueHandling3Table method createTableSpecPrivate.

/*
 * Private helper that assumes the ColSetting array to have the right format (one setting per
 * column). For every column that is handled with a fixed replacement value, the column domain
 * is widened so that it also covers that value; all other column specs are passed through.
 */
private static DataTableSpec createTableSpecPrivate(final DataTableSpec spec, final MissingValueHandling2ColSetting[] sets) {
    assert (spec.getNumColumns() == sets.length);
    final int columnCount = sets.length;
    final DataColumnSpec[] result = new DataColumnSpec[columnCount];
    for (int col = 0; col < columnCount; col++) {
        final DataColumnSpec original = spec.getColumnSpec(col);
        // default: keep the spec untouched
        result[col] = original;
        final MissingValueHandling2ColSetting setting = sets[col];
        if (setting.getMethod() != MissingValueHandling2ColSetting.METHOD_FIX_VAL) {
            continue;
        }
        final DataColumnDomain oldDomain = original.getDomain();
        final Comparator<DataCell> order = original.getType().getComparator();
        final DataCell replacement = setting.getFixCell();

        // Bounds are guarded against both null and missing cells before being compared
        // (presumably a bound should be null rather than a missing cell, but it is
        // checked here anyway).
        DataCell lower = oldDomain.getLowerBound();
        final boolean lowerMoved = lower != null && !lower.isMissing() && order.compare(replacement, lower) < 0;
        if (lowerMoved) {
            lower = replacement;
        }
        DataCell upper = oldDomain.getUpperBound();
        final boolean upperMoved = upper != null && !upper.isMissing() && order.compare(replacement, upper) > 0;
        if (upperMoved) {
            upper = replacement;
        }
        Set<DataCell> possibleValues = oldDomain.getValues();
        final boolean valueAdded = possibleValues != null && !possibleValues.contains(replacement);
        if (valueAdded) {
            // copy before extending so the set obtained from the old domain is not modified
            possibleValues = new LinkedHashSet<DataCell>(possibleValues);
            possibleValues.add(replacement);
        }

        if (!(lowerMoved || upperMoved || valueAdded)) {
            continue;
        }
        final DataColumnSpecCreator specCreator = new DataColumnSpecCreator(original);
        specCreator.setDomain(new DataColumnDomainCreator(possibleValues, lower, upper).createDomain());
        result[col] = specCreator.createSpec();
    }
    return new DataTableSpec(result);
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpec(org.knime.core.data.DataColumnSpec) DataColumnDomain(org.knime.core.data.DataColumnDomain) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) DataCell(org.knime.core.data.DataCell) DataColumnDomainCreator(org.knime.core.data.DataColumnDomainCreator)

Example 9 with DataColumnDomain

use of org.knime.core.data.DataColumnDomain in project knime-core by knime.

the class PMMLDataDictionaryTranslator method addColSpecsForDataFields.

/**
 * Creates a column spec for every data field of the PMML data dictionary, appends it to
 * <b>colSpecs</b> and records the field name in {@code m_dictFields}.
 * <p>
 * For string fields the listed values become the possible values of the domain. For numeric
 * fields the bounds are taken from the first interval if one is present, otherwise they are
 * derived from the minimum and maximum of the listed values.
 *
 * @param pmmlDoc the PMML document to analyze
 * @param colSpecs the list to add the data column specs to
 * @throws IllegalArgumentException if a string field defines intervals, or if a value of a
 *             numeric field cannot be read as a double (the original failure is attached as
 *             cause)
 */
private void addColSpecsForDataFields(final PMMLDocument pmmlDoc, final List<DataColumnSpec> colSpecs) {
    DataDictionary dict = pmmlDoc.getPMML().getDataDictionary();
    for (DataField dataField : dict.getDataFieldArray()) {
        String name = dataField.getName();
        DataType dataType = getKNIMEDataType(dataField.getDataType());
        DataColumnSpecCreator specCreator = new DataColumnSpecCreator(name, dataType);
        // stays null for types handled by neither branch below (e.g. nominal but not String)
        DataColumnDomain domain = null;
        if (dataType.isCompatible(NominalValue.class)) {
            Value[] valueArray = dataField.getValueArray();
            if (DataType.getType(StringCell.class).equals(dataType)) {
                if (dataField.getIntervalArray().length > 0) {
                    throw new IllegalArgumentException("Intervals cannot be defined for Strings.");
                }
                DataCell[] cells = new StringCell[valueArray.length];
                for (int j = 0; j < cells.length; j++) {
                    cells[j] = new StringCell(valueArray[j].getValue());
                }
                domain = new DataColumnDomainCreator(cells).createDomain();
            }
        } else if (dataType.isCompatible(DoubleValue.class)) {
            Double leftMargin = null;
            Double rightMargin = null;
            Interval[] intervalArray = dataField.getIntervalArray();
            if (intervalArray != null && intervalArray.length > 0) {
                // only the first interval is considered
                Interval interval = intervalArray[0];
                leftMargin = interval.getLeftMargin();
                rightMargin = interval.getRightMargin();
            } else {
                Value[] valueArray = dataField.getValueArray();
                if (valueArray != null && valueArray.length > 0) {
                    // try to derive the bounds from the values
                    List<Double> values = new ArrayList<Double>(valueArray.length);
                    for (int j = 0; j < valueArray.length; j++) {
                        String value = "";
                        try {
                            value = valueArray[j].getValue();
                            values.add(Double.parseDouble(value));
                        } catch (Exception e) {
                            // keep the original failure as cause so it is not lost for debugging
                            throw new IllegalArgumentException("Skipping domain calculation. " + "Value \"" + value + "\" cannot be cast to double.", e);
                        }
                    }
                    leftMargin = Collections.min(values);
                    rightMargin = Collections.max(values);
                }
            }
            if (leftMargin != null && rightMargin != null) {
                // set the bounds of the domain if available
                DataCell lowerBound = null;
                DataCell upperBound = null;
                if (DataType.getType(IntCell.class).equals(dataType)) {
                    // margins are truncated to int for integer columns
                    lowerBound = new IntCell(leftMargin.intValue());
                    upperBound = new IntCell(rightMargin.intValue());
                } else if (DataType.getType(DoubleCell.class).equals(dataType)) {
                    lowerBound = new DoubleCell(leftMargin);
                    upperBound = new DoubleCell(rightMargin);
                }
                // for any other numeric type both bounds remain null
                domain = new DataColumnDomainCreator(lowerBound, upperBound).createDomain();
            } else {
                domain = new DataColumnDomainCreator().createDomain();
            }
        }
        specCreator.setDomain(domain);
        colSpecs.add(specCreator.createSpec());
        m_dictFields.add(name);
    }
}
Also used : DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) DoubleCell(org.knime.core.data.def.DoubleCell) ArrayList(java.util.ArrayList) DataColumnDomainCreator(org.knime.core.data.DataColumnDomainCreator) DataDictionary(org.dmg.pmml.DataDictionaryDocument.DataDictionary) IntCell(org.knime.core.data.def.IntCell) DataColumnDomain(org.knime.core.data.DataColumnDomain) DataField(org.dmg.pmml.DataFieldDocument.DataField) StringCell(org.knime.core.data.def.StringCell) DoubleValue(org.knime.core.data.DoubleValue) NominalValue(org.knime.core.data.NominalValue) BooleanValue(org.knime.core.data.BooleanValue) IntValue(org.knime.core.data.IntValue) Value(org.dmg.pmml.ValueDocument.Value) DoubleValue(org.knime.core.data.DoubleValue) DataType(org.knime.core.data.DataType) DataCell(org.knime.core.data.DataCell) Interval(org.dmg.pmml.IntervalDocument.Interval)

Example 10 with DataColumnDomain

use of org.knime.core.data.DataColumnDomain in project knime-core by knime.

the class DoubleBarRenderer method setValue.

/**
 * Sets the value according to the column domain's min/max. A {@link DoubleValue} is scaled
 * into the range given by the domain bounds (falling back to [0, 1] if no valid range is
 * available) and shown via the icon, with the raw number as tooltip. Any other object is
 * shown as the missing cell's text with the tooltip "Missing Value" and no icon.
 *
 * @param value The value to be rendered.
 * @see javax.swing.table.DefaultTableCellRenderer#setValue(Object)
 */
@Override
protected void setValue(final Object value) {
    if (!(value instanceof DoubleValue)) {
        setToolTipText("Missing Value");
        setIcon(null);
        setTextInternal(DataType.getMissingCell().toString());
        return;
    }
    final double val = ((DoubleValue) value).getDoubleValue();
    final DataColumnSpec spec = getColSpec();
    // start with an inverted range so that unset bounds trigger the fallback below
    double min = Double.POSITIVE_INFINITY;
    double max = Double.NEGATIVE_INFINITY;
    if (spec != null) {
        final DataColumnDomain domain = spec.getDomain();
        final DataCell lower = domain.getLowerBound();
        final DataCell upper = domain.getUpperBound();
        if (lower instanceof DoubleValue) {
            min = ((DoubleValue) lower).getDoubleValue();
        }
        if (upper instanceof DoubleValue) {
            max = ((DoubleValue) upper).getDoubleValue();
        }
    }
    if (min >= max) {
        // no usable range (missing bounds or constant column): use the unit interval
        min = 0.0;
        max = 1.0;
    }
    // the relative position is deliberately narrowed to float precision, as before
    final double fraction = (float) ((val - min) / (max - min));
    setToolTipText(Double.toString(val));
    setIconValue(fraction);
    setTextInternal(null);
}
Also used : DataColumnSpec(org.knime.core.data.DataColumnSpec) DataColumnDomain(org.knime.core.data.DataColumnDomain) DoubleValue(org.knime.core.data.DoubleValue) DataCell(org.knime.core.data.DataCell)

Aggregations

DataColumnDomain (org.knime.core.data.DataColumnDomain)46 DataColumnSpec (org.knime.core.data.DataColumnSpec)34 DataCell (org.knime.core.data.DataCell)32 DataTableSpec (org.knime.core.data.DataTableSpec)20 DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator)16 DoubleValue (org.knime.core.data.DoubleValue)13 DataColumnDomainCreator (org.knime.core.data.DataColumnDomainCreator)12 DataType (org.knime.core.data.DataType)11 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)8 SettingsModelString (org.knime.core.node.defaultnodesettings.SettingsModelString)7 PMMLPortObjectSpec (org.knime.core.node.port.pmml.PMMLPortObjectSpec)6 ArrayList (java.util.ArrayList)5 HashSet (java.util.HashSet)5 LinkedHashSet (java.util.LinkedHashSet)5 DoubleCell (org.knime.core.data.def.DoubleCell)5 HashMap (java.util.HashMap)3 LinkedHashMap (java.util.LinkedHashMap)3 LinkedList (java.util.LinkedList)3 Set (java.util.Set)3 DataRow (org.knime.core.data.DataRow)3