Search in sources :

Example 6 with DataColumnDomainCreator

use of org.knime.core.data.DataColumnDomainCreator in project knime-core by knime.

the class AutoBinner method calcDomainBoundsIfNeccessary.

/**
 * Determines the per column min/max values of the given data if not already present in the domain.
 *
 * <p>Only columns named in {@code recalcValuesFor} whose domain has no bounds are scanned. Missing
 * cells are skipped. A scanned column for which no comparable value was found (e.g. only missing
 * cells) keeps its original column spec, as do all columns that were not scanned.
 *
 * @param data the data
 * @param exec the execution context (used for progress, cancellation and creating the result table)
 * @param recalcValuesFor The names of the columns to determine the bounds for; if {@code null} or
 *            empty, {@code data} is returned unchanged
 * @return The data with extended domain information, or {@code data} itself if nothing had to be
 *         calculated
 * @throws InvalidSettingsException if a listed column does not exist or is not numeric
 * @throws CanceledExecutionException if the execution was canceled while scanning the rows
 */
public BufferedDataTable calcDomainBoundsIfNeccessary(final BufferedDataTable data, final ExecutionContext exec, final List<String> recalcValuesFor) throws InvalidSettingsException, CanceledExecutionException {
    if (null == recalcValuesFor || recalcValuesFor.isEmpty()) {
        return data;
    }
    final DataTableSpec inSpec = data.getDataTableSpec();
    // indices of the columns whose bounds have to be calculated
    List<Integer> valuesI = new ArrayList<Integer>();
    for (String colName : recalcValuesFor) {
        DataColumnSpec colSpec = inSpec.getColumnSpec(colName);
        if (colSpec == null) {
            // unknown column name: report it as a settings problem instead of failing with a NullPointerException
            throw new InvalidSettingsException("The column \"" + colName + "\" does not exist in the input table.");
        }
        if (!colSpec.getType().isCompatible(DoubleValue.class)) {
            throw new InvalidSettingsException("Can only process numeric " + "data. The column \"" + colSpec.getName() + "\" is not numeric.");
        }
        if (!colSpec.getDomain().hasBounds()) {
            Integer colIndex = inSpec.findColumnIndex(colName);
            // a column listed twice only needs to be scanned once
            if (!valuesI.contains(colIndex)) {
                valuesI.add(colIndex);
            }
        }
    }
    if (valuesI.isEmpty()) {
        return data;
    }
    Map<Integer, Double> min = new HashMap<Integer, Double>();
    Map<Integer, Double> max = new HashMap<Integer, Double>();
    for (int col : valuesI) {
        // Double.MIN_VALUE is the smallest POSITIVE double and therefore unsuitable as start value for a maximum
        min.put(col, Double.POSITIVE_INFINITY);
        max.put(col, Double.NEGATIVE_INFINITY);
    }
    int c = 0;
    for (DataRow row : data) {
        c++;
        exec.checkCanceled();
        exec.setProgress(c / (double) data.getRowCount());
        for (int col : valuesI) {
            if (row.getCell(col).isMissing()) {
                // missing cells carry no value and must not be cast to DoubleValue
                continue;
            }
            double val = ((DoubleValue) row.getCell(col)).getDoubleValue();
            if (min.get(col) > val) {
                min.put(col, val);
            }
            if (max.get(col) < val) {
                max.put(col, val);
            }
        }
    }
    List<DataColumnSpec> newColSpecList = new ArrayList<DataColumnSpec>();
    int cc = 0;
    for (DataColumnSpec columnSpec : inSpec) {
        Double lower = min.get(cc);
        Double upper = max.get(cc);
        // Only scanned columns have map entries; lower > upper means that no value was seen at all.
        if (lower != null && upper != null && lower <= upper) {
            DataColumnSpecCreator specCreator = new DataColumnSpecCreator(columnSpec);
            DataColumnDomainCreator domainCreator = new DataColumnDomainCreator(new DoubleCell(lower), new DoubleCell(upper));
            specCreator.setDomain(domainCreator.createDomain());
            newColSpecList.add(specCreator.createSpec());
        } else {
            newColSpecList.add(columnSpec);
        }
        cc++;
    }
    DataTableSpec spec = new DataTableSpec(newColSpecList.toArray(new DataColumnSpec[0]));
    return exec.createSpecReplacerTable(data, spec);
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) DoubleCell(org.knime.core.data.def.DoubleCell) ArrayList(java.util.ArrayList) DataColumnDomainCreator(org.knime.core.data.DataColumnDomainCreator) DataRow(org.knime.core.data.DataRow) DataColumnSpec(org.knime.core.data.DataColumnSpec) DoubleValue(org.knime.core.data.DoubleValue) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) BufferedDataTable(org.knime.core.node.BufferedDataTable)

Example 7 with DataColumnDomainCreator

use of org.knime.core.data.DataColumnDomainCreator in project knime-core by knime.

the class NormalizerNodeModel method calculate.

/**
 * New normalized {@link org.knime.core.data.DataTable} is created depending
 * on the mode.
 *
 * @param inData The input data.
 * @param exec For BufferedDataTable creation and progress.
 * @return the result of the calculation
 * @throws Exception If the node calculation fails for any reason.
 */
protected CalculationResult calculate(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    final BufferedDataTable input = (BufferedDataTable) inData[0];
    final DataTableSpec inputSpec = input.getSpec();
    // extract selected numeric columns
    updateNumericColumnSelection(inputSpec);
    final Normalizer normalizer = new Normalizer(input, m_columns);
    final long totalRows = input.size();
    // the first 30% of the progress are reserved for the preparation of the transformation
    final ExecutionMonitor prepareMonitor = exec.createSubProgress(0.3);
    AffineTransTable transformed;
    boolean clampDomainToMinMax = false;
    switch (m_mode) {
        case NONORM_MODE:
            // nothing to normalize: hand back the input together with an empty model
            return new CalculationResult(input, new DataTableSpec(), new AffineTransConfiguration());
        case MINMAX_MODE:
            clampDomainToMinMax = true;
            transformed = normalizer.doMinMaxNorm(m_max, m_min, prepareMonitor);
            break;
        case ZSCORE_MODE:
            transformed = normalizer.doZScoreNorm(prepareMonitor);
            break;
        case DECIMALSCALING_MODE:
            transformed = normalizer.doDecimalScaling(prepareMonitor);
            break;
        default:
            throw new Exception("No mode set");
    }
    if (transformed.getErrorMessage() != null) {
        // the transformation itself failed: abort
        throw new Exception(transformed.getErrorMessage());
    }
    if (normalizer.getErrorMessage() != null) {
        // a problem during initialization is only reported as a warning
        setWarningMessage(normalizer.getErrorMessage());
    }
    final DataTableSpec modelSpec = FilterColumnTable.createFilterTableSpec(inputSpec, m_columns);
    final AffineTransConfiguration configuration = transformed.getConfiguration();
    DataTableSpec outSpec = transformed.getDataTableSpec();
    // Min/max mode: overwrite the domain bounds of the normalized columns with the configured
    // min/max, because the transformation is not guaranteed to snap to min/max.
    if (clampDomainToMinMax) {
        final int numColumns = outSpec.getNumColumns();
        final DataColumnSpec[] patchedSpecs = new DataColumnSpec[numColumns];
        for (int col = 0; col < numColumns; col++) {
            patchedSpecs[col] = outSpec.getColumnSpec(col);
        }
        for (int k = 0; k < m_columns.length; k++) {
            final int target = outSpec.findColumnIndex(m_columns[k]);
            final DataColumnSpecCreator specCreator = new DataColumnSpecCreator(patchedSpecs[target]);
            final DataColumnDomainCreator domainCreator = new DataColumnDomainCreator(patchedSpecs[target].getDomain());
            domainCreator.setLowerBound(new DoubleCell(m_min));
            domainCreator.setUpperBound(new DoubleCell(m_max));
            specCreator.setDomain(domainCreator.createDomain());
            patchedSpecs[target] = specCreator.createSpec();
        }
        outSpec = new DataTableSpec(outSpec.getName(), patchedSpecs);
    }
    // the remaining 70% of the progress cover copying the normalized rows into the output
    final ExecutionMonitor copyMonitor = exec.createSubProgress(.7);
    final BufferedDataContainer output = exec.createDataContainer(outSpec);
    long rowNo = 0;
    for (DataRow normalizedRow : transformed) {
        rowNo++;
        copyMonitor.checkCanceled();
        copyMonitor.setProgress(rowNo / (double) totalRows, "Normalizing row no. " + rowNo + " of " + totalRows + " (\"" + normalizedRow.getKey() + "\")");
        output.addRowToTable(normalizedRow);
    }
    output.close();
    return new CalculationResult(output.getTable(), modelSpec, configuration);
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) Normalizer(org.knime.base.data.normalize.Normalizer) DoubleCell(org.knime.core.data.def.DoubleCell) DataColumnDomainCreator(org.knime.core.data.DataColumnDomainCreator) DataRow(org.knime.core.data.DataRow) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) CanceledExecutionException(org.knime.core.node.CanceledExecutionException) IOException(java.io.IOException) DataColumnSpec(org.knime.core.data.DataColumnSpec) BufferedDataTable(org.knime.core.node.BufferedDataTable) AffineTransTable(org.knime.base.data.normalize.AffineTransTable) AffineTransConfiguration(org.knime.base.data.normalize.AffineTransConfiguration) ExecutionMonitor(org.knime.core.node.ExecutionMonitor)

Example 8 with DataColumnDomainCreator

use of org.knime.core.data.DataColumnDomainCreator in project knime-core by knime.

the class MissingValueHandling3Table method createTableSpecPrivate.

/*
 * Private helper that assumes the ColSetting to have the right format (one setting per column).
 * For every column that is handled with a fixed replacement value, the domain is extended so
 * that it also covers that value; all other column specs are taken over as they are.
 */
private static DataTableSpec createTableSpecPrivate(final DataTableSpec spec, final MissingValueHandling2ColSetting[] sets) {
    assert (spec.getNumColumns() == sets.length);
    final DataColumnSpec[] result = new DataColumnSpec[sets.length];
    for (int idx = 0; idx < sets.length; idx++) {
        final DataColumnSpec original = spec.getColumnSpec(idx);
        result[idx] = original;
        if (sets[idx].getMethod() != MissingValueHandling2ColSetting.METHOD_FIX_VAL) {
            continue;
        }
        final DataColumnDomain domain = original.getDomain();
        final Comparator<DataCell> order = original.getType().getComparator();
        final DataCell replacement = sets[idx].getFixCell();

        // Bounds are not supposed to be missing cells (but rather null); guard against it anyway.
        DataCell lower = domain.getLowerBound();
        final boolean lowerExtended = lower != null && !lower.isMissing() && (order.compare(replacement, lower) < 0);
        if (lowerExtended) {
            lower = replacement;
        }

        DataCell upper = domain.getUpperBound();
        final boolean upperExtended = upper != null && !upper.isMissing() && (order.compare(replacement, upper) > 0);
        if (upperExtended) {
            upper = replacement;
        }

        Set<DataCell> possibleValues = domain.getValues();
        final boolean valueAdded = possibleValues != null && !possibleValues.contains(replacement);
        if (valueAdded) {
            // copy before adding so that the set obtained from the domain is left untouched
            possibleValues = new LinkedHashSet<DataCell>(possibleValues);
            possibleValues.add(replacement);
        }

        if (lowerExtended || upperExtended || valueAdded) {
            final DataColumnDomain extended = new DataColumnDomainCreator(possibleValues, lower, upper).createDomain();
            final DataColumnSpecCreator specCreator = new DataColumnSpecCreator(original);
            specCreator.setDomain(extended);
            result[idx] = specCreator.createSpec();
        }
    }
    return new DataTableSpec(result);
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpec(org.knime.core.data.DataColumnSpec) DataColumnDomain(org.knime.core.data.DataColumnDomain) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) DataCell(org.knime.core.data.DataCell) DataColumnDomainCreator(org.knime.core.data.DataColumnDomainCreator)

Example 9 with DataColumnDomainCreator

use of org.knime.core.data.DataColumnDomainCreator in project knime-core by knime.

the class PMMLDataDictionaryTranslator method addColSpecsForDataFields.

/**
 * Creates one column spec per data field of the PMML data dictionary and appends it to
 * {@code colSpecs}. The name of every processed field is also added to {@code m_dictFields}.
 *
 * <p>Domain information is derived as follows:
 * <ul>
 * <li>string columns: the possible values are taken from the field's value array,</li>
 * <li>numeric columns: the bounds are taken from the first interval of the field or, if there is
 * no interval, from the minimum/maximum of the field's values.</li>
 * </ul>
 *
 * @param pmmlDoc the PMML document to analyze
 * @param colSpecs the list to add the data column specs to
 * @throws IllegalArgumentException if intervals are defined for a string field or if a value of a
 *             numeric field cannot be parsed as a double
 */
private void addColSpecsForDataFields(final PMMLDocument pmmlDoc, final List<DataColumnSpec> colSpecs) {
    DataDictionary dict = pmmlDoc.getPMML().getDataDictionary();
    for (DataField dataField : dict.getDataFieldArray()) {
        String name = dataField.getName();
        DataType dataType = getKNIMEDataType(dataField.getDataType());
        DataColumnSpecCreator specCreator = new DataColumnSpecCreator(name, dataType);
        // NOTE(review): stays null for nominal non-string types and for types that are neither
        // nominal nor numeric; setDomain(null) presumably falls back to an empty domain - confirm.
        DataColumnDomain domain = null;
        if (dataType.isCompatible(NominalValue.class)) {
            Value[] valueArray = dataField.getValueArray();
            if (DataType.getType(StringCell.class).equals(dataType)) {
                if (dataField.getIntervalArray().length > 0) {
                    throw new IllegalArgumentException("Intervals cannot be defined for Strings.");
                }
                DataCell[] cells = new StringCell[valueArray.length];
                for (int j = 0; j < cells.length; j++) {
                    cells[j] = new StringCell(valueArray[j].getValue());
                }
                domain = new DataColumnDomainCreator(cells).createDomain();
            }
        } else if (dataType.isCompatible(DoubleValue.class)) {
            Double leftMargin = null;
            Double rightMargin = null;
            Interval[] intervalArray = dataField.getIntervalArray();
            if (intervalArray != null && intervalArray.length > 0) {
                // only the first interval is considered
                Interval interval = dataField.getIntervalArray(0);
                leftMargin = interval.getLeftMargin();
                rightMargin = interval.getRightMargin();
            } else if (dataField.getValueArray() != null && dataField.getValueArray().length > 0) {
                // try to derive the bounds from the values
                Value[] valueArray = dataField.getValueArray();
                List<Double> values = new ArrayList<Double>();
                for (int j = 0; j < valueArray.length; j++) {
                    String value = "";
                    try {
                        value = valueArray[j].getValue();
                        values.add(Double.parseDouble(value));
                    } catch (Exception e) {
                        // keep the original exception as cause so that the root problem stays visible
                        throw new IllegalArgumentException("Skipping domain calculation. " + "Value \"" + value + "\" cannot be cast to double.", e);
                    }
                }
                leftMargin = Collections.min(values);
                rightMargin = Collections.max(values);
            }
            if (leftMargin != null && rightMargin != null) {
                // set the bounds of the domain if available
                DataCell lowerBound = null;
                DataCell upperBound = null;
                if (DataType.getType(IntCell.class).equals(dataType)) {
                    // margins are doubles; intValue() drops the fractional part
                    lowerBound = new IntCell(leftMargin.intValue());
                    upperBound = new IntCell(rightMargin.intValue());
                } else if (DataType.getType(DoubleCell.class).equals(dataType)) {
                    lowerBound = new DoubleCell(leftMargin);
                    upperBound = new DoubleCell(rightMargin);
                }
                // for any other numeric type both bounds stay null
                domain = new DataColumnDomainCreator(lowerBound, upperBound).createDomain();
            } else {
                domain = new DataColumnDomainCreator().createDomain();
            }
        }
        specCreator.setDomain(domain);
        colSpecs.add(specCreator.createSpec());
        m_dictFields.add(name);
    }
}
Also used : DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) DoubleCell(org.knime.core.data.def.DoubleCell) ArrayList(java.util.ArrayList) DataColumnDomainCreator(org.knime.core.data.DataColumnDomainCreator) DataDictionary(org.dmg.pmml.DataDictionaryDocument.DataDictionary) IntCell(org.knime.core.data.def.IntCell) DataColumnDomain(org.knime.core.data.DataColumnDomain) DataField(org.dmg.pmml.DataFieldDocument.DataField) StringCell(org.knime.core.data.def.StringCell) DoubleValue(org.knime.core.data.DoubleValue) NominalValue(org.knime.core.data.NominalValue) BooleanValue(org.knime.core.data.BooleanValue) IntValue(org.knime.core.data.IntValue) Value(org.dmg.pmml.ValueDocument.Value) DoubleValue(org.knime.core.data.DoubleValue) DataType(org.knime.core.data.DataType) DataCell(org.knime.core.data.DataCell) Interval(org.dmg.pmml.IntervalDocument.Interval)

Example 10 with DataColumnDomainCreator

use of org.knime.core.data.DataColumnDomainCreator in project knime-core by knime.

the class TreeNominalColumnDataTest method createPCATestData.

/**
 * Creates a nominal attribute column ("test-col", values A-E) together with a nominal target
 * column ("target-col", classes T1-T3). The number of rows per attribute value / class
 * combination is given by a fixed distribution table; row keys are numbered consecutively.
 *
 * @param config the learner configuration used to create the attribute column data
 * @return pair of the attribute column data and the target column data
 */
private static Pair<TreeNominalColumnData, TreeTargetNominalColumnData> createPCATestData(final TreeEnsembleLearnerConfiguration config) {
    final String[] attVals = { "A", "B", "C", "D", "E" };
    final String[] classes = { "T1", "T2", "T3" };
    // classDistributions[a][t] = number of rows with attribute value a and class t
    final int[][] classDistributions = {
        { 40, 10, 10 },
        { 10, 40, 10 },
        { 20, 30, 10 },
        { 20, 15, 25 },
        { 10, 5, 45 }
    };

    final DataColumnSpec attributeSpec = new DataColumnSpecCreator("test-col", StringCell.TYPE).createSpec();
    final TreeNominalColumnDataCreator attributeCreator = new TreeNominalColumnDataCreator(attributeSpec);

    // the target column gets the distinct class names as possible values of its domain
    final DataColumnSpecCreator targetSpecCreator = new DataColumnSpecCreator("target-col", StringCell.TYPE);
    final StringCell[] classCells = Arrays.stream(classes).distinct().map(StringCell::new).toArray(StringCell[]::new);
    targetSpecCreator.setDomain(new DataColumnDomainCreator(classCells).createDomain());
    final DataColumnSpec targetSpec = targetSpecCreator.createSpec();
    final TreeTargetColumnDataCreator targetCreator = new TreeTargetNominalColumnDataCreator(targetSpec);

    long nextRowId = 0;
    for (int a = 0; a < attVals.length; a++) {
        final int[] rowsPerClass = classDistributions[a];
        for (int t = 0; t < classes.length; t++) {
            for (int remaining = rowsPerClass[t]; remaining > 0; remaining--) {
                final RowKey key = RowKey.createRowKey(nextRowId++);
                attributeCreator.add(key, new StringCell(attVals[a]));
                targetCreator.add(key, new StringCell(classes[t]));
            }
        }
    }

    final TreeNominalColumnData attributeData = attributeCreator.createColumnData(0, config);
    attributeData.getMetaData().setAttributeIndex(0);
    final TreeTargetNominalColumnData targetData = (TreeTargetNominalColumnData) targetCreator.createColumnData();
    return Pair.create(attributeData, targetData);
}
Also used : Arrays(java.util.Arrays) RandomData(org.apache.commons.math.random.RandomData) RowKey(org.knime.core.data.RowKey) IsInstanceOf.instanceOf(org.hamcrest.core.IsInstanceOf.instanceOf) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) SplitCriterion(org.knime.base.node.mine.treeensemble2.node.learner.TreeEnsembleLearnerConfiguration.SplitCriterion) DataMemberships(org.knime.base.node.mine.treeensemble2.data.memberships.DataMemberships) TreeNodeNominalCondition(org.knime.base.node.mine.treeensemble2.model.TreeNodeNominalCondition) Pair(org.knime.core.util.Pair) Assert.assertThat(org.junit.Assert.assertThat) ColumnSamplingMode(org.knime.base.node.mine.treeensemble2.node.learner.TreeEnsembleLearnerConfiguration.ColumnSamplingMode) TreeEnsembleLearnerConfiguration(org.knime.base.node.mine.treeensemble2.node.learner.TreeEnsembleLearnerConfiguration) DataColumnSpec(org.knime.core.data.DataColumnSpec) DataColumnDomainCreator(org.knime.core.data.DataColumnDomainCreator) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) Assert.assertArrayEquals(org.junit.Assert.assertArrayEquals) NominalMultiwaySplitCandidate(org.knime.base.node.mine.treeensemble2.learner.NominalMultiwaySplitCandidate) SetLogic(org.knime.base.node.mine.treeensemble2.model.TreeNodeNominalBinaryCondition.SetLogic) NominalBinarySplitCandidate(org.knime.base.node.mine.treeensemble2.learner.NominalBinarySplitCandidate) BigInteger(java.math.BigInteger) TreeNodeNominalBinaryCondition(org.knime.base.node.mine.treeensemble2.model.TreeNodeNominalBinaryCondition) SplitCandidate(org.knime.base.node.mine.treeensemble2.learner.SplitCandidate) TreeType(org.knime.base.node.mine.treeensemble2.model.AbstractTreeEnsembleModel.TreeType) Assert.assertNotNull(org.junit.Assert.assertNotNull) IDataIndexManager(org.knime.base.node.mine.treeensemble2.data.memberships.IDataIndexManager) RootDataMemberships(org.knime.base.node.mine.treeensemble2.data.memberships.RootDataMemberships) 
Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) DoubleCell(org.knime.core.data.def.DoubleCell) DefaultDataIndexManager(org.knime.base.node.mine.treeensemble2.data.memberships.DefaultDataIndexManager) Assert.assertNull(org.junit.Assert.assertNull) Assert.assertFalse(org.junit.Assert.assertFalse) StringCell(org.knime.core.data.def.StringCell) BitSet(java.util.BitSet) MissingValueHandling(org.knime.base.node.mine.treeensemble2.node.learner.TreeEnsembleLearnerConfiguration.MissingValueHandling) Assert.assertEquals(org.junit.Assert.assertEquals) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) RowKey(org.knime.core.data.RowKey) DataColumnDomainCreator(org.knime.core.data.DataColumnDomainCreator) DataColumnSpec(org.knime.core.data.DataColumnSpec) StringCell(org.knime.core.data.def.StringCell)

Aggregations

DataColumnDomainCreator (org.knime.core.data.DataColumnDomainCreator)57 DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator)51 DataColumnSpec (org.knime.core.data.DataColumnSpec)43 DoubleCell (org.knime.core.data.def.DoubleCell)28 DataCell (org.knime.core.data.DataCell)27 DataTableSpec (org.knime.core.data.DataTableSpec)26 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)15 ArrayList (java.util.ArrayList)14 DataColumnDomain (org.knime.core.data.DataColumnDomain)12 DataRow (org.knime.core.data.DataRow)12 DataType (org.knime.core.data.DataType)12 DoubleValue (org.knime.core.data.DoubleValue)11 StringCell (org.knime.core.data.def.StringCell)8 BufferedDataTable (org.knime.core.node.BufferedDataTable)7 LinkedHashSet (java.util.LinkedHashSet)6 Coordinate (org.knime.base.util.coordinate.Coordinate)6 HashMap (java.util.HashMap)5 HashSet (java.util.HashSet)5 LinkedHashMap (java.util.LinkedHashMap)5 NumericCoordinate (org.knime.base.util.coordinate.NumericCoordinate)5