Search in sources :

Example 26 with DataColumnDomainCreator

use of org.knime.core.data.DataColumnDomainCreator in project knime-core by knime.

the class ConditionalBoxPlotNodeModel method execute.

/**
 * {@inheritDoc}
 *
 * Groups the rows of the first input table by the value of the nominal column, computes the
 * box plot statistics of the numeric column for every group and stores the results in
 * {@code m_statistics}, {@code m_mildOutliers} and {@code m_extremeOutliers}.
 *
 * @param inData the input tables; only {@code inData[0]} is read
 * @param exec used for cancellation checks, progress reporting and output table creation
 * @return a single-element array holding the table produced by {@code createOutputTable}
 * @throws Exception if the execution is canceled or any of the called helpers fails
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    m_statistics = new LinkedHashMap<DataColumnSpec, double[]>();
    m_mildOutliers = new LinkedHashMap<String, Map<Double, Set<RowKey>>>();
    m_extremeOutliers = new LinkedHashMap<String, Map<Double, Set<RowKey>>>();
    // kept as double so that the progress division below is a floating point division
    double nrRows = inData[0].size();
    int rowCount = 0;
    int numericIndex = inData[0].getDataTableSpec().findColumnIndex(m_settings.numericColumn());
    int nominalIndex = inData[0].getDataTableSpec().findColumnIndex(m_settings.nominalColumn());
    // class name -> (numeric value -> keys of all rows having that value)
    Map<String, Map<Double, Set<RowKey>>> data = new LinkedHashMap<String, Map<Double, Set<RowKey>>>();
    // some default values .. if one column only has missing values.
    for (DataCell d : inData[0].getDataTableSpec().getColumnSpec(nominalIndex).getDomain().getValues()) {
        String name = ((StringValue) d).getStringValue();
        m_mildOutliers.put(name, new HashMap<Double, Set<RowKey>>());
        m_extremeOutliers.put(name, new HashMap<Double, Set<RowKey>>());
    }
    for (DataRow r : inData[0]) {
        exec.checkCanceled();
        exec.setProgress(rowCount++ / nrRows, "Separating...");
        if (!m_settings.showMissingValues()) {
            if (r.getCell(nominalIndex).isMissing()) {
                // missing cell in nominal values is unwanted?
                continue;
            }
        }
        // when missing values are shown, rows with a missing nominal cell are grouped under
        // the string representation of the missing cell
        String nominal = replaceSpaces(r.getCell(nominalIndex).toString());
        if (r.getCell(numericIndex).isMissing()) {
            // ignore missing cells in numeric column
            continue;
        }
        DoubleValue numeric = (DoubleValue) r.getCell(numericIndex);
        Map<Double, Set<RowKey>> map = data.get(nominal);
        if (map == null) {
            map = new LinkedHashMap<Double, Set<RowKey>>();
        }
        Set<RowKey> set = map.get(numeric.getDoubleValue());
        if (set == null) {
            set = new HashSet<RowKey>();
        }
        set.add(r.getKey());
        map.put(numeric.getDoubleValue(), set);
        data.put(nominal, map);
    }
    List<String> keys = new ArrayList<String>(data.keySet());
    boolean ignoreMissingValues = false;
    if (m_settings.showMissingValues() && !keys.contains(DataType.getMissingCell().toString())) {
        // we promised to create data for missing values..
        // if there aren't any.. we have to create them ourselves
        setWarningMessage("No missing values found.");
        ignoreMissingValues = true;
    }
    Collections.sort(keys);
    DataColumnSpec[] colSpecs = createColumnSpec(inData[0].getDataTableSpec().getColumnSpec(nominalIndex), ignoreMissingValues);
    if (keys.size() == 0) {
        setWarningMessage("All classes are empty.");
    }
    // index into colSpecs; advanced for every column, including the skipped ones
    int dataSetNr = 0;
    for (DataColumnSpec dcs : colSpecs) {
        String d = dcs.getName();
        if (data.get(d) == null || keys.size() == 0) {
            // no rows were collected for this class
            dataSetNr++;
            continue;
        }
        exec.checkCanceled();
        exec.setProgress(dataSetNr / (double) keys.size(), "Creating statistics");
        Map<Double, Set<RowKey>> extremeOutliers = new LinkedHashMap<Double, Set<RowKey>>();
        Map<Double, Set<RowKey>> mildOutliers = new LinkedHashMap<Double, Set<RowKey>>();
        double[] stats = calculateStatistic(data.get(d), mildOutliers, extremeOutliers);
        double minimum = stats[BoxPlotNodeModel.MIN];
        double maximum = stats[BoxPlotNodeModel.MAX];
        // replace the column spec by a copy whose domain spans the computed minimum and maximum
        DataColumnSpecCreator creator = new DataColumnSpecCreator(colSpecs[dataSetNr]);
        creator.setDomain(new DataColumnDomainCreator(new DoubleCell(minimum), new DoubleCell(maximum)).createDomain());
        colSpecs[dataSetNr] = creator.createSpec();
        m_statistics.put(colSpecs[dataSetNr], stats);
        m_mildOutliers.put(d, mildOutliers);
        m_extremeOutliers.put(d, extremeOutliers);
        dataSetNr++;
    }
    // build a table without rows that only carries the column specs and keep it in m_dataArray
    DataTableSpec dts = new DataTableSpec("MyTempTable", colSpecs);
    DataContainer cont = new DataContainer(dts);
    cont.close();
    m_dataArray = new DefaultDataArray(cont.getTable(), 1, 2);
    cont.dispose();
    if (ignoreMissingValues) {
        // insert a column spec for the missing value class at its sorted position (by name)
        DataColumnSpec[] temp = new DataColumnSpec[colSpecs.length + 1];
        DataColumnSpec missing = new DataColumnSpecCreator(DataType.getMissingCell().toString(), DataType.getMissingCell().getType()).createSpec();
        int i = 0;
        // the bounds check covers an empty colSpecs array and the case in which the missing
        // value name sorts after every existing column name
        while (i < colSpecs.length && missing.getName().compareTo(colSpecs[i].getName()) > 0) {
            temp[i] = colSpecs[i];
            i++;
        }
        temp[i++] = missing;
        while (i < temp.length) {
            temp[i] = colSpecs[i - 1];
            i++;
        }
        colSpecs = temp;
    }
    /* Save inSpec of the numeric column to provide the view a way to
         * consider the input domain for normalization. */
    m_numColSpec = inData[0].getDataTableSpec().getColumnSpec(numericIndex);
    return new BufferedDataTable[] { createOutputTable(inData[0].getDataTableSpec(), colSpecs, exec).getTable() };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) HashSet(java.util.HashSet) Set(java.util.Set) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) RowKey(org.knime.core.data.RowKey) DoubleCell(org.knime.core.data.def.DoubleCell) DefaultDataArray(org.knime.base.node.util.DefaultDataArray) ArrayList(java.util.ArrayList) DataRow(org.knime.core.data.DataRow) LinkedHashMap(java.util.LinkedHashMap) DataContainer(org.knime.core.data.container.DataContainer) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) DataColumnSpec(org.knime.core.data.DataColumnSpec) BufferedDataTable(org.knime.core.node.BufferedDataTable) StringValue(org.knime.core.data.StringValue) DataColumnDomainCreator(org.knime.core.data.DataColumnDomainCreator) DoubleValue(org.knime.core.data.DoubleValue) DataCell(org.knime.core.data.DataCell) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map)

Example 27 with DataColumnDomainCreator

use of org.knime.core.data.DataColumnDomainCreator in project knime-core by knime.

the class Normalizer3NodeModel method calculate.

/**
 * Creates the normalized {@link org.knime.core.data.DataTable} according to the configured mode.
 *
 * @param inData The input data.
 * @param exec For BufferedDataTable creation and progress.
 * @return the result of the calculation
 * @throws Exception If the node calculation fails for any reason.
 */
protected CalculationResult calculate(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    final BufferedDataTable inputTable = (BufferedDataTable) inData[0];
    final DataTableSpec inputSpec = inputTable.getSpec();
    // determine the numeric columns selected for normalization
    final String[] selectedColumns = getIncludedComlumns(inputSpec);
    final Normalizer2 normalizer = new Normalizer2(inputTable, selectedColumns);
    final long totalRows = inputTable.size();
    // the preparation step gets 30% of the overall progress
    final ExecutionContext prepareExec = exec.createSubExecutionContext(0.3);
    AffineTransTable normalized;
    boolean pinDomainBounds = false;
    switch(m_config.getMode()) {
        case MINMAX:
            pinDomainBounds = true;
            normalized = normalizer.doMinMaxNorm(m_config.getMax(), m_config.getMin(), prepareExec);
            break;
        case Z_SCORE:
            normalized = normalizer.doZScoreNorm(prepareExec);
            break;
        case DECIMALSCALING:
            normalized = normalizer.doDecimalScaling(prepareExec);
            break;
        default:
            throw new InvalidSettingsException("No mode set");
    }
    final String tableError = normalized.getErrorMessage();
    if (tableError != null) {
        // the normalization itself failed: abort with the reported message
        throw new Exception(tableError);
    }
    final String normalizerWarning = normalizer.getErrorMessage();
    if (normalizerWarning != null) {
        // a problem during initialization is only reported as a warning
        setWarningMessage(normalizerWarning);
    }
    final DataTableSpec modelSpec = FilterColumnTable.createFilterTableSpec(inputSpec, selectedColumns);
    final AffineTransConfiguration configuration = normalized.getConfiguration();
    DataTableSpec spec = normalized.getDataTableSpec();
    // for min/max normalization, set the domain bounds of the selected columns
    // to the configured minimum and maximum
    if (pinDomainBounds) {
        final int columnCount = spec.getNumColumns();
        final DataColumnSpec[] adjustedSpecs = new DataColumnSpec[columnCount];
        for (int c = 0; c < columnCount; c++) {
            adjustedSpecs[c] = spec.getColumnSpec(c);
        }
        for (final String columnName : selectedColumns) {
            final int position = spec.findColumnIndex(columnName);
            final DataColumnSpecCreator specCreator = new DataColumnSpecCreator(adjustedSpecs[position]);
            final DataColumnDomainCreator domainCreator = new DataColumnDomainCreator(adjustedSpecs[position].getDomain());
            domainCreator.setLowerBound(new DoubleCell(m_config.getMin()));
            domainCreator.setUpperBound(new DoubleCell(m_config.getMax()));
            specCreator.setDomain(domainCreator.createDomain());
            adjustedSpecs[position] = specCreator.createSpec();
        }
        spec = new DataTableSpec(spec.getName(), adjustedSpecs);
    }
    // copying the normalized rows gets the remaining 70% of the progress
    final ExecutionMonitor normExec = exec.createSubProgress(.7);
    final BufferedDataContainer container = exec.createDataContainer(spec);
    long processed = 0;
    for (final DataRow row : normalized) {
        processed++;
        normExec.checkCanceled();
        final String message = "Normalizing row no. " + processed + " of " + totalRows + " (\"" + row.getKey() + "\")";
        normExec.setProgress(processed / (double) totalRows, message);
        container.addRowToTable(row);
    }
    container.close();
    return new CalculationResult(container.getTable(), modelSpec, configuration);
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) Normalizer2(org.knime.base.data.normalize.Normalizer2) DoubleCell(org.knime.core.data.def.DoubleCell) DataColumnDomainCreator(org.knime.core.data.DataColumnDomainCreator) DataRow(org.knime.core.data.DataRow) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) CanceledExecutionException(org.knime.core.node.CanceledExecutionException) IOException(java.io.IOException) ExecutionContext(org.knime.core.node.ExecutionContext) DataColumnSpec(org.knime.core.data.DataColumnSpec) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) BufferedDataTable(org.knime.core.node.BufferedDataTable) AffineTransTable(org.knime.base.data.normalize.AffineTransTable) AffineTransConfiguration(org.knime.base.data.normalize.AffineTransConfiguration) ExecutionMonitor(org.knime.core.node.ExecutionMonitor)

Example 28 with DataColumnDomainCreator

use of org.knime.core.data.DataColumnDomainCreator in project knime-core by knime.

the class EditNominalDomainDicNodeModel method mergeTableSpecs.

/**
 * Builds a table spec creator that contains every column of the original spec in its
 * original order. Columns whose index is a key of the given map get a copy of their
 * spec in which the domain values are replaced by the mapped set.
 *
 * @param orgSpec the original table spec
 * @param orgIndexToNewDomainValuesMap maps a column index of {@code orgSpec} to the domain
 *            values to set on that column
 * @return the creator holding the merged column specs
 */
private static DataTableSpecCreator mergeTableSpecs(final DataTableSpec orgSpec, final Map<Integer, Set<DataCell>> orgIndexToNewDomainValuesMap) {
    final DataTableSpecCreator mergedCreator = new DataTableSpecCreator(orgSpec).dropAllColumns();
    final int columnCount = orgSpec.getNumColumns();
    for (int col = 0; col < columnCount; col++) {
        DataColumnSpec columnSpec = orgSpec.getColumnSpec(col);
        if (orgIndexToNewDomainValuesMap.containsKey(col)) {
            // start from the existing domain and only exchange its values
            final DataColumnDomainCreator domainCreator = new DataColumnDomainCreator(columnSpec.getDomain());
            domainCreator.setValues(orgIndexToNewDomainValuesMap.get(col));
            final DataColumnSpecCreator specCreator = new DataColumnSpecCreator(columnSpec);
            specCreator.setDomain(domainCreator.createDomain());
            columnSpec = specCreator.createSpec();
        }
        mergedCreator.addColumns(columnSpec);
    }
    return mergedCreator;
}
Also used : DataColumnSpec(org.knime.core.data.DataColumnSpec) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) DataTableSpecCreator(org.knime.core.data.DataTableSpecCreator) DataColumnDomainCreator(org.knime.core.data.DataColumnDomainCreator)

Example 29 with DataColumnDomainCreator

use of org.knime.core.data.DataColumnDomainCreator in project knime-core by knime.

the class EditNumericDomainNodeModel method processDomainSettings.

/**
 * Creates a copy of the given table spec in which every column selected by the configured
 * column filter gets a new domain: the bounds are taken from the configuration, the possible
 * values are carried over from the original column domain. All other columns are kept as is.
 * Problems with the column selection are reported through {@code setWarningMessage}.
 *
 * @param dataTableSpec the table spec to process
 * @return the table spec with the adjusted column domains
 * @throws InvalidSettingsException if no configuration is available; {@code createCell}
 *             (defined elsewhere) may presumably throw it as well - verify
 */
private DataTableSpec processDomainSettings(final DataTableSpec dataTableSpec) throws InvalidSettingsException {
    if (m_configuration == null) {
        throw new InvalidSettingsException("Missing Configuration.");
    }
    EditNumericDomainConfiguration config = m_configuration;
    FilterResult filterResult = config.getColumnspecFilterConfig().applyTo(dataTableSpec);
    List<DataColumnSpec> newColumnSpecs = new ArrayList<DataColumnSpec>(dataTableSpec.getNumColumns());
    String[] columnNames = dataTableSpec.getColumnNames();
    Set<String> includeSet = new HashSet<String>();
    Collections.addAll(includeSet, filterResult.getIncludes());
    for (int i = 0; i < dataTableSpec.getNumColumns(); i++) {
        DataColumnSpec columnSpec = dataTableSpec.getColumnSpec(i);
        String columnName = columnNames[i];
        if (includeSet.contains(columnName)) {
            DataColumnSpecCreator columnSpecCreator = new DataColumnSpecCreator(columnSpec);
            // new bounds come from the configuration
            DataColumnDomainCreator domainCreator = new DataColumnDomainCreator(
                createCell(columnName, columnSpec.getType(), config.getLowerBound()),
                createCell(columnName, columnSpec.getType(), config.getUpperBound()));
            // the possible values of the original domain are retained
            domainCreator.setValues(columnSpec.getDomain().getValues());
            columnSpecCreator.setDomain(domainCreator.createDomain());
            newColumnSpecs.add(columnSpecCreator.createSpec());
        } else {
            newColumnSpecs.add(columnSpec);
        }
    }
    StringBuilder warnings = new StringBuilder();
    if (includeSet.isEmpty()) {
        warnings.append("No columns are included.");
    }
    if (filterResult.getRemovedFromIncludes().length > 0) {
        // separate from a preceding warning only; avoids a message starting with a blank line
        if (warnings.length() > 0) {
            warnings.append('\n');
        }
        warnings.append("Following columns are configured but no longer exist: ");
        warnings.append(ConvenienceMethods.getShortStringFrom(Arrays.asList(filterResult.getRemovedFromIncludes()), 5));
    }
    if (warnings.length() > 0) {
        setWarningMessage(warnings.toString());
    }
    return new DataTableSpecCreator(dataTableSpec).dropAllColumns().addColumns(newColumnSpecs.toArray(new DataColumnSpec[newColumnSpecs.size()])).createSpec();
}
Also used : DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) DataTableSpecCreator(org.knime.core.data.DataTableSpecCreator) ArrayList(java.util.ArrayList) DataColumnDomainCreator(org.knime.core.data.DataColumnDomainCreator) DataColumnSpec(org.knime.core.data.DataColumnSpec) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) FilterResult(org.knime.core.node.util.filter.NameFilterConfiguration.FilterResult) HashSet(java.util.HashSet)

Example 30 with DataColumnDomainCreator

use of org.knime.core.data.DataColumnDomainCreator in project knime-core by knime.

the class PMCCPortObjectAndSpec method createOutSpec.

/**
 * Builds the spec of the correlation table: one double column per given name, each with
 * a domain bounded by {@code MIN_VALUE_CELL} and {@code MAX_VALUE_CELL}.
 *
 * @param names the column names being analyzed.
 * @return The new output spec.
 * @since 2.6
 */
public static DataTableSpec createOutSpec(final String[] names) {
    final DataColumnSpec[] columns = new DataColumnSpec[names.length];
    int position = 0;
    for (final String columnName : names) {
        final DataColumnSpecCreator specCreator = new DataColumnSpecCreator(columnName, DoubleCell.TYPE);
        final DataColumnDomainCreator domainCreator = new DataColumnDomainCreator(MIN_VALUE_CELL, MAX_VALUE_CELL);
        specCreator.setDomain(domainCreator.createDomain());
        columns[position++] = specCreator.createSpec();
    }
    return new DataTableSpec("Correlation values", columns);
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpec(org.knime.core.data.DataColumnSpec) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) DataColumnDomainCreator(org.knime.core.data.DataColumnDomainCreator)

Aggregations

DataColumnDomainCreator (org.knime.core.data.DataColumnDomainCreator)57 DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator)51 DataColumnSpec (org.knime.core.data.DataColumnSpec)43 DoubleCell (org.knime.core.data.def.DoubleCell)28 DataCell (org.knime.core.data.DataCell)27 DataTableSpec (org.knime.core.data.DataTableSpec)26 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)15 ArrayList (java.util.ArrayList)14 DataColumnDomain (org.knime.core.data.DataColumnDomain)12 DataRow (org.knime.core.data.DataRow)12 DataType (org.knime.core.data.DataType)12 DoubleValue (org.knime.core.data.DoubleValue)11 StringCell (org.knime.core.data.def.StringCell)8 BufferedDataTable (org.knime.core.node.BufferedDataTable)7 LinkedHashSet (java.util.LinkedHashSet)6 Coordinate (org.knime.base.util.coordinate.Coordinate)6 HashMap (java.util.HashMap)5 HashSet (java.util.HashSet)5 LinkedHashMap (java.util.LinkedHashMap)5 NumericCoordinate (org.knime.base.util.coordinate.NumericCoordinate)5