Search in sources:

Example 61 with DoubleValue

use of org.knime.core.data.DoubleValue in project knime-core by knime.

the class CAIMDiscretizationNodeModel method createResultTable.

/**
 * Builds a discretized copy of an input table. Every column that is reported
 * as included by the {@link DiscretizationModel} is turned into a nominal
 * {@link StringCell} column; all other columns are passed through untouched.
 * Missing cells in included columns are kept as they are.
 *
 * @param exec the context used to create the result container and to
 *            report progress
 * @param table the input data table
 * @param discretizationModel the {@link DiscretizationModel} providing the
 *            {@link DiscretizationScheme}s and the names of the included
 *            columns
 * @return the discretized input data
 */
public static BufferedDataTable createResultTable(final ExecutionContext exec, final BufferedDataTable table, final DiscretizationModel discretizationModel) {
    final DiscretizationScheme[] allSchemes = discretizationModel.getSchemes();
    final String[] includedColumnNames = discretizationModel.getIncludedColumnNames();
    // drop the schemes that belong to columns the table does not contain
    final DiscretizationScheme[] schemes = filterNotKnownSchemes(allSchemes, includedColumnNames, table.getDataTableSpec());

    final DataTableSpec inSpec = table.getDataTableSpec();
    final int numColumns = inSpec.getNumColumns();
    final DataColumnSpec[] outColumnSpecs = new DataColumnSpec[numColumns];
    // per column index: is this column discretized?
    final boolean[] discretize = new boolean[numColumns];
    for (int col = 0; col < numColumns; col++) {
        final DataColumnSpec inColumnSpec = inSpec.getColumnSpec(col);
        discretize[col] = isIncluded(inColumnSpec, includedColumnNames) > -1;
        // included columns become nominal string columns, the rest is kept as is
        outColumnSpecs[col] = discretize[col]
            ? new DataColumnSpecCreator(inColumnSpec.getName(), StringCell.TYPE).createSpec()
            : inColumnSpec;
    }

    final BufferedDataContainer container = exec.createDataContainer(new DataTableSpec(outColumnSpecs));

    final double totalRows = table.size();
    long rowIndex = 0;
    for (DataRow row : table) {
        // report progress only every 200 rows
        if (rowIndex % 200 == 0) {
            exec.setProgress(rowIndex / totalRows);
        }
        final int numCells = row.getNumCells();
        final DataCell[] outCells = new DataCell[numCells];
        // schemes are consumed in table column order, one per included column
        int schemeIndex = 0;
        for (int col = 0; col < numCells; col++) {
            final DataCell cell = row.getCell(col);
            if (!discretize[col]) {
                outCells[col] = cell;
                continue;
            }
            if (cell.isMissing()) {
                // missing values are not discretized
                outCells[col] = cell;
            } else {
                final double numericValue = ((DoubleValue) cell).getDoubleValue();
                outCells[col] = new StringCell(schemes[schemeIndex].getDiscreteValue(numericValue));
            }
            schemeIndex++;
        }
        container.addRowToTable(new DefaultRow(row.getKey(), outCells));
        rowIndex++;
    }
    container.close();
    return container.getTable();
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) DiscretizationScheme(org.knime.base.node.preproc.discretization.caim2.DiscretizationScheme) SettingsModelFilterString(org.knime.core.node.defaultnodesettings.SettingsModelFilterString) SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString) DataRow(org.knime.core.data.DataRow) DataColumnSpec(org.knime.core.data.DataColumnSpec) DoubleValue(org.knime.core.data.DoubleValue) StringCell(org.knime.core.data.def.StringCell) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow)

Example 62 with DoubleValue

use of org.knime.core.data.DoubleValue in project knime-core by knime.

the class ConditionalBoxPlotNodeModel method execute.

/**
 * {@inheritDoc}
 *
 * Groups the row keys of the first input table by the value of the nominal
 * column and, within each group, by the value of the numeric column. For
 * every resulting column spec the box plot statistics and the mild/extreme
 * outliers are computed and stored in the member fields; the statistics are
 * finally written to the output table via {@code createOutputTable}.
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    m_statistics = new LinkedHashMap<DataColumnSpec, double[]>();
    m_mildOutliers = new LinkedHashMap<String, Map<Double, Set<RowKey>>>();
    m_extremeOutliers = new LinkedHashMap<String, Map<Double, Set<RowKey>>>();
    double nrRows = inData[0].size();
    int rowCount = 0;
    int numericIndex = inData[0].getDataTableSpec().findColumnIndex(m_settings.numericColumn());
    int nominalIndex = inData[0].getDataTableSpec().findColumnIndex(m_settings.nominalColumn());
    // nominal value -> (numeric value -> keys of the rows carrying that value)
    Map<String, Map<Double, Set<RowKey>>> data = new LinkedHashMap<String, Map<Double, Set<RowKey>>>();
    // some default values .. if one column only has missing values.
    for (DataCell d : inData[0].getDataTableSpec().getColumnSpec(nominalIndex).getDomain().getValues()) {
        String name = ((StringValue) d).getStringValue();
        m_mildOutliers.put(name, new HashMap<Double, Set<RowKey>>());
        m_extremeOutliers.put(name, new HashMap<Double, Set<RowKey>>());
    }
    for (DataRow r : inData[0]) {
        exec.checkCanceled();
        exec.setProgress(rowCount++ / nrRows, "Separating...");
        if (!m_settings.showMissingValues()) {
            if (r.getCell(nominalIndex).isMissing()) {
                // missing cell in nominal values is unwanted?
                continue;
            }
        }
        String nominal = replaceSpaces(r.getCell(nominalIndex).toString());
        if (r.getCell(numericIndex).isMissing()) {
            // ignore missing cells in numeric column
            continue;
        }
        DoubleValue numeric = (DoubleValue) r.getCell(numericIndex);
        Map<Double, Set<RowKey>> map = data.get(nominal);
        if (map == null) {
            map = new LinkedHashMap<Double, Set<RowKey>>();
        }
        Set<RowKey> set = map.get(numeric.getDoubleValue());
        if (set == null) {
            set = new HashSet<RowKey>();
        }
        set.add(r.getKey());
        map.put(numeric.getDoubleValue(), set);
        data.put(nominal, map);
    }
    List<String> keys = new ArrayList<String>(data.keySet());
    boolean ignoreMissingValues = false;
    if (m_settings.showMissingValues() && !keys.contains(DataType.getMissingCell().toString())) {
        // we promised to create data for missing values..
        // if there aren't any.. we have to create them ourselves
        setWarningMessage("No missing values found.");
        ignoreMissingValues = true;
    }
    Collections.sort(keys);
    DataColumnSpec[] colSpecs = createColumnSpec(inData[0].getDataTableSpec().getColumnSpec(nominalIndex), ignoreMissingValues);
    if (keys.size() == 0) {
        setWarningMessage("All classes are empty.");
    }
    int dataSetNr = 0;
    // iterate the column specs (not the keys) so that dataSetNr stays aligned
    // with the index into colSpecs, even for classes without any data
    for (DataColumnSpec dcs : colSpecs) {
        String d = dcs.getName();
        if (data.get(d) == null || keys.size() == 0) {
            dataSetNr++;
            continue;
        }
        exec.checkCanceled();
        exec.setProgress(dataSetNr / (double) keys.size(), "Creating statistics");
        Map<Double, Set<RowKey>> extremeOutliers = new LinkedHashMap<Double, Set<RowKey>>();
        Map<Double, Set<RowKey>> mildOutliers = new LinkedHashMap<Double, Set<RowKey>>();
        double[] stats = calculateStatistic(data.get(d), mildOutliers, extremeOutliers);
        double minimum = stats[BoxPlotNodeModel.MIN];
        double maximum = stats[BoxPlotNodeModel.MAX];
        // replace the spec by one whose domain spans the observed min/max
        DataColumnSpecCreator creator = new DataColumnSpecCreator(colSpecs[dataSetNr]);
        creator.setDomain(new DataColumnDomainCreator(new DoubleCell(minimum), new DoubleCell(maximum)).createDomain());
        colSpecs[dataSetNr] = creator.createSpec();
        m_statistics.put(colSpecs[dataSetNr], stats);
        m_mildOutliers.put(d, mildOutliers);
        m_extremeOutliers.put(d, extremeOutliers);
        dataSetNr++;
    }
    // an empty table carrying only the column specs backs the data array
    DataTableSpec dts = new DataTableSpec("MyTempTable", colSpecs);
    DataContainer cont = new DataContainer(dts);
    cont.close();
    m_dataArray = new DefaultDataArray(cont.getTable(), 1, 2);
    cont.dispose();
    if (ignoreMissingValues) {
        // insert a column spec for the missing value at its sorted position
        DataColumnSpec[] temp = new DataColumnSpec[colSpecs.length + 1];
        DataColumnSpec missing = new DataColumnSpecCreator(DataType.getMissingCell().toString(), DataType.getMissingCell().getType()).createSpec();
        int i = 0;
        // the bounds check is required: the missing-value name may sort after
        // every existing column name (or colSpecs may be empty), in which case
        // the new spec is appended at the end
        while (i < colSpecs.length && missing.getName().compareTo(colSpecs[i].getName()) > 0) {
            temp[i] = colSpecs[i];
            i++;
        }
        temp[i++] = missing;
        while (i < temp.length) {
            temp[i] = colSpecs[i - 1];
            i++;
        }
        colSpecs = temp;
    }
    /* Save inSpec of the numeric column to provide the view a way to
         * consider the input domain for normalization. */
    m_numColSpec = inData[0].getDataTableSpec().getColumnSpec(numericIndex);
    return new BufferedDataTable[] { createOutputTable(inData[0].getDataTableSpec(), colSpecs, exec).getTable() };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) HashSet(java.util.HashSet) Set(java.util.Set) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) RowKey(org.knime.core.data.RowKey) DoubleCell(org.knime.core.data.def.DoubleCell) DefaultDataArray(org.knime.base.node.util.DefaultDataArray) ArrayList(java.util.ArrayList) DataRow(org.knime.core.data.DataRow) LinkedHashMap(java.util.LinkedHashMap) DataContainer(org.knime.core.data.container.DataContainer) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) DataColumnSpec(org.knime.core.data.DataColumnSpec) BufferedDataTable(org.knime.core.node.BufferedDataTable) StringValue(org.knime.core.data.StringValue) DataColumnDomainCreator(org.knime.core.data.DataColumnDomainCreator) DoubleValue(org.knime.core.data.DoubleValue) DataCell(org.knime.core.data.DataCell) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map)

Example 63 with DoubleValue

use of org.knime.core.data.DoubleValue in project knime-core by knime.

the class BoxPlotDrawingPane method paintOutlierLabels.

/**
 * Draws the numeric value next to each outlier dot. A dot that shares its
 * x coordinate with the most recently accepted dot and lies less than one
 * font height away from it vertically is skipped.
 *
 * @param g the graphics object to draw on
 */
protected void paintOutlierLabels(final Graphics g) {
    final int lineHeight = g.getFontMetrics().getHeight();
    final DotInfoArray dots = getDotInfoArray();
    // the last dot that was not skipped because of overlap
    DotInfo previous = null;
    for (DotInfo current : dots.getDots()) {
        final boolean sameColumn = previous != null && current.getXCoord() == previous.getXCoord();
        if (sameColumn && Math.abs(previous.getYCoord() - current.getYCoord()) < lineHeight) {
            // not enough vertical space; 'previous' deliberately stays unchanged
            continue;
        }
        final int labelY = current.getYCoord() + lineHeight / 4;
        final int labelX = current.getXCoord() + DOT_SIZE;
        previous = current;
        if (current.getYDomainValue() == null) {
            // nothing to print for this dot
            continue;
        }
        final double domainValue = ((DoubleValue) current.getYDomainValue()).getDoubleValue();
        g.drawString(LabelPaintUtil.getDoubleAsString(domainValue, Box.ROUNDING_FACTOR), labelX, labelY);
    }
}
Also used : DotInfo(org.knime.base.node.viz.plotter.scatter.DotInfo) DoubleValue(org.knime.core.data.DoubleValue) DotInfoArray(org.knime.base.node.viz.plotter.scatter.DotInfoArray)

Example 64 with DoubleValue

use of org.knime.core.data.DoubleValue in project knime-core by knime.

the class BoxPlotNodeModel method detectOutliers.

/**
 * Detects mild and extreme outliers and determines the whisker values.
 * A value is an outlier if it lies more than 1.5 * IQR below the lower or
 * above the upper quartile. It is a mild outlier if it lies less than
 * 3 * IQR away from that quartile and an extreme outlier otherwise.
 *
 * @param table the sorted! table containing the values.
 * @param iqr the interquartile range
 * @param q quartiles the lower quartile at 0,upper quartile at 1.
 * @param mild map to store mild outliers (value to keys of the rows
 *            carrying that value)
 * @param extreme map to store extreme outliers (value to keys of the rows
 *            carrying that value)
 * @param whiskers array to store the lower and upper whisker bar
 * @param colIdx the index for the column of interest
 */
public void detectOutliers(final DataTable table, final double iqr, final double[] q, final Map<Double, Set<RowKey>> mild, final Map<Double, Set<RowKey>> extreme, final double[] whiskers, final int colIdx) {
    boolean searchLowerWhisker = true;
    boolean searchUpperWhisker = true;
    for (DataRow row : table) {
        DataCell cell = row.getCell(colIdx);
        if (cell.isMissing()) {
            continue;
        }
        double value = ((DoubleValue) cell).getDoubleValue();
        if (value < q[0] - (1.5 * iqr)) {
            // lower outlier
            if (value > q[0] - (3.0 * iqr)) {
                addOutlierKey(mild, value, row.getKey());
            } else {
                addOutlierKey(extreme, value, row.getKey());
            }
        } else if (value > q[1] + (1.5 * iqr)) {
            // upper outlier: the upper whisker has been passed
            searchUpperWhisker = false;
            if (value < q[1] + (3.0 * iqr)) {
                addOutlierKey(mild, value, row.getKey());
            } else {
                addOutlierKey(extreme, value, row.getKey());
            }
        } else if (searchLowerWhisker) {
            // first non-outlier value of the sorted table
            whiskers[0] = value;
            searchLowerWhisker = false;
        }
        if (searchUpperWhisker) {
            // keeps being overwritten until the first upper outlier is seen
            whiskers[1] = value;
        }
    }
}

/**
 * Adds a row key to the set registered for the given value, creating the
 * set if the value has not been seen in this map before.
 *
 * @param outliers the map to add to
 * @param value the outlier value
 * @param key the key of the row carrying the value
 */
private static void addOutlierKey(final Map<Double, Set<RowKey>> outliers, final double value, final RowKey key) {
    Set<RowKey> keys = outliers.get(value);
    if (keys == null) {
        keys = new HashSet<RowKey>();
        outliers.put(value, keys);
    }
    keys.add(key);
}
Also used : HashSet(java.util.HashSet) Set(java.util.Set) DoubleValue(org.knime.core.data.DoubleValue) RowKey(org.knime.core.data.RowKey) DataCell(org.knime.core.data.DataCell) DataRow(org.knime.core.data.DataRow) HashSet(java.util.HashSet)

Example 65 with DoubleValue

use of org.knime.core.data.DoubleValue in project knime-core by knime.

the class BoxplotCalculator method calculateMultiple.

/**
 * Calculates the necessary statistics for a non-conditional boxplot.
 * The non-missing cells of every requested column are copied into a
 * single-column table, which is sorted and then scanned twice: once for
 * minimum, maximum, quartiles and median, and once for whiskers and
 * outliers. Columns without any non-missing value are recorded as excluded
 * and the number of missing values per remaining column is stored.
 *
 * @param table the input data
 * @param numCol array of names of numeric columns to plot
 * @param exec Execution context to report progress to
 * @return LinkedHashMap with the column name as key and statistics as value
 * @throws CanceledExecutionException when the user cancels the execution
 */
public LinkedHashMap<String, BoxplotStatistics> calculateMultiple(final BufferedDataTable table, final String[] numCol, final ExecutionContext exec) throws CanceledExecutionException {
    DataTableSpec spec = table.getSpec();
    int[] numColIdxs = new int[numCol.length];
    for (int i = 0; i < numCol.length; i++) {
        numColIdxs[i] = spec.findColumnIndex(numCol[i]);
    }
    // one single-column container per requested column
    LinkedHashMap<String, DataContainer> containers = new LinkedHashMap<String, DataContainer>();
    for (int i = 0; i < numCol.length; i++) {
        containers.put(numCol[i], exec.createDataContainer(new DataTableSpec(new String[] { "col" }, new DataType[] { DoubleCell.TYPE })));
    }
    ExecutionContext subExec = exec.createSilentSubExecutionContext(0.7);
    long[] numMissValPerCol = new long[numCol.length];
    int count = 0;
    // split the input into the per-column containers, counting missing values
    for (DataRow row : table) {
        exec.checkCanceled();
        subExec.setProgress((double) count++ / table.size());
        for (int i = 0; i < numCol.length; i++) {
            DataCell cell = row.getCell(numColIdxs[i]);
            if (!cell.isMissing()) {
                containers.get(numCol[i]).addRowToTable(new DefaultRow(row.getKey(), cell));
            } else {
                numMissValPerCol[i]++;
            }
        }
    }
    LinkedHashMap<String, BoxplotStatistics> statsMap = new LinkedHashMap<>();
    ExecutionContext subExec2 = exec.createSilentSubExecutionContext(1.0);
    count = 0;
    List<String> excludedDataColList = new ArrayList<String>();
    for (Entry<String, DataContainer> entry : containers.entrySet()) {
        exec.checkCanceled();
        subExec2.setProgress((double) count++ / containers.size());
        Set<Outlier> extremeOutliers = new HashSet<Outlier>();
        Set<Outlier> mildOutliers = new HashSet<Outlier>();
        entry.getValue().close();
        BufferedDataTable catTable = (BufferedDataTable) entry.getValue().getTable();
        if (catTable.size() == 0) {
            // only missing values in this column: no statistics possible
            excludedDataColList.add(entry.getKey());
            continue;
        }
        SortedTable st = new SortedTable(catTable, new Comparator<DataRow>() {

            @Override
            public int compare(final DataRow o1, final DataRow o2) {
                double d1 = ((DoubleValue) o1.getCell(0)).getDoubleValue();
                double d2 = ((DoubleValue) o2.getCell(0)).getDoubleValue();
                // Double.compare yields a consistent total order even for NaN,
                // which a plain ==/< comparison does not
                return Double.compare(d1, d2);
            }
        }, false, exec);
        double min = 0, max = 0, q1 = 0, q3 = 0, median = 0;
        // the d* flags tell whether the respective quantile falls between two
        // rows, in which case it is the mean of the two neighbouring values
        boolean dq1 = catTable.size() % 4 == 0;
        long q1Idx = catTable.size() / 4;
        boolean dq3 = 3 * catTable.size() % 4 == 0;
        long q3Idx = 3 * catTable.size() / 4;
        boolean dMedian = catTable.size() % 2 == 0;
        long medianIdx = catTable.size() / 2;
        int counter = 0;
        for (DataRow row : st) {
            double val = ((DoubleValue) row.getCell(0)).getDoubleValue();
            if (counter == 0) {
                min = val;
            }
            if (counter == catTable.size() - 1) {
                max = val;
            }
            if (counter == q1Idx - 1 && dq1) {
                q1 = val;
            }
            // for tables with at most three rows the first row also feeds q1
            if (counter == q1Idx || (counter == 0 && st.size() <= 3)) {
                if (dq1) {
                    q1 = (q1 + val) / 2.0;
                } else {
                    q1 = val;
                }
            }
            if (counter == medianIdx - 1 && dMedian) {
                median = val;
            }
            if (counter == medianIdx) {
                if (dMedian) {
                    median = (median + val) / 2;
                } else {
                    median = val;
                }
            }
            if (counter == q3Idx - 1 && dq3) {
                q3 = val;
            }
            // for tables with at most three rows the last row also feeds q3
            if (counter == q3Idx || (counter == st.size() - 1 && st.size() <= 3)) {
                if (dq3) {
                    q3 = (q3 + val) / 2.0;
                } else {
                    q3 = val;
                }
            }
            counter++;
        }
        double iqr = q3 - q1;
        double lowerWhisker = min;
        double upperWhisker = max;
        double upperWhiskerFence = q3 + (1.5 * iqr);
        double lowerWhiskerFence = q1 - (1.5 * iqr);
        double lowerFence = q1 - (3 * iqr);
        double upperFence = q3 + (3 * iqr);
        // second pass over the sorted values: classify outliers and move the
        // whiskers to the outermost values that are still inside the fences
        for (DataRow row : st) {
            double value = ((DoubleValue) row.getCell(0)).getDoubleValue();
            String rowKey = row.getKey().getString();
            if (value < lowerFence) {
                extremeOutliers.add(new Outlier(value, rowKey));
            } else if (value < lowerWhiskerFence) {
                mildOutliers.add(new Outlier(value, rowKey));
            } else if (lowerWhisker < lowerWhiskerFence && value >= lowerWhiskerFence) {
                lowerWhisker = value;
            } else if (value <= upperWhiskerFence) {
                upperWhisker = value;
            } else if (value > upperFence) {
                extremeOutliers.add(new Outlier(value, rowKey));
            } else if (value > upperWhiskerFence) {
                mildOutliers.add(new Outlier(value, rowKey));
            }
        }
        statsMap.put(entry.getKey(), new BoxplotStatistics(mildOutliers, extremeOutliers, min, max, lowerWhisker, q1, median, q3, upperWhisker));
    }
    // missing values part
    m_excludedDataCols = excludedDataColList.toArray(new String[excludedDataColList.size()]);
    m_numMissValPerCol = new LinkedHashMap<String, Long>();
    for (int i = 0; i < numCol.length; i++) {
        if (numMissValPerCol[i] > 0 && !excludedDataColList.contains(numCol[i])) {
            m_numMissValPerCol.put(numCol[i], numMissValPerCol[i]);
        }
    }
    return statsMap;
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) ArrayList(java.util.ArrayList) DataRow(org.knime.core.data.DataRow) LinkedHashMap(java.util.LinkedHashMap) DataContainer(org.knime.core.data.container.DataContainer) BufferedDataTable(org.knime.core.node.BufferedDataTable) HashSet(java.util.HashSet) ExecutionContext(org.knime.core.node.ExecutionContext) DoubleValue(org.knime.core.data.DoubleValue) SortedTable(org.knime.base.data.sort.SortedTable) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow)

Aggregations

DoubleValue (org.knime.core.data.DoubleValue)154 DataCell (org.knime.core.data.DataCell)103 DataRow (org.knime.core.data.DataRow)71 DataColumnSpec (org.knime.core.data.DataColumnSpec)38 DataTableSpec (org.knime.core.data.DataTableSpec)38 DoubleCell (org.knime.core.data.def.DoubleCell)32 ArrayList (java.util.ArrayList)26 BufferedDataTable (org.knime.core.node.BufferedDataTable)26 DataType (org.knime.core.data.DataType)23 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)21 LinkedHashMap (java.util.LinkedHashMap)18 IntValue (org.knime.core.data.IntValue)15 HashMap (java.util.HashMap)14 RowIterator (org.knime.core.data.RowIterator)14 RowKey (org.knime.core.data.RowKey)13 DefaultRow (org.knime.core.data.def.DefaultRow)13 DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator)12 LongValue (org.knime.core.data.LongValue)10 StringValue (org.knime.core.data.StringValue)10 DateAndTimeValue (org.knime.core.data.date.DateAndTimeValue)10