Search in sources :

Example 11 with DataContainer

use of org.knime.core.data.container.DataContainer in project knime-core by knime.

In the class ConditionalBoxPlotNodeModel, the method execute:

/**
 * {@inheritDoc}
 *
 * <p>Groups the values of the configured numeric column by the value of the
 * configured nominal column, computes the box plot statistics and the mild and
 * extreme outliers for every group, and stores them in {@code m_statistics},
 * {@code m_mildOutliers} and {@code m_extremeOutliers}. Rows with a missing
 * numeric cell are always skipped; rows with a missing nominal cell are skipped
 * unless the settings ask for missing values to be shown.
 *
 * @param inData the input tables; only {@code inData[0]} is read
 * @param exec used for cancellation checks, progress reporting and for
 *            creating the output table
 * @return a single-element array holding the table built by
 *         {@code createOutputTable}
 * @throws Exception if the execution is cancelled or fails
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    m_statistics = new LinkedHashMap<DataColumnSpec, double[]>();
    m_mildOutliers = new LinkedHashMap<String, Map<Double, Set<RowKey>>>();
    m_extremeOutliers = new LinkedHashMap<String, Map<Double, Set<RowKey>>>();
    double nrRows = inData[0].size();
    int rowCount = 0;
    int numericIndex = inData[0].getDataTableSpec().findColumnIndex(m_settings.numericColumn());
    int nominalIndex = inData[0].getDataTableSpec().findColumnIndex(m_settings.nominalColumn());
    // class name -> (numeric value -> keys of all rows carrying that value)
    Map<String, Map<Double, Set<RowKey>>> data = new LinkedHashMap<String, Map<Double, Set<RowKey>>>();
    // some default values .. if one column only has missing values.
    // NOTE(review): assumes the nominal column's domain lists its possible
    // values (non-null) - confirm this is enforced before execute() runs.
    for (DataCell d : inData[0].getDataTableSpec().getColumnSpec(nominalIndex).getDomain().getValues()) {
        String name = ((StringValue) d).getStringValue();
        m_mildOutliers.put(name, new HashMap<Double, Set<RowKey>>());
        m_extremeOutliers.put(name, new HashMap<Double, Set<RowKey>>());
    }
    // Pass 1: separate the rows into one value map per class.
    for (DataRow r : inData[0]) {
        exec.checkCanceled();
        exec.setProgress(rowCount++ / nrRows, "Separating...");
        if (!m_settings.showMissingValues()) {
            if (r.getCell(nominalIndex).isMissing()) {
                // missing cell in nominal values is unwanted?
                continue;
            }
        }
        String nominal = replaceSpaces(r.getCell(nominalIndex).toString());
        if (r.getCell(numericIndex).isMissing()) {
            // ignore missing cells in numeric column
            continue;
        }
        Double value = ((DoubleValue) r.getCell(numericIndex)).getDoubleValue();
        Map<Double, Set<RowKey>> map = data.get(nominal);
        if (map == null) {
            // first row of this class: register its value map once
            map = new LinkedHashMap<Double, Set<RowKey>>();
            data.put(nominal, map);
        }
        Set<RowKey> set = map.get(value);
        if (set == null) {
            // first occurrence of this value within the class
            set = new HashSet<RowKey>();
            map.put(value, set);
        }
        set.add(r.getKey());
    }
    List<String> keys = new ArrayList<String>(data.keySet());
    boolean ignoreMissingValues = false;
    if (m_settings.showMissingValues() && !keys.contains(DataType.getMissingCell().toString())) {
        // we promised to create data for missing values..
        // if there aren't any.. we have to create them ourselves
        setWarningMessage("No missing values found.");
        ignoreMissingValues = true;
    }
    Collections.sort(keys);
    DataColumnSpec[] colSpecs = createColumnSpec(inData[0].getDataTableSpec().getColumnSpec(nominalIndex), ignoreMissingValues);
    if (keys.size() == 0) {
        setWarningMessage("All classes are empty.");
    }
    // Pass 2: compute the statistics of every class that received data.
    // dataSetNr tracks the index of the current column spec in colSpecs.
    int dataSetNr = 0;
    // for (String d : keys) {
    for (DataColumnSpec dcs : colSpecs) {
        String d = dcs.getName();
        if (data.get(d) == null || keys.size() == 0) {
            // no rows for this class: keep its spec untouched
            dataSetNr++;
            continue;
        }
        exec.checkCanceled();
        exec.setProgress(dataSetNr / (double) keys.size(), "Creating statistics");
        Map<Double, Set<RowKey>> extremeOutliers = new LinkedHashMap<Double, Set<RowKey>>();
        Map<Double, Set<RowKey>> mildOutliers = new LinkedHashMap<Double, Set<RowKey>>();
        double[] stats = calculateStatistic(data.get(d), mildOutliers, extremeOutliers);
        double minimum = stats[BoxPlotNodeModel.MIN];
        double maximum = stats[BoxPlotNodeModel.MAX];
        // replace the column spec by one whose domain spans the observed [min, max]
        DataColumnSpecCreator creator = new DataColumnSpecCreator(colSpecs[dataSetNr]);
        creator.setDomain(new DataColumnDomainCreator(new DoubleCell(minimum), new DoubleCell(maximum)).createDomain());
        colSpecs[dataSetNr] = creator.createSpec();
        m_statistics.put(colSpecs[dataSetNr], stats);
        m_mildOutliers.put(d, mildOutliers);
        m_extremeOutliers.put(d, extremeOutliers);
        dataSetNr++;
    }
    // No rows are added to this container: it is closed right away, so the
    // data array is built from an empty table that only carries the specs.
    DataTableSpec dts = new DataTableSpec("MyTempTable", colSpecs);
    DataContainer cont = new DataContainer(dts);
    cont.close();
    m_dataArray = new DefaultDataArray(cont.getTable(), 1, 2);
    cont.dispose();
    if (ignoreMissingValues) {
        // Insert a column spec for the missing-value class at its sorted
        // position (by name) among the existing column specs.
        DataColumnSpec[] temp = new DataColumnSpec[colSpecs.length + 1];
        DataColumnSpec missing = new DataColumnSpecCreator(DataType.getMissingCell().toString(), DataType.getMissingCell().getType()).createSpec();
        int i = 0;
        // The bound check is required: without it the loop runs past the end
        // of colSpecs when the missing-value name sorts after every existing
        // column name (or when colSpecs is empty).
        while (i < colSpecs.length && missing.getName().compareTo(colSpecs[i].getName()) > 0) {
            temp[i] = colSpecs[i];
            i++;
        }
        temp[i++] = missing;
        while (i < temp.length) {
            temp[i] = colSpecs[i - 1];
            i++;
        }
        colSpecs = temp;
    }
    /* Save inSpec of the numeric column to provide the view a way to
         * consider the input domain for normalization. */
    m_numColSpec = inData[0].getDataTableSpec().getColumnSpec(numericIndex);
    return new BufferedDataTable[] { createOutputTable(inData[0].getDataTableSpec(), colSpecs, exec).getTable() };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) HashSet(java.util.HashSet) Set(java.util.Set) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) RowKey(org.knime.core.data.RowKey) DoubleCell(org.knime.core.data.def.DoubleCell) DefaultDataArray(org.knime.base.node.util.DefaultDataArray) ArrayList(java.util.ArrayList) DataRow(org.knime.core.data.DataRow) LinkedHashMap(java.util.LinkedHashMap) DataContainer(org.knime.core.data.container.DataContainer) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) DataColumnSpec(org.knime.core.data.DataColumnSpec) BufferedDataTable(org.knime.core.node.BufferedDataTable) StringValue(org.knime.core.data.StringValue) DataColumnDomainCreator(org.knime.core.data.DataColumnDomainCreator) DoubleValue(org.knime.core.data.DoubleValue) DataCell(org.knime.core.data.DataCell) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map)

Example 12 with DataContainer

use of org.knime.core.data.container.DataContainer in project knime-core by knime.

In the class LiftCalculator, the method calculateLiftTables:

/**
 * Computes the two tables behind a lift chart and stores them in
 * {@code m_lift} (lift, baseline, cumulative lift) and {@code m_response}
 * (cumulative response rate in percent, baseline).
 *
 * <p>The input is sorted on the probability column and stored in
 * {@code m_sorted}. Rows with a missing response cell are always skipped;
 * rows with a missing probability cell are skipped only when
 * {@code m_ignoreMissingValues} is set, otherwise they are counted and a
 * warning is returned.
 *
 * @param table the data table
 * @param exec the execution context used for sorting and for creating the
 *            result containers
 * @return a warning message, or {@code null} if there is nothing to report
 * @throws CanceledExecutionException when the user cancels the execution
 */
public String calculateLiftTables(final BufferedDataTable table, final ExecutionContext exec) throws CanceledExecutionException {
    final int responseColIdx = table.getDataTableSpec().findColumnIndex(m_responseColumn);
    final int probabilityColIdx = table.getDataTableSpec().findColumnIndex(m_probabilityColumn);

    // Sort on the probability column only; the order flag is false
    // (presumably "descending" - confirm against SortedTable).
    List<String> sortColumns = new LinkedList<String>();
    sortColumns.add(m_probabilityColumn);
    m_sorted = new SortedTable(table, sortColumns, new boolean[] { false }, exec);

    final double partWidth = m_intervalWidth;
    final int nrParts = (int) Math.ceil(100.0 / partWidth);

    // Walk the sorted rows and remember the position (among the rows that
    // are actually used) of every row carrying the response label.
    String warning = null;
    long totalResponses = 0;
    List<Integer> hitPositions = new LinkedList<Integer>();
    int usedRows = 0;
    for (DataRow row : m_sorted) {
        if (row.getCell(responseColIdx).isMissing()) {
            // miss. values in class column we always ignore
            continue;
        }
        if (row.getCell(probabilityColIdx).isMissing()) {
            if (m_ignoreMissingValues) {
                continue;
            }
            warning = "Table contains missing values.";
        }
        String label = ((StringValue) row.getCell(responseColIdx)).getStringValue().trim();
        if (label.equalsIgnoreCase(m_responseLabel)) {
            totalResponses++;
            hitPositions.add(usedRows);
        }
        usedRows++;
    }

    // Distribute the hits over nrParts equally sized partitions.
    int[] hitsPerPart = new int[nrParts];
    int rowsPerPart = (int) Math.ceil(usedRows / (double) nrParts);
    double avgResponse = (double) hitPositions.size() / usedRows;
    for (int position : hitPositions) {
        hitsPerPart[position / rowsPerPart]++;
    }

    // new DataContainer(tableSpec);
    DataContainer liftCont = exec.createDataContainer(new DataTableSpec(new DataColumnSpec[] {
        new DataColumnSpecCreator("Lift", DoubleCell.TYPE).createSpec(),
        new DataColumnSpecCreator("Baseline", DoubleCell.TYPE).createSpec(),
        new DataColumnSpecCreator("Cumulative Lift", DoubleCell.TYPE).createSpec() }));
    // new DataContainer(tableSpec);
    DataContainer responseCont = exec.createDataContainer(new DataTableSpec(new DataColumnSpec[] {
        new DataColumnSpecCreator("Actual", DoubleCell.TYPE).createSpec(),
        new DataColumnSpecCreator("Baseline", DoubleCell.TYPE).createSpec() }));

    // The response table starts at the origin.
    responseCont.addRowToTable(new DefaultRow(new RowKey("0"), 0.0, 0.0));
    long cumulativeHits = 0;
    for (int i = 0; i < hitsPerPart.length; i++) {
        final int part = i + 1;
        cumulativeHits += hitsPerPart[i];

        double lift = ((double) hitsPerPart[i] / rowsPerPart) / avgResponse;
        double cumResponseRate = (double) cumulativeHits / totalResponses;

        // well.. rounding problems: the last partition may be shorter
        long rowsSoFar = rowsPerPart * part;
        if (rowsSoFar > usedRows) {
            rowsSoFar = usedRows;
        }
        double cumulativeLift = ((double) cumulativeHits / rowsSoFar) / avgResponse;

        // Upper percentile bound of this partition, capped at 100; it is
        // used as the row key and as the cumulative baseline.
        double percentile = part * partWidth;
        if (percentile > 100) {
            percentile = 100;
        }
        String key = String.valueOf(percentile);
        liftCont.addRowToTable(new DefaultRow(new RowKey(key), lift, 1.0, cumulativeLift));
        responseCont.addRowToTable(new DefaultRow(new RowKey(key), cumResponseRate * 100, percentile));
    }
    liftCont.close();
    responseCont.close();
    m_lift = (BufferedDataTable) liftCont.getTable();
    m_response = (BufferedDataTable) responseCont.getTable();
    return warning;
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) RowKey(org.knime.core.data.RowKey) DataRow(org.knime.core.data.DataRow) LinkedList(java.util.LinkedList) DataContainer(org.knime.core.data.container.DataContainer) DataColumnSpec(org.knime.core.data.DataColumnSpec) SortedTable(org.knime.base.data.sort.SortedTable) DefaultRow(org.knime.core.data.def.DefaultRow)

Example 13 with DataContainer

use of org.knime.core.data.container.DataContainer in project knime-core by knime.

In the class BoxplotCalculator, the method calculateMultiple:

/**
 * Calculates the necessary statistics for a non-conditional boxplot.
 *
 * <p>For every requested column the non-missing values are copied into a
 * one-column table, sorted, and reduced to minimum, maximum, quartiles,
 * median, whiskers and the sets of mild and extreme outliers. Columns without
 * any non-missing value get no entry in the result and are recorded in
 * {@code m_excludedDataCols}. The number of missing values of every remaining
 * column that has at least one is stored in {@code m_numMissValPerCol}.
 *
 * @param table the input data
 * @param numCol array of names of numeric columns to plot
 * @param exec Execution context to report progress to
 * @return LinkedHashMap with the column name as key and statistics as value
 * @throws CanceledExecutionException when the user cancels the execution
 */
public LinkedHashMap<String, BoxplotStatistics> calculateMultiple(final BufferedDataTable table, final String[] numCol, final ExecutionContext exec) throws CanceledExecutionException {
    DataTableSpec spec = table.getSpec();
    int[] numColIdxs = new int[numCol.length];
    for (int i = 0; i < numCol.length; i++) {
        numColIdxs[i] = spec.findColumnIndex(numCol[i]);
    }
    // one single-column container per requested column
    LinkedHashMap<String, DataContainer> containers = new LinkedHashMap<String, DataContainer>();
    for (int i = 0; i < numCol.length; i++) {
        containers.put(numCol[i], exec.createDataContainer(new DataTableSpec(new String[] { "col" }, new DataType[] { DoubleCell.TYPE })));
    }
    ExecutionContext subExec = exec.createSilentSubExecutionContext(0.7);
    long[] numMissValPerCol = new long[numCol.length];
    int count = 0;
    // Pass 1: copy the non-missing cells of every column into its container
    // and count the missing ones.
    for (DataRow row : table) {
        exec.checkCanceled();
        subExec.setProgress((double) count++ / table.size());
        for (int i = 0; i < numCol.length; i++) {
            DataCell cell = row.getCell(numColIdxs[i]);
            if (!cell.isMissing()) {
                containers.get(numCol[i]).addRowToTable(new DefaultRow(row.getKey(), cell));
            } else {
                numMissValPerCol[i]++;
            }
        }
    }
    LinkedHashMap<String, BoxplotStatistics> statsMap = new LinkedHashMap<>();
    ExecutionContext subExec2 = exec.createSilentSubExecutionContext(1.0);
    count = 0;
    List<String> excludedDataColList = new ArrayList<String>();
    // Pass 2: compute the statistics column by column.
    for (Entry<String, DataContainer> entry : containers.entrySet()) {
        exec.checkCanceled();
        subExec2.setProgress((double) count++ / containers.size());
        Set<Outlier> extremeOutliers = new HashSet<Outlier>();
        Set<Outlier> mildOutliers = new HashSet<Outlier>();
        entry.getValue().close();
        BufferedDataTable catTable = (BufferedDataTable) entry.getValue().getTable();
        if (catTable.size() == 0) {
            // only missing values in this column: nothing to compute
            excludedDataColList.add(entry.getKey());
            continue;
        }
        SortedTable st = new SortedTable(catTable, new Comparator<DataRow>() {

            @Override
            public int compare(final DataRow o1, final DataRow o2) {
                double d1 = ((DoubleValue) o1.getCell(0)).getDoubleValue();
                double d2 = ((DoubleValue) o2.getCell(0)).getDoubleValue();
                // Double.compare defines a total order. A hand-written
                // "d1 < d2 ? -1 : 1" returns 1 in both directions when NaN
                // is involved, which violates the Comparator contract.
                return Double.compare(d1, d2);
            }
        }, false, exec);
        double min = 0, max = 0, q1 = 0, q3 = 0, median = 0;
        // For each quantile: the index of the row that holds it in the sorted
        // table and a flag telling whether the quantile falls between two
        // rows, in which case the two neighbouring values are averaged.
        boolean dq1 = catTable.size() % 4 == 0;
        long q1Idx = catTable.size() / 4;
        boolean dq3 = 3 * catTable.size() % 4 == 0;
        long q3Idx = 3 * catTable.size() / 4;
        boolean dMedian = catTable.size() % 2 == 0;
        long medianIdx = catTable.size() / 2;
        int counter = 0;
        // Single scan over the sorted values picking min, max and quantiles.
        for (DataRow row : st) {
            double val = ((DoubleValue) row.getCell(0)).getDoubleValue();
            if (counter == 0) {
                min = val;
            }
            if (counter == catTable.size() - 1) {
                max = val;
            }
            if (counter == q1Idx - 1 && dq1) {
                // lower neighbour, averaged with the next value below
                q1 = val;
            }
            // for tables of at most three rows the first row also qualifies
            if (counter == q1Idx || (counter == 0 && st.size() <= 3)) {
                if (dq1) {
                    q1 = (q1 + val) / 2.0;
                } else {
                    q1 = val;
                }
            }
            if (counter == medianIdx - 1 && dMedian) {
                median = val;
            }
            if (counter == medianIdx) {
                if (dMedian) {
                    median = (median + val) / 2;
                } else {
                    median = val;
                }
            }
            if (counter == q3Idx - 1 && dq3) {
                q3 = val;
            }
            // for tables of at most three rows the last row also qualifies
            if (counter == q3Idx || (counter == st.size() - 1 && st.size() <= 3)) {
                if (dq3) {
                    q3 = (q3 + val) / 2.0;
                } else {
                    q3 = val;
                }
            }
            counter++;
        }
        double iqr = q3 - q1;
        double lowerWhisker = min;
        double upperWhisker = max;
        // values beyond 1.5 * IQR are mild outliers, beyond 3 * IQR extreme
        double upperWhiskerFence = q3 + (1.5 * iqr);
        double lowerWhiskerFence = q1 - (1.5 * iqr);
        double lowerFence = q1 - (3 * iqr);
        double upperFence = q3 + (3 * iqr);
        // Second scan: classify outliers and pull the whiskers in to the
        // outermost values that lie within the whisker fences.
        for (DataRow row : st) {
            double value = ((DoubleValue) row.getCell(0)).getDoubleValue();
            String rowKey = row.getKey().getString();
            if (value < lowerFence) {
                extremeOutliers.add(new Outlier(value, rowKey));
            } else if (value < lowerWhiskerFence) {
                mildOutliers.add(new Outlier(value, rowKey));
            } else if (lowerWhisker < lowerWhiskerFence && value >= lowerWhiskerFence) {
                // first value inside the lower fence becomes the lower whisker
                lowerWhisker = value;
            } else if (value <= upperWhiskerFence) {
                // keeps advancing; ends at the largest value inside the fence
                upperWhisker = value;
            } else if (value > upperFence) {
                extremeOutliers.add(new Outlier(value, rowKey));
            } else if (value > upperWhiskerFence) {
                mildOutliers.add(new Outlier(value, rowKey));
            }
        }
        statsMap.put(entry.getKey(), new BoxplotStatistics(mildOutliers, extremeOutliers, min, max, lowerWhisker, q1, median, q3, upperWhisker));
    }
    // missing values part
    m_excludedDataCols = excludedDataColList.toArray(new String[excludedDataColList.size()]);
    m_numMissValPerCol = new LinkedHashMap<String, Long>();
    for (int i = 0; i < numCol.length; i++) {
        if (numMissValPerCol[i] > 0 && !excludedDataColList.contains(numCol[i])) {
            m_numMissValPerCol.put(numCol[i], numMissValPerCol[i]);
        }
    }
    return statsMap;
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) ArrayList(java.util.ArrayList) DataRow(org.knime.core.data.DataRow) LinkedHashMap(java.util.LinkedHashMap) DataContainer(org.knime.core.data.container.DataContainer) BufferedDataTable(org.knime.core.node.BufferedDataTable) HashSet(java.util.HashSet) ExecutionContext(org.knime.core.node.ExecutionContext) DoubleValue(org.knime.core.data.DoubleValue) SortedTable(org.knime.base.data.sort.SortedTable) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow)

Example 14 with DataContainer

use of org.knime.core.data.container.DataContainer in project knime-core by knime.

In the class MissingValueHandlerNodeModel, the method execute:

/**
 * {@inheritDoc}
 *
 * <p>Wraps the input table in a {@code MissingCellReplacingDataTable},
 * initialises it on the input, copies every non-null row it yields into a new
 * data container, and returns that table together with a PMML port object
 * carrying the table's PMML translator. Warnings collected by the wrapping
 * table are set as the node's warning message.
 *
 * @param inData the input port objects; {@code inData[0]} must be a
 *            {@code BufferedDataTable}
 * @param exec the execution context for progress, cancellation and table
 *            creation
 * @return the table with replaced values and the PMML port object
 * @throws Exception if the execution is cancelled or fails
 */
@Override
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    final BufferedDataTable input = (BufferedDataTable) inData[0];
    final DataTableSpec inputSpec = input.getDataTableSpec();
    final MissingCellReplacingDataTable replacingTable = new MissingCellReplacingDataTable(inputSpec, m_settings);

    // Calculate the statistics
    exec.setMessage("Calculating statistics");
    replacingTable.init(input, exec.createSubExecutionContext(0.5));

    final long totalRows = input.size();
    final DataContainer output = exec.createDataContainer(replacingTable.getDataTableSpec());
    final ExecutionContext copyExec = exec.createSubExecutionContext(0.4);
    exec.setMessage("Replacing missing values");

    long processed = 0;
    for (DataRow row : replacingTable) {
        copyExec.checkCanceled();
        processed++;
        // A null row still counts towards the progress but is not written.
        String message = "Processed row " + processed + "/" + totalRows;
        if (row != null) {
            message += " (\"" + row.getKey() + "\")";
        }
        copyExec.setProgress(processed / (double) totalRows, message);
        if (row != null) {
            output.addRowToTable(row);
        }
    }
    output.close();

    // Collect the warning messages and hand them on to the node
    final String warnings = replacingTable.finish();
    if (!warnings.isEmpty()) {
        setWarningMessage(warnings);
    }

    exec.setMessage("Generating PMML");
    // Init PMML output port
    final PMMLPortObject pmmlOut = new PMMLPortObject(new PMMLPortObjectSpecCreator(inputSpec).createSpec());
    pmmlOut.addModelTranslater(replacingTable.getPMMLTranslator());
    return new PortObject[] { (BufferedDataTable) output.getTable(), pmmlOut };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataContainer(org.knime.core.data.container.DataContainer) MissingCellReplacingDataTable(org.knime.base.node.preproc.pmml.missingval.MissingCellReplacingDataTable) ExecutionContext(org.knime.core.node.ExecutionContext) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) BufferedDataTable(org.knime.core.node.BufferedDataTable) DataRow(org.knime.core.data.DataRow) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) PortObject(org.knime.core.node.port.PortObject) PMMLPortObjectSpecCreator(org.knime.core.node.port.pmml.PMMLPortObjectSpecCreator)

Example 15 with DataContainer

use of org.knime.core.data.container.DataContainer in project knime-core by knime.

In the class MappingTableInterpolationStatistic, the method init:

/**
 * {@inheritDoc}
 *
 * <p>Looks up the index of the configured column, creates a fresh container
 * with a single column named "value" of that column's type, and resets the
 * previous cell to the missing cell.
 *
 * @param spec the spec in which the configured column is looked up
 * @param amountOfColumns not used by this implementation
 */
@Override
protected void init(final DataTableSpec spec, final int amountOfColumns) {
    m_index = spec.findColumnIndex(m_columnName);

    // single-column buffer table typed like the configured column
    final DataType cellType = spec.getColumnSpec(m_index).getType();
    final DataColumnSpecCreator valueColumn = new DataColumnSpecCreator("value", cellType);
    final DataTableSpec bufferSpec = new DataTableSpec(valueColumn.createSpec());
    m_nextCells = new DataContainer(bufferSpec);

    m_previous = DataType.getMissingCell();
}
Also used : DataContainer(org.knime.core.data.container.DataContainer) DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator)

Aggregations

DataContainer (org.knime.core.data.container.DataContainer): 35, DataTableSpec (org.knime.core.data.DataTableSpec): 25, DefaultRow (org.knime.core.data.def.DefaultRow): 21, DataRow (org.knime.core.data.DataRow): 19, DataCell (org.knime.core.data.DataCell): 17, BufferedDataTable (org.knime.core.node.BufferedDataTable): 15, RowKey (org.knime.core.data.RowKey): 10, ArrayList (java.util.ArrayList): 9, DoubleCell (org.knime.core.data.def.DoubleCell): 9, IntCell (org.knime.core.data.def.IntCell): 8, LinkedHashMap (java.util.LinkedHashMap): 7, DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator): 7, HashSet (java.util.HashSet): 6, DataColumnSpec (org.knime.core.data.DataColumnSpec): 6, RowIterator (org.knime.core.data.RowIterator): 6, StringCell (org.knime.core.data.def.StringCell): 6, Map (java.util.Map): 5, Set (java.util.Set): 5, SortedTable (org.knime.base.data.sort.SortedTable): 5, DataTable (org.knime.core.data.DataTable): 5