Search in sources :

Example 96 with BufferedDataContainer

use of org.knime.core.node.BufferedDataContainer in project knime-core by knime.

the class ConditionalBoxPlotNodeModel method createOutputTable.

/**
 * Builds the output container holding one row per box plot statistic and one column per
 * column of the output spec derived from {@code tableSpec}.
 *
 * @param tableSpec the spec handed to {@code createOutputSpec} to derive the output spec
 * @param colSpecs the column specs used as lookup keys into the statistics map, indexed by
 *            output column position
 * @param exec the execution context used to create the container
 * @return the closed container with the statistics rows
 */
private BufferedDataContainer createOutputTable(final DataTableSpec tableSpec, final DataColumnSpec[] colSpecs, final ExecutionContext exec) {
    final BufferedDataContainer container = exec.createDataContainer(createOutputSpec(tableSpec));

    // One row key per statistic, placed at the index BoxPlotNodeModel assigns to it.
    final RowKey[] statisticKeys = new RowKey[BoxPlotNodeModel.SIZE];
    statisticKeys[BoxPlotNodeModel.MIN] = new RowKey("Minimum");
    statisticKeys[BoxPlotNodeModel.LOWER_WHISKER] = new RowKey("Lower Whisker");
    statisticKeys[BoxPlotNodeModel.LOWER_QUARTILE] = new RowKey("Lower Quartile");
    statisticKeys[BoxPlotNodeModel.MEDIAN] = new RowKey("Median");
    statisticKeys[BoxPlotNodeModel.UPPER_QUARTILE] = new RowKey("Upper Quartile");
    statisticKeys[BoxPlotNodeModel.UPPER_WHISKER] = new RowKey("Upper Whisker");
    statisticKeys[BoxPlotNodeModel.MAX] = new RowKey("Maximum");

    final int columnCount = container.getTableSpec().getNumColumns();
    int statistic = 0;
    for (final RowKey key : statisticKeys) {
        final DataCell[] rowCells = new DataCell[columnCount];
        for (int col = 0; col < columnCount; col++) {
            final double[] columnStats = m_statistics.get(colSpecs[col]);
            // Columns without computed statistics are filled with missing cells.
            rowCells[col] = (columnStats == null) ? DataType.getMissingCell() : new DoubleCell(columnStats[statistic]);
        }
        container.addRowToTable(new DefaultRow(key, rowCells));
        statistic++;
    }
    container.close();
    return container;
}
Also used : BufferedDataContainer(org.knime.core.node.BufferedDataContainer) RowKey(org.knime.core.data.RowKey) DoubleCell(org.knime.core.data.def.DoubleCell) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow)

Example 97 with BufferedDataContainer

use of org.knime.core.node.BufferedDataContainer in project knime-core by knime.

the class EnrichmentPlotterModel method execute.

/**
 * {@inheritDoc}
 *
 * For every configured curve the rows of the first input table are sorted by the curve's sort
 * column, the enrichment curve is accumulated according to the configured plot mode, and the
 * (optionally down-sampled) curve is stored for the view. Two tables are produced: one with the
 * area under each curve and one with the hit rates and enrichment factors at the configured
 * fraction sizes.
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    final double rowCount = inData[0].size();
    final BufferedDataContainer areaOutCont = exec.createDataContainer(AREA_OUT_SPEC);
    final BufferedDataContainer discrateOutCont = exec.createDataContainer(getDiscrateOutSpec());
    final double[] fractionSizes = m_settings.getFractionSizes();
    for (int i = 0; i < m_settings.getCurveCount(); i++) {
        final ExecutionMonitor sexec = exec.createSubProgress(1.0 / m_settings.getCurveCount());
        exec.setMessage("Generating curve " + (i + 1));
        final Curve c = m_settings.getCurve(i);
        final Helper[] curve = new Helper[KnowsRowCountTable.checkRowCount(inData[0].size())];
        final int sortIndex = inData[0].getDataTableSpec().findColumnIndex(c.getSortColumn());
        final int actIndex = inData[0].getDataTableSpec().findColumnIndex(c.getActivityColumn());
        // k counts all rows seen, maxK only the rows that were actually stored in the curve
        int k = 0, maxK = 0;
        for (DataRow row : inData[0]) {
            DataCell c1 = row.getCell(sortIndex);
            DataCell c2 = row.getCell(actIndex);
            // check for cancellation and report progress only every 100 rows
            if (k++ % 100 == 0) {
                sexec.checkCanceled();
                sexec.setProgress(k / rowCount);
            }
            // rows without a sort value cannot be ranked and are skipped
            if (c1.isMissing()) {
                continue;
            } else {
                curve[maxK] = new Helper(((DoubleValue) c1).getDoubleValue(), c2);
            }
            maxK++;
        }
        // only the first maxK entries are filled
        Arrays.sort(curve, 0, maxK);
        if (c.isSortDescending()) {
            // reverse the filled part of the array in place
            for (int j = 0; j < maxK / 2; j++) {
                Helper h = curve[j];
                curve[j] = curve[maxK - j - 1];
                curve[maxK - j - 1] = h;
            }
        }
        // this is for down-sampling so that the view is faster;
        // plotting >100,000 points takes quite a long time
        final int size = Math.min(MAX_RESOLUTION, maxK);
        final double downSampleRate = maxK / (double) size;
        final double[] xValues = new double[size + 1];
        final double[] yValues = new double[size + 1];
        xValues[0] = 0;
        yValues[0] = 0;
        int lastK = 0;
        double y = 0, area = 0;
        int nextHitRatePoint = 0;
        final double[] hitRateValues = new double[fractionSizes.length];
        final HashMap<DataCell, MutableInteger> clusters = new HashMap<DataCell, MutableInteger>();
        // set hit rate values for fractions that are smaller than 1 row to 0;
        // the bound check is required because with no usable rows (maxK == 0) or only very
        // small fractions every entry qualifies and the index would run past the array
        while ((nextHitRatePoint < fractionSizes.length) && ((maxK * fractionSizes[nextHitRatePoint] / 100) < 1)) {
            hitRateValues[nextHitRatePoint++] = 0;
        }
        for (k = 1; k <= maxK; k++) {
            final Helper h = curve[k - 1];
            if (m_settings.plotMode() == PlotMode.PlotSum) {
                // NOTE(review): the activity cell is cast without a missing check in this mode
                y += ((DoubleValue) h.b).getDoubleValue();
            } else if (m_settings.plotMode() == PlotMode.PlotHits) {
                // a row is a hit if its activity value reaches the configured threshold
                if (!h.b.isMissing() && (((DoubleValue) h.b).getDoubleValue() >= m_settings.hitThreshold())) {
                    y++;
                }
            } else if (!h.b.isMissing()) {
                // remaining mode: the activity cell acts as cluster id; a cluster is counted
                // once, at the moment its member count reaches the configured minimum
                MutableInteger count = clusters.get(h.b);
                if (count == null) {
                    count = new MutableInteger(0);
                    clusters.put(h.b, count);
                }
                if (count.inc() == m_settings.minClusterMembers()) {
                    y++;
                }
            }
            area += y / maxK;
            // record a plot point each time the down-sampled index advances
            if ((int) (k / downSampleRate) >= lastK + 1) {
                lastK++;
                xValues[lastK] = k;
                yValues[lastK] = y;
            }
            // several consecutive fraction sizes may map to the same row index k,
            // that's why this needs to be a while
            while ((nextHitRatePoint < fractionSizes.length) && (k == (int) Math.floor(maxK * fractionSizes[nextHitRatePoint] / 100))) {
                hitRateValues[nextHitRatePoint] = y;
                nextHitRatePoint++;
            }
        }
        // the last plot point always is the end of the curve
        xValues[xValues.length - 1] = maxK;
        yValues[yValues.length - 1] = y;
        // normalize by the final y value; this yields NaN if y is 0
        area /= y;
        m_curves.add(new EnrichmentPlot(c.getSortColumn() + " vs " + c.getActivityColumn(), xValues, yValues, area));
        areaOutCont.addRowToTable(new DefaultRow(new RowKey(c.toString()), new DoubleCell(area)));
        for (int j = 0; j < hitRateValues.length; j++) {
            hitRateValues[j] /= y;
        }
        double[] enrichmentFactors = new double[hitRateValues.length];
        for (int j = 0; j < enrichmentFactors.length; j++) {
            enrichmentFactors[j] = calculateEnrichmentFactor(hitRateValues[j], fractionSizes[j]);
        }
        // one row per curve: all hit rates followed by all enrichment factors
        discrateOutCont.addRowToTable(new DefaultRow(new RowKey(c.toString()), ArrayUtils.addAll(hitRateValues, enrichmentFactors)));
    }
    areaOutCont.close();
    discrateOutCont.close();
    return new BufferedDataTable[] { areaOutCont.getTable(), discrateOutCont.getTable() };
}
Also used : BufferedDataContainer(org.knime.core.node.BufferedDataContainer) HashMap(java.util.HashMap) RowKey(org.knime.core.data.RowKey) DoubleCell(org.knime.core.data.def.DoubleCell) MutableInteger(org.knime.core.util.MutableInteger) Curve(org.knime.base.node.viz.enrichment2.EnrichmentPlotterSettings.Curve) DataRow(org.knime.core.data.DataRow) DoubleValue(org.knime.core.data.DoubleValue) BufferedDataTable(org.knime.core.node.BufferedDataTable) DataCell(org.knime.core.data.DataCell) ExecutionMonitor(org.knime.core.node.ExecutionMonitor) DefaultRow(org.knime.core.data.def.DefaultRow)

Example 98 with BufferedDataContainer

use of org.knime.core.node.BufferedDataContainer in project knime-core by knime.

the class UngroupOperation2 method compute.

/**
 * Ungroups the collection columns of the given data table and returns the result as a new table.
 *
 * @param exec the execution context used to create the output container and passed on to the
 *            row-based computation
 * @param table the table whose collections are ungrouped
 * @param trans the hilite translator, which is modified directly; must be non-null if hiliting
 *            is enabled and may be <code>null</code> otherwise
 * @return the table with the ungrouped collections
 * @throws CanceledExecutionException if the execution has been canceled
 * @throws InterruptedException if the execution has been interrupted
 * @throws InvalidSettingsException if the table doesn't contain a collection column at one of
 *             the column indices to be ungrouped
 * @throws IllegalArgumentException if hiliting is enabled and no hilite translator is given
 */
public BufferedDataTable compute(final ExecutionContext exec, final BufferedDataTable table, final HiLiteTranslator trans) throws CanceledExecutionException, InterruptedException, InvalidSettingsException {
    final BufferedDataContainer container = exec.createDataContainer(createTableSpec(table.getDataTableSpec(), m_removeCollectionCol, m_colIndices));
    final boolean hasRows = table.size() != 0;
    if (hasRows) {
        final DataTableRowInput rowInput = new DataTableRowInput(table);
        final BufferedDataTableRowOutput rowOutput = new BufferedDataTableRowOutput(container);
        try {
            // Delegate to the row-based overload that does the actual ungrouping.
            compute(rowInput, rowOutput, exec, table.size(), trans);
        } finally {
            rowInput.close();
            rowOutput.close();
        }
        return rowOutput.getDataTable();
    }
    // Nothing to ungroup: return an empty table that still carries the output spec.
    container.close();
    return container.getTable();
}
Also used : BufferedDataContainer(org.knime.core.node.BufferedDataContainer) DataTableRowInput(org.knime.core.node.streamable.DataTableRowInput) BufferedDataTableRowOutput(org.knime.core.node.streamable.BufferedDataTableRowOutput)

Example 99 with BufferedDataContainer

use of org.knime.core.node.BufferedDataContainer in project knime-core by knime.

the class ROCCalculator method calculateCurveData.

/**
 * Calculates the ROC curve for every configured score column and stores the curves as well as
 * a table with the area under each curve in this calculator.
 *
 * @param table the table with the data
 * @param exec the execution context to use for reporting progress
 * @throws CanceledExecutionException when the user cancels the execution
 */
public void calculateCurveData(final BufferedDataTable table, final ExecutionContext exec) throws CanceledExecutionException {
    m_warningMessage = null;
    final List<ROCCurve> computedCurves = new ArrayList<ROCCurve>();
    final int classColIndex = table.getDataTableSpec().findColumnIndex(m_classCol);
    final int numCurves = m_curves.size();
    final int numRows = table.getRowCount();
    if (numRows == 0) {
        m_warningMessage = "Input table contains no rows";
    }
    final BufferedDataContainer areaContainer = exec.createDataContainer(OUT_SPEC);
    for (int curveIdx = 0; curveIdx < numCurves; curveIdx++) {
        exec.checkCanceled();
        final String scoreColumn = m_curves.get(curveIdx);
        final ExecutionContext sortExec = exec.createSubExecutionContext(1.0 / numCurves);
        final SortedTable sortedTable = new SortedTable(table, Collections.singletonList(scoreColumn), new boolean[] { false }, sortExec);
        sortExec.setProgress(1.0);

        int truePositives = 0;
        int falsePositives = 0;
        // plot coordinates; index 0 stays at the origin (0, 0)
        double[] xs = new double[numRows + 1];
        double[] ys = new double[numRows + 1];
        int point = 0;
        final int scoreColIndex = sortedTable.getDataTableSpec().findColumnIndex(scoreColumn);
        DataCell previousScore = null;
        for (DataRow row : sortedTable) {
            exec.checkCanceled();
            final DataCell actualClass = row.getCell(classColIndex);
            final boolean hasMissing = actualClass.isMissing() || row.getCell(scoreColIndex).isMissing();
            if (hasMissing && m_ignoreMissingValues) {
                continue;
            }
            if (hasMissing) {
                m_warningMessage = "Table contains missing values.";
            }
            if (!actualClass.toString().equals(m_posClass)) {
                falsePositives++;
            } else {
                truePositives++;
            }
            // Rows sharing the same score are collapsed into a single plot point: the point
            // index only advances when the score differs from the previous row's score.
            final DataCell score = row.getCell(scoreColIndex);
            if (!score.equals(previousScore)) {
                point++;
                previousScore = score;
            }
            xs[point] = falsePositives;
            ys[point] = truePositives;
        }

        // Trim to the points actually used, then scale the counts to rates.
        xs = Arrays.copyOf(xs, point + 1);
        ys = Arrays.copyOf(ys, point + 1);
        for (int p = 0; p < xs.length; p++) {
            xs[p] /= falsePositives;
            ys[p] /= truePositives;
        }
        // The curve always ends in (1, 1).
        xs[xs.length - 1] = 1;
        ys[ys.length - 1] = 1;

        // Area under the curve: sum the trapezoids below every segment that advances in x.
        double area = 0;
        for (int p = 1; p < xs.length; p++) {
            final double width = xs[p] - xs[p - 1];
            if (xs[p - 1] < xs[p]) {
                area += 0.5 * width * (ys[p] + ys[p - 1]);
            }
        }
        computedCurves.add(new ROCCurve(scoreColumn, xs, ys, area, m_maxPoints));
        areaContainer.addRowToTable(new DefaultRow(new RowKey(scoreColumn.toString()), new DoubleCell(area)));
    }
    m_outCurves = computedCurves;
    areaContainer.close();
    m_outTable = areaContainer.getTable();
}
Also used : BufferedDataContainer(org.knime.core.node.BufferedDataContainer) RowKey(org.knime.core.data.RowKey) DoubleCell(org.knime.core.data.def.DoubleCell) ArrayList(java.util.ArrayList) DataRow(org.knime.core.data.DataRow) ExecutionContext(org.knime.core.node.ExecutionContext) SortedTable(org.knime.base.data.sort.SortedTable) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow)

Example 100 with BufferedDataContainer

use of org.knime.core.node.BufferedDataContainer in project knime-core by knime.

the class ValueCounterNodeModel method execute.

/**
 * {@inheritDoc}
 *
 * Counts how often each distinct value occurs in the configured column of the first input
 * table and returns a single table with one row per value, ordered by the column type's
 * comparator. If hiliting is enabled, the mapping from output rows to input rows is installed
 * in the hilite translator.
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    final BufferedDataTable input = inData[0];
    final int colIndex = input.getDataTableSpec().findColumnIndex(m_settings.columnName());
    final double totalRows = input.getRowCount();
    final Map<DataCell, Set<RowKey>> keysPerValue = new HashMap<DataCell, Set<RowKey>>();
    final Map<DataCell, MutableInteger> occurrences = new HashMap<DataCell, MutableInteger>();

    int processed = 0;
    for (DataRow row : input) {
        exec.checkCanceled();
        exec.setProgress(processed / totalRows, occurrences.size() + " different values found");
        processed++;

        final DataCell value = row.getCell(colIndex);
        MutableInteger counter = occurrences.get(value);
        if (counter == null) {
            counter = new MutableInteger(0);
            occurrences.put(value, counter);
        }
        counter.inc();

        if (!m_settings.hiliting()) {
            continue;
        }
        // Remember which input rows contributed to this value.
        Set<RowKey> contributingKeys = keysPerValue.get(value);
        if (contributingKeys == null) {
            contributingKeys = new HashSet<RowKey>();
            keysPerValue.put(value, contributingKeys);
        }
        contributingKeys.add(row.getKey());
    }

    // Order the distinct values with the comparator of the counted column's type.
    final DataValueComparator cellComparator = input.getDataTableSpec().getColumnSpec(colIndex).getType().getComparator();
    final Comparator<Map.Entry<DataCell, MutableInteger>> byValue = new Comparator<Map.Entry<DataCell, MutableInteger>>() {

        public int compare(final Map.Entry<DataCell, MutableInteger> left, final Map.Entry<DataCell, MutableInteger> right) {
            return cellComparator.compare(left.getKey(), right.getKey());
        }
    };
    final List<Map.Entry<DataCell, MutableInteger>> ordered = new ArrayList<Map.Entry<DataCell, MutableInteger>>(occurrences.entrySet());
    Collections.sort(ordered, byValue);

    // One output row per distinct value, keyed by the value's string representation.
    final BufferedDataContainer output = exec.createDataContainer(TABLE_SPEC);
    for (Map.Entry<DataCell, MutableInteger> valueCount : ordered) {
        final int[] countColumn = { valueCount.getValue().intValue() };
        output.addRowToTable(new DefaultRow(new RowKey(valueCount.getKey().toString()), countColumn));
    }
    output.close();

    if (m_settings.hiliting()) {
        // Translate the per-value key sets to the row keys used in the output table.
        final Map<RowKey, Set<RowKey>> mapping = new HashMap<RowKey, Set<RowKey>>();
        for (Map.Entry<DataCell, Set<RowKey>> valueKeys : keysPerValue.entrySet()) {
            mapping.put(new RowKey(valueKeys.getKey().toString()), valueKeys.getValue());
        }
        m_translator.setMapper(new DefaultHiLiteMapper(mapping));
    }
    return new BufferedDataTable[] { output.getTable() };
}
Also used : HashSet(java.util.HashSet) Set(java.util.Set) HashMap(java.util.HashMap) RowKey(org.knime.core.data.RowKey) ArrayList(java.util.ArrayList) DataRow(org.knime.core.data.DataRow) DataValueComparator(org.knime.core.data.DataValueComparator) Entry(java.util.Map.Entry) BufferedDataTable(org.knime.core.node.BufferedDataTable) DefaultHiLiteMapper(org.knime.core.node.property.hilite.DefaultHiLiteMapper) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) MutableInteger(org.knime.core.util.MutableInteger) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow) HashMap(java.util.HashMap) Map(java.util.Map)

Aggregations

BufferedDataContainer (org.knime.core.node.BufferedDataContainer)157 BufferedDataTable (org.knime.core.node.BufferedDataTable)96 DefaultRow (org.knime.core.data.def.DefaultRow)93 DataCell (org.knime.core.data.DataCell)88 DataTableSpec (org.knime.core.data.DataTableSpec)88 DataRow (org.knime.core.data.DataRow)80 RowKey (org.knime.core.data.RowKey)38 DoubleCell (org.knime.core.data.def.DoubleCell)37 StringCell (org.knime.core.data.def.StringCell)26 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)24 ArrayList (java.util.ArrayList)23 DataColumnSpec (org.knime.core.data.DataColumnSpec)21 CanceledExecutionException (org.knime.core.node.CanceledExecutionException)21 ExecutionMonitor (org.knime.core.node.ExecutionMonitor)17 DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator)16 IOException (java.io.IOException)15 ExecutionContext (org.knime.core.node.ExecutionContext)15 LinkedHashMap (java.util.LinkedHashMap)14 HashSet (java.util.HashSet)13 IntCell (org.knime.core.data.def.IntCell)13