Search in sources :

Example 31 with BufferedDataContainer

use of org.knime.core.node.BufferedDataContainer in project knime-core by knime.

the class NormalizerNodeModel method calculate.

/**
 * Creates the normalized table for the configured mode, together with the spec of the normalized
 * columns and the affine transformation that was applied.
 *
 * <p>In {@code NONORM_MODE} the input table is returned unchanged, paired with an empty
 * {@link DataTableSpec} and a fresh {@link AffineTransConfiguration}.</p>
 *
 * @param inData The input data; the table at index 0 is normalized.
 * @param exec For BufferedDataTable creation and progress.
 * @return the result of the calculation
 * @throws Exception If the node calculation fails for any reason, e.g. if no known mode is set or
 *             the normalization reports an error message.
 */
protected CalculationResult calculate(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    final BufferedDataTable input = (BufferedDataTable) inData[0];
    final DataTableSpec inputSpec = input.getSpec();
    // extract selected numeric columns
    updateNumericColumnSelection(inputSpec);
    final Normalizer normalizer = new Normalizer(input, m_columns);
    // the first 30% of the progress is spent on computing the transformation
    final ExecutionMonitor prepareProgress = exec.createSubProgress(0.3);

    final AffineTransTable normalized;
    boolean pinDomainToMinMax = false;
    switch (m_mode) {
        case NONORM_MODE:
            return new CalculationResult(input, new DataTableSpec(), new AffineTransConfiguration());
        case MINMAX_MODE:
            pinDomainToMinMax = true;
            normalized = normalizer.doMinMaxNorm(m_max, m_min, prepareProgress);
            break;
        case ZSCORE_MODE:
            normalized = normalizer.doZScoreNorm(prepareProgress);
            break;
        case DECIMALSCALING_MODE:
            normalized = normalizer.doDecimalScaling(prepareProgress);
            break;
        default:
            throw new Exception("No mode set");
    }

    // an error on the transformed table is fatal ...
    final String fatalMessage = normalized.getErrorMessage();
    if (fatalMessage != null) {
        throw new Exception(fatalMessage);
    }
    // ... whereas an error recorded by the normalizer itself is only surfaced as a warning
    final String initMessage = normalizer.getErrorMessage();
    if (initMessage != null) {
        setWarningMessage(initMessage);
    }

    final DataTableSpec modelSpec = FilterColumnTable.createFilterTableSpec(inputSpec, m_columns);
    final AffineTransConfiguration configuration = normalized.getConfiguration();
    DataTableSpec outSpec = normalized.getDataTableSpec();

    // For min/max normalization, set the domain bounds of every normalized column to the
    // configured [m_min, m_max] interval instead of keeping the bounds reported by the table.
    if (pinDomainToMinMax) {
        final int columnCount = outSpec.getNumColumns();
        final DataColumnSpec[] columnSpecs = new DataColumnSpec[columnCount];
        for (int col = 0; col < columnCount; col++) {
            columnSpecs[col] = outSpec.getColumnSpec(col);
        }
        for (int sel = 0; sel < m_columns.length; sel++) {
            final int target = outSpec.findColumnIndex(m_columns[sel]);
            final DataColumnSpec current = columnSpecs[target];
            final DataColumnSpecCreator specCreator = new DataColumnSpecCreator(current);
            final DataColumnDomainCreator domainCreator = new DataColumnDomainCreator(current.getDomain());
            domainCreator.setLowerBound(new DoubleCell(m_min));
            domainCreator.setUpperBound(new DoubleCell(m_max));
            specCreator.setDomain(domainCreator.createDomain());
            columnSpecs[target] = specCreator.createSpec();
        }
        outSpec = new DataTableSpec(outSpec.getName(), columnSpecs);
    }

    // the remaining 70% of the progress is spent on writing the normalized rows
    final ExecutionMonitor writeProgress = exec.createSubProgress(.7);
    final BufferedDataContainer output = exec.createDataContainer(outSpec);
    final long totalRows = input.size();
    long rowNo = 0;
    for (final DataRow row : normalized) {
        rowNo++;
        writeProgress.checkCanceled();
        final String message = "Normalizing row no. " + rowNo + " of " + totalRows + " (\"" + row.getKey() + "\")";
        writeProgress.setProgress(rowNo / (double) totalRows, message);
        output.addRowToTable(row);
    }
    output.close();
    return new CalculationResult(output.getTable(), modelSpec, configuration);
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) Normalizer(org.knime.base.data.normalize.Normalizer) DoubleCell(org.knime.core.data.def.DoubleCell) DataColumnDomainCreator(org.knime.core.data.DataColumnDomainCreator) DataRow(org.knime.core.data.DataRow) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) CanceledExecutionException(org.knime.core.node.CanceledExecutionException) IOException(java.io.IOException) DataColumnSpec(org.knime.core.data.DataColumnSpec) BufferedDataTable(org.knime.core.node.BufferedDataTable) AffineTransTable(org.knime.base.data.normalize.AffineTransTable) AffineTransConfiguration(org.knime.base.data.normalize.AffineTransConfiguration) ExecutionMonitor(org.knime.core.node.ExecutionMonitor)

Example 32 with BufferedDataContainer

use of org.knime.core.node.BufferedDataContainer in project knime-core by knime.

the class EnrichmentPlotterModel method execute.

/**
 * {@inheritDoc}
 *
 * <p>For every curve configured in the settings, the rows of the first input table are collected
 * (rows with a missing sort value are skipped), sorted by the sort column, and accumulated into an
 * enrichment curve according to the configured plot mode. The curve is down-sampled to at most
 * {@code MAX_RESOLUTION} points for the view. One row per curve is written to each of the two
 * output tables: the area value, and the values at the {@code DISCRATE_POINTS} percentages.</p>
 *
 * <p>Missing activity values contribute nothing to the curve in any plot mode.</p>
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    final double rowCount = inData[0].size();
    final BufferedDataContainer areaOutCont = exec.createDataContainer(AREA_OUT_SPEC);
    final BufferedDataContainer discrateOutCont = exec.createDataContainer(DISCRATE_OUT_SPEC);
    for (int i = 0; i < m_settings.getCurveCount(); i++) {
        // every curve gets an equal share of the overall progress
        final ExecutionMonitor sexec = exec.createSubProgress(1.0 / m_settings.getCurveCount());
        exec.setMessage("Generating curve " + (i + 1));
        final Curve c = m_settings.getCurve(i);
        final Helper[] curve = new Helper[KnowsRowCountTable.checkRowCount(inData[0].size())];
        final int sortIndex = inData[0].getDataTableSpec().findColumnIndex(c.getSortColumn());
        final int actIndex = inData[0].getDataTableSpec().findColumnIndex(c.getActivityColumn());
        // k counts all rows seen, maxK only those with a non-missing sort value
        int k = 0, maxK = 0;
        for (DataRow row : inData[0]) {
            DataCell c1 = row.getCell(sortIndex);
            DataCell c2 = row.getCell(actIndex);
            // only check for cancellation / report progress on every 100th row
            if (k++ % 100 == 0) {
                sexec.checkCanceled();
                sexec.setProgress(k / rowCount);
            }
            if (c1.isMissing()) {
                continue;
            } else {
                // note: the activity cell c2 is stored as-is and may be a missing cell
                curve[maxK] = new Helper(((DoubleValue) c1).getDoubleValue(), c2);
            }
            maxK++;
        }
        // only the first maxK entries are filled; presumably Helper is Comparable on the sort value
        Arrays.sort(curve, 0, maxK);
        if (c.isSortDescending()) {
            // reverse the filled part of the array in place
            for (int j = 0; j < maxK / 2; j++) {
                Helper h = curve[j];
                curve[j] = curve[maxK - j - 1];
                curve[maxK - j - 1] = h;
            }
        }
        // this is for down-sampling so that the view is faster;
        // plotting >100,000 points takes quite a long time
        final int size = Math.min(MAX_RESOLUTION, maxK);
        final double downSampleRate = maxK / (double) size;
        final double[] xValues = new double[size + 1];
        final double[] yValues = new double[size + 1];
        xValues[0] = 0;
        yValues[0] = 0;
        int lastK = 0;
        double y = 0, area = 0;
        int nextHitRatePoint = 0;
        final double[] hitRateValues = new double[DISCRATE_POINTS.length];
        final HashMap<DataCell, MutableInteger> clusters = new HashMap<DataCell, MutableInteger>();
        for (k = 1; k <= maxK; k++) {
            final Helper h = curve[k - 1];
            if (m_settings.plotMode() == PlotMode.PlotSum) {
                // A missing activity cell cannot be cast to DoubleValue; skip it (as the other
                // plot modes do) instead of failing with a ClassCastException.
                if (!h.b.isMissing()) {
                    y += ((DoubleValue) h.b).getDoubleValue();
                }
            } else if (m_settings.plotMode() == PlotMode.PlotHits) {
                if (!h.b.isMissing() && (((DoubleValue) h.b).getDoubleValue() >= m_settings.hitThreshold())) {
                    y++;
                }
            } else if (!h.b.isMissing()) {
                // remaining plot mode: count a cluster once, at the moment it reaches the
                // configured minimum number of members
                MutableInteger count = clusters.get(h.b);
                if (count == null) {
                    count = new MutableInteger(0);
                    clusters.put(h.b, count);
                }
                if (count.inc() == m_settings.minClusterMembers()) {
                    y++;
                }
            }
            area += y / maxK;
            // emit a sample point whenever k crosses into the next down-sampling bucket
            if ((int) (k / downSampleRate) >= lastK + 1) {
                lastK++;
                xValues[lastK] = k;
                yValues[lastK] = y;
            }
            if ((nextHitRatePoint < DISCRATE_POINTS.length) && (k == (int) Math.floor(maxK * DISCRATE_POINTS[nextHitRatePoint] / 100))) {
                hitRateValues[nextHitRatePoint] = y;
                nextHitRatePoint++;
            }
        }
        // make sure the curve ends exactly at the last row and the final y value
        xValues[xValues.length - 1] = maxK;
        yValues[yValues.length - 1] = y;
        // normalize by the final y value
        // NOTE(review): if y is 0 (e.g. no hits at all) this and the hit-rate division below
        // yield NaN - confirm whether that is the intended output.
        area /= y;
        m_curves.add(new EnrichmentPlot(c.getSortColumn() + " vs " + c.getActivityColumn(), xValues, yValues, area));
        areaOutCont.addRowToTable(new DefaultRow(new RowKey(c.toString()), new DoubleCell(area)));
        for (int j = 0; j < hitRateValues.length; j++) {
            hitRateValues[j] /= y;
        }
        discrateOutCont.addRowToTable(new DefaultRow(new RowKey(c.toString()), hitRateValues));
    }
    areaOutCont.close();
    discrateOutCont.close();
    return new BufferedDataTable[] { areaOutCont.getTable(), discrateOutCont.getTable() };
}
Also used : BufferedDataContainer(org.knime.core.node.BufferedDataContainer) HashMap(java.util.HashMap) RowKey(org.knime.core.data.RowKey) DoubleCell(org.knime.core.data.def.DoubleCell) MutableInteger(org.knime.core.util.MutableInteger) Curve(org.knime.base.node.viz.enrichment.EnrichmentPlotterSettings.Curve) DataRow(org.knime.core.data.DataRow) DoubleValue(org.knime.core.data.DoubleValue) BufferedDataTable(org.knime.core.node.BufferedDataTable) DataCell(org.knime.core.data.DataCell) ExecutionMonitor(org.knime.core.node.ExecutionMonitor) DefaultRow(org.knime.core.data.def.DefaultRow)

Example 33 with BufferedDataContainer

use of org.knime.core.node.BufferedDataContainer in project knime-core by knime.

the class LogisticRegressionContent method createTablePortObject.

/**
 * Creates a BufferedDataTable named "Coefficients and Statistics" that lists, for every logit,
 * one row per parameter (coefficient, standard error, z-score and p-value) followed by one
 * "Constant" row holding the corresponding intercept statistics. Rows are keyed "Row1", "Row2", ...
 *
 * @param exec The execution context used to create the data container
 * @return the table with the coefficients and statistics
 */
public BufferedDataTable createTablePortObject(final ExecutionContext exec) {
    final String[] columnNames = { "Logit", "Variable", "Coeff.", "Std. Err.", "z-score", "P>|z|" };
    final DataType[] columnTypes = { StringCell.TYPE, StringCell.TYPE, DoubleCell.TYPE, DoubleCell.TYPE, DoubleCell.TYPE, DoubleCell.TYPE };
    final DataTableSpec outSpec = new DataTableSpec("Coefficients and Statistics", columnNames, columnTypes);
    final BufferedDataContainer container = exec.createDataContainer(outSpec);

    final List<DataCell> allLogits = this.getLogits();
    final List<String> allParameters = this.getParameters();
    int rowNo = 0;
    for (final DataCell logit : allLogits) {
        final Map<String, Double> coeffByParam = this.getCoefficients(logit);
        final Map<String, Double> stdErrByParam = this.getStandardErrors(logit);
        final Map<String, Double> zScoreByParam = this.getZScores(logit);
        final Map<String, Double> pValueByParam = this.getPValues(logit);

        // one row per parameter of this logit
        for (final String param : allParameters) {
            final List<DataCell> row = new ArrayList<DataCell>();
            row.add(new StringCell(logit.toString()));
            row.add(new StringCell(param));
            row.add(new DoubleCell(coeffByParam.get(param)));
            row.add(new DoubleCell(stdErrByParam.get(param)));
            row.add(new DoubleCell(zScoreByParam.get(param)));
            row.add(new DoubleCell(pValueByParam.get(param)));
            rowNo += 1;
            container.addRowToTable(new DefaultRow("Row" + rowNo, row));
        }

        // followed by the intercept ("Constant") row of this logit
        final List<DataCell> interceptRow = new ArrayList<DataCell>();
        interceptRow.add(new StringCell(logit.toString()));
        interceptRow.add(new StringCell("Constant"));
        interceptRow.add(new DoubleCell(this.getIntercept(logit)));
        interceptRow.add(new DoubleCell(this.getInterceptStdErr(logit)));
        interceptRow.add(new DoubleCell(this.getInterceptZScore(logit)));
        interceptRow.add(new DoubleCell(this.getInterceptPValue(logit)));
        rowNo += 1;
        container.addRowToTable(new DefaultRow("Row" + rowNo, interceptRow));
    }
    container.close();
    return container.getTable();
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) DoubleCell(org.knime.core.data.def.DoubleCell) ArrayList(java.util.ArrayList) StringCell(org.knime.core.data.def.StringCell) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow)

Example 34 with BufferedDataContainer

use of org.knime.core.node.BufferedDataContainer in project knime-core by knime.

the class LogisticRegressionContent method createTablePortObject.

/**
 * Creates a BufferedDataTable named "Coefficients and Statistics" that lists, for every logit,
 * one row per parameter (coefficient, standard error, z-score and p-value) followed by one
 * "Constant" row holding the corresponding intercept statistics. Rows are keyed "Row1", "Row2", ...
 *
 * @param exec The execution context used to create the data container
 * @return the table with the coefficients and statistics
 */
public BufferedDataTable createTablePortObject(final ExecutionContext exec) {
    final String[] columnNames = { "Logit", "Variable", "Coeff.", "Std. Err.", "z-score", "P>|z|" };
    final DataType[] columnTypes = { StringCell.TYPE, StringCell.TYPE, DoubleCell.TYPE, DoubleCell.TYPE, DoubleCell.TYPE, DoubleCell.TYPE };
    final DataTableSpec outSpec = new DataTableSpec("Coefficients and Statistics", columnNames, columnTypes);
    final BufferedDataContainer container = exec.createDataContainer(outSpec);

    final List<DataCell> allLogits = this.getLogits();
    final List<String> allParameters = this.getParameters();
    int rowNo = 0;
    for (final DataCell logit : allLogits) {
        final Map<String, Double> coeffByParam = this.getCoefficients(logit);
        final Map<String, Double> stdErrByParam = this.getStandardErrors(logit);
        final Map<String, Double> zScoreByParam = this.getZScores(logit);
        final Map<String, Double> pValueByParam = this.getPValues(logit);

        // one row per parameter of this logit
        for (final String param : allParameters) {
            final List<DataCell> row = new ArrayList<DataCell>();
            row.add(new StringCell(logit.toString()));
            row.add(new StringCell(param));
            row.add(new DoubleCell(coeffByParam.get(param)));
            row.add(new DoubleCell(stdErrByParam.get(param)));
            row.add(new DoubleCell(zScoreByParam.get(param)));
            row.add(new DoubleCell(pValueByParam.get(param)));
            rowNo += 1;
            container.addRowToTable(new DefaultRow("Row" + rowNo, row));
        }

        // followed by the intercept ("Constant") row of this logit
        final List<DataCell> interceptRow = new ArrayList<DataCell>();
        interceptRow.add(new StringCell(logit.toString()));
        interceptRow.add(new StringCell("Constant"));
        interceptRow.add(new DoubleCell(this.getIntercept(logit)));
        interceptRow.add(new DoubleCell(this.getInterceptStdErr(logit)));
        interceptRow.add(new DoubleCell(this.getInterceptZScore(logit)));
        interceptRow.add(new DoubleCell(this.getInterceptPValue(logit)));
        rowNo += 1;
        container.addRowToTable(new DefaultRow("Row" + rowNo, interceptRow));
    }
    container.close();
    return container.getTable();
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) DoubleCell(org.knime.core.data.def.DoubleCell) ArrayList(java.util.ArrayList) StringCell(org.knime.core.data.def.StringCell) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow)

Example 35 with BufferedDataContainer

use of org.knime.core.node.BufferedDataContainer in project knime-core by knime.

the class MissingValueHandling3Table method createMissingValueHandlingTable.

/**
 * Applies missing value handling to the given table according to the per-column settings and
 * copies the resulting rows into a new buffered table, reporting progress along the way.
 *
 * <p>If any column uses the most-frequent, max, min or mean method, a statistics table is computed
 * first (using half of the progress); its warning message, if any, is appended to
 * {@code warningBuffer}.</p>
 *
 * @param table the table to do missing value handling on
 * @param colSettings the settings
 * @param exec for progress/cancel and to create the buffered data table
 * @param warningBuffer To which potential warning messages are added.
 * @return a cache table, cleaned up
 * @throws CanceledExecutionException if canceled
 * @since 2.10
 */
public static BufferedDataTable createMissingValueHandlingTable(final BufferedDataTable table, final MissingValueHandling2ColSetting[] colSettings, final ExecutionContext exec, final StringBuilder warningBuffer) throws CanceledExecutionException {
    MissingValueHandling2ColSetting[] effectiveSettings;
    try {
        effectiveSettings = getColSetting(table.getDataTableSpec(), colSettings, false, warningBuffer);
    } catch (InvalidSettingsException ise) {
        LOGGER.coding("getColSetting method is not supposed to throw an exception, ignoring settings", ise);
        // fall back to "no handling" for every column of the input table
        final DataTableSpec inputSpec = table.getDataTableSpec();
        effectiveSettings = new MissingValueHandling2ColSetting[inputSpec.getNumColumns()];
        for (int col = 0; col < inputSpec.getNumColumns(); col++) {
            final MissingValueHandling2ColSetting noHandling = new MissingValueHandling2ColSetting(inputSpec.getColumnSpec(col));
            effectiveSettings[col] = noHandling;
            noHandling.setMethod(MissingValueHandling2ColSetting.METHOD_NO_HANDLING);
        }
    }

    // find out whether any column needs table statistics, and which need the most frequent value
    final Set<Integer> mostFrequentColumns = new HashSet<Integer>();
    boolean statisticsRequired = false;
    for (int col = 0; col < effectiveSettings.length; col++) {
        switch (effectiveSettings[col].getMethod()) {
            case MissingValueHandling2ColSetting.METHOD_MOST_FREQUENT:
                mostFrequentColumns.add(col);
                statisticsRequired = true;
                break;
            case MissingValueHandling2ColSetting.METHOD_MAX:
            case MissingValueHandling2ColSetting.METHOD_MIN:
            case MissingValueHandling2ColSetting.METHOD_MEAN:
                statisticsRequired = true;
                break;
            default:
                break;
        }
    }

    // without statistics, the whole progress goes to the row iteration
    MyStatisticsTable statistics = null;
    ExecutionMonitor rowProgress = exec;
    if (statisticsRequired) {
        // for creating statistics table
        final ExecutionContext statisticsExec = exec.createSubExecutionContext(0.5);
        statistics = new MyStatisticsTable(table, statisticsExec) {

            // do not try to get this Iterable in the constructor, it will not work, as long as
            // Statistics3Table does the statistical computation in the constructor.
            @Override
            protected Iterable<Integer> getMostFrequentColumns() {
                return mostFrequentColumns;
            }
        };
        final String statisticsWarning = statistics.m_warningMessage;
        if (statisticsWarning != null) {
            if (warningBuffer.length() > 0) {
                warningBuffer.append('\n');
            }
            warningBuffer.append(statisticsWarning);
        }
        // for the iterator
        rowProgress = exec.createSubProgress(0.5);
    }

    final MissingValueHandling3Table handledTable = new MissingValueHandling3Table(table, statistics, effectiveSettings);
    final BufferedDataContainer output = exec.createDataContainer(handledTable.getDataTableSpec());
    rowProgress.setMessage("Adding rows...");
    int rowNo = 0;
    try {
        final MissingValueHandling3TableIterator rows = new MissingValueHandling3TableIterator(handledTable, rowProgress);
        while (rows.hasNext()) {
            final DataRow row = rows.next();
            rowNo++;
            rowProgress.setMessage("Adding row " + rowNo + " (\"" + row.getKey() + "\")");
            output.addRowToTable(row);
        }
    } catch (MissingValueHandling3TableIterator.RuntimeCanceledExecutionException rcee) {
        // the iterator wraps the cancellation in a runtime exception; unwrap it for the caller
        throw rcee.getCause();
    } finally {
        // always close the container, also on cancellation
        output.close();
    }
    return output.getTable();
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) DataRow(org.knime.core.data.DataRow) ExecutionContext(org.knime.core.node.ExecutionContext) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) ExecutionMonitor(org.knime.core.node.ExecutionMonitor) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet)

Aggregations

BufferedDataContainer (org.knime.core.node.BufferedDataContainer)157 BufferedDataTable (org.knime.core.node.BufferedDataTable)96 DefaultRow (org.knime.core.data.def.DefaultRow)93 DataCell (org.knime.core.data.DataCell)88 DataTableSpec (org.knime.core.data.DataTableSpec)88 DataRow (org.knime.core.data.DataRow)80 RowKey (org.knime.core.data.RowKey)38 DoubleCell (org.knime.core.data.def.DoubleCell)37 StringCell (org.knime.core.data.def.StringCell)26 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)24 ArrayList (java.util.ArrayList)23 DataColumnSpec (org.knime.core.data.DataColumnSpec)21 CanceledExecutionException (org.knime.core.node.CanceledExecutionException)21 ExecutionMonitor (org.knime.core.node.ExecutionMonitor)17 DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator)16 IOException (java.io.IOException)15 ExecutionContext (org.knime.core.node.ExecutionContext)15 LinkedHashMap (java.util.LinkedHashMap)14 HashSet (java.util.HashSet)13 IntCell (org.knime.core.data.def.IntCell)13