Search in sources :

Example 1 with MutableInteger

use of org.knime.core.util.MutableInteger in project knime-core by knime.

the class OneWayANOVAStatistics method getGroupsTotalStatistics.

/**
 * Builds the "Total" row of the descriptive statistics table, i.e. the statistics over all groups taken together.
 * <p>
 * The cells are, in this order: column name, the label "Total", number of values, sum of the per-group missing
 * counters, missing-group counter, mean, standard deviation, standard error, confidence interval probability,
 * lower and upper confidence bound of the mean, minimum and maximum.
 *
 * @return the descriptive statistics for all groups
 */
public List<DataCell> getGroupsTotalStatistics() {
    final SummaryStatistics total = m_stats;
    final long n = total.getN();
    final double mean = total.getMean();
    final double stdError = StatsUtil.getStandardError(total);

    // sum up the missing value counters of the individual groups
    int missingTotal = 0;
    for (MutableInteger perGroup : m_missing) {
        missingTotal += perGroup.intValue();
    }

    // half width of the confidence interval: quantile of the t-distribution with n - 1 degrees of freedom
    // multiplied by the standard error of the mean
    final TDistribution tDist = new TDistribution(n - 1);
    final double quantile = tDist.inverseCumulativeProbability((1 - m_confidenceIntervalProp) / 2);
    final double halfWidth = FastMath.abs(quantile) * stdError;

    final List<DataCell> row = new ArrayList<DataCell>();
    row.add(new StringCell(m_column));
    row.add(new StringCell("Total"));
    row.add(new IntCell((int) n));
    row.add(new IntCell(missingTotal));
    row.add(new IntCell(m_missingGroup.intValue()));
    row.add(new DoubleCell(mean));
    row.add(new DoubleCell(total.getStandardDeviation()));
    row.add(new DoubleCell(stdError));
    row.add(new DoubleCell(m_confidenceIntervalProp));
    row.add(new DoubleCell(mean - halfWidth));
    row.add(new DoubleCell(mean + halfWidth));
    row.add(new DoubleCell(total.getMin()));
    row.add(new DoubleCell(total.getMax()));
    return row;
}
Also used : StringCell(org.knime.core.data.def.StringCell) DoubleCell(org.knime.core.data.def.DoubleCell) MutableInteger(org.knime.core.util.MutableInteger) ArrayList(java.util.ArrayList) DataCell(org.knime.core.data.DataCell) SummaryStatistics(org.apache.commons.math3.stat.descriptive.SummaryStatistics) TDistribution(org.apache.commons.math3.distribution.TDistribution) IntCell(org.knime.core.data.def.IntCell)

Example 2 with MutableInteger

use of org.knime.core.util.MutableInteger in project knime-core by knime.

the class EnrichmentPlotterModel method execute.

/**
 * {@inheritDoc}
 *
 * <p>
 * For every curve configured in the settings, the rows of the first input table are collected (rows with a missing
 * value in the sort column are skipped), sorted, optionally reversed, and then walked once while a running value
 * {@code y} is accumulated according to the plot mode. From that walk a down-sampled curve, a normalized area value
 * and the normalized values at the {@code DISCRATE_POINTS} positions are derived.
 * </p>
 *
 * @param inData the input tables; only {@code inData[0]} is read
 * @param exec used to create the output containers and to report progress and cancellation
 * @return two tables: one row per curve with the area value, and one row per curve with the values at the
 *         {@code DISCRATE_POINTS} positions
 * @throws Exception if the execution is canceled or fails
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    final double rowCount = inData[0].size();
    final BufferedDataContainer areaOutCont = exec.createDataContainer(AREA_OUT_SPEC);
    final BufferedDataContainer discrateOutCont = exec.createDataContainer(DISCRATE_OUT_SPEC);
    for (int i = 0; i < m_settings.getCurveCount(); i++) {
        final ExecutionMonitor sexec = exec.createSubProgress(1.0 / m_settings.getCurveCount());
        exec.setMessage("Generating curve " + (i + 1));
        final Curve c = m_settings.getCurve(i);
        final Helper[] curve = new Helper[KnowsRowCountTable.checkRowCount(inData[0].size())];
        final int sortIndex = inData[0].getDataTableSpec().findColumnIndex(c.getSortColumn());
        final int actIndex = inData[0].getDataTableSpec().findColumnIndex(c.getActivityColumn());
        // k counts all rows read, maxK only the rows that were actually stored in the curve array
        int k = 0, maxK = 0;
        for (DataRow row : inData[0]) {
            DataCell c1 = row.getCell(sortIndex);
            DataCell c2 = row.getCell(actIndex);
            if (k++ % 100 == 0) {
                sexec.checkCanceled();
                sexec.setProgress(k / rowCount);
            }
            if (c1.isMissing()) {
                // rows without a sort value cannot be ranked and are left out
                continue;
            } else {
                curve[maxK] = new Helper(((DoubleValue) c1).getDoubleValue(), c2);
            }
            maxK++;
        }
        // only the first maxK entries are filled; presumably Helper orders by the sort value - verify in Helper
        Arrays.sort(curve, 0, maxK);
        if (c.isSortDescending()) {
            // reverse the filled part of the array in place
            for (int j = 0; j < maxK / 2; j++) {
                Helper h = curve[j];
                curve[j] = curve[maxK - j - 1];
                curve[maxK - j - 1] = h;
            }
        }
        // this is for down-sampling so that the view is faster;
        // plotting >100,000 points takes quite a long time
        final int size = Math.min(MAX_RESOLUTION, maxK);
        final double downSampleRate = maxK / (double) size;
        final double[] xValues = new double[size + 1];
        final double[] yValues = new double[size + 1];
        xValues[0] = 0;
        yValues[0] = 0;
        int lastK = 0;
        double y = 0, area = 0;
        int nextHitRatePoint = 0;
        final double[] hitRateValues = new double[DISCRATE_POINTS.length];
        // A point that maps to row 0 can never be reached by the loop below because k starts at 1. Skip such
        // leading points up front (their value stays 0) so that they do not block all following points.
        while ((nextHitRatePoint < DISCRATE_POINTS.length) && ((int) Math.floor(maxK * DISCRATE_POINTS[nextHitRatePoint] / 100) < 1)) {
            nextHitRatePoint++;
        }
        final HashMap<DataCell, MutableInteger> clusters = new HashMap<DataCell, MutableInteger>();
        for (k = 1; k <= maxK; k++) {
            final Helper h = curve[k - 1];
            if (m_settings.plotMode() == PlotMode.PlotSum) {
                // ignore missing activity values like the other plot modes do; casting a missing cell to
                // DoubleValue would presumably fail with a ClassCastException
                if (!h.b.isMissing()) {
                    y += ((DoubleValue) h.b).getDoubleValue();
                }
            } else if (m_settings.plotMode() == PlotMode.PlotHits) {
                if (!h.b.isMissing() && (((DoubleValue) h.b).getDoubleValue() >= m_settings.hitThreshold())) {
                    y++;
                }
            } else if (!h.b.isMissing()) {
                // remaining mode: the activity cell is used as a cluster id; a cluster counts once, at the moment
                // its member count reaches the configured minimum
                MutableInteger count = clusters.get(h.b);
                if (count == null) {
                    count = new MutableInteger(0);
                    clusters.put(h.b, count);
                }
                if (count.inc() == m_settings.minClusterMembers()) {
                    y++;
                }
            }
            area += y / maxK;
            if ((int) (k / downSampleRate) >= lastK + 1) {
                lastK++;
                xValues[lastK] = k;
                yValues[lastK] = y;
            }
            // Use a loop with ">=" instead of a single "==" test: several points may map to the same row, and a
            // single equality test would leave the second one (and every point after it) unset.
            while ((nextHitRatePoint < DISCRATE_POINTS.length) && (k >= (int) Math.floor(maxK * DISCRATE_POINTS[nextHitRatePoint] / 100))) {
                hitRateValues[nextHitRatePoint] = y;
                nextHitRatePoint++;
            }
        }
        xValues[xValues.length - 1] = maxK;
        yValues[yValues.length - 1] = y;
        // normalize by the final value of y; this yields NaN if y is 0
        area /= y;
        m_curves.add(new EnrichmentPlot(c.getSortColumn() + " vs " + c.getActivityColumn(), xValues, yValues, area));
        areaOutCont.addRowToTable(new DefaultRow(new RowKey(c.toString()), new DoubleCell(area)));
        for (int j = 0; j < hitRateValues.length; j++) {
            hitRateValues[j] /= y;
        }
        discrateOutCont.addRowToTable(new DefaultRow(new RowKey(c.toString()), hitRateValues));
    }
    areaOutCont.close();
    discrateOutCont.close();
    return new BufferedDataTable[] { areaOutCont.getTable(), discrateOutCont.getTable() };
}
Also used : BufferedDataContainer(org.knime.core.node.BufferedDataContainer) HashMap(java.util.HashMap) RowKey(org.knime.core.data.RowKey) DoubleCell(org.knime.core.data.def.DoubleCell) MutableInteger(org.knime.core.util.MutableInteger) Curve(org.knime.base.node.viz.enrichment.EnrichmentPlotterSettings.Curve) DataRow(org.knime.core.data.DataRow) DoubleValue(org.knime.core.data.DoubleValue) BufferedDataTable(org.knime.core.node.BufferedDataTable) DataCell(org.knime.core.data.DataCell) ExecutionMonitor(org.knime.core.node.ExecutionMonitor) DefaultRow(org.knime.core.data.def.DefaultRow)

Example 3 with MutableInteger

use of org.knime.core.util.MutableInteger in project knime-core by knime.

the class ClassAttributeModel method toString.

/**
 * {@inheritDoc}
 *
 * <p>
 * The first line holds the attribute name and the total number of records, followed by one
 * {@code classValue|count} line per class value.
 * </p>
 */
@Override
public String toString() {
    final StringBuilder text = new StringBuilder();
    // header line
    text.append("Attribute name: ").append(getAttributeName()).append("\t");
    text.append("No of records: ").append(m_totalNoOfRecs).append("\n");
    // one line per class value
    for (final String classValue : m_recsCounterByClassVal.keySet()) {
        final MutableInteger counter = m_recsCounterByClassVal.get(classValue);
        text.append(classValue).append("|").append(counter.intValue()).append("\n");
    }
    return text.toString();
}
Also used : MutableInteger(org.knime.core.util.MutableInteger)

Example 4 with MutableInteger

use of org.knime.core.util.MutableInteger in project knime-core by knime.

the class KnnNodeModel method createRearranger.

/**
 * Creates a column rearranger. NOTE: This call possibly involves heavier calculations since the kd-tree is
 * determined here based on the training data.
 *
 * @param trainData the training table; its rows are added to the kd-tree and counted per class value
 * @param inSpec2 the spec of the table to classify
 * @param exec used for progress reporting and cancellation checks
 * @param numRowsTable2 number of rows of the table to classify - can be -1 if it can't be determined (streaming)
 * @return the rearranger created from the built kd-tree
 * @throws CanceledExecutionException if the execution was canceled
 * @throws InvalidSettingsException if the configured class column is not part of the training table
 */
private ColumnRearranger createRearranger(final BufferedDataTable trainData, final DataTableSpec inSpec2, final ExecutionContext exec, final long numRowsTable2) throws CanceledExecutionException, InvalidSettingsException {
    int classColIndex = trainData.getDataTableSpec().findColumnIndex(m_settings.classColumn());
    if (classColIndex == -1) {
        throw new InvalidSettingsException("Invalid class column chosen.");
    }
    List<Integer> featureColumns = new ArrayList<Integer>();
    Map<Integer, Integer> firstToSecond = new HashMap<Integer, Integer>();
    checkInputTables(new DataTableSpec[] { trainData.getDataTableSpec(), inSpec2 }, featureColumns, firstToSecond);
    KDTreeBuilder<DataCell> treeBuilder = new KDTreeBuilder<DataCell>(featureColumns.size());
    // Reading the training data accounts for the first 10% of the progress. The counter must be advanced per
    // row and divided (not multiplied) by the row count, otherwise the reported progress never moves.
    final double rowCount = trainData.size();
    long count = 0;
    for (DataRow currentRow : trainData) {
        exec.checkCanceled();
        exec.setProgress(0.1 * count / rowCount, "Reading row " + currentRow.getKey());
        count++;
        double[] features = createFeatureVector(currentRow, featureColumns);
        if (features == null) {
            // no feature vector could be created for this row; it is not added to the tree
            setWarningMessage("Input table contains missing values, the " + "affected rows are ignored.");
        } else {
            DataCell thisClassCell = currentRow.getCell(classColIndex);
            // and finally add data
            treeBuilder.addPattern(features, thisClassCell);
            // compute the majority class for breaking possible ties later
            MutableInteger t = m_classDistribution.get(thisClassCell);
            if (t == null) {
                m_classDistribution.put(thisClassCell, new MutableInteger(1));
            } else {
                t.inc();
            }
        }
    }
    // and now use it to classify the test data...
    DataColumnSpec classColumnSpec = trainData.getDataTableSpec().getColumnSpec(classColIndex);
    exec.setMessage("Building kd-tree");
    KDTree<DataCell> tree = treeBuilder.buildTree(exec.createSubProgress(0.3));
    if (tree.size() < m_settings.k()) {
        setWarningMessage("There are only " + tree.size() + " patterns in the input table, but " + m_settings.k() + " nearest neighbours were requested for classification." + " The prediction will be the majority class for all" + " input patterns.");
    }
    exec.setMessage("Classifying");
    ColumnRearranger c = createRearranger(inSpec2, classColumnSpec, featureColumns, firstToSecond, tree, numRowsTable2);
    return c;
}
Also used : KDTreeBuilder(org.knime.base.util.kdtree.KDTreeBuilder) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) MutableInteger(org.knime.core.util.MutableInteger) ArrayList(java.util.ArrayList) DataRow(org.knime.core.data.DataRow) MutableInteger(org.knime.core.util.MutableInteger) DataColumnSpec(org.knime.core.data.DataColumnSpec) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) DataCell(org.knime.core.data.DataCell)

Example 5 with MutableInteger

use of org.knime.core.util.MutableInteger in project knime-core by knime.

the class BigGroupByTable method createGroupByTable.

/**
 * {@inheritDoc}
 *
 * <p>
 * This implementation sorts the input table by the group columns (if there are any) and then walks the sorted
 * table once. Consecutive rows whose group values compare as equal form a chunk; when a chunk ends, the result
 * rows for its members are written via {@code createTableRows} and the chunk state is cleared.
 * </p>
 *
 * @param exec execution context used for sorting, container creation, progress and cancellation
 * @param table the input table to group
 * @param resultSpec the spec of the result table
 * @param groupColIdx indices of the group columns in the input table; may be empty
 * @return the table holding the rows written for all chunks
 * @throws CanceledExecutionException if the execution was canceled
 */
@Override
protected BufferedDataTable createGroupByTable(final ExecutionContext exec, final BufferedDataTable table, final DataTableSpec resultSpec, final int[] groupColIdx) throws CanceledExecutionException {
    LOGGER.debug("Entering createGroupByTable(exec, table) " + "of class BigGroupByTable.");
    final DataTableSpec origSpec = table.getDataTableSpec();
    // sort the data table in order to process the input table chunk wise
    final BufferedDataTable sortedTable;
    final ExecutionContext groupExec;
    final DataValueComparator[] comparators;
    if (groupColIdx.length < 1) {
        // no group columns: nothing to sort by, the table is processed as it is
        sortedTable = table;
        groupExec = exec;
        comparators = new DataValueComparator[0];
    } else {
        // sorting gets a 0.6 share of the progress, grouping the remaining 0.4
        final ExecutionContext sortExec = exec.createSubExecutionContext(0.6);
        exec.setMessage("Sorting input table...");
        sortedTable = sortTable(sortExec, table, getGroupCols());
        sortExec.setProgress(1.0);
        groupExec = exec.createSubExecutionContext(0.4);
        // one comparator per group column, taken from the column's data type
        comparators = new DataValueComparator[groupColIdx.length];
        for (int i = 0, length = groupColIdx.length; i < length; i++) {
            final DataColumnSpec colSpec = origSpec.getColumnSpec(groupColIdx[i]);
            comparators[i] = colSpec.getType().getComparator();
        }
    }
    final BufferedDataContainer dc = exec.createDataContainer(resultSpec);
    exec.setMessage("Creating groups");
    // previousGroup holds the group values of the first row of the current chunk,
    // currentGroup is refilled in place for every row
    final DataCell[] previousGroup = new DataCell[groupColIdx.length];
    final DataCell[] currentGroup = new DataCell[groupColIdx.length];
    final MutableInteger groupCounter = new MutableInteger(0);
    boolean firstRow = true;
    final double numOfRows = sortedTable.size();
    long rowCounter = 0;
    // In the rare case that the DataCell comparator return 0 for two
    // data cells that are not equal we have to maintain a map with all
    // rows with equal cells in the group columns per chunk.
    // This variable stores for each chunk these members. A chunk consists
    // of rows which return 0 for the pairwise group value comparison.
    // Usually only equal data cells return 0 when compared with each other
    // but in rare occasions also data cells that are NOT equal return 0 when
    // compared to each other
    // (such as cells that contain chemical structures).
    // In this rare case this map will contain for each group of data cells
    // that are pairwise equal in the chunk a separate entry.
    final Map<GroupKey, Pair<ColumnAggregator[], Set<RowKey>>> chunkMembers = new LinkedHashMap<>(3);
    // the unusual-chunk message below is logged at most once per call
    boolean logUnusualCells = true;
    String groupLabel = "";
    // cannot put init to the constructor, as the super() constructor directly calls the current function
    initMissingValuesMap();
    for (final DataRow row : sortedTable) {
        // fetch the current group column values
        for (int i = 0, length = groupColIdx.length; i < length; i++) {
            currentGroup[i] = row.getCell(groupColIdx[i]);
        }
        if (firstRow) {
            // the very first row opens the first chunk
            groupLabel = createGroupLabelForProgress(currentGroup);
            System.arraycopy(currentGroup, 0, previousGroup, 0, currentGroup.length);
            firstRow = false;
        }
        // check whether the current row still belongs to the current chunk, i.e. whether its
        // group column data cells are in the same chunk as the previous group according to the comparators
        if (!sameChunk(comparators, previousGroup, currentGroup)) {
            groupLabel = createGroupLabelForProgress(currentGroup);
            // the previous chunk is complete: write its result rows
            createTableRows(dc, chunkMembers, groupCounter);
            // set the current group as previous group
            System.arraycopy(currentGroup, 0, previousGroup, 0, currentGroup.length);
            if (logUnusualCells && chunkMembers.size() > 1) {
                // the chunk contained more than one distinct group key; log (only once) the cell classes that
                // cause the problem
                if (LOGGER.isEnabledFor(LEVEL.INFO)) {
                    final StringBuilder buf = new StringBuilder();
                    buf.append("Data chunk with ");
                    buf.append(chunkMembers.size());
                    buf.append(" members occured in groupby node. " + "Involved classes are: ");
                    final GroupKey key = chunkMembers.keySet().iterator().next();
                    for (final DataCell cell : key.getGroupVals()) {
                        buf.append(cell.getClass().getCanonicalName());
                        buf.append(", ");
                    }
                    LOGGER.info(buf.toString());
                }
                logUnusualCells = false;
            }
            // reset the chunk members map
            chunkMembers.clear();
        }
        // process the row as one of the members of the current chunk
        Pair<ColumnAggregator[], Set<RowKey>> member = chunkMembers.get(new GroupKey(currentGroup));
        if (member == null) {
            // first row with this group key in the chunk: create fresh aggregators and a row key set
            Set<RowKey> rowKeys;
            if (isEnableHilite()) {
                rowKeys = new HashSet<>();
            } else {
                // without hiliting no row keys are collected, see the isEnableHilite() check further down
                rowKeys = Collections.emptySet();
            }
            member = new Pair<>(cloneColumnAggregators(), rowKeys);
            // currentGroup is overwritten for every row, so the map key needs its own copy of the values
            final DataCell[] groupKeys = new DataCell[currentGroup.length];
            System.arraycopy(currentGroup, 0, groupKeys, 0, currentGroup.length);
            chunkMembers.put(new GroupKey(groupKeys), member);
        }
        // compute the current row values
        for (final ColumnAggregator colAggr : member.getFirst()) {
            final int colIdx = origSpec.findColumnIndex(colAggr.getOriginalColName());
            colAggr.getOperator(getGlobalSettings()).compute(row, colIdx);
        }
        if (isEnableHilite()) {
            member.getSecond().add(row.getKey());
        }
        groupExec.checkCanceled();
        groupExec.setProgress(++rowCounter / numOfRows, groupLabel);
    }
    // create the final row for the last chunk after processing the last
    // table row
    createTableRows(dc, chunkMembers, groupCounter);
    dc.close();
    return dc.getTable();
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) HashSet(java.util.HashSet) Set(java.util.Set) RowKey(org.knime.core.data.RowKey) DataValueComparator(org.knime.core.data.DataValueComparator) DataRow(org.knime.core.data.DataRow) LinkedHashMap(java.util.LinkedHashMap) DataColumnSpec(org.knime.core.data.DataColumnSpec) BufferedDataTable(org.knime.core.node.BufferedDataTable) Pair(org.knime.core.util.Pair) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) MutableInteger(org.knime.core.util.MutableInteger) ExecutionContext(org.knime.core.node.ExecutionContext) ColumnAggregator(org.knime.base.data.aggregation.ColumnAggregator) DataCell(org.knime.core.data.DataCell)

Aggregations

MutableInteger (org.knime.core.util.MutableInteger)33 DataCell (org.knime.core.data.DataCell)12 HashMap (java.util.HashMap)11 DataRow (org.knime.core.data.DataRow)8 RowKey (org.knime.core.data.RowKey)7 BufferedDataContainer (org.knime.core.node.BufferedDataContainer)6 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)6 HashSet (java.util.HashSet)5 DataTableSpec (org.knime.core.data.DataTableSpec)5 DefaultRow (org.knime.core.data.def.DefaultRow)5 BufferedDataTable (org.knime.core.node.BufferedDataTable)5 ArrayList (java.util.ArrayList)4 Set (java.util.Set)4 DataColumnSpec (org.knime.core.data.DataColumnSpec)4 LinkedHashMap (java.util.LinkedHashMap)3 StringCell (org.knime.core.data.def.StringCell)3 File (java.io.File)2 LinkedList (java.util.LinkedList)2 Map (java.util.Map)2 Entry (java.util.Map.Entry)2