Search in sources :

Example 6 with MutableInteger

use of org.knime.core.util.MutableInteger in project knime-core by knime.

the class KnnNodeModel method createRearranger.

/**
 * Creates a column rearranger. NOTE: This call possibly involves heavier calculations since the kd-tree is
 * determined here based on the training data.
 *
 * <p>The training table is read once: every row for which a feature vector can be created is added to the
 * kd-tree builder and counted in the class distribution (used to break ties later). Rows for which
 * {@code createFeatureVector} returns {@code null} are skipped and a warning about missing values is set.
 * Reading the training data is reported as the first 10% of the progress; building the tree uses a
 * sub-progress of 0.3.
 *
 * @param trainData the training data the kd-tree is built from
 * @param inSpec2 the spec of the second input table
 * @param exec used for progress reporting and cancellation checks
 * @param numRowsTable2 - can be -1 if can't be determined (streaming)
 * @return the rearranger created by the overloaded {@code createRearranger} for the second input
 * @throws CanceledExecutionException if the execution is canceled while reading the rows or building the tree
 * @throws InvalidSettingsException if the configured class column is not part of the training data
 */
private ColumnRearranger createRearranger(final BufferedDataTable trainData, final DataTableSpec inSpec2, final ExecutionContext exec, final long numRowsTable2) throws CanceledExecutionException, InvalidSettingsException {
    int classColIndex = trainData.getDataTableSpec().findColumnIndex(m_settings.classColumn());
    if (classColIndex == -1) {
        throw new InvalidSettingsException("Invalid class column chosen.");
    }
    List<Integer> featureColumns = new ArrayList<Integer>();
    Map<Integer, Integer> firstToSecond = new HashMap<Integer, Integer>();
    checkInputTables(new DataTableSpec[] { trainData.getDataTableSpec(), inSpec2 }, featureColumns, firstToSecond);
    KDTreeBuilder<DataCell> treeBuilder = new KDTreeBuilder<DataCell>(featureColumns.size());
    // kept as double so the progress fraction below is computed in floating point
    final double rowCount = trainData.size();
    long count = 0;
    for (DataRow currentRow : trainData) {
        exec.checkCanceled();
        // fraction of rows already read, scaled to the first 10% of the overall progress
        // (rowCount cannot be 0 here because the loop body only runs for existing rows)
        exec.setProgress(0.1 * count / rowCount, "Reading row " + currentRow.getKey());
        count++;
        double[] features = createFeatureVector(currentRow, featureColumns);
        if (features == null) {
            setWarningMessage("Input table contains missing values, the " + "affected rows are ignored.");
        } else {
            DataCell thisClassCell = currentRow.getCell(classColIndex);
            // and finally add data
            treeBuilder.addPattern(features, thisClassCell);
            // compute the majority class for breaking possible ties later
            MutableInteger t = m_classDistribution.get(thisClassCell);
            if (t == null) {
                m_classDistribution.put(thisClassCell, new MutableInteger(1));
            } else {
                t.inc();
            }
        }
    }
    // and now use it to classify the test data...
    DataColumnSpec classColumnSpec = trainData.getDataTableSpec().getColumnSpec(classColIndex);
    exec.setMessage("Building kd-tree");
    KDTree<DataCell> tree = treeBuilder.buildTree(exec.createSubProgress(0.3));
    if (tree.size() < m_settings.k()) {
        setWarningMessage("There are only " + tree.size() + " patterns in the input table, but " + m_settings.k() + " nearest neighbours were requested for classification." + " The prediction will be the majority class for all" + " input patterns.");
    }
    exec.setMessage("Classifying");
    return createRearranger(inSpec2, classColumnSpec, featureColumns, firstToSecond, tree, numRowsTable2);
}
Also used : KDTreeBuilder(org.knime.base.util.kdtree.KDTreeBuilder) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) MutableInteger(org.knime.core.util.MutableInteger) ArrayList(java.util.ArrayList) DataRow(org.knime.core.data.DataRow) MutableInteger(org.knime.core.util.MutableInteger) DataColumnSpec(org.knime.core.data.DataColumnSpec) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) DataCell(org.knime.core.data.DataCell)

Example 7 with MutableInteger

use of org.knime.core.util.MutableInteger in project knime-core by knime.

the class BigGroupByTable method createGroupByTable.

/**
 * {@inheritDoc}
 *
 * <p>This implementation processes the input chunk wise: unless there are no group columns, the table is
 * sorted by the group columns first (60% of the progress) and then traversed once (40% of the progress).
 * Consecutive rows whose group values compare as equal form a chunk; whenever a row starts a new chunk the
 * result rows of the finished chunk are created and the chunk state is reset.
 */
@Override
protected BufferedDataTable createGroupByTable(final ExecutionContext exec, final BufferedDataTable table, final DataTableSpec resultSpec, final int[] groupColIdx) throws CanceledExecutionException {
    LOGGER.debug("Entering createGroupByTable(exec, table) " + "of class BigGroupByTable.");
    final DataTableSpec origSpec = table.getDataTableSpec();
    // sort the data table in order to process the input table chunk wise
    final BufferedDataTable sortedTable;
    final ExecutionContext groupExec;
    final DataValueComparator[] comparators;
    if (groupColIdx.length < 1) {
        // no group columns: nothing to sort by, so the input is used as is and
        // the complete progress is available for the grouping step
        sortedTable = table;
        groupExec = exec;
        comparators = new DataValueComparator[0];
    } else {
        // sorting gets 60% of the progress, grouping the remaining 40%
        final ExecutionContext sortExec = exec.createSubExecutionContext(0.6);
        exec.setMessage("Sorting input table...");
        sortedTable = sortTable(sortExec, table, getGroupCols());
        sortExec.setProgress(1.0);
        groupExec = exec.createSubExecutionContext(0.4);
        // one comparator per group column, taken from the column's data type
        comparators = new DataValueComparator[groupColIdx.length];
        for (int i = 0, length = groupColIdx.length; i < length; i++) {
            final DataColumnSpec colSpec = origSpec.getColumnSpec(groupColIdx[i]);
            comparators[i] = colSpec.getType().getComparator();
        }
    }
    final BufferedDataContainer dc = exec.createDataContainer(resultSpec);
    exec.setMessage("Creating groups");
    // group values of the chunk currently being collected and of the row currently being read;
    // both arrays are reused (overwritten) while iterating
    final DataCell[] previousGroup = new DataCell[groupColIdx.length];
    final DataCell[] currentGroup = new DataCell[groupColIdx.length];
    // shared counter handed to createTableRows for every chunk
    final MutableInteger groupCounter = new MutableInteger(0);
    boolean firstRow = true;
    // stored as double so that the progress fraction below is a floating point division
    final double numOfRows = sortedTable.size();
    long rowCounter = 0;
    // In the rare case that the DataCell comparator return 0 for two
    // data cells that are not equal we have to maintain a map with all
    // rows with equal cells in the group columns per chunk.
    // This variable stores for each chunk these members. A chunk consists
    // of rows which return 0 for the pairwise group value comparison.
    // Usually only equal data cells return 0 when compared with each other
    // but in rare occasions also data cells that are NOT equal return 0 when
    // compared to each other
    // (such as cells that contain chemical structures).
    // In this rare case this map will contain for each group of data cells
    // that are pairwise equal in the chunk a separate entry.
    final Map<GroupKey, Pair<ColumnAggregator[], Set<RowKey>>> chunkMembers = new LinkedHashMap<>(3);
    // the unusual-chunk message below is logged at most once per call
    boolean logUnusualCells = true;
    String groupLabel = "";
    // cannot put init to the constructor, as the super() constructor directly calls the current function
    initMissingValuesMap();
    for (final DataRow row : sortedTable) {
        // fetch the current group column values
        for (int i = 0, length = groupColIdx.length; i < length; i++) {
            currentGroup[i] = row.getCell(groupColIdx[i]);
        }
        if (firstRow) {
            // the first row defines the first chunk, so it must not trigger a chunk change below
            groupLabel = createGroupLabelForProgress(currentGroup);
            System.arraycopy(currentGroup, 0, previousGroup, 0, currentGroup.length);
            firstRow = false;
        }
        // check whether the current row starts a new chunk, i.e. whether its group values
        // no longer fall into the same chunk as the previous group column data cells
        if (!sameChunk(comparators, previousGroup, currentGroup)) {
            groupLabel = createGroupLabelForProgress(currentGroup);
            // the previous chunk is complete: create its result rows before starting the new chunk
            createTableRows(dc, chunkMembers, groupCounter);
            // set the current group as previous group
            System.arraycopy(currentGroup, 0, previousGroup, 0, currentGroup.length);
            if (logUnusualCells && chunkMembers.size() > 1) {
                // the finished chunk contained more than one distinct group key although all of its
                // rows compared as equal; log (once) the cell classes of the first key to help
                // identifying the data types that cause the problem
                if (LOGGER.isEnabledFor(LEVEL.INFO)) {
                    final StringBuilder buf = new StringBuilder();
                    buf.append("Data chunk with ");
                    buf.append(chunkMembers.size());
                    buf.append(" members occured in groupby node. " + "Involved classes are: ");
                    final GroupKey key = chunkMembers.keySet().iterator().next();
                    for (final DataCell cell : key.getGroupVals()) {
                        buf.append(cell.getClass().getCanonicalName());
                        buf.append(", ");
                    }
                    LOGGER.info(buf.toString());
                }
                logUnusualCells = false;
            }
            // reset the chunk members map
            chunkMembers.clear();
        }
        // process the row as one of the members of the current chunk
        // (the key created here wraps the reused currentGroup array and is only used for the lookup)
        Pair<ColumnAggregator[], Set<RowKey>> member = chunkMembers.get(new GroupKey(currentGroup));
        if (member == null) {
            // first row of this group within the chunk: set up fresh aggregators and the row key set
            Set<RowKey> rowKeys;
            if (isEnableHilite()) {
                rowKeys = new HashSet<>();
            } else {
                // immutable placeholder; row keys are only added below if hiliting is enabled
                rowKeys = Collections.emptySet();
            }
            member = new Pair<>(cloneColumnAggregators(), rowKeys);
            // store a copy of the group values as map key since currentGroup is overwritten
            // by the next row
            final DataCell[] groupKeys = new DataCell[currentGroup.length];
            System.arraycopy(currentGroup, 0, groupKeys, 0, currentGroup.length);
            chunkMembers.put(new GroupKey(groupKeys), member);
        }
        // compute the current row values
        for (final ColumnAggregator colAggr : member.getFirst()) {
            // NOTE(review): the column index is looked up by name for every row and aggregator
            final int colIdx = origSpec.findColumnIndex(colAggr.getOriginalColName());
            colAggr.getOperator(getGlobalSettings()).compute(row, colIdx);
        }
        if (isEnableHilite()) {
            member.getSecond().add(row.getKey());
        }
        groupExec.checkCanceled();
        groupExec.setProgress(++rowCounter / numOfRows, groupLabel);
    }
    // create the final row for the last chunk after processing the last
    // table row
    createTableRows(dc, chunkMembers, groupCounter);
    dc.close();
    return dc.getTable();
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) HashSet(java.util.HashSet) Set(java.util.Set) RowKey(org.knime.core.data.RowKey) DataValueComparator(org.knime.core.data.DataValueComparator) DataRow(org.knime.core.data.DataRow) LinkedHashMap(java.util.LinkedHashMap) DataColumnSpec(org.knime.core.data.DataColumnSpec) BufferedDataTable(org.knime.core.node.BufferedDataTable) Pair(org.knime.core.util.Pair) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) MutableInteger(org.knime.core.util.MutableInteger) ExecutionContext(org.knime.core.node.ExecutionContext) ColumnAggregator(org.knime.base.data.aggregation.ColumnAggregator) DataCell(org.knime.core.data.DataCell)

Example 8 with MutableInteger

use of org.knime.core.util.MutableInteger in project knime-core by knime.

the class MostFrequentValueStatistic method consumeRow.

/**
 * {@inheritDoc}
 *
 * <p>Counts the occurrence of the row's value in the observed column and remembers the value with the
 * highest count seen so far. Missing cells are ignored.
 */
@Override
protected void consumeRow(final DataRow dataRow) {
    final DataCell value = dataRow.getCell(m_colIdx);
    if (!value.isMissing()) {
        MutableInteger counter = m_nominalValues.get(value);
        if (counter != null) {
            counter.inc();
        } else {
            // first occurrence of this value
            counter = new MutableInteger(1);
            m_nominalValues.put(value, counter);
        }
        // only a strictly larger count replaces the current most frequent value
        final int occurrences = counter.intValue();
        if (occurrences > m_maxCount) {
            m_maxCount = occurrences;
            m_mostFrequent = value;
        }
    }
}
Also used : MutableInteger(org.knime.core.util.MutableInteger) DataCell(org.knime.core.data.DataCell)

Example 9 with MutableInteger

use of org.knime.core.util.MutableInteger in project knime-core by knime.

the class FileRowIterator method uniquifyRowHeader.

/*
 * Returns a row header that has not been handed out before. The passed header is registered in the map of
 * all created row headers; if it was already known, "_<n>" suffixes are appended (n taken from the counter
 * stored with the colliding header) until an unused header is found. The result may be identical to the
 * argument, and every header returned is recorded in the map.
 */
private String uniquifyRowHeader(final String newRowHeader) {
    String candidate = newRowHeader;
    // registering the candidate reveals whether it was seen before (non-null previous entry)
    Number previous = m_rowIDhash.put(candidate, NOSUFFIX);
    while (previous != null) {
        // the candidate is already taken: derive the next suffix from its stored counter
        final int seen = previous.intValue();
        assert seen >= NOSUFFIX.intValue();
        final int suffix = seen + 1;
        final Number restored;
        if (previous.equals(NOSUFFIX)) {
            // so far only the NOSUFFIX placeholder was stored; start a real counter
            assert suffix - 1 == NOSUFFIX.intValue();
            restored = new MutableInteger(suffix);
        } else {
            assert previous instanceof MutableInteger;
            ((MutableInteger) previous).inc();
            assert suffix == previous.intValue();
            // the incremented counter was overwritten with NOSUFFIX by the put above
            restored = previous;
        }
        m_rowIDhash.put(candidate, restored);
        candidate = candidate + "_" + suffix;
        previous = m_rowIDhash.put(candidate, NOSUFFIX);
    }
    return candidate;
}
Also used : MutableInteger(org.knime.core.util.MutableInteger)

Example 10 with MutableInteger

use of org.knime.core.util.MutableInteger in project knime-core by knime.

the class NominalValue method getNominalValues.

/**
 * Returns the nominal values counted for the given column together with their number of occurrences.
 *
 * <p>The result is a newly created map; the internally collected counts are left untouched, so the method
 * can be called repeatedly with the same outcome.
 *
 * @param colIndex index into the per-column array of collected nominal values
 * @return nominal values of the column mapped to their counts
 * @since 3.5
 */
public Map<DataValue, Integer> getNominalValues(final int colIndex) {
    // wildcard view so that plain (checked) casts suffice regardless of how the field is parameterized
    final Map<?, ?> counts = m_nominalValues[colIndex];
    final Map<DataValue, Integer> output = new HashMap<DataValue, Integer>(counts.size());
    // read-only iteration: nothing is modified, hence no ConcurrentModificationException can occur and
    // there is no need to remove the entries from the internal map while copying
    for (final Map.Entry<?, ?> entry : counts.entrySet()) {
        output.put((DataCell) entry.getKey(), ((MutableInteger) entry.getValue()).intValue());
    }
    return output;
}
Also used : MutableInteger(org.knime.core.util.MutableInteger) DataValue(org.knime.core.data.DataValue) HashMap(java.util.HashMap) MutableInteger(org.knime.core.util.MutableInteger) Iterator(java.util.Iterator) DataCell(org.knime.core.data.DataCell) HashMap(java.util.HashMap) Map(java.util.Map)

Aggregations

MutableInteger (org.knime.core.util.MutableInteger)32 DataCell (org.knime.core.data.DataCell)12 HashMap (java.util.HashMap)11 DataRow (org.knime.core.data.DataRow)8 RowKey (org.knime.core.data.RowKey)7 BufferedDataContainer (org.knime.core.node.BufferedDataContainer)6 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)6 HashSet (java.util.HashSet)5 DataTableSpec (org.knime.core.data.DataTableSpec)5 DefaultRow (org.knime.core.data.def.DefaultRow)5 BufferedDataTable (org.knime.core.node.BufferedDataTable)5 Set (java.util.Set)4 DataColumnSpec (org.knime.core.data.DataColumnSpec)4 ArrayList (java.util.ArrayList)3 LinkedHashMap (java.util.LinkedHashMap)3 DoubleCell (org.knime.core.data.def.DoubleCell)3 StringCell (org.knime.core.data.def.StringCell)3 LinkedList (java.util.LinkedList)2 Map (java.util.Map)2 Entry (java.util.Map.Entry)2