Search in sources :

Example 6 with MutableInteger

use of org.knime.core.util.MutableInteger in project knime-core by knime.

the class KnnNodeModel method createRearranger.

     * Creates a column rearranger. NOTE: This call possibly involves heavier calculations since the kd-tree is determined here based on the training data.
     * @param numRowsTable2 - can be -1 if can't be determined (streaming)
/**
 * Reads the training table row by row into a kd-tree builder, builds the kd-tree and then
 * delegates to the overloaded {@code createRearranger} that takes the finished tree.
 *
 * NOTE(review): this listing appears to be an excerpt with closing braces and some short
 * statements elided; confirm every hedged remark below against the full source.
 *
 * @param trainData table whose rows are added as patterns to the kd-tree
 * @param inSpec2 spec of the second input; checked together with the training spec by
 *            {@code checkInputTables} and handed to the overloaded rearranger factory
 * @param exec used for progress reporting and for the sub-progress of the tree build
 * @param numRowsTable2 passed through unchanged to the overloaded rearranger factory
 * @return the rearranger produced by the overloaded {@code createRearranger}
 * @throws CanceledExecutionException declared by the signature; the raising call is not
 *             identifiable from this excerpt
 * @throws InvalidSettingsException if the configured class column is not found in the
 *             training table spec
 */
private ColumnRearranger createRearranger(final BufferedDataTable trainData, final DataTableSpec inSpec2, final ExecutionContext exec, final long numRowsTable2) throws CanceledExecutionException, InvalidSettingsException {
    // resolve the configured class column in the training spec; -1 means "not found"
    int classColIndex = trainData.getDataTableSpec().findColumnIndex(m_settings.classColumn());
    if (classColIndex == -1) {
        throw new InvalidSettingsException("Invalid class column chosen.");
    // both collections are handed to checkInputTables and used afterwards;
    // presumably that call fills them - TODO confirm
    List<Integer> featureColumns = new ArrayList<Integer>();
    Map<Integer, Integer> firstToSecond = new HashMap<Integer, Integer>();
    checkInputTables(new DataTableSpec[] { trainData.getDataTableSpec(), inSpec2 }, featureColumns, firstToSecond);
    // the builder is created with one dimension per feature column
    KDTreeBuilder<DataCell> treeBuilder = new KDTreeBuilder<DataCell>(featureColumns.size());
    int count = 0;
    for (DataRow currentRow : trainData) {
        // NOTE(review): the progress value multiplies by the table size instead of dividing
        // by it, and no increment of count is visible in this excerpt - verify that the
        // reported progress stays within the expected range
        exec.setProgress(0.1 * count * trainData.size(), "Reading row " + currentRow.getKey());
        double[] features = createFeatureVector(currentRow, featureColumns);
        // a null feature vector only triggers the warning; the row is not added to the tree
        if (features == null) {
            setWarningMessage("Input table contains missing values, the " + "affected rows are ignored.");
        } else {
            DataCell thisClassCell = currentRow.getCell(classColIndex);
            // and finally add data
            treeBuilder.addPattern(features, thisClassCell);
            // compute the majority class for breaking possible ties later
            MutableInteger t = m_classDistribution.get(thisClassCell);
            if (t == null) {
                // first occurrence of this class value: start its counter at 1
                m_classDistribution.put(thisClassCell, new MutableInteger(1));
            } else {
                // NOTE(review): the else body is not visible here; presumably it increments t - confirm
    // and now use it to classify the test data...
    DataColumnSpec classColumnSpec = trainData.getDataTableSpec().getColumnSpec(classColIndex);
    exec.setMessage("Building kd-tree");
    KDTree<DataCell> tree = treeBuilder.buildTree(exec.createSubProgress(0.3));
    // warn when the tree holds fewer patterns than the requested number of neighbours
    if (tree.size() < m_settings.k()) {
        setWarningMessage("There are only " + tree.size() + " patterns in the input table, but " + m_settings.k() + " nearest neighbours were requested for classification." + " The prediction will be the majority class for all" + " input patterns.");
    // delegate to the overload that works on the finished tree
    ColumnRearranger c = createRearranger(inSpec2, classColumnSpec, featureColumns, firstToSecond, tree, numRowsTable2);
    return c;
Also used : KDTreeBuilder(org.knime.base.util.kdtree.KDTreeBuilder) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) MutableInteger(org.knime.core.util.MutableInteger) ArrayList(java.util.ArrayList) DataRow(org.knime.core.data.DataRow) MutableInteger(org.knime.core.util.MutableInteger) DataColumnSpec(org.knime.core.data.DataColumnSpec) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) DataCell(org.knime.core.data.DataCell)

Example 7 with MutableInteger

use of org.knime.core.util.MutableInteger in project knime-core by knime.

the class BigGroupByTable method createGroupByTable.

 * {@inheritDoc}
/**
 * Walks the input table (sorted by the group columns when there are any) row by row,
 * aggregates consecutive rows into chunks and writes the aggregated rows of each finished
 * chunk into a data container whose table is returned.
 *
 * NOTE(review): this listing appears to be an excerpt with closing braces and some short
 * statements elided; confirm every hedged remark below against the full source.
 *
 * @param exec execution context used for messages, sub contexts and the result container
 * @param table the input table
 * @param resultSpec spec of the container the result rows are written to
 * @param groupColIdx indices of the group columns in the input table
 * @return the table of the container filled by {@code createTableRows}
 * @throws CanceledExecutionException declared by the signature; the raising call is not
 *             identifiable from this excerpt
 */
protected BufferedDataTable createGroupByTable(final ExecutionContext exec, final BufferedDataTable table, final DataTableSpec resultSpec, final int[] groupColIdx) throws CanceledExecutionException {
    LOGGER.debug("Entering createGroupByTable(exec, table) " + "of class BigGroupByTable.");
    final DataTableSpec origSpec = table.getDataTableSpec();
    // sort the data table in order to process the input table chunk wise
    final BufferedDataTable sortedTable;
    final ExecutionContext groupExec;
    final DataValueComparator[] comparators;
    if (groupColIdx.length < 1) {
        // no group columns: use the table as is, report grouping progress on exec itself
        // and work with an empty comparator array
        sortedTable = table;
        groupExec = exec;
        comparators = new DataValueComparator[0];
    } else {
        // sorting gets a 0.6 sub context, the grouping pass the remaining 0.4
        final ExecutionContext sortExec = exec.createSubExecutionContext(0.6);
        exec.setMessage("Sorting input table...");
        sortedTable = sortTable(sortExec, table, getGroupCols());
        groupExec = exec.createSubExecutionContext(0.4);
        // one comparator per group column, taken from the type of that column
        comparators = new DataValueComparator[groupColIdx.length];
        for (int i = 0, length = groupColIdx.length; i < length; i++) {
            final DataColumnSpec colSpec = origSpec.getColumnSpec(groupColIdx[i]);
            comparators[i] = colSpec.getType().getComparator();
    final BufferedDataContainer dc = exec.createDataContainer(resultSpec);
    exec.setMessage("Creating groups");
    // currentGroup is refilled for every row; previousGroup keeps the values of the chunk in progress
    final DataCell[] previousGroup = new DataCell[groupColIdx.length];
    final DataCell[] currentGroup = new DataCell[groupColIdx.length];
    final MutableInteger groupCounter = new MutableInteger(0);
    boolean firstRow = true;
    // held as double so that the progress division below is a floating point division
    final double numOfRows = sortedTable.size();
    long rowCounter = 0;
    // In the rare case that the DataCell comparator returns 0 for two
    // data cells that are not equal we have to maintain a map with all
    // rows with equal cells in the group columns per chunk.
    // This variable stores for each chunk these members. A chunk consists
    // of rows which return 0 for the pairwise group value comparison.
    // Usually only equal data cells return 0 when compared with each other
    // but in rare occasions also data cells that are NOT equal return 0 when
    // compared to each other
    // (such as cells that contain chemical structures).
    // In this rare case this map will contain for each group of data cells
    // that are pairwise equal in the chunk a separate entry.
    final Map<GroupKey, Pair<ColumnAggregator[], Set<RowKey>>> chunkMembers = new LinkedHashMap<>(3);
    // ensures the unusual-cells message below is logged at most once per call
    boolean logUnusualCells = true;
    String groupLabel = "";
    // cannot put init to the constructor, as the super() constructor directly calls the current function
    for (final DataRow row : sortedTable) {
        // fetch the current group column values
        for (int i = 0, length = groupColIdx.length; i < length; i++) {
            currentGroup[i] = row.getCell(groupColIdx[i]);
        if (firstRow) {
            // the very first row defines the initial chunk and progress label
            groupLabel = createGroupLabelForProgress(currentGroup);
            System.arraycopy(currentGroup, 0, previousGroup, 0, currentGroup.length);
            firstRow = false;
        // group column data cells
        if (!sameChunk(comparators, previousGroup, currentGroup)) {
            // chunk boundary: write out the finished chunk before starting the next one
            groupLabel = createGroupLabelForProgress(currentGroup);
            createTableRows(dc, chunkMembers, groupCounter);
            // set the current group as previous group
            System.arraycopy(currentGroup, 0, previousGroup, 0, currentGroup.length);
            // more than one entry means the comparators treated non-equal cells as one chunk
            if (logUnusualCells && chunkMembers.size() > 1) {
                // cause the problem
                if (LOGGER.isEnabledFor(LEVEL.INFO)) {
                    final StringBuilder buf = new StringBuilder();
                    // NOTE(review): no count is appended between the next two fragments and the
                    // loop body below only appends a separator; the value appends and the
                    // logging call are presumably elided in this excerpt - confirm
                    buf.append("Data chunk with ");
                    buf.append(" members occured in groupby node. " + "Involved classes are: ");
                    final GroupKey key = chunkMembers.keySet().iterator().next();
                    for (final DataCell cell : key.getGroupVals()) {
                        buf.append(", ");
                logUnusualCells = false;
            // reset the chunk members map
            // NOTE(review): the reset statement itself is not visible in this excerpt - confirm
        // process the row as one of the members of the current chunk
        Pair<ColumnAggregator[], Set<RowKey>> member = chunkMembers.get(new GroupKey(currentGroup));
        if (member == null) {
            // first row with these group values in this chunk: create its aggregators
            Set<RowKey> rowKeys;
            if (isEnableHilite()) {
                rowKeys = new HashSet<>();
            } else {
                rowKeys = Collections.emptySet();
            member = new Pair<>(cloneColumnAggregators(), rowKeys);
            // the key gets its own copy of the values because currentGroup is overwritten for every row
            final DataCell[] groupKeys = new DataCell[currentGroup.length];
            System.arraycopy(currentGroup, 0, groupKeys, 0, currentGroup.length);
            chunkMembers.put(new GroupKey(groupKeys), member);
        // compute the current row values
        for (final ColumnAggregator colAggr : member.getFirst()) {
            // the column index is looked up by the aggregator's original column name
            final int colIdx = origSpec.findColumnIndex(colAggr.getOriginalColName());
            colAggr.getOperator(getGlobalSettings()).compute(row, colIdx);
        if (isEnableHilite()) {
            // NOTE(review): body not visible; presumably records the row key in the member's key set - confirm
        groupExec.setProgress(++rowCounter / numOfRows, groupLabel);
    // create the final row for the last chunk after processing the last
    // table row
    createTableRows(dc, chunkMembers, groupCounter);
    return dc.getTable();
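Also used : DataTableSpec(org.knime.core.data.DataTableSpec) HashSet(java.util.HashSet) Set(java.util.Set) RowKey(org.knime.core.data.RowKey) DataValueComparator(org.knime.core.data.DataValueComparator) DataRow(org.knime.core.data.DataRow) LinkedHashMap(java.util.LinkedHashMap) DataColumnSpec(org.knime.core.data.DataColumnSpec) BufferedDataTable(org.knime.core.node.BufferedDataTable) Pair(org.knime.core.util.Pair) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) MutableInteger(org.knime.core.util.MutableInteger) ExecutionContext(org.knime.core.node.ExecutionContext) ColumnAggregator(org.knime.base.data.aggregation.ColumnAggregator) DataCell(org.knime.core.data.DataCell)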

Example 8 with MutableInteger

use of org.knime.core.util.MutableInteger in project knime-core by knime.

the class MostFrequentValueStatistic method consumeRow.

 * {@inheritDoc}
/**
 * Counts the value found in column {@code m_colIdx} of the given row and keeps track of the
 * value with the highest count seen so far.
 *
 * NOTE(review): this listing appears to be an excerpt with closing braces and some short
 * statements elided; confirm the hedged remarks below against the full source.
 *
 * @param dataRow the row whose cell at {@code m_colIdx} is counted
 */
protected void consumeRow(final DataRow dataRow) {
    DataCell cell = dataRow.getCell(m_colIdx);
    if (cell.isMissing()) {
        // NOTE(review): branch body not visible; presumably missing cells are skipped - confirm
    MutableInteger i = m_nominalValues.get(cell);
    if (i == null) {
        // first occurrence of this value: register a counter starting at 1
        i = new MutableInteger(1);
        m_nominalValues.put(cell, i);
    } else {;
        // NOTE(review): only a stray ';' is left of the else body; presumably the existing
        // counter is incremented here - confirm
    // remember the value whose counter exceeds the current maximum
    if (i.intValue() > m_maxCount) {
        m_maxCount = i.intValue();
        m_mostFrequent = cell;
Also used : MutableInteger(org.knime.core.util.MutableInteger) DataCell(org.knime.core.data.DataCell)

Example 9 with MutableInteger

use of org.knime.core.util.MutableInteger in project knime-core by knime.

the class FileRowIterator method uniquifyRowHeader.

     * Checks whether newRowHeader is already in the hash set of all created row
     * headers and, if so, appends a suffix to make it unique. It returns a
     * unique row header, which may be the same as the one passed in (and
     * adds every row header it returns to the hash set).
/**
 * Returns a row header that has not been handed out before: either the argument itself or
 * the argument extended by one or more {@code "_" + index} suffixes. Every candidate that is
 * tried is registered in {@code m_rowIDhash}.
 *
 * NOTE(review): this listing appears to be an excerpt with closing braces and some short
 * statements elided; confirm the hedged remarks below against the full source.
 *
 * @param newRowHeader the row header to make unique
 * @return the argument if it was not registered yet, otherwise a suffixed variant
 */
private String uniquifyRowHeader(final String newRowHeader) {
    // put() registers the header and returns the previous mapping, or null if there was none
    Number oldSuffix = m_rowIDhash.put(newRowHeader, NOSUFFIX);
    if (oldSuffix == null) {
        // haven't seen the rowID so far.
        return newRowHeader;
    String result = newRowHeader;
    // keep extending the candidate until it turns out to be new
    while (oldSuffix != null) {
        // we have seen this rowID before!
        int idx = oldSuffix.intValue();
        assert idx >= NOSUFFIX.intValue();
        // NOTE(review): both assertions further down only hold if idx is incremented at this
        // point; that statement is presumably elided in this excerpt - confirm
        if (oldSuffix.equals(NOSUFFIX)) {
            // until now the NOSUFFIX placeholder was in the hash
            assert idx - 1 == NOSUFFIX.intValue();
            // replace the placeholder by a real counter for this header
            m_rowIDhash.put(result, new MutableInteger(idx));
        } else {
            assert oldSuffix instanceof MutableInteger;
            ((MutableInteger) oldSuffix).inc();
            assert idx == oldSuffix.intValue();
            // put back the old (incr.) suffix (overridden with NOSUFFIX).
            m_rowIDhash.put(result, oldSuffix);
        // build the next candidate and register it; a non-null return value means that the
        // candidate is taken as well, so the loop runs again
        result = result + "_" + idx;
        oldSuffix = m_rowIDhash.put(result, NOSUFFIX);
    return result;
Also used : MutableInteger(org.knime.core.util.MutableInteger)

Example 10 with MutableInteger

use of org.knime.core.util.MutableInteger in project knime-core by knime.

the class NominalValue method getNominalValues.

 * @param colIndex
 * @return nominal values of the column
 * @since 3.5
/**
 * Copies the entries stored in {@code m_nominalValues[colIndex]} into a new map, converting
 * each counter to an {@code Integer} via {@code intValue()}.
 *
 * NOTE(review): this listing appears to be an excerpt with closing braces and some short
 * expressions elided; confirm the hedged remarks below against the full source.
 *
 * @param colIndex index into {@code m_nominalValues}
 * @return a newly created map from value to count
 */
public Map<DataValue, Integer> getNominalValues(final int colIndex) {
    // NOTE(review): raw Iterator type; this is why the unchecked cast below is needed
    Iterator it = m_nominalValues[colIndex].entrySet().iterator();
    // the result map is created with the size of the source map as initial capacity
    Map<DataValue, Integer> output = new HashMap<DataValue, Integer>(m_nominalValues[colIndex].size());
    while (it.hasNext()) {
        // NOTE(review): the cast has no operand in this excerpt; presumably the next
        // iterator element is cast here - confirm
        @SuppressWarnings("unchecked") Map.Entry<DataCell, MutableInteger> pair = (Map.Entry<DataCell, MutableInteger>);
        // if (!pair.getKey().isMissing()) {
        output.put(pair.getKey(), pair.getValue().intValue());
        // } //else {
        // output.put(((MissingCell)pair.getKey()).toString(), pair.getValue().intValue());
        // }
        // System.out.println( + " = " + );
        // avoids a ConcurrentModificationException
        // NOTE(review): the statement the comment above refers to is not visible here. If it
        // removes the entry through the iterator, the source map is emptied by this call -
        // confirm whether that is intended
    return output;
Also used : MutableInteger(org.knime.core.util.MutableInteger) DataValue(org.knime.core.data.DataValue) HashMap(java.util.HashMap) MutableInteger(org.knime.core.util.MutableInteger) Iterator(java.util.Iterator) DataCell(org.knime.core.data.DataCell) HashMap(java.util.HashMap) Map(java.util.Map)


MutableInteger (org.knime.core.util.MutableInteger)32 DataCell ( HashMap (java.util.HashMap)11 DataRow ( RowKey ( BufferedDataContainer (org.knime.core.node.BufferedDataContainer)6 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)6 HashSet (java.util.HashSet)5 DataTableSpec ( DefaultRow ( BufferedDataTable (org.knime.core.node.BufferedDataTable)5 Set (java.util.Set)4 DataColumnSpec ( ArrayList (java.util.ArrayList)3 LinkedHashMap (java.util.LinkedHashMap)3 DoubleCell ( StringCell ( LinkedList (java.util.LinkedList)2 Map (java.util.Map)2 Entry (java.util.Map.Entry)2