Search in sources :

Example 21 with MutableInteger

use of org.knime.core.util.MutableInteger in project knime-core by knime.

the class ValueCounterNodeModel method execute.

/**
 * {@inheritDoc}
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    final BufferedDataTable table = inData[0];
    final int countedColumn = table.getDataTableSpec().findColumnIndex(m_settings.columnName());
    // kept as double so that the progress fraction below is a floating point division
    final double totalRows = table.getRowCount();

    // number of occurrences per distinct cell of the selected column
    final Map<DataCell, MutableInteger> occurrences = new HashMap<DataCell, MutableInteger>();
    // keys of all rows that contain a given cell; only filled when hiliting is enabled
    final Map<DataCell, Set<RowKey>> rowsPerValue = new HashMap<DataCell, Set<RowKey>>();

    int processed = 0;
    for (final DataRow row : table) {
        exec.checkCanceled();
        exec.setProgress(processed / totalRows, occurrences.size() + " different values found");
        processed++;

        final DataCell value = row.getCell(countedColumn);
        MutableInteger counter = occurrences.get(value);
        if (counter == null) {
            counter = new MutableInteger(0);
            occurrences.put(value, counter);
        }
        counter.inc();

        if (!m_settings.hiliting()) {
            continue;
        }
        Set<RowKey> keys = rowsPerValue.get(value);
        if (keys == null) {
            keys = new HashSet<RowKey>();
            rowsPerValue.put(value, keys);
        }
        keys.add(row.getKey());
    }

    // order the distinct values with the comparator of the counted column's type
    final DataValueComparator valueOrder =
        table.getDataTableSpec().getColumnSpec(countedColumn).getType().getComparator();
    final Comparator<Map.Entry<DataCell, MutableInteger>> byValue =
        new Comparator<Map.Entry<DataCell, MutableInteger>>() {

            @Override
            public int compare(final Map.Entry<DataCell, MutableInteger> left,
                final Entry<DataCell, MutableInteger> right) {
                return valueOrder.compare(left.getKey(), right.getKey());
            }
        };
    final List<Map.Entry<DataCell, MutableInteger>> ordered =
        new ArrayList<Map.Entry<DataCell, MutableInteger>>(occurrences.entrySet());
    Collections.sort(ordered, byValue);

    // one output row per distinct value: the row key is the value's string form, the single cell its count
    final BufferedDataContainer container = exec.createDataContainer(TABLE_SPEC);
    for (final Map.Entry<DataCell, MutableInteger> valueAndCount : ordered) {
        final RowKey outputKey = new RowKey(valueAndCount.getKey().toString());
        final int[] countCell = {valueAndCount.getValue().intValue()};
        container.addRowToTable(new DefaultRow(outputKey, countCell));
    }
    container.close();

    if (m_settings.hiliting()) {
        // translate output row keys back to the input rows that contributed to them
        final Map<RowKey, Set<RowKey>> outputToInput = new HashMap<RowKey, Set<RowKey>>();
        for (final Map.Entry<DataCell, Set<RowKey>> valueAndRows : rowsPerValue.entrySet()) {
            outputToInput.put(new RowKey(valueAndRows.getKey().toString()), valueAndRows.getValue());
        }
        m_translator.setMapper(new DefaultHiLiteMapper(outputToInput));
    }
    return new BufferedDataTable[] {container.getTable()};
}
Also used : HashSet(java.util.HashSet) Set(java.util.Set) HashMap(java.util.HashMap) RowKey(org.knime.core.data.RowKey) ArrayList(java.util.ArrayList) DataRow(org.knime.core.data.DataRow) DataValueComparator(org.knime.core.data.DataValueComparator) Entry(java.util.Map.Entry) BufferedDataTable(org.knime.core.node.BufferedDataTable) DefaultHiLiteMapper(org.knime.core.node.property.hilite.DefaultHiLiteMapper) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) MutableInteger(org.knime.core.util.MutableInteger) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow) HashMap(java.util.HashMap) Map(java.util.Map)

Example 22 with MutableInteger

use of org.knime.core.util.MutableInteger in project knime-core by knime.

the class RowKeyUtil2 method changeRowKey.

/**
 * <p>
 * Replaces the row key by the values of the column with the given name and appends a new column with the old key
 * values if the <code>newColName</code> variable is a non empty <code>String</code>.
 * </p>
 * <p>
 * Call the {@link RowKeyUtil2#getDuplicatesCounter()} and {@link RowKeyUtil2#getMissingValueCounter()} methods to
 * get information about the replaced duplicates and missing values after this method is completed.
 * </p>
 * <p>
 * If the number of rows is unknown (<code>totalNoOfRows</code> is not positive) the internal maps are created with
 * a default capacity and only the progress message, not the progress value, is updated while processing.
 * </p>
 * @param inData in data rows a {@link RowInput}
 * @param outData output as {@link RowOutput}
 * @param exec the {@link ExecutionContext} to check for cancel and to provide status messages
 * @param selRowKeyColName the name of the column which should replace the row key or <code>null</code> if a new one
 *            should be created
 * @param appendColumn <code>true</code> if a new column should be created
 * @param newColSpec the {@link DataColumnSpec} of the new column or <code>null</code> if no column should be
 *            created at all
 * @param ensureUniqueness if set to <code>true</code> the method ensures the uniqueness of the row key even if the
 *            values of the selected row aren't unique
 * @param replaceMissingVals if set to <code>true</code> the method replaces missing values with ?
 * @param removeRowKeyCol removes the selected row key column if set to <code>true</code>
 * @param hiliteMap <code>true</code> if a map should be maintained that maps the new row id to the old row id
 * @param totalNoOfOutColumns number of columns in the output table
 * @param totalNoOfRows number of rows in the data table, -1 if not available
 * @throws Exception if the cancel button was pressed or the input data isn't valid.
 * @since 3.1
 */
void changeRowKey(final RowInput inData, final RowOutput outData, final ExecutionContext exec, final String selRowKeyColName, final boolean appendColumn, final DataColumnSpec newColSpec, final boolean ensureUniqueness, final boolean replaceMissingVals, final boolean removeRowKeyCol, final boolean hiliteMap, final int totalNoOfOutColumns, final int totalNoOfRows) throws Exception {
    LOGGER.debug("Entering changeRowKey(inData, exec, selRowKeyColName, " + "newColName) of class RowKeyUtil.");
    final DataTableSpec inSpec = inData.getDataTableSpec();
    final int newRowKeyColIdx;
    if (selRowKeyColName != null) {
        newRowKeyColIdx = inSpec.findColumnIndex(selRowKeyColName);
        if (newRowKeyColIdx < 0) {
            throw new InvalidSettingsException("Column name not found.");
        }
    } else {
        newRowKeyColIdx = -1;
    }
    // The row count may be -1 (unknown). A negative value must never reach the HashMap constructor
    // (it throws IllegalArgumentException), so fall back to a small default capacity instead.
    final boolean rowCountKnown = totalNoOfRows > 0;
    final int initialCapacity = rowCountKnown ? totalNoOfRows : 100;
    if (hiliteMap) {
        m_hiliteMapping = new HashMap<RowKey, Set<RowKey>>(initialCapacity);
    }
    // maps every key handed out so far to the last numeric suffix tried for it
    final Map<String, MutableInteger> vals = new HashMap<String, MutableInteger>(initialCapacity);
    // only meaningful when the row count is known; never negative or infinite
    final double progressPerRow = rowCountKnown ? 1.0 / totalNoOfRows : 0.0;
    // update the progress monitor every 1/1000th of the rows (every row if the count is small or unknown)
    final int checkPoint = Math.max((totalNoOfRows / 1000), 1);
    int rowCounter = 0;
    exec.setProgress(0.0, "Processing data...");
    m_missingValueCounter = 0;
    m_duplicatesCounter = 0;
    DataRow row;
    while ((row = inData.poll()) != null) {
        rowCounter++;
        final DataCell[] cells = new DataCell[totalNoOfOutColumns];
        int newCellCounter = 0;
        for (int i = 0, length = inSpec.getNumColumns(); i < length; i++) {
            if (removeRowKeyCol && i == newRowKeyColIdx) {
                continue;
            }
            cells[newCellCounter++] = row.getCell(i);
        }
        if (appendColumn) {
            // the old row key always goes into the last output column
            cells[totalNoOfOutColumns - 1] = new StringCell(row.getKey().getString());
        }
        final RowKey newKeyVal;
        if (newRowKeyColIdx >= 0) {
            final DataCell keyCell = row.getCell(newRowKeyColIdx);
            String key;
            if (keyCell.isMissing()) {
                if (replaceMissingVals) {
                    key = MISSING_VALUE_REPLACEMENT;
                    m_missingValueCounter++;
                } else {
                    throw new InvalidSettingsException("Missing value found in row " + rowCounter);
                }
            } else {
                key = keyCell.toString();
            }
            if (ensureUniqueness) {
                if (vals.containsKey(key)) {
                    // replaced missing values are reported by the missing value counter only
                    if (!keyCell.isMissing()) {
                        m_duplicatesCounter++;
                    }
                    // continue counting from the last suffix used for this base key until a free key is found
                    final String baseKey = key;
                    final MutableInteger index = vals.get(baseKey);
                    do {
                        index.inc();
                        key = baseKey + "(" + index.toString() + ")";
                    } while (vals.containsKey(key));
                }
                // put the current key which is new into the values map
                vals.put(key, new MutableInteger(0));
            }
            newKeyVal = new RowKey(key);
        } else {
            newKeyVal = RowKey.createRowKey(rowCounter - 1);
        }
        final DefaultRow newRow = new DefaultRow(newKeyVal, cells);
        outData.push(newRow);
        if (hiliteMap) {
            final Set<RowKey> oldKeys = new HashSet<RowKey>(1);
            oldKeys.add(row.getKey());
            m_hiliteMapping.put(newKeyVal, oldKeys);
        }
        exec.checkCanceled();
        if (rowCounter % checkPoint == 0) {
            if (rowCountKnown) {
                exec.setProgress(progressPerRow * rowCounter, rowCounter + " rows of " + totalNoOfRows + " rows processed.");
            } else {
                // no total available: report the message only instead of a bogus progress value
                exec.setMessage(rowCounter + " rows processed.");
            }
        }
    }
    exec.setProgress(1.0, "Finished");
    outData.close();
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) HashSet(java.util.HashSet) Set(java.util.Set) RowKey(org.knime.core.data.RowKey) HashMap(java.util.HashMap) MutableInteger(org.knime.core.util.MutableInteger) DataRow(org.knime.core.data.DataRow) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) StringCell(org.knime.core.data.def.StringCell) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow) HashSet(java.util.HashSet)

Example 23 with MutableInteger

use of org.knime.core.util.MutableInteger in project knime-core by knime.

the class EntropyCalculator method entropy.

/**
 * Computes the entropy of a single cluster with respect to a reference clustering.
 *
 * @param ref the reference clustering, mapping each pattern to its reference cluster id
 * @param pats the patterns that form the single cluster to score
 * @return the (not-normalized) entropy (base 2) of <code>pats</code> wrt. <code>ref</code>
 */
public static double entropy(final Map<RowKey, RowKey> ref, final Set<RowKey> pats) {
    // reference cluster id -> number of patterns of "pats" that belong to it
    final HashMap<RowKey, MutableInteger> membersPerRefCluster = new HashMap<RowKey, MutableInteger>();
    for (final RowKey member : pats) {
        final RowKey refCluster = ref.get(member);
        final MutableInteger seen = membersPerRefCluster.get(refCluster);
        if (seen != null) {
            seen.inc();
            continue;
        }
        // first pattern of this reference cluster
        membersPerRefCluster.put(refCluster, new MutableInteger(1));
    }

    final double total = pats.size();
    final double ln2 = Math.log(2.0);
    double result = 0.0;
    for (final MutableInteger members : membersPerRefCluster.values()) {
        final double fraction = members.intValue() / total;
        // log to base 2 expressed via the natural logarithm
        result -= fraction * Math.log(fraction) / ln2;
    }
    return result;
}
Also used : RowKey(org.knime.core.data.RowKey) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) MutableInteger(org.knime.core.util.MutableInteger)

Example 24 with MutableInteger

use of org.knime.core.util.MutableInteger in project knime-core by knime.

the class ClassAttributeModel method addValueInternal.

/**
 * {@inheritDoc}
 */
@Override
void addValueInternal(final String classValue, final DataCell attrValue) throws TooManyValuesException {
    if (attrValue.isMissing()) {
        throw new IllegalArgumentException("Missing value not allowed as class value");
    }
    MutableInteger recordsOfClass = m_recsCounterByClassVal.get(classValue);
    final boolean firstOccurrence = recordsOfClass == null;
    // a class value that has not been seen before is only accepted while the size limit is not exceeded
    if (firstOccurrence && m_recsCounterByClassVal.size() > m_maxNoOfClassVals) {
        throw new TooManyValuesException("Class value " + classValue + " doesn't fit into model");
    }
    if (firstOccurrence) {
        recordsOfClass = new MutableInteger(0);
        m_recsCounterByClassVal.put(classValue, recordsOfClass);
    }
    recordsOfClass.inc();
    m_totalNoOfRecs++;
}
Also used : MutableInteger(org.knime.core.util.MutableInteger)

Example 25 with MutableInteger

use of org.knime.core.util.MutableInteger in project knime-core by knime.

the class NominalAttributeModel method exportToPMMLInternal.

/**
 * {@inheritDoc}
 */
@Override
void exportToPMMLInternal(final BayesInput bayesInput) {
    for (final String attrVal : m_attributeVals) {
        // one PairCounts element per attribute value ...
        final PairCounts countsForValue = bayesInput.addNewPairCounts();
        countsForValue.setValue(attrVal);
        final TargetValueCounts countsPerClass = countsForValue.addNewTargetValueCounts();
        // ... holding one TargetValueCount per class value
        for (final NominalClassValue classValue : m_classValues.values()) {
            final TargetValueCount classEntry = countsPerClass.addNewTargetValueCount();
            if (!ignoreMissingVals()) {
                PMMLNaiveBayesModelTranslator.setIntExtension(classEntry.addNewExtension(), NominalClassValue.MISSING_VALUE_COUNTER, classValue.getNoOfMissingValueRecs());
            }
            classEntry.setValue(classValue.getClassValue());
            // attribute values without a counter for this class are exported with a count of 0
            final MutableInteger records = classValue.m_recsByAttrValue.get(attrVal);
            final int count = records == null ? 0 : records.intValue();
            classEntry.setCount(count);
        }
    }
}
Also used : PairCounts(org.dmg.pmml.PairCountsDocument.PairCounts) TargetValueCounts(org.dmg.pmml.TargetValueCountsDocument.TargetValueCounts) MutableInteger(org.knime.core.util.MutableInteger) TargetValueCount(org.dmg.pmml.TargetValueCountDocument.TargetValueCount)

Aggregations

MutableInteger (org.knime.core.util.MutableInteger)32 DataCell (org.knime.core.data.DataCell)12 HashMap (java.util.HashMap)11 DataRow (org.knime.core.data.DataRow)8 RowKey (org.knime.core.data.RowKey)7 BufferedDataContainer (org.knime.core.node.BufferedDataContainer)6 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)6 HashSet (java.util.HashSet)5 DataTableSpec (org.knime.core.data.DataTableSpec)5 DefaultRow (org.knime.core.data.def.DefaultRow)5 BufferedDataTable (org.knime.core.node.BufferedDataTable)5 Set (java.util.Set)4 DataColumnSpec (org.knime.core.data.DataColumnSpec)4 ArrayList (java.util.ArrayList)3 LinkedHashMap (java.util.LinkedHashMap)3 DoubleCell (org.knime.core.data.def.DoubleCell)3 StringCell (org.knime.core.data.def.StringCell)3 LinkedList (java.util.LinkedList)2 Map (java.util.Map)2 Entry (java.util.Map.Entry)2