Search in sources:

Example 1 with DataValueComparator

use of org.knime.core.data.DataValueComparator in project knime-core by knime.

the class StatisticsTable method calculateAllMoments.

/**
 * Calculates <b>all the statistical moments in one pass</b>. After the
 * call of this operation, the statistical moments can be obtained very fast
 * from all the other methods.
 *
 * <p>In a single iteration over {@code m_table} this method tracks, per column,
 * the minimum and maximum cell (using the column type's comparator), the
 * number of missing cells and - for columns compatible with
 * {@link DoubleValue} - the sum and the sum of squares. From those it derives
 * mean and (sample) variance and finally builds a new table spec whose column
 * domains carry the observed bounds.
 *
 * @param rowCount Row count of table for progress, may be NaN if unknown
 *            (progress is then reported as 0).
 * @param exec object to check with if user canceled the operation; may be
 *            {@code null}, in which case neither progress nor cancellation
 *            is handled
 * @throws CanceledExecutionException if user canceled
 * @throws IllegalArgumentException if rowCount argument &lt; 0
 */
protected void calculateAllMoments(final double rowCount, final ExecutionMonitor exec) throws CanceledExecutionException {
    // note: a NaN rowCount passes this check on purpose (NaN < 0.0 is false)
    if (rowCount < 0.0) {
        throw new IllegalArgumentException("rowCount argument must not < 0: " + rowCount);
    }
    final DataTableSpec origSpec = m_table.getDataTableSpec();
    final int numOfCols = origSpec.getNumColumns();
    // the number of non-missing cells in each double-compatible column
    // (freshly allocated arrays are already zero-filled)
    final int[] validCount = new int[numOfCols];
    final double[] sumsquare = new double[numOfCols];
    final DataValueComparator[] comp = new DataValueComparator[numOfCols];
    // column types do not change while iterating, so the double
    // compatibility is determined once per column instead of once per cell
    final boolean[] isDoubleCompatible = new boolean[numOfCols];
    for (int i = 0; i < numOfCols; i++) {
        final DataType type = origSpec.getColumnSpec(i).getType();
        comp[i] = type.getComparator();
        assert comp[i] != null;
        isDoubleCompatible[i] = type.isCompatible(DoubleValue.class);
    }
    int nrRows = 0;
    for (RowIterator rowIt = m_table.iterator(); rowIt.hasNext(); nrRows++) {
        final DataRow row = rowIt.next();
        if (exec != null) {
            final double prog = Double.isNaN(rowCount) ? 0.0 : nrRows / rowCount;
            exec.setProgress(prog, "Calculating statistics, processing row " + (nrRows + 1) + " (\"" + row.getKey() + "\")");
            // throws exception if user canceled
            exec.checkCanceled();
        }
        for (int c = 0; c < numOfCols; c++) {
            final DataCell cell = row.getCell(c);
            if (cell.isMissing()) {
                m_missingValueCnt[c]++;
                continue;
            }
            // keep the min and max for each column
            if ((m_minValues[c] == null) || (comp[c].compare(cell, m_minValues[c]) < 0)) {
                m_minValues[c] = cell;
            }
            if ((m_maxValues[c] == null) || (comp[c].compare(m_maxValues[c], cell) < 0)) {
                m_maxValues[c] = cell;
            }
            // for double columns we calc the sum (for the mean calc)
            if (isDoubleCompatible[c]) {
                final double d = ((DoubleValue) cell).getDoubleValue();
                // a NaN sum marks "nothing added yet" and is replaced
                // rather than added to
                if (Double.isNaN(m_sum[c])) {
                    m_sum[c] = d;
                } else {
                    m_sum[c] += d;
                }
                sumsquare[c] += d * d;
                validCount[c]++;
            }
        }
        // hook for subclasses to accumulate additional per-row statistics
        calculateMomentInSubClass(row);
    }
    m_nrRows = nrRows;
    for (int j = 0; j < numOfCols; j++) {
        // no valid (double) values seen: min, max, mean and variance are
        // all reported as missing / NaN
        if (validCount[j] == 0 || m_minValues[j] == null) {
            final DataCell mc = DataType.getMissingCell();
            m_minValues[j] = mc;
            m_maxValues[j] = mc;
            m_meanValues[j] = Double.NaN;
            m_varianceValues[j] = Double.NaN;
        } else {
            m_meanValues[j] = m_sum[j] / validCount[j];
            if (validCount[j] > 1) {
                // sample variance from sum and sum of squares
                m_varianceValues[j] = (sumsquare[j] - ((m_sum[j] * m_sum[j]) / validCount[j])) / (validCount[j] - 1);
            } else {
                m_varianceValues[j] = 0.0;
            }
            // round-off errors resulting in negative variance values
            // NOTE(review): the lower bound -1.0E8 looks like it was meant to
            // be -1.0E-8; as written almost every negative value is clamped.
            // Left unchanged because tightening it could trip the assertion
            // below on data with large magnitudes - confirm intent.
            if (m_varianceValues[j] < 0.0 && m_varianceValues[j] > -1.0E8) {
                m_varianceValues[j] = 0.0;
            }
            assert m_varianceValues[j] >= 0.0 : "Variance cannot be negative (column \"" + origSpec.getColumnSpec(j).getName() + "\"): " + m_varianceValues[j];
        }
    }
    // compute resulting table spec
    final DataColumnSpec[] cSpec = new DataColumnSpec[numOfCols];
    for (int c = 0; c < numOfCols; c++) {
        final DataColumnSpec s = origSpec.getColumnSpec(c);
        // we create domains with our bounds; possible values are taken over
        // from the existing domain, missing bounds are passed as null
        final Set<DataCell> values = (s.getDomain() == null ? null : s.getDomain().getValues());
        final DataCell lowerBound = (m_minValues[c] == null || m_minValues[c].isMissing()) ? null : m_minValues[c];
        final DataCell upperBound = (m_maxValues[c] == null || m_maxValues[c].isMissing()) ? null : m_maxValues[c];
        final DataColumnDomain newDomain = new DataColumnDomainCreator(values, lowerBound, upperBound).createDomain();
        final DataColumnSpecCreator creator = new DataColumnSpecCreator(s);
        creator.setDomain(newDomain);
        cSpec[c] = creator.createSpec();
    }
    m_tSpec = new DataTableSpec(cSpec);
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) DataColumnDomainCreator(org.knime.core.data.DataColumnDomainCreator) DataValueComparator(org.knime.core.data.DataValueComparator) DataRow(org.knime.core.data.DataRow) DataColumnSpec(org.knime.core.data.DataColumnSpec) DataColumnDomain(org.knime.core.data.DataColumnDomain) DoubleValue(org.knime.core.data.DoubleValue) RowIterator(org.knime.core.data.RowIterator) DataCell(org.knime.core.data.DataCell) DataType(org.knime.core.data.DataType)

Example 2 with DataValueComparator

use of org.knime.core.data.DataValueComparator in project knime-core by knime.

the class AccuracyScorerNodeModel method sort.

/**
 * Sorts the given cells in place according to the configured sorting
 * strategy and, if requested, reverses the resulting order. An empty array
 * is left untouched.
 *
 * @param order The cells to sort.
 * @throws IllegalStateException if the strategy cannot be applied to the
 *             common type of the cells or is not recognized
 */
private void sort(final DataCell[] order) {
    if (order.length == 0) {
        return;
    }
    // determine the common super type of all cells
    DataType commonType = order[0].getType();
    for (int i = 0; i < order.length; i++) {
        commonType = DataType.getCommonSuperType(commonType, order[i].getType());
    }
    final Comparator<DataCell> cellComparator;
    switch(m_sortingStrategy) {
        case Unsorted:
            // keep whatever order was handed in
            return;
        case InsertionOrder:
            if (m_sortingReversed) {
                reverse(order);
            }
            return;
        case Numeric:
            if (!DoubleCell.TYPE.isASuperTypeOf(commonType)) {
                throw new IllegalStateException("Numerical sorting strategy is not supported.");
            }
            cellComparator = commonType.getComparator();
            break;
        case Lexical:
            if (StringCell.TYPE.isASuperTypeOf(commonType)) {
                final Collator collator = Collator.getInstance();
                // do not try to combine characters
                collator.setDecomposition(Collator.NO_DECOMPOSITION);
                // case and accents matter.
                collator.setStrength(Collator.IDENTICAL);
                @SuppressWarnings("unchecked") final Comparator<String> stringOrder = (Comparator<String>) (Comparator<?>) collator;
                cellComparator = new StringValueComparator(stringOrder);
            } else if (DoubleCell.TYPE.isASuperTypeOf(commonType)) {
                // numeric cells are ordered by their string representation
                cellComparator = new DataValueComparator() {

                    @Override
                    protected int compareDataValues(final DataValue v1, final DataValue v2) {
                        return v1.toString().compareTo(v2.toString());
                    }
                };
            } else {
                throw new IllegalStateException("Lexical sorting strategy is not supported.");
            }
            break;
        default:
            throw new IllegalStateException("Unrecognized sorting strategy: " + m_sortingStrategy);
    }
    Arrays.sort(order, cellComparator);
    if (m_sortingReversed) {
        reverse(order);
    }
}
Also used : DataValue(org.knime.core.data.DataValue) DataType(org.knime.core.data.DataType) DataCell(org.knime.core.data.DataCell) DataValueComparator(org.knime.core.data.DataValueComparator) Collator(java.text.Collator) StringValueComparator(org.knime.base.util.StringValueComparator) DataValueComparator(org.knime.core.data.DataValueComparator) Comparator(java.util.Comparator) StringValueComparator(org.knime.base.util.StringValueComparator)

Example 3 with DataValueComparator

use of org.knime.core.data.DataValueComparator in project knime-core by knime.

the class ColumnRowFilterPanel method boundsChanged.

/**
 * Called when user changes the values for the lower or upper bounds.
 *
 * <p>Clears the current error message and then validates the entered range:
 * nothing is checked while no table spec is set, no column is selected or
 * range checking is disabled. Otherwise at least one bound must be given,
 * the upper bound must not compare lower than the lower bound, and a warning
 * is shown if one of the bounds is a string cell.
 */
protected void boundsChanged() {
    // check if the entered value somehow goes along with the selected col.
    setErrMsg("");
    if (m_tSpec == null) {
        return;
    }
    if (getSelectedColumnName() == null) {
        return;
    }
    if (!m_useRange.isSelected()) {
        return;
    }
    final DataCell lowBound;
    final DataCell hiBound;
    try {
        lowBound = getLowerBoundCell();
        hiBound = getUpperBoundCell();
    } catch (InvalidSettingsException ise) {
        setErrMsg(ise.getMessage());
        return;
    }
    if ((lowBound == null) && (hiBound == null)) {
        setErrMsg("Specify at least one range boundary");
        return;
    }
    if ((lowBound != null) && (hiBound != null)) {
        final DataValueComparator comp = DataType.getCommonSuperType(lowBound.getType(), hiBound.getType()).getComparator();
        // a comparator only guarantees a negative result for "less than",
        // not exactly -1, so test the sign instead of the literal value
        if (comp.compare(hiBound, lowBound) < 0) {
            setErrMsg("The lower bound must be smaller than the" + " upper bound");
            return;
        }
    }
    // instanceof is false for null, so no separate null checks are needed
    if ((lowBound instanceof StringCell) || (hiBound instanceof StringCell)) {
        setErrMsg("Warning: String comparison is used for " + "range checking. May not work as expected!");
    }
}
Also used : InvalidSettingsException(org.knime.core.node.InvalidSettingsException) StringCell(org.knime.core.data.def.StringCell) DataCell(org.knime.core.data.DataCell) DataValueComparator(org.knime.core.data.DataValueComparator)

Example 4 with DataValueComparator

use of org.knime.core.data.DataValueComparator in project knime-core by knime.

the class BigGroupByTable method createGroupByTable.

/**
 * {@inheritDoc}
 *
 * <p>This implementation sorts the input table by the group columns (unless
 * there are none) and then walks the sorted rows once. Consecutive rows whose
 * group column cells compare as equal form a chunk; whenever a row starts a
 * new chunk, the result rows of the previous chunk are written to the output
 * container. Within a chunk, rows are additionally separated by the actual
 * group cell values, since a comparator may return 0 for cells that are not
 * equal.
 *
 * @throws CanceledExecutionException if the execution was canceled while
 *             processing the rows
 */
@Override
protected BufferedDataTable createGroupByTable(final ExecutionContext exec, final BufferedDataTable table, final DataTableSpec resultSpec, final int[] groupColIdx) throws CanceledExecutionException {
    LOGGER.debug("Entering createGroupByTable(exec, table) " + "of class BigGroupByTable.");
    final DataTableSpec origSpec = table.getDataTableSpec();
    // sort the data table in order to process the input table chunk wise
    final BufferedDataTable sortedTable;
    final ExecutionContext groupExec;
    final DataValueComparator[] comparators;
    if (groupColIdx.length < 1) {
        // no group columns: nothing to sort by, use the input table as is
        sortedTable = table;
        groupExec = exec;
        comparators = new DataValueComparator[0];
    } else {
        // progress is split between sorting (0.6) and grouping (0.4)
        final ExecutionContext sortExec = exec.createSubExecutionContext(0.6);
        exec.setMessage("Sorting input table...");
        sortedTable = sortTable(sortExec, table, getGroupCols());
        sortExec.setProgress(1.0);
        groupExec = exec.createSubExecutionContext(0.4);
        // one comparator per group column, taken from the column's type
        comparators = new DataValueComparator[groupColIdx.length];
        for (int i = 0, length = groupColIdx.length; i < length; i++) {
            final DataColumnSpec colSpec = origSpec.getColumnSpec(groupColIdx[i]);
            comparators[i] = colSpec.getType().getComparator();
        }
    }
    final BufferedDataContainer dc = exec.createDataContainer(resultSpec);
    exec.setMessage("Creating groups");
    // group column cells of the chunk in progress and of the current row
    final DataCell[] previousGroup = new DataCell[groupColIdx.length];
    final DataCell[] currentGroup = new DataCell[groupColIdx.length];
    final MutableInteger groupCounter = new MutableInteger(0);
    boolean firstRow = true;
    // kept as double so that the progress division below is not an
    // integer division
    final double numOfRows = sortedTable.size();
    long rowCounter = 0;
    // In the rare case that the DataCell comparator return 0 for two
    // data cells that are not equal we have to maintain a map with all
    // rows with equal cells in the group columns per chunk.
    // This variable stores for each chunk these members. A chunk consists
    // of rows which return 0 for the pairwise group value comparison.
    // Usually only equal data cells return 0 when compared with each other
    // but in rare occasions also data cells that are NOT equal return 0 when
    // compared to each other
    // (such as cells that contain chemical structures).
    // In this rare case this map will contain for each group of data cells
    // that are pairwise equal in the chunk a separate entry.
    final Map<GroupKey, Pair<ColumnAggregator[], Set<RowKey>>> chunkMembers = new LinkedHashMap<>(3);
    // the info message about unusual chunks is logged at most once
    boolean logUnusualCells = true;
    String groupLabel = "";
    // cannot put init to the constructor, as the super() constructor directly calls the current function
    initMissingValuesMap();
    for (final DataRow row : sortedTable) {
        // fetch the current group column values
        for (int i = 0, length = groupColIdx.length; i < length; i++) {
            currentGroup[i] = row.getCell(groupColIdx[i]);
        }
        if (firstRow) {
            // the very first row defines the first chunk
            groupLabel = createGroupLabelForProgress(currentGroup);
            System.arraycopy(currentGroup, 0, previousGroup, 0, currentGroup.length);
            firstRow = false;
        }
        // check whether the current row starts a new chunk, i.e. whether its
        // group column data cells differ from those of the previous chunk
        if (!sameChunk(comparators, previousGroup, currentGroup)) {
            groupLabel = createGroupLabelForProgress(currentGroup);
            // write the result rows of the chunk that just ended
            createTableRows(dc, chunkMembers, groupCounter);
            // set the current group as previous group
            System.arraycopy(currentGroup, 0, previousGroup, 0, currentGroup.length);
            if (logUnusualCells && chunkMembers.size() > 1) {
                // the chunk held more than one distinct group although all
                // its rows compared as equal: log the involved cell classes
                // once, as they presumably cause the problem
                if (LOGGER.isEnabledFor(LEVEL.INFO)) {
                    final StringBuilder buf = new StringBuilder();
                    buf.append("Data chunk with ");
                    buf.append(chunkMembers.size());
                    buf.append(" members occured in groupby node. " + "Involved classes are: ");
                    final GroupKey key = chunkMembers.keySet().iterator().next();
                    for (final DataCell cell : key.getGroupVals()) {
                        buf.append(cell.getClass().getCanonicalName());
                        buf.append(", ");
                    }
                    LOGGER.info(buf.toString());
                }
                logUnusualCells = false;
            }
            // reset the chunk members map
            chunkMembers.clear();
        }
        // process the row as one of the members of the current chunk
        Pair<ColumnAggregator[], Set<RowKey>> member = chunkMembers.get(new GroupKey(currentGroup));
        if (member == null) {
            // first row of this group within the chunk: set up fresh
            // aggregators and (only if hiliting is enabled) a row key set
            Set<RowKey> rowKeys;
            if (isEnableHilite()) {
                rowKeys = new HashSet<>();
            } else {
                rowKeys = Collections.emptySet();
            }
            member = new Pair<>(cloneColumnAggregators(), rowKeys);
            // currentGroup is overwritten for every row, so the map key
            // needs its own copy of the cells
            final DataCell[] groupKeys = new DataCell[currentGroup.length];
            System.arraycopy(currentGroup, 0, groupKeys, 0, currentGroup.length);
            chunkMembers.put(new GroupKey(groupKeys), member);
        }
        // compute the current row values
        for (final ColumnAggregator colAggr : member.getFirst()) {
            final int colIdx = origSpec.findColumnIndex(colAggr.getOriginalColName());
            colAggr.getOperator(getGlobalSettings()).compute(row, colIdx);
        }
        if (isEnableHilite()) {
            member.getSecond().add(row.getKey());
        }
        groupExec.checkCanceled();
        groupExec.setProgress(++rowCounter / numOfRows, groupLabel);
    }
    // create the final row for the last chunk after processing the last
    // table row
    createTableRows(dc, chunkMembers, groupCounter);
    dc.close();
    return dc.getTable();
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) HashSet(java.util.HashSet) Set(java.util.Set) RowKey(org.knime.core.data.RowKey) DataValueComparator(org.knime.core.data.DataValueComparator) DataRow(org.knime.core.data.DataRow) LinkedHashMap(java.util.LinkedHashMap) DataColumnSpec(org.knime.core.data.DataColumnSpec) BufferedDataTable(org.knime.core.node.BufferedDataTable) Pair(org.knime.core.util.Pair) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) MutableInteger(org.knime.core.util.MutableInteger) ExecutionContext(org.knime.core.node.ExecutionContext) ColumnAggregator(org.knime.base.data.aggregation.ColumnAggregator) DataCell(org.knime.core.data.DataCell)

Example 5 with DataValueComparator

use of org.knime.core.data.DataValueComparator in project knime-core by knime.

the class RowComparator method compareCells.

/**
 * Compares the cells of the two rows in the i-th sort column.
 *
 * @param dr1 the first row
 * @param dr2 the second row
 * @param i position in the list of sort columns (index into
 *            {@code m_indices} and {@code m_colComparators})
 * @return the result of {@code sortMissingsToEnd} if missing values are to be
 *         sorted to the end and at least one of the cells is missing,
 *         otherwise the result of the column's comparator
 */
private int compareCells(final DataRow dr1, final DataRow dr2, final int i) {
    final DataCell left = dr1.getCell(m_indices[i]);
    final DataCell right = dr2.getCell(m_indices[i]);
    final boolean leftMissing = left.isMissing();
    final boolean rightMissing = right.isMissing();
    // missing cells get special treatment only if configured that way
    if (m_sortMissingsToEnd && (leftMissing || rightMissing)) {
        return sortMissingsToEnd(i, leftMissing, rightMissing);
    }
    return m_colComparators[i].compare(left, right);
}
Also used : DataCell(org.knime.core.data.DataCell) DataValueComparator(org.knime.core.data.DataValueComparator)

Aggregations

DataValueComparator (org.knime.core.data.DataValueComparator): 15, DataCell (org.knime.core.data.DataCell): 12, DataRow (org.knime.core.data.DataRow): 9, DataColumnSpec (org.knime.core.data.DataColumnSpec): 6, DataTableSpec (org.knime.core.data.DataTableSpec): 5, DataType (org.knime.core.data.DataType): 5, DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator): 3, DoubleValue (org.knime.core.data.DoubleValue): 3, RowKey (org.knime.core.data.RowKey): 3, BufferedDataContainer (org.knime.core.node.BufferedDataContainer): 3, InvalidSettingsException (org.knime.core.node.InvalidSettingsException): 3, ParseException (java.text.ParseException): 2, ArrayList (java.util.ArrayList): 2, HashMap (java.util.HashMap): 2, HashSet (java.util.HashSet): 2, Set (java.util.Set): 2, DefaultRow (org.knime.core.data.def.DefaultRow): 2, BufferedDataTable (org.knime.core.node.BufferedDataTable): 2, MutableInteger (org.knime.core.util.MutableInteger): 2, ByteArrayInputStream (java.io.ByteArrayInputStream): 1