Search in sources :

Example 1 with DataValueComparator

Use of DataValueComparator in project knime-core by knime.

the class StatisticsTable method calculateAllMoments.

 * Calculates <b>all the statistical moments in one pass </b>. After the
 * call of this operation, the statistical moments can be obtained very fast
 * from all the other methods.
 * @param rowCount Row count of table for progress, may be NaN if unknown.
 * @param exec object to check with if user canceled the operation
 * @throws CanceledExecutionException if user canceled
 * @throws IllegalArgumentException if rowCount argument < 0
// NOTE(review): this listing appears to have lost its closing braces and some
// statements during extraction (see the individual notes below). The comments
// describe only what is visible here.
protected void calculateAllMoments(final double rowCount, final ExecutionMonitor exec) throws CanceledExecutionException {
    // reject negative row counts; a NaN rowCount passes because (NaN < 0.0) is false
    if (rowCount < 0.0) {
        throw new IllegalArgumentException("rowCount argument must not < 0: " + rowCount);
    DataTableSpec origSpec = m_table.getDataTableSpec();
    int numOfCols = origSpec.getNumColumns();
    // the number of non-missing cells in each column
    // NOTE(review): no increment of validCount is visible in this extract;
    // presumably lost during extraction - verify against the original source
    int[] validCount = new int[numOfCols];
    // per-column sum of squared values, used for the variance below
    double[] sumsquare = new double[numOfCols];
    // one comparator per column, taken from the column's data type
    final DataValueComparator[] comp = new DataValueComparator[numOfCols];
    for (int i = 0; i < numOfCols; i++) {
        sumsquare[i] = 0.0;
        validCount[i] = 0;
        comp[i] = origSpec.getColumnSpec(i).getType().getComparator();
        assert comp[i] != null;
    // single pass over the table; nrRows counts the rows seen so far
    int nrRows = 0;
    for (RowIterator rowIt = m_table.iterator(); rowIt.hasNext(); nrRows++) {
        // NOTE(review): the right-hand side of this assignment is missing in the
        // extract; presumably the next row from rowIt - confirm
        DataRow row =;
        if (exec != null) {
            // progress stays at 0.0 when the row count is unknown (NaN)
            double prog = Double.isNaN(rowCount) ? 0.0 : nrRows / rowCount;
            exec.setProgress(prog, "Calculating statistics, processing row " + (nrRows + 1) + " (\"" + row.getKey() + "\")");
            // throws exception if user canceled
            // NOTE(review): the call this comment refers to is not visible here
        for (int c = 0; c < numOfCols; c++) {
            final DataCell cell = row.getCell(c);
            if (!(cell.isMissing())) {
                // keep the min and max for each column
                if ((m_minValues[c] == null) || (comp[c].compare(cell, m_minValues[c]) < 0)) {
                    m_minValues[c] = cell;
                if ((m_maxValues[c] == null) || (comp[c].compare(m_maxValues[c], cell) < 0)) {
                    m_maxValues[c] = cell;
                // for double columns we calc the sum (for the mean calc)
                DataType type = origSpec.getColumnSpec(c).getType();
                if (type.isCompatible(DoubleValue.class)) {
                    double d = ((DoubleValue) cell).getDoubleValue();
                    // a NaN sum is overwritten by the first value instead of being added to
                    if (Double.isNaN(m_sum[c])) {
                        m_sum[c] = d;
                    } else {
                        m_sum[c] += d;
                    sumsquare[c] += d * d;
            } else {
    m_nrRows = nrRows;
    // derive mean and variance per column from the accumulated sums
    for (int j = 0; j < numOfCols; j++) {
        // missing values
        if (validCount[j] == 0 || m_minValues[j] == null) {
            // no usable value in this column: min/max become the missing cell, mean/variance NaN
            DataCell mc = DataType.getMissingCell();
            m_minValues[j] = mc;
            m_maxValues[j] = mc;
            m_meanValues[j] = Double.NaN;
            m_varianceValues[j] = Double.NaN;
        } else {
            m_meanValues[j] = m_sum[j] / validCount[j];
            if (validCount[j] > 1) {
                // (sum of squares - (sum * sum) / n) / (n - 1)
                m_varianceValues[j] = (sumsquare[j] - ((m_sum[j] * m_sum[j]) / validCount[j])) / (validCount[j] - 1);
            } else {
                m_varianceValues[j] = 0.0;
            // round-off errors resulting in negative variance values
            // NOTE(review): -1.0E8 is a very wide bound for a round-off tolerance;
            // possibly -1.0E-8 was intended - confirm
            if (m_varianceValues[j] < 0.0 && m_varianceValues[j] > -1.0E8) {
                m_varianceValues[j] = 0.0;
            assert m_varianceValues[j] >= 0.0 : "Variance cannot be negative (column \"" + origSpec.getColumnSpec(j).getName() + "\": " + m_varianceValues[j];
    // compute resulting table spec
    int nrCols = m_table.getDataTableSpec().getNumColumns();
    DataColumnSpec[] cSpec = new DataColumnSpec[nrCols];
    for (int c = 0; c < nrCols; c++) {
        DataColumnSpec s = m_table.getDataTableSpec().getColumnSpec(c);
        // we create domains with our bounds.
        Set<DataCell> values = (s.getDomain() == null ? null : s.getDomain().getValues());
        // a bound is passed as null when the stored min/max is null or a missing cell
        DataColumnDomain newDomain = new DataColumnDomainCreator(values, (m_minValues[c] == null || m_minValues[c].isMissing()) ? null : m_minValues[c], (m_maxValues[c] == null || m_maxValues[c].isMissing()) ? null : m_maxValues[c]).createDomain();
        // NOTE(review): newDomain is never handed to creator in the visible code;
        // a statement applying it may have been lost - confirm
        DataColumnSpecCreator creator = new DataColumnSpecCreator(s);
        cSpec[c] = creator.createSpec();
    m_tSpec = new DataTableSpec(cSpec);
Also used: DataTableSpec, DataColumnSpecCreator, DataColumnDomainCreator, DataValueComparator, DataRow, DataColumnSpec, DataColumnDomain, DoubleValue, RowIterator, DataCell, DataType

Example 2 with DataValueComparator

Use of DataValueComparator in project knime-core by knime.

the class AccuracyScorerNodeModel method sort.

 * @param order The cells to sort.
// Sorts the given cells in place with a comparator chosen by m_sortingStrategy.
// NOTE(review): this listing appears to have lost its closing braces and several
// statements (branch bodies, break/default) during extraction; the comments
// describe only what is visible.
private void sort(final DataCell[] order) {
    // NOTE(review): body of the empty-array guard is not visible in this extract
    if (order.length == 0) {
    // start from the first cell's type and widen it to the common super type of all cells
    DataType type = order[0].getType();
    for (DataCell dataCell : order) {
        type = DataType.getCommonSuperType(type, dataCell.getType());
    final Comparator<DataCell> comparator;
    switch(m_sortingStrategy) {
        case InsertionOrder:
            // NOTE(review): the body of this branch is not visible in this extract
            if (m_sortingReversed) {
        case Unsorted:
        case Lexical:
            if (StringCell.TYPE.isASuperTypeOf(type)) {
                Comparator<String> stringComparator;
                Collator instance = Collator.getInstance();
                // do not try to combine characters
                // case and accents matter.
                // NOTE(review): the collator configuration calls these two comments
                // refer to are not visible in this extract
                // the Collator is re-typed as Comparator<String> through a wildcard cast
                @SuppressWarnings("unchecked") Comparator<String> collator = (Comparator<String>) (Comparator<?>) instance;
                stringComparator = collator;
                comparator = new StringValueComparator(stringComparator);
            } else if (DoubleCell.TYPE.isASuperTypeOf(type)) {
                // compares the toString() representations of both values with String.compareTo
                comparator = new DataValueComparator() {

                    protected int compareDataValues(final DataValue v1, final DataValue v2) {
                        String s1 = v1.toString();
                        String s2 = v2.toString();
                        return s1.compareTo(s2);
            } else {
                throw new IllegalStateException("Lexical sorting strategy is not supported.");
        case Numeric:
            if (DoubleCell.TYPE.isASuperTypeOf(type)) {
                // use the comparator supplied by the common type
                comparator = type.getComparator();
            } else {
                throw new IllegalStateException("Numerical sorting strategy is not supported.");
            // NOTE(review): presumably the default branch of the switch - its label is not visible here
            throw new IllegalStateException("Unrecognized sorting strategy: " + m_sortingStrategy);
    Arrays.sort(order, comparator);
    // NOTE(review): the body of this reversal check is not visible in this extract
    if (m_sortingReversed) {
Also used: DataValue, DataType, DataCell, DataValueComparator, Collator (java.text.Collator), StringValueComparator (org.knime.base.util.StringValueComparator), DataValueComparator, Comparator (java.util.Comparator), StringValueComparator (org.knime.base.util.StringValueComparator)

Example 3 with DataValueComparator

Use of DataValueComparator in project knime-core by knime.

the class ColumnRowFilterPanel method boundsChanged.

 * Called when user changes the values for the lower or upper bounds.
// Validates the entered lower/upper range bounds and reports problems via setErrMsg.
// NOTE(review): this listing appears to have lost its closing braces and the
// bodies of several guards during extraction; the comments describe only what
// is visible.
protected void boundsChanged() {
    // check if the entered value somehow goes along with the selected col.
    // NOTE(review): the bodies of the next three guards are not visible in this extract
    if (m_tSpec == null) {
    if (getSelectedColumnName() == null) {
    if (!m_useRange.isSelected()) {
    DataCell lowBound = null;
    DataCell hiBound = null;
    try {
        lowBound = getLowerBoundCell();
        hiBound = getUpperBoundCell();
    } catch (InvalidSettingsException ise) {
    // NOTE(review): the handler body of the catch above is not visible in this extract
    if ((lowBound == null) && (hiBound == null)) {
        setErrMsg("Specify at least one range boundary");
    if ((lowBound != null) && (hiBound != null)) {
        // both bounds given: compare them with the comparator of their common super type
        DataValueComparator comp;
        comp = DataType.getCommonSuperType(lowBound.getType(), hiBound.getType()).getComparator();
        // NOTE(review): the start of this comparison call is missing in the extract;
        // presumably a compare of hiBound against lowBound - confirm
        if (, lowBound) == -1) {
            setErrMsg("The lower bound must be smaller than the" + " upper bound");
    // warn when either bound is a StringCell
    if (((lowBound != null) && (lowBound instanceof StringCell)) || ((hiBound != null) && (hiBound instanceof StringCell))) {
        setErrMsg("Warning: String comparison is used for " + "range checking. May not work as expected!");
Also used: InvalidSettingsException (org.knime.core.node.InvalidSettingsException), StringCell, DataCell, DataValueComparator

Example 4 with DataValueComparator

Use of DataValueComparator in project knime-core by knime.

the class BigGroupByTable method createGroupByTable.

 * {@inheritDoc}
// Builds the group-by result by iterating the (sorted) input table and
// aggregating consecutive rows whose group-column values compare as equal.
// NOTE(review): this listing appears to have lost its closing braces and some
// statements during extraction; the comments describe only what is visible.
protected BufferedDataTable createGroupByTable(final ExecutionContext exec, final BufferedDataTable table, final DataTableSpec resultSpec, final int[] groupColIdx) throws CanceledExecutionException {
    LOGGER.debug("Entering createGroupByTable(exec, table) " + "of class BigGroupByTable.");
    final DataTableSpec origSpec = table.getDataTableSpec();
    // sort the data table in order to process the input table chunk wise
    final BufferedDataTable sortedTable;
    final ExecutionContext groupExec;
    final DataValueComparator[] comparators;
    if (groupColIdx.length < 1) {
        // no group columns: use the table as is, the full exec, and no comparators
        sortedTable = table;
        groupExec = exec;
        comparators = new DataValueComparator[0];
    } else {
        // progress is split 0.6 for sorting and 0.4 for grouping
        final ExecutionContext sortExec = exec.createSubExecutionContext(0.6);
        exec.setMessage("Sorting input table...");
        sortedTable = sortTable(sortExec, table, getGroupCols());
        groupExec = exec.createSubExecutionContext(0.4);
        // one comparator per group column, taken from that column's type
        comparators = new DataValueComparator[groupColIdx.length];
        for (int i = 0, length = groupColIdx.length; i < length; i++) {
            final DataColumnSpec colSpec = origSpec.getColumnSpec(groupColIdx[i]);
            comparators[i] = colSpec.getType().getComparator();
    final BufferedDataContainer dc = exec.createDataContainer(resultSpec);
    exec.setMessage("Creating groups");
    final DataCell[] previousGroup = new DataCell[groupColIdx.length];
    final DataCell[] currentGroup = new DataCell[groupColIdx.length];
    final MutableInteger groupCounter = new MutableInteger(0);
    boolean firstRow = true;
    // held as double so the progress division below is floating-point
    final double numOfRows = sortedTable.size();
    long rowCounter = 0;
    // In the rare case that the DataCell comparator return 0 for two
    // data cells that are not equal we have to maintain a map with all
    // rows with equal cells in the group columns per chunk.
    // This variable stores for each chunk these members. A chunk consists
    // of rows which return 0 for the pairwise group value comparison.
    // Usually only equal data cells return 0 when compared with each other
    // but in rare occasions also data cells that are NOT equal return 0 when
    // compared to each other
    // (such as cells that contain chemical structures).
    // In this rare case this map will contain for each group of data cells
    // that are pairwise equal in the chunk a separate entry.
    final Map<GroupKey, Pair<ColumnAggregator[], Set<RowKey>>> chunkMembers = new LinkedHashMap<>(3);
    // set to false after the first log message about a multi-member chunk
    boolean logUnusualCells = true;
    String groupLabel = "";
    // cannot put init to the constructor, as the super() constructor directly calls the current function
    for (final DataRow row : sortedTable) {
        // fetch the current group column values
        for (int i = 0, length = groupColIdx.length; i < length; i++) {
            currentGroup[i] = row.getCell(groupColIdx[i]);
        if (firstRow) {
            // seed previousGroup with the first row's group values
            groupLabel = createGroupLabelForProgress(currentGroup);
            System.arraycopy(currentGroup, 0, previousGroup, 0, currentGroup.length);
            firstRow = false;
        // group column data cells
        if (!sameChunk(comparators, previousGroup, currentGroup)) {
            // chunk boundary: emit the rows of the finished chunk
            groupLabel = createGroupLabelForProgress(currentGroup);
            createTableRows(dc, chunkMembers, groupCounter);
            // set the current group as previous group
            System.arraycopy(currentGroup, 0, previousGroup, 0, currentGroup.length);
            if (logUnusualCells && chunkMembers.size() > 1) {
                // cause the problem
                if (LOGGER.isEnabledFor(LEVEL.INFO)) {
                    // NOTE(review): some appends and the final log call appear to be
                    // missing from this extract
                    final StringBuilder buf = new StringBuilder();
                    buf.append("Data chunk with ");
                    buf.append(" members occured in groupby node. " + "Involved classes are: ");
                    final GroupKey key = chunkMembers.keySet().iterator().next();
                    for (final DataCell cell : key.getGroupVals()) {
                        buf.append(", ");
                logUnusualCells = false;
            // reset the chunk members map
            // NOTE(review): the reset statement itself is not visible in this extract
        // process the row as one of the members of the current chunk
        Pair<ColumnAggregator[], Set<RowKey>> member = chunkMembers.get(new GroupKey(currentGroup));
        if (member == null) {
            // row keys are collected in a HashSet only when hiliting is enabled
            Set<RowKey> rowKeys;
            if (isEnableHilite()) {
                rowKeys = new HashSet<>();
            } else {
                rowKeys = Collections.emptySet();
            member = new Pair<>(cloneColumnAggregators(), rowKeys);
            // copy the values: currentGroup is overwritten for every row, so the map key needs its own array
            final DataCell[] groupKeys = new DataCell[currentGroup.length];
            System.arraycopy(currentGroup, 0, groupKeys, 0, currentGroup.length);
            chunkMembers.put(new GroupKey(groupKeys), member);
        // compute the current row values
        for (final ColumnAggregator colAggr : member.getFirst()) {
            final int colIdx = origSpec.findColumnIndex(colAggr.getOriginalColName());
            colAggr.getOperator(getGlobalSettings()).compute(row, colIdx);
        // NOTE(review): the body of this hilite block is not visible in this extract
        if (isEnableHilite()) {
        groupExec.setProgress(++rowCounter / numOfRows, groupLabel);
    // create the final row for the last chunk after processing the last
    // table row
    createTableRows(dc, chunkMembers, groupCounter);
    return dc.getTable();
Also used: DataTableSpec, HashSet (java.util.HashSet), Set (java.util.Set), RowKey, DataValueComparator, DataRow, LinkedHashMap (java.util.LinkedHashMap), DataColumnSpec, BufferedDataTable (org.knime.core.node.BufferedDataTable), Pair (org.knime.core.util.Pair), BufferedDataContainer (org.knime.core.node.BufferedDataContainer), MutableInteger (org.knime.core.util.MutableInteger), ExecutionContext (org.knime.core.node.ExecutionContext), ColumnAggregator, DataCell

Example 5 with DataValueComparator

Use of DataValueComparator in project knime-core by knime.

the class RowComparator method compareCells.

// Compares the cells of two rows in the column at m_indices[i].
// NOTE(review): this listing appears to have lost its closing braces and part
// of the compare call during extraction; the comments describe only what is
// visible.
private int compareCells(final DataRow dr1, final DataRow dr2, final int i) {
    int cellComparison;
    final DataCell c1 = dr1.getCell(m_indices[i]);
    final DataCell c2 = dr2.getCell(m_indices[i]);
    final boolean c1Missing = c1.isMissing();
    final boolean c2Missing = c2.isMissing();
    // when missing values go to the end and at least one cell is missing, delegate to sortMissingsToEnd
    if (m_sortMissingsToEnd && (c1Missing || c2Missing)) {
        return sortMissingsToEnd(i, c1Missing, c2Missing);
    } else {
        final DataValueComparator comp = m_colComparators[i];
        // NOTE(review): the start of this call is missing in the extract;
        // presumably a compare of c1 with c2 via comp - confirm
        cellComparison =, c2);
    return cellComparison;
Also used: DataCell, DataValueComparator


DataValueComparator ( DataCell ( DataRow ( DataColumnSpec ( DataTableSpec ( DataType ( DataColumnSpecCreator ( DoubleValue ( RowKey ( BufferedDataContainer (org.knime.core.node.BufferedDataContainer)3 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)3 ParseException (java.text.ParseException)2 ArrayList (java.util.ArrayList)2 HashMap (java.util.HashMap)2 HashSet (java.util.HashSet)2 Set (java.util.Set)2 DefaultRow ( BufferedDataTable (org.knime.core.node.BufferedDataTable)2 MutableInteger (org.knime.core.util.MutableInteger)2 ByteArrayInputStream (