Search in sources :

Example 6 with DataValueComparator

use of org.knime.core.data.DataValueComparator in project knime-core by knime.

the class AbstractColumnTableSorter method toSortDescriptions.

/**
 * Creates one {@link SortingDescription} for every requested column name, in the given order.
 *
 * <p>Each description compares two rows using the comparator of the named column's data type.</p>
 *
 * @param dataTableSpec the spec in which the column names are looked up
 * @param toSort the names of the columns to sort; must not contain {@code null} entries
 * @return the sorting descriptions, one per entry of {@code toSort}
 * @throws InvalidSettingsException if a column name is not part of {@code dataTableSpec}
 */
private static SortingDescription[] toSortDescriptions(final DataTableSpec dataTableSpec, final String[] toSort) throws InvalidSettingsException {
    final boolean containsNullName = ArrayUtils.contains(toSort, null);
    checkArgument(!containsNullName, "Null values are not permitted.");
    final SortingDescription[] descriptions = new SortingDescription[toSort.length];
    for (int i = 0; i < toSort.length; i++) {
        final String columnName = toSort[i];
        final DataColumnSpec spec = checkSettingNotNull(dataTableSpec.getColumnSpec(columnName), "Column: '%s' does not exist in input table.", columnName);
        final DataValueComparator cellComparator = spec.getType().getComparator();
        descriptions[i] = new SortingDescription(columnName) {

            // NOTE(review): always reads cell 0, regardless of the column's position in dataTableSpec;
            // presumably the rows passed in contain only the column being sorted - confirm with caller.
            @Override
            public int compare(final DataRow left, final DataRow right) {
                return cellComparator.compare(left.getCell(0), right.getCell(0));
            }
        };
    }
    return descriptions;
}
Also used : DataColumnSpec(org.knime.core.data.DataColumnSpec) DataValueComparator(org.knime.core.data.DataValueComparator) BlobSupportDataRow(org.knime.core.data.container.BlobSupportDataRow) DataRow(org.knime.core.data.DataRow)

Example 7 with DataValueComparator

use of org.knime.core.data.DataValueComparator in project knime-core by knime.

the class DoubleVectorCellTest method testCompare.

/**
 * Checks the ordering produced by the comparator of the double vector type: a cell equals itself,
 * two vectors that differ in a single element are ordered by that element, and an empty vector
 * sorts before a non-empty one.
 *
 * @throws Exception if the test fails unexpectedly
 */
@Test
public void testCompare() throws Exception {
    double[] d1 = IntStream.range(0, 10000).mapToDouble(i -> i).toArray();
    DataCell cell1 = DoubleVectorCellFactory.createCell(d1);
    double[] d2 = IntStream.range(0, 10000).mapToDouble(i -> i).toArray();
    // cell2 differs from cell1 only at index 100, where it holds 99 instead of 100,
    // so cell1 is the larger of the two vectors
    d2[100] = 99.0;
    DataCell cell2 = DoubleVectorCellFactory.createCell(d2);
    DataValueComparator comparator = DoubleVectorCellFactory.TYPE.getComparator();
    Assert.assertThat("must be equal", comparator.compare(cell1, cell1), CoreMatchers.equalTo(0));
    // the failure messages describe the first argument of compare(...)
    Assert.assertThat("must be larger", comparator.compare(cell1, cell2), OrderingComparison.greaterThan(0));
    Assert.assertThat("must be smaller", comparator.compare(cell2, cell1), OrderingComparison.lessThan(0));
    Assert.assertThat("shorter array must be smaller", comparator.compare(DoubleVectorCellFactory.createCell(new double[0]), cell2), OrderingComparison.lessThan(0));
}
Also used : IntStream(java.util.stream.IntStream) CoreMatchers(org.hamcrest.CoreMatchers) DefaultRow(org.knime.core.data.def.DefaultRow) ExecutionMonitor(org.knime.core.node.ExecutionMonitor) ByteArrayOutputStream(java.io.ByteArrayOutputStream) DataTableSpec(org.knime.core.data.DataTableSpec) Test(org.junit.Test) DataTable(org.knime.core.data.DataTable) DataValueComparator(org.knime.core.data.DataValueComparator) DataContainer(org.knime.core.data.container.DataContainer) ByteArrayInputStream(java.io.ByteArrayInputStream) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) OrderingComparison(org.hamcrest.number.OrderingComparison) Assert(org.junit.Assert) DataCell(org.knime.core.data.DataCell) ContainerTable(org.knime.core.data.container.ContainerTable) DataCell(org.knime.core.data.DataCell) DataValueComparator(org.knime.core.data.DataValueComparator) Test(org.junit.Test)

Example 8 with DataValueComparator

use of org.knime.core.data.DataValueComparator in project knime-core by knime.

the class Pivot2NodeModel method fillPivotTable.

/**
 * Converts the grouped table into the pivot table by collecting consecutive rows that share the same
 * group cells into a single output row.
 *
 * <p>The columns of each input row are interpreted by position: the first {@code groupCount} columns
 * are group columns, the next {@code pivotCount} columns are pivot columns and all remaining columns
 * are aggregation columns. An output row is written whenever a group cell differs from the one
 * currently held, and once more after the last input row.</p>
 *
 * @param groupTable the table to read the rows from
 * @param pivotSpec the spec of the table to create
 * @param pivotStarts maps a pivot column name to the index of its first cell in the output row
 * @param exec used to create the output container, report progress and check for cancellation
 * @param orderPivotColumnName name of the aggregation column whose value is kept in the last output
 *            cell instead of a pivot cell, or {@code null} if there is no such column
 * @return the table that was filled
 * @throws CanceledExecutionException if {@code exec.checkCanceled()} signals cancellation
 */
private BufferedDataTable fillPivotTable(final BufferedDataTable groupTable, final DataTableSpec pivotSpec, final Map<String, Integer> pivotStarts, final ExecutionContext exec, final String orderPivotColumnName) throws CanceledExecutionException {
    final BufferedDataContainer buf = exec.createDataContainer(pivotSpec);
    final List<String> pivotCols = m_pivotCols.getIncludeList();
    final int pivotCount = pivotCols.size();
    // group columns are the group-by columns that are not also used as pivot columns
    final List<String> groupCols = new ArrayList<String>(getGroupByColumns());
    groupCols.removeAll(pivotCols);
    final int groupCount = groupCols.size();
    final DataTableSpec groupSpec = groupTable.getSpec();
    final int colCount = groupSpec.getNumColumns();
    // cells of the output row currently being assembled; null marks a cell that is not set (yet)
    final DataCell[] outcells = new DataCell[pivotSpec.getNumColumns()];
    final long totalRowCount = groupTable.size();
    long rowIndex = 0;
    for (final DataRow row : groupTable) {
        final RowKey origRowKey = row.getKey();
        // pivot column name of this row, assembled from its pivot cells
        String pivotColumn = null;
        for (int i = 0; i < colCount; i++) {
            final DataCell cell = row.getCell(i);
            // is a group column
            if (i < groupCount) {
                // diff group found: write out current group and cont.
                if (outcells[i] != null && !cell.equals(outcells[i])) {
                    // write row to out table
                    write(buf, outcells);
                    // reset pivot column name and out data row
                    // (only the cells after position i are cleared; cell i is overwritten below)
                    pivotColumn = null;
                    for (int j = i + 1; j < outcells.length; j++) {
                        outcells[j] = null;
                    }
                }
                outcells[i] = cell;
            // is pivot column
            } else if (i < (groupCount + pivotCount)) {
                // check for missing pivots
                if (m_ignoreMissValues.getBooleanValue() && cell.isMissing()) {
                    // discard everything collected so far, including the group cells,
                    // and skip the remaining columns of this row
                    for (int j = 0; j < outcells.length; j++) {
                        outcells[j] = null;
                    }
                    break;
                }
                // create pivot column
                if (pivotColumn == null) {
                    pivotColumn = cell.toString();
                } else {
                    pivotColumn += PIVOT_COLUMN_DELIMITER + cell.toString();
                }
            // is a aggregation column
            } else {
                // NOTE(review): auto-unboxing throws a NullPointerException if pivotStarts has no
                // entry for pivotColumn - presumably the caller registers every name; confirm
                final int idx = pivotStarts.get(pivotColumn);
                // offset of this aggregation column among the aggregation columns
                final int pivotIndex = i - pivotCount - groupCount;
                final int pivotCellIndex = idx + pivotIndex;
                if (// if retain order is off
                orderPivotColumnName == null || !groupSpec.getColumnSpec(i).getName().equals(orderPivotColumnName)) {
                    outcells[pivotCellIndex] = cell;
                } else {
                    // temp retain column (type:IntCell)
                    final int retainIndex = outcells.length - 1;
                    if (outcells[retainIndex] == null) {
                        outcells[retainIndex] = cell;
                    } else {
                        // keep the smaller of the stored and the current value
                        final DataValueComparator comp = pivotSpec.getColumnSpec(retainIndex).getType().getComparator();
                        if (comp.compare(outcells[retainIndex], cell) > 0) {
                            outcells[retainIndex] = cell;
                        }
                    }
                }
            }
        }
        // the fraction uses the value of rowIndex before the increment, the message the value after it
        exec.setProgress(rowIndex++ / (double) totalRowCount, String.format("Group \"%s\" (%d/%d)", origRowKey, rowIndex, totalRowCount));
        exec.checkCanceled();
    }
    // write last group - if any.
    if (outcells[0] != null) {
        write(buf, outcells);
    }
    buf.close();
    return buf.getTable();
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) RowKey(org.knime.core.data.RowKey) ArrayList(java.util.ArrayList) DataCell(org.knime.core.data.DataCell) SettingsModelFilterString(org.knime.core.node.defaultnodesettings.SettingsModelFilterString) DataRow(org.knime.core.data.DataRow) DataValueComparator(org.knime.core.data.DataValueComparator)

Example 9 with DataValueComparator

use of org.knime.core.data.DataValueComparator in project knime-core by knime.

the class BinByDictionaryNodeModel method createColumnRearranger.

/**
 * Creates the rearranger that appends the label column to the first input table.
 *
 * <p>The label of a row is found by searching the rule set, which is built from the rows of
 * {@code port1Table}, for the row's value in the configured value column. A missing value yields a
 * missing cell. If no rule matches, the result is either a missing cell or a
 * {@link RuntimeException}, depending on the configuration.</p>
 *
 * @param ins the specs of the two inputs: index 0 is the data table, index 1 the rule table
 * @param port1Table the rule table, or {@code null} if only the output spec is needed; in that
 *            case no rules are added and {@code exec} is not used
 * @param exec used to report progress and check for cancellation while reading the rule table
 * @return the rearranger appending the label column
 * @throws InvalidSettingsException if no configuration is set or a configured column is missing
 * @throws CanceledExecutionException if {@code exec.checkCanceled()} signals cancellation
 */
private ColumnRearranger createColumnRearranger(final DataTableSpec[] ins, final BufferedDataTable port1Table, final ExecutionMonitor exec) throws InvalidSettingsException, CanceledExecutionException {
    final BinByDictionaryConfiguration c = m_configuration;
    if (c == null) {
        throw new InvalidSettingsException("No configuration set");
    }
    String lowerColPort1 = c.getLowerBoundColumnPort1();
    String upperColPort1 = c.getUpperBoundColumnPort1();
    String labelCol = c.getLabelColumnPort1();
    String valueColumnPort0 = c.getValueColumnPort0();
    final int valueColIndexPort0 = ins[0].findColumnIndex(valueColumnPort0);
    if (valueColIndexPort0 < 0) {
        throw new InvalidSettingsException("No such column in 1st input: " + valueColumnPort0);
    }
    final DataType valueType = ins[0].getColumnSpec(valueColIndexPort0).getType();
    final boolean isLowerBoundInclusive = c.isLowerBoundInclusive();
    final boolean isUpperBoundInclusive = c.isUpperBoundInclusive();

    // both bounds are optional: index -1 and a null comparator mean "no bound of that kind"
    final int lowerBoundColIndex = findOptionalRuleColumnIndex(ins[1], lowerColPort1);
    final DataValueComparator lowerBoundComparator = createBoundComparator(ins[1], lowerBoundColIndex, valueType);
    final int upperBoundColIndex = findOptionalRuleColumnIndex(ins[1], upperColPort1);
    final DataValueComparator upperBoundComparator = createBoundComparator(ins[1], upperBoundColIndex, valueType);

    final int labelColIndex = findRuleColumnIndex(ins[1], labelCol);
    // the appended column is based on the label column's spec, without handlers and with a name
    // that is unique in the first input
    DataColumnSpecCreator labelColSpecCreator = new DataColumnSpecCreator(ins[1].getColumnSpec(labelColIndex));
    labelColSpecCreator.removeAllHandlers();
    String name = DataTableSpec.getUniqueColumnName(ins[0], labelCol);
    labelColSpecCreator.setName(name);
    final DataColumnSpec labelColSpec = labelColSpecCreator.createSpec();
    final BinByDictionaryRuleSet ruleSet = new BinByDictionaryRuleSet(lowerBoundComparator, isLowerBoundInclusive, upperBoundComparator, isUpperBoundInclusive, c.isUseBinarySearch());
    if (port1Table != null) {
        // in execute
        long rowCount = port1Table.size();
        long current = 1;
        for (DataRow r : port1Table) {
            DataCell lower = lowerBoundColIndex < 0 ? null : r.getCell(lowerBoundColIndex);
            DataCell upper = upperBoundColIndex < 0 ? null : r.getCell(upperBoundColIndex);
            DataCell label = r.getCell(labelColIndex);
            ruleSet.addRule(lower, upper, label);
            exec.setProgress(/*no prog */
            0.0, "Indexing rule table " + (current++) + "/" + rowCount + " (\"" + r.getKey() + "\")");
            exec.checkCanceled();
        }
    }
    ruleSet.close();
    SingleCellFactory fac = new SingleCellFactory(ruleSet.getSize() > 100, labelColSpec) {

        @Override
        public DataCell getCell(final DataRow row) {
            DataCell value = row.getCell(valueColIndexPort0);
            if (value.isMissing()) {
                return DataType.getMissingCell();
            }
            DataCell result = ruleSet.search(value);
            if (result != null) {
                return result;
            }
            if (c.isFailIfNoRuleMatches()) {
                throw new RuntimeException("No rule matched for row \"" + row.getKey() + "\", value: \"" + value + "\"");
            }
            return DataType.getMissingCell();
        }
    };
    ColumnRearranger rearranger = new ColumnRearranger(ins[0]);
    rearranger.append(fac);
    return rearranger;
}

/**
 * Looks up a column of the rule table (second input) that must exist.
 *
 * @param ruleSpec the spec of the second input
 * @param columnName the name of the column to find
 * @return the non-negative index of the column
 * @throws InvalidSettingsException if the spec reports a negative index for the name
 */
private static int findRuleColumnIndex(final DataTableSpec ruleSpec, final String columnName) throws InvalidSettingsException {
    final int index = ruleSpec.findColumnIndex(columnName);
    if (index < 0) {
        throw new InvalidSettingsException("No such column in 2nd input: " + columnName);
    }
    return index;
}

/**
 * Looks up an optional bound column of the rule table (second input).
 *
 * @param ruleSpec the spec of the second input
 * @param columnName the name of the bound column, or {@code null} if no such bound is configured
 * @return the index of the column, or {@code -1} if {@code columnName} is {@code null}
 * @throws InvalidSettingsException if a name is given but the column does not exist
 */
private static int findOptionalRuleColumnIndex(final DataTableSpec ruleSpec, final String columnName) throws InvalidSettingsException {
    return columnName == null ? -1 : findRuleColumnIndex(ruleSpec, columnName);
}

/**
 * Chooses the comparator used to compare values against one bound column. If the bound column's
 * type differs from the value type, a warning is set and the comparator of the common super type
 * of both types is used.
 *
 * @param ruleSpec the spec of the second input
 * @param boundColIndex the index of the bound column, negative if there is no such bound
 * @param valueType the type of the value column in the first input
 * @return the comparator, or {@code null} if {@code boundColIndex} is negative
 */
private DataValueComparator createBoundComparator(final DataTableSpec ruleSpec, final int boundColIndex, final DataType valueType) {
    if (boundColIndex < 0) {
        // no bound of this kind specified
        return null;
    }
    final DataType boundType = ruleSpec.getColumnSpec(boundColIndex).getType();
    if (valueType.equals(boundType)) {
        return valueType.getComparator();
    }
    setWarningMessage("The types of the comparison and value " + "columns are not equal, comparison might be done " + "based on lexicographical string representation!");
    return DataType.getCommonSuperType(valueType, boundType).getComparator();
}
Also used : DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) DataValueComparator(org.knime.core.data.DataValueComparator) DataRow(org.knime.core.data.DataRow) DataColumnSpec(org.knime.core.data.DataColumnSpec) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) DataType(org.knime.core.data.DataType) DataCell(org.knime.core.data.DataCell) SingleCellFactory(org.knime.core.data.container.SingleCellFactory)

Example 10 with DataValueComparator

use of org.knime.core.data.DataValueComparator in project knime-core by knime.

the class SortedCorrelationComputer method getRanks.

/**
 * Replaces one column of the table with the ranks of its values.
 *
 * <p>Ranks are calculated for a single column per call. This might be slower than ranking several
 * columns at once, but it decreases the amount of memory needed. Ranks start at 1 and follow the
 * order defined by the comparator of the column's type; values that are tied share the mean of the
 * ranks they occupy.</p>
 *
 * @param bdt the data table to convert
 * @param colIndex the column to rank
 * @param exec the execution context to report progress to
 * @return a Buffered Data Table where column colIndex is replaced with a numerical column containing its rank
 * @throws CanceledExecutionException if {@code exec.checkCanceled()} signals cancellation
 */
private BufferedDataTable getRanks(final BufferedDataTable bdt, final int colIndex, final ExecutionContext exec) throws CanceledExecutionException {
    final DataValueComparator colComparators = bdt.getDataTableSpec().getColumnSpec(colIndex).getType().getComparator();
    LinkedList<SortablePair> myList = new LinkedList<SortablePair>();
    // read the data, remembering the original position of every cell
    int counter = 0;
    for (DataRow row : bdt) {
        DataCell dCell = row.getCell(colIndex);
        myList.add(new SortablePair(counter++, dCell));
        exec.checkCanceled();
    }
    exec.setProgress(0.2);
    // sort the data by value
    Collections.sort(myList, new Comparator<SortablePair>() {

        @Override
        public int compare(final SortablePair dr1, final SortablePair dr2) {
            if (dr1 == dr2) {
                return 0;
            }
            if (dr1 == null) {
                return 1;
            }
            if (dr2 == null) {
                return -1;
            }
            return colComparators.compare(dr1.getDataCell(), dr2.getDataCell());
        }
    });
    exec.setProgress(0.4);
    // assign ranks 1..n in sorted order and record the mean rank of every run of tied values
    // NOTE(review): ties are detected with the comparator, but the mean rank is stored and looked up
    // by DataCell equals/hashCode - assumes both notions of equality agree; confirm
    counter = 1;
    DataCell lastCell = null;
    HashMap<DataCell, Double> duplicateValues = new HashMap<>();
    int nrOfDups = 0;
    for (SortablePair p : myList) {
        exec.checkCanceled();
        double rank = 1.0 * counter++;
        DataCell currentCell = p.getDataCell();
        // the first element has no predecessor to compare with
        if (lastCell != null) {
            if (colComparators.compare(lastCell, currentCell) == 0) {
                if (duplicateValues.containsKey(lastCell)) {
                    // run continues: fold the new rank into the running mean
                    nrOfDups++;
                    duplicateValues.put(lastCell, ((duplicateValues.get(lastCell) * (nrOfDups - 1) + rank) / nrOfDups));
                } else {
                    // run of two starts: mean of (rank - 1) and rank
                    duplicateValues.put(lastCell, (rank - 0.5));
                    nrOfDups = 2;
                }
            } else {
                nrOfDups = 0;
            }
        }
        lastCell = currentCell;
        p.setRank(rank);
    }
    exec.setProgress(0.6);
    // resolve duplicates
    if (!duplicateValues.isEmpty()) {
        // change the duplicates
        for (SortablePair p : myList) {
            exec.checkCanceled();
            Double d = duplicateValues.get(p.getDataCell());
            if (d != null) {
                p.setRank(d);
            }
        }
    }
    exec.setProgress(0.8);
    // restore the original row order
    Collections.sort(myList, new Comparator<SortablePair>() {

        @Override
        public int compare(final SortablePair dr1, final SortablePair dr2) {
            if (dr1 == dr2) {
                return 0;
            }
            if (dr1 == null) {
                return 1;
            }
            if (dr2 == null) {
                return -1;
            }
            // Integer.compare instead of subtraction: cannot overflow
            return Integer.compare(dr1.getOrignalOrder(), dr2.getOrignalOrder());
        }
    });
    return replace(bdt, colIndex, myList, exec.createSubExecutionContext(0.2));
}
Also used : HashMap(java.util.HashMap) DataValueComparator(org.knime.core.data.DataValueComparator) DataRow(org.knime.core.data.DataRow) LinkedList(java.util.LinkedList) DataCell(org.knime.core.data.DataCell)

Aggregations

DataValueComparator (org.knime.core.data.DataValueComparator)15 DataCell (org.knime.core.data.DataCell)12 DataRow (org.knime.core.data.DataRow)9 DataColumnSpec (org.knime.core.data.DataColumnSpec)6 DataTableSpec (org.knime.core.data.DataTableSpec)5 DataType (org.knime.core.data.DataType)5 DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator)3 DoubleValue (org.knime.core.data.DoubleValue)3 RowKey (org.knime.core.data.RowKey)3 BufferedDataContainer (org.knime.core.node.BufferedDataContainer)3 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)3 ParseException (java.text.ParseException)2 ArrayList (java.util.ArrayList)2 HashMap (java.util.HashMap)2 HashSet (java.util.HashSet)2 Set (java.util.Set)2 DefaultRow (org.knime.core.data.def.DefaultRow)2 BufferedDataTable (org.knime.core.node.BufferedDataTable)2 MutableInteger (org.knime.core.util.MutableInteger)2 ByteArrayInputStream (java.io.ByteArrayInputStream)1