use of org.knime.core.data.DataValueComparator in project knime-core by knime.
the class AbstractColumnTableSorter method toSortDescriptions.
/**
 * Converts an array of column names into one {@link SortingDescription} per name.
 * <p>
 * Each description orders rows with the comparator of the named column's data type. The comparison is applied
 * to the cell at index 0 of both rows (presumably the rows handed to the description only contain the sorted
 * column - confirm against the caller).
 *
 * @param dataTableSpec the spec used to look up the columns
 * @param toSort the names of the columns to sort, must not contain {@code null}
 * @return the sorting descriptions, in the same order as {@code toSort}
 * @throws InvalidSettingsException if one of the names does not denote a column of {@code dataTableSpec}
 */
private static SortingDescription[] toSortDescriptions(final DataTableSpec dataTableSpec, final String[] toSort)
    throws InvalidSettingsException {
    checkArgument(!ArrayUtils.contains(toSort, null), "Null values are not permitted.");
    final SortingDescription[] descriptions = new SortingDescription[toSort.length];
    for (int i = 0; i < toSort.length; i++) {
        final String columnName = toSort[i];
        final DataColumnSpec spec = checkSettingNotNull(dataTableSpec.getColumnSpec(columnName),
            "Column: '%s' does not exist in input table.", columnName);
        final DataValueComparator cellComparator = spec.getType().getComparator();
        descriptions[i] = new SortingDescription(columnName) {
            @Override
            public int compare(final DataRow left, final DataRow right) {
                return cellComparator.compare(left.getCell(0), right.getCell(0));
            }
        };
    }
    return descriptions;
}
use of org.knime.core.data.DataValueComparator in project knime-core by knime.
the class DoubleVectorCellTest method testCompare.
/**
 * Checks the ordering implemented by the comparator of {@code DoubleVectorCellFactory.TYPE}.
 * <p>
 * Two vectors of length 10000 are compared that differ only at index 100 (100.0 in {@code cell1}, 99.0 in
 * {@code cell2}); additionally an empty vector is compared against a non-empty one. Each assertion reason
 * describes the expected position of the <em>first</em> operand of the comparison.
 *
 * @throws Exception if the test fails unexpectedly
 */
@Test
public void testCompare() throws Exception {
    double[] d1 = IntStream.range(0, 10000).mapToDouble(i -> i).toArray();
    DataCell cell1 = DoubleVectorCellFactory.createCell(d1);
    double[] d2 = IntStream.range(0, 10000).mapToDouble(i -> i).toArray();
    // make cell2 smaller than cell1 at the first differing position (99.0 < 100.0)
    d2[100] = 99.0;
    DataCell cell2 = DoubleVectorCellFactory.createCell(d2);
    DataValueComparator comparator = DoubleVectorCellFactory.TYPE.getComparator();
    Assert.assertThat("must be equal", comparator.compare(cell1, cell1), CoreMatchers.equalTo(0));
    // cell1 is the larger operand here, so the result has to be positive
    Assert.assertThat("must be larger", comparator.compare(cell1, cell2), OrderingComparison.greaterThan(0));
    // operands swapped: cell2 is the smaller operand, so the result has to be negative
    Assert.assertThat("must be smaller", comparator.compare(cell2, cell1), OrderingComparison.lessThan(0));
    Assert.assertThat("shorter array must be smaller",
        comparator.compare(DoubleVectorCellFactory.createCell(new double[0]), cell2),
        OrderingComparison.lessThan(0));
}
use of org.knime.core.data.DataValueComparator in project knime-core by knime.
the class Pivot2NodeModel method fillPivotTable.
/**
 * Builds the pivot output table by iterating over the rows of {@code groupTable} and collapsing consecutive
 * rows that carry the same group-column values into a single output row.
 * <p>
 * The columns of every input row are interpreted by position: the first {@code groupCount} columns are group
 * columns, the following {@code pivotCount} columns are pivot columns and all remaining columns are
 * aggregation columns. The output row is assembled in a shared buffer ({@code outcells}) and written whenever
 * a group column value changes, plus once more after the last input row.
 *
 * @param groupTable the table to read; NOTE(review): the collapsing only compares against the previously
 *            buffered group values, so rows appear to be expected sorted by group columns - confirm with caller
 * @param pivotSpec the spec of the output table; its column count defines the size of the output row buffer
 * @param pivotStarts maps a pivot key (string values of the pivot cells joined by
 *            {@code PIVOT_COLUMN_DELIMITER}) to the index of the first output column of that pivot
 * @param exec used to create the output container, report progress and check for cancellation
 * @param orderPivotColumnName name of the column in {@code groupTable} whose values are collected in the last
 *            output column (the smallest value per group, according to that output column's comparator, is
 *            kept), or {@code null} if no such column is used
 * @return the table obtained from the filled and closed container
 * @throws CanceledExecutionException if {@code exec.checkCanceled()} reports a cancellation
 */
private BufferedDataTable fillPivotTable(final BufferedDataTable groupTable, final DataTableSpec pivotSpec, final Map<String, Integer> pivotStarts, final ExecutionContext exec, final String orderPivotColumnName) throws CanceledExecutionException {
final BufferedDataContainer buf = exec.createDataContainer(pivotSpec);
final List<String> pivotCols = m_pivotCols.getIncludeList();
final int pivotCount = pivotCols.size();
// group columns = all group-by columns that are not used as pivot columns
final List<String> groupCols = new ArrayList<String>(getGroupByColumns());
groupCols.removeAll(pivotCols);
final int groupCount = groupCols.size();
final DataTableSpec groupSpec = groupTable.getSpec();
final int colCount = groupSpec.getNumColumns();
// buffer for the output row currently being assembled; null entries are not (yet) set
final DataCell[] outcells = new DataCell[pivotSpec.getNumColumns()];
final long totalRowCount = groupTable.size();
long rowIndex = 0;
for (final DataRow row : groupTable) {
final RowKey origRowKey = row.getKey();
// pivot key of this input row, built up while passing the pivot columns
String pivotColumn = null;
for (int i = 0; i < colCount; i++) {
final DataCell cell = row.getCell(i);
// is a group column
if (i < groupCount) {
// diff group found: write out current group and cont.
if (outcells[i] != null && !cell.equals(outcells[i])) {
// write row to out table
write(buf, outcells);
// reset pivot column name and out data row
// (only entries behind the changed group column are cleared; entry i is overwritten below)
pivotColumn = null;
for (int j = i + 1; j < outcells.length; j++) {
outcells[j] = null;
}
}
outcells[i] = cell;
// is pivot column
} else if (i < (groupCount + pivotCount)) {
// check for missing pivots
if (m_ignoreMissValues.getBooleanValue() && cell.isMissing()) {
// NOTE(review): this clears the complete buffer, including values already collected for the
// current group, and skips the remaining columns of this row - confirm this is intended
for (int j = 0; j < outcells.length; j++) {
outcells[j] = null;
}
break;
}
// create pivot column
if (pivotColumn == null) {
pivotColumn = cell.toString();
} else {
pivotColumn += PIVOT_COLUMN_DELIMITER + cell.toString();
}
// is an aggregation column
} else {
// NOTE(review): auto-unboxing throws a NullPointerException if pivotStarts has no entry for the key
final int idx = pivotStarts.get(pivotColumn);
// position of this aggregation column among the aggregation columns
final int pivotIndex = i - pivotCount - groupCount;
final int pivotCellIndex = idx + pivotIndex;
if (// if retain order is off
orderPivotColumnName == null || !groupSpec.getColumnSpec(i).getName().equals(orderPivotColumnName)) {
outcells[pivotCellIndex] = cell;
} else {
// temp retain column (type:IntCell)
final int retainIndex = outcells.length - 1;
if (outcells[retainIndex] == null) {
outcells[retainIndex] = cell;
} else {
// keep the smaller of the buffered and the current value
final DataValueComparator comp = pivotSpec.getColumnSpec(retainIndex).getType().getComparator();
if (comp.compare(outcells[retainIndex], cell) > 0) {
outcells[retainIndex] = cell;
}
}
}
}
}
// rowIndex is incremented while evaluating the first argument, so the message shows a 1-based row number
exec.setProgress(rowIndex++ / (double) totalRowCount, String.format("Group \"%s\" (%d/%d)", origRowKey, rowIndex, totalRowCount));
exec.checkCanceled();
}
// write last group - if any.
if (outcells[0] != null) {
write(buf, outcells);
}
buf.close();
return buf.getTable();
}
use of org.knime.core.data.DataValueComparator in project knime-core by knime.
the class BinByDictionaryNodeModel method createColumnRearranger.
/**
 * Creates the rearranger that appends the label column to the first input table.
 * <p>
 * The label of a row is determined by searching the value of the configured value column (first input) in a
 * rule set built from the lower bound, upper bound and label columns of the second input. Either bound column
 * may be unset ({@code null}), in which case no comparator and no cell is used for that bound. If a bound
 * column's type differs from the value column's type, a warning is set and the comparator of the common super
 * type is used.
 *
 * @param ins the input specs; index 0 is the data table, index 1 the rule (dictionary) table
 * @param port1Table the rule table whose rows are added as rules, or {@code null} in which case no rules are
 *            added (the original code marks the non-null case as "in execute")
 * @param exec used for progress messages and cancellation checks while the rules are read
 * @return a rearranger on {@code ins[0]} with the label column appended
 * @throws InvalidSettingsException if no configuration is set or a configured column does not exist
 * @throws CanceledExecutionException if the execution is canceled while the rules are read
 */
private ColumnRearranger createColumnRearranger(final DataTableSpec[] ins, final BufferedDataTable port1Table,
    final ExecutionMonitor exec) throws InvalidSettingsException, CanceledExecutionException {
    final BinByDictionaryConfiguration config = m_configuration;
    if (config == null) {
        throw new InvalidSettingsException("No configuration set");
    }
    final String lowerName = config.getLowerBoundColumnPort1();
    final String upperName = config.getUpperBoundColumnPort1();
    final String labelName = config.getLabelColumnPort1();
    final String valueName = config.getValueColumnPort0();

    // value column (first input)
    final DataTableSpec dataSpec = ins[0];
    final int valueIdx = dataSpec.findColumnIndex(valueName);
    if (valueIdx < 0) {
        throw new InvalidSettingsException("No such column in 1st input: " + valueName);
    }
    final DataType valueType = dataSpec.getColumnSpec(valueIdx).getType();

    final boolean lowerInclusive = config.isLowerBoundInclusive();
    final boolean upperInclusive = config.isUpperBoundInclusive();

    final DataTableSpec dictSpec = ins[1];
    final String typeMismatchWarning = "The types of the comparison and value columns are not equal, "
        + "comparison might be done based on lexicographical string representation!";

    // lower bound column (second input), optional
    final int lowerIdx;
    final DataValueComparator lowerComparator;
    if (lowerName != null) {
        lowerIdx = dictSpec.findColumnIndex(lowerName);
        if (lowerIdx < 0) {
            throw new InvalidSettingsException("No such column in 2nd input: " + lowerName);
        }
        final DataType lowerType = dictSpec.getColumnSpec(lowerIdx).getType();
        if (!valueType.equals(lowerType)) {
            setWarningMessage(typeMismatchWarning);
            lowerComparator = DataType.getCommonSuperType(valueType, lowerType).getComparator();
        } else {
            lowerComparator = valueType.getComparator();
        }
    } else {
        lowerIdx = -1;
        lowerComparator = null;
    }

    // upper bound column (second input), optional
    final int upperIdx;
    final DataValueComparator upperComparator;
    if (upperName != null) {
        upperIdx = dictSpec.findColumnIndex(upperName);
        if (upperIdx < 0) {
            throw new InvalidSettingsException("No such column in 2nd input: " + upperName);
        }
        final DataType upperType = dictSpec.getColumnSpec(upperIdx).getType();
        if (!valueType.equals(upperType)) {
            setWarningMessage(typeMismatchWarning);
            upperComparator = DataType.getCommonSuperType(valueType, upperType).getComparator();
        } else {
            upperComparator = valueType.getComparator();
        }
    } else {
        upperIdx = -1;
        upperComparator = null;
    }

    // label column (second input), mandatory
    final int labelIdx = dictSpec.findColumnIndex(labelName);
    if (labelIdx < 0) {
        throw new InvalidSettingsException("No such column in 2nd input: " + labelName);
    }
    final DataColumnSpecCreator outSpecCreator = new DataColumnSpecCreator(dictSpec.getColumnSpec(labelIdx));
    outSpecCreator.removeAllHandlers();
    outSpecCreator.setName(DataTableSpec.getUniqueColumnName(dataSpec, labelName));
    final DataColumnSpec outSpec = outSpecCreator.createSpec();

    final BinByDictionaryRuleSet ruleSet = new BinByDictionaryRuleSet(lowerComparator, lowerInclusive,
        upperComparator, upperInclusive, config.isUseBinarySearch());
    if (port1Table != null) {
        // in execute: index all rows of the rule table
        final long rowCount = port1Table.size();
        long processed = 0;
        for (final DataRow ruleRow : port1Table) {
            final DataCell lower = lowerIdx >= 0 ? ruleRow.getCell(lowerIdx) : null;
            final DataCell upper = upperIdx >= 0 ? ruleRow.getCell(upperIdx) : null;
            ruleSet.addRule(lower, upper, ruleRow.getCell(labelIdx));
            processed++;
            // only the message is updated, the progress value stays at 0
            exec.setProgress(0.0,
                "Indexing rule table " + processed + "/" + rowCount + " (\"" + ruleRow.getKey() + "\")");
            exec.checkCanceled();
        }
    }
    ruleSet.close();

    final SingleCellFactory labelFactory = new SingleCellFactory(ruleSet.getSize() > 100, outSpec) {
        @Override
        public DataCell getCell(final DataRow row) {
            final DataCell value = row.getCell(valueIdx);
            if (value.isMissing()) {
                return DataType.getMissingCell();
            }
            final DataCell match = ruleSet.search(value);
            if (match == null && config.isFailIfNoRuleMatches()) {
                throw new RuntimeException(
                    "No rule matched for row \"" + row.getKey() + "\", value: \"" + value + "\"");
            }
            return match != null ? match : DataType.getMissingCell();
        }
    };
    final ColumnRearranger result = new ColumnRearranger(dataSpec);
    result.append(labelFactory);
    return result;
}
use of org.knime.core.data.DataValueComparator in project knime-core by knime.
the class SortedCorrelationComputer method getRanks.
/**
 * Replaces the values of one column by their ranks.
 * <p>
 * The ranks are always only calculated for one column at a time. This might be slower but reduces the amount
 * of memory needed. The cells of the column are sorted with the comparator of the column's type and receive
 * their 1-based position in that order as rank; cells that compare equal to their predecessor receive the mean
 * of the positions of the tied cells. Afterwards the original row order is restored.
 * <p>
 * Note: the mean rank of tied cells is stored in a {@link HashMap} keyed by cell, so the tie handling relies on
 * {@code equals}/{@code hashCode} of the cells being consistent with the comparator.
 *
 * @param bdt the data table to convert
 * @param colIndex the column to rank
 * @param exec the execution context to report progress to
 * @return a buffered data table where column {@code colIndex} is replaced with a numerical column containing
 *         its rank
 * @throws CanceledExecutionException if the execution is canceled while the ranks are computed
 */
private BufferedDataTable getRanks(final BufferedDataTable bdt, final int colIndex, final ExecutionContext exec)
    throws CanceledExecutionException {
    final DataValueComparator colComparator =
        bdt.getDataTableSpec().getColumnSpec(colIndex).getType().getComparator();
    LinkedList<SortablePair> myList = new LinkedList<SortablePair>();
    // read the data, remembering the original position of every cell
    int counter = 0;
    for (DataRow row : bdt) {
        DataCell dCell = row.getCell(colIndex);
        myList.add(new SortablePair(counter++, dCell));
        exec.checkCanceled();
    }
    exec.setProgress(0.2);
    // sort the data by value
    Collections.sort(myList, new Comparator<SortablePair>() {
        @Override
        public int compare(final SortablePair dr1, final SortablePair dr2) {
            if (dr1 == dr2) {
                return 0;
            }
            if (dr1 == null) {
                return 1;
            }
            if (dr2 == null) {
                return -1;
            }
            return colComparator.compare(dr1.getDataCell(), dr2.getDataCell());
        }
    });
    exec.setProgress(0.4);
    // assign ranks and collect the mean rank of tied values
    counter = 1;
    DataCell lastCell = null;
    HashMap<DataCell, Double> duplicateValues = new HashMap<>();
    // number of cells in the current run of equal values (0 while not inside a run)
    int nrOfDups = 0;
    for (SortablePair p : myList) {
        exec.checkCanceled();
        double rank = 1.0 * counter++;
        DataCell currentCell = p.getDataCell();
        if (lastCell != null) {
            if (colComparator.compare(lastCell, currentCell) == 0) {
                if (duplicateValues.containsKey(lastCell)) {
                    // run continues: update the running mean with the current rank
                    nrOfDups++;
                    duplicateValues.put(lastCell,
                        ((duplicateValues.get(lastCell) * (nrOfDups - 1) + rank) / nrOfDups));
                } else {
                    // run of two starts: the mean of (rank - 1) and rank
                    duplicateValues.put(lastCell, (rank - 0.5));
                    nrOfDups = 2;
                }
            } else {
                nrOfDups = 0;
            }
        }
        lastCell = currentCell;
        p.setRank(rank);
    }
    exec.setProgress(0.6);
    // resolve duplicates: overwrite the positional rank of tied cells with their mean rank
    if (duplicateValues.size() > 0) {
        for (SortablePair p : myList) {
            exec.checkCanceled();
            Double d = duplicateValues.get(p.getDataCell());
            if (d != null) {
                p.setRank(d);
            }
        }
    }
    exec.setProgress(0.8);
    // restore the original row order
    Collections.sort(myList, new Comparator<SortablePair>() {
        @Override
        public int compare(final SortablePair dr1, final SortablePair dr2) {
            if (dr1 == dr2) {
                return 0;
            }
            if (dr1 == null) {
                return 1;
            }
            if (dr2 == null) {
                return -1;
            }
            // Integer.compare instead of subtraction: cannot overflow
            return Integer.compare(dr1.getOrignalOrder(), dr2.getOrignalOrder());
        }
    });
    return replace(bdt, colIndex, myList, exec.createSubExecutionContext(0.2));
}
Aggregations