Use of org.knime.core.data.DoubleValue in project knime-core by knime.
The class KnnNodeModel, method checkInputTables.
/**
 * Checks if the two input tables are correct and fills the last two
 * arguments with sensible values.
 *
 * @param inSpecs the input tables' specs
 * @param featureColumns a list that gets filled with the feature columns'
 *            indices; all columns with {@link DoubleValue}s are used as
 *            features
 * @param firstToSecond a map that afterwards maps the indices of the
 *            feature columns in the first table to the corresponding
 *            columns in the second table
 * @throws InvalidSettingsException if the two tables are not compatible
 */
private void checkInputTables(final DataTableSpec[] inSpecs, final List<Integer> featureColumns,
        final Map<Integer, Integer> firstToSecond) throws InvalidSettingsException {
    if (!inSpecs[0].containsCompatibleType(DoubleValue.class)) {
        throw new InvalidSettingsException("First input table does not contain a numeric column.");
    }
    if (!inSpecs[0].containsCompatibleType(StringValue.class)) {
        throw new InvalidSettingsException("First input table does not contain a class column of type string.");
    }
    int i = 0;
    for (DataColumnSpec cs : inSpecs[0]) {
        if (cs.getType().isCompatible(DoubleValue.class)) {
            featureColumns.add(i);
        } else if (!cs.getName().equals(m_settings.classColumn())) {
            setWarningMessage("Input table contains more than one non-numeric column; they will be ignored.");
        }
        i++;
    }
    for (int k : featureColumns) {
        final DataColumnSpec cs0 = inSpecs[0].getColumnSpec(k);
        int secondColIndex = inSpecs[1].findColumnIndex(cs0.getName());
        if (secondColIndex == -1) {
            throw new InvalidSettingsException("Second input table does not contain a column '" + cs0.getName() + "'");
        }
        final DataColumnSpec cs1 = inSpecs[1].getColumnSpec(secondColIndex);
        if (cs0.getName().equals(cs1.getName()) && cs1.getType().isCompatible(DoubleValue.class)) {
            firstToSecond.put(k, secondColIndex);
        } else {
            throw new InvalidSettingsException("Column '" + cs1.getName()
                    + "' from second table is not compatible with corresponding column '"
                    + cs0.getName() + "' from first table.");
        }
    }
}
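A minimal sketch of how a caller such as configure() might use this check; the call site and surrounding variables are illustrative assumptions, not taken from the KNIME source:

// Hypothetical call site (e.g. inside configure()); names are illustrative.
List<Integer> featureColumns = new ArrayList<Integer>();
Map<Integer, Integer> firstToSecond = new HashMap<Integer, Integer>();
checkInputTables(inSpecs, featureColumns, firstToSecond);
// featureColumns now holds the indices of all DoubleValue-compatible columns
// of the first table; firstToSecond maps each of them to the index of the
// matching column in the second table.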
Use of org.knime.core.data.DoubleValue in project knime-core by knime.
The class Distances, method getCosinusDistance.
/**
 * Computes the cosine distance between the given two rows, with the given
 * offset.
 *
 * @param row1 first row to compute the cosine distance of
 * @param row2 second row to compute the cosine distance of
 * @param offset offset to subtract the cosine distance from
 * @param fuzzy if <code>true</code> only fuzzy data is respected, if
 *            <code>false</code> only number data
 * @return the cosine distance between the given two rows
 */
public static double getCosinusDistance(final DataRow row1, final DataRow row2,
        final double offset, final boolean fuzzy) {
    double distance = 0;
    double vectorMultRes = 0;
    double vector1Length = 0;
    double vector2Length = 0;
    for (int i = 0; i < row1.getNumCells(); i++) {
        DataType type1 = row1.getCell(i).getType();
        DataType type2 = row2.getCell(i).getType();
        if (SotaUtil.isNumberType(type1) && SotaUtil.isNumberType(type2) && !fuzzy) {
            double v1 = ((DoubleValue) row1.getCell(i)).getDoubleValue();
            double v2 = ((DoubleValue) row2.getCell(i)).getDoubleValue();
            vectorMultRes += v1 * v2;
            vector1Length += Math.pow(v1, 2);
            vector2Length += Math.pow(v2, 2);
        } else if (SotaUtil.isFuzzyIntervalType(type1) && SotaUtil.isFuzzyIntervalType(type2) && fuzzy) {
            double c1 = SotaFuzzyMath.getCenterOfCoreRegion((FuzzyIntervalValue) row1.getCell(i));
            double c2 = SotaFuzzyMath.getCenterOfCoreRegion((FuzzyIntervalValue) row2.getCell(i));
            vectorMultRes += c1 * c2;
            vector1Length += Math.pow(c1, 2);
            vector2Length += Math.pow(c2, 2);
        }
    }
    vector1Length = Math.sqrt(vector1Length);
    vector2Length = Math.sqrt(vector2Length);
    distance = vectorMultRes / (vector1Length * vector2Length);
    if (offset != 0) {
        distance = offset - distance;
    }
    return distance;
}
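The loop accumulates the dot product and the squared norms of the two row vectors, so the returned value is offset - (v1 . v2) / (|v1| |v2|), or the plain cosine similarity when offset is 0. A self-contained sketch of the same computation on double arrays (the method name cosineDistance is hypothetical):

// Mirrors getCosinusDistance on plain arrays: d = offset - (a.b)/(|a||b|).
static double cosineDistance(final double[] a, final double[] b, final double offset) {
    double dot = 0;
    double lenA = 0;
    double lenB = 0;
    for (int i = 0; i < a.length; i++) {
        dot += a[i] * b[i];
        lenA += a[i] * a[i];
        lenB += b[i] * b[i];
    }
    double similarity = dot / (Math.sqrt(lenA) * Math.sqrt(lenB));
    // like the original, return the raw similarity if no offset is given
    return offset != 0 ? offset - similarity : similarity;
}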
Use of org.knime.core.data.DoubleValue in project knime-core by knime.
The class CreateBitVectorNodeModel, method calculateMeanValues.
private double[] calculateMeanValues(final ExecutionMonitor exec, final BufferedDataTable input,
        final int[] colIndices) throws CanceledExecutionException {
    // one mean per selected column; the array is indexed parallel to colIndices
    double[] meanValues = new double[colIndices.length];
    long nrOfRows = 0;
    final long rowCount = input.size();
    for (DataRow row : input) {
        exec.setProgress(nrOfRows / (double) rowCount,
                "Computing mean value. Processing row " + nrOfRows + " of " + rowCount);
        exec.checkCanceled();
        for (int i = 0; i < colIndices.length; i++) {
            DataCell cell = row.getCell(colIndices[i]);
            if (cell.isMissing()) {
                // missing cells contribute nothing to the sum, but the row
                // still counts towards the divisor below
                continue;
            }
            if (cell instanceof DoubleValue) {
                meanValues[i] += ((DoubleValue) cell).getDoubleValue();
            } else {
                throw new RuntimeException("Found incompatible type in row " + row.getKey().getString());
            }
        }
        nrOfRows++;
    }
    for (int i = 0; i < meanValues.length; i++) {
        meanValues[i] = meanValues[i] / nrOfRows;
    }
    return meanValues;
}
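The per-column means are presumably used as thresholds when the bit vectors are created. A rough sketch of that idea, assuming a caller with access to the same input table and colIndices; this is not the actual KNIME implementation:

// Hypothetical use of the means as bit thresholds; illustrative only.
double[] means = calculateMeanValues(exec, input, colIndices);
for (DataRow row : input) {
    for (int i = 0; i < colIndices.length; i++) {
        DataCell cell = row.getCell(colIndices[i]);
        // bit i is set if the cell value is at least the column mean
        boolean bit = !cell.isMissing()
                && ((DoubleValue) cell).getDoubleValue() >= means[i];
        // ... write bit i into the row's bit vector
    }
}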
Use of org.knime.core.data.DoubleValue in project knime-core by knime.
The class CAIMDiscretizationNodeModel, method execute.
/**
 * {@inheritDoc}
 */
@Override
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    // measure the time
    long startTime = System.currentTimeMillis();
    // empty model
    if (m_includedColumnNames.getIncludeList() == null || m_includedColumnNames.getIncludeList().size() == 0) {
        return new PortObject[] { inData[0], new DiscretizationModel() };
    }
    LOGGER.debug("Start discretizing.");
    // the algorithm is designed for binary class problems only (positive,
    // negative), so it is performed once for each class value, labeling that
    // value as the positive class and all others as negative
    exec.setProgress(0.0, "Preparing...");
    // check input data
    BufferedDataTable data = (BufferedDataTable) inData[0];
    // get class column index
    m_classifyColumnIndex = data.getDataTableSpec().findColumnIndex(m_classColumnName.getStringValue());
    assert m_classifyColumnIndex > -1;
    // create the class - index mapping
    createClassFromToIndexMaps(data.getDataTableSpec());
    // create the array with the resulting discretization schemes for
    // each included column
    DiscretizationScheme[] resultSchemes = new DiscretizationScheme[m_includedColumnNames.getIncludeList().size()];
    // discretize all included columns
    int currentColumn = 0;
    for (String includedColumnName : m_includedColumnNames.getIncludeList()) {
        LOGGER.debug("Process column: " + includedColumnName);
        exec.setProgress("Discretizing column '" + includedColumnName + "'");
        ExecutionContext subExecPerColumn = exec.createSubExecutionContext(1.0D / m_includedColumnNames.getIncludeList().size());
        subExecPerColumn.checkCanceled();
        // never discretize the class column (it should not be in the include list)
        if (m_classColumnName.getStringValue().equals(includedColumnName)) {
            continue;
        }
        // determine the column index of the current column
        int columnIndex = data.getDataTableSpec().findColumnIndex(includedColumnName);
        DataColumnDomain domain = data.getDataTableSpec().getColumnSpec(columnIndex).getDomain();
        double minValue = ((DoubleValue) domain.getLowerBound()).getDoubleValue();
        double maxValue = ((DoubleValue) domain.getUpperBound()).getDoubleValue();
        // find all distinct values of the column and create a list of all
        // possible interval boundaries (midpoints of adjacent values)
        subExecPerColumn.setProgress("Find possible boundaries.");
        BoundaryScheme boundaryScheme = null;
        // create subExec for sorting
        ExecutionContext subExecSort = subExecPerColumn.createSubExecutionContext(0.1);
        // long t1 = System.currentTimeMillis();
        if (m_classOptimizedVersion) {
            boundaryScheme = createAllIntervalBoundaries(data, columnIndex, subExecSort);
        } else {
            boundaryScheme = createAllIntervalBoundaries2(data, columnIndex, subExecSort);
        }
        subExecSort.setProgress(1.0D);
        // long t2 = System.currentTimeMillis() - t1;
        // LOGGER.error("Create boundaries time: " + (t2 / 1000.0) + " optimized: " + m_classOptimizedVersion);
        // LOGGER.error("Boundaries: " + boundaryScheme.getHead());
        LinkedDouble allIntervalBoundaries = boundaryScheme.getHead();
        // create the initial discretization scheme
        DiscretizationScheme discretizationScheme = new DiscretizationScheme(new Interval(minValue, maxValue, true, true));
        double globalCAIM = 0;
        // perform the iterative search for the best intervals
        int numInsertedBounds = 0;
        double currentCAIM = 0;
        // create subExec for inserted bounds
        ExecutionContext subExecBounds = subExecPerColumn.createSubExecutionContext(0.9);
        while (currentCAIM > globalCAIM || numInsertedBounds < m_classValues.length - 1) {
            subExecPerColumn.checkCanceled();
            // create subExec for counting
            ExecutionContext subExecCount = subExecBounds.createSubExecutionContext(1.0D / m_classValues.length);
            // LOGGER.debug("Inserted bounds: " + numInsertedBounds);
            // LOGGER.debug("interval boundaries: " + allIntervalBoundaries);
            // tentatively insert each possible interval boundary, calculate the
            // CAIM value, and keep the boundary with the biggest CAIM
            LinkedDouble intervalBoundary = allIntervalBoundaries.m_next;
            currentCAIM = 0;
            LinkedDouble bestBoundary = null;
            long currentCountedBoundaries = 0;
            while (intervalBoundary != null) {
                subExecPerColumn.checkCanceled();
                // set progress
                currentCountedBoundaries++;
                subExecCount.setProgress((double) currentCountedBoundaries / (double) boundaryScheme.getNumBoundaries(),
                        "Count for possible boundary " + currentCountedBoundaries + " of " + boundaryScheme.getNumBoundaries());
                // LOGGER.debug("current caim: " + currentCAIM);
                DiscretizationScheme tentativeDS = new DiscretizationScheme(discretizationScheme);
                tentativeDS.insertBound(intervalBoundary.m_value);
                // create the quanta matrix
                QuantaMatrix2D quantaMatrix = new QuantaMatrix2D(tentativeDS, m_classValueToIndexMap);
                // pass the data for filling the matrix
                quantaMatrix.countData(data, columnIndex, m_classifyColumnIndex);
                // calculate the caim
                double caim = quantaMatrix.calculateCaim();
                if (caim > currentCAIM) {
                    currentCAIM = caim;
                    bestBoundary = intervalBoundary;
                }
                intervalBoundary = intervalBoundary.m_next;
            }
            // if there is no best boundary, leave the outer while loop
            if (bestBoundary == null) {
                break;
            }
            // otherwise accept the best tentative discretization scheme
            if (currentCAIM > globalCAIM || numInsertedBounds < m_classValues.length) {
                int numIntervals = discretizationScheme.getNumIntervals();
                discretizationScheme.insertBound(bestBoundary.m_value);
                // remove the linked list element from the list
                bestBoundary.remove();
                globalCAIM = currentCAIM;
                if (numIntervals < discretizationScheme.getNumIntervals()) {
                    numInsertedBounds++;
                    subExecPerColumn.setProgress("Inserted bound " + numInsertedBounds);
                    // LOGGER.debug("Inserted boundary: " + bestBoundary.m_value);
                } else {
                    throw new IllegalStateException("Only useful bounds should be inserted: " + bestBoundary.m_value);
                }
            }
            subExecCount.setProgress(1.0D);
        }
        resultSchemes[currentColumn] = discretizationScheme;
        subExecBounds.setProgress(1.0D);
        // ensure the full progress is set for this iteration
        subExecPerColumn.setProgress(1.0D);
        currentColumn++;
    }
    // set the model
    DataTableSpec modelSpec = createModelSpec(m_includedColumnNames, data.getDataTableSpec());
    m_discretizationModel = new DiscretizationModel(resultSchemes, modelSpec);
    // create an output table that replaces the included columns by interval values
    BufferedDataTable resultTable = createResultTable(exec, data, m_discretizationModel);
    // log the runtime of the execute method
    long runtime = System.currentTimeMillis() - startTime;
    LOGGER.debug("Binning runtime: " + (runtime / 1000.0) + " sec.");
    return new PortObject[] { resultTable, m_discretizationModel };
}
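The score returned by quantaMatrix.calculateCaim() is the CAIM criterion of Kurgan and Cios: caim = (1/n) * sum over intervals r of (max_r^2 / M_r), where n is the number of intervals, max_r the largest single-class count in interval r, and M_r the total count in interval r. The following sketch evaluates that criterion on a plain counts matrix; it illustrates the formula and is not the QuantaMatrix2D source:

// Sketch of the CAIM criterion on a counts matrix q[class][interval].
// Illustrative only; not the QuantaMatrix2D implementation.
static double caim(final int[][] q) {
    int numIntervals = q[0].length;
    double sum = 0;
    for (int r = 0; r < numIntervals; r++) {
        int max = 0;
        int total = 0;
        for (int[] classRow : q) {
            max = Math.max(max, classRow[r]);
            total += classRow[r];
        }
        if (total > 0) {
            sum += (double) max * max / total;
        }
    }
    return sum / numIntervals;
}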
Use of org.knime.core.data.DoubleValue in project knime-core by knime.
The class CAIMDiscretizationNodeModel, method createAllIntervalBoundaries2.
/**
 * Sorts the data table in ascending order on the given column, determines
 * all distinct values, and builds a linked list holding the midpoints of
 * all adjacent distinct values. These midpoints represent all possible
 * interval boundaries.
 *
 * @param table the table with the data
 * @param columnIndex the column of interest
 * @param exec the execution context to set the progress
 * @return a {@link BoundaryScheme} holding the head of the boundary list
 *             and the number of boundaries
 * @throws Exception if sorting fails or the execution is canceled
 */
private BoundaryScheme createAllIntervalBoundaries2(final BufferedDataTable table,
        final int columnIndex, final ExecutionContext exec) throws Exception {
    // sort the data according to the column index
    List<String> sortColumn = new ArrayList<String>();
    sortColumn.add(table.getDataTableSpec().getColumnSpec(columnIndex).getName());
    boolean[] sortOrder = new boolean[1];
    // in ascending order
    sortOrder[0] = true;
    SortedTable sortedTable = new SortedTable(table, sortColumn, sortOrder, true, exec);
    // the first distinct value is the minimum value of the sorted list
    RowIterator rowIterator = sortedTable.iterator();
    double lastDifferentValue = ((DoubleValue) rowIterator.next().getCell(columnIndex)).getDoubleValue();
    // create the head of the linked double list,
    // marked by a negative-infinity sentinel
    LinkedDouble head = new LinkedDouble(Double.NEGATIVE_INFINITY);
    // remember the last added element
    LinkedDouble lastAdded = head;
    // count the number of boundaries
    int numBoundaries = 0;
    while (rowIterator.hasNext()) {
        DataRow row = rowIterator.next();
        DataCell cell = row.getCell(columnIndex);
        double value = ((DoubleValue) cell).getDoubleValue();
        if (value != lastDifferentValue) {
            // a new boundary is the midpoint of the two adjacent values
            double newBoundary = (value + lastDifferentValue) / 2.0D;
            lastDifferentValue = value;
            // append the new midpoint boundary to the linked list
            lastAdded.m_next = new LinkedDouble(newBoundary);
            numBoundaries++;
            lastAdded.m_next.m_previous = lastAdded;
            lastAdded = lastAdded.m_next;
        }
    }
    return new BoundaryScheme(head, numBoundaries);
}
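LinkedDouble, used here and in execute(), is a plain doubly linked node. Judging from the fields and the remove() call above, it looks roughly like the following reconstruction (not the actual KNIME source):

// Reconstruction from usage; field and method names match the calls above.
class LinkedDouble {
    double m_value;
    LinkedDouble m_next;
    LinkedDouble m_previous;

    LinkedDouble(final double value) {
        m_value = value;
    }

    /** Unlinks this node from its neighbors. */
    void remove() {
        if (m_previous != null) {
            m_previous.m_next = m_next;
        }
        if (m_next != null) {
            m_next.m_previous = m_previous;
        }
    }
}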