use of org.knime.core.data.DoubleValue in project knime-core by knime.
the class Distances method getStandardDeviation.
/**
 * Returns the sample standard deviation of the given row.
 *
 * <p>Only cells matching the requested mode contribute: number cells (read
 * via <code>DoubleValue#getDoubleValue()</code>) when <code>fuzzy</code> is
 * <code>false</code>, fuzzy interval cells (read via the center of their
 * core region) when <code>fuzzy</code> is <code>true</code>. The squared
 * deviations from the mean returned by <code>Distances.getMean</code> are
 * summed up and divided by <code>count - 1</code>.
 *
 * @param row the row to compute the standard deviation of.
 * @param fuzzy if <code>true</code> only fuzzy data is respected, if
 *            <code>false</code> only number data
 * @return the standard deviation of the given row, or <code>0</code> if
 *         fewer than two cells contributed (the sample deviation is not
 *         defined in that case)
 */
public static double getStandardDeviation(final DataRow row, final boolean fuzzy) {
    double squaredDeviationSum = 0;
    int count = 0;
    final double mean = Distances.getMean(row, fuzzy);
    for (int i = 0; i < row.getNumCells(); i++) {
        final DataType type = row.getCell(i).getType();
        if (SotaUtil.isNumberType(type) && !fuzzy) {
            squaredDeviationSum += Math.pow((((DoubleValue) row.getCell(i)).getDoubleValue() - mean), 2);
            count++;
        } else if (SotaUtil.isFuzzyIntervalType(type) && fuzzy) {
            squaredDeviationSum += Math.pow((SotaFuzzyMath.getCenterOfCoreRegion((FuzzyIntervalValue) row.getCell(i)) - mean), 2);
            count++;
        }
    }
    // With zero or one contributing cell the division below would be
    // 0 / -1 or 0 / 0; the latter yields NaN, which slips past "== 0"
    // checks in callers. Report "no spread" instead.
    if (count < 2) {
        return 0;
    }
    return Math.sqrt(squaredDeviationSum / (count - 1));
}
use of org.knime.core.data.DoubleValue in project knime-core by knime.
the class Distances method getCorrelationDistance.
/**
 * Returns the correlation distance between two rows, i.e. the given offset
 * minus their coefficient of correlation.
 *
 * <p>Cells are selected by the type of the cell in <code>row1</code>; the
 * cell at the same index in <code>row2</code> is read the same way. If the
 * standard deviation of either row is zero the offset itself is returned.
 *
 * @param row1 first row to compute the coefficient of correlation
 * @param row2 second row to compute the coefficient of correlation
 * @param offset offset to subtract the coefficient of correlation from
 * @param abs flags if the absolute value of the distance should be returned
 * @param fuzzy if <code>true</code> only fuzzy data is respected, if
 *            <code>false</code> only number data
 * @return the offset minus the coefficient of correlation between the
 *         given rows, as absolute value if <code>abs</code> is set
 */
public static double getCorrelationDistance(final DataRow row1, final DataRow row2, final double offset, final boolean abs, final boolean fuzzy) {
    final double meanRow1 = Distances.getMean(row1, fuzzy);
    final double meanRow2 = Distances.getMean(row2, fuzzy);
    final double devRow1 = Distances.getStandardDeviation(row1, fuzzy);
    final double devRow2 = Distances.getStandardDeviation(row2, fuzzy);

    // Without spread in one of the rows the correlation is treated as zero.
    if (devRow1 == 0 || devRow2 == 0) {
        return offset;
    }

    double crossProductSum = 0;
    int used = 0;
    for (int idx = 0; idx < row1.getNumCells(); idx++) {
        final DataType type = row1.getCell(idx).getType();
        final double value1;
        final double value2;
        if (SotaUtil.isNumberType(type) && !fuzzy) {
            value1 = ((DoubleValue) row1.getCell(idx)).getDoubleValue();
            value2 = ((DoubleValue) row2.getCell(idx)).getDoubleValue();
        } else if (SotaUtil.isFuzzyIntervalType(type) && fuzzy) {
            value1 = SotaFuzzyMath.getCenterOfCoreRegion((FuzzyIntervalValue) row1.getCell(idx));
            value2 = SotaFuzzyMath.getCenterOfCoreRegion((FuzzyIntervalValue) row2.getCell(idx));
        } else {
            // Cell does not belong to the requested mode.
            continue;
        }
        crossProductSum += (value1 - meanRow1) * (value2 - meanRow2);
        used++;
    }

    // NOTE(review): the sum is normalized by the number of used cells here;
    // if the standard deviations are normalized by (count - 1) the
    // coefficient cannot reach +/-1 - confirm whether that is intended.
    final double correlation = crossProductSum / (used * devRow1 * devRow2);
    final double distance = offset - correlation;
    return abs ? Math.abs(distance) : distance;
}
use of org.knime.core.data.DoubleValue in project knime-core by knime.
the class Distances method getCosinusDistance.
/**
 * Returns the cosinus distance between the cells values and the number
 * cells of the given row with a given offset, i.e. the offset minus the
 * cosine of the angle between both vectors.
 *
 * <p>The row cells matching the requested mode are paired, in order, with
 * the entries of <code>cell.getData()</code>; matching row cells beyond the
 * number of data entries of the tree cell are ignored.
 *
 * @param row row to compute the cosinus distance of
 * @param cell cell to compute the cosinus distance of
 * @param offset offset to subtract cosinus distance from
 * @param fuzzy if <code>true</code> only fuzzy data is respected, if
 *            <code>false</code> only number data
 * @return the cosinus distance between given row and cell; if one of the
 *         two vectors has length zero (or no value contributed) the cosine
 *         is undefined and the offset itself is returned
 */
public static double getCosinusDistance(final DataRow row, final SotaTreeCell cell, final double offset, final boolean fuzzy) {
    int col = 0;
    double vectorMultRes = 0;
    double vectorSquares = 0;
    double cellSquares = 0;
    for (int i = 0; i < row.getNumCells(); i++) {
        final DataType type = row.getCell(i).getType();
        final boolean useNumber = SotaUtil.isNumberType(type) && !fuzzy;
        final boolean useFuzzy = !useNumber && SotaUtil.isFuzzyIntervalType(type) && fuzzy;
        if (!useNumber && !useFuzzy) {
            continue;
        }
        if (col >= cell.getData().length) {
            // All dimensions of the tree cell are consumed, nothing further
            // can contribute.
            break;
        }
        final double rowValue = useNumber
            ? ((DoubleValue) row.getCell(i)).getDoubleValue()
            : SotaFuzzyMath.getCenterOfCoreRegion((FuzzyIntervalValue) row.getCell(i));
        final double cellValue = cell.getData()[col].getValue();
        vectorMultRes += cellValue * rowValue;
        vectorSquares += Math.pow(rowValue, 2);
        cellSquares += Math.pow(cellValue, 2);
        col++;
    }
    final double lengthProduct = Math.sqrt(vectorSquares) * Math.sqrt(cellSquares);
    // A zero-length vector would turn the division into 0 / 0 = NaN; treat
    // the cosine as zero instead, as done for zero deviation in
    // getCorrelationDistance.
    if (lengthProduct == 0) {
        return offset;
    }
    return offset - vectorMultRes / lengthProduct;
}
use of org.knime.core.data.DoubleValue in project knime-core by knime.
the class Smoter method distance.
/*
 * Determines the Euclidean distance of two rows. Only columns whose type in
 * the input table spec is compatible with DoubleValue are taken into
 * account; a column in which either row has a missing cell contributes
 * nothing to the distance.
 */
private double distance(final DataRow row1, final DataRow row2) {
    double sumOfSquares = 0.0;
    for (int col = 0; col < row1.getNumCells(); col++) {
        if (!m_inTable.getDataTableSpec().getColumnSpec(col).getType().isCompatible(DoubleValue.class)) {
            continue;
        }
        final DataCell first = row1.getCell(col);
        final DataCell second = row2.getCell(col);
        if (first.isMissing() || second.isMissing()) {
            // Counts as a difference of zero.
            continue;
        }
        final double delta = ((DoubleValue) first).getDoubleValue() - ((DoubleValue) second).getDoubleValue();
        sumOfSquares += delta * delta;
    }
    return Math.sqrt(sumOfSquares);
}
use of org.knime.core.data.DoubleValue in project knime-core by knime.
the class AggregateOutputNodeModel method execute.
/**
 * {@inheritDoc}
 *
 * <p>Reads the flow variables <code>currentIteration</code> and
 * <code>maxIterations</code>, appends all rows of the first input table to
 * the collected prediction table (optionally with the fold number as an
 * additional column) and records one statistics row for this fold. For a
 * prediction column compatible with {@link DoubleValue} the squared error
 * sum and its mean are recorded, otherwise the percentage and number of
 * rows whose target and prediction cells differ. Unless this is the last
 * iteration the loop is continued and an array of two <code>null</code>
 * entries is returned; on the last iteration the prediction table and the
 * fold statistics table are returned.
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    // Both variables are expected to have been put on the stack by the
    // head of this loop.
    int count;
    int maxCount;
    try {
        count = peekFlowVariableInt("currentIteration");
        maxCount = peekFlowVariableInt("maxIterations");
    } catch (NoSuchElementException e) {
        throw new Exception("No matching Loop Start node!", e);
    }
    if (count < 0 || count >= maxCount) {
        throw new Exception("Conflicting loop variables, count is " + count + " and max count is " + maxCount);
    }

    final BufferedDataTable in = inData[0];
    final DataTableSpec inSpec = in.getDataTableSpec();
    if (count == 0) {
        // First iteration: remember the reference spec and start collecting.
        m_firstIterationSpec = inSpec;
        m_predictionTable = exec.createDataContainer(createPredictionSpec(inSpec));
    } else {
        if (m_predictionTable == null) {
            throw new Exception("Loop Head claims this is NOT the first iteration" + " but the tail believes it is?!");
        }
        if (!inSpec.equalStructure(m_firstIterationSpec)) {
            final StringBuilder error = new StringBuilder("Input table's structure differs from reference " + "(first iteration) table: ");
            final int inColumns = inSpec.getNumColumns();
            final int referenceColumns = m_firstIterationSpec.getNumColumns();
            if (inColumns != referenceColumns) {
                error.append("different column counts ").append(inColumns);
                error.append(" vs. ").append(referenceColumns);
            } else {
                // Same column count: list every column that deviates.
                for (int c = 0; c < inColumns; c++) {
                    final DataColumnSpec inCol = inSpec.getColumnSpec(c);
                    final DataColumnSpec referenceCol = m_firstIterationSpec.getColumnSpec(c);
                    if (!inCol.equalStructure(referenceCol)) {
                        error.append("Column ").append(c).append(" [").append(inCol);
                        error.append("] vs. [").append(referenceCol).append("]");
                    }
                }
            }
            throw new IllegalArgumentException(error.toString());
        }
    }

    final int rowCount = in.getRowCount();
    final int targetColIndex = inSpec.findColumnIndex(m_settings.targetColumn());
    final int predictColIndex = inSpec.findColumnIndex(m_settings.predictionColumn());
    final boolean numericMode = inSpec.getColumnSpec(predictColIndex).getType().isCompatible(DoubleValue.class);
    final boolean lastIteration = count == maxCount - 1;
    final ExecutionMonitor subExec = exec.createSubProgress(lastIteration ? 0.9 : 1);
    final int foldIndex = m_foldStatistics.size();
    final DataCell foldNumber = new IntCell(foldIndex);

    double errorSum = 0;
    int incorrect = 0;
    int processed = 0;
    for (DataRow row : in) {
        final RowKey key = row.getKey();
        if (numericMode) {
            final DoubleValue target = (DoubleValue) row.getCell(targetColIndex);
            final DoubleValue predict = (DoubleValue) row.getCell(predictColIndex);
            final double diff = target.getDoubleValue() - predict.getDoubleValue();
            errorSum += diff * diff;
        } else {
            final DataCell target = row.getCell(targetColIndex);
            final DataCell predict = row.getCell(predictColIndex);
            if (!target.equals(predict)) {
                incorrect++;
            }
        }
        processed++;
        final DataRow collected = m_settings.addFoldId() ? new AppendedColumnRow(key, row, foldNumber) : row;
        m_predictionTable.addRowToTable(collected);
        subExec.setProgress(processed / (double) rowCount, "Calculating output " + processed + "/" + rowCount + " (\"" + key + "\")");
        subExec.checkCanceled();
    }

    final RowKey foldKey = new RowKey("fold " + foldIndex);
    final DataRow stats;
    if (numericMode) {
        stats = new DefaultRow(foldKey, new DoubleCell(errorSum), new DoubleCell(errorSum / rowCount), new IntCell(rowCount));
    } else {
        stats = new DefaultRow(foldKey, new DoubleCell(100.0 * incorrect / rowCount), new IntCell(rowCount), new IntCell(incorrect));
    }
    m_foldStatistics.add(stats);

    if (!lastIteration) {
        continueLoop();
        return new BufferedDataTable[2];
    }

    // Last iteration: publish the collected predictions and fold statistics.
    final BufferedDataContainer cont = exec.createDataContainer(numericMode ? NUMERIC_STATISTICS_SPEC : NOMINAL_STATISTICS_SPEC);
    for (DataRow foldRow : m_foldStatistics) {
        cont.addRowToTable(foldRow);
    }
    cont.close();
    m_predictionTable.close();
    return new BufferedDataTable[] { m_predictionTable.getTable(), cont.getTable() };
}
Aggregations