use of org.knime.core.data.DataType in project knime-core by knime.
the class Distances method getMinkowskiDistance.
/**
 * Calculates the Minkowski distance between a regular <code>DataRow</code>
 * and a <code>SotaTreeCell</code>.
 * <p>
 * If <code>fuzzy</code> is <code>false</code> only columns whose type is a
 * number type (according to <code>SotaUtil.isNumberType</code>) contribute
 * to the distance. If <code>fuzzy</code> is <code>true</code> only columns
 * whose type is a fuzzy interval type contribute, each one represented by
 * the value returned from
 * <code>SotaFuzzyMath.getCenterOfCoreRegion</code>.
 * <p>
 * The <i>k</i>-th contributing column of the row is compared with the
 * <i>k</i>-th data entry of the <code>SotaTreeCell</code>. If the number of
 * contributing columns differs from the number of entries in the cell, only
 * the first <i>n</i> pairs are used; the rest is simply ignored.
 * <p>
 * The given power specifies the distance kind, i.e. if power is set to 2
 * the euclidean distance will be computed and if it is set to 1 the
 * manhattan distance. The absolute value of every difference is taken
 * before it is raised to <code>power</code>, so odd powers yield a proper
 * (non-negative) distance as well.
 *
 * @param power The power to use.
 * @param row The row to compute the distance.
 * @param cell The cell to compute the distance.
 * @param fuzzy If <code>true</code> only fuzzy data is taken into account,
 *            if <code>false</code> only number data.
 *
 * @return Minkowski distance between the row and the cell.
 */
public static double getMinkowskiDistance(final int power, final DataRow row, final SotaTreeCell cell, final boolean fuzzy) {
    // Index into the cell's data; advances only for columns that are used.
    int col = 0;
    double distance = 0;
    for (int i = 0; i < row.getNumCells(); i++) {
        DataType type = row.getCell(i).getType();
        if (SotaUtil.isNumberType(type) && !fuzzy) {
            if (col < cell.getData().length) {
                double diff = cell.getData()[col].getValue() - ((DoubleValue) row.getCell(i)).getDoubleValue();
                // |diff|^power: without the absolute value an odd power
                // lets negative differences cancel positive ones and may
                // turn the final root into NaN.
                distance += Math.pow(Math.abs(diff), power);
                col++;
            }
        } else if (SotaUtil.isFuzzyIntervalType(type) && fuzzy) {
            if (col < cell.getData().length) {
                double diff = cell.getData()[col].getValue() - SotaFuzzyMath.getCenterOfCoreRegion((FuzzyIntervalValue) row.getCell(i));
                distance += Math.pow(Math.abs(diff), power);
                col++;
            }
        }
    }
    // power-th root of the accumulated sum
    return Math.pow(distance, 1.0 / power);
}
use of org.knime.core.data.DataType in project knime-core by knime.
the class Distances method getStandardDeviation.
/**
 * Computes the sample standard deviation over the usable cells of a row.
 * <p>
 * A cell is usable if its type is a number type and <code>fuzzy</code> is
 * <code>false</code>, or if its type is a fuzzy interval type and
 * <code>fuzzy</code> is <code>true</code>. Fuzzy cells are represented by
 * the value returned from
 * <code>SotaFuzzyMath.getCenterOfCoreRegion</code>. The mean is obtained
 * from <code>Distances.getMean</code> with the same arguments.
 * <p>
 * The sum of squared deviations is divided by the number of usable cells
 * minus one. With fewer than two usable cells that divisor is zero or
 * negative, so the result is not a meaningful deviation in that case.
 *
 * @param row the row to compute the standard deviation of.
 * @param fuzzy if <code>true</code> only fuzzy data is respected, if
 *            <code>false</code> only number data
 * @return the standard deviation of the given row
 */
public static double getStandardDeviation(final DataRow row, final boolean fuzzy) {
    final double mean = Distances.getMean(row, fuzzy);
    double squaredSum = 0;
    int used = 0;
    for (int idx = 0; idx < row.getNumCells(); idx++) {
        DataType cellType = row.getCell(idx).getType();
        double value;
        if (SotaUtil.isNumberType(cellType) && !fuzzy) {
            value = ((DoubleValue) row.getCell(idx)).getDoubleValue();
        } else if (SotaUtil.isFuzzyIntervalType(cellType) && fuzzy) {
            value = SotaFuzzyMath.getCenterOfCoreRegion((FuzzyIntervalValue) row.getCell(idx));
        } else {
            // neither a usable number nor a usable fuzzy cell
            continue;
        }
        squaredSum += Math.pow(value - mean, 2);
        used++;
    }
    // sample variance: divide by (n - 1)
    double variance = squaredSum / (used - 1);
    return Math.sqrt(variance);
}
use of org.knime.core.data.DataType in project knime-core by knime.
the class Distances method getCorrelationDistance.
/**
 * Returns the correlation distance between two rows, i.e. the given offset
 * minus the coefficient of correlation of the rows.
 * <p>
 * Only cells of <code>row1</code> whose type is a number type (if
 * <code>fuzzy</code> is <code>false</code>) or a fuzzy interval type (if
 * <code>fuzzy</code> is <code>true</code>) are used; the cell at the same
 * index of <code>row2</code> is cast to the same value kind. Fuzzy cells
 * are represented by the value returned from
 * <code>SotaFuzzyMath.getCenterOfCoreRegion</code>.
 * <p>
 * If the standard deviation of either row is zero the coefficient is
 * treated as zero and <code>offset</code> is returned unchanged.
 * <p>
 * The standard deviations come from
 * <code>Distances.getStandardDeviation</code>, which divides by
 * <i>n - 1</i>. The sum of cross products is therefore normalised by
 * <i>n - 1</i> as well, so that perfectly correlated rows yield a
 * coefficient of exactly 1.
 *
 * @param row1 first row to compute the coefficient of correlation
 * @param row2 second row to compute the coefficient of correlation
 * @param offset offset to subtract the coefficient of correlation from
 * @param abs if <code>true</code> the absolute value of the result (offset
 *            minus coefficient) is returned
 * @param fuzzy if <code>true</code> only fuzzy data is respected, if
 *            <code>false</code> only number data
 * @return the offset minus the coefficient of correlation between the
 *         given rows, made absolute if <code>abs</code> is set
 */
public static double getCorrelationDistance(final DataRow row1, final DataRow row2, final double offset, final boolean abs, final boolean fuzzy) {
    double meanRow1 = Distances.getMean(row1, fuzzy);
    double meanRow2 = Distances.getMean(row2, fuzzy);
    double devRow1 = Distances.getStandardDeviation(row1, fuzzy);
    double devRow2 = Distances.getStandardDeviation(row2, fuzzy);
    if (devRow1 == 0 || devRow2 == 0) {
        // A constant row has no defined correlation; use a coefficient of 0.
        return offset;
    }
    double dist = 0;
    int count = 0;
    for (int i = 0; i < row1.getNumCells(); i++) {
        DataType type = row1.getCell(i).getType();
        if (SotaUtil.isNumberType(type) && !fuzzy) {
            dist += (((DoubleValue) row1.getCell(i)).getDoubleValue() - meanRow1) * (((DoubleValue) row2.getCell(i)).getDoubleValue() - meanRow2);
            count++;
        } else if (SotaUtil.isFuzzyIntervalType(type) && fuzzy) {
            dist += (SotaFuzzyMath.getCenterOfCoreRegion((FuzzyIntervalValue) row1.getCell(i)) - meanRow1) * (SotaFuzzyMath.getCenterOfCoreRegion((FuzzyIntervalValue) row2.getCell(i)) - meanRow2);
            count++;
        }
    }
    // Normalise with (n - 1) to match the sample standard deviations;
    // dividing by n would scale the coefficient by (n - 1) / n.
    dist = offset - (dist / ((count - 1) * devRow1 * devRow2));
    if (abs) {
        dist = Math.abs(dist);
    }
    return dist;
}
use of org.knime.core.data.DataType in project knime-core by knime.
the class Distances method getCosinusDistance.
/**
 * Computes the cosine distance between a row and a
 * <code>SotaTreeCell</code>: the given offset minus the cosine of the
 * angle between both value vectors.
 * <p>
 * If <code>fuzzy</code> is <code>false</code> only columns with a number
 * type are used, if it is <code>true</code> only columns with a fuzzy
 * interval type, each represented by the value returned from
 * <code>SotaFuzzyMath.getCenterOfCoreRegion</code>. The <i>k</i>-th used
 * column of the row is paired with the <i>k</i>-th data entry of the cell;
 * used columns beyond the cell's data length are ignored.
 * <p>
 * No special handling exists for vectors of length zero; the division is
 * carried out as is.
 *
 * @param row row to compute the cosinus distance of
 * @param cell cell to compute the cosinus distance of
 * @param offset offset to subtract the cosine from
 * @param fuzzy if <code>true</code> only fuzzy data is respected, if
 *            <code>false</code> only number data
 * @return the cosinus distance between given row and cell
 */
public static double getCosinusDistance(final DataRow row, final SotaTreeCell cell, final double offset, final boolean fuzzy) {
    double dotProduct = 0;
    double rowSquares = 0;
    double cellSquares = 0;
    // position inside the cell's data, advanced once per used column
    int cellIdx = 0;
    for (int pos = 0; pos < row.getNumCells(); pos++) {
        DataType cellType = row.getCell(pos).getType();
        boolean useNumber = SotaUtil.isNumberType(cellType) && !fuzzy;
        boolean useFuzzy = !useNumber && SotaUtil.isFuzzyIntervalType(cellType) && fuzzy;
        if (!useNumber && !useFuzzy) {
            continue;
        }
        if (cellIdx >= cell.getData().length) {
            // no counterpart left in the tree cell
            continue;
        }
        double rowValue;
        if (useNumber) {
            rowValue = ((DoubleValue) row.getCell(pos)).getDoubleValue();
        } else {
            rowValue = SotaFuzzyMath.getCenterOfCoreRegion((FuzzyIntervalValue) row.getCell(pos));
        }
        double cellValue = cell.getData()[cellIdx].getValue();
        dotProduct += cellValue * rowValue;
        rowSquares += Math.pow(rowValue, 2);
        cellSquares += Math.pow(cellValue, 2);
        cellIdx++;
    }
    double rowNorm = Math.sqrt(rowSquares);
    double cellNorm = Math.sqrt(cellSquares);
    double cosine = dotProduct / (rowNorm * cellNorm);
    return offset - cosine;
}
use of org.knime.core.data.DataType in project knime-core by knime.
the class Smoter method createFinalSpec.
/**
 * Creates the out spec when <i>smoting</i> the table with
 * <code>inSpec</code>. Every column whose type is compatible with
 * {@link DoubleValue} gets the common super type of its current type and
 * {@link DoubleCell#TYPE}; all other columns are passed through unchanged.
 *
 * @param inSpec the table spec of the input table
 * @return the output table spec
 */
static DataTableSpec createFinalSpec(final DataTableSpec inSpec) {
    DataColumnSpec[] outColumns = new DataColumnSpec[inSpec.getNumColumns()];
    for (int idx = 0; idx < outColumns.length; idx++) {
        DataColumnSpec column = inSpec.getColumnSpec(idx);
        if (!column.getType().isCompatible(DoubleValue.class)) {
            // not a double column: keep the spec as it is
            outColumns[idx] = column;
            continue;
        }
        // Seed the creator with the existing spec so that only the type is
        // replaced below; nothing else is set explicitly here.
        DataColumnSpecCreator creator = new DataColumnSpecCreator(column);
        // The column may hold some special double value type, so widen to
        // the super type shared with DoubleCell.TYPE rather than forcing
        // DoubleCell.TYPE.
        DataType widened = DataType.getCommonSuperType(column.getType(), DoubleCell.TYPE);
        creator.setType(widened);
        // NOTE(review): the original author assumed the domain stays valid
        // because smoting is a convex operation, but remarked that min and
        // max depend on the comparator being used - unverified.
        outColumns[idx] = creator.createSpec();
    }
    return new DataTableSpec(outColumns);
}
Aggregations