Search in sources :

Example 86 with DataType

use of org.knime.core.data.DataType in project knime-core by knime.

the class Distances method getMinkowskiDistance.

/**
 * Calculates the Minkowski distance between a regular <code>DataRow</code>
 * and a <code>SotaTreeCell</code>.
 * <p>
 * Depending on <code>fuzzy</code>, either only the fuzzy interval cells or
 * only the number cells of the row contribute to the distance. The
 * <i>k</i>-th contributing cell of the row is compared with the <i>k</i>-th
 * entry of the cell's data array. If the row has more contributing cells
 * than the <code>SotaTreeCell</code> has entries (or vice versa), the
 * surplus is simply ignored.
 * <p>
 * The given power specifies the distance kind, i.e. if power is set to 2
 * the euclidean distance will be computed, if it is set to 1 the Manhattan
 * distance. The absolute value of each coordinate difference is used, so
 * the result is well defined for odd powers too.
 *
 * @param power The power to use.
 * @param row The row to compute the distance.
 * @param cell The cell to compute the distance.
 * @param fuzzy If <code>true</code> only fuzzy data is taken into account
 *            (represented by the value of
 *            <code>SotaFuzzyMath.getCenterOfCoreRegion</code>), if
 *            <code>false</code> only number data.
 *
 * @return Minkowski distance between the row and the cell.
 */
public static double getMinkowskiDistance(final int power, final DataRow row, final SotaTreeCell cell, final boolean fuzzy) {
    final int cellCount = cell.getData().length;
    int col = 0;
    double distance = 0;
    for (int i = 0; i < row.getNumCells(); i++) {
        final DataType type = row.getCell(i).getType();
        final boolean contributes = fuzzy ? SotaUtil.isFuzzyIntervalType(type) : SotaUtil.isNumberType(type);
        if (!contributes || col >= cellCount) {
            continue;
        }
        final double rowValue;
        if (fuzzy) {
            rowValue = SotaFuzzyMath.getCenterOfCoreRegion((FuzzyIntervalValue) row.getCell(i));
        } else {
            rowValue = ((DoubleValue) row.getCell(i)).getDoubleValue();
        }
        // Use |a - b|: with a signed difference, odd powers let terms cancel
        // each other and a negative sum turns the final root into NaN.
        distance += Math.pow(Math.abs(cell.getData()[col].getValue() - rowValue), power);
        col++;
    }
    return Math.pow(distance, 1.0 / power);
}
Also used : DoubleValue(org.knime.core.data.DoubleValue) DataType(org.knime.core.data.DataType)

Example 87 with DataType

use of org.knime.core.data.DataType in project knime-core by knime.

the class Distances method getStandardDeviation.

/**
 * Returns the sample standard deviation (normalised by <i>n</i> - 1) of the
 * contributing cells of the given row.
 * <p>
 * Depending on <code>fuzzy</code>, either only fuzzy interval cells
 * (represented by the value of
 * <code>SotaFuzzyMath.getCenterOfCoreRegion</code>) or only number cells
 * contribute. If fewer than two cells contribute, the deviation is not
 * defined and <code>0</code> is returned instead of <code>NaN</code>.
 *
 * @param row the row to compute the standard deviation of.
 * @param fuzzy if <code>true</code> only fuzzy data is respected, if
 *            <code>false</code> only number data
 * @return the standard deviation of the given row, or <code>0</code> if
 *         fewer than two cells contribute
 */
public static double getStandardDeviation(final DataRow row, final boolean fuzzy) {
    final double mean = Distances.getMean(row, fuzzy);
    double dev = 0;
    int count = 0;
    for (int i = 0; i < row.getNumCells(); i++) {
        final DataType type = row.getCell(i).getType();
        if (SotaUtil.isNumberType(type) && !fuzzy) {
            dev += Math.pow(((DoubleValue) row.getCell(i)).getDoubleValue() - mean, 2);
            count++;
        } else if (SotaUtil.isFuzzyIntervalType(type) && fuzzy) {
            dev += Math.pow(SotaFuzzyMath.getCenterOfCoreRegion((FuzzyIntervalValue) row.getCell(i)) - mean, 2);
            count++;
        }
    }
    // With a single value the division below would be 0 / 0 (NaN), with no
    // value at all it would yield -0.0; report "no spread" in both cases.
    if (count < 2) {
        return 0;
    }
    return Math.sqrt(dev / (count - 1));
}
Also used : FuzzyIntervalValue(org.knime.core.data.FuzzyIntervalValue) DoubleValue(org.knime.core.data.DoubleValue) DataType(org.knime.core.data.DataType)

Example 88 with DataType

use of org.knime.core.data.DataType in project knime-core by knime.

the class Distances method getCorrelationDistance.

/**
 * Returns the coefficient of correlation distance between the rows with a
 * given offset, i.e. <code>offset - r</code> where <code>r</code> is the
 * correlation coefficient of the contributing cells of both rows.
 * <p>
 * The cell types of <code>row1</code> decide which columns contribute;
 * the cell at the same index in <code>row2</code> is cast to the same value
 * interface. If the standard deviation of one of the rows is
 * <code>0</code>, the correlation is treated as <code>0</code> and
 * <code>offset</code> is returned.
 *
 * @param row1 first row to compute the coefficient of correlation
 * @param row2 second row to compute the coefficient of correlation
 * @param offset offset to subtract coefficient of correlation from
 * @param abs flags if the absolute value of the distance should be
 *            returned
 * @param fuzzy if <code>true</code> only fuzzy data is respected, if
 *            <code>false</code> only number data
 * @return the correlation distance between given rows
 */
public static double getCorrelationDistance(final DataRow row1, final DataRow row2, final double offset, final boolean abs, final boolean fuzzy) {
    final double meanRow1 = Distances.getMean(row1, fuzzy);
    final double meanRow2 = Distances.getMean(row2, fuzzy);
    final double devRow1 = Distances.getStandardDeviation(row1, fuzzy);
    final double devRow2 = Distances.getStandardDeviation(row2, fuzzy);
    if (devRow1 == 0 || devRow2 == 0) {
        // No spread in at least one row: correlation is undefined, use 0.
        return offset;
    }
    double covarianceSum = 0;
    int count = 0;
    for (int i = 0; i < row1.getNumCells(); i++) {
        final DataType type = row1.getCell(i).getType();
        if (SotaUtil.isNumberType(type) && !fuzzy) {
            covarianceSum += (((DoubleValue) row1.getCell(i)).getDoubleValue() - meanRow1) * (((DoubleValue) row2.getCell(i)).getDoubleValue() - meanRow2);
            count++;
        } else if (SotaUtil.isFuzzyIntervalType(type) && fuzzy) {
            covarianceSum += (SotaFuzzyMath.getCenterOfCoreRegion((FuzzyIntervalValue) row1.getCell(i)) - meanRow1) * (SotaFuzzyMath.getCenterOfCoreRegion((FuzzyIntervalValue) row2.getCell(i)) - meanRow2);
            count++;
        }
    }
    // getStandardDeviation normalises by (n - 1); the covariance has to use
    // the same normalisation, otherwise identical rows yield a correlation
    // of (n - 1) / n instead of 1.
    double dist = offset - (covarianceSum / ((count - 1) * devRow1 * devRow2));
    if (abs) {
        dist = Math.abs(dist);
    }
    return dist;
}
Also used : FuzzyIntervalValue(org.knime.core.data.FuzzyIntervalValue) DoubleValue(org.knime.core.data.DoubleValue) DataType(org.knime.core.data.DataType)

Example 89 with DataType

use of org.knime.core.data.DataType in project knime-core by knime.

the class Distances method getCosinusDistance.

/**
 * Returns the cosinus distance between the cell's values and the
 * contributing cells of the given row with a given offset, i.e.
 * <code>offset - cos</code> where <code>cos</code> is the cosine of the
 * angle between both vectors.
 * <p>
 * Depending on <code>fuzzy</code>, either only fuzzy interval cells
 * (represented by the value of
 * <code>SotaFuzzyMath.getCenterOfCoreRegion</code>) or only number cells of
 * the row contribute. The <i>k</i>-th contributing cell of the row is
 * paired with the <i>k</i>-th entry of the cell's data array; surplus
 * entries on either side are ignored. If one of the two vectors has length
 * <code>0</code> (including the case that nothing contributes), the cosine
 * is undefined and <code>offset</code> is returned.
 *
 * @param row row to compute the cosinus distance of
 * @param cell cell to compute the cosinus distance of
 * @param offset offset to subtract cosinus distance from
 * @param fuzzy if <code>true</code> only fuzzy data is respected, if
 *            <code>false</code> only number data
 * @return the cosinus distance between given row and cell
 */
public static double getCosinusDistance(final DataRow row, final SotaTreeCell cell, final double offset, final boolean fuzzy) {
    final int cellCount = cell.getData().length;
    int col = 0;
    double vectorMultRes = 0;
    double vectorLength = 0;
    double cellLength = 0;
    for (int i = 0; i < row.getNumCells(); i++) {
        final DataType type = row.getCell(i).getType();
        final boolean contributes = fuzzy ? SotaUtil.isFuzzyIntervalType(type) : SotaUtil.isNumberType(type);
        if (!contributes || col >= cellCount) {
            continue;
        }
        final double rowValue;
        if (fuzzy) {
            rowValue = SotaFuzzyMath.getCenterOfCoreRegion((FuzzyIntervalValue) row.getCell(i));
        } else {
            rowValue = ((DoubleValue) row.getCell(i)).getDoubleValue();
        }
        vectorMultRes += cell.getData()[col].getValue() * rowValue;
        vectorLength += Math.pow(rowValue, 2);
        cellLength += Math.pow(cell.getData()[col].getValue(), 2);
        col++;
    }
    if (vectorLength == 0 || cellLength == 0) {
        // A zero vector has no direction; avoid 0 / 0 = NaN and treat the
        // cosine as 0, like the degenerate case of the correlation distance.
        return offset;
    }
    vectorLength = Math.sqrt(vectorLength);
    cellLength = Math.sqrt(cellLength);
    return offset - (vectorMultRes / (vectorLength * cellLength));
}
Also used : FuzzyIntervalValue(org.knime.core.data.FuzzyIntervalValue) DoubleValue(org.knime.core.data.DoubleValue) DataType(org.knime.core.data.DataType)

Example 90 with DataType

use of org.knime.core.data.DataType in project knime-core by knime.

the class Smoter method createFinalSpec.

/**
 * Derives the output table spec that results from <i>smoting</i> a table
 * described by <code>inSpec</code>.
 * <p>
 * Columns whose type is not compatible with {@link DoubleValue} are taken
 * over unchanged. For every {@link DoubleValue}-compatible column the type
 * is replaced by the common super type of the column's current type and
 * {@link DoubleCell#TYPE}; all other column properties are copied from the
 * input column spec.
 *
 * @param inSpec the table spec of the input table
 * @return the output table spec
 */
static DataTableSpec createFinalSpec(final DataTableSpec inSpec) {
    final DataColumnSpec[] outColumns = new DataColumnSpec[inSpec.getNumColumns()];
    for (int index = 0; index < outColumns.length; index++) {
        final DataColumnSpec inColumn = inSpec.getColumnSpec(index);
        if (!inColumn.getType().isCompatible(DoubleValue.class)) {
            // non-numeric columns pass through untouched
            outColumns[index] = inColumn;
            continue;
        }
        final DataColumnSpecCreator creator = new DataColumnSpecCreator(inColumn);
        // The column may hold some unusual double value type, so widen to
        // the common super type of the existing type and DoubleCell.TYPE.
        creator.setType(DataType.getCommonSuperType(inColumn.getType(), DoubleCell.TYPE));
        // The domain is deliberately left as is: per the original author
        // the operation is convex (not validated here - min and max depend
        // on the comparator being used).
        outColumns[index] = creator.createSpec();
    }
    return new DataTableSpec(outColumns);
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpec(org.knime.core.data.DataColumnSpec) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) DataType(org.knime.core.data.DataType)

Aggregations

DataType (org.knime.core.data.DataType)330 DataColumnSpec (org.knime.core.data.DataColumnSpec)142 DataTableSpec (org.knime.core.data.DataTableSpec)101 DataCell (org.knime.core.data.DataCell)96 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)95 DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator)71 DoubleValue (org.knime.core.data.DoubleValue)67 DataRow (org.knime.core.data.DataRow)61 ArrayList (java.util.ArrayList)55 SettingsModelString (org.knime.core.node.defaultnodesettings.SettingsModelString)34 ColumnRearranger (org.knime.core.data.container.ColumnRearranger)32 DefaultRow (org.knime.core.data.def.DefaultRow)24 HashSet (java.util.HashSet)23 HashMap (java.util.HashMap)20 StringCell (org.knime.core.data.def.StringCell)20 NominalValue (org.knime.core.data.NominalValue)18 DoubleCell (org.knime.core.data.def.DoubleCell)18 IntCell (org.knime.core.data.def.IntCell)18 BitVectorValue (org.knime.core.data.vector.bitvector.BitVectorValue)18 ByteVectorValue (org.knime.core.data.vector.bytevector.ByteVectorValue)18