Search in sources :

Example 1 with Variance

use of org.knime.base.data.statistics.calculation.Variance in project knime-core by knime.

the class StatisticCalculatorTest method testGeneralStats.

/**
 * Cross-checks the statistics collected through a {@link StatisticCalculator} against the values
 * reported by a {@link Statistics3Table} built from the same randomly generated table.
 *
 * @throws InvalidSettingsException if raised by the code under test
 * @throws CanceledExecutionException if raised by the code under test
 */
@Test
public void testGeneralStats() throws InvalidSettingsException, CanceledExecutionException {
    // Absolute tolerance used for every floating point comparison below.
    final double tolerance = 0.0001;
    BufferedDataTable table = createRandomTableWithMissingValues(20, 8);

    // One instance per statistic; all of them are evaluated together by the calculator.
    Skewness skewness = new Skewness();
    MinMax minMax = new MinMax();
    DoubleMinMax doubleMinMax = new DoubleMinMax(true);
    Kurtosis kurtosis = new Kurtosis();
    Mean mean = new Mean();
    Variance variance = new Variance();
    StandardDeviation sDev = new StandardDeviation();
    MissingValue missingValue = new MissingValue();
    SpecialDoubleCells sdc = new SpecialDoubleCells();

    StatisticCalculator calculator = new StatisticCalculator(table.getDataTableSpec(), table.getSpec().getColumnNames(), skewness, minMax, kurtosis, mean, variance, missingValue, sDev, doubleMinMax, sdc);
    calculator.evaluate(table, EXEC_CONTEXT);

    // Reference values to compare against.
    Statistics3Table expected = new Statistics3Table(table, false, 0, Collections.<String>emptyList(), EXEC_CONTEXT);

    int col = 0;
    while (col < table.getDataTableSpec().getNumColumns()) {
        // Results are looked up by the column index rendered as a string.
        String colName = "" + col;

        // Moments.
        assertEquals(expected.getMean(col), mean.getResult(colName), tolerance);
        assertEquals(expected.getKurtosis(col), kurtosis.getResult(colName), tolerance);

        // Lower bounds.
        checkValueOrMissingIfNaN(expected.getMin()[col], minMax.getMin(colName));
        assertEquals(expected.getMinCells()[col], minMax.getMin(colName));
        checkValueOrMissingIfNaN(doubleMinMax.getMin(colName), expected.getNonInfMin(col));

        // Upper bounds.
        checkValueOrMissingIfNaN(expected.getMax()[col], minMax.getMax(colName));
        assertEquals(expected.getMaxCells()[col], minMax.getMax(colName));
        checkValueOrMissingIfNaN(doubleMinMax.getMax(colName), expected.getNonInfMax(col));

        // Spread.
        assertEquals(expected.getVariance(col), variance.getResult(colName), tolerance);
        assertEquals(expected.getStandardDeviation(col), sDev.getResult(colName), tolerance);

        // Counts of missing and special values.
        assertEquals(expected.getNumberMissingValues(col), missingValue.getNumberMissingValues(colName), tolerance);
        assertEquals(expected.getNumberNegativeInfiniteValues(col), sdc.getNumberNegativeInfiniteValues(colName), tolerance);
        assertEquals(expected.getNumberPositiveInfiniteValues(col), sdc.getNumberPositiveInfiniteValues(colName), tolerance);
        assertEquals(expected.getNumberNaNValues(col), sdc.getNumberNaNValues(colName), tolerance);

        col++;
    }
}
Also used : Mean(org.knime.base.data.statistics.calculation.Mean) MissingValue(org.knime.base.data.statistics.calculation.MissingValue) Kurtosis(org.knime.base.data.statistics.calculation.Kurtosis) DoubleMinMax(org.knime.base.data.statistics.calculation.DoubleMinMax) MinMax(org.knime.base.data.statistics.calculation.MinMax) Variance(org.knime.base.data.statistics.calculation.Variance) DoubleMinMax(org.knime.base.data.statistics.calculation.DoubleMinMax) Skewness(org.knime.base.data.statistics.calculation.Skewness) BufferedDataTable(org.knime.core.node.BufferedDataTable) SpecialDoubleCells(org.knime.base.data.statistics.calculation.SpecialDoubleCells) StandardDeviation(org.knime.base.data.statistics.calculation.StandardDeviation) Test(org.junit.Test)

Example 2 with Variance

use of org.knime.base.data.statistics.calculation.Variance in project knime-core by knime.

the class CronbachNodeModel method execute.

/**
 * {@inheritDoc}
 *
 * <p>
 * Keeps only the columns selected by the column filter, sums the per-column variances, divides that
 * by the variance of the per-row sums and emits {@code k / (k - 1) * (1 - ratio)} (with {@code k} the
 * number of kept columns) as a single-row table with the row key "Cronbach".
 * </p>
 *
 * @throws InvalidSettingsException if any cell of the filtered table is missing
 */
@Override
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    final BufferedDataTable in = (BufferedDataTable) inData[0];
    final DataTableSpec inSpec = in.getDataTableSpec();

    // Reduce the input to the columns chosen in the column filter.
    ColumnRearranger filteredTableRearranger = new ColumnRearranger(inSpec);
    String[] includeNames = m_columnFilterModel.applyTo(inSpec).getIncludes();
    filteredTableRearranger.keepOnly(includeNames);
    final BufferedDataTable filteredTable = exec.createColumnRearrangeTable(in, filteredTableRearranger, exec.createSilentSubExecutionContext(0.0));
    final DataTableSpec filteredTableSpec = filteredTable.getDataTableSpec();
    final String[] columnNames = filteredTableSpec.getColumnNames();

    // step1 get variance for all columns
    Variance my = new Variance(columnNames);
    StatisticCalculator sc = new StatisticCalculator(filteredTableSpec, my);
    sc.evaluate(filteredTable, exec.createSubExecutionContext(0.5));

    // step2 get variance for the overall sum
    final int rowCount = filteredTable.getRowCount();
    // A new double[] is zero-initialized, so the per-row sums need no explicit reset.
    final double[] sum = new double[rowCount];
    ExecutionContext exec2 = exec.createSubExecutionContext(0.5);
    int i = 0;
    for (DataRow row : filteredTable) {
        exec2.checkCanceled();
        exec2.setProgress(i * 1.0 / rowCount, "Statistics calculation row " + i + " of " + rowCount);
        for (DataCell cell : row) {
            if (cell.isMissing()) {
                throw new InvalidSettingsException("Missing Values are not supported. " + "Please resolve them with the Missing Value node.");
            }
            sum[i] += ((DoubleValue) cell).getDoubleValue();
        }
        i++;
    }

    exec.setMessage("Calculating Cronbach over all Columns");
    // Sum of the individual column variances.
    double cronbach = 0;
    for (String s : columnNames) {
        cronbach += my.getResult(s);
        exec.checkCanceled();
    }
    // Ratio of summed column variances to the variance of the row sums.
    org.apache.commons.math3.stat.descriptive.moment.Variance v = new org.apache.commons.math3.stat.descriptive.moment.Variance();
    cronbach /= v.evaluate(sum);
    double k = filteredTableSpec.getNumColumns();
    cronbach = k / (k - 1) * (1.0 - cronbach);

    BufferedDataContainer out = exec.createDataContainer(getDataTableSpec());
    if (in.getRowCount() <= 0) {
        setWarningMessage("Empty input table, no value calculated!");
    }
    // The result row is written even when the warning above was raised.
    DataRow r = new DefaultRow(new RowKey("Cronbach"), new DoubleCell(cronbach));
    out.addRowToTable(r);
    out.close();
    return new BufferedDataTable[] { out.getTable() };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) StatisticCalculator(org.knime.base.data.statistics.StatisticCalculator) RowKey(org.knime.core.data.RowKey) DoubleCell(org.knime.core.data.def.DoubleCell) DataRow(org.knime.core.data.DataRow) Variance(org.knime.base.data.statistics.calculation.Variance) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) BufferedDataTable(org.knime.core.node.BufferedDataTable) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) ExecutionContext(org.knime.core.node.ExecutionContext) DoubleValue(org.knime.core.data.DoubleValue) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow)

Aggregations

Variance (org.knime.base.data.statistics.calculation.Variance)2 BufferedDataTable (org.knime.core.node.BufferedDataTable)2 Test (org.junit.Test)1 StatisticCalculator (org.knime.base.data.statistics.StatisticCalculator)1 DoubleMinMax (org.knime.base.data.statistics.calculation.DoubleMinMax)1 Kurtosis (org.knime.base.data.statistics.calculation.Kurtosis)1 Mean (org.knime.base.data.statistics.calculation.Mean)1 MinMax (org.knime.base.data.statistics.calculation.MinMax)1 MissingValue (org.knime.base.data.statistics.calculation.MissingValue)1 Skewness (org.knime.base.data.statistics.calculation.Skewness)1 SpecialDoubleCells (org.knime.base.data.statistics.calculation.SpecialDoubleCells)1 StandardDeviation (org.knime.base.data.statistics.calculation.StandardDeviation)1 DataCell (org.knime.core.data.DataCell)1 DataRow (org.knime.core.data.DataRow)1 DataTableSpec (org.knime.core.data.DataTableSpec)1 DoubleValue (org.knime.core.data.DoubleValue)1 RowKey (org.knime.core.data.RowKey)1 ColumnRearranger (org.knime.core.data.container.ColumnRearranger)1 DefaultRow (org.knime.core.data.def.DefaultRow)1 DoubleCell (org.knime.core.data.def.DoubleCell)1