Use of org.knime.base.data.statistics.calculation.Variance in the knime-core project by knime.
Class StatisticCalculatorTest, method testGeneralStats.
/**
 * Cross-checks the results of a {@link StatisticCalculator} run against the values reported by
 * a {@link Statistics3Table} built from the same input table.
 *
 * <p>The input comes from {@code createRandomTableWithMissingValues(20, 8)}. For every column the
 * mean, kurtosis, minimum/maximum (as value and as cell), non-infinite minimum/maximum, variance,
 * standard deviation and the counts of missing, infinite and NaN values are compared.
 *
 * @throws InvalidSettingsException declared by the calls made in this test
 * @throws CanceledExecutionException declared by the calls made in this test
 */
@Test
public void testGeneralStats() throws InvalidSettingsException, CanceledExecutionException {
    final BufferedDataTable table = createRandomTableWithMissingValues(20, 8);

    // One instance per statistic; all of them are filled by a single calculator pass below.
    final Skewness skew = new Skewness();
    final MinMax cellRange = new MinMax();
    final DoubleMinMax finiteRange = new DoubleMinMax(true);
    final Kurtosis kurt = new Kurtosis();
    final Mean avg = new Mean();
    final Variance var = new Variance();
    final StandardDeviation stdDev = new StandardDeviation();
    final MissingValue missing = new MissingValue();
    final SpecialDoubleCells specials = new SpecialDoubleCells();

    final String[] allColumns = table.getSpec().getColumnNames();
    final StatisticCalculator calculator = new StatisticCalculator(table.getDataTableSpec(), allColumns,
        skew, cellRange, kurt, avg, var, missing, stdDev, finiteRange, specials);
    calculator.evaluate(table, EXEC_CONTEXT);

    // Reference values to compare against.
    final Statistics3Table reference =
        new Statistics3Table(table, false, 0, Collections.<String>emptyList(), EXEC_CONTEXT);

    final int columnCount = table.getDataTableSpec().getNumColumns();
    for (int col = 0; col < columnCount; col++) {
        // The statistics are queried by column name, which here is the column index as a string.
        final String name = String.valueOf(col);

        // moments
        assertEquals(reference.getMean(col), avg.getResult(name), 0.0001);
        assertEquals(reference.getKurtosis(col), kurt.getResult(name), 0.0001);

        // minimum
        checkValueOrMissingIfNaN(reference.getMin()[col], cellRange.getMin(name));
        assertEquals(reference.getMinCells()[col], cellRange.getMin(name));
        checkValueOrMissingIfNaN(finiteRange.getMin(name), reference.getNonInfMin(col));

        // maximum
        checkValueOrMissingIfNaN(reference.getMax()[col], cellRange.getMax(name));
        assertEquals(reference.getMaxCells()[col], cellRange.getMax(name));
        checkValueOrMissingIfNaN(finiteRange.getMax(name), reference.getNonInfMax(col));

        // spread
        assertEquals(reference.getVariance(col), var.getResult(name), 0.0001);
        assertEquals(reference.getStandardDeviation(col), stdDev.getResult(name), 0.0001);

        // counts of missing and special double values
        assertEquals(reference.getNumberMissingValues(col), missing.getNumberMissingValues(name), 0.0001);
        assertEquals(reference.getNumberNegativeInfiniteValues(col),
            specials.getNumberNegativeInfiniteValues(name), 0.0001);
        assertEquals(reference.getNumberPositiveInfiniteValues(col),
            specials.getNumberPositiveInfiniteValues(name), 0.0001);
        assertEquals(reference.getNumberNaNValues(col), specials.getNumberNaNValues(name), 0.0001);
    }
}
Use of org.knime.base.data.statistics.calculation.Variance in the knime-core project by knime.
Class CronbachNodeModel, method execute.
/**
 * Computes Cronbach's alpha over the columns selected by the column filter and returns it as a
 * table with a single row keyed {@code "Cronbach"}.
 *
 * <p>The value is computed as {@code k / (k - 1) * (1 - sum(var_i) / var(rowSums))}, where
 * {@code k} is the number of selected columns, {@code var_i} the variance of column {@code i} and
 * {@code rowSums} the per-row sums over all selected columns.
 *
 * <p>For an empty input table a warning is set, but the result row is still written.
 *
 * {@inheritDoc}
 *
 * @throws InvalidSettingsException if any cell in the selected columns is missing
 */
@Override
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    final BufferedDataTable in = (BufferedDataTable) inData[0];
    final DataTableSpec inSpec = in.getDataTableSpec();

    // Reduce the input to the columns chosen in the column filter.
    final ColumnRearranger filteredTableRearranger = new ColumnRearranger(inSpec);
    final String[] includeNames = m_columnFilterModel.applyTo(inSpec).getIncludes();
    filteredTableRearranger.keepOnly(includeNames);
    final BufferedDataTable filteredTable = exec.createColumnRearrangeTable(in, filteredTableRearranger,
        exec.createSilentSubExecutionContext(0.0));
    final DataTableSpec filteredTableSpec = filteredTable.getDataTableSpec();
    final String[] columnNames = filteredTableSpec.getColumnNames();

    // Step 1: variance of every selected column (first half of the progress).
    final Variance columnVariances = new Variance(columnNames);
    final StatisticCalculator sc = new StatisticCalculator(filteredTableSpec, columnVariances);
    sc.evaluate(filteredTable, exec.createSubExecutionContext(0.5));

    // Step 2: per-row sum over all selected columns (second half of the progress).
    final int rowCount = filteredTable.getRowCount();
    // A new double[] is zero-filled, so the entries can be accumulated into directly.
    final double[] sum = new double[rowCount];
    final ExecutionContext exec2 = exec.createSubExecutionContext(0.5);
    int i = 0;
    for (DataRow row : filteredTable) {
        exec2.checkCanceled();
        exec2.setProgress(i * 1.0 / rowCount, "Statistics calculation row " + i + " of " + rowCount);
        for (DataCell cell : row) {
            if (cell.isMissing()) {
                throw new InvalidSettingsException("Missing Values are not supported. " + "Please resolve them with the Missing Value node.");
            }
            sum[i] += ((DoubleValue) cell).getDoubleValue();
        }
        i++;
    }

    exec.setMessage("Calculating Cronbach over all Columns");

    // Numerator of the variance ratio: sum of the individual column variances.
    double sumOfColumnVariances = 0;
    for (String name : columnNames) {
        sumOfColumnVariances += columnVariances.getResult(name);
        exec.checkCanceled();
    }

    // Denominator: variance of the per-row sums. Fully qualified because the simple name
    // Variance is already used above for the KNIME statistic.
    final org.apache.commons.math3.stat.descriptive.moment.Variance v =
        new org.apache.commons.math3.stat.descriptive.moment.Variance();
    final double varianceRatio = sumOfColumnVariances / v.evaluate(sum);

    // k is held as a double so that k / (k - 1) is a floating-point division.
    final double k = filteredTableSpec.getNumColumns();
    final double cronbach = k / (k - 1) * (1.0 - varianceRatio);

    final BufferedDataContainer out = exec.createDataContainer(getDataTableSpec());
    if (in.getRowCount() <= 0) {
        setWarningMessage("Empty input table, no value calculated!");
    }
    final DataRow r = new DefaultRow(new RowKey("Cronbach"), new DoubleCell(cronbach));
    out.addRowToTable(r);
    out.close();
    return new BufferedDataTable[] { out.getTable() };
}
Aggregations