use of org.knime.core.data.DoubleValue in project knime-core by knime.
the class StatisticsTable method calculateAllMoments.
/**
 * Walks the underlying table exactly once and fills all cached statistics
 * (minimum, maximum, sum, mean, variance, missing-value count per column and
 * the overall row count), then rebuilds the table spec with domains that carry
 * the observed bounds. After this call the other accessors only need to read
 * the cached values.
 *
 * <p>Only columns whose type is compatible with {@link DoubleValue} contribute
 * to sum, mean and variance. A column without any such cell ends up with
 * missing min/max cells and {@code NaN} mean/variance.
 *
 * @param rowCount Row count of table for progress, may be NaN if unknown.
 * @param exec object to check with if user canceled the operation, may be
 *            {@code null} in which case no progress is reported
 * @throws CanceledExecutionException if user canceled
 * @throws IllegalArgumentException if rowCount argument &lt; 0
 */
protected void calculateAllMoments(final double rowCount, final ExecutionMonitor exec) throws CanceledExecutionException {
    if (rowCount < 0.0) {
        throw new IllegalArgumentException("rowCount argument must not < 0: " + rowCount);
    }
    final DataTableSpec inputSpec = m_table.getDataTableSpec();
    final int columnCount = inputSpec.getNumColumns();
    // per-column accumulators; Java initialises them to 0 / 0.0 / false
    final int[] numericCellCount = new int[columnCount];
    final double[] squaredSum = new double[columnCount];
    final boolean[] numericColumn = new boolean[columnCount];
    final DataValueComparator[] comparators = new DataValueComparator[columnCount];
    for (int col = 0; col < columnCount; col++) {
        final DataType columnType = inputSpec.getColumnSpec(col).getType();
        comparators[col] = columnType.getComparator();
        assert comparators[col] != null;
        numericColumn[col] = columnType.isCompatible(DoubleValue.class);
    }

    // single pass over all rows
    int processedRows = 0;
    final RowIterator rows = m_table.iterator();
    while (rows.hasNext()) {
        final DataRow current = rows.next();
        if (exec != null) {
            final double fraction;
            if (Double.isNaN(rowCount)) {
                fraction = 0.0;
            } else {
                fraction = processedRows / rowCount;
            }
            exec.setProgress(fraction, "Calculating statistics, processing row " + (processedRows + 1) + " (\"" + current.getKey() + "\")");
            // throws exception if user canceled
            exec.checkCanceled();
        }
        for (int col = 0; col < columnCount; col++) {
            final DataCell cell = current.getCell(col);
            if (cell.isMissing()) {
                m_missingValueCnt[col]++;
                continue;
            }
            // track the smallest and largest cell seen so far
            if (m_minValues[col] == null || comparators[col].compare(cell, m_minValues[col]) < 0) {
                m_minValues[col] = cell;
            }
            if (m_maxValues[col] == null || comparators[col].compare(m_maxValues[col], cell) < 0) {
                m_maxValues[col] = cell;
            }
            if (!numericColumn[col]) {
                continue;
            }
            // numeric columns additionally feed the sum / sum of squares
            final double value = ((DoubleValue) cell).getDoubleValue();
            // a NaN sum is replaced by the first value instead of being added to
            m_sum[col] = Double.isNaN(m_sum[col]) ? value : m_sum[col] + value;
            squaredSum[col] += value * value;
            numericCellCount[col]++;
        }
        calculateMomentInSubClass(current);
        processedRows++;
    }
    m_nrRows = processedRows;

    // derive mean and variance from the accumulated sums
    for (int col = 0; col < columnCount; col++) {
        final int count = numericCellCount[col];
        if (count == 0 || m_minValues[col] == null) {
            // nothing numeric was seen in this column
            final DataCell missing = DataType.getMissingCell();
            m_minValues[col] = missing;
            m_maxValues[col] = missing;
            m_meanValues[col] = Double.NaN;
            m_varianceValues[col] = Double.NaN;
            continue;
        }
        m_meanValues[col] = m_sum[col] / count;
        // sample variance; defined as 0 for a single value
        m_varianceValues[col] = count > 1
            ? (squaredSum[col] - ((m_sum[col] * m_sum[col]) / count)) / (count - 1)
            : 0.0;
        // round-off errors resulting in negative variance values
        // NOTE(review): the lower bound -1.0E8 is very large for a round-off
        // tolerance; it may have been meant as -1.0E-8 - confirm before changing.
        if (m_varianceValues[col] < 0.0 && m_varianceValues[col] > -1.0E8) {
            m_varianceValues[col] = 0.0;
        }
        assert m_varianceValues[col] >= 0.0 : "Variance cannot be negative (column \"" + inputSpec.getColumnSpec(col).getName() + "\": " + m_varianceValues[col];
    }

    // compute resulting table spec: same columns, domains with our bounds
    final DataTableSpec tableSpec = m_table.getDataTableSpec();
    final int specColumns = tableSpec.getNumColumns();
    final DataColumnSpec[] rebuilt = new DataColumnSpec[specColumns];
    for (int col = 0; col < specColumns; col++) {
        final DataColumnSpec base = tableSpec.getColumnSpec(col);
        final DataColumnDomain oldDomain = base.getDomain();
        final Set<DataCell> possibleValues = oldDomain == null ? null : oldDomain.getValues();
        // missing or absent bounds are passed on as null
        final DataCell lower = (m_minValues[col] == null || m_minValues[col].isMissing()) ? null : m_minValues[col];
        final DataCell upper = (m_maxValues[col] == null || m_maxValues[col].isMissing()) ? null : m_maxValues[col];
        final DataColumnDomain boundedDomain = new DataColumnDomainCreator(possibleValues, lower, upper).createDomain();
        final DataColumnSpecCreator specCreator = new DataColumnSpecCreator(base);
        specCreator.setDomain(boundedDomain);
        rebuilt[col] = specCreator.createSpec();
    }
    m_tSpec = new DataTableSpec(rebuilt);
}
use of org.knime.core.data.DoubleValue in project knime-core by knime.
the class SVMPredictor method getCells.
/**
 * {@inheritDoc}
 *
 * Reads the cells at the configured column indices as doubles and passes them
 * to {@code doPredict}; the result is wrapped in a single string cell. If any
 * of those cells is missing, a single missing cell is returned instead.
 */
@Override
public DataCell[] getCells(final DataRow row) {
    final ArrayList<Double> features = new ArrayList<Double>();
    for (final int columnIndex : m_colindices) {
        final DataCell cell = row.getCell(columnIndex);
        if (cell.isMissing()) {
            // no prediction possible for incomplete input
            return new DataCell[] { DataType.getMissingCell() };
        }
        features.add(((DoubleValue) cell).getDoubleValue());
    }
    final String predictedClass = doPredict(features);
    return new DataCell[] { new StringCell(predictedClass) };
}
use of org.knime.core.data.DoubleValue in project knime-core by knime.
the class AutoBinner method calcDomainBoundsIfNeccessary.
/**
 * Determines the per column min/max values of the given data if not already
 * present in the domain.
 *
 * <p>Only columns listed in {@code recalcValuesFor} whose domain has no bounds
 * are scanned. Missing cells and NaN values are ignored. A column for which no
 * usable value was found (or which already has bounds) keeps its original
 * column spec.
 *
 * @param data the data
 * @param exec the execution context
 * @param recalcValuesFor The columns; if {@code null} or empty the input
 *            table is returned unchanged
 * @return The data with extended domain information
 * @throws InvalidSettingsException if a listed column does not exist or is
 *             not compatible with {@link DoubleValue}
 * @throws CanceledExecutionException if the execution was canceled
 */
public BufferedDataTable calcDomainBoundsIfNeccessary(final BufferedDataTable data, final ExecutionContext exec, final List<String> recalcValuesFor) throws InvalidSettingsException, CanceledExecutionException {
    if (null == recalcValuesFor || recalcValuesFor.isEmpty()) {
        return data;
    }
    final DataTableSpec inSpec = data.getDataTableSpec();
    // indices of the columns whose bounds have to be computed
    List<Integer> valuesI = new ArrayList<Integer>();
    for (String colName : recalcValuesFor) {
        DataColumnSpec colSpec = inSpec.getColumnSpec(colName);
        if (colSpec == null) {
            throw new InvalidSettingsException("The column \"" + colName + "\" does not exist in the input table.");
        }
        if (!colSpec.getType().isCompatible(DoubleValue.class)) {
            throw new InvalidSettingsException("Can only process numeric " + "data. The column \"" + colSpec.getName() + "\" is not numeric.");
        }
        if (!colSpec.getDomain().hasBounds()) {
            Integer colIndex = inSpec.findColumnIndex(colName);
            // guard against the same column being listed twice
            if (!valuesI.contains(colIndex)) {
                valuesI.add(colIndex);
            }
        }
    }
    if (valuesI.isEmpty()) {
        return data;
    }
    // Running bounds keyed by column index. A column only gets an entry once a
    // usable value was seen, so no sentinel start values are needed.
    Map<Integer, Double> min = new HashMap<Integer, Double>();
    Map<Integer, Double> max = new HashMap<Integer, Double>();
    int c = 0;
    for (DataRow row : data) {
        c++;
        exec.checkCanceled();
        exec.setProgress(c / (double) data.getRowCount());
        for (Integer col : valuesI) {
            if (row.getCell(col).isMissing()) {
                // missing cells carry no value (and are no DoubleValue)
                continue;
            }
            double val = ((DoubleValue) row.getCell(col)).getDoubleValue();
            if (Double.isNaN(val)) {
                // NaN cannot be ordered and must not become a bound
                continue;
            }
            Double curMin = min.get(col);
            if (curMin == null || val < curMin) {
                min.put(col, val);
            }
            Double curMax = max.get(col);
            if (curMax == null || val > curMax) {
                max.put(col, val);
            }
        }
    }
    List<DataColumnSpec> newColSpecList = new ArrayList<DataColumnSpec>();
    int cc = 0;
    for (DataColumnSpec columnSpec : inSpec) {
        Double lower = min.get(cc);
        Double upper = max.get(cc);
        if (lower != null && upper != null) {
            // bounds were computed for this column: replace its domain
            DataColumnSpecCreator specCreator = new DataColumnSpecCreator(columnSpec);
            DataColumnDomainCreator domainCreator = new DataColumnDomainCreator(new DoubleCell(lower), new DoubleCell(upper));
            specCreator.setDomain(domainCreator.createDomain());
            newColSpecList.add(specCreator.createSpec());
        } else {
            newColSpecList.add(columnSpec);
        }
        cc++;
    }
    DataTableSpec spec = new DataTableSpec(newColSpecList.toArray(new DataColumnSpec[0]));
    return exec.createSpecReplacerTable(data, spec);
}
use of org.knime.core.data.DoubleValue in project knime-core by knime.
the class AutoBinner method createEdgesFromQuantiles.
/**
 * Computes one bin edge per requested sample quantile from the first column of
 * the given table, using linear interpolation between neighbouring rows (R-7).
 *
 * <p>The table is read in a single forward pass.
 * NOTE(review): this presumably requires the table to be sorted ascending by
 * its first column and {@code sampleQuantiles} to be in non-decreasing order -
 * confirm against the callers.
 *
 * @param data table whose first column holds the values
 * @param exec used for progress reporting and cancellation checks
 * @param sampleQuantiles the probabilities (0..1) to compute edges for
 * @return the edges, one per entry of {@code sampleQuantiles}
 * @throws CanceledExecutionException if the execution was canceled
 * @throws IllegalArgumentException if quantiles are requested for an empty table
 * @throws RuntimeException if a missing value is encountered
 */
private static double[] createEdgesFromQuantiles(final BufferedDataTable data, final ExecutionContext exec, final double[] sampleQuantiles) throws CanceledExecutionException {
    double[] edges = new double[sampleQuantiles.length];
    long n = data.size();
    // number of rows the iterator has been advanced past the first one
    long c = 0;
    int cc = 0;
    RowIterator iter = data.iterator();
    DataRow rowQ = null;
    DataRow rowQ1 = null;
    if (iter.hasNext()) {
        rowQ1 = iter.next();
        rowQ = rowQ1;
    } else if (sampleQuantiles.length > 0) {
        // fail with a clear message instead of a NullPointerException below
        throw new IllegalArgumentException("Cannot calculate quantiles for an empty table");
    }
    for (double p : sampleQuantiles) {
        double h = (n - 1) * p + 1;
        // long, not int: n is a long and an int cast would saturate for
        // tables with more than Integer.MAX_VALUE rows
        long q = (long) Math.floor(h);
        while ((1.0 == p || c < q) && iter.hasNext()) {
            rowQ = rowQ1;
            rowQ1 = iter.next();
            c++;
            exec.setProgress(c / (double) n);
            exec.checkCanceled();
        }
        // for p == 1 both interpolation points are the last row
        rowQ = 1.0 != p ? rowQ : rowQ1;
        final DataCell xqCell = rowQ.getCell(0);
        final DataCell xq1Cell = rowQ1.getCell(0);
        // missing values are not handled here and abort the calculation
        if (xqCell.isMissing() || xq1Cell.isMissing()) {
            throw new RuntimeException("Missing values not support for " + "quantile calculation (error in row \"" + rowQ1.getKey() + "\")");
        }
        // for quantile calculation see also
        // http://en.wikipedia.org/wiki/
        // Quantile#Estimating_the_quantiles_of_a_population.
        // this implements R-7
        double xq = ((DoubleValue) xqCell).getDoubleValue();
        double xq1 = ((DoubleValue) xq1Cell).getDoubleValue();
        double quantile = xq + (h - q) * (xq1 - xq);
        edges[cc] = quantile;
        cc++;
    }
    return edges;
}
use of org.knime.core.data.DoubleValue in project knime-core by knime.
the class AutoBinner method createEdgesFromQuantiles.
/**
 * Derives the bin edges for the given sample quantiles from the values in the
 * first column of {@code data}. Each edge is interpolated linearly between two
 * neighbouring rows (R-7 estimate).
 *
 * <p>Rows are consumed front to back exactly once.
 * NOTE(review): correct results presumably depend on the table being sorted
 * ascending by its first column and on {@code sampleQuantiles} being in
 * non-decreasing order - verify with the callers.
 *
 * @param data table whose first column holds the values
 * @param exec used for progress reporting and cancellation checks
 * @param sampleQuantiles the probabilities (0..1) to compute edges for
 * @return the edges, one per entry of {@code sampleQuantiles}
 * @throws CanceledExecutionException if the execution was canceled
 * @throws IllegalArgumentException if quantiles are requested for an empty table
 * @throws RuntimeException if a missing value is encountered
 */
@SuppressWarnings("null")
private static double[] createEdgesFromQuantiles(final BufferedDataTable data, final ExecutionContext exec, final double[] sampleQuantiles) throws CanceledExecutionException {
    double[] edges = new double[sampleQuantiles.length];
    long n = data.size();
    // how many rows beyond the first one have been read so far
    long c = 0;
    int cc = 0;
    RowIterator iter = data.iterator();
    DataRow rowQ = null;
    DataRow rowQ1 = null;
    if (iter.hasNext()) {
        rowQ1 = iter.next();
        rowQ = rowQ1;
    } else if (sampleQuantiles.length > 0) {
        // without any row there is nothing to interpolate between; report
        // that explicitly rather than running into a NullPointerException
        throw new IllegalArgumentException("Cannot calculate quantiles for an empty table");
    }
    for (double p : sampleQuantiles) {
        double h = (n - 1) * p + 1;
        // keep the row position as long: the row count is a long and casting
        // to int would clip it at Integer.MAX_VALUE
        long q = (long) Math.floor(h);
        while ((1.0 == p || c < q) && iter.hasNext()) {
            rowQ = rowQ1;
            rowQ1 = iter.next();
            c++;
            exec.setProgress(c / (double) n);
            exec.checkCanceled();
        }
        // the 100% quantile uses the last row for both interpolation points
        rowQ = 1.0 != p ? rowQ : rowQ1;
        final DataCell xqCell = rowQ.getCell(0);
        final DataCell xq1Cell = rowQ1.getCell(0);
        // missing values cannot be interpolated and abort the calculation
        if (xqCell.isMissing() || xq1Cell.isMissing()) {
            throw new RuntimeException("Missing values not support for " + "quantile calculation (error in row \"" + rowQ1.getKey() + "\")");
        }
        // for quantile calculation see also
        // http://en.wikipedia.org/wiki/
        // Quantile#Estimating_the_quantiles_of_a_population.
        // this implements R-7
        double xq = ((DoubleValue) xqCell).getDoubleValue();
        double xq1 = ((DoubleValue) xq1Cell).getDoubleValue();
        double quantile = xq + (h - q) * (xq1 - xq);
        edges[cc] = quantile;
        cc++;
    }
    return edges;
}
Aggregations