use of org.knime.core.data.DataRow in project knime-core by knime.
the class AutoBinner method calcDomainBoundsIfNeccessary.
/**
 * Determines the per column min/max values of the given data if not already
 * present in the domain.
 *
 * <p>Only columns listed in {@code recalcValuesFor} whose domain has no bounds
 * are scanned; all other column specs are passed through unchanged. Missing
 * cells are skipped while scanning. If a scanned column contains no
 * non-missing value at all (e.g. the table is empty), its bounds are left at
 * the initial sentinels (lower = +Infinity, upper = -Infinity).
 *
 * @param data the data
 * @param exec the execution context, used for progress, cancellation and to
 *            create the table with the replaced spec
 * @param recalcValuesFor The names of the columns that need bounds; if
 *            {@code null} or empty the input table is returned as is
 * @return The data with extended domain information, or {@code data} itself
 *         if nothing had to be calculated
 * @throws InvalidSettingsException if a listed column does not exist or is
 *             not compatible with {@link DoubleValue}
 * @throws CanceledExecutionException if the execution was canceled while
 *             scanning the table
 */
public BufferedDataTable calcDomainBoundsIfNeccessary(final BufferedDataTable data, final ExecutionContext exec, final List<String> recalcValuesFor) throws InvalidSettingsException, CanceledExecutionException {
    if (null == recalcValuesFor || recalcValuesFor.isEmpty()) {
        return data;
    }
    final DataTableSpec inSpec = data.getDataTableSpec();
    // indices of the columns whose bounds have to be computed from the data
    final List<Integer> colsToScan = new ArrayList<Integer>();
    for (String colName : recalcValuesFor) {
        final DataColumnSpec colSpec = inSpec.getColumnSpec(colName);
        if (colSpec == null) {
            // fail with a settings error instead of a NullPointerException below
            throw new InvalidSettingsException("The column \"" + colName + "\" does not exist in the input table.");
        }
        if (!colSpec.getType().isCompatible(DoubleValue.class)) {
            throw new InvalidSettingsException("Can only process numeric " + "data. The column \"" + colSpec.getName() + "\" is not numeric.");
        }
        if (!colSpec.getDomain().hasBounds()) {
            colsToScan.add(inSpec.findColumnIndex(colName));
        }
    }
    if (colsToScan.isEmpty()) {
        return data;
    }
    final Map<Integer, Double> min = new HashMap<Integer, Double>();
    final Map<Integer, Double> max = new HashMap<Integer, Double>();
    for (int col : colsToScan) {
        // Double.MIN_VALUE is the smallest *positive* double and therefore
        // not a valid start value for a maximum search; use infinities.
        min.put(col, Double.POSITIVE_INFINITY);
        max.put(col, Double.NEGATIVE_INFINITY);
    }
    final long rowCount = data.size();
    long rowsSeen = 0;
    for (DataRow row : data) {
        rowsSeen++;
        exec.checkCanceled();
        exec.setProgress(rowsSeen / (double) rowCount);
        for (int col : colsToScan) {
            final DataCell cell = row.getCell(col);
            if (cell.isMissing()) {
                // a missing cell carries no value and cannot be cast to DoubleValue
                continue;
            }
            final double val = ((DoubleValue) cell).getDoubleValue();
            if (val < min.get(col)) {
                min.put(col, val);
            }
            if (val > max.get(col)) {
                max.put(col, val);
            }
        }
    }
    final List<DataColumnSpec> newColSpecList = new ArrayList<DataColumnSpec>();
    int colIdx = 0;
    for (DataColumnSpec columnSpec : inSpec) {
        // Only columns that were actually scanned have an entry in min/max.
        // Listed columns that already had bounds keep their original spec.
        if (min.containsKey(colIdx)) {
            final DataColumnSpecCreator specCreator = new DataColumnSpecCreator(columnSpec);
            final DataColumnDomainCreator domainCreator = new DataColumnDomainCreator(new DoubleCell(min.get(colIdx)), new DoubleCell(max.get(colIdx)));
            specCreator.setDomain(domainCreator.createDomain());
            newColSpecList.add(specCreator.createSpec());
        } else {
            newColSpecList.add(columnSpec);
        }
        colIdx++;
    }
    final DataTableSpec spec = new DataTableSpec(newColSpecList.toArray(new DataColumnSpec[0]));
    return exec.createSpecReplacerTable(data, spec);
}
use of org.knime.core.data.DataRow in project knime-core by knime.
the class AutoBinner method createEdgesFromQuantiles.
/**
 * Computes one edge per requested sample quantile from the values in the
 * first column (index 0) of the given table, using linear interpolation
 * between neighboring rows (the "R-7" estimate).
 *
 * <p>The table is traversed once with a forward-only iterator, so the entries
 * of {@code sampleQuantiles} are expected in ascending order.
 * NOTE(review): the interpolation presumably relies on the rows being sorted
 * ascending by the first column - confirm against the caller.
 *
 * @param data the table to read the values from
 * @param exec the execution context, used for progress and cancellation
 * @param sampleQuantiles the quantiles to estimate, each in [0, 1]
 * @return the estimated quantile values, in the order of
 *         {@code sampleQuantiles}
 * @throws CanceledExecutionException if the execution was canceled
 * @throws IllegalArgumentException if quantiles are requested for an empty
 *             table
 * @throws RuntimeException if a cell needed for a quantile is missing
 */
private static double[] createEdgesFromQuantiles(final BufferedDataTable data, final ExecutionContext exec, final double[] sampleQuantiles) throws CanceledExecutionException {
    double[] edges = new double[sampleQuantiles.length];
    long n = data.size();
    // number of rows consumed after the first one; rowQ is row c (1-based)
    long c = 0;
    int cc = 0;
    RowIterator iter = data.iterator();
    DataRow rowQ = null;
    DataRow rowQ1 = null;
    if (iter.hasNext()) {
        rowQ1 = iter.next();
        rowQ = rowQ1;
    }
    for (double p : sampleQuantiles) {
        if (rowQ1 == null) {
            // empty table: there is no row to take a value from
            throw new IllegalArgumentException("Cannot calculate quantiles for an empty table.");
        }
        double h = (n - 1) * p + 1;
        // keep the row position as long: the table size is a long and an
        // int would saturate for tables with more than Integer.MAX_VALUE rows
        long q = (long) Math.floor(h);
        // advance until rowQ is row q and rowQ1 is row q + 1; for p == 1
        // run to the very last row
        while ((1.0 == p || c < q) && iter.hasNext()) {
            rowQ = rowQ1;
            rowQ1 = iter.next();
            c++;
            exec.setProgress(c / (double) n);
            exec.checkCanceled();
        }
        // for p == 1 both interpolation partners are the last row
        rowQ = 1.0 != p ? rowQ : rowQ1;
        final DataCell xqCell = rowQ.getCell(0);
        final DataCell xq1Cell = rowQ1.getCell(0);
        // TODO missing values are not handled here (filter the data first?)
        if (xqCell.isMissing() || xq1Cell.isMissing()) {
            // report the row that actually holds the missing cell
            final DataRow missingRow = xqCell.isMissing() ? rowQ : rowQ1;
            throw new RuntimeException("Missing values not support for " + "quantile calculation (error in row \"" + missingRow.getKey() + "\")");
        }
        // for quantile calculation see also
        // http://en.wikipedia.org/wiki/
        // Quantile#Estimating_the_quantiles_of_a_population.
        // this implements R-7
        double xq = ((DoubleValue) xqCell).getDoubleValue();
        double xq1 = ((DoubleValue) xq1Cell).getDoubleValue();
        double quantile = xq + (h - q) * (xq1 - xq);
        edges[cc] = quantile;
        cc++;
    }
    return edges;
}
use of org.knime.core.data.DataRow in project knime-core by knime.
the class ColCombineNodeModel method createColumnRearranger.
/**
 * Creates a rearranger for the given spec that appends one string column
 * named {@code m_newColName}. For each row the appended cell is built from
 * the cells of the configured columns ({@code m_columns}) that are present
 * in {@code spec}, taken in the order in which they occur in the spec.
 *
 * @param spec the spec of the input table
 * @return the rearranger with the combined column appended
 */
private ColumnRearranger createColumnRearranger(final DataTableSpec spec) {
    ColumnRearranger result = new ColumnRearranger(spec);
    DataColumnSpec append = new DataColumnSpecCreator(m_newColName, StringCell.TYPE).createSpec();
    List<String> colNames = Arrays.asList(m_columns);
    final int[] matched = new int[m_columns.length];
    int matchCount = 0;
    for (int k = 0; k < spec.getNumColumns(); k++) {
        DataColumnSpec cs = spec.getColumnSpec(k);
        if (colNames.contains(cs.getName())) {
            matched[matchCount] = k;
            matchCount++;
        }
    }
    // Keep only the slots that were filled. Otherwise a configured column
    // that is absent from the spec would leave a trailing 0 in the array and
    // column 0 would silently be combined in its place.
    final int[] indices = Arrays.copyOf(matched, matchCount);
    // ", " -> ","
    // " " -> " " (do not let the resulting string be empty)
    // " bla bla " -> "bla bla"
    final String delimTrim = trimDelimString(m_delimString);
    result.append(new SingleCellFactory(append) {
        @Override
        public DataCell getCell(final DataRow row) {
            String[] cellContents = new String[indices.length];
            for (int i = 0; i < indices.length; i++) {
                DataCell c = row.getCell(indices[i]);
                // use the plain string value where available, otherwise fall
                // back to the cell's string representation
                String s = c instanceof StringValue ? ((StringValue) c).getStringValue() : c.toString();
                cellContents[i] = s;
            }
            return new StringCell(handleContent(cellContents, delimTrim));
        }
    });
    return result;
}
use of org.knime.core.data.DataRow in project knime-core by knime.
the class AutoBinner method createEdgesFromQuantiles.
/**
 * Computes one edge per requested sample quantile from the values in the
 * first column (index 0) of the given table, using linear interpolation
 * between neighboring rows (the "R-7" estimate).
 *
 * <p>The table is traversed once with a forward-only iterator, so the entries
 * of {@code sampleQuantiles} are expected in ascending order.
 * NOTE(review): the interpolation presumably relies on the rows being sorted
 * ascending by the first column - confirm against the caller.
 *
 * @param data the table to read the values from
 * @param exec the execution context, used for progress and cancellation
 * @param sampleQuantiles the quantiles to estimate, each in [0, 1]
 * @return the estimated quantile values, in the order of
 *         {@code sampleQuantiles}
 * @throws CanceledExecutionException if the execution was canceled
 * @throws IllegalArgumentException if quantiles are requested for an empty
 *             table
 * @throws RuntimeException if a cell needed for a quantile is missing
 */
@SuppressWarnings("null")
private static double[] createEdgesFromQuantiles(final BufferedDataTable data, final ExecutionContext exec, final double[] sampleQuantiles) throws CanceledExecutionException {
    double[] edges = new double[sampleQuantiles.length];
    long n = data.size();
    // number of rows consumed after the first one; rowQ is row c (1-based)
    long c = 0;
    int cc = 0;
    RowIterator iter = data.iterator();
    DataRow rowQ = null;
    DataRow rowQ1 = null;
    if (iter.hasNext()) {
        rowQ1 = iter.next();
        rowQ = rowQ1;
    }
    for (double p : sampleQuantiles) {
        if (rowQ1 == null) {
            // empty table: there is no row to take a value from
            throw new IllegalArgumentException("Cannot calculate quantiles for an empty table.");
        }
        double h = (n - 1) * p + 1;
        // keep the row position as long: the table size is a long and an
        // int would saturate for tables with more than Integer.MAX_VALUE rows
        long q = (long) Math.floor(h);
        // advance until rowQ is row q and rowQ1 is row q + 1; for p == 1
        // run to the very last row
        while ((1.0 == p || c < q) && iter.hasNext()) {
            rowQ = rowQ1;
            rowQ1 = iter.next();
            c++;
            exec.setProgress(c / (double) n);
            exec.checkCanceled();
        }
        // for p == 1 both interpolation partners are the last row
        rowQ = 1.0 != p ? rowQ : rowQ1;
        final DataCell xqCell = rowQ.getCell(0);
        final DataCell xq1Cell = rowQ1.getCell(0);
        // TODO missing values are not handled here (filter the data first?)
        if (xqCell.isMissing() || xq1Cell.isMissing()) {
            // report the row that actually holds the missing cell
            final DataRow missingRow = xqCell.isMissing() ? rowQ : rowQ1;
            throw new RuntimeException("Missing values not support for " + "quantile calculation (error in row \"" + missingRow.getKey() + "\")");
        }
        // for quantile calculation see also
        // http://en.wikipedia.org/wiki/
        // Quantile#Estimating_the_quantiles_of_a_population.
        // this implements R-7
        double xq = ((DoubleValue) xqCell).getDoubleValue();
        double xq1 = ((DoubleValue) xq1Cell).getDoubleValue();
        double quantile = xq + (h - q) * (xq1 - xq);
        edges[cc] = quantile;
        cc++;
    }
    return edges;
}
use of org.knime.core.data.DataRow in project knime-core by knime.
the class AutoBinner method calcDomainBoundsIfNeccessary.
/**
 * Determines the per column min/max values of the given data if not already present in the domain.
 *
 * <p>Only columns listed in {@code recalcValuesFor} whose domain has no bounds are scanned; all other
 * column specs are passed through unchanged. Missing cells are skipped while scanning. If a scanned
 * column contains no non-missing value at all (e.g. the table is empty), its bounds are left at the
 * initial sentinels (lower = +Infinity, upper = -Infinity).
 *
 * @param data the data
 * @param exec the execution context, used for progress, cancellation and to create the table with the
 *            replaced spec
 * @param recalcValuesFor The names of the columns that need bounds; if {@code null} or empty the input
 *            table is returned as is
 * @return The data with extended domain information, or {@code data} itself if nothing had to be
 *         calculated
 * @throws InvalidSettingsException if a listed column does not exist or is not compatible with
 *             {@link DoubleValue}
 * @throws CanceledExecutionException if the execution was canceled while scanning the table
 */
public BufferedDataTable calcDomainBoundsIfNeccessary(final BufferedDataTable data, final ExecutionContext exec, final List<String> recalcValuesFor) throws InvalidSettingsException, CanceledExecutionException {
    if (null == recalcValuesFor || recalcValuesFor.isEmpty()) {
        return data;
    }
    final DataTableSpec inSpec = data.getDataTableSpec();
    // indices of the columns whose bounds have to be computed from the data
    final List<Integer> colsToScan = new ArrayList<Integer>();
    for (String colName : recalcValuesFor) {
        final DataColumnSpec colSpec = inSpec.getColumnSpec(colName);
        if (colSpec == null) {
            // fail with a settings error instead of a NullPointerException below
            throw new InvalidSettingsException("The column \"" + colName + "\" does not exist in the input table.");
        }
        if (!colSpec.getType().isCompatible(DoubleValue.class)) {
            throw new InvalidSettingsException("Can only process numeric " + "data. The column \"" + colSpec.getName() + "\" is not numeric.");
        }
        if (!colSpec.getDomain().hasBounds()) {
            colsToScan.add(inSpec.findColumnIndex(colName));
        }
    }
    if (colsToScan.isEmpty()) {
        return data;
    }
    final Map<Integer, Double> min = new HashMap<Integer, Double>();
    final Map<Integer, Double> max = new HashMap<Integer, Double>();
    for (int col : colsToScan) {
        // Double.MIN_VALUE is the smallest *positive* double and therefore
        // not a valid start value for a maximum search; use infinities.
        min.put(col, Double.POSITIVE_INFINITY);
        max.put(col, Double.NEGATIVE_INFINITY);
    }
    final long rowCount = data.size();
    long rowsSeen = 0;
    for (DataRow row : data) {
        rowsSeen++;
        exec.checkCanceled();
        exec.setProgress(rowsSeen / (double) rowCount);
        for (int col : colsToScan) {
            final DataCell cell = row.getCell(col);
            if (cell.isMissing()) {
                // a missing cell carries no value and cannot be cast to DoubleValue
                continue;
            }
            final double val = ((DoubleValue) cell).getDoubleValue();
            if (val < min.get(col)) {
                min.put(col, val);
            }
            if (val > max.get(col)) {
                max.put(col, val);
            }
        }
    }
    final List<DataColumnSpec> newColSpecList = new ArrayList<DataColumnSpec>();
    int colIdx = 0;
    for (DataColumnSpec columnSpec : inSpec) {
        // Only columns that were actually scanned have an entry in min/max.
        // Listed columns that already had bounds keep their original spec.
        if (min.containsKey(colIdx)) {
            final DataColumnSpecCreator specCreator = new DataColumnSpecCreator(columnSpec);
            final DataColumnDomainCreator domainCreator = new DataColumnDomainCreator(new DoubleCell(min.get(colIdx)), new DoubleCell(max.get(colIdx)));
            specCreator.setDomain(domainCreator.createDomain());
            newColSpecList.add(specCreator.createSpec());
        } else {
            newColSpecList.add(columnSpec);
        }
        colIdx++;
    }
    final DataTableSpec spec = new DataTableSpec(newColSpecList.toArray(new DataColumnSpec[0]));
    return exec.createSpecReplacerTable(data, spec);
}
Aggregations