use of org.knime.core.data.DataCell in project knime-core by knime.
the class DateShiftConfigure method getColumnbasedCellFactory.
/**
 * Creates a cell factory that shifts the date/time value of a column by an integer amount of a calendar field.
 * <p>
 * The amount is read from the numerical column when the configured type of shift equals
 * {@code DateShiftNodeDialog.CFG_COLUMN_SHIFT}; otherwise the fixed value stored in the configuration is used.
 *
 * @param spec the output column spec
 * @param col1Idx the column index of the numerical column holding the shift amount
 * @param col2Idx the column index of the date/time column to shift
 * @param g the time field to modify (as defined by calendar constants)
 * @param conf the configuration object
 * @return the cell factory
 */
public static SingleCellFactory getColumnbasedCellFactory(final DataColumnSpec spec, final int col1Idx, final int col2Idx, final int g, final DateShiftConfigure conf) {
    return new SingleCellFactory(spec) {
        /**
         * Produces the shifted date/time cell for the given row.
         *
         * @param row the current row
         * @return the shifted date/time cell, or a missing cell if the date/time cell (or, for a column based
         *         shift, the numerical cell) is missing
         */
        @Override
        public DataCell getCell(final DataRow row) {
            final DataCell dateCell = row.getCell(col2Idx);
            if (dateCell.isMissing()) {
                return DataType.getMissingCell();
            }

            final String shiftType = conf.gettypeofshift().getStringValue();
            final int amount;
            if (!shiftType.equals(DateShiftNodeDialog.CFG_COLUMN_SHIFT)) {
                // fixed shift taken from the configuration
                amount = conf.getvalueofshift().getIntValue();
            } else {
                // shift amount is provided per row by the numerical column
                final DataCell amountCell = row.getCell(col1Idx);
                if (amountCell.isMissing()) {
                    return DataType.getMissingCell();
                }
                amount = ((IntValue) amountCell).getIntValue();
            }

            final Calendar shifted = ((DateAndTimeValue) dateCell).getUTCCalendarClone();
            shifted.add(g, amount);

            final long millis = shifted.getTimeInMillis();
            final boolean hasDate = conf.getHasDate().getBooleanValue();
            final boolean hasTime = conf.getHasTime().getBooleanValue();
            final boolean hasMillis = conf.getHasMiliSeconds().getBooleanValue();
            return new DateAndTimeCell(millis, hasDate, hasTime, hasMillis);
        }
    };
}
use of org.knime.core.data.DataCell in project knime-core by knime.
the class StatisticsTable method calculateAllMoments.
/**
* Calculates <b>all the statistical moments in one pass</b> over the rows of
* {@code m_table}. After the call of this operation, the statistical moments
* can be obtained very fast from all the other methods.
* <p>
* Per column this records the minimum and maximum (using the comparator of
* the column type) and the number of missing cells. For columns compatible
* with {@code DoubleValue} it additionally accumulates the sum and the sum
* of squares, from which mean and sample variance are derived. Finally
* {@code m_tSpec} is rebuilt with column domains created from the observed
* minimum and maximum.
*
* @param rowCount Row count of table for progress, may be NaN if unknown
* (progress is then always reported as 0.0).
* @param exec object to check with if user canceled the operation; may be
* {@code null}, in which case no progress is reported and no cancel check
* is performed
* @throws CanceledExecutionException if user canceled
* @throws IllegalArgumentException if rowCount argument < 0
*/
protected void calculateAllMoments(final double rowCount, final ExecutionMonitor exec) throws CanceledExecutionException {
// NaN passes this check (NaN < 0.0 is false) and is handled below
if (rowCount < 0.0) {
throw new IllegalArgumentException("rowCount argument must not < 0: " + rowCount);
}
DataTableSpec origSpec = m_table.getDataTableSpec();
int numOfCols = origSpec.getNumColumns();
// the number of non-missing cells in each column; only incremented for
// DoubleValue-compatible columns (see the row loop below)
int[] validCount = new int[numOfCols];
// per-column sum of squared values, needed for the variance
double[] sumsquare = new double[numOfCols];
// per-column comparator used to determine min and max
final DataValueComparator[] comp = new DataValueComparator[numOfCols];
for (int i = 0; i < numOfCols; i++) {
sumsquare[i] = 0.0;
validCount[i] = 0;
comp[i] = origSpec.getColumnSpec(i).getType().getComparator();
assert comp[i] != null;
}
// counts the rows while iterating; incremented in the loop header
int nrRows = 0;
for (RowIterator rowIt = m_table.iterator(); rowIt.hasNext(); nrRows++) {
DataRow row = rowIt.next();
if (exec != null) {
// unknown row count (NaN) is reported as zero progress
double prog = Double.isNaN(rowCount) ? 0.0 : nrRows / rowCount;
exec.setProgress(prog, "Calculating statistics, processing row " + (nrRows + 1) + " (\"" + row.getKey() + "\")");
// throws exception if user canceled
exec.checkCanceled();
}
for (int c = 0; c < numOfCols; c++) {
final DataCell cell = row.getCell(c);
if (!(cell.isMissing())) {
// keep the min and max for each column
if ((m_minValues[c] == null) || (comp[c].compare(cell, m_minValues[c]) < 0)) {
m_minValues[c] = cell;
}
if ((m_maxValues[c] == null) || (comp[c].compare(m_maxValues[c], cell) < 0)) {
m_maxValues[c] = cell;
}
// for double columns we calc the sum (for the mean calc)
DataType type = origSpec.getColumnSpec(c).getType();
if (type.isCompatible(DoubleValue.class)) {
double d = ((DoubleValue) cell).getDoubleValue();
// a NaN sum is replaced by the first value instead of being added to;
// presumably m_sum is initialized with NaN elsewhere - not visible here
if (Double.isNaN(m_sum[c])) {
m_sum[c] = d;
} else {
m_sum[c] += d;
}
sumsquare[c] += d * d;
validCount[c]++;
}
} else {
m_missingValueCnt[c]++;
}
}
// hook invoked once per row after all of its columns were processed
calculateMomentInSubClass(row);
}
m_nrRows = nrRows;
for (int j = 0; j < numOfCols; j++) {
// no valid (double) values seen: min, max become missing cells, mean and
// variance become NaN.
// NOTE(review): validCount is only incremented for DoubleValue-compatible
// columns, so this branch is also taken for every other column and
// replaces the min/max found above by missing cells - confirm intended.
if (validCount[j] == 0 || m_minValues[j] == null) {
DataCell mc = DataType.getMissingCell();
m_minValues[j] = mc;
m_maxValues[j] = mc;
m_meanValues[j] = Double.NaN;
m_varianceValues[j] = Double.NaN;
} else {
m_meanValues[j] = m_sum[j] / validCount[j];
if (validCount[j] > 1) {
// sample variance: (sum(x^2) - sum(x)^2 / n) / (n - 1)
m_varianceValues[j] = (sumsquare[j] - ((m_sum[j] * m_sum[j]) / validCount[j])) / (validCount[j] - 1);
} else {
// a single value has no spread
m_varianceValues[j] = 0.0;
}
// round-off errors resulting in negative variance values
// NOTE(review): the lower bound is -1.0E8 (minus one hundred million), so
// practically every negative value is clamped to zero; possibly -1.0E-8
// was meant - confirm before changing.
if (m_varianceValues[j] < 0.0 && m_varianceValues[j] > -1.0E8) {
m_varianceValues[j] = 0.0;
}
assert m_varianceValues[j] >= 0.0 : "Variance cannot be negative (column \"" + origSpec.getColumnSpec(j).getName() + "\": " + m_varianceValues[j];
}
}
// compute resulting table spec
int nrCols = m_table.getDataTableSpec().getNumColumns();
DataColumnSpec[] cSpec = new DataColumnSpec[nrCols];
for (int c = 0; c < nrCols; c++) {
DataColumnSpec s = m_table.getDataTableSpec().getColumnSpec(c);
// we create domains with our bounds: the value set of the existing domain
// (if any) is kept, min/max are passed as null when unknown or missing
Set<DataCell> values = (s.getDomain() == null ? null : s.getDomain().getValues());
DataColumnDomain newDomain = new DataColumnDomainCreator(values, (m_minValues[c] == null || m_minValues[c].isMissing()) ? null : m_minValues[c], (m_maxValues[c] == null || m_maxValues[c].isMissing()) ? null : m_maxValues[c]).createDomain();
DataColumnSpecCreator creator = new DataColumnSpecCreator(s);
creator.setDomain(newDomain);
cSpec[c] = creator.createSpec();
}
m_tSpec = new DataTableSpec(cSpec);
}
use of org.knime.core.data.DataCell in project knime-core by knime.
the class Statistics2Table method createNominalValueTable.
/**
 * Builds a table listing the collected nominal values next to their number of occurrences.
 * <p>
 * Every column that has nominal values contributes a pair of output cells per row (value, count). The
 * value maps are walked in parallel; a pair whose values are exhausted is padded with missing cells, and
 * the output ends as soon as no pair has a value left.
 *
 * @param nominal handed to {@code createOutSpecNominal} to derive the output spec
 * @return data table with nominal values for each column
 */
public DataTable createNominalValueTable(final List<String> nominal) {
    final DataTableSpec outSpec = createOutSpecNominal(m_spec, nominal);

    // one iterator per (value, count) pair of output columns
    final Iterator<?>[] entryIterators = new Iterator<?>[outSpec.getNumColumns() / 2];
    int slot = 0;
    for (int col = 0; col < m_nominalValues.length; col++) {
        if (m_nominalValues[col] == null) {
            continue;
        }
        entryIterators[slot] = m_nominalValues[col].entrySet().iterator();
        slot++;
    }

    final DataContainer cont = new DataContainer(outSpec);
    final int pairCount = entryIterators.length;
    int rowIndex = 0;
    while (true) {
        final DataCell[] cells = new DataCell[2 * pairCount];
        boolean anyValue = false;
        for (int pair = 0; pair < pairCount; pair++) {
            final Iterator<?> entries = entryIterators[pair];
            final int valuePos = 2 * pair;
            final int countPos = valuePos + 1;
            if (entries == null || !entries.hasNext()) {
                // nothing (left) for this pair: pad with missing cells
                cells[valuePos] = DataType.getMissingCell();
                cells[countPos] = DataType.getMissingCell();
                continue;
            }
            @SuppressWarnings("unchecked")
            final Map.Entry<DataCell, Integer> entry = (Map.Entry<DataCell, Integer>) entries.next();
            cells[valuePos] = entry.getKey();
            cells[countPos] = new IntCell(entry.getValue());
            anyValue = true;
        }
        if (!anyValue) {
            // all iterators are exhausted, the padded row is not added
            break;
        }
        cont.addRowToTable(new DefaultRow(RowKey.createRowKey(rowIndex), cells));
        rowIndex++;
    }
    cont.close();
    return cont.getTable();
}
use of org.knime.core.data.DataCell in project knime-core by knime.
the class Statistics2Table method load.
/**
 * Restores a statistics table from the given settings object.
 * <p>
 * The table spec is read from the {@code "spec"} config. For every column whose name is present as a key
 * in the settings, the nominal values and their counts are read from the sub-settings of that name;
 * columns without such a key keep a {@code null} entry.
 *
 * @param sett to load this table from
 * @return a new statistic table
 * @throws InvalidSettingsException if the settings are corrupt
 */
public static Statistics2Table load(final NodeSettingsRO sett) throws InvalidSettingsException {
    final DataTableSpec spec = DataTableSpec.load(sett.getConfig("spec"));
    final int numColumns = spec.getNumColumns();

    @SuppressWarnings("unchecked")
    final Map<DataCell, Integer>[] nominalValues = new Map[numColumns];
    for (int col = 0; col < numColumns; col++) {
        final String columnName = spec.getColumnSpec(col).getName();
        if (!sett.containsKey(columnName)) {
            // no nominal values stored for this column, entry stays null
            continue;
        }
        final Map<DataCell, Integer> counts = new LinkedHashMap<DataCell, Integer>();
        final NodeSettingsRO columnSettings = sett.getNodeSettings(columnName);
        for (final String key : columnSettings.keySet()) {
            final NodeSettingsRO entrySettings = columnSettings.getNodeSettings(key);
            counts.put(entrySettings.getDataCell("key"), entrySettings.getInt("value"));
        }
        nominalValues[col] = counts;
    }

    final double[] min = sett.getDoubleArray("minimum");
    final double[] max = sett.getDoubleArray("maximum");
    final double[] mean = sett.getDoubleArray("mean");
    final double[] var = sett.getDoubleArray("variance");
    final double[] median = sett.getDoubleArray("median");
    final double[] missings = sett.getDoubleArray("missings");
    final double[] sums = sett.getDoubleArray("sums");
    // added with 2.7, fallback -1
    final int rowCount = sett.getInt("row_count", -1);

    return new Statistics2Table(spec, min, max, mean, median, var, sums, missings, nominalValues, rowCount);
}
use of org.knime.core.data.DataCell in project knime-core by knime.
the class ReadPNGFromURLNodeModel method createColumnRearranger.
/**
 * Creates the rearranger that turns the configured URL column into a PNG image column.
 * <p>
 * If no URL column is configured, the defaults are guessed from the input spec and a warning is set. The
 * PNG column is appended when a new column name is configured; otherwise it replaces the URL column.
 *
 * @param in the input table spec
 * @param failCounter incremented for every row whose PNG could not be read while failing is disabled
 * @return the column rearranger
 * @throws InvalidSettingsException if the column does not exist in the input or is not string-compatible
 */
private ColumnRearranger createColumnRearranger(final DataTableSpec in, final AtomicLong failCounter) throws InvalidSettingsException {
    String urlColumn = m_config.getUrlColName();
    if (urlColumn == null) {
        // throws ISE
        m_config.guessDefaults(in);
        urlColumn = m_config.getUrlColName();
        setWarningMessage("Auto-configuration: Guessing column \"" + urlColumn + "\" to contain locations");
    }

    final int colIndex = in.findColumnIndex(urlColumn);
    if (colIndex < 0) {
        throw new InvalidSettingsException("No such column in input: " + urlColumn);
    }
    final DataColumnSpec urlColumnSpec = in.getColumnSpec(colIndex);
    final boolean stringCompatible = urlColumnSpec.getType().isCompatible(StringValue.class);
    if (!stringCompatible) {
        throw new InvalidSettingsException("Selected column \"" + urlColumn + "\" is not string-compatible");
    }

    final String newColName = m_config.getNewColumnName();
    final boolean appendColumn = newColName != null;
    final DataColumnSpecCreator specCreator;
    if (appendColumn) {
        final String uniqueName = DataTableSpec.getUniqueColumnName(in, newColName);
        specCreator = new DataColumnSpecCreator(uniqueName, PNGImageContent.TYPE);
    } else {
        // reuse the spec of the replaced column, but with PNG type and without handlers and domain
        specCreator = new DataColumnSpecCreator(urlColumnSpec);
        specCreator.setType(PNGImageContent.TYPE);
        specCreator.removeAllHandlers();
        specCreator.setDomain(null);
    }
    final DataColumnSpec outColumnSpec = specCreator.createSpec();

    final ColumnRearranger rearranger = new ColumnRearranger(in);
    final CellFactory pngFactory = new SingleCellFactory(outColumnSpec) {
        @Override
        public DataCell getCell(final DataRow row) {
            final DataCell urlCell = row.getCell(colIndex);
            if (urlCell.isMissing()) {
                return DataType.getMissingCell();
            }
            final String url = ((StringValue) urlCell).getStringValue();
            try {
                return toPNGCell(url);
            } catch (Exception e) {
                if (!m_config.isFailOnInvalid()) {
                    // lenient mode: log, count the failure and emit a missing cell
                    String message = "Failed to read png content from " + "\"" + url + "\": " + e.getMessage();
                    LOGGER.warn(message, e);
                    failCounter.incrementAndGet();
                    return DataType.getMissingCell();
                }
                if (e instanceof RuntimeException) {
                    throw (RuntimeException) e;
                }
                throw new RuntimeException(e.getMessage(), e);
            }
        }
    };

    if (appendColumn) {
        rearranger.append(pngFactory);
    } else {
        rearranger.replace(pngFactory, colIndex);
    }
    return rearranger;
}
Aggregations