use of org.knime.core.data.DataType in project knime-core by knime.
the class ModifyTimeNodeModel method createColumnRearranger.
/**
 * {@inheritDoc}
 *
 * <p>
 * Creates one output column per column selected by the column filter. Depending on the replace/append setting the
 * output column either replaces the selected input column or is appended under a unique name built from the input
 * column name plus the configured suffix.
 *
 * <p>
 * The output type is determined per column: the "remove" action yields a local date, the "change" action yields a
 * local date-time (or a zoned date-time if the input column itself is zoned), and any other action yields a zoned
 * or local date-time depending on whether a time zone is to be used.
 *
 * @param inSpec the spec of the input table
 * @return the rearranger describing the output table
 */
@Override
protected ColumnRearranger createColumnRearranger(final DataTableSpec inSpec) {
    final ColumnRearranger rearranger = new ColumnRearranger(inSpec);
    final String[] includeList = m_colSelect.applyTo(inSpec).getIncludes();
    final String modifyAction = m_modifyAction.getStringValue();
    final boolean isChangeAction = modifyAction.equals(MODIFY_OPTION_CHANGE);

    // determine the default data type of the output (may be overruled per column below)
    final DataType defaultType;
    if (modifyAction.equals(MODIFY_OPTION_REMOVE)) {
        defaultType = LocalDateCellFactory.TYPE;
    } else if (!isChangeAction && m_timeZone.useZone()) {
        defaultType = ZonedDateTimeCellFactory.TYPE;
    } else {
        defaultType = LocalDateTimeCellFactory.TYPE;
    }

    final boolean replace = m_isReplaceOrAppend.getStringValue().equals(OPTION_REPLACE);
    // a single generator is shared by all appended columns so that a generated name is also checked against the
    // names generated for previously appended columns, not only against the input spec
    final UniqueNameGenerator nameGenerator = replace ? null : new UniqueNameGenerator(inSpec);

    for (final String includedCol : includeList) {
        // the type is derived per column; a zoned input column must not change the type of the columns after it
        final boolean isZonedInput =
            inSpec.getColumnSpec(includedCol).getType().equals(ZonedDateTimeCellFactory.TYPE);
        final DataType dataType = (isZonedInput && isChangeAction) ? ZonedDateTimeCellFactory.TYPE : defaultType;
        final int colIndex = inSpec.findColumnIndex(includedCol);
        if (replace) {
            final DataColumnSpec replacedSpec = new DataColumnSpecCreator(includedCol, dataType).createSpec();
            final SingleCellFactory cellFac = createCellFactory(replacedSpec, colIndex, m_timeZone.getZone());
            rearranger.replace(cellFac, includedCol);
        } else {
            final DataColumnSpec appendedSpec =
                nameGenerator.newColumn(includedCol + m_suffix.getStringValue(), dataType);
            final SingleCellFactory cellFac = createCellFactory(appendedSpec, colIndex, m_timeZone.getZone());
            rearranger.append(cellFac);
        }
    }
    return rearranger;
}
use of org.knime.core.data.DataType in project knime-core by knime.
the class ModifyDateNodeModel method createColumnRearranger.
/**
 * Creates one output column per column selected by the column filter. Depending on the replace/append setting the
 * output column either replaces the selected input column or is appended under a unique name built from the input
 * column name plus the configured suffix.
 *
 * <p>
 * The output type is determined per column: the "remove" action yields a local time, the "change" action yields a
 * local date-time (or a zoned date-time if the input column itself is zoned), and any other action yields a zoned
 * or local date-time depending on whether a time zone is to be used.
 *
 * @param inSpec table input spec
 * @return the CR describing the output
 */
@Override
protected ColumnRearranger createColumnRearranger(final DataTableSpec inSpec) {
    final ColumnRearranger rearranger = new ColumnRearranger(inSpec);
    final String[] includeList = m_colSelect.applyTo(inSpec).getIncludes();
    final String modifyAction = m_modifyAction.getStringValue();
    final boolean isChangeAction = modifyAction.equals(MODIFY_OPTION_CHANGE);

    // determine the default data type of the output (may be overruled per column below)
    final DataType defaultType;
    if (modifyAction.equals(MODIFY_OPTION_REMOVE)) {
        defaultType = LocalTimeCellFactory.TYPE;
    } else if (!isChangeAction && m_timeZone.useZone()) {
        defaultType = ZonedDateTimeCellFactory.TYPE;
    } else {
        defaultType = LocalDateTimeCellFactory.TYPE;
    }

    final ZoneId zone = m_timeZone.getZone();
    final boolean replace = m_isReplaceOrAppend.getStringValue().equals(OPTION_REPLACE);
    // a single generator is shared by all appended columns so that a generated name is also checked against the
    // names generated for previously appended columns, not only against the input spec
    final UniqueNameGenerator nameGenerator = replace ? null : new UniqueNameGenerator(inSpec);

    for (final String includedCol : includeList) {
        // the type is derived per column; a zoned input column must not change the type of the columns after it
        final boolean isZonedInput =
            inSpec.getColumnSpec(includedCol).getType().equals(ZonedDateTimeCellFactory.TYPE);
        final DataType dataType = (isZonedInput && isChangeAction) ? ZonedDateTimeCellFactory.TYPE : defaultType;
        final int colIndex = inSpec.findColumnIndex(includedCol);
        if (replace) {
            final DataColumnSpec replacedSpec = new DataColumnSpecCreator(includedCol, dataType).createSpec();
            final SingleCellFactory cellFac = createCellFactory(replacedSpec, colIndex, zone);
            rearranger.replace(cellFac, includedCol);
        } else {
            final DataColumnSpec appendedSpec =
                nameGenerator.newColumn(includedCol + m_suffix.getStringValue(), dataType);
            final SingleCellFactory cellFac = createCellFactory(appendedSpec, colIndex, zone);
            rearranger.append(cellFac);
        }
    }
    return rearranger;
}
use of org.knime.core.data.DataType in project knime-core by knime.
the class StatisticsTable method calculateAllMoments.
/**
 * Calculates <b>all the statistical moments in one pass</b>. After the
 * call of this operation, the statistical moments can be obtained very fast
 * from all the other methods.
 *
 * <p>
 * While iterating the table once, this method tracks the minimum and maximum cell per column (using the column
 * type's comparator), counts missing cells, and - for columns compatible with {@link DoubleValue} - accumulates the
 * sum and the sum of squares from which mean and variance are derived. Finally a table spec is created whose
 * column domains carry the determined bounds.
 *
 * @param rowCount Row count of table for progress, may be NaN if unknown.
 * @param exec object to check with if user canceled the operation, may be <code>null</code> in which case neither
 *            progress is reported nor cancellation is checked
 * @throws CanceledExecutionException if user canceled
 * @throws IllegalArgumentException if rowCount argument &lt; 0
 */
protected void calculateAllMoments(final double rowCount, final ExecutionMonitor exec)
    throws CanceledExecutionException {
    if (rowCount < 0.0) {
        throw new IllegalArgumentException("rowCount argument must not < 0: " + rowCount);
    }
    final DataTableSpec origSpec = m_table.getDataTableSpec();
    final int numOfCols = origSpec.getNumColumns();
    // the number of non-missing cells in each double-compatible column (stays 0 for all other columns)
    final int[] validCount = new int[numOfCols];
    final double[] sumsquare = new double[numOfCols];
    final DataValueComparator[] comp = new DataValueComparator[numOfCols];
    // the column type does not change while iterating, so the compatibility check is done once per column
    final boolean[] isDoubleColumn = new boolean[numOfCols];
    for (int i = 0; i < numOfCols; i++) {
        final DataType type = origSpec.getColumnSpec(i).getType();
        comp[i] = type.getComparator();
        assert comp[i] != null;
        isDoubleColumn[i] = type.isCompatible(DoubleValue.class);
    }

    int nrRows = 0;
    for (RowIterator rowIt = m_table.iterator(); rowIt.hasNext(); nrRows++) {
        final DataRow row = rowIt.next();
        if (exec != null) {
            final double prog = Double.isNaN(rowCount) ? 0.0 : nrRows / rowCount;
            exec.setProgress(prog,
                "Calculating statistics, processing row " + (nrRows + 1) + " (\"" + row.getKey() + "\")");
            // throws exception if user canceled
            exec.checkCanceled();
        }
        for (int c = 0; c < numOfCols; c++) {
            final DataCell cell = row.getCell(c);
            if (cell.isMissing()) {
                m_missingValueCnt[c]++;
                continue;
            }
            // keep the min and max for each column
            if ((m_minValues[c] == null) || (comp[c].compare(cell, m_minValues[c]) < 0)) {
                m_minValues[c] = cell;
            }
            if ((m_maxValues[c] == null) || (comp[c].compare(m_maxValues[c], cell) < 0)) {
                m_maxValues[c] = cell;
            }
            // for double columns we calc the sum (for the mean calc)
            if (isDoubleColumn[c]) {
                final double d = ((DoubleValue)cell).getDoubleValue();
                // a NaN sum is replaced by the first value instead of being added to
                if (Double.isNaN(m_sum[c])) {
                    m_sum[c] = d;
                } else {
                    m_sum[c] += d;
                }
                sumsquare[c] += d * d;
                validCount[c]++;
            }
        }
        calculateMomentInSubClass(row);
    }
    m_nrRows = nrRows;

    for (int j = 0; j < numOfCols; j++) {
        // no valid numeric cell was seen in this column (validCount is only incremented for double-compatible
        // columns): min/max are set to a missing cell, mean and variance to NaN
        if (validCount[j] == 0 || m_minValues[j] == null) {
            final DataCell mc = DataType.getMissingCell();
            m_minValues[j] = mc;
            m_maxValues[j] = mc;
            m_meanValues[j] = Double.NaN;
            m_varianceValues[j] = Double.NaN;
        } else {
            m_meanValues[j] = m_sum[j] / validCount[j];
            if (validCount[j] > 1) {
                // sample variance from the sum and the sum of squares
                m_varianceValues[j] =
                    (sumsquare[j] - ((m_sum[j] * m_sum[j]) / validCount[j])) / (validCount[j] - 1);
            } else {
                m_varianceValues[j] = 0.0;
            }
            // round-off errors resulting in negative variance values
            // NOTE(review): the lower bound -1.0E8 is very large for a round-off threshold; possibly -1.0E-8 was
            // intended. Left unchanged because tightening it would alter the computed results - confirm.
            if (m_varianceValues[j] < 0.0 && m_varianceValues[j] > -1.0E8) {
                m_varianceValues[j] = 0.0;
            }
            assert m_varianceValues[j] >= 0.0 : "Variance cannot be negative (column \""
                + origSpec.getColumnSpec(j).getName() + "\"): " + m_varianceValues[j];
        }
    }

    // compute resulting table spec
    final DataColumnSpec[] cSpec = new DataColumnSpec[numOfCols];
    for (int c = 0; c < numOfCols; c++) {
        final DataColumnSpec s = origSpec.getColumnSpec(c);
        // we create domains with our bounds; possible values are taken over from the original domain
        final Set<DataCell> values = (s.getDomain() == null ? null : s.getDomain().getValues());
        final DataCell lowerBound =
            (m_minValues[c] == null || m_minValues[c].isMissing()) ? null : m_minValues[c];
        final DataCell upperBound =
            (m_maxValues[c] == null || m_maxValues[c].isMissing()) ? null : m_maxValues[c];
        final DataColumnDomain newDomain =
            new DataColumnDomainCreator(values, lowerBound, upperBound).createDomain();
        final DataColumnSpecCreator creator = new DataColumnSpecCreator(s);
        creator.setDomain(newDomain);
        cSpec[c] = creator.createSpec();
    }
    m_tSpec = new DataTableSpec(cSpec);
}
use of org.knime.core.data.DataType in project knime-core by knime.
the class AppendVariableToTableNodeModel method createColumnRearranger.
/**
 * Creates a rearranger that appends one constant column per selected flow variable to the given table spec.
 *
 * <p>
 * Either all variables or the configured variables of interest are used. The current value of each variable is
 * read once and wrapped into a cell; the very same cells are then returned for every row. If a variable name
 * clashes with an existing column name, " (variable)" is appended (unless the name already ends with it, ignoring
 * case), and remaining clashes are resolved by appending " (#n)".
 *
 * @param spec the spec of the input table
 * @return the rearranger appending the variable columns
 * @throws InvalidSettingsException if no variable is selected, a selected variable does not exist with the
 *             expected type, or a variable has an unsupported type
 */
private ColumnRearranger createColumnRearranger(final DataTableSpec spec) throws InvalidSettingsException {
    final ColumnRearranger arranger = new ColumnRearranger(spec);
    // names already in use: the input columns plus every column appended so far
    final Set<String> nameHash = new HashSet<>();
    for (DataColumnSpec c : spec) {
        nameHash.add(c.getName());
    }
    final List<Pair<String, FlowVariable.Type>> vars;
    if (m_settings.getIncludeAll()) {
        vars = getAllVariables();
    } else {
        vars = m_settings.getVariablesOfInterest();
    }
    if (vars.isEmpty()) {
        throw new InvalidSettingsException("No variables selected");
    }
    final DataColumnSpec[] specs = new DataColumnSpec[vars.size()];
    final DataCell[] values = new DataCell[vars.size()];
    for (int i = 0; i < vars.size(); i++) {
        final Pair<String, FlowVariable.Type> c = vars.get(i);
        String name = c.getFirst();
        final DataType type;
        switch (c.getSecond()) {
            case DOUBLE:
                type = DoubleCell.TYPE;
                try {
                    values[i] = new DoubleCell(peekFlowVariableDouble(name));
                } catch (NoSuchElementException e) {
                    // keep the original exception as cause so the lookup failure remains traceable
                    throw new InvalidSettingsException("No such flow variable (of type double): " + name, e);
                }
                break;
            case INTEGER:
                type = IntCell.TYPE;
                try {
                    values[i] = new IntCell(peekFlowVariableInt(name));
                } catch (NoSuchElementException e) {
                    throw new InvalidSettingsException("No such flow variable (of type int): " + name, e);
                }
                break;
            case STRING:
                type = StringCell.TYPE;
                try {
                    final String sValue = peekFlowVariableString(name);
                    // a null string value is represented by an empty string cell
                    values[i] = new StringCell(sValue == null ? "" : sValue);
                } catch (NoSuchElementException e) {
                    throw new InvalidSettingsException("No such flow variable (of type String): " + name, e);
                }
                break;
            default:
                throw new InvalidSettingsException("Unsupported variable type: " + c.getSecond());
        }
        // Locale.ROOT keeps the suffix check independent of the default locale (e.g. the Turkish dotless i)
        if (nameHash.contains(name) && !name.toLowerCase(java.util.Locale.ROOT).endsWith("(variable)")) {
            name = name.concat(" (variable)");
        }
        String newName = name;
        int uniquifier = 1;
        // Set#add returns false while the candidate name is already taken
        while (!nameHash.add(newName)) {
            newName = name + " (#" + (uniquifier++) + ")";
        }
        specs[i] = new DataColumnSpecCreator(newName, type).createSpec();
    }
    arranger.append(new AbstractCellFactory(specs) {
        /**
         * {@inheritDoc}
         */
        @Override
        public DataCell[] getCells(final DataRow row) {
            // the variable values do not depend on the row, the same array is returned for every row
            return values;
        }
    });
    return arranger;
}
use of org.knime.core.data.DataType in project knime-core by knime.
the class VariableToTableNodeModel method createOutSpec.
/**
 * Creates the output table spec containing one column per selected flow variable. Either all variables or the
 * configured variables of interest are used; each column is named after its variable and typed according to the
 * variable type (double, int or string).
 *
 * @return the spec of the output table
 * @throws InvalidSettingsException if no variable is selected or a variable has an unsupported type
 */
private DataTableSpec createOutSpec() throws InvalidSettingsException {
    final List<Pair<String, FlowVariable.Type>> selection;
    if (!m_settings.getIncludeAll()) {
        selection = m_settings.getVariablesOfInterest();
    } else {
        selection = getAllVariables();
    }
    if (selection.isEmpty()) {
        throw new InvalidSettingsException("No variables selected");
    }

    final DataColumnSpec[] columns = new DataColumnSpec[selection.size()];
    int pos = 0;
    for (final Pair<String, FlowVariable.Type> variable : selection) {
        final FlowVariable.Type variableType = variable.getSecond();
        // map the flow variable type onto the matching cell type
        final DataType cellType;
        switch (variableType) {
            case STRING:
                cellType = StringCell.TYPE;
                break;
            case INTEGER:
                cellType = IntCell.TYPE;
                break;
            case DOUBLE:
                cellType = DoubleCell.TYPE;
                break;
            default:
                throw new InvalidSettingsException("Unsupported variable type: " + variableType);
        }
        final DataColumnSpecCreator creator = new DataColumnSpecCreator(variable.getFirst(), cellType);
        columns[pos++] = creator.createSpec();
    }
    return new DataTableSpec(columns);
}
Aggregations