use of org.knime.core.data.DataColumnSpec in project knime-core by knime.
the class NormalizerNodeModel method calculate.
/**
 * A new normalized {@link org.knime.core.data.DataTable} is created depending
 * on the mode.
 *
 * @param inData The input data.
 * @param exec For BufferedDataTable creation and progress.
 * @return the result of the calculation
 * @throws Exception If the node calculation fails for any reason.
 */
protected CalculationResult calculate(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    BufferedDataTable inTable = (BufferedDataTable) inData[0];
    DataTableSpec inSpec = inTable.getSpec();
    // extract selected numeric columns
    updateNumericColumnSelection(inSpec);
    Normalizer ntable = new Normalizer(inTable, m_columns);
    long rowcount = inTable.size();
    ExecutionMonitor prepareExec = exec.createSubProgress(0.3);
    AffineTransTable outTable;
    boolean fixDomainBounds = false;
    switch (m_mode) {
        case NONORM_MODE:
            return new CalculationResult(inTable, new DataTableSpec(), new AffineTransConfiguration());
        case MINMAX_MODE:
            fixDomainBounds = true;
            outTable = ntable.doMinMaxNorm(m_max, m_min, prepareExec);
            break;
        case ZSCORE_MODE:
            outTable = ntable.doZScoreNorm(prepareExec);
            break;
        case DECIMALSCALING_MODE:
            outTable = ntable.doDecimalScaling(prepareExec);
            break;
        default:
            throw new Exception("No mode set");
    }
    if (outTable.getErrorMessage() != null) {
        // something went wrong, report and throw an exception
        throw new Exception(outTable.getErrorMessage());
    }
    if (ntable.getErrorMessage() != null) {
        // something went wrong during initialization, report.
        setWarningMessage(ntable.getErrorMessage());
    }
    DataTableSpec modelSpec = FilterColumnTable.createFilterTableSpec(inSpec, m_columns);
    AffineTransConfiguration configuration = outTable.getConfiguration();
    DataTableSpec spec = outTable.getDataTableSpec();
    // fix the domain bounds to the configured min/max (the affine
    // transformation itself is not guaranteed to snap exactly to min/max)
    if (fixDomainBounds) {
        DataColumnSpec[] newColSpecs = new DataColumnSpec[spec.getNumColumns()];
        for (int i = 0; i < newColSpecs.length; i++) {
            newColSpecs[i] = spec.getColumnSpec(i);
        }
        for (int i = 0; i < m_columns.length; i++) {
            int index = spec.findColumnIndex(m_columns[i]);
            DataColumnSpecCreator creator = new DataColumnSpecCreator(newColSpecs[index]);
            DataColumnDomainCreator domCreator = new DataColumnDomainCreator(newColSpecs[index].getDomain());
            domCreator.setLowerBound(new DoubleCell(m_min));
            domCreator.setUpperBound(new DoubleCell(m_max));
            creator.setDomain(domCreator.createDomain());
            newColSpecs[index] = creator.createSpec();
        }
        spec = new DataTableSpec(spec.getName(), newColSpecs);
    }
    ExecutionMonitor normExec = exec.createSubProgress(.7);
    BufferedDataContainer container = exec.createDataContainer(spec);
    long count = 1;
    for (DataRow row : outTable) {
        normExec.checkCanceled();
        normExec.setProgress(count / (double) rowcount, "Normalizing row no. " + count + " of " + rowcount + " (\"" + row.getKey() + "\")");
        container.addRowToTable(row);
        count++;
    }
    container.close();
    return new CalculationResult(container.getTable(), modelSpec, configuration);
}
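For reference, the MINMAX_MODE branch maps each selected column into the user-configured interval [m_min, m_max]. The following stand-alone sketch shows the affine min-max scaling that doMinMaxNorm is assumed to apply per column; it is an illustration only, not the Normalizer implementation, and minMaxNormalize is a hypothetical helper.

static double[] minMaxNormalize(final double[] values, final double newMin, final double newMax) {
    // determine the observed column range
    double colMin = Double.POSITIVE_INFINITY;
    double colMax = Double.NEGATIVE_INFINITY;
    for (double v : values) {
        colMin = Math.min(colMin, v);
        colMax = Math.max(colMax, v);
    }
    // scale factor of the affine transform; a constant column collapses to newMin
    double scale = (colMax == colMin) ? 0.0 : (newMax - newMin) / (colMax - colMin);
    double[] out = new double[values.length];
    for (int i = 0; i < values.length; i++) {
        out[i] = (values[i] - colMin) * scale + newMin;
    }
    return out;
}

Rounding in such a transform is exactly why the node afterwards pins the output columns' domain bounds to m_min and m_max in the fixDomainBounds block above.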
use of org.knime.core.data.DataColumnSpec in project knime-core by knime.
the class CollectionSplitNodeModel method getTargetColIndex.
/**
* Validate settings and get the target column index.
*/
private int getTargetColIndex(final DataTableSpec spec) throws InvalidSettingsException {
    String colName = m_settings.getCollectionColName();
    if (colName == null || colName.length() == 0) {
        throw new InvalidSettingsException("Not configured");
    }
    final int colIndex = spec.findColumnIndex(colName);
    if (colIndex < 0) {
        throw new InvalidSettingsException("No such column: " + colName);
    }
    DataColumnSpec cs = spec.getColumnSpec(colIndex);
    if (!cs.getType().isCompatible(CollectionDataValue.class)) {
        throw new InvalidSettingsException("Column \"" + colName + "\" does not contain collection.");
    }
    return colIndex;
}
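The last check rejects any column whose type does not implement CollectionDataValue. A minimal, hedged illustration of that compatibility test (not taken from the source; it assumes the standard list type created via ListCell.getCollectionType):

DataType listOfDouble = ListCell.getCollectionType(DoubleCell.TYPE);
boolean accepted = listOfDouble.isCompatible(CollectionDataValue.class);     // true: list columns can be split
boolean rejected = DoubleCell.TYPE.isCompatible(CollectionDataValue.class);  // false: a plain double column cannot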
use of org.knime.core.data.DataColumnSpec in project knime-core by knime.
the class CollectionSplitNodeModel method getColSpecsByElementNames.
/**
* Get new column specs as inferred from the element names in the
* collection column.
*/
private DataColumnSpec[] getColSpecsByElementNames(final DataTableSpec spec) throws InvalidSettingsException {
    int colIndex = getTargetColIndex(spec);
    DataColumnSpec colSpec = spec.getColumnSpec(colIndex);
    List<String> elementNames = colSpec.getElementNames();
    if (elementNames.isEmpty()) {
throw new InvalidSettingsException("Input column \"" + colSpec.getName() + "\" does not provide element names; " + "consider to change option in dialog or make sure that" + "the input table contains the necessary information.");
    }
    DataType type = colSpec.getType().getCollectionElementType();
    HashSet<String> hashNames = new HashSet<String>();
    for (DataColumnSpec s : spec) {
        hashNames.add(s.getName());
    }
    if (m_settings.isReplaceInputColumn()) {
        hashNames.remove(colSpec.getName());
    }
    DataColumnSpec[] newColSpec = new DataColumnSpec[elementNames.size()];
    for (int i = 0; i < newColSpec.length; i++) {
        String baseName = elementNames.get(i);
        int uniquifier = 1;
        while (!hashNames.add(baseName)) {
            baseName = elementNames.get(i) + "(#" + (uniquifier++) + ")";
        }
        newColSpec[i] = new DataColumnSpecCreator(baseName, type).createSpec();
    }
    return newColSpec;
}
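The while loop above disambiguates element names against the column names already present in the input table. A stand-alone sketch of the same uniquification logic in plain Java, java.util imports assumed; uniquify is a hypothetical helper, not part of the node:

static List<String> uniquify(final List<String> elementNames, final Set<String> takenNames) {
    List<String> result = new ArrayList<String>();
    for (String name : elementNames) {
        String candidate = name;
        int uniquifier = 1;
        // keep appending "(#n)" until the candidate is not taken yet
        while (!takenNames.add(candidate)) {
            candidate = name + "(#" + (uniquifier++) + ")";
        }
        result.add(candidate);
    }
    return result;
}

// example: uniquify(Arrays.asList("x", "z", "x"), new HashSet<>(Arrays.asList("x", "y")))
// yields [x(#1), z, x(#2)]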
use of org.knime.core.data.DataColumnSpec in project knime-core by knime.
the class CollectionSplitNodeModel method execute.
/**
* {@inheritDoc}
*/
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    BufferedDataTable table = inData[0];
    DataTableSpec spec = table.getDataTableSpec();
    ExecutionMonitor execForCR = exec;
    // validate settings
    getTargetColIndex(spec);
    DataColumnSpec[] colSpecs;
    switch (m_settings.getCountElementsPolicy()) {
        case Count:
            execForCR = exec.createSubProgress(0.7);
            ExecutionMonitor e = exec.createSubProgress(0.3);
            colSpecs = countNewColumns(table, e);
            break;
        case UseElementNamesOrFail:
            colSpecs = getColSpecsByElementNames(spec);
            break;
        case BestEffort:
            try {
                colSpecs = getColSpecsByElementNames(spec);
            } catch (InvalidSettingsException ise) {
                execForCR = exec.createSubProgress(0.7);
                e = exec.createSubProgress(0.3);
                colSpecs = countNewColumns(table, e);
            }
            break;
        default:
            throw new InvalidSettingsException("Unsupported policy: " + m_settings.getCountElementsPolicy());
    }
    Pair<ColumnRearranger, SplitCellFactory> pair = createColumnRearranger(spec, colSpecs);
    BufferedDataTable out = exec.createColumnRearrangeTable(table, pair.getFirst(), execForCR);
    String warnMessage = pair.getSecond().getWarnMessage();
    if (warnMessage != null) {
        setWarningMessage(warnMessage);
    }
    if (m_settings.isDetermineMostSpecificDataType()) {
        out = refineTypes(out, pair.getSecond(), exec);
    }
    return new BufferedDataTable[] { out };
}
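The createColumnRearranger call is not shown in this excerpt. As a rough sketch under stated assumptions, the rearranger it returns is likely assembled along these lines: the SplitCellFactory appends one column per spec in colSpecs, and the original collection column is removed when the user chose to replace it. The helper name createRearrangerSketch and its CellFactory parameter are illustrative only.

private ColumnRearranger createRearrangerSketch(final DataTableSpec spec, final CellFactory fac) throws InvalidSettingsException {
    ColumnRearranger rearranger = new ColumnRearranger(spec);
    // append the new element columns produced by the factory
    rearranger.append(fac);
    if (m_settings.isReplaceInputColumn()) {
        // drop the original collection column if it is being replaced
        rearranger.remove(getTargetColIndex(spec));
    }
    return rearranger;
}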
use of org.knime.core.data.DataColumnSpec in project knime-core by knime.
the class CollectionSplitNodeModel method refineTypes.
/**
* Retype the argument table to use the types as determined by the
* cell factory.
*/
private BufferedDataTable refineTypes(final BufferedDataTable table, final SplitCellFactory fac, final ExecutionContext exec) {
    HashMap<String, Integer> colMap = new HashMap<String, Integer>();
    DataTableSpec spec = table.getDataTableSpec();
    DataColumnSpec[] newColSpecs = new DataColumnSpec[spec.getNumColumns()];
    for (int i = 0; i < spec.getNumColumns(); i++) {
        colMap.put(spec.getColumnSpec(i).getName(), i);
        newColSpecs[i] = spec.getColumnSpec(i);
    }
    DataColumnSpec[] oldReplacedSpecs = fac.getColumnSpecs();
    DataType[] mostSpecificTypes = fac.getCommonTypes();
    DataColumnDomain[] domains = fac.getDomains();
    for (int i = 0; i < oldReplacedSpecs.length; i++) {
        DataColumnSpec s = oldReplacedSpecs[i];
        Integer index = colMap.get(s.getName());
        DataColumnSpecCreator creator = new DataColumnSpecCreator(newColSpecs[index]);
        creator.setType(mostSpecificTypes[i]);
        creator.setDomain(domains[i]);
        newColSpecs[index] = creator.createSpec();
    }
    DataTableSpec newSpec = new DataTableSpec(spec.getName(), newColSpecs);
    return exec.createSpecReplacerTable(table, newSpec);
}
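fac.getCommonTypes() delivers the most specific type observed per output column. How SplitCellFactory accumulates those types is not part of this excerpt; presumably it merges the type of every non-missing cell it produces, for which DataType.getCommonSuperType is the usual KNIME mechanism. A hedged sketch (observedCells is a hypothetical collection of the cells emitted for one column):

DataType commonType = null;
for (DataCell cell : observedCells) {
    if (cell.isMissing()) {
        // missing cells do not contribute to the common super type
        continue;
    }
    commonType = (commonType == null) ? cell.getType() : DataType.getCommonSuperType(commonType, cell.getType());
}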