use of org.knime.core.data.DataTableSpec in project knime-core by knime.
the class AutoBinnerLearnNodeModel method configure.
/**
* {@inheritDoc}
*/
@Override
protected PortObjectSpec[] configure(final PortObjectSpec[] inSpecs) throws InvalidSettingsException {
DataTableSpec dataSpec = (DataTableSpec) inSpecs[0];
AutoBinner binner = new AutoBinner(m_settings);
return binner.getOutputSpec(dataSpec);
}
use of org.knime.core.data.DataTableSpec in project knime-core by knime.
the class AutoBinner method calcDomainBoundsIfNeccessary.
/**
* Determines the per column min/max values of the given data if not already present in the domain.
*
* @param data the data
* @param exec the execution context
* @param recalcValuesFor The columns
* @return The data with extended domain information
* @throws InvalidSettingsException ...
* @throws CanceledExecutionException ...
*/
public BufferedDataTable calcDomainBoundsIfNeccessary(final BufferedDataTable data, final ExecutionContext exec, final List<String> recalcValuesFor) throws InvalidSettingsException, CanceledExecutionException {
if (null == recalcValuesFor || recalcValuesFor.isEmpty()) {
return data;
}
List<Integer> valuesI = new ArrayList<Integer>();
for (String colName : recalcValuesFor) {
DataColumnSpec colSpec = data.getDataTableSpec().getColumnSpec(colName);
if (!colSpec.getType().isCompatible(DoubleValue.class)) {
throw new InvalidSettingsException("Can only process numeric " + "data. The column \"" + colSpec.getName() + "\" is not numeric.");
}
if (recalcValuesFor.contains(colName) && !colSpec.getDomain().hasBounds()) {
valuesI.add(data.getDataTableSpec().findColumnIndex(colName));
}
}
if (valuesI.isEmpty()) {
return data;
}
Map<Integer, Double> min = new HashMap<Integer, Double>();
Map<Integer, Double> max = new HashMap<Integer, Double>();
for (int col : valuesI) {
min.put(col, Double.MAX_VALUE);
max.put(col, Double.MIN_VALUE);
}
int c = 0;
for (DataRow row : data) {
c++;
exec.checkCanceled();
exec.setProgress(c / (double) data.getRowCount());
for (int col : valuesI) {
double val = ((DoubleValue) row.getCell(col)).getDoubleValue();
if (min.get(col) > val) {
min.put(col, val);
}
if (max.get(col) < val) {
min.put(col, val);
}
}
}
List<DataColumnSpec> newColSpecList = new ArrayList<DataColumnSpec>();
int cc = 0;
for (DataColumnSpec columnSpec : data.getDataTableSpec()) {
if (recalcValuesFor.contains(columnSpec.getName())) {
DataColumnSpecCreator specCreator = new DataColumnSpecCreator(columnSpec);
DataColumnDomainCreator domainCreator = new DataColumnDomainCreator(new DoubleCell(min.get(cc)), new DoubleCell(max.get(cc)));
specCreator.setDomain(domainCreator.createDomain());
DataColumnSpec newColSpec = specCreator.createSpec();
newColSpecList.add(newColSpec);
} else {
newColSpecList.add(columnSpec);
}
cc++;
}
DataTableSpec spec = new DataTableSpec(newColSpecList.toArray(new DataColumnSpec[0]));
BufferedDataTable newDataTable = exec.createSpecReplacerTable(data, spec);
return newDataTable;
}
use of org.knime.core.data.DataTableSpec in project knime-core by knime.
the class CategoryToNumberNodeModel method execute.
/**
* {@inheritDoc}
*/
@Override
protected PortObject[] execute(final PortObject[] inObjects, final ExecutionContext exec) throws Exception {
if (m_settings.getIncludedColumns().length == 0) {
// nothing to convert, let's return the input table.
setWarningMessage("No columns selected," + " returning input.");
}
BufferedDataTable inData = (BufferedDataTable) inObjects[0];
DataTableSpec inSpec = (DataTableSpec) inObjects[0].getSpec();
ColumnRearranger rearranger = createRearranger(inSpec);
BufferedDataTable outTable = exec.createColumnRearrangeTable(inData, rearranger, exec);
// the optional PMML in port (can be null)
PMMLPortObject inPMMLPort = (PMMLPortObject) inObjects[1];
PMMLPortObjectSpecCreator creator = new PMMLPortObjectSpecCreator(inPMMLPort, rearranger.createSpec());
PMMLPortObject outPMMLPort = new PMMLPortObject(creator.createSpec(), inPMMLPort);
for (CategoryToNumberCellFactory factory : m_factories) {
PMMLMapValuesTranslator trans = new PMMLMapValuesTranslator(factory.getConfig(), new DerivedFieldMapper(inPMMLPort));
outPMMLPort.addGlobalTransformations(trans.exportToTransDict());
}
return new PortObject[] { outTable, outPMMLPort };
}
use of org.knime.core.data.DataTableSpec in project knime-core by knime.
the class CategoryToNumberNodeModel method configure.
/**
* {@inheritDoc}
*/
@Override
protected PortObjectSpec[] configure(final PortObjectSpec[] inSpecs) throws InvalidSettingsException {
DataTableSpec inSpec = (DataTableSpec) inSpecs[0];
List<String> inputCols = new ArrayList<String>();
for (DataColumnSpec column : inSpec) {
if (column.getType().isCompatible(StringValue.class)) {
inputCols.add(column.getName());
}
}
// Auto configuration when included columns are not set
if (null == m_settings.getIncludedColumns()) {
// when there is no column with nominal data
if (inputCols.isEmpty()) {
throw new InvalidSettingsException("No column in " + "the input compatible to \"StringValue\".");
} else {
// auto-configure
m_settings.setIncludedColumns(inputCols.toArray(new String[inputCols.size()]));
}
} else {
if (!m_settings.getIncludeAll()) {
if (m_settings.getIncludedColumns().length == 0) {
setWarningMessage("No columns selected.");
}
List<String> included = new ArrayList<String>();
included.addAll(Arrays.asList(m_settings.getIncludedColumns()));
included.removeAll(inputCols);
if (!included.isEmpty()) {
// output first missing column
throw new InvalidSettingsException("Missing column: " + included.get(0).toString());
}
} else {
if (inputCols.isEmpty()) {
throw new InvalidSettingsException("No column in " + "the input compatible to \"StringValue\".");
} else {
// automatically add all columns
m_settings.setIncludedColumns(inputCols.toArray(new String[inputCols.size()]));
}
}
}
ColumnRearranger rearranger = createRearranger(inSpec);
PMMLPortObjectSpec pmmlSpec = (PMMLPortObjectSpec) inSpecs[1];
PMMLPortObjectSpecCreator pmmlSpecCreator = new PMMLPortObjectSpecCreator(pmmlSpec, inSpec);
pmmlSpecCreator.addPreprocColNames(inputCols);
return new PortObjectSpec[] { rearranger.createSpec(), pmmlSpecCreator.createSpec() };
}
use of org.knime.core.data.DataTableSpec in project knime-core by knime.
the class TreeEnsembleClassificationPredictorNodeModel method createStreamableOperator.
/**
* {@inheritDoc}
*/
@Override
public StreamableOperator createStreamableOperator(final PartitionInfo partitionInfo, final PortObjectSpec[] inSpecs) throws InvalidSettingsException {
return new StreamableOperator() {
@Override
public void runFinal(final PortInput[] inputs, final PortOutput[] outputs, final ExecutionContext exec) throws Exception {
TreeEnsembleModelPortObject model = (TreeEnsembleModelPortObject) ((PortObjectInput) inputs[0]).getPortObject();
TreeEnsembleModelPortObjectSpec modelSpec = model.getSpec();
DataTableSpec dataSpec = (DataTableSpec) inSpecs[1];
final TreeEnsemblePredictor pred = new TreeEnsemblePredictor(modelSpec, model, dataSpec, m_configuration);
ColumnRearranger rearranger = pred.getPredictionRearranger();
StreamableFunction func = rearranger.createStreamableFunction(1, 0);
func.runFinal(inputs, outputs, exec);
}
};
}
Aggregations