use of org.knime.core.data.DataColumnDomainCreator in project knime-core by knime.
the class AutoBinner method calcDomainBoundsIfNeccessary.
/**
* Determines the per column min/max values of the given data if not already present in the domain.
*
* @param data the data
* @param exec the execution context
* @param recalcValuesFor The columns
* @return The data with extended domain information
* @throws InvalidSettingsException ...
* @throws CanceledExecutionException ...
*/
public BufferedDataTable calcDomainBoundsIfNeccessary(final BufferedDataTable data, final ExecutionContext exec, final List<String> recalcValuesFor) throws InvalidSettingsException, CanceledExecutionException {
if (null == recalcValuesFor || recalcValuesFor.isEmpty()) {
return data;
}
List<Integer> valuesI = new ArrayList<Integer>();
for (String colName : recalcValuesFor) {
DataColumnSpec colSpec = data.getDataTableSpec().getColumnSpec(colName);
if (!colSpec.getType().isCompatible(DoubleValue.class)) {
throw new InvalidSettingsException("Can only process numeric " + "data. The column \"" + colSpec.getName() + "\" is not numeric.");
}
if (recalcValuesFor.contains(colName) && !colSpec.getDomain().hasBounds()) {
valuesI.add(data.getDataTableSpec().findColumnIndex(colName));
}
}
if (valuesI.isEmpty()) {
return data;
}
Map<Integer, Double> min = new HashMap<Integer, Double>();
Map<Integer, Double> max = new HashMap<Integer, Double>();
for (int col : valuesI) {
min.put(col, Double.MAX_VALUE);
max.put(col, Double.MIN_VALUE);
}
int c = 0;
for (DataRow row : data) {
c++;
exec.checkCanceled();
exec.setProgress(c / (double) data.getRowCount());
for (int col : valuesI) {
double val = ((DoubleValue) row.getCell(col)).getDoubleValue();
if (min.get(col) > val) {
min.put(col, val);
}
if (max.get(col) < val) {
min.put(col, val);
}
}
}
List<DataColumnSpec> newColSpecList = new ArrayList<DataColumnSpec>();
int cc = 0;
for (DataColumnSpec columnSpec : data.getDataTableSpec()) {
if (recalcValuesFor.contains(columnSpec.getName())) {
DataColumnSpecCreator specCreator = new DataColumnSpecCreator(columnSpec);
DataColumnDomainCreator domainCreator = new DataColumnDomainCreator(new DoubleCell(min.get(cc)), new DoubleCell(max.get(cc)));
specCreator.setDomain(domainCreator.createDomain());
DataColumnSpec newColSpec = specCreator.createSpec();
newColSpecList.add(newColSpec);
} else {
newColSpecList.add(columnSpec);
}
cc++;
}
DataTableSpec spec = new DataTableSpec(newColSpecList.toArray(new DataColumnSpec[0]));
BufferedDataTable newDataTable = exec.createSpecReplacerTable(data, spec);
return newDataTable;
}
use of org.knime.core.data.DataColumnDomainCreator in project knime-core by knime.
the class NormalizerNodeModel method calculate.
/**
* New normalized {@link org.knime.core.data.DataTable} is created depending
* on the mode.
*/
/**
* @param inData The input data.
* @param exec For BufferedDataTable creation and progress.
* @return the result of the calculation
* @throws Exception If the node calculation fails for any reason.
*/
protected CalculationResult calculate(final PortObject[] inData, final ExecutionContext exec) throws Exception {
BufferedDataTable inTable = (BufferedDataTable) inData[0];
DataTableSpec inSpec = inTable.getSpec();
// extract selected numeric columns
updateNumericColumnSelection(inSpec);
Normalizer ntable = new Normalizer(inTable, m_columns);
long rowcount = inTable.size();
ExecutionMonitor prepareExec = exec.createSubProgress(0.3);
AffineTransTable outTable;
boolean fixDomainBounds = false;
switch(m_mode) {
case NONORM_MODE:
return new CalculationResult(inTable, new DataTableSpec(), new AffineTransConfiguration());
case MINMAX_MODE:
fixDomainBounds = true;
outTable = ntable.doMinMaxNorm(m_max, m_min, prepareExec);
break;
case ZSCORE_MODE:
outTable = ntable.doZScoreNorm(prepareExec);
break;
case DECIMALSCALING_MODE:
outTable = ntable.doDecimalScaling(prepareExec);
break;
default:
throw new Exception("No mode set");
}
if (outTable.getErrorMessage() != null) {
// something went wrong, report and throw an exception
throw new Exception(outTable.getErrorMessage());
}
if (ntable.getErrorMessage() != null) {
// something went wrong during initialization, report.
setWarningMessage(ntable.getErrorMessage());
}
DataTableSpec modelSpec = FilterColumnTable.createFilterTableSpec(inSpec, m_columns);
AffineTransConfiguration configuration = outTable.getConfiguration();
DataTableSpec spec = outTable.getDataTableSpec();
// the same transformation, which is not guaranteed to snap to min/max)
if (fixDomainBounds) {
DataColumnSpec[] newColSpecs = new DataColumnSpec[spec.getNumColumns()];
for (int i = 0; i < newColSpecs.length; i++) {
newColSpecs[i] = spec.getColumnSpec(i);
}
for (int i = 0; i < m_columns.length; i++) {
int index = spec.findColumnIndex(m_columns[i]);
DataColumnSpecCreator creator = new DataColumnSpecCreator(newColSpecs[index]);
DataColumnDomainCreator domCreator = new DataColumnDomainCreator(newColSpecs[index].getDomain());
domCreator.setLowerBound(new DoubleCell(m_min));
domCreator.setUpperBound(new DoubleCell(m_max));
creator.setDomain(domCreator.createDomain());
newColSpecs[index] = creator.createSpec();
}
spec = new DataTableSpec(spec.getName(), newColSpecs);
}
ExecutionMonitor normExec = exec.createSubProgress(.7);
BufferedDataContainer container = exec.createDataContainer(spec);
long count = 1;
for (DataRow row : outTable) {
normExec.checkCanceled();
normExec.setProgress(count / (double) rowcount, "Normalizing row no. " + count + " of " + rowcount + " (\"" + row.getKey() + "\")");
container.addRowToTable(row);
count++;
}
container.close();
return new CalculationResult(container.getTable(), modelSpec, configuration);
}
use of org.knime.core.data.DataColumnDomainCreator in project knime-core by knime.
the class MissingValueHandling3Table method createTableSpecPrivate.
/* private helper that assumes the ColSetting to have the right format. */
private static DataTableSpec createTableSpecPrivate(final DataTableSpec spec, final MissingValueHandling2ColSetting[] sets) {
assert (spec.getNumColumns() == sets.length);
DataColumnSpec[] newSpecs = new DataColumnSpec[sets.length];
for (int i = 0; i < sets.length; i++) {
DataColumnSpec colSpec = spec.getColumnSpec(i);
DataColumnSpec newSpec = colSpec;
if (sets[i].getMethod() == MissingValueHandling2ColSetting.METHOD_FIX_VAL) {
DataColumnDomain dom = colSpec.getDomain();
Comparator<DataCell> comp = colSpec.getType().getComparator();
DataCell fixCell = sets[i].getFixCell();
boolean changed = false;
DataCell l = dom.getLowerBound();
// (but rather be null). It may happen anyway, we catch it here
if (l != null && !l.isMissing() && (comp.compare(fixCell, l) < 0)) {
changed = true;
l = fixCell;
}
DataCell u = dom.getUpperBound();
if (u != null && !u.isMissing() && (comp.compare(fixCell, u) > 0)) {
changed = true;
u = fixCell;
}
Set<DataCell> vals = dom.getValues();
if (vals != null && !vals.contains(fixCell)) {
changed = true;
vals = new LinkedHashSet<DataCell>(vals);
vals.add(fixCell);
}
if (changed) {
DataColumnDomain newDom = new DataColumnDomainCreator(vals, l, u).createDomain();
DataColumnSpecCreator c = new DataColumnSpecCreator(colSpec);
c.setDomain(newDom);
newSpec = c.createSpec();
}
}
newSpecs[i] = newSpec;
}
return new DataTableSpec(newSpecs);
}
use of org.knime.core.data.DataColumnDomainCreator in project knime-core by knime.
the class PMMLDataDictionaryTranslator method addColSpecsForDataFields.
/**
* @param pmmlDoc the PMML document to analyze
* @param colSpecs the list to add the data column specs to
*/
private void addColSpecsForDataFields(final PMMLDocument pmmlDoc, final List<DataColumnSpec> colSpecs) {
DataDictionary dict = pmmlDoc.getPMML().getDataDictionary();
for (DataField dataField : dict.getDataFieldArray()) {
String name = dataField.getName();
DataType dataType = getKNIMEDataType(dataField.getDataType());
DataColumnSpecCreator specCreator = new DataColumnSpecCreator(name, dataType);
DataColumnDomain domain = null;
if (dataType.isCompatible(NominalValue.class)) {
Value[] valueArray = dataField.getValueArray();
DataCell[] cells;
if (DataType.getType(StringCell.class).equals(dataType)) {
if (dataField.getIntervalArray().length > 0) {
throw new IllegalArgumentException("Intervals cannot be defined for Strings.");
}
cells = new StringCell[valueArray.length];
if (valueArray.length > 0) {
for (int j = 0; j < cells.length; j++) {
cells[j] = new StringCell(valueArray[j].getValue());
}
}
domain = new DataColumnDomainCreator(cells).createDomain();
}
} else if (dataType.isCompatible(DoubleValue.class)) {
Double leftMargin = null;
Double rightMargin = null;
Interval[] intervalArray = dataField.getIntervalArray();
if (intervalArray != null && intervalArray.length > 0) {
Interval interval = dataField.getIntervalArray(0);
leftMargin = interval.getLeftMargin();
rightMargin = interval.getRightMargin();
} else if (dataField.getValueArray() != null && dataField.getValueArray().length > 0) {
// try to derive the bounds from the values
Value[] valueArray = dataField.getValueArray();
List<Double> values = new ArrayList<Double>();
for (int j = 0; j < valueArray.length; j++) {
String value = "";
try {
value = valueArray[j].getValue();
values.add(Double.parseDouble(value));
} catch (Exception e) {
throw new IllegalArgumentException("Skipping domain calculation. " + "Value \"" + value + "\" cannot be cast to double.");
}
}
leftMargin = Collections.min(values);
rightMargin = Collections.max(values);
}
if (leftMargin != null && rightMargin != null) {
// set the bounds of the domain if available
DataCell lowerBound = null;
DataCell upperBound = null;
if (DataType.getType(IntCell.class).equals(dataType)) {
lowerBound = new IntCell(leftMargin.intValue());
upperBound = new IntCell(rightMargin.intValue());
} else if (DataType.getType(DoubleCell.class).equals(dataType)) {
lowerBound = new DoubleCell(leftMargin);
upperBound = new DoubleCell(rightMargin);
}
domain = new DataColumnDomainCreator(lowerBound, upperBound).createDomain();
} else {
domain = new DataColumnDomainCreator().createDomain();
}
}
specCreator.setDomain(domain);
colSpecs.add(specCreator.createSpec());
m_dictFields.add(name);
}
}
use of org.knime.core.data.DataColumnDomainCreator in project knime-core by knime.
the class TreeNominalColumnDataTest method createPCATestData.
private static Pair<TreeNominalColumnData, TreeTargetNominalColumnData> createPCATestData(final TreeEnsembleLearnerConfiguration config) {
DataColumnSpec colSpec = new DataColumnSpecCreator("test-col", StringCell.TYPE).createSpec();
final String[] attVals = new String[] { "A", "B", "C", "D", "E" };
final String[] classes = new String[] { "T1", "T2", "T3" };
TreeNominalColumnDataCreator colCreator = new TreeNominalColumnDataCreator(colSpec);
DataColumnSpecCreator specCreator = new DataColumnSpecCreator("target-col", StringCell.TYPE);
specCreator.setDomain(new DataColumnDomainCreator(Arrays.stream(classes).distinct().map(s -> new StringCell(s)).toArray(i -> new StringCell[i])).createDomain());
DataColumnSpec targetSpec = specCreator.createSpec();
TreeTargetColumnDataCreator targetCreator = new TreeTargetNominalColumnDataCreator(targetSpec);
long rowKeyCounter = 0;
final int[][] classDistributions = new int[][] { { 40, 10, 10 }, { 10, 40, 10 }, { 20, 30, 10 }, { 20, 15, 25 }, { 10, 5, 45 } };
for (int i = 0; i < attVals.length; i++) {
for (int j = 0; j < classes.length; j++) {
for (int k = 0; k < classDistributions[i][j]; k++) {
RowKey key = RowKey.createRowKey(rowKeyCounter++);
colCreator.add(key, new StringCell(attVals[i]));
targetCreator.add(key, new StringCell(classes[j]));
}
}
}
final TreeNominalColumnData testColData = colCreator.createColumnData(0, config);
testColData.getMetaData().setAttributeIndex(0);
return Pair.create(testColData, (TreeTargetNominalColumnData) targetCreator.createColumnData());
}
Aggregations