use of org.knime.core.data.DataColumnDomainCreator in project knime-core by knime.
the class ConditionalBoxPlotNodeModel method execute.
/**
 * {@inheritDoc}
 *
 * <p>Groups the non-missing values of the configured numeric column by the string value of the
 * configured nominal column, computes statistics and outlier maps per group via
 * {@code calculateStatistic}, stores them in the model's fields and returns the table produced by
 * {@code createOutputTable}.
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    m_statistics = new LinkedHashMap<DataColumnSpec, double[]>();
    m_mildOutliers = new LinkedHashMap<String, Map<Double, Set<RowKey>>>();
    m_extremeOutliers = new LinkedHashMap<String, Map<Double, Set<RowKey>>>();
    double nrRows = inData[0].size();
    int rowCount = 0;
    int numericIndex = inData[0].getDataTableSpec().findColumnIndex(m_settings.numericColumn());
    int nominalIndex = inData[0].getDataTableSpec().findColumnIndex(m_settings.nominalColumn());
    Map<String, Map<Double, Set<RowKey>>> data = new LinkedHashMap<String, Map<Double, Set<RowKey>>>();
    // some default values .. if one column only has missing values.
    for (DataCell d : inData[0].getDataTableSpec().getColumnSpec(nominalIndex).getDomain().getValues()) {
        String name = ((StringValue) d).getStringValue();
        m_mildOutliers.put(name, new HashMap<Double, Set<RowKey>>());
        m_extremeOutliers.put(name, new HashMap<Double, Set<RowKey>>());
    }
    // Separate the rows: class name -> (numeric value -> keys of the rows having that value).
    for (DataRow r : inData[0]) {
        exec.checkCanceled();
        exec.setProgress(rowCount++ / nrRows, "Separating...");
        if (!m_settings.showMissingValues() && r.getCell(nominalIndex).isMissing()) {
            // missing cell in nominal values is unwanted
            continue;
        }
        String nominal = replaceSpaces(r.getCell(nominalIndex).toString());
        if (r.getCell(numericIndex).isMissing()) {
            // ignore missing cells in numeric column
            continue;
        }
        DoubleValue numeric = (DoubleValue) r.getCell(numericIndex);
        Map<Double, Set<RowKey>> map = data.get(nominal);
        if (map == null) {
            map = new LinkedHashMap<Double, Set<RowKey>>();
        }
        Set<RowKey> set = map.get(numeric.getDoubleValue());
        if (set == null) {
            set = new HashSet<RowKey>();
        }
        set.add(r.getKey());
        map.put(numeric.getDoubleValue(), set);
        data.put(nominal, map);
    }
    List<String> keys = new ArrayList<String>(data.keySet());
    boolean ignoreMissingValues = false;
    if (m_settings.showMissingValues() && !keys.contains(DataType.getMissingCell().toString())) {
        // we promised to create data for missing values..
        // if there aren't any.. we have to create them ourselves
        setWarningMessage("No missing values found.");
        ignoreMissingValues = true;
    }
    Collections.sort(keys);
    DataColumnSpec[] colSpecs =
        createColumnSpec(inData[0].getDataTableSpec().getColumnSpec(nominalIndex), ignoreMissingValues);
    if (keys.size() == 0) {
        setWarningMessage("All classes are empty.");
    }
    int dataSetNr = 0;
    for (DataColumnSpec dcs : colSpecs) {
        String d = dcs.getName();
        Map<Double, Set<RowKey>> classData = data.get(d);
        if (classData == null || keys.size() == 0) {
            // no rows were collected for this class: keep its spec untouched
            dataSetNr++;
            continue;
        }
        exec.checkCanceled();
        exec.setProgress(dataSetNr / (double) keys.size(), "Creating statistics");
        Map<Double, Set<RowKey>> extremeOutliers = new LinkedHashMap<Double, Set<RowKey>>();
        Map<Double, Set<RowKey>> mildOutliers = new LinkedHashMap<Double, Set<RowKey>>();
        // NOTE(review): calculateStatistic presumably fills the two outlier maps - confirm in the helper.
        double[] stats = calculateStatistic(classData, mildOutliers, extremeOutliers);
        double minimum = stats[BoxPlotNodeModel.MIN];
        double maximum = stats[BoxPlotNodeModel.MAX];
        // replace the column spec by a copy whose domain is bounded by the computed min/max
        DataColumnSpecCreator creator = new DataColumnSpecCreator(colSpecs[dataSetNr]);
        creator.setDomain(
            new DataColumnDomainCreator(new DoubleCell(minimum), new DoubleCell(maximum)).createDomain());
        colSpecs[dataSetNr] = creator.createSpec();
        m_statistics.put(colSpecs[dataSetNr], stats);
        m_mildOutliers.put(d, mildOutliers);
        m_extremeOutliers.put(d, extremeOutliers);
        dataSetNr++;
    }
    // The container is closed without adding rows; it only carries the column specs into the data array.
    DataTableSpec dts = new DataTableSpec("MyTempTable", colSpecs);
    DataContainer cont = new DataContainer(dts);
    cont.close();
    m_dataArray = new DefaultDataArray(cont.getTable(), 1, 2);
    cont.dispose();
    if (ignoreMissingValues) {
        // Insert a spec for the missing-value class at its sorted position among the column names.
        DataColumnSpec[] temp = new DataColumnSpec[colSpecs.length + 1];
        DataColumnSpec missing = new DataColumnSpecCreator(DataType.getMissingCell().toString(),
            DataType.getMissingCell().getType()).createSpec();
        int i = 0;
        // The bounds check is required: the missing-value name may sort after every existing
        // column name, or there may be no columns at all.
        while (i < colSpecs.length && missing.getName().compareTo(colSpecs[i].getName()) > 0) {
            temp[i] = colSpecs[i];
            i++;
        }
        temp[i++] = missing;
        while (i < temp.length) {
            temp[i] = colSpecs[i - 1];
            i++;
        }
        colSpecs = temp;
    }
    /* Save inSpec of the numeric column to provide the view a way to
     * consider the input domain for normalization. */
    m_numColSpec = inData[0].getDataTableSpec().getColumnSpec(numericIndex);
    return new BufferedDataTable[] {
        createOutputTable(inData[0].getDataTableSpec(), colSpecs, exec).getTable() };
}
use of org.knime.core.data.DataColumnDomainCreator in project knime-core by knime.
the class Normalizer3NodeModel method calculate.
/**
 * Normalizes the included columns of the input table according to the configured mode and copies
 * the normalized rows into a new {@link org.knime.core.data.DataTable}.
 *
 * @param inData The input data.
 * @param exec For BufferedDataTable creation and progress.
 * @return the result of the calculation
 * @throws Exception If the node calculation fails for any reason.
 */
protected CalculationResult calculate(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    final BufferedDataTable inTable = (BufferedDataTable) inData[0];
    final DataTableSpec inSpec = inTable.getSpec();
    // extract selected numeric columns
    final String[] includedColumns = getIncludedComlumns(inSpec);
    final Normalizer2 normalizer = new Normalizer2(inTable, includedColumns);
    final long totalRows = inTable.size();
    final ExecutionContext prepareExec = exec.createSubExecutionContext(0.3);

    final AffineTransTable normalized;
    boolean fixDomainBounds = false;
    switch (m_config.getMode()) {
        case MINMAX:
            normalized = normalizer.doMinMaxNorm(m_config.getMax(), m_config.getMin(), prepareExec);
            fixDomainBounds = true;
            break;
        case Z_SCORE:
            normalized = normalizer.doZScoreNorm(prepareExec);
            break;
        case DECIMALSCALING:
            normalized = normalizer.doDecimalScaling(prepareExec);
            break;
        default:
            throw new InvalidSettingsException("No mode set");
    }

    // an error on the transformed table is fatal
    final String fatalMessage = normalized.getErrorMessage();
    if (fatalMessage != null) {
        throw new Exception(fatalMessage);
    }
    // an error reported by the normalizer itself is only surfaced as a warning
    final String initMessage = normalizer.getErrorMessage();
    if (initMessage != null) {
        setWarningMessage(initMessage);
    }

    final DataTableSpec modelSpec = FilterColumnTable.createFilterTableSpec(inSpec, includedColumns);
    final AffineTransConfiguration configuration = normalized.getConfiguration();
    DataTableSpec spec = normalized.getDataTableSpec();

    // For min/max normalization, force the domain bounds of every included column to the
    // configured min/max instead of keeping the bounds carried by the transformed spec.
    if (fixDomainBounds) {
        final int columnCount = spec.getNumColumns();
        final DataColumnSpec[] adjusted = new DataColumnSpec[columnCount];
        for (int c = 0; c < columnCount; c++) {
            adjusted[c] = spec.getColumnSpec(c);
        }
        for (final String included : includedColumns) {
            final int pos = spec.findColumnIndex(included);
            final DataColumnSpec current = adjusted[pos];
            final DataColumnSpecCreator specCreator = new DataColumnSpecCreator(current);
            final DataColumnDomainCreator domainCreator = new DataColumnDomainCreator(current.getDomain());
            domainCreator.setLowerBound(new DoubleCell(m_config.getMin()));
            domainCreator.setUpperBound(new DoubleCell(m_config.getMax()));
            specCreator.setDomain(domainCreator.createDomain());
            adjusted[pos] = specCreator.createSpec();
        }
        spec = new DataTableSpec(spec.getName(), adjusted);
    }

    final ExecutionMonitor normExec = exec.createSubProgress(.7);
    final BufferedDataContainer container = exec.createDataContainer(spec);
    long rowNo = 0;
    for (final DataRow row : normalized) {
        rowNo++;
        normExec.checkCanceled();
        normExec.setProgress(rowNo / (double) totalRows,
            "Normalizing row no. " + rowNo + " of " + totalRows + " (\"" + row.getKey() + "\")");
        container.addRowToTable(row);
    }
    container.close();
    return new CalculationResult(container.getTable(), modelSpec, configuration);
}
use of org.knime.core.data.DataColumnDomainCreator in project knime-core by knime.
the class EditNominalDomainDicNodeModel method mergeTableSpecs.
/**
 * Builds a spec creator containing the columns of {@code orgSpec} in their original order, where
 * every column whose index is a key of the given map gets its domain values replaced by the mapped
 * set. All other columns are taken over unchanged.
 *
 * @param orgSpec the original table spec
 * @param orgIndexToNewDomainValuesMap maps a column index of {@code orgSpec} to the domain values to
 *            set on that column
 * @return the spec creator holding the merged columns
 */
private static DataTableSpecCreator mergeTableSpecs(final DataTableSpec orgSpec, final Map<Integer, Set<DataCell>> orgIndexToNewDomainValuesMap) {
    final DataTableSpecCreator merged = new DataTableSpecCreator(orgSpec).dropAllColumns();
    final int columnCount = orgSpec.getNumColumns();
    for (int col = 0; col < columnCount; col++) {
        final DataColumnSpec original = orgSpec.getColumnSpec(col);
        if (!orgIndexToNewDomainValuesMap.containsKey(col)) {
            // nothing to merge for this column
            merged.addColumns(original);
            continue;
        }
        // start from the existing domain and only exchange its values
        final DataColumnDomainCreator domainCreator = new DataColumnDomainCreator(original.getDomain());
        domainCreator.setValues(orgIndexToNewDomainValuesMap.get(col));
        final DataColumnSpecCreator columnCreator = new DataColumnSpecCreator(original);
        columnCreator.setDomain(domainCreator.createDomain());
        merged.addColumns(columnCreator.createSpec());
    }
    return merged;
}
use of org.knime.core.data.DataColumnDomainCreator in project knime-core by knime.
the class EditNumericDomainNodeModel method processDomainSettings.
/**
 * Creates a copy of the given table spec in which every column selected by the configured column
 * filter gets a new domain: the bounds come from the configuration (converted with
 * {@code createCell}), the possible values are taken over from the original column domain.
 * Columns that are not included are copied unchanged. Sets a warning message on the node when no
 * column is included or when configured includes no longer exist.
 *
 * @param dataTableSpec the spec to process
 * @return the spec with the adjusted column domains
 * @throws InvalidSettingsException if no configuration is available
 */
private DataTableSpec processDomainSettings(final DataTableSpec dataTableSpec) throws InvalidSettingsException {
    if (m_configuration == null) {
        throw new InvalidSettingsException("Missing Configuration.");
    }
    EditNumericDomainConfiguration config = m_configuration;
    FilterResult filterResult = config.getColumnspecFilterConfig().applyTo(dataTableSpec);
    List<DataColumnSpec> newColumnSpecs = new ArrayList<DataColumnSpec>(dataTableSpec.getNumColumns());
    String[] columnNames = dataTableSpec.getColumnNames();
    Set<String> includeSet = new HashSet<String>();
    Collections.addAll(includeSet, filterResult.getIncludes());
    for (int i = 0; i < dataTableSpec.getNumColumns(); i++) {
        DataColumnSpec columnSpec = dataTableSpec.getColumnSpec(i);
        String columnName = columnNames[i];
        if (includeSet.contains(columnName)) {
            DataColumnSpecCreator columnSpecCreator = new DataColumnSpecCreator(columnSpec);
            DataColumnDomainCreator domainCreator = new DataColumnDomainCreator(
                createCell(columnName, columnSpec.getType(), config.getLowerBound()),
                createCell(columnName, columnSpec.getType(), config.getUpperBound()));
            // keep the possible values of the original domain, only the bounds are replaced
            domainCreator.setValues(columnSpec.getDomain().getValues());
            columnSpecCreator.setDomain(domainCreator.createDomain());
            newColumnSpecs.add(columnSpecCreator.createSpec());
        } else {
            newColumnSpecs.add(columnSpec);
        }
    }
    StringBuilder warnings = new StringBuilder();
    if (includeSet.isEmpty()) {
        warnings.append("No columns are included.");
    }
    if (filterResult.getRemovedFromIncludes().length > 0) {
        // separate from a preceding warning only; avoids a message that starts with a blank line
        if (warnings.length() > 0) {
            warnings.append('\n');
        }
        warnings.append("Following columns are configured but no longer exist: ");
        warnings.append(
            ConvenienceMethods.getShortStringFrom(Arrays.asList(filterResult.getRemovedFromIncludes()), 5));
    }
    if (warnings.length() > 0) {
        setWarningMessage(warnings.toString());
    }
    return new DataTableSpecCreator(dataTableSpec).dropAllColumns()
        .addColumns(newColumnSpecs.toArray(new DataColumnSpec[newColumnSpecs.size()])).createSpec();
}
use of org.knime.core.data.DataColumnDomainCreator in project knime-core by knime.
the class PMCCPortObjectAndSpec method createOutSpec.
/**
 * Builds the table spec of the correlation table: one {@code DoubleCell}-typed column per given
 * name, each with a domain bounded by {@code MIN_VALUE_CELL} and {@code MAX_VALUE_CELL}.
 *
 * @param names the column names being analyzed.
 * @return The new output spec.
 * @since 2.6
 */
public static DataTableSpec createOutSpec(final String[] names) {
    final DataColumnSpec[] specs = new DataColumnSpec[names.length];
    int pos = 0;
    for (final String name : names) {
        final DataColumnSpecCreator specCreator = new DataColumnSpecCreator(name, DoubleCell.TYPE);
        final DataColumnDomainCreator bounds = new DataColumnDomainCreator(MIN_VALUE_CELL, MAX_VALUE_CELL);
        specCreator.setDomain(bounds.createDomain());
        specs[pos++] = specCreator.createSpec();
    }
    return new DataTableSpec("Correlation values", specs);
}
Aggregations