use of org.knime.core.data.DataColumnDomain in project knime-core by knime.
the class TwoSampleTTestNodeDialog method initGroupComboBoxes.
/**
 * Refills the given combo box with the candidate group values of the
 * currently selected grouping column and afterwards re-applies the item
 * that was selected before the refill.
 *
 * <p>If the column's domain lists possible values, each value is added
 * (as string). Otherwise, if the domain has bounds, the lower and the
 * upper bound are added. If no grouping column is selected or the column
 * is not part of the current spec, the combo box is left empty.
 *
 * @param groupOne the combo box to repopulate
 */
private void initGroupComboBoxes(final JComboBox groupOne) {
    // remember the selection, it is lost when the items are removed
    final Object previousSelection = groupOne.getSelectedItem();
    groupOne.removeAllItems();

    final String groupingColumn = m_groupingColumn.getSelectedColumn();
    final boolean columnKnown = groupingColumn != null && m_spec.containsName(groupingColumn);
    if (columnKnown) {
        final DataColumnDomain columnDomain = m_spec.getColumnSpec(groupingColumn).getDomain();
        if (columnDomain.hasValues()) {
            for (final DataCell possibleValue : columnDomain.getValues()) {
                groupOne.addItem(possibleValue.toString());
            }
        } else if (columnDomain.hasBounds()) {
            // no value list available: offer the two bounds instead
            groupOne.addItem(columnDomain.getLowerBound().toString());
            groupOne.addItem(columnDomain.getUpperBound().toString());
        }
    }

    groupOne.setSelectedItem(previousSelection);
}
use of org.knime.core.data.DataColumnDomain in project knime-core by knime.
the class StatisticsTable method calculateAllMoments.
/**
 * Calculates <b>all the statistical moments in one pass</b>. After the
 * call of this operation, the statistical moments can be obtained very fast
 * from all the other methods.
 *
 * <p>The table is iterated once. Per column the method tracks the minimum
 * and maximum cell (compared with the column type's comparator) and the
 * number of missing cells. For columns whose type is compatible with
 * {@code DoubleValue} it additionally accumulates the sum and the sum of
 * squares, from which the mean and the variance (divided by
 * {@code count - 1}) are derived. Finally a new table spec is created whose
 * column domains carry the observed bounds.
 *
 * @param rowCount Row count of table for progress, may be NaN if unknown.
 * @param exec object to check with if user canceled the operation; if
 *            {@code null}, neither progress is reported nor cancellation
 *            is checked
 * @throws CanceledExecutionException if user canceled
 * @throws IllegalArgumentException if rowCount argument &lt; 0
 */
protected void calculateAllMoments(final double rowCount, final ExecutionMonitor exec) throws CanceledExecutionException {
    if (rowCount < 0.0) {
        throw new IllegalArgumentException("rowCount argument must not < 0: " + rowCount);
    }
    DataTableSpec origSpec = m_table.getDataTableSpec();
    int numOfCols = origSpec.getNumColumns();
    // the number of non-missing cells in each column (only counted for
    // numeric columns, see below); arrays are zero-initialized by Java
    int[] validCount = new int[numOfCols];
    double[] sumsquare = new double[numOfCols];
    final DataValueComparator[] comp = new DataValueComparator[numOfCols];
    // the column type never changes while iterating, so the compatibility
    // check is done once per column instead of once per cell
    final boolean[] isNumeric = new boolean[numOfCols];
    for (int i = 0; i < numOfCols; i++) {
        DataType type = origSpec.getColumnSpec(i).getType();
        comp[i] = type.getComparator();
        assert comp[i] != null;
        isNumeric[i] = type.isCompatible(DoubleValue.class);
    }
    int nrRows = 0;
    for (RowIterator rowIt = m_table.iterator(); rowIt.hasNext(); nrRows++) {
        DataRow row = rowIt.next();
        if (exec != null) {
            // without a known row count the progress stays at 0
            double prog = Double.isNaN(rowCount) ? 0.0 : nrRows / rowCount;
            exec.setProgress(prog, "Calculating statistics, processing row " + (nrRows + 1) + " (\"" + row.getKey() + "\")");
            // throws exception if user canceled
            exec.checkCanceled();
        }
        for (int c = 0; c < numOfCols; c++) {
            final DataCell cell = row.getCell(c);
            if (cell.isMissing()) {
                m_missingValueCnt[c]++;
                continue;
            }
            // keep the min and max for each column
            if ((m_minValues[c] == null) || (comp[c].compare(cell, m_minValues[c]) < 0)) {
                m_minValues[c] = cell;
            }
            if ((m_maxValues[c] == null) || (comp[c].compare(m_maxValues[c], cell) < 0)) {
                m_maxValues[c] = cell;
            }
            // for double columns we calc the sum (for the mean calc)
            if (isNumeric[c]) {
                double d = ((DoubleValue) cell).getDoubleValue();
                // a NaN sum is treated as "no value added yet"
                if (Double.isNaN(m_sum[c])) {
                    m_sum[c] = d;
                } else {
                    m_sum[c] += d;
                }
                sumsquare[c] += d * d;
                validCount[c]++;
            }
        }
        calculateMomentInSubClass(row);
    }
    m_nrRows = nrRows;
    for (int j = 0; j < numOfCols; j++) {
        // NOTE(review): validCount is only incremented for numeric columns,
        // so min/max of every non-numeric column is replaced by a missing
        // cell here -- confirm that this is intended.
        if (validCount[j] == 0 || m_minValues[j] == null) {
            DataCell mc = DataType.getMissingCell();
            m_minValues[j] = mc;
            m_maxValues[j] = mc;
            m_meanValues[j] = Double.NaN;
            m_varianceValues[j] = Double.NaN;
        } else {
            m_meanValues[j] = m_sum[j] / validCount[j];
            if (validCount[j] > 1) {
                m_varianceValues[j] = (sumsquare[j] - ((m_sum[j] * m_sum[j]) / validCount[j])) / (validCount[j] - 1);
            } else {
                m_varianceValues[j] = 0.0;
            }
            // round-off errors resulting in negative variance values
            // NOTE(review): the threshold -1.0E8 clamps every negative value
            // above -1e8; if only tiny round-off errors were meant, -1.0E-8
            // may have been intended -- confirm before changing.
            if (m_varianceValues[j] < 0.0 && m_varianceValues[j] > -1.0E8) {
                m_varianceValues[j] = 0.0;
            }
            assert m_varianceValues[j] >= 0.0 : "Variance cannot be negative (column \"" + origSpec.getColumnSpec(j).getName() + "\"): " + m_varianceValues[j];
        }
    }
    // compute resulting table spec
    DataTableSpec tableSpec = m_table.getDataTableSpec();
    int nrCols = tableSpec.getNumColumns();
    DataColumnSpec[] cSpec = new DataColumnSpec[nrCols];
    for (int c = 0; c < nrCols; c++) {
        DataColumnSpec s = tableSpec.getColumnSpec(c);
        // we create domains with our bounds; the possible values (if any)
        // are taken over from the original domain
        Set<DataCell> values = (s.getDomain() == null ? null : s.getDomain().getValues());
        // a missing cell is not a valid bound, use "no bound" instead
        DataCell lowerBound = (m_minValues[c] == null || m_minValues[c].isMissing()) ? null : m_minValues[c];
        DataCell upperBound = (m_maxValues[c] == null || m_maxValues[c].isMissing()) ? null : m_maxValues[c];
        DataColumnDomain newDomain = new DataColumnDomainCreator(values, lowerBound, upperBound).createDomain();
        DataColumnSpecCreator creator = new DataColumnSpecCreator(s);
        creator.setDomain(newDomain);
        cSpec[c] = creator.createSpec();
    }
    m_tSpec = new DataTableSpec(cSpec);
}
use of org.knime.core.data.DataColumnDomain in project knime-core by knime.
the class NaiveBayesLearnerNodeModel method configure.
/**
 * {@inheritDoc}
 *
 * <p>Checks that a class column is configured and present in the training
 * table, that the table has at least two columns and at least one nominal
 * column, and that the class column's domain does not list more values
 * than the configured maximum. Other nominal columns exceeding that
 * maximum only produce a warning.
 */
@Override
protected PortObjectSpec[] configure(final PortObjectSpec[] inSpecs) throws InvalidSettingsException {
    // a class column has to be configured
    final String classColName = m_classifyColumnName.getStringValue();
    if (classColName == null || classColName.isEmpty()) {
        throw new InvalidSettingsException("Please define the classification column");
    }

    // the training port has to deliver a table spec
    final PortObjectSpec trainingSpec = inSpecs[TRAINING_DATA_PORT];
    if (!(trainingSpec instanceof DataTableSpec)) {
        throw new IllegalArgumentException("Invalid input data");
    }
    final DataTableSpec tableSpec = (DataTableSpec) trainingSpec;
    if (tableSpec.findColumnIndex(classColName) < 0) {
        throw new InvalidSettingsException("Please define the classification column");
    }
    final int columnCount = tableSpec.getNumColumns();
    if (columnCount < 2) {
        throw new InvalidSettingsException("Input table should contain at least 2 columns");
    }

    // look for nominal columns and collect those whose domain lists more
    // values than allowed
    final int nominalValueLimit = m_maxNoOfNominalVals.getIntValue();
    boolean nominalColumnSeen = false;
    final List<String> oversizedColumns = new ArrayList<>();
    for (int colIdx = 0; colIdx < columnCount; colIdx++) {
        final DataColumnSpec colSpec = tableSpec.getColumnSpec(colIdx);
        if (!colSpec.getType().isCompatible(NominalValue.class)) {
            continue;
        }
        nominalColumnSeen = true;
        final DataColumnDomain domain = colSpec.getDomain();
        if (domain == null || domain.getValues() == null) {
            // no value list available, nothing to check
            continue;
        }
        final int valueCount = domain.getValues().size();
        if (valueCount <= nominalValueLimit) {
            continue;
        }
        final String colName = colSpec.getName();
        if (colName.equals(classColName)) {
            // too many values in the class column are fatal
            throw new InvalidSettingsException("Class column domain contains too many unique values" + " (" + valueCount + ")");
        }
        oversizedColumns.add(colName + " (" + valueCount + ")");
    }
    if (!nominalColumnSeen) {
        throw new InvalidSettingsException("No possible class attribute found in input table");
    }

    // warn about the oversized columns, listing at most four of them
    final int oversizedCount = oversizedColumns.size();
    if (oversizedCount == 1) {
        setWarningMessage("Column " + oversizedColumns.get(0) + " will possibly be skipped.");
    } else if (oversizedCount > 1) {
        final StringBuilder warning = new StringBuilder();
        warning.append("The following columns will possibly be skipped: ");
        final int listed = Math.min(oversizedCount, 4);
        for (int k = 0; k < listed; k++) {
            if (k > 0) {
                warning.append(", ");
            }
            warning.append(oversizedColumns.get(k));
        }
        if (oversizedCount > 4) {
            warning.append(", ").append("...");
        }
        setWarningMessage(warning.toString());
    }

    if (columnCount - oversizedCount < 1) {
        throw new InvalidSettingsException("Not enough valid columns");
    }
    return new PortObjectSpec[] { new NaiveBayesPortObjectSpec(tableSpec, tableSpec.getColumnSpec(classColName)) };
}
use of org.knime.core.data.DataColumnDomain in project knime-core by knime.
the class DecTreePredictorNodeModel method createOutTableSpec.
/**
 * Creates the spec of the output table: the input data spec followed by
 * one double column per prediction value (only if the distribution is to
 * be shown) and the string prediction column.
 *
 * @param inSpecs the input port specs; the entries at the model port and
 *            the data port are read
 * @return the output table spec, or {@code null} if the distribution is
 *         to be shown but the prediction values cannot be determined from
 *         the model spec
 */
private DataTableSpec createOutTableSpec(final PortObjectSpec[] inSpecs) {
    LinkedList<DataCell> predValues = null;
    if (m_showDistribution.getBooleanValue()) {
        predValues = getPredictionValues((PMMLPortObjectSpec) inSpecs[INMODELPORT]);
        if (predValues == null) {
            // no out spec can be determined
            return null;
        }
    }
    // one column per prediction value (if any) plus the prediction column
    int numCols = (predValues == null ? 0 : predValues.size()) + 1;
    DataTableSpec inSpec = (DataTableSpec) inSpecs[INDATAPORT];
    UniqueNameGenerator nameGenerator = new UniqueNameGenerator(inSpec);
    DataColumnSpec[] newCols = new DataColumnSpec[numCols];
    // domain [0,1] as default for the double cells containing the distribution
    DataColumnDomain domain = new DataColumnDomainCreator(new DoubleCell(0.0), new DoubleCell(1.0)).createDomain();
    // add all distribution columns; iterate the linked list instead of
    // using get(i), which would walk the list anew for every index
    if (predValues != null) {
        int col = 0;
        for (DataCell predValue : predValues) {
            DataColumnSpecCreator colSpecCreator = nameGenerator.newCreator(predValue.toString(), DoubleCell.TYPE);
            colSpecCreator.setDomain(domain);
            newCols[col++] = colSpecCreator.createSpec();
        }
    }
    // add the prediction column
    newCols[numCols - 1] = nameGenerator.newColumn("Prediction (DecTree)", StringCell.TYPE);
    DataTableSpec newColSpec = new DataTableSpec(newCols);
    return new DataTableSpec(inSpec, newColSpec);
}
use of org.knime.core.data.DataColumnDomain in project knime-core by knime.
the class CollectionSplitNodeModel method refineTypes.
/**
 * Retype the argument table to use the types as determined by the
 * cell factory.
 *
 * <p>For every column spec reported by the factory, the column of the same
 * name in the table spec gets the factory's common type and domain; all
 * other columns keep their spec. The table content itself is not touched,
 * only its spec is replaced.
 *
 * @param table the table whose spec is to be refined
 * @param fac the factory providing the column specs, common types and
 *            domains (arrays are read index-aligned)
 * @param exec the context used to create the spec replacer table
 * @return the table with the refined spec
 * @throws IllegalStateException if the factory reports a column that is
 *             not contained in the table spec
 */
private BufferedDataTable refineTypes(final BufferedDataTable table, final SplitCellFactory fac, final ExecutionContext exec) {
    DataTableSpec spec = table.getDataTableSpec();
    int numCols = spec.getNumColumns();
    // column name -> column index, and a working copy of all column specs
    HashMap<String, Integer> colMap = new HashMap<String, Integer>();
    DataColumnSpec[] newColSpecs = new DataColumnSpec[numCols];
    for (int i = 0; i < numCols; i++) {
        DataColumnSpec colSpec = spec.getColumnSpec(i);
        colMap.put(colSpec.getName(), i);
        newColSpecs[i] = colSpec;
    }
    DataColumnSpec[] oldReplacedSpecs = fac.getColumnSpecs();
    DataType[] mostSpecificTypes = fac.getCommonTypes();
    DataColumnDomain[] domains = fac.getDomains();
    for (int i = 0; i < oldReplacedSpecs.length; i++) {
        String name = oldReplacedSpecs[i].getName();
        Integer index = colMap.get(name);
        if (index == null) {
            // fail with a clear message instead of an unboxing NullPointerException
            throw new IllegalStateException("Column to refine not found in table: " + name);
        }
        DataColumnSpecCreator creator = new DataColumnSpecCreator(newColSpecs[index]);
        creator.setType(mostSpecificTypes[i]);
        creator.setDomain(domains[i]);
        newColSpecs[index] = creator.createSpec();
    }
    DataTableSpec newSpec = new DataTableSpec(spec.getName(), newColSpecs);
    return exec.createSpecReplacerTable(table, newSpec);
}
Aggregations