use of org.knime.core.data.DataTableSpec in project knime-core by knime.
the class PMMLRuleSetPredictorNodeModel method configure.
/**
 * {@inheritDoc}
 * <p>
 * Checks that the data table provides every column the PMML model uses (same name, same type) and derives
 * the output spec: an optional confidence column plus the prediction column, which is either appended or
 * replaces an existing column.
 */
@Override
protected DataTableSpec[] configure(final PortObjectSpec[] inSpecs) throws InvalidSettingsException {
    final DataTableSpec original = (DataTableSpec) inSpecs[DATA_INDEX];
    final ColumnRearranger rearranger = new ColumnRearranger(original);
    final PMMLPortObjectSpec portObjectSpec = (PMMLPortObjectSpec) inSpecs[MODEL_INDEX];

    // gather all model columns that are absent from the table or present with a different type
    final List<DataColumnSpec> mismatched = new ArrayList<DataColumnSpec>();
    for (final DataColumnSpec modelCol : portObjectSpec.getActiveColumnList()) {
        final String name = modelCol.getName();
        final boolean usable =
            original.containsName(name) && original.getColumnSpec(name).getType().equals(modelCol.getType());
        if (!usable) {
            mismatched.add(modelCol);
        }
    }
    if (!mismatched.isEmpty()) {
        final StringBuilder message = new StringBuilder(
            "Incompatible to the table, the following columns are not present, or have a wrong type:");
        for (final DataColumnSpec modelCol : mismatched) {
            message.append("\n ").append(modelCol);
        }
        throw new InvalidSettingsException(message.toString());
    }

    // the prediction takes the type of the first target column, or string if the model declares none
    final List<DataColumnSpec> targetCols = portObjectSpec.getTargetCols();
    final DataType predictionType;
    if (targetCols.isEmpty()) {
        predictionType = StringCell.TYPE;
    } else {
        predictionType = targetCols.get(0).getType();
    }

    final boolean replaceExisting = m_doReplaceColumn.getBooleanValue();
    final String predictionName;
    if (replaceExisting) {
        predictionName = m_replaceColumn.getStringValue();
    } else {
        predictionName = DataTableSpec.getUniqueColumnName(original, m_outputColumn.getStringValue());
    }
    final DataColumnSpec predictionSpec = new DataColumnSpecCreator(predictionName, predictionType).createSpec();

    // only the column spec matters during configure, so the factory must never be asked for a cell
    final SingleCellFactory predictionFactory = new SingleCellFactory(predictionSpec) {
        /**
         * {@inheritDoc}
         */
        @Override
        public DataCell getCell(final DataRow row) {
            throw new IllegalStateException();
        }
    };

    if (m_addConfidence.getBooleanValue()) {
        final String confidenceName =
            DataTableSpec.getUniqueColumnName(rearranger.createSpec(), m_confidenceColumn.getStringValue());
        final DataColumnSpec confidenceSpec = new DataColumnSpecCreator(confidenceName, DoubleCell.TYPE).createSpec();
        rearranger.append(new SingleCellFactory(confidenceSpec) {
            @Override
            public DataCell getCell(final DataRow row) {
                throw new IllegalStateException();
            }
        });
    }

    if (replaceExisting) {
        rearranger.replace(predictionFactory, predictionName);
    } else {
        rearranger.append(predictionFactory);
    }
    return new DataTableSpec[] { rearranger.createSpec() };
}
use of org.knime.core.data.DataTableSpec in project knime-core by knime.
the class NumericOutliersReviser method replaceOutliers.
/**
 * Streams the row input to the row output, replacing the cells of the outlier columns by the result of
 * {@code treatCellValue}. While doing so the member counts, the outlier replacement counts, the counts for
 * groups unknown to the model and (if enabled) the new domains are updated.
 *
 * @param exec the execution context
 * @param in the row input whose outliers have to be treated
 * @param out the row output receiving the treated rows
 * @param outlierModel the model storing the permitted intervals
 * @param memberCounter the member counter
 * @param outlierRepCounter the outlier replacement counter
 * @param missingGroupsCounter the missing groups counter
 * @throws Exception any exception to indicate an error, cancelation
 */
private void replaceOutliers(final ExecutionContext exec, final RowInput in, final RowOutput out, final NumericOutliersModel outlierModel, final MemberCounter memberCounter, final MemberCounter outlierRepCounter, final MemberCounter missingGroupsCounter) throws Exception {
    final DataTableSpec inSpec = in.getDataTableSpec();
    final int columnCount = m_outlierColNames.length;

    // positions of the outlier columns inside the input table and their specs
    final int[] columnPositions = calculateOutlierIndicies(inSpec);
    final DataColumnSpec[] columnSpecs = new DataColumnSpec[columnCount];
    for (int col = 0; col < columnCount; col++) {
        columnSpecs[col] = inSpec.getColumnSpec(columnPositions[col]);
    }

    // one buffer shared by all rows; the re-arranger copies the values anyway
    final DataCell[] rowBuffer = new DataCell[columnCount];
    final AbstractCellFactory treatingFactory = new AbstractCellFactory(true, columnSpecs) {
        @Override
        public DataCell[] getCells(final DataRow row) {
            final GroupKey groupKey = outlierModel.getKey(row, inSpec);
            // null when the model holds no intervals for this group
            final Map<String, double[]> intervals = outlierModel.getGroupIntervals(groupKey);
            for (int col = 0; col < columnCount; col++) {
                final String columnName = m_outlierColNames[col];
                final DataCell input = row.getCell(columnPositions[col]);
                final DataCell result;
                if (input.isMissing()) {
                    // missing cells pass through untouched and are not counted
                    result = input;
                } else if (intervals == null) {
                    // unknown group: record it and keep the value as is
                    missingGroupsCounter.incrementMemberCount(columnName, groupKey);
                    result = input;
                } else {
                    // known group: count the member and treat the value if it is an outlier
                    memberCounter.incrementMemberCount(columnName, groupKey);
                    result = treatCellValue(intervals.get(columnName), input);
                }
                // a changed value means the cell was an outlier
                if (!result.equals(input)) {
                    outlierRepCounter.incrementMemberCount(columnName, groupKey);
                }
                // update the domain if necessary
                if (m_updateDomain && !result.isMissing()) {
                    m_domainUpdater.updateDomain(columnName, ((DoubleValue) result).getDoubleValue());
                }
                rowBuffer[col] = result;
            }
            return rowBuffer;
        }
    };

    // overwrite the outlier columns with their treated versions and stream the table through
    final ColumnRearranger rearranger = new ColumnRearranger(inSpec);
    rearranger.replace(treatingFactory, columnPositions);
    rearranger.createStreamableFunction().runFinal(new PortInput[] { in }, new PortOutput[] { out }, exec);
    exec.setProgress(1);
}
use of org.knime.core.data.DataTableSpec in project knime-core by knime.
the class HistogramColumn method constructFromDataArray.
/**
 * Constructs the helper data structures from the numeric histogram models and the input data.
 * <p>
 * The first map of the returned pair goes from column index to histogram bin to the keys of the rows falling
 * into that bin; the second goes from column index to cell value to the keys of the rows holding that value.
 *
 * @param histograms The numeric histograms.
 * @param data The input data.
 * @param nominalColumnNames The nominal column names.
 * @return The helper data structures.
 * @see #construct(Map, DataTable, Set)
 */
protected static Pair<Map<Integer, Map<Integer, Set<RowKey>>>, Map<Integer, Map<DataValue, Set<RowKey>>>> constructFromDataArray(final Map<Integer, HistogramNumericModel> histograms, final DataTable data, final Set<String> nominalColumnNames) {
    final Map<Integer, Map<Integer, Set<RowKey>>> numericMapping = new HashMap<Integer, Map<Integer, Set<RowKey>>>();
    final Map<Integer, Map<DataValue, Set<RowKey>>> nominalMapping = new HashMap<Integer, Map<DataValue, Set<RowKey>>>();

    // first pass over the spec: decide which columns get a numeric and/or a nominal mapping
    final DataTableSpec tableSpec = data.getDataTableSpec();
    for (final DataColumnSpec colSpec : tableSpec) {
        final String colName = colSpec.getName();
        final Integer colIndex = Integer.valueOf(tableSpec.findColumnIndex(colName));
        final boolean isNumeric = colSpec.getType().isCompatible(DoubleValue.class);
        if (isNumeric && histograms.get(colIndex) != null) {
            numericMapping.put(colIndex, new HashMap<Integer, Set<RowKey>>());
        }
        final boolean isNominal = colSpec.getDomain().hasValues() || nominalColumnNames.contains(colName);
        if (isNominal) {
            nominalMapping.put(colIndex, new HashMap<DataValue, Set<RowKey>>());
        }
    }

    // second pass over the rows: file each row key under its bin respectively its value
    for (final DataRow dataRow : data) {
        for (final Entry<Integer, Map<Integer, Set<RowKey>>> numericEntry : numericMapping.entrySet()) {
            final Integer column = numericEntry.getKey();
            final DataCell cell = dataRow.getCell(column);
            if (!(cell instanceof DoubleValue)) {
                // only cells implementing DoubleValue can be assigned to a bin
                continue;
            }
            final Integer bin = Integer.valueOf(histograms.get(column).findBin((DoubleValue) cell));
            final Map<Integer, Set<RowKey>> keysByBin = numericEntry.getValue();
            Set<RowKey> keys = keysByBin.get(bin);
            if (keys == null) {
                keys = new HashSet<RowKey>();
                keysByBin.put(bin, keys);
            }
            keys.add(dataRow.getKey());
        }
        for (final Entry<Integer, Map<DataValue, Set<RowKey>>> nominalEntry : nominalMapping.entrySet()) {
            final DataCell cell = dataRow.getCell(nominalEntry.getKey().intValue());
            if (cell.isMissing()) {
                continue;
            }
            final Map<DataValue, Set<RowKey>> keysByValue = nominalEntry.getValue();
            Set<RowKey> keys = keysByValue.get(cell);
            if (keys == null) {
                keys = new HashSet<RowKey>();
                keysByValue.put(cell, keys);
            }
            keys.add(dataRow.getKey());
        }
    }
    return Pair.create(numericMapping, nominalMapping);
}
use of org.knime.core.data.DataTableSpec in project knime-core by knime.
the class RankCorrelationComputeNodeModel method configure.
/**
 * {@inheritDoc}
 * <p>
 * Resolves the included columns from the column filter (creating and defaulting the filter on first use)
 * and rejects an empty selection.
 */
@Override
protected PortObjectSpec[] configure(final PortObjectSpec[] inSpecs) throws InvalidSettingsException {
    final DataTableSpec tableSpec = (DataTableSpec) inSpecs[0];

    // no previous configuration: create the filter and let it pick its defaults
    final boolean autoConfigure = m_columnFilterModel == null;
    if (autoConfigure) {
        m_columnFilterModel = createColumnFilterModel();
        m_columnFilterModel.loadDefaults(tableSpec);
    }

    final String[] includes = m_columnFilterModel.applyTo(tableSpec).getIncludes();
    if (autoConfigure) {
        setWarningMessage("Auto configuration: Using all suitable columns (in total " + includes.length + ")");
    }
    if (includes.length == 0) {
        throw new InvalidSettingsException("No columns selected");
    }

    final PortObjectSpec tableOutSpec = PMCCPortObjectAndSpec.createOutSpec(includes);
    final PortObjectSpec modelOutSpec = new PMCCPortObjectAndSpec(includes);
    return new PortObjectSpec[] { tableOutSpec, modelOutSpec, null };
}
use of org.knime.core.data.DataTableSpec in project knime-core by knime.
the class CronbachNodeModel method configure.
/**
 * {@inheritDoc}
 * <p>
 * Requires a double compatible column in the input and at least two included columns; the included columns
 * are resolved from the column filter, which is created and defaulted on first use.
 *
 * @throws InvalidSettingsException if the input has no double compatible column or fewer than two columns
 *             are included
 */
@Override
protected PortObjectSpec[] configure(final PortObjectSpec[] inSpecs) throws InvalidSettingsException {
    DataTableSpec in = (DataTableSpec) inSpecs[0];
    if (!in.containsCompatibleType(DoubleValue.class)) {
        throw new InvalidSettingsException("No double compatible columns in input");
    }
    final String[] includes;
    if (m_columnFilterModel == null) {
        m_columnFilterModel = createColumnFilterModel();
        // auto-configure, no previous configuration
        m_columnFilterModel.loadDefaults(in);
        includes = m_columnFilterModel.applyTo(in).getIncludes();
        setWarningMessage("Auto configuration: Using all suitable columns (in total " + includes.length + ")");
    } else {
        FilterResult applyTo = m_columnFilterModel.applyTo(in);
        includes = applyTo.getIncludes();
    }
    // Cronbach's alpha is only defined for two or more items, so a single included column must be
    // rejected as well; this also brings the check in line with the error message.
    if (includes.length < 2) {
        throw new InvalidSettingsException("Please include at least two numerical columns!");
    }
    return new PortObjectSpec[] { getDataTableSpec() };
}
Aggregations