use of org.knime.core.data.def.DoubleCell in project knime-core by knime.
the class AutoBinner method calcDomainBoundsIfNeccessary.
/**
 * Determines the per column min/max values of the given data if not already present in the domain.
 *
 * <p>Only columns named in {@code recalcValuesFor} whose domain has no bounds are scanned. All other
 * columns, including requested columns that already carry bounds, keep their original spec.
 * Missing cells are skipped while scanning.
 *
 * @param data the data
 * @param exec the execution context, used for cancellation checks, progress reporting and for
 *            creating the spec-replaced result table
 * @param recalcValuesFor the names of the columns whose bounds should be available; if
 *            {@code null} or empty, {@code data} is returned unchanged
 * @return the data with extended domain information, or {@code data} itself if nothing had to be
 *         computed
 * @throws InvalidSettingsException if a requested column does not exist in the table or is not
 *             compatible with {@link DoubleValue}
 * @throws CanceledExecutionException if {@code exec.checkCanceled()} signals cancellation while the
 *             rows are scanned
 */
public BufferedDataTable calcDomainBoundsIfNeccessary(final BufferedDataTable data, final ExecutionContext exec, final List<String> recalcValuesFor) throws InvalidSettingsException, CanceledExecutionException {
    if (null == recalcValuesFor || recalcValuesFor.isEmpty()) {
        return data;
    }
    final DataTableSpec inSpec = data.getDataTableSpec();

    // Collect the indices of the requested columns that still lack domain bounds.
    List<Integer> valuesI = new ArrayList<Integer>();
    for (String colName : recalcValuesFor) {
        DataColumnSpec colSpec = inSpec.getColumnSpec(colName);
        if (colSpec == null) {
            // Report an unknown column as a settings problem instead of failing with an NPE below.
            throw new InvalidSettingsException("The column \"" + colName + "\" does not exist in the input table.");
        }
        if (!colSpec.getType().isCompatible(DoubleValue.class)) {
            throw new InvalidSettingsException("Can only process numeric data. The column \"" + colSpec.getName() + "\" is not numeric.");
        }
        if (!colSpec.getDomain().hasBounds()) {
            valuesI.add(inSpec.findColumnIndex(colName));
        }
    }
    if (valuesI.isEmpty()) {
        return data;
    }

    Map<Integer, Double> min = new HashMap<Integer, Double>();
    Map<Integer, Double> max = new HashMap<Integer, Double>();
    for (int col : valuesI) {
        min.put(col, Double.MAX_VALUE);
        // Double.MIN_VALUE is the smallest POSITIVE double; the lowest finite value is -Double.MAX_VALUE.
        max.put(col, -Double.MAX_VALUE);
    }

    int c = 0;
    for (DataRow row : data) {
        c++;
        exec.checkCanceled();
        exec.setProgress(c / (double) data.getRowCount());
        for (int col : valuesI) {
            if (row.getCell(col).isMissing()) {
                // A missing cell carries no numeric value and must not be cast to DoubleValue.
                continue;
            }
            double val = ((DoubleValue) row.getCell(col)).getDoubleValue();
            if (min.get(col) > val) {
                min.put(col, val);
            }
            if (max.get(col) < val) {
                max.put(col, val);
            }
        }
    }

    // Rebuild the table spec, replacing the domain only for the columns that were actually scanned.
    // If a scanned column contained no usable value, its bounds stay at the seed values set above.
    List<DataColumnSpec> newColSpecList = new ArrayList<DataColumnSpec>();
    int cc = 0;
    for (DataColumnSpec columnSpec : inSpec) {
        if (min.containsKey(cc)) {
            DataColumnSpecCreator specCreator = new DataColumnSpecCreator(columnSpec);
            DataColumnDomainCreator domainCreator = new DataColumnDomainCreator(new DoubleCell(min.get(cc)), new DoubleCell(max.get(cc)));
            specCreator.setDomain(domainCreator.createDomain());
            newColSpecList.add(specCreator.createSpec());
        } else {
            newColSpecList.add(columnSpec);
        }
        cc++;
    }
    DataTableSpec spec = new DataTableSpec(newColSpecList.toArray(new DataColumnSpec[0]));
    return exec.createSpecReplacerTable(data, spec);
}
use of org.knime.core.data.def.DoubleCell in project knime-core by knime.
the class TreeEnsembleClassificationPredictorCellFactory method getCells.
/**
 * {@inheritDoc}
 *
 * <p>The result holds the majority class first, optionally followed by its confidence, one
 * confidence per entry of the target value map, and the number of votes, depending on the
 * predictor configuration. If no predictor record can be created for the row, every cell is
 * missing. If the voting yields no majority class, every cell is missing except the vote count,
 * which is still written when it is configured.
 */
@Override
public DataCell[] getCells(final DataRow row) {
    final TreeEnsembleModelPortObject portObject = m_predictor.getModelObject();
    final TreeEnsemblePredictorConfiguration config = m_predictor.getConfiguration();
    final TreeEnsembleModel model = portObject.getEnsembleModel();

    // One cell for the predicted class plus one slot per optional output.
    final boolean withConfidence = config.isAppendPredictionConfidence();
    int cellCount = 1 + (withConfidence ? 1 : 0);
    final boolean withClassConfidences = config.isAppendClassConfidences();
    cellCount += withClassConfidences ? m_targetValueMap.size() : 0;
    final boolean withModelCount = config.isAppendModelCount();
    cellCount += withModelCount ? 1 : 0;

    final boolean filterOutOfBag = m_predictor.hasOutOfBagFilter();
    final DataCell[] cells = new DataCell[cellCount];
    final DataRow learnColumnsRow = new FilterColumnRow(row, m_learnColumnInRealDataIndices);
    final PredictorRecord predictorRecord = model.createPredictorRecord(learnColumnsRow, m_learnSpec);
    if (predictorRecord == null) {
        // missing value
        Arrays.fill(cells, DataType.getMissingCell());
        return cells;
    }

    final Voting votes = m_votingFactory.createVoting();
    final int modelCount = model.getNrModels();
    int usedModels = 0;
    for (int modelIdx = 0; modelIdx < modelCount; modelIdx++) {
        if (filterOutOfBag && m_predictor.isRowPartOfTrainingData(row.getKey(), modelIdx)) {
            // ignore, row was used to train the model
            continue;
        }
        final TreeModelClassification tree = model.getTreeModelClassification(modelIdx);
        final TreeNodeClassification leaf = tree.findMatchingNode(predictorRecord);
        votes.addVote(leaf);
        usedModels++;
    }

    // Not read afterwards; kept so the metadata lookup and cast still execute as before.
    final NominalValueRepresentation[] targetValues = ((TreeTargetNominalColumnMetaData) model.getMetaData().getTargetMetaData()).getValues();

    final String winner = votes.getMajorityClass();
    int next = 0;
    if (winner != null) {
        cells[next++] = m_targetValueMap.get(winner);
        if (withConfidence) {
            cells[next++] = new DoubleCell(votes.getClassProbabilityForClass(winner));
        }
        if (withClassConfidences) {
            for (String className : m_targetValueMap.keySet()) {
                cells[next++] = new DoubleCell(votes.getClassProbabilityForClass(className));
            }
        }
    } else {
        assert usedModels == 0;
        Arrays.fill(cells, DataType.getMissingCell());
        // Point at the last slot, which is where the model count goes if it is appended.
        next = cellCount - 1;
    }
    if (withModelCount) {
        cells[next++] = new IntCell(votes.getNrVotes());
    }
    return cells;
}
use of org.knime.core.data.def.DoubleCell in project knime-core by knime.
the class ProximityMatrix method createTable.
/**
 * Writes the entries of this matrix into a new table of double columns.
 *
 * <p>Column names are taken from {@code getRowKeyForTable(1, i)} and row keys from
 * {@code getRowKeyForTable(0, i)}; each cell holds {@code getEntryAt(row, column)}.
 *
 * @param exec the execution context used to create the container, check for cancellation and
 *            report progress
 * @return the table read back from the closed container
 * @throws CanceledExecutionException if {@code exec.checkCanceled()} signals cancellation
 */
public BufferedDataTable createTable(final ExecutionContext exec) throws CanceledExecutionException {
    final int columnCount = getNumCols();
    final int rowCount = getNumRows();

    // One double column per matrix column.
    final DataColumnSpec[] columns = new DataColumnSpec[columnCount];
    for (int col = 0; col < columns.length; col++) {
        final String columnName = getRowKeyForTable(1, col).getString();
        columns[col] = new DataColumnSpecCreator(columnName, DoubleCell.TYPE).createSpec();
    }

    final BufferedDataContainer out = exec.createDataContainer(new DataTableSpec(columns));
    for (int r = 0; r < rowCount; r++) {
        exec.checkCanceled();
        exec.setProgress(((double) r) / rowCount, "Row " + r + "/" + rowCount);
        final DataCell[] rowCells = new DataCell[columnCount];
        for (int col = 0; col < columnCount; col++) {
            rowCells[col] = new DoubleCell(getEntryAt(r, col));
        }
        out.addRowToTable(new DefaultRow(getRowKeyForTable(0, r), rowCells));
    }
    out.close();
    return out.getTable();
}
use of org.knime.core.data.def.DoubleCell in project knime-core by knime.
the class TreeEnsembleStatisticsNodeModel method execute.
/**
 * Produces two tables from the tree ensemble at input port 0: a single-row table with
 * ensemble-wide statistics (model count, min/max/average level, min/max/average node count) and a
 * table with one row per tree holding its number of levels and number of nodes.
 */
@Override
protected PortObject[] execute(final PortObject[] inObjects, final ExecutionContext exec) throws Exception {
    final TreeEnsembleModelPortObject modelPort = (TreeEnsembleModelPortObject) inObjects[0];
    final TreeEnsembleModel ensemble = modelPort.getEnsembleModel();
    final EnsembleStatistic stats = new EnsembleStatistic(ensemble);

    // Ensemble-wide summary: exactly one row.
    final DataContainer ensembleTable = exec.createDataContainer(createEnsembleStatsSpec());
    final DataCell[] summary = {
        new IntCell(ensemble.getNrModels()),
        new IntCell(stats.getMinLevel()),
        new IntCell(stats.getMaxLevel()),
        new DoubleCell(stats.getAvgLevel()),
        new IntCell(stats.getMinNumNodes()),
        new IntCell(stats.getMaxNumNodes()),
        new DoubleCell(stats.getAvgNumNodes())
    };
    ensembleTable.addRowToTable(new DefaultRow(RowKey.createRowKey(0L), summary));
    ensembleTable.close();

    // Per-tree statistics: one row per model, keyed by the model index.
    final DataContainer treeTable = exec.createDataContainer(createTreeStatsSpec());
    for (int tree = 0; tree < ensemble.getNrModels(); tree++) {
        final TreeStatistic perTree = stats.getTreeStatistic(tree);
        final DataCell[] treeRow = {
            new IntCell(perTree.getNumLevels()),
            new IntCell(perTree.getNumNodes())
        };
        treeTable.addRowToTable(new DefaultRow(RowKey.createRowKey((long) tree), treeRow));
    }
    treeTable.close();

    final PortObject ensembleOut = (PortObject) ensembleTable.getTable();
    final PortObject treeOut = (PortObject) treeTable.getTable();
    return new PortObject[] { ensembleOut, treeOut };
}
use of org.knime.core.data.def.DoubleCell in project knime-core by knime.
the class GradientBoostingPredictorCellFactory method getCell.
/**
 * {@inheritDoc}
 *
 * <p>Restricts the row to the learn columns, builds a predictor record from it and wraps the
 * model's prediction in a {@link DoubleCell}.
 */
@Override
public DataCell getCell(final DataRow row) {
    final DataRow learnColumnsRow = new FilterColumnRow(row, m_learnColumnInRealDataIndices);
    return new DoubleCell(m_model.predict(m_model.createPredictorRecord(learnColumnsRow, m_learnSpec)));
}
Aggregations