Use of org.knime.core.data.def.IntCell in the knime-core project (by KNIME).
From the class Statistics2Table, method createNominalValueTable:
/**
 * Creates a table listing, for each nominal column, all possible values
 * together with their occurrence counts. The output columns come in pairs:
 * the value column followed by its count column. Columns with fewer distinct
 * values than the longest one are padded with missing cells.
 *
 * @param nominal names of the nominal columns to include in the output
 * @return data table with nominal values and their counts for each column
 */
public DataTable createNominalValueTable(final List<String> nominal) {
    DataTableSpec outSpec = createOutSpecNominal(m_spec, nominal);
    // Two output columns (value, count) per nominal column with statistics.
    final int nrPairs = outSpec.getNumColumns() / 2;
    // Collect one value/count iterator per column that actually has nominal
    // statistics; a typed list avoids the raw Iterator[] and the unchecked
    // cast the previous implementation needed on every entry.
    List<Iterator<Map.Entry<DataCell, Integer>>> its =
        new ArrayList<Iterator<Map.Entry<DataCell, Integer>>>(nrPairs);
    for (int i = 0; i < m_nominalValues.length; i++) {
        if (m_nominalValues[i] != null) {
            its.add(m_nominalValues[i].entrySet().iterator());
        }
    }
    DataContainer cont = new DataContainer(outSpec);
    int rowIndex = 0;
    while (true) {
        boolean allExhausted = true;
        DataCell[] cells = new DataCell[2 * nrPairs];
        for (int i = 0; i < nrPairs; i++) {
            if (i < its.size() && its.get(i).hasNext()) {
                Map.Entry<DataCell, Integer> e = its.get(i).next();
                cells[2 * i] = e.getKey();
                cells[2 * i + 1] = new IntCell(e.getValue());
                allExhausted = false;
            } else {
                // Exhausted (or absent) columns are padded with missing cells.
                cells[2 * i] = DataType.getMissingCell();
                cells[2 * i + 1] = DataType.getMissingCell();
            }
        }
        if (allExhausted) {
            break;
        }
        cont.addRowToTable(new DefaultRow(RowKey.createRowKey(rowIndex++), cells));
    }
    cont.close();
    return cont.getTable();
}
Use of org.knime.core.data.def.IntCell in the knime-core project (by KNIME).
From the class AppendVariableToTableNodeModel, method createColumnRearranger:
/**
 * Creates a rearranger that appends one constant column per selected flow
 * variable, filled with the variable's value at configuration time.
 *
 * @param spec the input table spec
 * @return the rearranger with the variable columns appended
 * @throws InvalidSettingsException if no variables are selected, a selected
 *         variable does not exist, or its type is unsupported
 */
private ColumnRearranger createColumnRearranger(final DataTableSpec spec) throws InvalidSettingsException {
    ColumnRearranger arranger = new ColumnRearranger(spec);
    // Track existing column names so the appended ones are unique.
    Set<String> nameHash = new HashSet<String>();
    for (DataColumnSpec c : spec) {
        nameHash.add(c.getName());
    }
    List<Pair<String, FlowVariable.Type>> vars;
    if (m_settings.getIncludeAll()) {
        vars = getAllVariables();
    } else {
        vars = m_settings.getVariablesOfInterest();
    }
    if (vars.isEmpty()) {
        throw new InvalidSettingsException("No variables selected");
    }
    DataColumnSpec[] specs = new DataColumnSpec[vars.size()];
    final DataCell[] values = new DataCell[vars.size()];
    for (int i = 0; i < vars.size(); i++) {
        Pair<String, FlowVariable.Type> c = vars.get(i);
        String name = c.getFirst();
        DataType type;
        // Resolve the variable's current value; a missing variable is a
        // configuration error, reported as InvalidSettingsException.
        switch (c.getSecond()) {
        case DOUBLE:
            type = DoubleCell.TYPE;
            try {
                double dValue = peekFlowVariableDouble(name);
                values[i] = new DoubleCell(dValue);
            } catch (NoSuchElementException e) {
                throw new InvalidSettingsException("No such flow variable (of type double): " + name);
            }
            break;
        case INTEGER:
            type = IntCell.TYPE;
            try {
                int iValue = peekFlowVariableInt(name);
                values[i] = new IntCell(iValue);
            } catch (NoSuchElementException e) {
                throw new InvalidSettingsException("No such flow variable (of type int): " + name);
            }
            break;
        case STRING:
            type = StringCell.TYPE;
            try {
                String sValue = peekFlowVariableString(name);
                // A string variable may hold null; map it to the empty string.
                sValue = sValue == null ? "" : sValue;
                values[i] = new StringCell(sValue);
            } catch (NoSuchElementException e) {
                throw new InvalidSettingsException("No such flow variable (of type String): " + name);
            }
            break;
        default:
            throw new InvalidSettingsException("Unsupported variable type: " + c.getSecond());
        }
        // If the variable clashes with an existing column, append a marker
        // suffix first. Use Locale.ROOT so the case-insensitive suffix check
        // is stable regardless of the default locale (e.g. Turkish dotless-i).
        if (nameHash.contains(name) && !name.toLowerCase(java.util.Locale.ROOT).endsWith("(variable)")) {
            name = name.concat(" (variable)");
        }
        // Still not unique? Add an increasing "(#n)" counter until it is.
        String newName = name;
        int uniquifier = 1;
        while (!nameHash.add(newName)) {
            newName = name + " (#" + (uniquifier++) + ")";
        }
        specs[i] = new DataColumnSpecCreator(newName, type).createSpec();
    }
    arranger.append(new AbstractCellFactory(specs) {
        /**
         * {@inheritDoc}
         */
        @Override
        public DataCell[] getCells(final DataRow row) {
            // The variable values are constant over the table, so the same
            // cell array is returned for every row.
            return values;
        }
    });
    return arranger;
}
Use of org.knime.core.data.def.IntCell in the knime-core project (by KNIME).
From the class LogRegPredictor, method getCells:
/**
 * {@inheritDoc}
 */
@Override
public DataCell[] getCells(final DataRow row) {
    // Rows with missing values in the predictor columns cannot be scored.
    if (hasMissingValues(row)) {
        return createMissingOutput();
    }
    final MissingHandling missingHandling = new MissingHandling(true);
    // One cell per target-category probability (if enabled) plus one for the
    // predicted class itself.
    DataCell[] cells = m_includeProbs ? new DataCell[1 + m_targetDomainValuesCount] : new DataCell[1];
    Arrays.fill(cells, new IntCell(0));
    // column vector
    final RealMatrix x = MatrixUtils.createRealMatrix(1, m_parameters.size());
    for (int i = 0; i < m_parameters.size(); i++) {
        String parameter = m_parameters.get(i);
        String predictor = null;
        String value = null;
        boolean rowIsEmpty = true;
        // Find the first predictor with an entry for this parameter in the
        // predictor-to-parameter (PP) matrix.
        for (final Iterator<String> iter = m_predictors.iterator(); iter.hasNext(); ) {
            predictor = iter.next();
            value = m_ppMatrix.getValue(parameter, predictor, null);
            if (null != value) {
                rowIsEmpty = false;
                break;
            }
        }
        if (rowIsEmpty) {
            // No predictor entry at all: this parameter is the intercept term.
            x.setEntry(0, i, 1);
        } else {
            if (m_factors.contains(predictor)) {
                // Categorical predictor: dummy-encode the cell's value
                // relative to the baseline category (index 0).
                List<DataCell> values = m_values.get(predictor);
                DataCell cell = row.getCell(m_parameterI.get(parameter));
                int index = values.indexOf(cell);
                /* When building a general regression model, for each
                categorical fields, there is one category used as the
                default baseline and therefore it didn't show in the
                ParameterList in PMML. This design for the training is fine,
                but in the prediction, when the input of Employment is
                the default baseline, the parameters should all be 0.
                See the commit message for an example and more details.
                */
                if (index > 0) {
                    x.setEntry(0, i + index - 1, 1);
                    // Skip the remaining dummy columns of this factor; the
                    // loop increment then moves past the whole factor block.
                    i += values.size() - 2;
                }
            } else if (m_baseLabelToColName.containsKey(parameter) && m_vectorLengths.containsKey(m_baseLabelToColName.get(parameter))) {
                // Vector-valued column: the predictor name encodes the
                // element index within the vector.
                final DataCell cell = row.getCell(m_parameterI.get(parameter));
                Optional<NameAndIndex> vectorValue = VectorHandling.parse(predictor);
                if (vectorValue.isPresent()) {
                    int j = vectorValue.get().getIndex();
                    value = m_ppMatrix.getValue(parameter, predictor, null);
                    // PP-matrix entry holds the exponent for this term.
                    double exponent = Integer.valueOf(value);
                    double radix = RegressionTrainingRow.getValue(cell, j, missingHandling);
                    x.setEntry(0, i, Math.pow(radix, exponent));
                }
            } else {
                // Plain numeric predictor raised to the PP-matrix exponent.
                DataCell cell = row.getCell(m_parameterI.get(parameter));
                double radix = ((DoubleValue) cell).getDoubleValue();
                double exponent = Integer.valueOf(value);
                x.setEntry(0, i, Math.pow(radix, exponent));
            }
        }
    }
    // column vector
    RealMatrix r = x.multiply(m_beta);
    // determine the column with highest probability
    int maxIndex = 0;
    double maxValue = r.getEntry(0, 0);
    for (int i = 1; i < r.getColumnDimension(); i++) {
        if (r.getEntry(0, i) > maxValue) {
            maxValue = r.getEntry(0, i);
            maxIndex = i;
        }
    }
    if (m_includeProbs) {
        // compute probabilities of the target categories (softmax over the
        // linear predictors, guarded against exp() overflow)
        for (int i = 0; i < m_targetCategories.size(); i++) {
            // test if calculation would overflow
            boolean overflow = false;
            for (int k = 0; k < r.getColumnDimension(); k++) {
                if ((r.getEntry(0, k) - r.getEntry(0, i)) > 700) {
                    overflow = true;
                }
            }
            if (!overflow) {
                double sum = 0;
                for (int k = 0; k < r.getColumnDimension(); k++) {
                    sum += Math.exp(r.getEntry(0, k) - r.getEntry(0, i));
                }
                cells[m_targetCategoryIndex.get(i)] = new DoubleCell(1.0 / sum);
            } else {
                // Probability underflows to 0 when another category dominates.
                cells[m_targetCategoryIndex.get(i)] = new DoubleCell(0);
            }
        }
    }
    // the last cell is the prediction
    cells[cells.length - 1] = m_targetCategories.get(maxIndex);
    return cells;
}
Use of org.knime.core.data.def.IntCell in the knime-core project (by KNIME).
From the class CategoryToNumberNodeDialogPane, method saveSettingsTo:
/**
 * {@inheritDoc}
 */
@Override
protected void saveSettingsTo(final NodeSettingsWO settings) throws InvalidSettingsException {
    Set<String> included = m_includedColumns.getIncludedColumnSet();
    m_settings.setIncludedColumns(included.toArray(new String[included.size()]));
    m_settings.setIncludeAll(m_includedColumns.isKeepAllSelected());
    m_settings.setAppendColumns(m_appendColums.isSelected());
    m_settings.setColumnSuffix(m_columnSuffix.getText());
    m_settings.setStartIndex((Integer) m_startIndex.getValue());
    m_settings.setIncrement((Integer) m_increment.getValue());
    m_settings.setMaxCategories((Integer) m_maxCategories.getValue());
    // A blank field means "use a missing cell"; otherwise the text must be an
    // integer — report bad input as InvalidSettingsException instead of
    // letting an unchecked NumberFormatException escape the dialog.
    m_settings.setDefaultValue(parseIntOrMissing(m_defaultValue.getText(), "default value"));
    m_settings.setMapMissingTo(parseIntOrMissing(m_mapMissingTo.getText(), "map missing to"));
    m_settings.saveSettings(settings);
}

/**
 * Converts a dialog text field to an {@link IntCell}, or a missing cell when
 * the field is blank.
 *
 * @param text the raw text of the field (may have surrounding whitespace)
 * @param label human-readable field name used in the error message
 * @return an IntCell with the parsed value, or the missing cell for blank input
 * @throws InvalidSettingsException if the non-blank text is not a valid integer
 */
private static DataCell parseIntOrMissing(final String text, final String label)
        throws InvalidSettingsException {
    final String trimmed = text.trim();
    if (trimmed.isEmpty()) {
        return DataType.getMissingCell();
    }
    try {
        return new IntCell(Integer.parseInt(trimmed));
    } catch (NumberFormatException e) {
        throw new InvalidSettingsException(
            "Not a valid integer for \"" + label + "\": " + trimmed, e);
    }
}
Use of org.knime.core.data.def.IntCell in the knime-core project (by KNIME).
From the class TreeEnsembleClassificationPredictorCellFactory, method getCells:
/**
 * {@inheritDoc}
 */
@Override
public DataCell[] getCells(final DataRow row) {
    TreeEnsembleModelPortObject modelObject = m_predictor.getModelObject();
    TreeEnsemblePredictorConfiguration cfg = m_predictor.getConfiguration();
    final TreeEnsembleModel ensembleModel = modelObject.getEnsembleModel();
    // Output layout: [prediction, confidence?, per-class confidences?, model count?]
    int size = 1;
    final boolean appendConfidence = cfg.isAppendPredictionConfidence();
    if (appendConfidence) {
        size += 1;
    }
    final boolean appendClassConfidences = cfg.isAppendClassConfidences();
    if (appendClassConfidences) {
        size += m_targetValueMap.size();
    }
    final boolean appendModelCount = cfg.isAppendModelCount();
    if (appendModelCount) {
        size += 1;
    }
    final boolean hasOutOfBagFilter = m_predictor.hasOutOfBagFilter();
    DataCell[] result = new DataCell[size];
    // Restrict the row to the learn columns before building the record.
    DataRow filterRow = new FilterColumnRow(row, m_learnColumnInRealDataIndices);
    PredictorRecord record = ensembleModel.createPredictorRecord(filterRow, m_learnSpec);
    if (record == null) {
        // missing value
        Arrays.fill(result, DataType.getMissingCell());
        return result;
    }
    final Voting voting = m_votingFactory.createVoting();
    final int nrModels = ensembleModel.getNrModels();
    int nrValidModels = 0;
    // Let each tree vote on the class, skipping trees whose training data
    // contained this row when out-of-bag filtering is active.
    for (int i = 0; i < nrModels; i++) {
        if (hasOutOfBagFilter && m_predictor.isRowPartOfTrainingData(row.getKey(), i)) {
            // ignore, row was used to train the model
        } else {
            TreeModelClassification m = ensembleModel.getTreeModelClassification(i);
            TreeNodeClassification match = m.findMatchingNode(record);
            voting.addVote(match);
            nrValidModels += 1;
        }
    }
    final NominalValueRepresentation[] targetVals = ((TreeTargetNominalColumnMetaData) ensembleModel.getMetaData().getTargetMetaData()).getValues();
    String majorityClass = voting.getMajorityClass();
    int index = 0;
    if (majorityClass == null) {
        // No tree voted (all were filtered out): emit missing cells, but
        // position index at the last slot so the model count (0 votes) can
        // still overwrite it below.
        assert nrValidModels == 0;
        Arrays.fill(result, DataType.getMissingCell());
        index = size - 1;
    } else {
        result[index++] = m_targetValueMap.get(majorityClass);
        // final float[] distribution = voting.getClassProbabilities();
        if (appendConfidence) {
            // Confidence of the winning class only.
            result[index++] = new DoubleCell(voting.getClassProbabilityForClass(majorityClass));
        }
        if (appendClassConfidences) {
            // One confidence cell per target value, in map iteration order.
            for (String targetValue : m_targetValueMap.keySet()) {
                result[index++] = new DoubleCell(voting.getClassProbabilityForClass(targetValue));
            }
        }
    }
    if (appendModelCount) {
        result[index++] = new IntCell(voting.getNrVotes());
    }
    return result;
}
Aggregations