use of org.knime.core.data.def.StringCell in project knime-core by knime.
the class RuleSetToTable method createRow.
/**
 * Assembles the {@link DataCell} values of a single output row describing {@code rule}.
 * <p>
 * Which cells are produced (and in which order) is driven by the node settings: the rule itself
 * (either split into condition and outcome or as a single {@code condition => outcome} text), then
 * optionally confidence and weight, then optionally record count and number of correct matches, then
 * optionally one record-count cell per score value and one probability cell per score value.
 *
 * @param rule A PMML {@link Rule}.
 * @param outcomeType The expected outcome.
 * @param types The types of the input column.
 * @param scoreValues The score values for which score distribution cells are created; a value that is
 *            not a key of the rule's score distribution yields a missing cell.
 * @return The cells for the {@code rule}.
 */
private DataCell[] createRow(final Rule rule, final DataType outcomeType, final Map<String, DataType> types, final List<String> scoreValues) {
    final List<DataCell> cells = new ArrayList<>();
    final boolean usePrecedence = !m_settings.getAdditionalParentheses().getBooleanValue();
    final boolean splitRules = m_settings.getSplitRules().getBooleanValue();

    // The textual condition is needed in both layouts.
    final String conditionText = convertToString(rule.getCondition(), usePrecedence, types);
    if (!splitRules) {
        // single column: "<condition> => <outcome>"
        cells.add(new StringCell(conditionText + " => " + toString(convertToExpectedType(rule.getOutcome(), outcomeType))));
    } else {
        // two columns: condition text and typed outcome
        cells.add(new StringCell(conditionText));
        cells.add(convertToExpectedType(rule.getOutcome(), outcomeType));
    }

    if (m_settings.getConfidenceAndWeight().getBooleanValue()) {
        cells.add(toCell(rule.getConfidence()));
        cells.add(toCell(rule.getWeight()));
    }

    if (m_settings.getProvideStatistics().getBooleanValue()) {
        cells.add(toCell(rule.getRecordCount()));
        cells.add(toCell(rule.getNbCorrect()));
    }

    final Map<String, ScoreProbabilityAndRecordCount> scoreDistribution = rule.getScoreDistribution();

    final boolean addRecordCounts = m_settings.getScoreTableRecordCount().isEnabled() && m_settings.getScoreTableRecordCount().getBooleanValue();
    if (addRecordCounts) {
        for (final String score : scoreValues) {
            final boolean known = scoreDistribution.containsKey(score);
            cells.add(known ? new DoubleCell(scoreDistribution.get(score).getRecordCount()) : DataType.getMissingCell());
        }
    }

    final boolean addProbabilities = m_settings.getScoreTableProbability().isEnabled() && m_settings.getScoreTableProbability().getBooleanValue();
    if (addProbabilities) {
        for (final String score : scoreValues) {
            if (!scoreDistribution.containsKey(score)) {
                cells.add(DataType.getMissingCell());
                continue;
            }
            final BigDecimal probability = scoreDistribution.get(score).getProbability();
            if (probability == null) {
                cells.add(DataType.getMissingCell());
            } else {
                cells.add(new DoubleCell(probability.doubleValue()));
            }
        }
    }
    return cells.toArray(new DataCell[cells.size()]);
}
use of org.knime.core.data.def.StringCell in project knime-core by knime.
the class Time2StringNodeModel method createColumnRearranger.
/**
 * {@inheritDoc}
 * @since 2.6
 */
@Override
protected ColumnRearranger createColumnRearranger(final DataTableSpec inSpec) throws InvalidSettingsException {
    // Without any date/time column there is nothing this node could convert.
    if (!inSpec.containsCompatibleType(DateAndTimeValue.class)) {
        throw new InvalidSettingsException("Input table must contain at least timestamp column!");
    }

    final String configuredColumn = m_selectedCol.getStringValue();
    final boolean nothingConfigured = configuredColumn == null || configuredColumn.isEmpty();
    if (nothingConfigured) {
        // Auto-configuration: pick the first date/time compatible column and derive the new name from it.
        for (DataColumnSpec candidate : inSpec) {
            if (!candidate.getType().isCompatible(DateAndTimeValue.class)) {
                continue;
            }
            final String candidateName = candidate.getName();
            m_selectedCol.setStringValue(candidateName);
            m_newColName.setStringValue(candidateName + "_" + COL_NAME_SUFFIX);
            setWarningMessage("Auto-selected column: '" + candidateName + "'");
            break;
        }
    } else if (!inSpec.containsName(configuredColumn)) {
        // The previously selected column vanished from the input.
        throw new InvalidSettingsException("Column " + configuredColumn + " not found in input spec!");
    }

    final ColumnRearranger rearranger = new ColumnRearranger(inSpec);
    final boolean replace = m_replaceCol.getBooleanValue();
    // When replacing, the output keeps the name of the selected column; otherwise a unique new name is used.
    final String uniqueNewName = DataTableSpec.getUniqueColumnName(inSpec, m_newColName.getStringValue());
    final String outputName = replace ? m_selectedCol.getStringValue() : uniqueNewName;
    final DataColumnSpecCreator outputSpecCreator = new DataColumnSpecCreator(outputName, StringCell.TYPE);

    // Formatting is done in UTC, matching the UTC calendar obtained from the cells below.
    final SimpleDateFormat formatter = new SimpleDateFormat(m_pattern.getStringValue());
    formatter.setTimeZone(DateAndTimeCell.UTC_TIMEZONE);
    final int sourceIndex = inSpec.findColumnIndex(m_selectedCol.getStringValue());

    final SingleCellFactory factory = new SingleCellFactory(outputSpecCreator.createSpec()) {
        @Override
        public DataCell getCell(final DataRow row) {
            final DataCell input = row.getCell(sourceIndex);
            if (input.isMissing()) {
                return DataType.getMissingCell();
            }
            if (!input.getType().isCompatible(DateAndTimeValue.class)) {
                // Not a date/time cell: report it and produce a missing value instead of failing.
                LOGGER.error("Encountered unsupported data type: " + input.getType() + " in row: " + row.getKey());
                return DataType.getMissingCell();
            }
            final DateAndTimeValue dateValue = (DateAndTimeValue) input;
            return new StringCell(formatter.format(dateValue.getUTCCalendarClone().getTime()));
        }
    };

    if (replace) {
        rearranger.replace(factory, m_selectedCol.getStringValue());
    } else {
        rearranger.append(factory);
    }
    return rearranger;
}
use of org.knime.core.data.def.StringCell in project knime-core by knime.
the class RuleEngineNodeModel method getRulesOutcome.
/**
 * Computes the outcome of the first rule in {@code rules} whose condition matches {@code row} with
 * {@link MatchState#matchedAndStop}.
 * <p>
 * In compatibility mode a {@link LongCell} result is narrowed to an {@link IntCell}; values that do
 * not fit into an {@code int} are rejected instead of being silently truncated. If the requested
 * output type is {@link StringCell#TYPE}, a non-missing result of another type is converted using its
 * {@code toString()} representation.
 *
 * @param outType The expected type of the output column.
 * @param row The row the rules are evaluated on.
 * @param rules The rules, tried in iteration order.
 * @param isDisallowLongOutputForCompatibility If {@code true}, {@link LongCell} results are converted
 *            to {@link IntCell}s (behavior of nodes created with older KNIME versions, see the
 *            exception message below).
 * @param variableProvider Passed on to the condition matching and to the outcome computation.
 * @return The (possibly converted) outcome of the first matching rule, or a missing cell if no rule
 *         matched.
 * @throws RuntimeException If compatibility mode is on and the result is outside the {@code int} range.
 * @noreference This method is not intended to be referenced by clients.
 */
public static final DataCell getRulesOutcome(final DataType outType, final DataRow row, final List<Rule> rules, final boolean isDisallowLongOutputForCompatibility, final VariableProvider variableProvider) {
    for (Rule r : rules) {
        if (r.getCondition().matches(row, variableProvider).getOutcome() == MatchState.matchedAndStop) {
            Outcome outcome2 = r.getOutcome();
            DataCell cell = (DataCell) outcome2.getComputedResult(row, variableProvider);
            // Test for the concrete LongCell class, not for LongValue: other cells that merely
            // implement the long interface (e.g. booleans) must not be converted.
            if (cell instanceof LongCell && isDisallowLongOutputForCompatibility) {
                long l = ((LongValue) cell).getLongValue();
                // Guard both ends of the int range; the plain cast below would wrap around otherwise.
                if (l > Integer.MAX_VALUE || l < Integer.MIN_VALUE) {
                    final String bound = l > Integer.MAX_VALUE
                        ? "Values larger than " + Integer.MAX_VALUE
                        : "Values smaller than " + Integer.MIN_VALUE;
                    throw new RuntimeException(bound + " not supported in old instances of the node -- recreate the node " + "(node was created using an KNIME version < 3.2");
                }
                cell = new IntCell((int) l);
            }
            if (outType.equals(StringCell.TYPE) && !cell.isMissing() && !cell.getType().equals(StringCell.TYPE)) {
                // String output requested but the rule produced another type: use its text form.
                return new StringCell(cell.toString());
            } else {
                return cell;
            }
        }
    }
    // No rule matched.
    return DataType.getMissingCell();
}
use of org.knime.core.data.def.StringCell in project knime-core by knime.
the class PMMLRuleSetPredictorNodeModel method createRearranger.
/**
 * Constructs the {@link ColumnRearranger} for computing the new columns.
 * <p>
 * The prediction column is appended (preceded by the confidence column if requested); when
 * {@code replaceColumn} is set, the existing column is removed and the prediction column is moved to
 * its former position.
 *
 * @param obj The {@link PMMLPortObject} of the preprocessing model.
 * @param spec The {@link DataTableSpec} of the table.
 * @param replaceColumn Should replace the {@code outputColumnName}?
 * @param outputColumnName The output column name (which might be an existing one).
 * @param addConfidence Should add the confidence values to a column?
 * @param confidenceColumnName The name of the confidence column.
 * @param validationColumnIdx Index of the validation column, {@code -1} if not specified.
 * @param processConcurrently Should be {@code false} when the statistics are to be computed.
 * @return The {@link ColumnRearranger} computing the result.
 * @throws InvalidSettingsException Problem with rules.
 * @throws UnsupportedOperationException If the model is not scorable or the target type is not supported.
 */
private static ColumnRearranger createRearranger(final PMMLPortObject obj, final DataTableSpec spec, final boolean replaceColumn, final String outputColumnName, final boolean addConfidence, final String confidenceColumnName, final int validationColumnIdx, final boolean processConcurrently) throws InvalidSettingsException {
    List<Node> models = obj.getPMMLValue().getModels(PMMLModelType.RuleSetModel);
    if (models.size() != 1) {
        throw new InvalidSettingsException("Expected exactly on RuleSetModel, but got: " + models.size());
    }
    final PMMLRuleTranslator translator = new PMMLRuleTranslator();
    obj.initializeModelTranslator(translator);
    if (!translator.isScorable()) {
        throw new UnsupportedOperationException("The model is not scorable.");
    }
    final List<PMMLRuleTranslator.Rule> rules = translator.getRules();
    ColumnRearranger ret = new ColumnRearranger(spec);
    final List<DataColumnSpec> targetCols = obj.getSpec().getTargetCols();
    // Without a target column the outcome is treated as a string.
    final DataType dataType = targetCols.isEmpty() ? StringCell.TYPE : targetCols.get(0).getType();
    DataColumnSpecCreator specCreator = new DataColumnSpecCreator(outputColumnName, dataType);
    // Collect the possible outcomes (in rule order, without duplicates) as the domain of the output column.
    Set<DataCell> outcomes = new LinkedHashSet<>();
    for (Rule rule : rules) {
        DataCell outcome;
        if (dataType.equals(BooleanCell.TYPE)) {
            outcome = BooleanCellFactory.create(rule.getOutcome());
        } else if (dataType.equals(StringCell.TYPE)) {
            outcome = new StringCell(rule.getOutcome());
        } else if (dataType.equals(DoubleCell.TYPE)) {
            try {
                outcome = new DoubleCell(Double.parseDouble(rule.getOutcome()));
            } catch (NumberFormatException e) {
                // unparsable outcomes are simply not part of the domain
                continue;
            }
        } else if (dataType.equals(IntCell.TYPE)) {
            try {
                outcome = new IntCell(Integer.parseInt(rule.getOutcome()));
            } catch (NumberFormatException e) {
                // unparsable outcomes are simply not part of the domain
                continue;
            }
        } else if (dataType.equals(LongCell.TYPE)) {
            try {
                outcome = new LongCell(Long.parseLong(rule.getOutcome()));
            } catch (NumberFormatException e) {
                // unparsable outcomes are simply not part of the domain
                continue;
            }
        } else {
            throw new UnsupportedOperationException("Unknown outcome type: " + dataType);
        }
        outcomes.add(outcome);
    }
    specCreator.setDomain(new DataColumnDomainCreator(outcomes).createDomain());
    DataColumnSpec colSpec = specCreator.createSpec();
    final RuleSelectionMethod ruleSelectionMethod = translator.getSelectionMethodList().get(0);
    final String defaultScore = translator.getDefaultScore();
    final Double defaultConfidence = translator.getDefaultConfidence();
    final DataColumnSpec[] specs;
    if (addConfidence) {
        // confidence column first, prediction column last
        specs = new DataColumnSpec[] { new DataColumnSpecCreator(DataTableSpec.getUniqueColumnName(ret.createSpec(), confidenceColumnName), DoubleCell.TYPE).createSpec(), colSpec };
    } else {
        specs = new DataColumnSpec[] { colSpec };
    }
    final int oldColumnIndex = replaceColumn ? ret.indexOf(outputColumnName) : -1;
    ret.append(new AbstractCellFactory(processConcurrently, specs) {
        /** Data dictionary values of the target column; {@code null} if there is no target column or no entry. */
        private final List<String> m_values;
        {
            // The target column is optional (see the dataType fallback above), so do not access it blindly.
            if (targetCols.isEmpty()) {
                m_values = null;
            } else {
                Map<String, List<String>> dd = translator.getDataDictionary();
                m_values = dd.get(targetCols.get(0).getName());
            }
        }

        /**
         * {@inheritDoc}
         */
        @Override
        public DataCell[] getCells(final DataRow row) {
            // See http://www.dmg.org/v4-1/RuleSet.html#Rule
            switch (ruleSelectionMethod.getCriterion().intValue()) {
                case RuleSelectionMethod.Criterion.INT_FIRST_HIT:
                {
                    Pair<DataCell, Double> resultAndConfidence = selectFirstHit(row);
                    return toCells(resultAndConfidence);
                }
                case RuleSelectionMethod.Criterion.INT_WEIGHTED_MAX:
                {
                    Pair<DataCell, Double> resultAndConfidence = selectWeightedMax(row);
                    return toCells(resultAndConfidence);
                }
                case RuleSelectionMethod.Criterion.INT_WEIGHTED_SUM:
                {
                    Pair<DataCell, Double> resultAndConfidence = selectWeightedSum(row);
                    return toCells(resultAndConfidence);
                }
                default:
                    throw new UnsupportedOperationException(ruleSelectionMethod.getCriterion().toString());
            }
        }

        /**
         * Converts the pair to a {@link DataCell} array.
         *
         * @param resultAndConfidence The {@link Pair}.
         * @return The result and possibly the confidence (confidence first; missing if {@code null}).
         */
        private DataCell[] toCells(final Pair<DataCell, Double> resultAndConfidence) {
            if (!addConfidence) {
                return new DataCell[] { resultAndConfidence.getFirst() };
            }
            if (resultAndConfidence.getSecond() == null) {
                return new DataCell[] { DataType.getMissingCell(), resultAndConfidence.getFirst() };
            }
            return new DataCell[] { new DoubleCell(resultAndConfidence.getSecond()), resultAndConfidence.getFirst() };
        }

        /**
         * Computes the result and the confidence using the weighted sum method.
         *
         * @param row A {@link DataRow}
         * @return The result and the confidence.
         */
        private Pair<DataCell, Double> selectWeightedSum(final DataRow row) {
            final Map<String, Double> scoreToSumWeight = new LinkedHashMap<String, Double>();
            // m_values is null when no dictionary values are known for the target column.
            if (m_values != null) {
                for (String val : m_values) {
                    scoreToSumWeight.put(val, 0.0);
                }
            }
            int matchedRuleCount = 0;
            for (final PMMLRuleTranslator.Rule rule : rules) {
                if (rule.getCondition().evaluate(row, spec) == Boolean.TRUE) {
                    ++matchedRuleCount;
                    Double sumWeight = scoreToSumWeight.get(rule.getOutcome());
                    if (sumWeight == null) {
                        throw new IllegalStateException("The score value: " + rule.getOutcome() + " is not in the data dictionary.");
                    }
                    // a rule without weight contributes nothing to the sum
                    final Double wRaw = rule.getWeight();
                    final double w = wRaw == null ? 0.0 : wRaw.doubleValue();
                    scoreToSumWeight.put(rule.getOutcome(), sumWeight + w);
                }
            }
            double maxSumWeight = Double.NEGATIVE_INFINITY;
            String bestScore = null;
            for (Entry<String, Double> entry : scoreToSumWeight.entrySet()) {
                final double d = entry.getValue().doubleValue();
                if (d > maxSumWeight) {
                    maxSumWeight = d;
                    bestScore = entry.getKey();
                }
            }
            if (bestScore == null || matchedRuleCount == 0) {
                return pair(result(defaultScore), defaultConfidence);
            }
            return pair(result(bestScore), maxSumWeight / matchedRuleCount);
        }

        /**
         * Helper method to create {@link Pair}s.
         *
         * @param f The first element.
         * @param s The second element.
         * @return The new pair.
         */
        private <F, S> Pair<F, S> pair(final F f, final S s) {
            return new Pair<F, S>(f, s);
        }

        /**
         * Computes the result and the confidence using the weighted max method.
         *
         * @param row A {@link DataRow}
         * @return The result and the confidence.
         */
        private Pair<DataCell, Double> selectWeightedMax(final DataRow row) {
            double maxWeight = Double.NEGATIVE_INFINITY;
            PMMLRuleTranslator.Rule bestRule = null;
            for (final PMMLRuleTranslator.Rule rule : rules) {
                if (rule.getCondition().evaluate(row, spec) == Boolean.TRUE) {
                    // Same convention as in selectWeightedSum: a missing weight counts as 0
                    // (avoids a NullPointerException on unboxing).
                    final Double wRaw = rule.getWeight();
                    final double w = wRaw == null ? 0.0 : wRaw.doubleValue();
                    if (w > maxWeight) {
                        maxWeight = w;
                        bestRule = rule;
                    }
                }
            }
            if (bestRule == null) {
                return pair(result(defaultScore), defaultConfidence);
            }
            bestRule.setRecordCount(bestRule.getRecordCount() + 1);
            DataCell result = result(bestRule);
            if (validationColumnIdx >= 0) {
                if (row.getCell(validationColumnIdx).equals(result)) {
                    bestRule.setNbCorrect(bestRule.getNbCorrect() + 1);
                }
            }
            Double confidence = bestRule.getConfidence();
            return pair(result, confidence == null ? defaultConfidence : confidence);
        }

        /**
         * Selects the outcome of the rule and converts it to the proper outcome type.
         *
         * @param rule A {@link Rule}.
         * @return The {@link DataCell} representing the result. (May be missing.)
         */
        private DataCell result(final PMMLRuleTranslator.Rule rule) {
            String outcome = rule.getOutcome();
            return result(outcome);
        }

        /**
         * Constructs the {@link DataCell} from its {@link String} representation ({@code outcome})
         * according to the type of the output column.
         *
         * @param outcome The {@link String} representation, may be {@code null}.
         * @return The {@link DataCell}; a missing cell if {@code outcome} is {@code null} or cannot be
         *         parsed as a number of the expected type.
         */
        private DataCell result(final String outcome) {
            if (outcome == null) {
                return DataType.getMissingCell();
            }
            try {
                if (dataType.isCompatible(BooleanValue.class)) {
                    return BooleanCellFactory.create(outcome);
                }
                if (IntCell.TYPE.isASuperTypeOf(dataType)) {
                    return new IntCell(Integer.parseInt(outcome));
                }
                if (LongCell.TYPE.isASuperTypeOf(dataType)) {
                    return new LongCell(Long.parseLong(outcome));
                }
                if (DoubleCell.TYPE.isASuperTypeOf(dataType)) {
                    return new DoubleCell(Double.parseDouble(outcome));
                }
                return new StringCell(outcome);
            } catch (NumberFormatException e) {
                // keep the offending text and the parser message as the missing value reason
                return new MissingCell(outcome + "\n" + e.getMessage());
            }
        }

        /**
         * Selects the first rule that matches and computes the confidence and result for the {@code row}.
         *
         * @param row A {@link DataRow}.
         * @return The result and the confidence.
         */
        private Pair<DataCell, Double> selectFirstHit(final DataRow row) {
            for (final PMMLRuleTranslator.Rule rule : rules) {
                Boolean eval = rule.getCondition().evaluate(row, spec);
                if (eval == Boolean.TRUE) {
                    rule.setRecordCount(rule.getRecordCount() + 1);
                    DataCell result = result(rule);
                    if (validationColumnIdx >= 0) {
                        if (row.getCell(validationColumnIdx).equals(result)) {
                            rule.setNbCorrect(rule.getNbCorrect() + 1);
                        }
                    }
                    Double confidence = rule.getConfidence();
                    return pair(result, confidence == null ? defaultConfidence : confidence);
                }
            }
            return pair(result(defaultScore), defaultConfidence);
        }

        /**
         * {@inheritDoc}
         */
        @Override
        public void afterProcessing() {
            super.afterProcessing();
            obj.getPMMLValue();
            RuleSetModel ruleSet = translator.getOriginalRuleSetModel();
            assert rules.size() == ruleSet.getRuleSet().getSimpleRuleList().size() + ruleSet.getRuleSet().getCompoundRuleList().size();
            // Statistics are only written back when the model consists of simple rules exclusively.
            if (ruleSet.getRuleSet().getSimpleRuleList().size() == rules.size()) {
                for (int i = 0; i < rules.size(); ++i) {
                    Rule rule = rules.get(i);
                    final SimpleRule simpleRuleArray = ruleSet.getRuleSet().getSimpleRuleArray(i);
                    synchronized (simpleRuleArray) /*synchronized fixes AP-6766 */
                    {
                        simpleRuleArray.setRecordCount(rule.getRecordCount());
                        if (validationColumnIdx >= 0) {
                            simpleRuleArray.setNbCorrect(rule.getNbCorrect());
                        } else if (simpleRuleArray.isSetNbCorrect()) {
                            simpleRuleArray.unsetNbCorrect();
                        }
                    }
                }
            }
        }
    });
    if (replaceColumn) {
        // Drop the old column and move the freshly appended prediction column to its former position.
        ret.remove(outputColumnName);
        ret.move(ret.getColumnCount() - 1 - (addConfidence ? 1 : 0), oldColumnIndex);
    }
    return ret;
}
use of org.knime.core.data.def.StringCell in project knime-core by knime.
the class HistogramColumn method loadNominalHistogramsPrivate.
/**
 * Restores the nominal histogram models from a gzipped XML settings file.
 * <p>
 * For every column index listed under {@code NOMINAL_COLUMNS} the sub-config {@code HISTOGRAM + index}
 * is read and converted to a {@link HistogramNominalModel}. A bin whose saved value is {@code "?"} is
 * restored as a missing cell, every other bin as a {@link StringCell}.
 *
 * @param histogramsGz The gzipped XML file to read.
 * @param nominalKeysSize Expected number of bins per column index (only checked by an assertion).
 * @return The histogram models keyed by column index.
 * @throws IOException If the file cannot be read.
 * @throws InvalidSettingsException If an expected entry is missing or the bin arrays are inconsistent.
 */
private static Map<Integer, HistogramNominalModel> loadNominalHistogramsPrivate(final File histogramsGz, final int[] nominalKeysSize) throws IOException, InvalidSettingsException {
    final ConfigRO config;
    // try-with-resources: the file handle is released even if the gzip header or the XML is broken.
    try (FileInputStream is = new FileInputStream(histogramsGz);
            GZIPInputStream inData = new GZIPInputStream(is)) {
        config = NodeSettings.loadFromXML(inData);
    }
    Map<Integer, HistogramNominalModel> histograms = new HashMap<Integer, HistogramNominalModel>();
    int[] nomColumnIndices = config.getIntArray(NOMINAL_COLUMNS);
    for (int colIdx : nomColumnIndices) {
        Config h = config.getConfig(HISTOGRAM + colIdx);
        int maxCount = h.getInt(MAX_COUNT);
        int rowCount = h.getInt(ROW_COUNT);
        String colName = h.getString(COL_NAME);
        String[] values = h.getStringArray(BIN_VALUES);
        int[] binCounts = h.getIntArray(BIN_COUNTS);
        if (values.length < binCounts.length) {
            // report corrupt files through the declared exception instead of an index error
            throw new InvalidSettingsException("Histogram of column " + colIdx + " has " + binCounts.length + " bin counts but only " + values.length + " bin values.");
        }
        Map<DataValue, Integer> bins = new HashMap<DataValue, Integer>();
        for (int i = binCounts.length; i-- > 0; ) {
            // compare by content: the values come from the file and are not the same String instance as the literal
            if ("?".equals(values[i])) {
                bins.put(new MissingCell(null), binCounts[i]);
            } else {
                bins.put(new StringCell(values[i]), binCounts[i]);
            }
        }
        HistogramNominalModel histogramData = new HistogramNominalModel(bins, colIdx, colName, rowCount);
        histogramData.setMaxCount(maxCount);
        histogramData.setRowCount(rowCount);
        assert nominalKeysSize[colIdx] == bins.size() : "Saved size of nominal bins: " + nominalKeysSize[colIdx] + ", restored from the file: " + bins.size();
        histograms.put(colIdx, histogramData);
    }
    return histograms;
}
Aggregations