Use of org.knime.core.data.DataCell in project knime-core by KNIME.
Method createFactory of the class TreeEnsembleClassificationPredictorCellFactory2.
/**
 * Creates a TreeEnsembleClassificationPredictorCellFactory2 from the provided <b>predictor</b>.
 *
 * @param predictor the predictor supplying the test data spec, model spec, model object and configuration
 * @return an instance of TreeEnsembleClassificationPredictorCellFactory2 configured according to the settings of the
 *         provided <b>predictor</b>
 * @throws InvalidSettingsException if the learn column indices cannot be calculated for the test data spec
 */
public static TreeEnsembleClassificationPredictorCellFactory2 createFactory(final TreeEnsemblePredictor predictor) throws InvalidSettingsException {
    DataTableSpec testDataSpec = predictor.getDataSpec();
    TreeEnsembleModelPortObjectSpec modelSpec = predictor.getModelSpec();
    TreeEnsembleModelPortObject modelObject = predictor.getModelObject();
    TreeEnsemblePredictorConfiguration configuration = predictor.getConfiguration();
    // guarantees that the appended columns do not clash with existing column names
    UniqueNameGenerator nameGen = new UniqueNameGenerator(testDataSpec);
    Map<String, DataCell> targetValueMap = modelSpec.getTargetColumnPossibleValueMap();
    List<DataColumnSpec> newColsList = new ArrayList<DataColumnSpec>();
    // the prediction column carries the same type as the learned target column
    DataType targetColType = modelSpec.getTargetColumn().getType();
    String targetColName = configuration.getPredictionColumnName();
    DataColumnSpec targetCol = nameGen.newColumn(targetColName, targetColType);
    newColsList.add(targetCol);
    if (configuration.isAppendPredictionConfidence()) {
        newColsList.add(nameGen.newColumn(targetCol.getName() + " (Confidence)", DoubleCell.TYPE));
    }
    if (configuration.isAppendClassConfidences()) {
        // NOTE(review): the original comment here was truncated ("…and this class is not called)");
        // presumably this option is only selectable when the possible target values are known
        assert targetValueMap != null : "Target column has no possible values";
        // one confidence column per possible target value
        for (String v : targetValueMap.keySet()) {
            newColsList.add(nameGen.newColumn(v, DoubleCell.TYPE));
        }
    }
    if (configuration.isAppendModelCount()) {
        newColsList.add(nameGen.newColumn("model count", IntCell.TYPE));
    }
    // modelObject may be null during configure; during execute the target values must be
    // assigned
    assert modelObject == null || targetValueMap != null : "Target values must be known during execution";
    // idiomatic toArray: the JVM allocates the correctly sized array
    DataColumnSpec[] newCols = newColsList.toArray(new DataColumnSpec[0]);
    int[] learnColumnInRealDataIndices = modelSpec.calculateFilterIndices(testDataSpec);
    return new TreeEnsembleClassificationPredictorCellFactory2(predictor, targetValueMap, newCols, learnColumnInRealDataIndices);
}
Use of org.knime.core.data.DataCell in project knime-core by KNIME.
Method like of the class RuleNodeFactory.
/**
 * Returns a new like node that tries to match a wildcard expression in a
 * column to a fixed string value.
 *
 * @param value a fixed value
 * @param col the column's index whose contents are interpreted as wildcard
 *            patterns
 *
 * @return a new like node
 */
public static RuleNode like(final String value, final int col) {
    return new RuleNode() {
        @Override
        public boolean evaluate(final DataRow row) {
            DataCell c = row.getCell(col);
            // a missing cell never matches
            if (c.isMissing()) {
                return false;
            }
            // the cell's text is the wildcard pattern; the fixed value is matched against it
            String regex = WildcardMatcher.wildcardToRegex(c.toString());
            return value.matches(regex);
        }

        /**
         * {@inheritDoc}
         */
        @Override
        public String toString() {
            return " \"" + value + "\" " + Operators.LIKE + "$" + col + "$";
        }
    };
}
Use of org.knime.core.data.DataCell in project knime-core by KNIME.
Method loadSettings of the class MissingValueHandling2ColSetting.
/**
 * Loads settings from a NodeSettings object, used in
 * {@link org.knime.core.node.NodeModel}.
 *
 * @param settings the (sub-) config to load from
 * @throws InvalidSettingsException if any setting is missing, the method is unknown,
 *         or a fix-value replacement is absent or of the wrong type
 */
protected void loadSettings(final NodeSettingsRO settings) throws InvalidSettingsException {
    // may be null to indicate meta config
    String[] names = null;
    if (settings.containsKey(CFG_COLNAME)) {
        try {
            names = settings.getStringArray(CFG_COLNAME);
        } catch (InvalidSettingsException ise) {
            // fallback to be compatible with <2.5 (a single column name instead of an array)
            String name = settings.getString(CFG_COLNAME);
            if (name != null) {
                names = new String[] { name };
            }
        }
    }
    int method = settings.getInt(CFG_METHOD);
    int type = settings.getInt(CFG_TYPE);
    DataCell fixVal = null;
    switch (method) {
    case MissingValueHandling2ColSetting.METHOD_NO_HANDLING:
    case MissingValueHandling2ColSetting.METHOD_IGNORE_ROWS:
    case MissingValueHandling2ColSetting.METHOD_MEAN:
    case MissingValueHandling2ColSetting.METHOD_MIN:
    case MissingValueHandling2ColSetting.METHOD_MAX:
    case MissingValueHandling2ColSetting.METHOD_MOST_FREQUENT:
        // these methods need no fix value
        break;
    case MissingValueHandling2ColSetting.METHOD_FIX_VAL:
        DataType superType;
        String errorType;
        switch (type) {
        case MissingValueHandling2ColSetting.TYPE_DOUBLE:
            fixVal = settings.getDataCell(CFG_FIXVAL);
            superType = DoubleCell.TYPE;
            errorType = "Type Double";
            break;
        case MissingValueHandling2ColSetting.TYPE_INT:
            fixVal = settings.getDataCell(CFG_FIXVAL);
            superType = IntCell.TYPE;
            errorType = "Type Int";
            break;
        case MissingValueHandling2ColSetting.TYPE_STRING:
            superType = StringCell.TYPE;
            fixVal = settings.getDataCell(CFG_FIXVAL);
            errorType = "Type String";
            break;
        default:
            throw new InvalidSettingsException("Unable to define fix value for unknown type");
        }
        // BUG FIX: the error messages previously printed the stale field m_names (assigned only
        // at the very end of this method); report the names just read from the settings instead
        if (fixVal == null) {
            throw new InvalidSettingsException("No replacement value for column: " + (isMetaConfig() ? "meta" : Arrays.toString(names)) + "(" + errorType + ")");
        }
        if (!superType.isASuperTypeOf(fixVal.getType())) {
            throw new InvalidSettingsException("Wrong type of replacement value for column: " + (isMetaConfig() ? "meta" : Arrays.toString(names)) + "(" + errorType + "): " + fixVal.getType());
        }
        break;
    default:
        throw new InvalidSettingsException("Unknown method: " + method);
    }
    // only commit to the fields once everything validated successfully
    m_names = names;
    m_method = method;
    m_type = type;
    m_fixCell = fixVal;
}
Use of org.knime.core.data.DataCell in project knime-core by KNIME.
Method execute of the class EnrichmentPlotterModel.
/**
 * {@inheritDoc}
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    final double rowCount = inData[0].size();
    final BufferedDataContainer areaOutCont = exec.createDataContainer(AREA_OUT_SPEC);
    final BufferedDataContainer discrateOutCont = exec.createDataContainer(DISCRATE_OUT_SPEC);
    // one enrichment curve per configured (sort column, activity column) pair
    for (int i = 0; i < m_settings.getCurveCount(); i++) {
        final ExecutionMonitor sexec = exec.createSubProgress(1.0 / m_settings.getCurveCount());
        exec.setMessage("Generating curve " + (i + 1));
        final Curve c = m_settings.getCurve(i);
        final Helper[] curve = new Helper[KnowsRowCountTable.checkRowCount(inData[0].size())];
        final int sortIndex = inData[0].getDataTableSpec().findColumnIndex(c.getSortColumn());
        final int actIndex = inData[0].getDataTableSpec().findColumnIndex(c.getActivityColumn());
        // k counts all rows seen; maxK counts only rows with a non-missing sort value
        int k = 0, maxK = 0;
        for (DataRow row : inData[0]) {
            DataCell c1 = row.getCell(sortIndex);
            DataCell c2 = row.getCell(actIndex);
            // poll for cancellation / update progress only every 100 rows
            if (k++ % 100 == 0) {
                sexec.checkCanceled();
                sexec.setProgress(k / rowCount);
            }
            if (c1.isMissing()) {
                continue; // skip row entirely; maxK is not incremented
            } else {
                curve[maxK] = new Helper(((DoubleValue) c1).getDoubleValue(), c2);
            }
            maxK++;
        }
        // sort the collected pairs ascending by the sort value
        // (assumes Helper's natural ordering is by its double value — TODO confirm)
        Arrays.sort(curve, 0, maxK);
        if (c.isSortDescending()) {
            // reverse the sorted prefix in place for descending order
            for (int j = 0; j < maxK / 2; j++) {
                Helper h = curve[j];
                curve[j] = curve[maxK - j - 1];
                curve[maxK - j - 1] = h;
            }
        }
        // this is for down-sampling so that the view is faster;
        // plotting >100,000 points takes quite a long time
        final int size = Math.min(MAX_RESOLUTION, maxK);
        final double downSampleRate = maxK / (double) size;
        // the curve always starts at the origin (index 0) and ends at (maxK, y)
        final double[] xValues = new double[size + 1];
        final double[] yValues = new double[size + 1];
        xValues[0] = 0;
        yValues[0] = 0;
        int lastK = 0;
        double y = 0, area = 0;
        int nextHitRatePoint = 0;
        final double[] hitRateValues = new double[DISCRATE_POINTS.length];
        final HashMap<DataCell, MutableInteger> clusters = new HashMap<DataCell, MutableInteger>();
        // single pass over the sorted rows, accumulating y according to the plot mode
        for (k = 1; k <= maxK; k++) {
            final Helper h = curve[k - 1];
            if (m_settings.plotMode() == PlotMode.PlotSum) {
                // sum of the activity values seen so far
                y += ((DoubleValue) h.b).getDoubleValue();
            } else if (m_settings.plotMode() == PlotMode.PlotHits) {
                // count rows whose activity reaches the hit threshold
                if (!h.b.isMissing() && (((DoubleValue) h.b).getDoubleValue() >= m_settings.hitThreshold())) {
                    y++;
                }
            } else if (!h.b.isMissing()) {
                // cluster mode: count a cluster once its member count reaches the minimum
                MutableInteger count = clusters.get(h.b);
                if (count == null) {
                    count = new MutableInteger(0);
                    clusters.put(h.b, count);
                }
                if (count.inc() == m_settings.minClusterMembers()) {
                    y++;
                }
            }
            // running (unnormalized) area under the curve
            area += y / maxK;
            // record a down-sampled point roughly every downSampleRate rows
            if ((int) (k / downSampleRate) >= lastK + 1) {
                lastK++;
                xValues[lastK] = k;
                yValues[lastK] = y;
            }
            // capture the discovery rate at the configured percentage check points
            if ((nextHitRatePoint < DISCRATE_POINTS.length) && (k == (int) Math.floor(maxK * DISCRATE_POINTS[nextHitRatePoint] / 100))) {
                hitRateValues[nextHitRatePoint] = y;
                nextHitRatePoint++;
            }
        }
        // close the curve at the last processed row
        xValues[xValues.length - 1] = maxK;
        yValues[yValues.length - 1] = y;
        // normalize the area and the hit rates by the final y-value
        // NOTE(review): if y is still 0 here (e.g. no hits at all) this yields NaN — confirm intended
        area /= y;
        m_curves.add(new EnrichmentPlot(c.getSortColumn() + " vs " + c.getActivityColumn(), xValues, yValues, area));
        areaOutCont.addRowToTable(new DefaultRow(new RowKey(c.toString()), new DoubleCell(area)));
        for (int j = 0; j < hitRateValues.length; j++) {
            hitRateValues[j] /= y;
        }
        discrateOutCont.addRowToTable(new DefaultRow(new RowKey(c.toString()), hitRateValues));
    }
    areaOutCont.close();
    discrateOutCont.close();
    return new BufferedDataTable[] { areaOutCont.getTable(), discrateOutCont.getTable() };
}
Use of org.knime.core.data.DataCell in project knime-core by KNIME.
Method countNewColumns of the class CollectionSplitNodeModel.
/**
 * Iterates the argument table to determine the maximum element count in the
 * target collection column and returns freshly created column specs, one per
 * element slot.
 *
 * @param table the table whose target collection column is scanned
 * @param exec monitor used for progress reporting and cancellation
 * @return newly created, uniquely named column specs for the split values
 * @throws InvalidSettingsException if the target column cannot be determined
 * @throws CanceledExecutionException if the user canceled the scan
 */
private DataColumnSpec[] countNewColumns(final BufferedDataTable table, final ExecutionMonitor exec) throws InvalidSettingsException, CanceledExecutionException {
    DataTableSpec spec = table.getDataTableSpec();
    long rowIndex = 0;
    long rowCount = table.size();
    int maxElements = 0;
    int targetColIndex = getTargetColIndex(spec);
    // pass over the table: track the largest collection seen in the target column
    for (DataRow row : table) {
        DataCell cell = row.getCell(targetColIndex);
        if (!cell.isMissing()) {
            int elementCount = ((CollectionDataValue) cell).size();
            if (elementCount > maxElements) {
                maxElements = elementCount;
            }
        }
        double progress = rowIndex / (double) rowCount;
        rowIndex++;
        exec.setProgress(progress, "Determining maximum element count, row \"" + row.getKey() + "\" (" + rowIndex + "/" + rowCount + ")");
        exec.checkCanceled();
    }
    // collect the names already taken so the generated columns stay unique
    HashSet<String> usedNames = new HashSet<String>();
    for (DataColumnSpec columnSpec : spec) {
        usedNames.add(columnSpec.getName());
    }
    if (m_settings.isReplaceInputColumn()) {
        // the input column will be removed, so its name becomes available again
        usedNames.remove(spec.getColumnSpec(targetColIndex).getName());
    }
    DataType elementType = spec.getColumnSpec(targetColIndex).getType().getCollectionElementType();
    DataColumnSpec[] result = new DataColumnSpec[maxElements];
    for (int pos = 0; pos < result.length; pos++) {
        String baseName = "Split Value " + (pos + 1);
        String candidate = baseName;
        int suffix = 1;
        // append "(#n)" until the candidate name is free
        while (!usedNames.add(candidate)) {
            candidate = baseName + "(#" + (suffix++) + ")";
        }
        result[pos] = new DataColumnSpecCreator(candidate, elementType).createSpec();
    }
    return result;
}
Aggregations