Search in sources :

Example 86 with DataCell

use of org.knime.core.data.DataCell in project knime-core by knime.

the class TreeEnsembleClassificationPredictorCellFactory2 method createFactory.

/**
 * Builds a {@code TreeEnsembleClassificationPredictorCellFactory2} for the given <b>predictor</b>.
 * <p>
 * The specs of the appended columns are derived from the predictor's configuration: the prediction
 * column always comes first, optionally followed by a confidence column, one confidence column per
 * possible target value, and a model count column. All names are made unique with respect to the
 * predictor's data spec.
 *
 * @param predictor supplies the data spec, the model spec, the (possibly {@code null}) model object
 *            and the predictor configuration
 * @return a factory configured according to the settings of the provided <b>predictor</b>
 * @throws InvalidSettingsException propagated from the calls made here (NOTE(review): presumably
 *             raised by {@code calculateFilterIndices} when the data spec does not fit the model
 *             - confirm)
 */
public static TreeEnsembleClassificationPredictorCellFactory2 createFactory(final TreeEnsemblePredictor predictor) throws InvalidSettingsException {
    final DataTableSpec inputSpec = predictor.getDataSpec();
    final TreeEnsembleModelPortObjectSpec modelSpec = predictor.getModelSpec();
    final TreeEnsembleModelPortObject modelObject = predictor.getModelObject();
    final TreeEnsemblePredictorConfiguration config = predictor.getConfiguration();
    final UniqueNameGenerator uniqueNames = new UniqueNameGenerator(inputSpec);
    final Map<String, DataCell> targetValueMap = modelSpec.getTargetColumnPossibleValueMap();
    final List<DataColumnSpec> appendedSpecs = new ArrayList<DataColumnSpec>();

    // The prediction column has the type of the model's target column.
    final DataType predictionType = modelSpec.getTargetColumn().getType();
    final String predictionName = config.getPredictionColumnName();
    final DataColumnSpec predictionSpec = uniqueNames.newColumn(predictionName, predictionType);
    appendedSpecs.add(predictionSpec);

    if (config.isAppendPredictionConfidence()) {
        final String confidenceName = predictionSpec.getName() + " (Confidence)";
        appendedSpecs.add(uniqueNames.newColumn(confidenceName, DoubleCell.TYPE));
    }
    if (config.isAppendClassConfidences()) {
        // One confidence column per possible target value; the map of possible values is
        // expected to be available whenever this option is switched on.
        assert targetValueMap != null : "Target column has no possible values";
        for (String classValue : targetValueMap.keySet()) {
            appendedSpecs.add(uniqueNames.newColumn(classValue, DoubleCell.TYPE));
        }
    }
    if (config.isAppendModelCount()) {
        appendedSpecs.add(uniqueNames.newColumn("model count", IntCell.TYPE));
    }

    // Once a model object is present the possible target values must be known.
    assert modelObject == null || targetValueMap != null : "Target values must be known during execution";

    final DataColumnSpec[] appendedSpecArray = appendedSpecs.toArray(new DataColumnSpec[0]);
    final int[] learnColumnInRealDataIndices = modelSpec.calculateFilterIndices(inputSpec);
    return new TreeEnsembleClassificationPredictorCellFactory2(predictor, targetValueMap, appendedSpecArray, learnColumnInRealDataIndices);
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) TreeEnsembleModelPortObjectSpec(org.knime.base.node.mine.treeensemble2.model.TreeEnsembleModelPortObjectSpec) TreeEnsemblePredictorConfiguration(org.knime.base.node.mine.treeensemble2.node.predictor.TreeEnsemblePredictorConfiguration) ArrayList(java.util.ArrayList) UniqueNameGenerator(org.knime.core.util.UniqueNameGenerator) TreeEnsembleModelPortObject(org.knime.base.node.mine.treeensemble2.model.TreeEnsembleModelPortObject) DataColumnSpec(org.knime.core.data.DataColumnSpec) DataCell(org.knime.core.data.DataCell) DataType(org.knime.core.data.DataType)

Example 87 with DataCell

use of org.knime.core.data.DataCell in project knime-core by knime.

the class RuleNodeFactory method like.

/**
 * Returns a new like node that tries to match a wildcard expression in a
 * column to a fixed string value.
 * <p>
 * Note the direction of the match: the cell content is the wildcard pattern
 * and {@code value} is the string tested against it. Rows whose cell is
 * missing never match.
 *
 * @param value a fixed value
 * @param col the column's index whose contents are interpreted as wildcard
 *            patterns
 *
 * @return a new like node
 */
public static RuleNode like(final String value, final int col) {
    return new RuleNode() {

        @Override
        public boolean evaluate(final DataRow row) {
            final DataCell patternCell = row.getCell(col);
            if (!patternCell.isMissing()) {
                // the pattern differs per row, so it is translated for every evaluation
                final String pattern = WildcardMatcher.wildcardToRegex(patternCell.toString());
                return value.matches(pattern);
            }
            return false;
        }

        /**
         * {@inheritDoc}
         */
        @Override
        public String toString() {
            final StringBuilder text = new StringBuilder();
            text.append(" \"").append(value).append("\" ");
            text.append(Operators.LIKE);
            text.append("$").append(col).append("$");
            return text.toString();
        }
    };
}
Also used : DataCell(org.knime.core.data.DataCell) DataRow(org.knime.core.data.DataRow)

Example 88 with DataCell

use of org.knime.core.data.DataCell in project knime-core by knime.

the class MissingValueHandling2ColSetting method loadSettings.

/**
 * Loads settings from a NodeSettings object, used in
 * {@link org.knime.core.node.NodeModel}.
 * <p>
 * All values are read and validated into locals first; the fields of this
 * object are only overwritten once everything has been accepted, so a
 * failed load leaves the previous state untouched.
 *
 * @param settings the (sub-) config to load from
 * @throws InvalidSettingsException if any setting is missing, the method or
 *             type is unknown, or the fixed replacement value is absent or
 *             of an incompatible type
 */
protected void loadSettings(final NodeSettingsRO settings) throws InvalidSettingsException {
    // may be null to indicate meta config
    String[] names = null;
    if (settings.containsKey(CFG_COLNAME)) {
        try {
            names = settings.getStringArray(CFG_COLNAME);
        } catch (InvalidSettingsException ise) {
            // fallback to be compatible with <2.5, where a single name was stored
            String name = settings.getString(CFG_COLNAME);
            if (name != null) {
                names = new String[] { name };
            }
        }
    }
    int method = settings.getInt(CFG_METHOD);
    int type = settings.getInt(CFG_TYPE);
    DataCell fixVal = null;
    switch(method) {
        case MissingValueHandling2ColSetting.METHOD_NO_HANDLING:
        case MissingValueHandling2ColSetting.METHOD_IGNORE_ROWS:
        case MissingValueHandling2ColSetting.METHOD_MEAN:
        case MissingValueHandling2ColSetting.METHOD_MIN:
        case MissingValueHandling2ColSetting.METHOD_MAX:
        case MissingValueHandling2ColSetting.METHOD_MOST_FREQUENT:
            // these methods need no replacement value
            break;
        case MissingValueHandling2ColSetting.METHOD_FIX_VAL:
            DataType superType;
            String errorType;
            switch(type) {
                case MissingValueHandling2ColSetting.TYPE_DOUBLE:
                    superType = DoubleCell.TYPE;
                    errorType = "Type Double";
                    break;
                case MissingValueHandling2ColSetting.TYPE_INT:
                    superType = IntCell.TYPE;
                    errorType = "Type Int";
                    break;
                case MissingValueHandling2ColSetting.TYPE_STRING:
                    superType = StringCell.TYPE;
                    errorType = "Type String";
                    break;
                default:
                    throw new InvalidSettingsException("Unable to define fix value for unknown type");
            }
            // the replacement value is stored under the same key for every type
            fixVal = settings.getDataCell(CFG_FIXVAL);
            // Describe the columns that are being loaded. The fields of this object still
            // hold the previous configuration at this point and must not be used here.
            final String columnLabel = (names == null) ? "meta" : Arrays.toString(names);
            if (fixVal == null) {
                throw new InvalidSettingsException("No replacement value for column: " + columnLabel + "(" + errorType + ")");
            }
            if (!superType.isASuperTypeOf(fixVal.getType())) {
                throw new InvalidSettingsException("Wrong type of replacement value for column: " + columnLabel + "(" + errorType + "): " + fixVal.getType());
            }
            break;
        default:
            throw new InvalidSettingsException("Unknown method: " + method);
    }
    // everything validated - commit
    m_names = names;
    m_method = method;
    m_type = type;
    m_fixCell = fixVal;
}
Also used : InvalidSettingsException(org.knime.core.node.InvalidSettingsException) DataCell(org.knime.core.data.DataCell) DataType(org.knime.core.data.DataType)

Example 89 with DataCell

use of org.knime.core.data.DataCell in project knime-core by knime.

the class EnrichmentPlotterModel method execute.

/**
 * {@inheritDoc}
 * <p>
 * For every configured curve the rows of the first input table are sorted
 * by the curve's sort column (rows with a missing sort value are dropped),
 * a cumulative y value is computed according to the plot mode, and the
 * result is down-sampled to at most {@code MAX_RESOLUTION} points. The
 * curve is added to {@code m_curves}; its area and the y values sampled at
 * the {@code DISCRATE_POINTS} percentages are written to the two output
 * tables.
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    final double rowCount = inData[0].size();
    final BufferedDataContainer areaOutCont = exec.createDataContainer(AREA_OUT_SPEC);
    final BufferedDataContainer discrateOutCont = exec.createDataContainer(DISCRATE_OUT_SPEC);
    for (int i = 0; i < m_settings.getCurveCount(); i++) {
        final ExecutionMonitor sexec = exec.createSubProgress(1.0 / m_settings.getCurveCount());
        exec.setMessage("Generating curve " + (i + 1));
        final Curve c = m_settings.getCurve(i);
        final Helper[] curve = new Helper[KnowsRowCountTable.checkRowCount(inData[0].size())];
        final int sortIndex = inData[0].getDataTableSpec().findColumnIndex(c.getSortColumn());
        final int actIndex = inData[0].getDataTableSpec().findColumnIndex(c.getActivityColumn());
        // k counts all rows seen, maxK only those that made it into the curve
        int k = 0, maxK = 0;
        for (DataRow row : inData[0]) {
            DataCell c1 = row.getCell(sortIndex);
            DataCell c2 = row.getCell(actIndex);
            if (k++ % 100 == 0) {
                sexec.checkCanceled();
                sexec.setProgress(k / rowCount);
            }
            if (c1.isMissing()) {
                // rows without a sort value cannot be ranked
                continue;
            } else {
                curve[maxK] = new Helper(((DoubleValue) c1).getDoubleValue(), c2);
            }
            maxK++;
        }
        // only the first maxK slots are filled
        Arrays.sort(curve, 0, maxK);
        if (c.isSortDescending()) {
            // reverse the sorted prefix in place
            for (int j = 0; j < maxK / 2; j++) {
                Helper h = curve[j];
                curve[j] = curve[maxK - j - 1];
                curve[maxK - j - 1] = h;
            }
        }
        // this is for down-sampling so that the view is faster;
        // plotting >100,000 points takes quite a long time
        final int size = Math.min(MAX_RESOLUTION, maxK);
        final double downSampleRate = maxK / (double) size;
        final double[] xValues = new double[size + 1];
        final double[] yValues = new double[size + 1];
        xValues[0] = 0;
        yValues[0] = 0;
        int lastK = 0;
        double y = 0, area = 0;
        int nextHitRatePoint = 0;
        final double[] hitRateValues = new double[DISCRATE_POINTS.length];
        // Sampling points that correspond to zero rows can never be reached by the loop
        // below (k starts at 1). Their y value is 0, which the array already holds, so
        // step over them instead of letting them block every later sampling point.
        while ((nextHitRatePoint < DISCRATE_POINTS.length) && ((int) Math.floor(maxK * DISCRATE_POINTS[nextHitRatePoint] / 100) < 1)) {
            nextHitRatePoint++;
        }
        final HashMap<DataCell, MutableInteger> clusters = new HashMap<DataCell, MutableInteger>();
        for (k = 1; k <= maxK; k++) {
            final Helper h = curve[k - 1];
            if (m_settings.plotMode() == PlotMode.PlotSum) {
                // a missing activity cell is no DoubleValue; it adds nothing to the sum
                if (!h.b.isMissing()) {
                    y += ((DoubleValue) h.b).getDoubleValue();
                }
            } else if (m_settings.plotMode() == PlotMode.PlotHits) {
                if (!h.b.isMissing() && (((DoubleValue) h.b).getDoubleValue() >= m_settings.hitThreshold())) {
                    y++;
                }
            } else if (!h.b.isMissing()) {
                // remaining mode: count a cluster once it reaches the minimum member count
                MutableInteger count = clusters.get(h.b);
                if (count == null) {
                    count = new MutableInteger(0);
                    clusters.put(h.b, count);
                }
                if (count.inc() == m_settings.minClusterMembers()) {
                    y++;
                }
            }
            area += y / maxK;
            if ((int) (k / downSampleRate) >= lastK + 1) {
                lastK++;
                xValues[lastK] = k;
                yValues[lastK] = y;
            }
            // several sampling points may fall onto the same row, hence a loop
            while ((nextHitRatePoint < DISCRATE_POINTS.length) && (k == (int) Math.floor(maxK * DISCRATE_POINTS[nextHitRatePoint] / 100))) {
                hitRateValues[nextHitRatePoint] = y;
                nextHitRatePoint++;
            }
        }
        xValues[xValues.length - 1] = maxK;
        yValues[yValues.length - 1] = y;
        // normalise by the final y value (yields NaN if y stayed 0)
        area /= y;
        m_curves.add(new EnrichmentPlot(c.getSortColumn() + " vs " + c.getActivityColumn(), xValues, yValues, area));
        areaOutCont.addRowToTable(new DefaultRow(new RowKey(c.toString()), new DoubleCell(area)));
        for (int j = 0; j < hitRateValues.length; j++) {
            hitRateValues[j] /= y;
        }
        discrateOutCont.addRowToTable(new DefaultRow(new RowKey(c.toString()), hitRateValues));
    }
    areaOutCont.close();
    discrateOutCont.close();
    return new BufferedDataTable[] { areaOutCont.getTable(), discrateOutCont.getTable() };
}
Also used : BufferedDataContainer(org.knime.core.node.BufferedDataContainer) HashMap(java.util.HashMap) RowKey(org.knime.core.data.RowKey) DoubleCell(org.knime.core.data.def.DoubleCell) MutableInteger(org.knime.core.util.MutableInteger) Curve(org.knime.base.node.viz.enrichment.EnrichmentPlotterSettings.Curve) DataRow(org.knime.core.data.DataRow) DoubleValue(org.knime.core.data.DoubleValue) BufferedDataTable(org.knime.core.node.BufferedDataTable) DataCell(org.knime.core.data.DataCell) ExecutionMonitor(org.knime.core.node.ExecutionMonitor) DefaultRow(org.knime.core.data.def.DefaultRow)

Example 90 with DataCell

use of org.knime.core.data.DataCell in project knime-core by knime.

the class CollectionSplitNodeModel method countNewColumns.

/**
 * Scans the whole table for the largest collection in the target column
 * and creates one column spec per element position.
 * <p>
 * The new columns are named "Split Value 1", "Split Value 2", ... and made
 * unique against the existing column names by appending "(#n)". If the
 * input column is to be replaced, its name does not count as taken.
 *
 * @param table the table to scan
 * @param exec receives progress updates and is checked for cancellation
 *            after every row
 * @return the freshly created column specs; empty if no row holds a
 *         non-missing collection with at least one element
 * @throws InvalidSettingsException declared by the signature
 *             (NOTE(review): presumably raised by
 *             {@code getTargetColIndex} - confirm)
 * @throws CanceledExecutionException if the execution was canceled
 */
private DataColumnSpec[] countNewColumns(final BufferedDataTable table, final ExecutionMonitor exec) throws InvalidSettingsException, CanceledExecutionException {
    final DataTableSpec tableSpec = table.getDataTableSpec();
    final long totalRows = table.size();
    final int targetColIndex = getTargetColIndex(tableSpec);

    // pass 1: find the largest collection
    long rowsSeen = 0;
    int largestCollection = 0;
    for (DataRow row : table) {
        final DataCell cell = row.getCell(targetColIndex);
        if (!cell.isMissing()) {
            final int elementCount = ((CollectionDataValue) cell).size();
            if (elementCount > largestCollection) {
                largestCollection = elementCount;
            }
        }
        // the fraction uses the zero-based row position, the message the one-based one
        final double fraction = rowsSeen / (double) totalRows;
        rowsSeen++;
        exec.setProgress(fraction, "Determining maximum element count, row \"" + row.getKey() + "\" (" + rowsSeen + "/" + totalRows + ")");
        exec.checkCanceled();
    }

    // collect the names that are already taken
    final HashSet<String> takenNames = new HashSet<String>();
    for (DataColumnSpec existing : tableSpec) {
        takenNames.add(existing.getName());
    }
    final DataColumnSpec targetColSpec = tableSpec.getColumnSpec(targetColIndex);
    if (m_settings.isReplaceInputColumn()) {
        takenNames.remove(targetColSpec.getName());
    }

    // pass 2: one uniquely named spec per element position
    final DataType elementType = targetColSpec.getType().getCollectionElementType();
    final DataColumnSpec[] result = new DataColumnSpec[largestCollection];
    for (int position = 0; position < result.length; position++) {
        final String baseName = "Split Value " + (position + 1);
        String candidate = baseName;
        // add() returns false while the candidate is already taken
        for (int suffix = 1; !takenNames.add(candidate); suffix++) {
            candidate = baseName + "(#" + suffix + ")";
        }
        result[position] = new DataColumnSpecCreator(candidate, elementType).createSpec();
    }
    return result;
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) DataRow(org.knime.core.data.DataRow) DataColumnSpec(org.knime.core.data.DataColumnSpec) DataCell(org.knime.core.data.DataCell) DataType(org.knime.core.data.DataType) CollectionDataValue(org.knime.core.data.collection.CollectionDataValue) HashSet(java.util.HashSet)

Aggregations

DataCell (org.knime.core.data.DataCell)780 DataRow (org.knime.core.data.DataRow)268 DataTableSpec (org.knime.core.data.DataTableSpec)175 DataColumnSpec (org.knime.core.data.DataColumnSpec)170 DefaultRow (org.knime.core.data.def.DefaultRow)169 ArrayList (java.util.ArrayList)141 StringCell (org.knime.core.data.def.StringCell)131 DoubleCell (org.knime.core.data.def.DoubleCell)129 DoubleValue (org.knime.core.data.DoubleValue)111 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)109 DataType (org.knime.core.data.DataType)97 RowKey (org.knime.core.data.RowKey)94 BufferedDataTable (org.knime.core.node.BufferedDataTable)93 BufferedDataContainer (org.knime.core.node.BufferedDataContainer)91 DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator)84 LinkedHashMap (java.util.LinkedHashMap)81 IntCell (org.knime.core.data.def.IntCell)79 HashMap (java.util.HashMap)60 SettingsModelString (org.knime.core.node.defaultnodesettings.SettingsModelString)57 ColumnRearranger (org.knime.core.data.container.ColumnRearranger)56