Search in sources :

Example 16 with DoubleValue

use of org.knime.core.data.DoubleValue in project knime-core by knime.

the class AutoBinner method calcDomainBoundsIfNeccessary.

/**
 * Determines the per column min/max values of the given data if not already present in the domain.
 *
 * <p>Only columns named in {@code recalcValuesFor} whose domain has no bounds yet are scanned. Missing cells
 * (and NaN values) do not contribute to the bounds. A column for which no usable value was found, or which
 * already has bounds, keeps its original column spec.
 *
 * @param data the data
 * @param exec the execution context
 * @param recalcValuesFor The columns
 * @return The data with extended domain information
 * @throws InvalidSettingsException if a listed column does not exist in the table or is not numeric
 * @throws CanceledExecutionException if the execution is canceled while the rows are scanned
 */
public BufferedDataTable calcDomainBoundsIfNeccessary(final BufferedDataTable data, final ExecutionContext exec, final List<String> recalcValuesFor) throws InvalidSettingsException, CanceledExecutionException {
    if (null == recalcValuesFor || recalcValuesFor.isEmpty()) {
        return data;
    }
    final DataTableSpec inSpec = data.getDataTableSpec();
    final int numCols = inSpec.getNumColumns();
    // needsBounds[i] is true iff column i was requested and has no bounds yet
    final boolean[] needsBounds = new boolean[numCols];
    final List<Integer> valuesI = new ArrayList<Integer>();
    for (String colName : recalcValuesFor) {
        DataColumnSpec colSpec = inSpec.getColumnSpec(colName);
        if (colSpec == null) {
            throw new InvalidSettingsException("The column \"" + colName + "\" does not exist in the input table.");
        }
        if (!colSpec.getType().isCompatible(DoubleValue.class)) {
            throw new InvalidSettingsException("Can only process numeric " + "data. The column \"" + colSpec.getName() + "\" is not numeric.");
        }
        if (!colSpec.getDomain().hasBounds()) {
            int colIndex = inSpec.findColumnIndex(colName);
            // guard against the same column being listed twice
            if (!needsBounds[colIndex]) {
                needsBounds[colIndex] = true;
                valuesI.add(colIndex);
            }
        }
    }
    if (valuesI.isEmpty()) {
        return data;
    }
    final double[] min = new double[numCols];
    final double[] max = new double[numCols];
    for (int col : valuesI) {
        // Double.MIN_VALUE is the smallest POSITIVE double and therefore not a valid start value for a maximum
        min[col] = Double.POSITIVE_INFINITY;
        max[col] = Double.NEGATIVE_INFINITY;
    }
    final double rowCount = data.getRowCount();
    int c = 0;
    for (DataRow row : data) {
        c++;
        exec.checkCanceled();
        exec.setProgress(c / rowCount);
        for (int col : valuesI) {
            if (row.getCell(col).isMissing()) {
                // a missing cell cannot be cast to DoubleValue and carries no bound information
                continue;
            }
            double val = ((DoubleValue) row.getCell(col)).getDoubleValue();
            if (val < min[col]) {
                min[col] = val;
            }
            if (val > max[col]) {
                max[col] = val;
            }
        }
    }
    final DataColumnSpec[] newColSpecs = new DataColumnSpec[numCols];
    for (int i = 0; i < numCols; i++) {
        DataColumnSpec columnSpec = inSpec.getColumnSpec(i);
        // min <= max only holds if at least one non-missing, non-NaN value was seen
        if (needsBounds[i] && min[i] <= max[i]) {
            DataColumnSpecCreator specCreator = new DataColumnSpecCreator(columnSpec);
            DataColumnDomainCreator domainCreator = new DataColumnDomainCreator(new DoubleCell(min[i]), new DoubleCell(max[i]));
            specCreator.setDomain(domainCreator.createDomain());
            newColSpecs[i] = specCreator.createSpec();
        } else {
            newColSpecs[i] = columnSpec;
        }
    }
    DataTableSpec spec = new DataTableSpec(newColSpecs);
    return exec.createSpecReplacerTable(data, spec);
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) DoubleCell(org.knime.core.data.def.DoubleCell) ArrayList(java.util.ArrayList) DataColumnDomainCreator(org.knime.core.data.DataColumnDomainCreator) DataRow(org.knime.core.data.DataRow) DataColumnSpec(org.knime.core.data.DataColumnSpec) DoubleValue(org.knime.core.data.DoubleValue) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) BufferedDataTable(org.knime.core.node.BufferedDataTable)

Example 17 with DoubleValue

use of org.knime.core.data.DoubleValue in project knime-core by knime.

the class EnrichmentPlotterModel method execute.

/**
 * {@inheritDoc}
 *
 * <p>For every configured curve the rows of the first input table are sorted by the curve's sort column
 * (rows with a missing sort value are dropped), the cumulative y value is computed according to the
 * configured plot mode, and the (down-sampled) curve, its area and the values at the
 * {@code DISCRATE_POINTS} percentages are collected. Rows with a missing activity value stay in the curve
 * but never increase the y value, regardless of the plot mode.
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    final double rowCount = inData[0].size();
    final BufferedDataContainer areaOutCont = exec.createDataContainer(AREA_OUT_SPEC);
    final BufferedDataContainer discrateOutCont = exec.createDataContainer(DISCRATE_OUT_SPEC);
    for (int i = 0; i < m_settings.getCurveCount(); i++) {
        final ExecutionMonitor sexec = exec.createSubProgress(1.0 / m_settings.getCurveCount());
        exec.setMessage("Generating curve " + (i + 1));
        final Curve c = m_settings.getCurve(i);
        final Helper[] curve = new Helper[KnowsRowCountTable.checkRowCount(inData[0].size())];
        final int sortIndex = inData[0].getDataTableSpec().findColumnIndex(c.getSortColumn());
        final int actIndex = inData[0].getDataTableSpec().findColumnIndex(c.getActivityColumn());
        // k counts all rows seen, maxK only those with a non-missing sort value
        int k = 0, maxK = 0;
        for (DataRow row : inData[0]) {
            DataCell c1 = row.getCell(sortIndex);
            DataCell c2 = row.getCell(actIndex);
            if (k++ % 100 == 0) {
                sexec.checkCanceled();
                sexec.setProgress(k / rowCount);
            }
            if (c1.isMissing()) {
                continue;
            } else {
                curve[maxK] = new Helper(((DoubleValue) c1).getDoubleValue(), c2);
            }
            maxK++;
        }
        Arrays.sort(curve, 0, maxK);
        if (c.isSortDescending()) {
            // reverse the filled part of the array in place
            for (int j = 0; j < maxK / 2; j++) {
                Helper h = curve[j];
                curve[j] = curve[maxK - j - 1];
                curve[maxK - j - 1] = h;
            }
        }
        // this is for down-sampling so that the view is faster;
        // plotting >100,000 points takes quite a long time
        final int size = Math.min(MAX_RESOLUTION, maxK);
        final double downSampleRate = maxK / (double) size;
        final double[] xValues = new double[size + 1];
        final double[] yValues = new double[size + 1];
        xValues[0] = 0;
        yValues[0] = 0;
        int lastK = 0;
        double y = 0, area = 0;
        int nextHitRatePoint = 0;
        final double[] hitRateValues = new double[DISCRATE_POINTS.length];
        final HashMap<DataCell, MutableInteger> clusters = new HashMap<DataCell, MutableInteger>();
        for (k = 1; k <= maxK; k++) {
            final Helper h = curve[k - 1];
            if (m_settings.plotMode() == PlotMode.PlotSum) {
                // a missing activity cell cannot be cast to DoubleValue; it contributes nothing to the sum,
                // consistent with the handling of missing cells in the other plot modes
                if (!h.b.isMissing()) {
                    y += ((DoubleValue) h.b).getDoubleValue();
                }
            } else if (m_settings.plotMode() == PlotMode.PlotHits) {
                if (!h.b.isMissing() && (((DoubleValue) h.b).getDoubleValue() >= m_settings.hitThreshold())) {
                    y++;
                }
            } else if (!h.b.isMissing()) {
                // cluster mode: a cluster counts once, at the moment it reaches the minimum member count
                MutableInteger count = clusters.get(h.b);
                if (count == null) {
                    count = new MutableInteger(0);
                    clusters.put(h.b, count);
                }
                if (count.inc() == m_settings.minClusterMembers()) {
                    y++;
                }
            }
            area += y / maxK;
            if ((int) (k / downSampleRate) >= lastK + 1) {
                lastK++;
                xValues[lastK] = k;
                yValues[lastK] = y;
            }
            if ((nextHitRatePoint < DISCRATE_POINTS.length) && (k == (int) Math.floor(maxK * DISCRATE_POINTS[nextHitRatePoint] / 100))) {
                hitRateValues[nextHitRatePoint] = y;
                nextHitRatePoint++;
            }
        }
        xValues[xValues.length - 1] = maxK;
        yValues[yValues.length - 1] = y;
        // normalize by the final y value; NOTE: if y is 0 here the results are NaN (or infinite)
        area /= y;
        m_curves.add(new EnrichmentPlot(c.getSortColumn() + " vs " + c.getActivityColumn(), xValues, yValues, area));
        areaOutCont.addRowToTable(new DefaultRow(new RowKey(c.toString()), new DoubleCell(area)));
        for (int j = 0; j < hitRateValues.length; j++) {
            hitRateValues[j] /= y;
        }
        discrateOutCont.addRowToTable(new DefaultRow(new RowKey(c.toString()), hitRateValues));
    }
    areaOutCont.close();
    discrateOutCont.close();
    return new BufferedDataTable[] { areaOutCont.getTable(), discrateOutCont.getTable() };
}
Also used : BufferedDataContainer(org.knime.core.node.BufferedDataContainer) HashMap(java.util.HashMap) RowKey(org.knime.core.data.RowKey) DoubleCell(org.knime.core.data.def.DoubleCell) MutableInteger(org.knime.core.util.MutableInteger) Curve(org.knime.base.node.viz.enrichment.EnrichmentPlotterSettings.Curve) DataRow(org.knime.core.data.DataRow) DoubleValue(org.knime.core.data.DoubleValue) BufferedDataTable(org.knime.core.node.BufferedDataTable) DataCell(org.knime.core.data.DataCell) ExecutionMonitor(org.knime.core.node.ExecutionMonitor) DefaultRow(org.knime.core.data.def.DefaultRow)

Example 18 with DoubleValue

use of org.knime.core.data.DoubleValue in project knime-core by knime.

the class PolyRegLearnerNodeModel method getCellFactory.

/**
 * Creates a cell factory that appends the polynomial regression prediction and the absolute prediction error
 * to each row. As a side effect, every processed row adds its squared error to {@code m_squaredError}.
 *
 * @param dependentIndex index of the dependent (target) column; it is read as the observed value and is
 *            never used as a regressor
 * @return the cell factory producing the two appended columns
 */
private CellFactory getCellFactory(final int dependentIndex) {
    final int degree = m_settings.getDegree();
    return new CellFactory() {

        @Override
        public DataCell[] getCells(final DataRow row) {
            // m_betas[0] is used as the constant term; the remaining coefficients are consumed in order,
            // "degree" of them per selected column
            double prediction = m_betas[0];
            double observed = 0;
            int nextBeta = 1;
            final int cellCount = row.getNumCells();
            for (int col = 0; col < cellCount; col++) {
                if (col == dependentIndex) {
                    observed = ((DoubleValue) row.getCell(col)).getDoubleValue();
                    continue;
                }
                if (!m_colSelected[col]) {
                    continue;
                }
                final double x = ((DoubleValue) row.getCell(col)).getDoubleValue();
                double power = 1;
                for (int d = 0; d < degree; d++) {
                    power *= x;
                    prediction += m_betas[nextBeta] * power;
                    nextBeta++;
                }
            }
            final double absError = Math.abs(prediction - observed);
            m_squaredError += absError * absError;
            return new DataCell[] { new DoubleCell(prediction), new DoubleCell(absError) };
        }

        @Override
        public DataColumnSpec[] getColumnSpecs() {
            final DataColumnSpec predictionSpec = new DataColumnSpecCreator("PolyReg prediction", DoubleCell.TYPE).createSpec();
            final DataColumnSpec errorSpec = new DataColumnSpecCreator("Prediction Error", DoubleCell.TYPE).createSpec();
            return new DataColumnSpec[] { predictionSpec, errorSpec };
        }

        @Override
        public void setProgress(final int curRowNr, final int rowCount, final RowKey lastKey, final ExecutionMonitor execMon) {
            // intentionally empty: this factory does not report progress
        }
    };
}
Also used : DataColumnSpec(org.knime.core.data.DataColumnSpec) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) DoubleValue(org.knime.core.data.DoubleValue) RowKey(org.knime.core.data.RowKey) DoubleCell(org.knime.core.data.def.DoubleCell) DataCell(org.knime.core.data.DataCell) ExecutionMonitor(org.knime.core.node.ExecutionMonitor) CellFactory(org.knime.core.data.container.CellFactory) DataRow(org.knime.core.data.DataRow)

Example 19 with DoubleValue

use of org.knime.core.data.DoubleValue in project knime-core by knime.

the class RegressionPredictorNodeModel method createRearranger.

/**
 * Creates a rearranger that appends one double column holding the regression prediction.
 *
 * <p>The regressor columns are taken from the regression table of {@code regModel} if a model is given,
 * otherwise from the learning fields of {@code regModelSpec}. Each of them must exist in {@code inSpec} and be
 * compatible with {@link DoubleValue}. The appended cell is missing whenever one of the regressor cells of a
 * row is missing.
 *
 * @param inSpec spec of the table to predict
 * @param regModelSpec spec of the regression model, must not be {@code null}
 * @param regModel the regression model; may be {@code null} as long as the created cell factory is not
 *            asked to compute cells
 * @return the rearranger with the appended prediction column
 * @throws InvalidSettingsException if {@code regModelSpec} is {@code null}, a regressor column is not
 *             contained in {@code inSpec}, or a regressor column is not numeric
 */
private ColumnRearranger createRearranger(final DataTableSpec inSpec, final PMMLPortObjectSpec regModelSpec, final PMMLRegressionTranslator regModel) throws InvalidSettingsException {
    if (regModelSpec == null) {
        throw new InvalidSettingsException("No input");
    }
    // use the first target field as response column name; fall back to "Response" if there is none
    String targetCol = "Response";
    for (String s : regModelSpec.getTargetFields()) {
        targetCol = s;
        break;
    }
    final List<String> learnFields;
    if (regModel != null) {
        RegressionTable regTable = regModel.getRegressionTable();
        learnFields = new ArrayList<String>();
        for (NumericPredictor p : regTable.getVariables()) {
            learnFields.add(p.getName());
        }
    } else {
        learnFields = new ArrayList<String>(regModelSpec.getLearningFields());
    }
    final int[] colIndices = new int[learnFields.size()];
    int k = 0;
    for (String learnCol : learnFields) {
        int index = inSpec.findColumnIndex(learnCol);
        if (index < 0) {
            throw new InvalidSettingsException("Missing column for " + "regressor variable : \"" + learnCol + "\"");
        }
        DataColumnSpec col = inSpec.getColumnSpec(index);
        if (!col.getType().isCompatible(DoubleValue.class)) {
            throw new InvalidSettingsException("Incompatible type of " + "column \"" + col.getName() + "\": " + col.getType());
        }
        colIndices[k++] = index;
    }
    // try to use some smart naming scheme for the append column
    String oldName = targetCol;
    // Locale.ROOT: the suffix check must not depend on the default locale (e.g. Turkish dotless i)
    if (inSpec.containsName(oldName) && !oldName.toLowerCase(java.util.Locale.ROOT).endsWith("(prediction)")) {
        oldName = oldName + " (prediction)";
    }
    String newColName = DataTableSpec.getUniqueColumnName(inSpec, oldName);
    DataColumnSpec newCol = new DataColumnSpecCreator(newColName, DoubleCell.TYPE).createSpec();
    SingleCellFactory fac = new SingleCellFactory(newCol) {

        @Override
        public DataCell getCell(final DataRow row) {
            RegressionTable t = regModel.getRegressionTable();
            int j = 0;
            double result = t.getIntercept();
            // colIndices is aligned with the order of the model's variables
            for (NumericPredictor p : t.getVariables()) {
                DataCell c = row.getCell(colIndices[j++]);
                if (c.isMissing()) {
                    return DataType.getMissingCell();
                }
                double v = ((DoubleValue) c).getDoubleValue();
                if (p.getExponent() != 1) {
                    v = Math.pow(v, p.getExponent());
                }
                result += p.getCoefficient() * v;
            }
            return new DoubleCell(result);
        }
    };
    ColumnRearranger c = new ColumnRearranger(inSpec);
    c.append(fac);
    return c;
}
Also used : DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) DoubleCell(org.knime.core.data.def.DoubleCell) NumericPredictor(org.knime.base.node.mine.regression.PMMLRegressionTranslator.NumericPredictor) DataRow(org.knime.core.data.DataRow) RegressionTable(org.knime.base.node.mine.regression.PMMLRegressionTranslator.RegressionTable) DataColumnSpec(org.knime.core.data.DataColumnSpec) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) DoubleValue(org.knime.core.data.DoubleValue) DataCell(org.knime.core.data.DataCell) SingleCellFactory(org.knime.core.data.container.SingleCellFactory)

Example 20 with DoubleValue

use of org.knime.core.data.DoubleValue in project knime-core by knime.

the class AbstractTreeEnsembleModel method createNominalNumericPredictorRecord.

/**
 * Converts a row into a {@link PredictorRecord} keyed by column name, following the columns of
 * {@code trainSpec} in order.
 *
 * <p>Missing cells, nominal values that are not among the values of the column's meta data, and NaN numbers
 * are all stored as {@code PredictorRecord.NULL}. Known nominal values are stored as their assigned integer,
 * numeric values as doubles.
 *
 * @param filterRow the row to convert; cell {@code i} is read with the type of column {@code i} of
 *            {@code trainSpec}
 * @param trainSpec the spec describing the columns of {@code filterRow}
 * @return the record holding one entry per column
 * @throws IllegalStateException if a column is neither nominal nor numeric
 */
private PredictorRecord createNominalNumericPredictorRecord(final DataRow filterRow, final DataTableSpec trainSpec) {
    final int columnCount = trainSpec.getNumColumns();
    // sized so that columnCount entries fit at the default load factor of 0.75
    final int initialCapacity = (int) (columnCount / 0.75 + 1.0);
    final Map<String, Object> values = new LinkedHashMap<String, Object>(initialCapacity);
    for (int colIdx = 0; colIdx < columnCount; colIdx++) {
        final DataColumnSpec colSpec = trainSpec.getColumnSpec(colIdx);
        final String name = colSpec.getName();
        final DataType type = colSpec.getType();
        final DataCell cell = filterRow.getCell(colIdx);

        if (cell.isMissing()) {
            values.put(name, PredictorRecord.NULL);
            continue;
        }

        if (type.isCompatible(NominalValue.class)) {
            final TreeNominalColumnMetaData nominalMeta = (TreeNominalColumnMetaData) m_metaData.getAttributeMetaData(colIdx);
            final NominalValueRepresentation[] knownValues = nominalMeta.getValues();
            final String cellText = cell.toString();
            // -1 marks "value not found"
            int code = -1;
            for (int v = 0; v < knownValues.length; v++) {
                final NominalValueRepresentation candidate = knownValues[v];
                if (candidate.getNominalValue().equals(cellText)) {
                    code = candidate.getAssignedInteger();
                    break;
                }
            }
            if (code != -1) {
                values.put(name, Integer.valueOf(code));
            } else {
                // the value is not known to the model: treat as missing value
                values.put(name, PredictorRecord.NULL);
            }
            continue;
        }

        if (!type.isCompatible(DoubleValue.class)) {
            throw new IllegalStateException("Expected nominal or numeric column type for column \"" + name + "\" but got \"" + type + "\"");
        }

        final double number = ((DoubleValue) cell).getDoubleValue();
        if (Double.isNaN(number)) {
            // make sure that NaNs are treated as missing values
            // bug AP-7169
            values.put(name, PredictorRecord.NULL);
        } else {
            values.put(name, Double.valueOf(number));
        }
    }
    return new PredictorRecord(values);
}
Also used : NominalValue(org.knime.core.data.NominalValue) NominalValueRepresentation(org.knime.base.node.mine.treeensemble2.data.NominalValueRepresentation) LinkedHashMap(java.util.LinkedHashMap) TreeNominalColumnMetaData(org.knime.base.node.mine.treeensemble2.data.TreeNominalColumnMetaData) DataColumnSpec(org.knime.core.data.DataColumnSpec) DoubleValue(org.knime.core.data.DoubleValue) PredictorRecord(org.knime.base.node.mine.treeensemble2.data.PredictorRecord) DataType(org.knime.core.data.DataType) DataCell(org.knime.core.data.DataCell)

Aggregations

DoubleValue (org.knime.core.data.DoubleValue)154 DataCell (org.knime.core.data.DataCell)103 DataRow (org.knime.core.data.DataRow)71 DataColumnSpec (org.knime.core.data.DataColumnSpec)38 DataTableSpec (org.knime.core.data.DataTableSpec)38 DoubleCell (org.knime.core.data.def.DoubleCell)32 ArrayList (java.util.ArrayList)26 BufferedDataTable (org.knime.core.node.BufferedDataTable)26 DataType (org.knime.core.data.DataType)23 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)21 LinkedHashMap (java.util.LinkedHashMap)18 IntValue (org.knime.core.data.IntValue)15 HashMap (java.util.HashMap)14 RowIterator (org.knime.core.data.RowIterator)14 RowKey (org.knime.core.data.RowKey)13 DefaultRow (org.knime.core.data.def.DefaultRow)13 DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator)12 LongValue (org.knime.core.data.LongValue)10 StringValue (org.knime.core.data.StringValue)10 DateAndTimeValue (org.knime.core.data.date.DateAndTimeValue)10