Search in sources :

Example 61 with DataRow

use of org.knime.core.data.DataRow in project knime-core by knime.

the class NormalizerNodeModel method calculate.

/**
 * New normalized {@link org.knime.core.data.DataTable} is created depending
 * on the mode.
 */
/**
 * @param inData The input data.
 * @param exec For BufferedDataTable creation and progress.
 * @return the result of the calculation
 * @throws Exception If the node calculation fails for any reason.
 */
protected CalculationResult calculate(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    BufferedDataTable inTable = (BufferedDataTable) inData[0];
    DataTableSpec inSpec = inTable.getSpec();
    // extract selected numeric columns
    updateNumericColumnSelection(inSpec);
    Normalizer ntable = new Normalizer(inTable, m_columns);
    long rowcount = inTable.size();
    ExecutionMonitor prepareExec = exec.createSubProgress(0.3);
    AffineTransTable outTable;
    boolean fixDomainBounds = false;
    switch(m_mode) {
        case NONORM_MODE:
            return new CalculationResult(inTable, new DataTableSpec(), new AffineTransConfiguration());
        case MINMAX_MODE:
            fixDomainBounds = true;
            outTable = ntable.doMinMaxNorm(m_max, m_min, prepareExec);
            break;
        case ZSCORE_MODE:
            outTable = ntable.doZScoreNorm(prepareExec);
            break;
        case DECIMALSCALING_MODE:
            outTable = ntable.doDecimalScaling(prepareExec);
            break;
        default:
            throw new Exception("No mode set");
    }
    if (outTable.getErrorMessage() != null) {
        // something went wrong, report and throw an exception
        throw new Exception(outTable.getErrorMessage());
    }
    if (ntable.getErrorMessage() != null) {
        // something went wrong during initialization, report.
        setWarningMessage(ntable.getErrorMessage());
    }
    DataTableSpec modelSpec = FilterColumnTable.createFilterTableSpec(inSpec, m_columns);
    AffineTransConfiguration configuration = outTable.getConfiguration();
    DataTableSpec spec = outTable.getDataTableSpec();
    // the same transformation, which is not guaranteed to snap to min/max)
    if (fixDomainBounds) {
        DataColumnSpec[] newColSpecs = new DataColumnSpec[spec.getNumColumns()];
        for (int i = 0; i < newColSpecs.length; i++) {
            newColSpecs[i] = spec.getColumnSpec(i);
        }
        for (int i = 0; i < m_columns.length; i++) {
            int index = spec.findColumnIndex(m_columns[i]);
            DataColumnSpecCreator creator = new DataColumnSpecCreator(newColSpecs[index]);
            DataColumnDomainCreator domCreator = new DataColumnDomainCreator(newColSpecs[index].getDomain());
            domCreator.setLowerBound(new DoubleCell(m_min));
            domCreator.setUpperBound(new DoubleCell(m_max));
            creator.setDomain(domCreator.createDomain());
            newColSpecs[index] = creator.createSpec();
        }
        spec = new DataTableSpec(spec.getName(), newColSpecs);
    }
    ExecutionMonitor normExec = exec.createSubProgress(.7);
    BufferedDataContainer container = exec.createDataContainer(spec);
    long count = 1;
    for (DataRow row : outTable) {
        normExec.checkCanceled();
        normExec.setProgress(count / (double) rowcount, "Normalizing row no. " + count + " of " + rowcount + " (\"" + row.getKey() + "\")");
        container.addRowToTable(row);
        count++;
    }
    container.close();
    return new CalculationResult(container.getTable(), modelSpec, configuration);
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) Normalizer(org.knime.base.data.normalize.Normalizer) DoubleCell(org.knime.core.data.def.DoubleCell) DataColumnDomainCreator(org.knime.core.data.DataColumnDomainCreator) DataRow(org.knime.core.data.DataRow) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) CanceledExecutionException(org.knime.core.node.CanceledExecutionException) IOException(java.io.IOException) DataColumnSpec(org.knime.core.data.DataColumnSpec) BufferedDataTable(org.knime.core.node.BufferedDataTable) AffineTransTable(org.knime.base.data.normalize.AffineTransTable) AffineTransConfiguration(org.knime.base.data.normalize.AffineTransConfiguration) ExecutionMonitor(org.knime.core.node.ExecutionMonitor)

Example 62 with DataRow

use of org.knime.core.data.DataRow in project knime-core by knime.

the class EnrichmentPlotterModel method execute.

/**
 * {@inheritDoc}
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    final double rowCount = inData[0].size();
    final BufferedDataContainer areaOutCont = exec.createDataContainer(AREA_OUT_SPEC);
    final BufferedDataContainer discrateOutCont = exec.createDataContainer(DISCRATE_OUT_SPEC);
    for (int i = 0; i < m_settings.getCurveCount(); i++) {
        final ExecutionMonitor sexec = exec.createSubProgress(1.0 / m_settings.getCurveCount());
        exec.setMessage("Generating curve " + (i + 1));
        final Curve c = m_settings.getCurve(i);
        final Helper[] curve = new Helper[KnowsRowCountTable.checkRowCount(inData[0].size())];
        final int sortIndex = inData[0].getDataTableSpec().findColumnIndex(c.getSortColumn());
        final int actIndex = inData[0].getDataTableSpec().findColumnIndex(c.getActivityColumn());
        int k = 0, maxK = 0;
        for (DataRow row : inData[0]) {
            DataCell c1 = row.getCell(sortIndex);
            DataCell c2 = row.getCell(actIndex);
            if (k++ % 100 == 0) {
                sexec.checkCanceled();
                sexec.setProgress(k / rowCount);
            }
            if (c1.isMissing()) {
                continue;
            } else {
                curve[maxK] = new Helper(((DoubleValue) c1).getDoubleValue(), c2);
            }
            maxK++;
        }
        Arrays.sort(curve, 0, maxK);
        if (c.isSortDescending()) {
            for (int j = 0; j < maxK / 2; j++) {
                Helper h = curve[j];
                curve[j] = curve[maxK - j - 1];
                curve[maxK - j - 1] = h;
            }
        }
        // this is for down-sampling so that the view is faster;
        // plotting >100,000 points takes quite a long time
        final int size = Math.min(MAX_RESOLUTION, maxK);
        final double downSampleRate = maxK / (double) size;
        final double[] xValues = new double[size + 1];
        final double[] yValues = new double[size + 1];
        xValues[0] = 0;
        yValues[0] = 0;
        int lastK = 0;
        double y = 0, area = 0;
        int nextHitRatePoint = 0;
        final double[] hitRateValues = new double[DISCRATE_POINTS.length];
        final HashMap<DataCell, MutableInteger> clusters = new HashMap<DataCell, MutableInteger>();
        for (k = 1; k <= maxK; k++) {
            final Helper h = curve[k - 1];
            if (m_settings.plotMode() == PlotMode.PlotSum) {
                y += ((DoubleValue) h.b).getDoubleValue();
            } else if (m_settings.plotMode() == PlotMode.PlotHits) {
                if (!h.b.isMissing() && (((DoubleValue) h.b).getDoubleValue() >= m_settings.hitThreshold())) {
                    y++;
                }
            } else if (!h.b.isMissing()) {
                MutableInteger count = clusters.get(h.b);
                if (count == null) {
                    count = new MutableInteger(0);
                    clusters.put(h.b, count);
                }
                if (count.inc() == m_settings.minClusterMembers()) {
                    y++;
                }
            }
            area += y / maxK;
            if ((int) (k / downSampleRate) >= lastK + 1) {
                lastK++;
                xValues[lastK] = k;
                yValues[lastK] = y;
            }
            if ((nextHitRatePoint < DISCRATE_POINTS.length) && (k == (int) Math.floor(maxK * DISCRATE_POINTS[nextHitRatePoint] / 100))) {
                hitRateValues[nextHitRatePoint] = y;
                nextHitRatePoint++;
            }
        }
        xValues[xValues.length - 1] = maxK;
        yValues[yValues.length - 1] = y;
        area /= y;
        m_curves.add(new EnrichmentPlot(c.getSortColumn() + " vs " + c.getActivityColumn(), xValues, yValues, area));
        areaOutCont.addRowToTable(new DefaultRow(new RowKey(c.toString()), new DoubleCell(area)));
        for (int j = 0; j < hitRateValues.length; j++) {
            hitRateValues[j] /= y;
        }
        discrateOutCont.addRowToTable(new DefaultRow(new RowKey(c.toString()), hitRateValues));
    }
    areaOutCont.close();
    discrateOutCont.close();
    return new BufferedDataTable[] { areaOutCont.getTable(), discrateOutCont.getTable() };
}
Also used : BufferedDataContainer(org.knime.core.node.BufferedDataContainer) HashMap(java.util.HashMap) RowKey(org.knime.core.data.RowKey) DoubleCell(org.knime.core.data.def.DoubleCell) MutableInteger(org.knime.core.util.MutableInteger) Curve(org.knime.base.node.viz.enrichment.EnrichmentPlotterSettings.Curve) DataRow(org.knime.core.data.DataRow) DoubleValue(org.knime.core.data.DoubleValue) BufferedDataTable(org.knime.core.node.BufferedDataTable) DataCell(org.knime.core.data.DataCell) ExecutionMonitor(org.knime.core.node.ExecutionMonitor) DefaultRow(org.knime.core.data.def.DefaultRow)

Example 63 with DataRow

use of org.knime.core.data.DataRow in project knime-core by knime.

the class CollectionSplitNodeModel method countNewColumns.

/**
 * Iterate the argument table, determine maximum element count,
 * return freshly created column specs.
 */
private DataColumnSpec[] countNewColumns(final BufferedDataTable table, final ExecutionMonitor exec) throws InvalidSettingsException, CanceledExecutionException {
    DataTableSpec spec = table.getDataTableSpec();
    long i = 0;
    long rowCount = table.size();
    int maxColumns = 0;
    int targetColIndex = getTargetColIndex(spec);
    for (DataRow row : table) {
        DataCell c = row.getCell(targetColIndex);
        if (!c.isMissing()) {
            maxColumns = Math.max(((CollectionDataValue) c).size(), maxColumns);
        }
        exec.setProgress((i++) / (double) rowCount, "Determining maximum element count, row \"" + row.getKey() + "\" (" + i + "/" + rowCount + ")");
        exec.checkCanceled();
    }
    HashSet<String> hashNames = new HashSet<String>();
    for (DataColumnSpec s : spec) {
        hashNames.add(s.getName());
    }
    if (m_settings.isReplaceInputColumn()) {
        hashNames.remove(spec.getColumnSpec(targetColIndex).getName());
    }
    DataType elementType = spec.getColumnSpec(targetColIndex).getType().getCollectionElementType();
    DataColumnSpec[] newColSpec = new DataColumnSpec[maxColumns];
    for (int j = 0; j < newColSpec.length; j++) {
        String baseName = "Split Value " + (j + 1);
        String newName = baseName;
        int uniquifier = 1;
        while (!hashNames.add(newName)) {
            newName = baseName + "(#" + (uniquifier++) + ")";
        }
        newColSpec[j] = new DataColumnSpecCreator(newName, elementType).createSpec();
    }
    return newColSpec;
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) DataRow(org.knime.core.data.DataRow) DataColumnSpec(org.knime.core.data.DataColumnSpec) DataCell(org.knime.core.data.DataCell) DataType(org.knime.core.data.DataType) CollectionDataValue(org.knime.core.data.collection.CollectionDataValue) HashSet(java.util.HashSet)

Example 64 with DataRow

use of org.knime.core.data.DataRow in project knime-core by knime.

the class RuleEngineNodeModel method createRearranger.

private ColumnRearranger createRearranger(final DataTableSpec inSpec, final List<Rule> rules) throws InvalidSettingsException {
    ColumnRearranger crea = new ColumnRearranger(inSpec);
    String newColName = DataTableSpec.getUniqueColumnName(inSpec, m_settings.getNewColName());
    final int defaultLabelColumnIndex;
    if (m_settings.getDefaultLabelIsColumn()) {
        if (m_settings.getDefaultLabel().length() < 3) {
            throw new InvalidSettingsException("Default label is not a column reference");
        }
        if (!m_settings.getDefaultLabel().startsWith("$") || !m_settings.getDefaultLabel().endsWith("$")) {
            throw new InvalidSettingsException("Column references in default label must be enclosed in $");
        }
        String colRef = m_settings.getDefaultLabel().substring(1, m_settings.getDefaultLabel().length() - 1);
        defaultLabelColumnIndex = inSpec.findColumnIndex(colRef);
        if (defaultLabelColumnIndex == -1) {
            throw new InvalidSettingsException("Column '" + m_settings.getDefaultLabel() + "' for default label does not exist in input table");
        }
    } else {
        defaultLabelColumnIndex = -1;
    }
    // determine output type
    List<DataType> types = new ArrayList<DataType>();
    // add outcome column types
    for (Rule r : rules) {
        if (r.getOutcome() instanceof ColumnReference) {
            types.add(((ColumnReference) r.getOutcome()).spec.getType());
        } else if (r.getOutcome() instanceof Double) {
            types.add(DoubleCell.TYPE);
        } else if (r.getOutcome() instanceof Integer) {
            types.add(IntCell.TYPE);
        } else if (r.getOutcome().toString().length() > 0) {
            types.add(StringCell.TYPE);
        }
    }
    if (defaultLabelColumnIndex >= 0) {
        types.add(inSpec.getColumnSpec(defaultLabelColumnIndex).getType());
    } else if (m_settings.getDefaultLabel().length() > 0) {
        try {
            Integer.parseInt(m_settings.getDefaultLabel());
            types.add(IntCell.TYPE);
        } catch (NumberFormatException ex) {
            try {
                Double.parseDouble(m_settings.getDefaultLabel());
                types.add(DoubleCell.TYPE);
            } catch (NumberFormatException ex1) {
                types.add(StringCell.TYPE);
            }
        }
    }
    final DataType outType;
    if (types.size() > 0) {
        DataType temp = types.get(0);
        for (int i = 1; i < types.size(); i++) {
            temp = DataType.getCommonSuperType(temp, types.get(i));
        }
        if ((temp.getValueClasses().size() == 1) && temp.getValueClasses().contains(DataValue.class)) {
            // a non-native type, we replace it with string
            temp = StringCell.TYPE;
        }
        outType = temp;
    } else {
        outType = StringCell.TYPE;
    }
    DataColumnSpec cs = new DataColumnSpecCreator(newColName, outType).createSpec();
    crea.append(new SingleCellFactory(cs) {

        @Override
        public DataCell getCell(final DataRow row) {
            for (Rule r : rules) {
                if (r.matches(row)) {
                    Object outcome = r.getOutcome();
                    if (outcome instanceof ColumnReference) {
                        DataCell cell = row.getCell(((ColumnReference) outcome).index);
                        if (outType.equals(StringCell.TYPE) && !cell.isMissing() && !cell.getType().equals(StringCell.TYPE)) {
                            return new StringCell(cell.toString());
                        } else {
                            return cell;
                        }
                    } else if (outType.equals(IntCell.TYPE)) {
                        return new IntCell((Integer) outcome);
                    } else if (outType.equals(DoubleCell.TYPE)) {
                        return new DoubleCell((Double) outcome);
                    } else {
                        return new StringCell(outcome.toString());
                    }
                }
            }
            if (defaultLabelColumnIndex >= 0) {
                DataCell cell = row.getCell(defaultLabelColumnIndex);
                if (outType.equals(StringCell.TYPE) && !cell.getType().equals(StringCell.TYPE)) {
                    return new StringCell(cell.toString());
                } else {
                    return cell;
                }
            } else if (m_settings.getDefaultLabel().length() > 0) {
                String l = m_settings.getDefaultLabel();
                if (outType.equals(StringCell.TYPE)) {
                    return new StringCell(l);
                }
                try {
                    int i = Integer.parseInt(l);
                    return new IntCell(i);
                } catch (NumberFormatException ex) {
                    try {
                        double d = Double.parseDouble(l);
                        return new DoubleCell(d);
                    } catch (NumberFormatException ex1) {
                        return new StringCell(l);
                    }
                }
            } else {
                return DataType.getMissingCell();
            }
        }
    });
    return crea;
}
Also used : DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) DataValue(org.knime.core.data.DataValue) DoubleCell(org.knime.core.data.def.DoubleCell) ArrayList(java.util.ArrayList) DataRow(org.knime.core.data.DataRow) IntCell(org.knime.core.data.def.IntCell) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) DataColumnSpec(org.knime.core.data.DataColumnSpec) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) StringCell(org.knime.core.data.def.StringCell) DataType(org.knime.core.data.DataType) DataCell(org.knime.core.data.DataCell) SingleCellFactory(org.knime.core.data.container.SingleCellFactory) ColumnReference(org.knime.base.node.rules.Rule.ColumnReference)

Example 65 with DataRow

use of org.knime.core.data.DataRow in project knime-core by knime.

the class GeneralRegressionPredictorNodeModel method configure.

/**
 * {@inheritDoc}
 */
@Override
protected PortObjectSpec[] configure(final PortObjectSpec[] inSpecs) throws InvalidSettingsException {
    PMMLPortObjectSpec regModelSpec = (PMMLPortObjectSpec) inSpecs[0];
    DataTableSpec dataSpec = (DataTableSpec) inSpecs[1];
    if (dataSpec == null || regModelSpec == null) {
        throw new InvalidSettingsException("No input specification available");
    }
    RegressionPredictorSettings s = createRegressionPredictorSettings(regModelSpec, dataSpec);
    if (null != RegressionPredictorCellFactory.createColumnSpec(regModelSpec, dataSpec, s)) {
        ColumnRearranger c = new ColumnRearranger(dataSpec);
        c.append(new RegressionPredictorCellFactory(regModelSpec, dataSpec, s) {

            @Override
            public DataCell[] getCells(final DataRow row) {
                // not called during configure.
                return null;
            }
        });
        DataTableSpec outSpec = c.createSpec();
        return new DataTableSpec[] { outSpec };
    } else {
        return null;
    }
}
Also used : RegressionPredictorSettings(org.knime.base.node.mine.regression.predict2.RegressionPredictorSettings) PMMLPortObjectSpec(org.knime.core.node.port.pmml.PMMLPortObjectSpec) DataTableSpec(org.knime.core.data.DataTableSpec) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) DataRow(org.knime.core.data.DataRow) RegressionPredictorCellFactory(org.knime.base.node.mine.regression.predict2.RegressionPredictorCellFactory)

Aggregations

DataRow (org.knime.core.data.DataRow)482 DataCell (org.knime.core.data.DataCell)268 DataTableSpec (org.knime.core.data.DataTableSpec)159 BufferedDataTable (org.knime.core.node.BufferedDataTable)125 DataColumnSpec (org.knime.core.data.DataColumnSpec)109 RowKey (org.knime.core.data.RowKey)88 DefaultRow (org.knime.core.data.def.DefaultRow)88 BufferedDataContainer (org.knime.core.node.BufferedDataContainer)80 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)76 ColumnRearranger (org.knime.core.data.container.ColumnRearranger)73 DoubleValue (org.knime.core.data.DoubleValue)72 ArrayList (java.util.ArrayList)65 DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator)65 RowIterator (org.knime.core.data.RowIterator)62 DataType (org.knime.core.data.DataType)61 DoubleCell (org.knime.core.data.def.DoubleCell)57 StringCell (org.knime.core.data.def.StringCell)53 SingleCellFactory (org.knime.core.data.container.SingleCellFactory)48 ExecutionMonitor (org.knime.core.node.ExecutionMonitor)44 CanceledExecutionException (org.knime.core.node.CanceledExecutionException)43