Search in sources:

Example 61 with BufferedDataTable

use of org.knime.core.node.BufferedDataTable in project knime-core by knime.

the class EnrichmentPlotterModel method execute.

/**
 * {@inheritDoc}
 *
 * <p>
 * For every configured curve the rows of the first input table are sorted by the curve's sort column (rows with a
 * missing sort value are dropped), the activity column is accumulated according to the configured plot mode, and a
 * down-sampled curve, the normalized area and the values at the {@code DISCRATE_POINTS} percentages are produced.
 * The first output table receives one area row per curve, the second one row of discovery rates per curve.
 * </p>
 *
 * <p>
 * Missing activity values never contribute to a curve, regardless of the plot mode. If no row contributes at all
 * (final y value of 0) the normalizations divide by zero and the emitted values are {@code NaN}.
 * </p>
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    final double rowCount = inData[0].size();
    final BufferedDataContainer areaOutCont = exec.createDataContainer(AREA_OUT_SPEC);
    final BufferedDataContainer discrateOutCont = exec.createDataContainer(DISCRATE_OUT_SPEC);
    for (int i = 0; i < m_settings.getCurveCount(); i++) {
        final ExecutionMonitor sexec = exec.createSubProgress(1.0 / m_settings.getCurveCount());
        exec.setMessage("Generating curve " + (i + 1));
        final Curve c = m_settings.getCurve(i);
        final Helper[] curve = new Helper[KnowsRowCountTable.checkRowCount(inData[0].size())];
        final int sortIndex = inData[0].getDataTableSpec().findColumnIndex(c.getSortColumn());
        final int actIndex = inData[0].getDataTableSpec().findColumnIndex(c.getActivityColumn());
        // k counts all rows seen (for progress), maxK only the rows that have a sort value
        int k = 0, maxK = 0;
        for (DataRow row : inData[0]) {
            DataCell c1 = row.getCell(sortIndex);
            DataCell c2 = row.getCell(actIndex);
            if (k++ % 100 == 0) {
                sexec.checkCanceled();
                sexec.setProgress(k / rowCount);
            }
            // rows without a sort value cannot be ranked and are left out of the curve
            if (!c1.isMissing()) {
                curve[maxK] = new Helper(((DoubleValue) c1).getDoubleValue(), c2);
                maxK++;
            }
        }
        Arrays.sort(curve, 0, maxK);
        if (c.isSortDescending()) {
            // reverse the sorted prefix in place
            for (int j = 0; j < maxK / 2; j++) {
                Helper h = curve[j];
                curve[j] = curve[maxK - j - 1];
                curve[maxK - j - 1] = h;
            }
        }
        // this is for down-sampling so that the view is faster;
        // plotting >100,000 points takes quite a long time
        final int size = Math.min(MAX_RESOLUTION, maxK);
        final double downSampleRate = maxK / (double) size;
        final double[] xValues = new double[size + 1];
        final double[] yValues = new double[size + 1];
        xValues[0] = 0;
        yValues[0] = 0;
        int lastK = 0;
        double y = 0, area = 0;
        int nextHitRatePoint = 0;
        final double[] hitRateValues = new double[DISCRATE_POINTS.length];
        // Discovery-rate points whose row threshold rounds down to 0 are reached before any row is processed;
        // their value stays at the array default of 0. Skipping them here keeps them from blocking later points.
        while ((nextHitRatePoint < DISCRATE_POINTS.length)
                && ((int) Math.floor(maxK * DISCRATE_POINTS[nextHitRatePoint] / 100) < 1)) {
            nextHitRatePoint++;
        }
        final HashMap<DataCell, MutableInteger> clusters = new HashMap<DataCell, MutableInteger>();
        for (k = 1; k <= maxK; k++) {
            final Helper h = curve[k - 1];
            if (m_settings.plotMode() == PlotMode.PlotSum) {
                // a missing cell is not a DoubleValue; skip it like the other plot modes do
                if (!h.b.isMissing()) {
                    y += ((DoubleValue) h.b).getDoubleValue();
                }
            } else if (m_settings.plotMode() == PlotMode.PlotHits) {
                if (!h.b.isMissing() && (((DoubleValue) h.b).getDoubleValue() >= m_settings.hitThreshold())) {
                    y++;
                }
            } else if (!h.b.isMissing()) {
                MutableInteger count = clusters.get(h.b);
                if (count == null) {
                    count = new MutableInteger(0);
                    clusters.put(h.b, count);
                }
                // a cluster counts exactly once, at the moment it reaches the required member count
                if (count.inc() == m_settings.minClusterMembers()) {
                    y++;
                }
            }
            area += y / maxK;
            if ((int) (k / downSampleRate) >= lastK + 1) {
                lastK++;
                xValues[lastK] = k;
                yValues[lastK] = y;
            }
            // Record every point whose threshold has been reached. A loop with ">=" instead of a single "=="
            // test is required because on small tables several points can share the same threshold; with a
            // single test the second one would never match and all following points would stay 0.
            // NOTE(review): assumes DISCRATE_POINTS is sorted ascending - confirm against its declaration.
            while ((nextHitRatePoint < DISCRATE_POINTS.length)
                    && (k >= (int) Math.floor(maxK * DISCRATE_POINTS[nextHitRatePoint] / 100))) {
                hitRateValues[nextHitRatePoint] = y;
                nextHitRatePoint++;
            }
        }
        // the last sample always is the end point of the curve
        xValues[xValues.length - 1] = maxK;
        yValues[yValues.length - 1] = y;
        // normalize by the final y value
        area /= y;
        m_curves.add(new EnrichmentPlot(c.getSortColumn() + " vs " + c.getActivityColumn(), xValues, yValues, area));
        areaOutCont.addRowToTable(new DefaultRow(new RowKey(c.toString()), new DoubleCell(area)));
        for (int j = 0; j < hitRateValues.length; j++) {
            hitRateValues[j] /= y;
        }
        discrateOutCont.addRowToTable(new DefaultRow(new RowKey(c.toString()), hitRateValues));
    }
    areaOutCont.close();
    discrateOutCont.close();
    return new BufferedDataTable[] { areaOutCont.getTable(), discrateOutCont.getTable() };
}
Also used : BufferedDataContainer(org.knime.core.node.BufferedDataContainer) HashMap(java.util.HashMap) RowKey(org.knime.core.data.RowKey) DoubleCell(org.knime.core.data.def.DoubleCell) MutableInteger(org.knime.core.util.MutableInteger) Curve(org.knime.base.node.viz.enrichment.EnrichmentPlotterSettings.Curve) DataRow(org.knime.core.data.DataRow) DoubleValue(org.knime.core.data.DoubleValue) BufferedDataTable(org.knime.core.node.BufferedDataTable) DataCell(org.knime.core.data.DataCell) ExecutionMonitor(org.knime.core.node.ExecutionMonitor) DefaultRow(org.knime.core.data.def.DefaultRow)

Example 62 with BufferedDataTable

use of org.knime.core.node.BufferedDataTable in project knime-core by knime.

the class CollectionSplitNodeModel method execute.

/**
 * {@inheritDoc}
 *
 * <p>
 * Determines the specs of the columns to create according to the configured count-elements policy, appends them
 * to the first input table via a column rearranger and, if configured, refines the types of the new columns.
 * </p>
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    final BufferedDataTable input = inData[0];
    final DataTableSpec inSpec = input.getDataTableSpec();
    // validate settings; the returned index itself is not needed here
    getTargetColIndex(inSpec);

    // unless a counting pass is required the rearranger reports directly to the context
    ExecutionMonitor rearrangeMonitor = exec;
    DataColumnSpec[] newColSpecs;
    switch(m_settings.getCountElementsPolicy()) {
        case Count: {
            rearrangeMonitor = exec.createSubProgress(0.7);
            final ExecutionMonitor countMonitor = exec.createSubProgress(0.3);
            newColSpecs = countNewColumns(input, countMonitor);
            break;
        }
        case UseElementNamesOrFail: {
            newColSpecs = getColSpecsByElementNames(inSpec);
            break;
        }
        case BestEffort: {
            try {
                newColSpecs = getColSpecsByElementNames(inSpec);
            } catch (InvalidSettingsException namesUnavailable) {
                // element names cannot be used: fall back to counting the elements
                rearrangeMonitor = exec.createSubProgress(0.7);
                final ExecutionMonitor countMonitor = exec.createSubProgress(0.3);
                newColSpecs = countNewColumns(input, countMonitor);
            }
            break;
        }
        default:
            throw new InvalidSettingsException("Unsupported policy: " + m_settings.getCountElementsPolicy());
    }

    final Pair<ColumnRearranger, SplitCellFactory> rearrangerAndFactory = createColumnRearranger(inSpec, newColSpecs);
    final SplitCellFactory factory = rearrangerAndFactory.getSecond();
    BufferedDataTable result =
        exec.createColumnRearrangeTable(input, rearrangerAndFactory.getFirst(), rearrangeMonitor);

    final String warning = factory.getWarnMessage();
    if (warning != null) {
        setWarningMessage(warning);
    }
    if (m_settings.isDetermineMostSpecificDataType()) {
        result = refineTypes(result, factory, exec);
    }
    return new BufferedDataTable[] { result };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpec(org.knime.core.data.DataColumnSpec) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) BufferedDataTable(org.knime.core.node.BufferedDataTable) ExecutionMonitor(org.knime.core.node.ExecutionMonitor)

Example 63 with BufferedDataTable

use of org.knime.core.node.BufferedDataTable in project knime-core by knime.

the class RuleEngineNodeModel method execute.

/**
 * {@inheritDoc}
 *
 * <p>
 * Parses the rules against the spec of the first input table and returns that table run through the column
 * rearranger created for these rules.
 * </p>
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    final BufferedDataTable input = inData[0];
    final List<Rule> parsedRules = parseRules(input.getDataTableSpec());
    final ColumnRearranger rearranger = createRearranger(input.getDataTableSpec(), parsedRules);
    final BufferedDataTable result = exec.createColumnRearrangeTable(input, rearranger, exec);
    return new BufferedDataTable[] { result };
}
Also used : ColumnRearranger(org.knime.core.data.container.ColumnRearranger) BufferedDataTable(org.knime.core.node.BufferedDataTable)

Example 64 with BufferedDataTable

use of org.knime.core.node.BufferedDataTable in project knime-core by knime.

the class LogRegLearner method recalcDomainForTargetAndLearningFields.

/**
 * Recalculates the column domains needed for learning and initializes the learner with the resulting spec.
 *
 * <p>
 * Possible values are computed for the target column and for nominal learning fields; the min/max domain of
 * numeric learning fields is dropped and recomputed so that constant columns can be detected. Nominal learning
 * fields that end up without possible values are collected, reported as a warning (logged and appended to
 * {@code m_warningMessage}) and handed to {@code init(...)} as a third argument.
 * </p>
 *
 * @param data the input table
 * @param inPMMLSpec the PMML spec passed on to {@code init(...)}
 * @param exec execution context used for the domain calculation and for creating the spec-replaced table
 * @return the input table with its spec replaced by the one carrying the recalculated domains
 * @throws InvalidSettingsException declared by the method; presumably raised by the check that the target
 *             column has possible values - confirm against {@code CheckUtils.checkSetting}
 * @throws CanceledExecutionException declared by the method; presumably raised when the domain calculation is
 *             canceled - confirm against {@code DataTableDomainCreator.updateDomain}
 */
private BufferedDataTable recalcDomainForTargetAndLearningFields(final BufferedDataTable data, final PMMLPortObjectSpec inPMMLSpec, final ExecutionContext exec) throws InvalidSettingsException, CanceledExecutionException {
    final String targetCol = m_pmmlOutSpec.getTargetFields().get(0);
    // first selection: handling of possible values, second selection: handling of min/max
    // NOTE(review): argument roles inferred from usage - confirm against DataTableDomainCreator's constructor
    DataTableDomainCreator domainCreator = new DataTableDomainCreator(data.getDataTableSpec(), new DomainCreatorColumnSelection() {

        @Override
        public boolean dropDomain(final DataColumnSpec colSpec) {
            return false;
        }

        @Override
        public boolean createDomain(final DataColumnSpec colSpec) {
            return colSpec.getName().equals(targetCol) || (colSpec.getType().isCompatible(NominalValue.class) && m_pmmlOutSpec.getLearningFields().contains(colSpec.getName()));
        }
    }, new DomainCreatorColumnSelection() {

        @Override
        public boolean dropDomain(final DataColumnSpec colSpec) {
            // drop domain of numeric learning fields so that we can check for constant columns
            return colSpec.getType().isCompatible(DoubleValue.class) && m_pmmlOutSpec.getLearningFields().contains(colSpec.getName());
        }

        @Override
        public boolean createDomain(final DataColumnSpec colSpec) {
            // exactly the columns whose domain was dropped are recomputed
            return dropDomain(colSpec);
        }
    });
    domainCreator.updateDomain(data, exec);
    DataTableSpec spec = domainCreator.createSpec();
    CheckUtils.checkSetting(spec.getColumnSpec(targetCol).getDomain().hasValues(), "Target column '%s' has too many" + " unique values - consider to use domain calculator node before to enforce calculation", targetCol);
    BufferedDataTable newDataTable = exec.createSpecReplacerTable(data, spec);
    // bug fix 5580 - ignore columns with too many different values
    Set<String> columnWithTooManyDomainValues = new LinkedHashSet<>();
    for (String learningField : m_pmmlOutSpec.getLearningFields()) {
        DataColumnSpec columnSpec = spec.getColumnSpec(learningField);
        if (columnSpec.getType().isCompatible(NominalValue.class) && !columnSpec.getDomain().hasValues()) {
            columnWithTooManyDomainValues.add(learningField);
        }
    }
    if (!columnWithTooManyDomainValues.isEmpty()) {
        StringBuilder warning = new StringBuilder();
        warning.append(columnWithTooManyDomainValues.size() == 1 ? "Column " : "Columns ");
        warning.append(ConvenienceMethods.getShortStringFrom(columnWithTooManyDomainValues, 5));
        warning.append(columnWithTooManyDomainValues.size() == 1 ? " has " : " have ");
        warning.append("too many different values - will be ignored during training ");
        warning.append("(enforce inclusion by using a domain calculator node before)");
        LOGGER.warn(warning.toString());
        // append to an already existing warning instead of overwriting it
        m_warningMessage = (m_warningMessage == null ? "" : m_warningMessage + "\n") + warning.toString();
    }
    // initialize m_learner so that it has the correct DataTableSpec of the input
    init(newDataTable.getDataTableSpec(), inPMMLSpec, columnWithTooManyDomainValues);
    return newDataTable;
}
Also used : DataTableDomainCreator(org.knime.core.data.DataTableDomainCreator) LinkedHashSet(java.util.LinkedHashSet) DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpec(org.knime.core.data.DataColumnSpec) NominalValue(org.knime.core.data.NominalValue) DomainCreatorColumnSelection(org.knime.core.data.DomainCreatorColumnSelection) BufferedDataTable(org.knime.core.node.BufferedDataTable)

Example 65 with BufferedDataTable

use of org.knime.core.node.BufferedDataTable in project knime-core by knime.

the class LogRegLearnerNodeModel method execute.

/**
 * {@inheritDoc}
 *
 * <p>
 * Runs a {@code LogRegLearner} on the table at the first input port, forwards its warning message (if any) and
 * returns the PMML port carrying the general regression model together with the table port object created from
 * the learner's result.
 * </p>
 */
@Override
protected PortObject[] execute(final PortObject[] inObjects, final ExecutionContext exec) throws Exception {
    final BufferedDataTable trainingData = (BufferedDataTable) inObjects[0];
    final DataTableSpec trainingSpec = trainingData.getDataTableSpec();

    // handle the optional PMML input
    PMMLPortObject pmmlIn = null;
    if (m_pmmlInEnabled) {
        pmmlIn = (PMMLPortObject) inObjects[1];
    }
    final PMMLPortObjectSpec pmmlInSpec;
    if (pmmlIn == null) {
        // no PMML input available: start from an empty PMML port derived from the table spec
        pmmlInSpec = new PMMLPortObjectSpecCreator(trainingSpec).createSpec();
        pmmlIn = new PMMLPortObject(pmmlInSpec);
    } else {
        pmmlInSpec = pmmlIn.getSpec();
    }

    final PortObjectSpec[] learnerSpecs = new PortObjectSpec[] { trainingSpec, pmmlInSpec };
    final LogRegLearner logRegLearner = new LogRegLearner(learnerSpecs, m_pmmlInEnabled, m_settings);
    m_content = logRegLearner.execute(new PortObject[] { trainingData, pmmlIn }, exec);

    final String learnerWarning = logRegLearner.getWarningMessage();
    if (learnerWarning != null) {
        setWarningMessage(learnerWarning);
    }

    // third argument is ignored since we provide a port
    final PMMLPortObjectSpec pmmlOutSpec = (PMMLPortObjectSpec) logRegLearner.getOutputSpec()[0];
    final PMMLPortObject pmmlOut = new PMMLPortObject(pmmlOutSpec, pmmlIn, null);
    pmmlOut.addModelTranslater(new PMMLGeneralRegressionTranslator(m_content.createGeneralRegressionContent()));
    return new PortObject[] { pmmlOut, m_content.createTablePortObject(exec) };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) PMMLPortObjectSpec(org.knime.core.node.port.pmml.PMMLPortObjectSpec) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) PMMLGeneralRegressionTranslator(org.knime.base.node.mine.regression.pmmlgreg.PMMLGeneralRegressionTranslator) BufferedDataTable(org.knime.core.node.BufferedDataTable) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) PortObject(org.knime.core.node.port.PortObject) PMMLPortObjectSpecCreator(org.knime.core.node.port.pmml.PMMLPortObjectSpecCreator)

Aggregations

BufferedDataTable (org.knime.core.node.BufferedDataTable) 425, DataTableSpec (org.knime.core.data.DataTableSpec) 213, ColumnRearranger (org.knime.core.data.container.ColumnRearranger) 148, DataRow (org.knime.core.data.DataRow) 118, BufferedDataContainer (org.knime.core.node.BufferedDataContainer) 97, PortObject (org.knime.core.node.port.PortObject) 96, DataCell (org.knime.core.data.DataCell) 85, DataColumnSpec (org.knime.core.data.DataColumnSpec) 61, InvalidSettingsException (org.knime.core.node.InvalidSettingsException) 60, DefaultRow (org.knime.core.data.def.DefaultRow) 56, PMMLPortObject (org.knime.core.node.port.pmml.PMMLPortObject) 54, RowKey (org.knime.core.data.RowKey) 52, ExecutionMonitor (org.knime.core.node.ExecutionMonitor) 50, CanceledExecutionException (org.knime.core.node.CanceledExecutionException) 47, SettingsModelString (org.knime.core.node.defaultnodesettings.SettingsModelString) 43, IOException (java.io.IOException) 41, ExecutionContext (org.knime.core.node.ExecutionContext) 40, ArrayList (java.util.ArrayList) 33, LinkedHashMap (java.util.LinkedHashMap) 31, DoubleValue (org.knime.core.data.DoubleValue) 29