
Example 11 with ExecutionMonitor

Use of org.knime.core.node.ExecutionMonitor in project knime-core by knime.

Class TreeEnsembleClassificationLearnerNodeModel, method saveInternals:

/**
 * {@inheritDoc}
 */
@Override
protected void saveInternals(final File nodeInternDir, final ExecutionMonitor exec) throws IOException, CanceledExecutionException {
    File file;
    ExecutionMonitor sub;
    if (m_oldStyleEnsembleModel_deprecated != null) {
        // old workflow (<2.10) loaded and saved ...
        file = new File(nodeInternDir, INTERNAL_TREES_FILE);
        OutputStream out = new GZIPOutputStream(new FileOutputStream(file));
        sub = exec.createSubProgress(0.2);
        m_oldStyleEnsembleModel_deprecated.save(out, sub);
        out.close();
    }
    if (m_hiliteRowSample != null) {
        file = new File(nodeInternDir, INTERNAL_DATASAMPLE_FILE);
        sub = exec.createSubProgress(0.2);
        DataContainer.writeToZip(m_hiliteRowSample, file, sub);
    }
    if (m_viewMessage != null) {
        file = new File(nodeInternDir, INTERNAL_INFO_FILE);
        NodeSettings sets = new NodeSettings("ensembleData");
        sets.addString("view_warning", m_viewMessage);
        sets.saveToXML(new FileOutputStream(file));
    }
}
Also used : NodeSettings(org.knime.core.node.NodeSettings) GZIPOutputStream(java.util.zip.GZIPOutputStream) OutputStream(java.io.OutputStream) FileOutputStream(java.io.FileOutputStream) ExecutionMonitor(org.knime.core.node.ExecutionMonitor) File(java.io.File)
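
The method above splits the overall progress of saveInternals into fixed fractions, one per artifact written to the internals directory. Below is a minimal, hypothetical sketch of that pattern; the class and the two save helpers are illustrative stand-ins (not KNIME API), and only createSubProgress, checkCanceled and setProgress from ExecutionMonitor are assumed.

// Minimal sketch of the fixed-fraction sub-progress pattern (hypothetical helpers):
// each artifact written to the node's internals directory gets a fixed share of
// the parent monitor's progress via createSubProgress.
import java.io.File;

import org.knime.core.node.CanceledExecutionException;
import org.knime.core.node.ExecutionMonitor;

final class InternalsSaverSketch {

    void saveInternals(final File nodeInternDir, final ExecutionMonitor exec)
            throws CanceledExecutionException {
        // 80% of the overall progress is reserved for the (larger) model file
        ExecutionMonitor modelSub = exec.createSubProgress(0.8);
        saveModel(new File(nodeInternDir, "model.bin.gz"), modelSub);
        // the remaining 20% is reserved for the row sample
        ExecutionMonitor sampleSub = exec.createSubProgress(0.2);
        saveSample(new File(nodeInternDir, "sample.zip"), sampleSub);
        exec.setProgress(1.0);
    }

    // hypothetical stand-ins for the real save routines; a real implementation
    // would stream data and report intermediate progress on the sub-monitor
    private void saveModel(final File file, final ExecutionMonitor sub) throws CanceledExecutionException {
        sub.checkCanceled();
        sub.setProgress(1.0);
    }

    private void saveSample(final File file, final ExecutionMonitor sub) throws CanceledExecutionException {
        sub.checkCanceled();
        sub.setProgress(1.0);
    }
}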

Example 12 with ExecutionMonitor

Use of org.knime.core.node.ExecutionMonitor in project knime-core by knime.

Class NormalizerNodeModel, method calculate:

/**
 * A new normalized {@link org.knime.core.data.DataTable} is created depending
 * on the mode.
 *
 * @param inData The input data.
 * @param exec For BufferedDataTable creation and progress.
 * @return the result of the calculation
 * @throws Exception If the node calculation fails for any reason.
 */
protected CalculationResult calculate(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    BufferedDataTable inTable = (BufferedDataTable) inData[0];
    DataTableSpec inSpec = inTable.getSpec();
    // extract selected numeric columns
    updateNumericColumnSelection(inSpec);
    Normalizer ntable = new Normalizer(inTable, m_columns);
    long rowcount = inTable.size();
    ExecutionMonitor prepareExec = exec.createSubProgress(0.3);
    AffineTransTable outTable;
    boolean fixDomainBounds = false;
    switch(m_mode) {
        case NONORM_MODE:
            return new CalculationResult(inTable, new DataTableSpec(), new AffineTransConfiguration());
        case MINMAX_MODE:
            fixDomainBounds = true;
            outTable = ntable.doMinMaxNorm(m_max, m_min, prepareExec);
            break;
        case ZSCORE_MODE:
            outTable = ntable.doZScoreNorm(prepareExec);
            break;
        case DECIMALSCALING_MODE:
            outTable = ntable.doDecimalScaling(prepareExec);
            break;
        default:
            throw new Exception("No mode set");
    }
    if (outTable.getErrorMessage() != null) {
        // something went wrong, report and throw an exception
        throw new Exception(outTable.getErrorMessage());
    }
    if (ntable.getErrorMessage() != null) {
        // something went wrong during initialization, report.
        setWarningMessage(ntable.getErrorMessage());
    }
    DataTableSpec modelSpec = FilterColumnTable.createFilterTableSpec(inSpec, m_columns);
    AffineTransConfiguration configuration = outTable.getConfiguration();
    DataTableSpec spec = outTable.getDataTableSpec();
    // fix the domain bounds of the normalized columns to the configured min/max
    // (the computed domain results from applying the same transformation, which
    // is not guaranteed to snap exactly to min/max)
    if (fixDomainBounds) {
        DataColumnSpec[] newColSpecs = new DataColumnSpec[spec.getNumColumns()];
        for (int i = 0; i < newColSpecs.length; i++) {
            newColSpecs[i] = spec.getColumnSpec(i);
        }
        for (int i = 0; i < m_columns.length; i++) {
            int index = spec.findColumnIndex(m_columns[i]);
            DataColumnSpecCreator creator = new DataColumnSpecCreator(newColSpecs[index]);
            DataColumnDomainCreator domCreator = new DataColumnDomainCreator(newColSpecs[index].getDomain());
            domCreator.setLowerBound(new DoubleCell(m_min));
            domCreator.setUpperBound(new DoubleCell(m_max));
            creator.setDomain(domCreator.createDomain());
            newColSpecs[index] = creator.createSpec();
        }
        spec = new DataTableSpec(spec.getName(), newColSpecs);
    }
    ExecutionMonitor normExec = exec.createSubProgress(.7);
    BufferedDataContainer container = exec.createDataContainer(spec);
    long count = 1;
    for (DataRow row : outTable) {
        normExec.checkCanceled();
        normExec.setProgress(count / (double) rowcount, "Normalizing row no. " + count + " of " + rowcount + " (\"" + row.getKey() + "\")");
        container.addRowToTable(row);
        count++;
    }
    container.close();
    return new CalculationResult(container.getTable(), modelSpec, configuration);
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) Normalizer(org.knime.base.data.normalize.Normalizer) DoubleCell(org.knime.core.data.def.DoubleCell) DataColumnDomainCreator(org.knime.core.data.DataColumnDomainCreator) DataRow(org.knime.core.data.DataRow) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) CanceledExecutionException(org.knime.core.node.CanceledExecutionException) IOException(java.io.IOException) DataColumnSpec(org.knime.core.data.DataColumnSpec) BufferedDataTable(org.knime.core.node.BufferedDataTable) AffineTransTable(org.knime.base.data.normalize.AffineTransTable) AffineTransConfiguration(org.knime.base.data.normalize.AffineTransConfiguration) ExecutionMonitor(org.knime.core.node.ExecutionMonitor)
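
The row-copy loop at the end of calculate shows the most common ExecutionMonitor idiom: check for cancellation and report fractional progress once per row. A minimal sketch of just that loop follows, assuming a pre-built BufferedDataContainer; the helper class and method name are hypothetical.

import org.knime.core.data.DataRow;
import org.knime.core.node.BufferedDataContainer;
import org.knime.core.node.BufferedDataTable;
import org.knime.core.node.CanceledExecutionException;
import org.knime.core.node.ExecutionMonitor;

final class RowProgressSketch {

    /** Copies all rows of 'in' into 'out', reporting progress on 'mon'. */
    static void copyWithProgress(final BufferedDataTable in, final BufferedDataContainer out,
            final ExecutionMonitor mon) throws CanceledExecutionException {
        final long rowCount = in.size();
        long count = 0;
        for (DataRow row : in) {
            mon.checkCanceled(); // throws CanceledExecutionException if the user aborted
            count++;
            mon.setProgress(count / (double) rowCount,
                "Processing row " + count + " of " + rowCount + " (\"" + row.getKey() + "\")");
            out.addRowToTable(row);
        }
        out.close();
    }
}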

Example 13 with ExecutionMonitor

Use of org.knime.core.node.ExecutionMonitor in project knime-core by knime.

Class EnrichmentPlotterModel, method execute:

/**
 * {@inheritDoc}
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    final double rowCount = inData[0].size();
    final BufferedDataContainer areaOutCont = exec.createDataContainer(AREA_OUT_SPEC);
    final BufferedDataContainer discrateOutCont = exec.createDataContainer(DISCRATE_OUT_SPEC);
    for (int i = 0; i < m_settings.getCurveCount(); i++) {
        final ExecutionMonitor sexec = exec.createSubProgress(1.0 / m_settings.getCurveCount());
        exec.setMessage("Generating curve " + (i + 1));
        final Curve c = m_settings.getCurve(i);
        final Helper[] curve = new Helper[KnowsRowCountTable.checkRowCount(inData[0].size())];
        final int sortIndex = inData[0].getDataTableSpec().findColumnIndex(c.getSortColumn());
        final int actIndex = inData[0].getDataTableSpec().findColumnIndex(c.getActivityColumn());
        int k = 0, maxK = 0;
        for (DataRow row : inData[0]) {
            DataCell c1 = row.getCell(sortIndex);
            DataCell c2 = row.getCell(actIndex);
            if (k++ % 100 == 0) {
                sexec.checkCanceled();
                sexec.setProgress(k / rowCount);
            }
            if (c1.isMissing()) {
                continue;
            } else {
                curve[maxK] = new Helper(((DoubleValue) c1).getDoubleValue(), c2);
            }
            maxK++;
        }
        Arrays.sort(curve, 0, maxK);
        if (c.isSortDescending()) {
            for (int j = 0; j < maxK / 2; j++) {
                Helper h = curve[j];
                curve[j] = curve[maxK - j - 1];
                curve[maxK - j - 1] = h;
            }
        }
        // this is for down-sampling so that the view is faster;
        // plotting >100,000 points takes quite a long time
        final int size = Math.min(MAX_RESOLUTION, maxK);
        final double downSampleRate = maxK / (double) size;
        final double[] xValues = new double[size + 1];
        final double[] yValues = new double[size + 1];
        xValues[0] = 0;
        yValues[0] = 0;
        int lastK = 0;
        double y = 0, area = 0;
        int nextHitRatePoint = 0;
        final double[] hitRateValues = new double[DISCRATE_POINTS.length];
        final HashMap<DataCell, MutableInteger> clusters = new HashMap<DataCell, MutableInteger>();
        for (k = 1; k <= maxK; k++) {
            final Helper h = curve[k - 1];
            if (m_settings.plotMode() == PlotMode.PlotSum) {
                y += ((DoubleValue) h.b).getDoubleValue();
            } else if (m_settings.plotMode() == PlotMode.PlotHits) {
                if (!h.b.isMissing() && (((DoubleValue) h.b).getDoubleValue() >= m_settings.hitThreshold())) {
                    y++;
                }
            } else if (!h.b.isMissing()) {
                MutableInteger count = clusters.get(h.b);
                if (count == null) {
                    count = new MutableInteger(0);
                    clusters.put(h.b, count);
                }
                if (count.inc() == m_settings.minClusterMembers()) {
                    y++;
                }
            }
            area += y / maxK;
            if ((int) (k / downSampleRate) >= lastK + 1) {
                lastK++;
                xValues[lastK] = k;
                yValues[lastK] = y;
            }
            if ((nextHitRatePoint < DISCRATE_POINTS.length) && (k == (int) Math.floor(maxK * DISCRATE_POINTS[nextHitRatePoint] / 100))) {
                hitRateValues[nextHitRatePoint] = y;
                nextHitRatePoint++;
            }
        }
        xValues[xValues.length - 1] = maxK;
        yValues[yValues.length - 1] = y;
        area /= y;
        m_curves.add(new EnrichmentPlot(c.getSortColumn() + " vs " + c.getActivityColumn(), xValues, yValues, area));
        areaOutCont.addRowToTable(new DefaultRow(new RowKey(c.toString()), new DoubleCell(area)));
        for (int j = 0; j < hitRateValues.length; j++) {
            hitRateValues[j] /= y;
        }
        discrateOutCont.addRowToTable(new DefaultRow(new RowKey(c.toString()), hitRateValues));
    }
    areaOutCont.close();
    discrateOutCont.close();
    return new BufferedDataTable[] { areaOutCont.getTable(), discrateOutCont.getTable() };
}
Also used : BufferedDataContainer(org.knime.core.node.BufferedDataContainer) HashMap(java.util.HashMap) RowKey(org.knime.core.data.RowKey) DoubleCell(org.knime.core.data.def.DoubleCell) MutableInteger(org.knime.core.util.MutableInteger) Curve(org.knime.base.node.viz.enrichment.EnrichmentPlotterSettings.Curve) DataRow(org.knime.core.data.DataRow) DoubleValue(org.knime.core.data.DoubleValue) BufferedDataTable(org.knime.core.node.BufferedDataTable) DataCell(org.knime.core.data.DataCell) ExecutionMonitor(org.knime.core.node.ExecutionMonitor) DefaultRow(org.knime.core.data.def.DefaultRow)
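
Two monitoring details in the loop above are worth noting: overall progress is divided evenly across curves with createSubProgress(1.0 / curveCount), and cancellation/progress are only polled every 100 rows to keep monitoring overhead low. A stripped-down sketch of just that skeleton follows; the class and the per-row work are placeholders.

import org.knime.core.node.CanceledExecutionException;
import org.knime.core.node.ExecutionMonitor;

final class CurveProgressSketch {

    static void processCurves(final int curveCount, final long rowCount, final ExecutionMonitor exec)
            throws CanceledExecutionException {
        for (int i = 0; i < curveCount; i++) {
            // each curve owns an equal slice of the overall progress bar
            ExecutionMonitor sub = exec.createSubProgress(1.0 / curveCount);
            exec.setMessage("Generating curve " + (i + 1));
            for (long k = 0; k < rowCount; k++) {
                if (k % 100 == 0) { // throttle: poll cancellation and update progress every 100 rows
                    sub.checkCanceled();
                    sub.setProgress(k / (double) rowCount);
                }
                // ... per-row work (reading cells, building the curve) would go here ...
            }
            sub.setProgress(1.0);
        }
    }
}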

Example 14 with ExecutionMonitor

Use of org.knime.core.node.ExecutionMonitor in project knime-core by knime.

Class CollectionSplitNodeModel, method execute:

/**
 * {@inheritDoc}
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    BufferedDataTable table = inData[0];
    DataTableSpec spec = table.getDataTableSpec();
    ExecutionMonitor execForCR = exec;
    // validate settings
    getTargetColIndex(spec);
    DataColumnSpec[] colSpecs;
    switch(m_settings.getCountElementsPolicy()) {
        case Count:
            execForCR = exec.createSubProgress(0.7);
            ExecutionMonitor e = exec.createSubProgress(0.3);
            colSpecs = countNewColumns(table, e);
            break;
        case UseElementNamesOrFail:
            colSpecs = getColSpecsByElementNames(spec);
            break;
        case BestEffort:
            try {
                colSpecs = getColSpecsByElementNames(spec);
            } catch (InvalidSettingsException ise) {
                execForCR = exec.createSubProgress(0.7);
                e = exec.createSubProgress(0.3);
                colSpecs = countNewColumns(table, e);
            }
            break;
        default:
            throw new InvalidSettingsException("Unsupported policy: " + m_settings.getCountElementsPolicy());
    }
    Pair<ColumnRearranger, SplitCellFactory> pair = createColumnRearranger(spec, colSpecs);
    BufferedDataTable out = exec.createColumnRearrangeTable(table, pair.getFirst(), execForCR);
    String warnMessage = pair.getSecond().getWarnMessage();
    if (warnMessage != null) {
        setWarningMessage(warnMessage);
    }
    if (m_settings.isDetermineMostSpecificDataType()) {
        out = refineTypes(out, pair.getSecond(), exec);
    }
    return new BufferedDataTable[] { out };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpec(org.knime.core.data.DataColumnSpec) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) BufferedDataTable(org.knime.core.node.BufferedDataTable) ExecutionMonitor(org.knime.core.node.ExecutionMonitor)
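
What makes this example interesting is that the monitor handed to createColumnRearrangeTable is chosen at runtime: if no counting pass is needed, the rearranger gets the full ExecutionContext, otherwise progress is split 0.3/0.7 between the pre-scan and the rearrange pass. A hedged sketch of that decision follows, using an identity ColumnRearranger; the class name and the needsPreScan flag are illustrative.

import org.knime.core.data.container.ColumnRearranger;
import org.knime.core.node.BufferedDataTable;
import org.knime.core.node.CanceledExecutionException;
import org.knime.core.node.ExecutionContext;
import org.knime.core.node.ExecutionMonitor;

final class RearrangeProgressSketch {

    static BufferedDataTable rearrange(final BufferedDataTable table, final ExecutionContext exec,
            final boolean needsPreScan) throws CanceledExecutionException {
        ExecutionMonitor execForCR = exec; // default: the rearrange pass gets the whole progress bar
        if (needsPreScan) {
            ExecutionMonitor scanSub = exec.createSubProgress(0.3);
            // ... a pre-scan of the table would report its progress on scanSub ...
            scanSub.setProgress(1.0);
            execForCR = exec.createSubProgress(0.7); // remaining 70% for the rearrange pass
        }
        // identity rearranger: keeps all columns, no cell factory attached
        ColumnRearranger rearranger = new ColumnRearranger(table.getDataTableSpec());
        return exec.createColumnRearrangeTable(table, rearranger, execForCR);
    }
}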

Example 15 with ExecutionMonitor

Use of org.knime.core.node.ExecutionMonitor in project knime-core by knime.

Class Learner, method perform:

/**
 * @param data The data table.
 * @param exec The execution context used for reporting progress.
 * @return An object which holds the results.
 * @throws CanceledExecutionException when method is cancelled
 * @throws InvalidSettingsException When settings are inconsistent with the data
 */
public LogisticRegressionContent perform(final BufferedDataTable data, final ExecutionContext exec) throws CanceledExecutionException, InvalidSettingsException {
    exec.checkCanceled();
    int iter = 0;
    boolean converged = false;
    final RegressionTrainingData trainingData = new RegressionTrainingData(data, m_outSpec, m_specialColumns, true, m_targetReferenceCategory, m_sortTargetCategories, m_sortFactorsCategories);
    int targetIndex = data.getDataTableSpec().findColumnIndex(m_outSpec.getTargetCols().get(0).getName());
    final int tcC = trainingData.getDomainValues().get(targetIndex).size();
    final int rC = trainingData.getRegressorCount();
    final RealMatrix beta = new Array2DRowRealMatrix(1, (tcC - 1) * (rC + 1));
    Double loglike = 0.0;
    Double loglikeOld = 0.0;
    exec.setMessage("Iterative optimization. Processing iteration 1.");
    // main loop
    while (iter < m_maxIter && !converged) {
        RealMatrix betaOld = beta.copy();
        loglikeOld = loglike;
        // Do heavy work in a separate thread which allows to interrupt it
        // note the queue may block if no more threads are available (e.g. thread count = 1)
        // as soon as we stall in 'get' this thread reduces the number of running thread
        Future<Double> future = ThreadPool.currentPool().enqueue(new Callable<Double>() {

            @Override
            public Double call() throws Exception {
                final ExecutionMonitor progMon = exec.createSubProgress(1.0 / m_maxIter);
                irlsRls(trainingData, beta, rC, tcC, progMon);
                progMon.setProgress(1.0);
                return likelihood(trainingData.iterator(), beta, rC, tcC, exec);
            }
        });
        try {
            loglike = future.get();
        } catch (InterruptedException e) {
            future.cancel(true);
            exec.checkCanceled();
            throw new RuntimeException(e);
        } catch (ExecutionException e) {
            if (e.getCause() instanceof RuntimeException) {
                throw (RuntimeException) e.getCause();
            } else {
                throw new RuntimeException(e.getCause());
            }
        }
        if (Double.isInfinite(loglike) || Double.isNaN(loglike)) {
            throw new RuntimeException(FAILING_MSG);
        }
        exec.checkCanceled();
        // test for decreasing likelihood
        while ((Double.isInfinite(loglike) || Double.isNaN(loglike) || loglike < loglikeOld) && iter > 0) {
            converged = true;
            for (int k = 0; k < beta.getRowDimension(); k++) {
                if (abs(beta.getEntry(k, 0) - betaOld.getEntry(k, 0)) > m_eps * abs(betaOld.getEntry(k, 0))) {
                    converged = false;
                    break;
                }
            }
            if (converged) {
                break;
            }
            // half the step size of beta
            beta.setSubMatrix((beta.add(betaOld)).scalarMultiply(0.5).getData(), 0, 0);
            exec.checkCanceled();
            loglike = likelihood(trainingData.iterator(), beta, rC, tcC, exec);
            exec.checkCanceled();
        }
        // test for convergence
        converged = true;
        for (int k = 0; k < beta.getRowDimension(); k++) {
            if (abs(beta.getEntry(k, 0) - betaOld.getEntry(k, 0)) > m_eps * abs(betaOld.getEntry(k, 0))) {
                converged = false;
                break;
            }
        }
        iter++;
        LOGGER.debug("#Iterations: " + iter);
        LOGGER.debug("Log Likelihood: " + loglike);
        StringBuilder betaBuilder = new StringBuilder();
        for (int i = 0; i < beta.getRowDimension() - 1; i++) {
            betaBuilder.append(Double.toString(beta.getEntry(i, 0)));
            betaBuilder.append(", ");
        }
        if (beta.getRowDimension() > 0) {
            betaBuilder.append(Double.toString(beta.getEntry(beta.getRowDimension() - 1, 0)));
        }
        LOGGER.debug("beta: " + betaBuilder.toString());
        exec.checkCanceled();
        exec.setMessage("Iterative optimization. #Iterations: " + iter + " | Log-likelihood: " + DoubleFormat.formatDouble(loglike) + ". Processing iteration " + (iter + 1) + ".");
    }
    // The covariance matrix ('A' is a field of the enclosing Learner class, not shown in this excerpt)
    RealMatrix covMat = new QRDecomposition(A).getSolver().getInverse().scalarMultiply(-1);
    List<String> factorList = new ArrayList<String>();
    List<String> covariateList = new ArrayList<String>();
    Map<String, List<DataCell>> factorDomainValues = new HashMap<String, List<DataCell>>();
    for (int i : trainingData.getActiveCols()) {
        DataColumnSpec columnSpec = data.getDataTableSpec().getColumnSpec(i);
        if (trainingData.getIsNominal().get(i)) {
            String factor = columnSpec.getName();
            factorList.add(factor);
            List<DataCell> values = trainingData.getDomainValues().get(i);
            factorDomainValues.put(factor, values);
        } else {
            if (columnSpec.getType().isCompatible(BitVectorValue.class) || columnSpec.getType().isCompatible(ByteVectorValue.class)) {
                int length = trainingData.getVectorLengths().getOrDefault(i, 0).intValue();
                for (int j = 0; j < length; ++j) {
                    covariateList.add(columnSpec.getName() + "[" + j + "]");
                }
            } else {
                covariateList.add(columnSpec.getName());
            }
        }
    }
    final Map<? extends Integer, Integer> vectorIndexLengths = trainingData.getVectorLengths();
    final Map<String, Integer> vectorLengths = new LinkedHashMap<String, Integer>();
    for (DataColumnSpec spec : m_specialColumns) {
        int colIndex = data.getSpec().findColumnIndex(spec.getName());
        if (colIndex >= 0) {
            vectorLengths.put(spec.getName(), vectorIndexLengths.get(colIndex));
        }
    }
    // create content
    LogisticRegressionContent content = new LogisticRegressionContent(m_outSpec, factorList, covariateList, vectorLengths, m_targetReferenceCategory, m_sortTargetCategories, m_sortFactorsCategories, beta, loglike, covMat, iter);
    return content;
}
Also used : HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) ByteVectorValue(org.knime.core.data.vector.bytevector.ByteVectorValue) DataColumnSpec(org.knime.core.data.DataColumnSpec) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RegressionTrainingData(org.knime.base.node.mine.regression.RegressionTrainingData) List(java.util.List) ExecutionMonitor(org.knime.core.node.ExecutionMonitor) CanceledExecutionException(org.knime.core.node.CanceledExecutionException) ExecutionException(java.util.concurrent.ExecutionException) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) QRDecomposition(org.apache.commons.math3.linear.QRDecomposition) RealMatrix(org.apache.commons.math3.linear.RealMatrix) DataCell(org.knime.core.data.DataCell) BitVectorValue(org.knime.core.data.vector.bitvector.BitVectorValue)
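
Here each optimization iteration gets an equal slice of the progress bar via createSubProgress(1.0 / m_maxIter), created inside the worker Callable, while the outer loop keeps polling checkCanceled between iterations. A minimal single-threaded sketch of that allocation follows; the class and method names are hypothetical and the actual IRLS step is omitted.

import org.knime.core.node.CanceledExecutionException;
import org.knime.core.node.ExecutionMonitor;

final class IterationProgressSketch {

    static void iterate(final int maxIter, final ExecutionMonitor exec) throws CanceledExecutionException {
        for (int iter = 1; iter <= maxIter; iter++) {
            exec.checkCanceled();
            // each iteration owns 1/maxIter of the parent monitor's progress
            ExecutionMonitor sub = exec.createSubProgress(1.0 / maxIter);
            // ... the heavy per-iteration work would report fine-grained progress on 'sub' ...
            sub.setProgress(1.0);
            exec.setMessage("Iterative optimization. #Iterations: " + iter);
        }
    }
}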

Aggregations

ExecutionMonitor (org.knime.core.node.ExecutionMonitor): 160
BufferedDataTable (org.knime.core.node.BufferedDataTable): 50
DataTableSpec (org.knime.core.data.DataTableSpec): 43
DataRow (org.knime.core.data.DataRow): 39
DataCell (org.knime.core.data.DataCell): 35
CanceledExecutionException (org.knime.core.node.CanceledExecutionException): 35
Test (org.junit.Test): 33
InvalidSettingsException (org.knime.core.node.InvalidSettingsException): 33
File (java.io.File): 29
IOException (java.io.IOException): 25
PortObject (org.knime.core.node.port.PortObject): 25
ColumnRearranger (org.knime.core.data.container.ColumnRearranger): 23
DataColumnSpec (org.knime.core.data.DataColumnSpec): 21
RowKey (org.knime.core.data.RowKey): 20
ArrayList (java.util.ArrayList): 19
WorkflowLoadResult (org.knime.core.node.workflow.WorkflowPersistor.WorkflowLoadResult): 17
BufferedDataContainer (org.knime.core.node.BufferedDataContainer): 16
ExecutionException (java.util.concurrent.ExecutionException): 14
ExecutionContext (org.knime.core.node.ExecutionContext): 13
FileOutputStream (java.io.FileOutputStream): 12