Search in sources:

Example 26 with Covariance

use of org.apache.commons.math3.stat.correlation.Covariance in project knime-core by knime.

The class Learner, method perform.

/**
 * Fits the logistic regression model to the given table with an iterative optimization that uses step
 * halving.
 * <p>
 * Each iteration runs {@code irlsRls} and the log-likelihood evaluation on a pooled thread. If the
 * log-likelihood decreased compared to the previous iteration, beta is repeatedly moved to the midpoint
 * between its current and its previous value until the likelihood no longer decreases or beta stops
 * changing. The main loop stops as soon as no coefficient changed by more than {@code m_eps} relative to its
 * previous value, or after {@code m_maxIter} iterations.
 *
 * @param data The data table.
 * @param exec The execution context used for reporting progress.
 * @return An object which holds the results.
 * @throws CanceledExecutionException when method is cancelled
 * @throws InvalidSettingsException When settings are inconsistent with the data
 */
public LogisticRegressionContent perform(final BufferedDataTable data, final ExecutionContext exec) throws CanceledExecutionException, InvalidSettingsException {
    exec.checkCanceled();
    int iter = 0;
    boolean converged = false;
    final RegressionTrainingData trainingData = new RegressionTrainingData(data, m_outSpec, true, m_targetReferenceCategory, m_sortTargetCategories, m_sortFactorsCategories);
    int targetIndex = data.getDataTableSpec().findColumnIndex(m_outSpec.getTargetCols().get(0).getName());
    final int tcC = trainingData.getDomainValues().get(targetIndex).size();
    final int rC = trainingData.getRegressorCount();
    // beta is a single ROW of (tcC - 1) * (rC + 1) coefficients, so every scan over the coefficients below
    // has to walk the columns of row 0
    final RealMatrix beta = new Array2DRowRealMatrix(1, (tcC - 1) * (rC + 1));
    final int coefficientCount = beta.getColumnDimension();
    double loglike = 0.0;
    double loglikeOld = 0.0;
    exec.setMessage("Iterative optimization. Processing iteration 1.");
    // main loop
    while (iter < m_maxIter && !converged) {
        RealMatrix betaOld = beta.copy();
        loglikeOld = loglike;
        // Do heavy work in a separate thread which allows to interrupt it
        // note the queue may block if no more threads are available (e.g. thread count = 1)
        // as soon as we stall in 'get' this thread reduces the number of running thread
        Future<Double> future = ThreadPool.currentPool().enqueue(new Callable<Double>() {

            @Override
            public Double call() throws Exception {
                final ExecutionMonitor progMon = exec.createSubProgress(1.0 / m_maxIter);
                irlsRls(trainingData, beta, rC, tcC, progMon);
                progMon.setProgress(1.0);
                return likelihood(trainingData.iterator(), beta, rC, tcC, exec);
            }
        });
        try {
            loglike = future.get();
        } catch (InterruptedException e) {
            future.cancel(true);
            // prefer reporting a user cancellation over the raw interruption
            exec.checkCanceled();
            throw new RuntimeException(e);
        } catch (ExecutionException e) {
            // unwrap so that callers see the exception raised by the worker
            if (e.getCause() instanceof RuntimeException) {
                throw (RuntimeException) e.getCause();
            } else {
                throw new RuntimeException(e.getCause());
            }
        }
        if (Double.isInfinite(loglike) || Double.isNaN(loglike)) {
            throw new RuntimeException(FAILING_MSG);
        }
        exec.checkCanceled();
        // test for decreasing likelihood (never on the very first iteration, there is nothing to compare to)
        while ((Double.isInfinite(loglike) || Double.isNaN(loglike) || loglike < loglikeOld) && iter > 0) {
            converged = true;
            for (int k = 0; k < coefficientCount; k++) {
                if (abs(beta.getEntry(0, k) - betaOld.getEntry(0, k)) > m_eps * abs(betaOld.getEntry(0, k))) {
                    converged = false;
                    break;
                }
            }
            if (converged) {
                // beta no longer moves, further halving cannot change anything
                break;
            }
            // half the step size of beta
            beta.setSubMatrix((beta.add(betaOld)).scalarMultiply(0.5).getData(), 0, 0);
            exec.checkCanceled();
            loglike = likelihood(trainingData.iterator(), beta, rC, tcC, exec);
            exec.checkCanceled();
        }
        // test for convergence: relative change of every coefficient must be within m_eps
        converged = true;
        for (int k = 0; k < coefficientCount; k++) {
            if (abs(beta.getEntry(0, k) - betaOld.getEntry(0, k)) > m_eps * abs(betaOld.getEntry(0, k))) {
                converged = false;
                break;
            }
        }
        iter++;
        LOGGER.debug("#Iterations: " + iter);
        LOGGER.debug("Log Likelihood: " + loglike);
        StringBuilder betaBuilder = new StringBuilder();
        for (int i = 0; i < coefficientCount; i++) {
            if (i > 0) {
                betaBuilder.append(", ");
            }
            betaBuilder.append(Double.toString(beta.getEntry(0, i)));
        }
        LOGGER.debug("beta: " + betaBuilder.toString());
        exec.checkCanceled();
        exec.setMessage("Iterative optimization. #Iterations: " + iter + " | Log-likelihood: " + DoubleFormat.formatDouble(loglike) + ". Processing iteration " + (iter + 1) + ".");
    }
    // The covariance matrix: negated inverse of A
    // NOTE(review): A is not declared in this method; presumably it is a field filled by irlsRls - confirm
    RealMatrix covMat = new QRDecomposition(A).getSolver().getInverse().scalarMultiply(-1);
    // split the active columns into nominal factors and numeric covariates
    List<String> factorList = new ArrayList<String>();
    List<String> covariateList = new ArrayList<String>();
    for (int i : trainingData.getActiveCols()) {
        String columnName = data.getDataTableSpec().getColumnSpec(i).getName();
        if (trainingData.getIsNominal().get(i)) {
            factorList.add(columnName);
        } else {
            covariateList.add(columnName);
        }
    }
    Matrix betaJama = new Matrix(beta.getData());
    Matrix covMatJama = new Matrix(covMat.getData());
    // create content
    LogisticRegressionContent content = new LogisticRegressionContent(m_outSpec, factorList, covariateList, m_targetReferenceCategory, m_sortTargetCategories, m_sortFactorsCategories, betaJama, loglike, covMatJama, iter);
    return content;
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) Matrix(Jama.Matrix) RealMatrix(org.apache.commons.math3.linear.RealMatrix) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RegressionTrainingData(org.knime.base.node.mine.regression.RegressionTrainingData) ArrayList(java.util.ArrayList) List(java.util.List) ExecutionMonitor(org.knime.core.node.ExecutionMonitor) CanceledExecutionException(org.knime.core.node.CanceledExecutionException) ExecutionException(java.util.concurrent.ExecutionException) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) CanceledExecutionException(org.knime.core.node.CanceledExecutionException) ExecutionException(java.util.concurrent.ExecutionException) QRDecomposition(org.apache.commons.math3.linear.QRDecomposition) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RealMatrix(org.apache.commons.math3.linear.RealMatrix) DataCell(org.knime.core.data.DataCell)

Example 27 with Covariance

use of org.apache.commons.math3.stat.correlation.Covariance in project knime-core by knime.

The class CovarianceMatrixCalculator, method calculateCovarianceMatrix.

/**
 * Computes the covariance matrix and puts the result in the given (optional) data container and additionally
 * returns an in-memory representation. The data container is expected to have the data table spec returned at
 * {@link #getResultSpec()}. The implementation traverses the data once.
 * <p>
 * Missing values are handled pairwise: a row contributes to the covariance of two columns only if both of
 * its cells in these columns are present.
 *
 * @param exec the execution container
 * @param inTable input data
 * @param tableSize the data table size, only used to scale the reported progress
 * @param resultDataContainer optional result data container, may be <code>null</code>
 * @return the covariance matrix
 * @throws CanceledExecutionException if the user canceled the execution
 * @throws IllegalArgumentException if a pair of columns has too few valid values to compute a covariance
 */
public RealMatrix calculateCovarianceMatrix(final ExecutionMonitor exec, final DataTable inTable, final long tableSize, final DataContainer resultDataContainer) throws CanceledExecutionException {
    checkArgument(m_targetSpec.equalStructure(inTable.getDataTableSpec()), "Target tables spec is different from the one given in the constructor!");
    if (resultDataContainer != null) {
        checkArgument(m_resultSpec.equalStructure(resultDataContainer.getTableSpec()), "Result tables spec is invalid!");
    }
    // writing the result table (if requested) gets the remaining 20% of the progress
    final ExecutionMonitor computingProgress = exec.createSubProgress(resultDataContainer != null ? 0.8 : 1);
    List<StorelessCovariance> covariancesList = new ArrayList<>();
    // create one accumulator per column pair (i, j) with i <= j
    for (int i = 0; i < m_indexes.length; i++) {
        for (int j = i; j < m_indexes.length; j++) {
            covariancesList.add(new StorelessCovariance(2));
        }
    }
    // feed every row into the accumulators of all column pairs it has values for
    // the counter is a long because tableSize is a long; an int would overflow on very large tables and
    // report a negative progress
    long rowCount = 0;
    double[] buffer = new double[2];
    for (DataRow dataRow : inTable) {
        for (int i = 0; i < m_indexes.length; i++) {
            final int outerIndex = m_indexes[i];
            final DataCell outerCell = dataRow.getCell(outerIndex);
            if (outerCell.isMissing()) {
                // skip missing values
                continue;
            }
            final double outerDouble = ((DoubleValue) outerCell).getDoubleValue();
            for (int j = i; j < m_indexes.length; j++) {
                final int innerIndex = m_indexes[j];
                final DataCell innerCell = dataRow.getCell(innerIndex);
                if (innerCell.isMissing()) {
                    // skip missing values
                    continue;
                }
                final double innerDouble = ((DoubleValue) innerCell).getDoubleValue();
                buffer[0] = outerDouble;
                buffer[1] = innerDouble;
                int covListIndex = index(m_indexes.length, i, j);
                covariancesList.get(covListIndex).increment(buffer);
            }
        }
        computingProgress.setProgress(rowCount++ / (double) tableSize, "Calculate covariance values, processing row: '" + dataRow.getKey() + "'");
        computingProgress.checkCanceled();
    }
    // Copy the storeless covariances to a real matrix
    RealMatrix covMatrix = new Array2DRowRealMatrix(m_indexes.length, m_indexes.length);
    for (int i = 0; i < m_indexes.length; i++) {
        for (int j = i; j < m_indexes.length; j++) {
            int covListIndex = index(m_indexes.length, i, j);
            double covValue;
            try {
                // for i == j both buffer slots held the same column, so entry (1, 1) is that column's variance
                covValue = i == j ? covariancesList.get(covListIndex).getCovariance(1, 1) : covariancesList.get(covListIndex).getCovariance(0, 1);
            } catch (NumberIsTooSmallException e) {
                throw new IllegalArgumentException(String.format("There were not enough valid values to " + "compute covariance between columns: '%s' and '%s'.", inTable.getDataTableSpec().getColumnSpec(m_indexes[i]).getName(), inTable.getDataTableSpec().getColumnSpec(m_indexes[j]).getName()), e);
            }
            // only the upper triangle is computed, mirror it to keep the matrix symmetric
            covMatrix.setEntry(i, j, covValue);
            covMatrix.setEntry(j, i, covValue);
        }
    }
    if (resultDataContainer != null) {
        exec.setProgress("Writing matrix to data table");
        final ExecutionMonitor writingProgress = exec.createSubProgress(0.2);
        for (int i = 0; i < covMatrix.getRowDimension(); i++) {
            // the i-th row is keyed by the name of the i-th column of the result spec
            resultDataContainer.addRowToTable(new DefaultRow(RowKey.toRowKeys(resultDataContainer.getTableSpec().getColumnSpec(i).getName())[0], covMatrix.getRow(i)));
            exec.checkCanceled();
            writingProgress.setProgress((double) i / covMatrix.getRowDimension(), "Writing row: " + resultDataContainer.getTableSpec().getColumnSpec(i).getName());
        }
    }
    return covMatrix;
}
Also used : ArrayList(java.util.ArrayList) NumberIsTooSmallException(org.apache.commons.math3.exception.NumberIsTooSmallException) StorelessCovariance(org.apache.commons.math3.stat.correlation.StorelessCovariance) DataRow(org.knime.core.data.DataRow) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RealMatrix(org.apache.commons.math3.linear.RealMatrix) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) DoubleValue(org.knime.core.data.DoubleValue) DataCell(org.knime.core.data.DataCell) ExecutionMonitor(org.knime.core.node.ExecutionMonitor) DefaultRow(org.knime.core.data.def.DefaultRow)

Example 28 with Covariance

use of org.apache.commons.math3.stat.correlation.Covariance in project knime-core by knime.

The class CovarianceOperator, method getResultInternal.

/**
 * {@inheritDoc}
 * <p>
 * Returns the covariance of the values collected in {@code m_cells} and {@code add_cells}. A missing cell is
 * returned when both collections do not hold the same number of values or when there are fewer than two
 * value pairs, because the covariance is not defined in these cases.
 */
@Override
protected DataCell getResultInternal() {
    if (m_cells.getNumElements() != add_cells.getNumElements()) {
        return DataType.getMissingCell();
    }
    // Covariance#covariance rejects samples with fewer than two observations by throwing; report the
    // undefined result as a missing cell instead, like the length mismatch above
    if (m_cells.getNumElements() < 2) {
        return DataType.getMissingCell();
    }
    Covariance cov = new Covariance();
    double value = cov.covariance(m_cells.getElements(), add_cells.getElements());
    return new DoubleCell(value);
}
Also used : Covariance(org.apache.commons.math3.stat.correlation.Covariance) DoubleCell(org.knime.core.data.def.DoubleCell)

Example 29 with Covariance

use of org.apache.commons.math3.stat.correlation.Covariance in project knime-core by knime.

The class IrlsLearner, method learn.

/**
 * {@inheritDoc}
 * <p>
 * Runs an iterative optimization with step halving. Each iteration executes {@code irlsRls} and the
 * log-likelihood evaluation on a pooled thread. If the log-likelihood decreased compared to the previous
 * iteration, beta is repeatedly moved to the midpoint between its current and its previous value. The loop
 * stops as soon as no coefficient changed by more than {@code m_eps} relative to its previous value, or
 * after {@code m_maxIter} iterations. Problems that do not abort the training are collected in
 * {@code m_warning}.
 */
@Override
public LogRegLearnerResult learn(final TrainingData<ClassificationTrainingRow> trainingData, final ExecutionMonitor exec) throws CanceledExecutionException, InvalidSettingsException {
    exec.checkCanceled();
    int iter = 0;
    boolean converged = false;
    final int tcC = trainingData.getTargetDimension() + 1;
    final int rC = trainingData.getFeatureCount() - 1;
    // beta is a single row holding all (tcC - 1) * (rC + 1) coefficients
    final RealMatrix beta = MatrixUtils.createRealMatrix(1, (tcC - 1) * (rC + 1));
    final int coefficientCount = beta.getColumnDimension();
    double loglike = 0.0;
    double loglikeOld = 0.0;
    exec.setMessage("Iterative optimization. Processing iteration 1.");
    // main loop
    while (iter < m_maxIter && !converged) {
        RealMatrix betaOld = beta.copy();
        loglikeOld = loglike;
        // Do heavy work in a separate thread which allows to interrupt it
        // note the queue may block if no more threads are available (e.g. thread count = 1)
        // as soon as we stall in 'get' this thread reduces the number of running thread
        Future<Double> future = ThreadPool.currentPool().enqueue(new Callable<Double>() {

            @Override
            public Double call() throws Exception {
                final ExecutionMonitor progMon = exec.createSubProgress(1.0 / m_maxIter);
                irlsRls(trainingData, beta, rC, tcC, progMon);
                progMon.setProgress(1.0);
                return likelihood(trainingData.iterator(), beta, rC, tcC, exec);
            }
        });
        try {
            loglike = future.get();
        } catch (InterruptedException e) {
            future.cancel(true);
            // prefer reporting a user cancellation over the raw interruption
            exec.checkCanceled();
            throw new RuntimeException(e);
        } catch (ExecutionException e) {
            // unwrap so that callers see the exception raised by the worker
            if (e.getCause() instanceof RuntimeException) {
                throw (RuntimeException) e.getCause();
            } else {
                throw new RuntimeException(e.getCause());
            }
        }
        if (Double.isInfinite(loglike) || Double.isNaN(loglike)) {
            throw new RuntimeException(FAILING_MSG);
        }
        exec.checkCanceled();
        // test for decreasing likelihood (never on the very first iteration, there is nothing to compare to)
        while ((Double.isInfinite(loglike) || Double.isNaN(loglike) || loglike < loglikeOld) && iter > 0) {
            converged = true;
            for (int k = 0; k < coefficientCount; k++) {
                if (abs(beta.getEntry(0, k) - betaOld.getEntry(0, k)) > m_eps * abs(betaOld.getEntry(0, k))) {
                    converged = false;
                    break;
                }
            }
            if (converged) {
                // beta no longer moves, further halving cannot change anything
                break;
            }
            // half the step size of beta
            beta.setSubMatrix((beta.add(betaOld)).scalarMultiply(0.5).getData(), 0, 0);
            exec.checkCanceled();
            loglike = likelihood(trainingData.iterator(), beta, rC, tcC, exec);
            exec.checkCanceled();
        }
        // test for convergence: relative change of every coefficient must be within m_eps
        converged = true;
        for (int k = 0; k < coefficientCount; k++) {
            if (abs(beta.getEntry(0, k) - betaOld.getEntry(0, k)) > m_eps * abs(betaOld.getEntry(0, k))) {
                converged = false;
                break;
            }
        }
        iter++;
        LOGGER.debug("#Iterations: " + iter);
        LOGGER.debug("Log Likelihood: " + loglike);
        StringBuilder betaBuilder = new StringBuilder();
        for (int i = 0; i < coefficientCount; i++) {
            if (i > 0) {
                betaBuilder.append(", ");
            }
            betaBuilder.append(Double.toString(beta.getEntry(0, i)));
        }
        LOGGER.debug("beta: " + betaBuilder.toString());
        exec.checkCanceled();
        exec.setMessage("Iterative optimization. #Iterations: " + iter + " | Log-likelihood: " + DoubleFormat.formatDouble(loglike) + ". Processing iteration " + (iter + 1) + ".");
    }
    StringBuilder warnBuilder = new StringBuilder();
    // The loop only ends without convergence when the iteration limit was hit. Testing the flag instead of
    // the counter avoids a false warning when convergence is reached exactly on the last allowed iteration.
    if (!converged) {
        warnBuilder.append("The algorithm did not reach convergence after the specified number of epochs. " + "Setting the epoch limit higher might result in a better model.");
    }
    // The covariance matrix: negated inverse of A, stays null if not requested or not computable
    // NOTE(review): A is not declared in this method; presumably it is a field filled by irlsRls - confirm
    RealMatrix covMat = null;
    if (m_calcCovMatrix) {
        try {
            covMat = new QRDecomposition(A).getSolver().getInverse().scalarMultiply(-1);
        } catch (SingularMatrixException sme) {
            if (warnBuilder.length() > 0) {
                warnBuilder.append("\n");
            }
            warnBuilder.append("The covariance matrix could not be calculated because the" + " observed fisher information matrix was singular.");
        }
    }
    // unfold the coefficient row into a (tcC - 1) x (rC + 1) matrix, row by row
    RealMatrix betaMat = MatrixUtils.createRealMatrix(tcC - 1, rC + 1);
    for (int i = 0; i < coefficientCount; i++) {
        int r = i / (rC + 1);
        int c = i % (rC + 1);
        betaMat.setEntry(r, c, beta.getEntry(0, i));
    }
    m_warning = warnBuilder.length() > 0 ? warnBuilder.toString() : null;
    return new LogRegLearnerResult(betaMat, covMat, iter, loglike);
}
Also used : InvalidSettingsException(org.knime.core.node.InvalidSettingsException) CanceledExecutionException(org.knime.core.node.CanceledExecutionException) SingularMatrixException(org.apache.commons.math3.linear.SingularMatrixException) ExecutionException(java.util.concurrent.ExecutionException) QRDecomposition(org.apache.commons.math3.linear.QRDecomposition) RealMatrix(org.apache.commons.math3.linear.RealMatrix) SingularMatrixException(org.apache.commons.math3.linear.SingularMatrixException) ExecutionMonitor(org.knime.core.node.ExecutionMonitor) CanceledExecutionException(org.knime.core.node.CanceledExecutionException) ExecutionException(java.util.concurrent.ExecutionException)

Example 30 with Covariance

use of org.apache.commons.math3.stat.correlation.Covariance in project knime-core by knime.

The class CovarianceMatrixCalculatorTest, method computeCovarianceOfRandomData.

/**
 * Compares the covariance matrix computed by {@link CovarianceMatrixCalculator} on randomly generated data
 * with the reference matrix computed by {@link Covariance} on the same raw values. The random seed is taken
 * from the current time and printed so that a run can be reproduced.
 *
 * @throws InvalidSettingsException propagated from the called helpers, if any of them throws it
 * @throws CanceledExecutionException propagated from the called helpers, if any of them throws it
 */
@Test
public void computeCovarianceOfRandomData() throws InvalidSettingsException, CanceledExecutionException {
    final long seed = System.currentTimeMillis();
    System.out.println("Mahalanobis test random seed: " + seed);
    final double[][] rawValues = new double[TEST_TABLE_SIZE][];

    // build the input table; generateData also receives the raw value array used for the reference below
    final BufferedDataContainer inputContainer = generateData(new Random(seed), rawValues, SPEC_4);
    inputContainer.close();
    final BufferedDataTable inputTable = inputContainer.getTable();

    // test the covariance matrix computation
    final CovarianceMatrixCalculator calculator = new CovarianceMatrixCalculator(SPEC_4, SPEC_4.getColumnNames());
    final BufferedDataContainer resultContainer = m_exec.createDataContainer(calculator.getResultSpec());
    final RealMatrix actualMatrix = calculator.computeCovarianceMatrix(m_exec, inputTable, resultContainer);
    resultContainer.close();

    // reference result from commons math
    final RealMatrix expectedMatrix = new Covariance(rawValues).getCovarianceMatrix();
    final BufferedDataTable actualTable = resultContainer.getTable();
    assertCovarianceMatrixEquality(actualMatrix, expectedMatrix, actualTable, SPEC_4, true);
}
Also used : Random(java.util.Random) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) RealMatrix(org.apache.commons.math3.linear.RealMatrix) Covariance(org.apache.commons.math3.stat.correlation.Covariance) BufferedDataTable(org.knime.core.node.BufferedDataTable) Test(org.junit.Test)

Aggregations

RealMatrix (org.apache.commons.math3.linear.RealMatrix)27 Array2DRowRealMatrix (org.apache.commons.math3.linear.Array2DRowRealMatrix)10 IntStream (java.util.stream.IntStream)9 ArrayList (java.util.ArrayList)8 List (java.util.List)7 Nonnull (javax.annotation.Nonnull)7 Collectors (java.util.stream.Collectors)6 Stream (java.util.stream.Stream)5 Covariance (org.apache.commons.math3.stat.correlation.Covariance)5 java.util (java.util)4 Supplier (java.util.function.Supplier)4 Nullable (javax.annotation.Nullable)4 ImmutablePair (org.apache.commons.lang3.tuple.ImmutablePair)4 TooManyEvaluationsException (org.apache.commons.math3.exception.TooManyEvaluationsException)4 EigenDecomposition (org.apache.commons.math3.linear.EigenDecomposition)4 Logger (org.apache.logging.log4j.Logger)4 UserException (org.broadinstitute.hellbender.exceptions.UserException)4 Nd4jIOUtils (org.broadinstitute.hellbender.tools.coveragemodel.nd4jutils.Nd4jIOUtils)4 Utils (org.broadinstitute.hellbender.utils.Utils)4 ParamUtils (org.broadinstitute.hellbender.utils.param.ParamUtils)4