Search in sources :

Example 11 with Covariance

use of org.apache.commons.math3.stat.correlation.Covariance in project knime-core by knime.

the class RegressionStatisticsLearner method createCovarianceMatrix.

/**
 * Builds a square matrix holding the parameter covariances reported by a regression run.
 *
 * @param result the {@link RegressionResults} whose parameter covariances are copied
 * @return a {@link RealMatrix} with one row and one column per regression parameter; the entry at
 *         (row, column) is {@code result.getCovarianceOfParameters(row, column)}
 * @since 3.4
 */
protected RealMatrix createCovarianceMatrix(final RegressionResults result) {
    final int parameterCount = result.getNumberOfParameters();
    final RealMatrix covariances = MatrixUtils.createRealMatrix(parameterCount, parameterCount);
    // Every cell is queried individually (row by row) rather than mirroring one triangle.
    for (int row = 0; row < parameterCount; row++) {
        for (int column = 0; column < parameterCount; column++) {
            final double covariance = result.getCovarianceOfParameters(row, column);
            covariances.setEntry(row, column, covariance);
        }
    }
    return covariances;
}
Also used : RealMatrix(org.apache.commons.math3.linear.RealMatrix)

Example 12 with Covariance

use of org.apache.commons.math3.stat.correlation.Covariance in project knime-core by knime.

the class Learner method perform.

/**
 * Runs the linear regression on the given table and wraps the outcome in a
 * {@link LinearRegressionContent}.
 * <p>
 * If the regression cannot be computed because of a {@link ModelSpecificationException}, a content
 * object with a NaN-filled covariance matrix, NaN R-squared values and the exception message is
 * returned instead of propagating the exception.
 *
 * @param data The data table.
 * @param exec The execution context used for reporting progress.
 * @return An object which holds the results.
 * @throws CanceledExecutionException When method is cancelled
 * @throws InvalidSettingsException When settings are inconsistent with the data
 */
@Override
public LinearRegressionContent perform(final BufferedDataTable data, final ExecutionContext exec) throws CanceledExecutionException, InvalidSettingsException {
    exec.checkCanceled();
    final RegressionTrainingData training = new RegressionTrainingData(data, m_outSpec, m_failOnMissing);
    // At least one statistics slot is allocated even when there are no regressors.
    final int slotCount = Math.max(1, training.getRegressorCount());
    final SummaryStatistics[] columnStats = new SummaryStatistics[slotCount];
    final UpdatingMultipleLinearRegression regression = initStatistics(slotCount, columnStats);
    processTable(exec, training, columnStats, regression);
    final List<String> factors = new ArrayList<String>();
    final List<String> covariates = createCovariateListAndFillFactors(data, training, factors);
    try {
        final RegressionResults fit = regression.regress();
        final RealMatrix coefficients = MatrixUtils.createRowRealMatrix(fit.getParameterEstimates());
        final RealMatrix covariances = createCovarianceMatrix(fit);
        final int rowCount = (int) columnStats[0].getN();
        return new LinearRegressionContent(m_outSpec, rowCount, factors, covariates, coefficients, m_includeConstant, m_offsetValue, covariances, fit.getRSquared(), fit.getAdjustedRSquared(), columnStats, null);
    } catch (ModelSpecificationException e) {
        // Fallback: derive the expected number of parameters so that placeholder matrices of the
        // right shape can be handed back together with the failure message.
        final int interceptTerms = m_includeConstant ? 1 : 0;
        final int regressorTerms = training.getRegressorCount();
        final int factorTerms;
        if (!factors.isEmpty()) {
            // Only the first factor column's domain is consulted here.
            final int categoryCount = data.getDataTableSpec().getColumnSpec(factors.get(0)).getDomain().getValues().size();
            factorTerms = Math.max(1, categoryCount - 1);
        } else {
            factorTerms = 0;
        }
        final int parameterCount = interceptTerms + regressorTerms + factorTerms;
        // The coefficient row is deliberately not passed to fillWithNaNs; only the covariance
        // matrix is overwritten with NaNs.
        final RealMatrix coefficients = MatrixUtils.createRealMatrix(1, parameterCount);
        final RealMatrix covariances = MatrixUtils.createRealMatrix(parameterCount, parameterCount);
        fillWithNaNs(covariances);
        final int rowCount = (int) columnStats[0].getN();
        return new LinearRegressionContent(m_outSpec, rowCount, factors, covariates, coefficients, m_includeConstant, m_offsetValue, covariances, Double.NaN, Double.NaN, columnStats, e.getMessage());
    }
}
Also used : RealMatrix(org.apache.commons.math3.linear.RealMatrix) ArrayList(java.util.ArrayList) RegressionTrainingData(org.knime.base.node.mine.regression.RegressionTrainingData) SummaryStatistics(org.apache.commons.math3.stat.descriptive.SummaryStatistics) RegressionResults(org.apache.commons.math3.stat.regression.RegressionResults) UpdatingMultipleLinearRegression(org.apache.commons.math3.stat.regression.UpdatingMultipleLinearRegression) ModelSpecificationException(org.apache.commons.math3.stat.regression.ModelSpecificationException)

Example 13 with Covariance

use of org.apache.commons.math3.stat.correlation.Covariance in project lucene-solr by apache.

the class CovarianceEvaluator method evaluate.

/**
 * Evaluates the first two sub-evaluators against the tuple, converts both resulting number lists
 * to {@code double} arrays and returns the covariance between them.
 *
 * @param tuple the tuple handed to both sub-evaluators
 * @return the covariance of the two evaluated columns
 * @throws IOException declared by the signature; not thrown directly by the code in this method
 */
public Number evaluate(Tuple tuple) throws IOException {
    StreamEvaluator firstEvaluator = subEvaluators.get(0);
    StreamEvaluator secondEvaluator = subEvaluators.get(1);
    List<Number> firstValues = (List<Number>) firstEvaluator.evaluate(tuple);
    List<Number> secondValues = (List<Number>) secondEvaluator.evaluate(tuple);

    // Both target arrays are sized up front, before any element is unboxed.
    double[] firstColumn = new double[firstValues.size()];
    double[] secondColumn = new double[secondValues.size()];

    int position = 0;
    for (Number value : firstValues) {
        firstColumn[position++] = value.doubleValue();
    }
    position = 0;
    for (Number value : secondValues) {
        secondColumn[position++] = value.doubleValue();
    }

    return new Covariance().covariance(firstColumn, secondColumn);
}
Also used : Covariance(org.apache.commons.math3.stat.correlation.Covariance) List(java.util.List)

Example 14 with Covariance

use of org.apache.commons.math3.stat.correlation.Covariance in project knime-core by knime.

the class CovarianceMatrixCalculator method calculateCovarianceMatrix.

/**
 * Computes the covariance matrix and puts the result in the given (optional) data container and additionally
 * returns an in-memory representation. The data container is expected to have the data table spec returned at
 * {@link #getResultSpec()}. The implementation traverses the data once.
 * <p>
 * Missing cells are skipped pairwise: a row contributes to the covariance of a column pair only if both cells of
 * that pair are present in the row.
 *
 * @param exec the execution container
 * @param inTable input data
 * @param tableSize the data table size (used only as the denominator for progress reporting)
 * @param resultDataContainer optional result data container
 * @return the covariance matrix
 * @throws CanceledExecutionException if the user canceled the execution
 * @throws IllegalArgumentException if a column pair does not have enough valid values to compute a covariance
 */
public RealMatrix calculateCovarianceMatrix(final ExecutionMonitor exec, final DataTable inTable, final long tableSize, final DataContainer resultDataContainer) throws CanceledExecutionException {
    // Validate that the incoming table (and the optional result container) match the specs held by this instance.
    checkArgument(m_targetSpec.equalStructure(inTable.getDataTableSpec()), "Target tables spec is different from the one given in the constructor!");
    if (resultDataContainer != null) {
        checkArgument(m_resultSpec.equalStructure(resultDataContainer.getTableSpec()), "Result tables spec is invalid!");
    }
    // Computing takes 80% of the progress when the matrix is also written out (the writing phase below takes
    // the remaining 20%), otherwise all of it.
    final ExecutionMonitor computingProgress = exec.createSubProgress(resultDataContainer != null ? 0.8 : 1);
    List<StorelessCovariance> covariancesList = new ArrayList<>();
    // create covariance pairs
    // One two-dimensional accumulator per column pair (i, j) with j >= i, i.e. the upper triangle including the
    // diagonal, added in row-major order.
    for (int i = 0; i < m_indexes.length; i++) {
        for (int j = i; j < m_indexes.length; j++) {
            covariancesList.add(new StorelessCovariance(2));
        }
    }
    // compute rest of co-variance matrix
    int rowCount = 0;
    // Reused for every increment: slot 0 holds the outer column's value, slot 1 the inner column's value.
    double[] buffer = new double[2];
    for (DataRow dataRow : inTable) {
        for (int i = 0; i < m_indexes.length; i++) {
            final int outerIndex = m_indexes[i];
            final DataCell outerCell = dataRow.getCell(outerIndex);
            if (outerCell.isMissing()) {
                // skip missing values
                // (this row then contributes to no pair that has column i as its outer column)
                continue;
            }
            final double outerDouble = ((DoubleValue) outerCell).getDoubleValue();
            for (int j = i; j < m_indexes.length; j++) {
                final int innerIndex = m_indexes[j];
                final DataCell innerCell = dataRow.getCell(innerIndex);
                if (innerCell.isMissing()) {
                    // skip missing values
                    continue;
                }
                final double innerDouble = ((DoubleValue) innerCell).getDoubleValue();
                buffer[0] = outerDouble;
                buffer[1] = innerDouble;
                // NOTE(review): index(...) is defined outside this snippet; presumably it maps (i, j) to the
                // position the pair received in the creation loop above -- confirm.
                int covListIndex = index(m_indexes.length, i, j);
                covariancesList.get(covListIndex).increment(buffer);
            }
        }
        // Post-increment: the fraction reported reflects the number of rows completed before this one.
        computingProgress.setProgress(rowCount++ / (double) tableSize, "Calculate covariance values, processing row: '" + dataRow.getKey() + "'");
        computingProgress.checkCanceled();
    }
    // Copy the storeless covariances to a real matrix
    RealMatrix covMatrix = new Array2DRowRealMatrix(m_indexes.length, m_indexes.length);
    for (int i = 0; i < m_indexes.length; i++) {
        for (int j = i; j < m_indexes.length; j++) {
            int covListIndex = index(m_indexes.length, i, j);
            double covValue;
            try {
                // On the diagonal both buffer slots were fed the same column's value, so entry (1, 1) of the
                // accumulator is read; off the diagonal the cross entry (0, 1) is read.
                covValue = i == j ? covariancesList.get(covListIndex).getCovariance(1, 1) : covariancesList.get(covListIndex).getCovariance(0, 1);
            } catch (NumberIsTooSmallException e) {
                // Re-thrown with the names of the two affected columns; the original exception is kept as cause.
                throw new IllegalArgumentException(String.format("There were not enough valid values to " + "compute covariance between columns: '%s' and '%s'.", inTable.getDataTableSpec().getColumnSpec(m_indexes[i]).getName(), inTable.getDataTableSpec().getColumnSpec(m_indexes[j]).getName()), e);
            }
            // Only the upper triangle is computed; mirror it to obtain the symmetric matrix.
            covMatrix.setEntry(i, j, covValue);
            covMatrix.setEntry(j, i, covValue);
        }
    }
    if (resultDataContainer != null) {
        exec.setProgress("Writing matrix to data table");
        final ExecutionMonitor writingProgress = exec.createSubProgress(0.2);
        // One output row per matrix row; the row key is the name of the result column with the same index.
        for (int i = 0; i < covMatrix.getRowDimension(); i++) {
            resultDataContainer.addRowToTable(new DefaultRow(RowKey.toRowKeys(resultDataContainer.getTableSpec().getColumnSpec(i).getName())[0], covMatrix.getRow(i)));
            exec.checkCanceled();
            writingProgress.setProgress((double) i / covMatrix.getRowDimension(), "Writing row: " + resultDataContainer.getTableSpec().getColumnSpec(i).getName());
        }
    }
    return covMatrix;
}
Also used : ArrayList(java.util.ArrayList) NumberIsTooSmallException(org.apache.commons.math3.exception.NumberIsTooSmallException) StorelessCovariance(org.apache.commons.math3.stat.correlation.StorelessCovariance) DataRow(org.knime.core.data.DataRow) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RealMatrix(org.apache.commons.math3.linear.RealMatrix) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) DoubleValue(org.knime.core.data.DoubleValue) DataCell(org.knime.core.data.DataCell) ExecutionMonitor(org.knime.core.node.ExecutionMonitor) DefaultRow(org.knime.core.data.def.DefaultRow)

Example 15 with Covariance

use of org.apache.commons.math3.stat.correlation.Covariance in project knime-core by knime.

the class CovarianceOperator method getResultInternal.

/**
 * {@inheritDoc}
 */
@Override
protected DataCell getResultInternal() {
    // A covariance is only produced when both collected columns hold the same number of
    // elements; in every other case the result is a missing cell.
    final boolean sameLength = m_cells.getNumElements() == add_cells.getNumElements();
    if (sameLength) {
        final double covariance = new Covariance().covariance(m_cells.getElements(), add_cells.getElements());
        return new DoubleCell(covariance);
    }
    return DataType.getMissingCell();
}
Also used : Covariance(org.apache.commons.math3.stat.correlation.Covariance) DoubleCell(org.knime.core.data.def.DoubleCell)

Aggregations

RealMatrix (org.apache.commons.math3.linear.RealMatrix): 12, ArrayList (java.util.ArrayList): 5, java.util (java.util): 4, Collectors (java.util.stream.Collectors): 4, IntStream (java.util.stream.IntStream): 4, Nonnull (javax.annotation.Nonnull): 4, Nullable (javax.annotation.Nullable): 4, ImmutablePair (org.apache.commons.lang3.tuple.ImmutablePair): 4, Array2DRowRealMatrix (org.apache.commons.math3.linear.Array2DRowRealMatrix): 4, Covariance (org.apache.commons.math3.stat.correlation.Covariance): 4, Logger (org.apache.logging.log4j.Logger): 4, UserException (org.broadinstitute.hellbender.exceptions.UserException): 4, Nd4jIOUtils (org.broadinstitute.hellbender.tools.coveragemodel.nd4jutils.Nd4jIOUtils): 4, Utils (org.broadinstitute.hellbender.utils.Utils): 4, ParamUtils (org.broadinstitute.hellbender.utils.param.ParamUtils): 4, RegressionTrainingData (org.knime.base.node.mine.regression.RegressionTrainingData): 4, INDArray (org.nd4j.linalg.api.ndarray.INDArray): 4, Nd4j (org.nd4j.linalg.factory.Nd4j): 4, NDArrayIndex (org.nd4j.linalg.indexing.NDArrayIndex): 4, TooManyEvaluationsException (org.apache.commons.math3.exception.TooManyEvaluationsException): 3