use of org.apache.commons.math3.stat.correlation.Covariance in project knime-core by knime.
the class RegressionStatisticsLearner method createCovarianceMatrix.
/**
 * Copies the parameter covariances of a regression result into a square matrix.
 *
 * @param result the {@link RegressionResults} whose parameter covariances are read
 * @return a {@link RealMatrix} with one row and one column per parameter; entry {@code (row, col)} holds
 *         {@code result.getCovarianceOfParameters(row, col)}
 * @since 3.4
 */
protected RealMatrix createCovarianceMatrix(final RegressionResults result) {
    final int paramCount = result.getNumberOfParameters();
    final RealMatrix covariances = MatrixUtils.createRealMatrix(paramCount, paramCount);
    // Every cell is queried individually (row by row); no symmetry of the result is assumed.
    int row = 0;
    while (row < paramCount) {
        int col = 0;
        while (col < paramCount) {
            final double entry = result.getCovarianceOfParameters(row, col);
            covariances.setEntry(row, col, entry);
            col++;
        }
        row++;
    }
    return covariances;
}
use of org.apache.commons.math3.stat.correlation.Covariance in project knime-core by knime.
the class Learner method perform.
/**
 * Runs the regression on the given table and wraps the outcome in a {@link LinearRegressionContent}.
 * <p>
 * If the regression raises a {@link ModelSpecificationException}, a content object is still returned: its
 * covariance matrix is filled with NaNs, both R-squared values are {@link Double#NaN}, and the exception
 * message is handed over as the last constructor argument.
 *
 * @param data The data table.
 * @param exec The execution context used for reporting progress.
 * @return An object which holds the results.
 * @throws CanceledExecutionException When method is cancelled
 * @throws InvalidSettingsException When settings are inconsistent with the data
 */
@Override
public LinearRegressionContent perform(final BufferedDataTable data, final ExecutionContext exec)
    throws CanceledExecutionException, InvalidSettingsException {
    exec.checkCanceled();
    final RegressionTrainingData trainingData = new RegressionTrainingData(data, m_outSpec, m_failOnMissing);
    // At least one statistics slot is allocated even when there are no regressors,
    // because stats[0] is read below for the row count.
    final int statsSize = Math.max(1, trainingData.getRegressorCount());
    final SummaryStatistics[] stats = new SummaryStatistics[statsSize];
    final UpdatingMultipleLinearRegression regression = initStatistics(statsSize, stats);
    processTable(exec, trainingData, stats, regression);

    final List<String> factorList = new ArrayList<String>();
    final List<String> covariateList = createCovariateListAndFillFactors(data, trainingData, factorList);

    try {
        final RegressionResults result = regression.regress();
        final RealMatrix beta = MatrixUtils.createRowRealMatrix(result.getParameterEstimates());
        final RealMatrix covMat = createCovarianceMatrix(result);
        final int rowCount = (int) stats[0].getN();
        final double rSquared = result.getRSquared();
        final double adjustedRSquared = result.getAdjustedRSquared();
        return new LinearRegressionContent(m_outSpec, rowCount, factorList, covariateList, beta,
            m_includeConstant, m_offsetValue, covMat, rSquared, adjustedRSquared, stats, null);
    } catch (ModelSpecificationException e) {
        // Work out how many coefficients the model would have had, so the placeholder
        // matrices get matching dimensions.
        int dim = m_includeConstant ? 1 : 0;
        dim += trainingData.getRegressorCount();
        if (!factorList.isEmpty()) {
            // Only the first factor column contributes here (value count minus one, at least one).
            final int valueCount =
                data.getDataTableSpec().getColumnSpec(factorList.get(0)).getDomain().getValues().size();
            dim += Math.max(1, valueCount - 1);
        }
        final RealMatrix beta = MatrixUtils.createRealMatrix(1, dim);
        final RealMatrix covMat = MatrixUtils.createRealMatrix(dim, dim);
        // NOTE(review): only covMat is filled with NaNs; the corresponding call for beta
        // (fillWithNaNs(beta)) is disabled, so beta keeps the values createRealMatrix gave it
        // -- confirm this is intended.
        fillWithNaNs(covMat);
        final int rowCount = (int) stats[0].getN();
        return new LinearRegressionContent(m_outSpec, rowCount, factorList, covariateList, beta,
            m_includeConstant, m_offsetValue, covMat, Double.NaN, Double.NaN, stats, e.getMessage());
    }
}
use of org.apache.commons.math3.stat.correlation.Covariance in project lucene-solr by apache.
the class CovarianceEvaluator method evaluate.
/**
 * Evaluates the first two sub-evaluators against the tuple, converts both resulting number lists
 * to {@code double} arrays and returns their covariance.
 *
 * @param tuple the tuple passed to both sub-evaluators
 * @return the result of {@link Covariance#covariance(double[], double[])} for the two columns
 * @throws IOException declared by the signature; presumably raised by the sub-evaluators, as this
 *         body does not throw it directly
 */
public Number evaluate(Tuple tuple) throws IOException {
    StreamEvaluator firstEvaluator = subEvaluators.get(0);
    StreamEvaluator secondEvaluator = subEvaluators.get(1);
    List<Number> firstValues = (List<Number>) firstEvaluator.evaluate(tuple);
    List<Number> secondValues = (List<Number>) secondEvaluator.evaluate(tuple);

    // NOTE(review): neither list is checked for null or for matching length before use.
    double[] firstColumn = new double[firstValues.size()];
    double[] secondColumn = new double[secondValues.size()];

    int cursor = 0;
    for (Number value : firstValues) {
        firstColumn[cursor++] = value.doubleValue();
    }
    cursor = 0;
    for (Number value : secondValues) {
        secondColumn[cursor++] = value.doubleValue();
    }

    return new Covariance().covariance(firstColumn, secondColumn);
}
use of org.apache.commons.math3.stat.correlation.Covariance in project knime-core by knime.
the class CovarianceMatrixCalculator method calculateCovarianceMatrix.
/**
 * Computes the covariance matrix and puts the result in the given (optional) data container and additionally
 * returns an in-memory representation. The data container is expected to have the data table spec returned at
 * {@link #getResultSpec()}. The implementation traverses the data once.
 * <p>
 * Missing cells are skipped pair-wise: a row contributes to the covariance of a column pair only when both
 * cells of that pair are present in the row.
 *
 * @param exec the execution container
 * @param inTable input data
 * @param tableSize the data table size
 * @param resultDataContainer optional result data container
 * @return the covariance matrix
 * @throws CanceledExecutionException if the user canceled the execution
 * @throws IllegalArgumentException if a column pair did not receive enough valid values to compute a
 *             covariance (presumably also raised by the spec checks at the top -- verify against
 *             {@code checkArgument})
 */
public RealMatrix calculateCovarianceMatrix(final ExecutionMonitor exec, final DataTable inTable, final long tableSize, final DataContainer resultDataContainer) throws CanceledExecutionException {
    checkArgument(m_targetSpec.equalStructure(inTable.getDataTableSpec()), "Target tables spec is different from the one given in the constructor!");
    if (resultDataContainer != null) {
        checkArgument(m_resultSpec.equalStructure(resultDataContainer.getTableSpec()), "Result tables spec is invalid!");
    }
    // Computing takes the whole progress range unless the matrix is also written to a container.
    final ExecutionMonitor computingProgress = exec.createSubProgress(resultDataContainer != null ? 0.8 : 1);

    final List<StorelessCovariance> covariancesList = createPairAccumulators();
    accumulateRows(computingProgress, inTable, tableSize, covariancesList);
    final RealMatrix covMatrix = copyToSymmetricMatrix(inTable, covariancesList);

    if (resultDataContainer != null) {
        writeMatrixToContainer(exec, covMatrix, resultDataContainer);
    }
    return covMatrix;
}

/** Creates one two-dimensional accumulator per column pair (i, j) with j >= i, ordered by i and then j. */
private List<StorelessCovariance> createPairAccumulators() {
    final List<StorelessCovariance> accumulators = new ArrayList<>();
    for (int i = 0; i < m_indexes.length; i++) {
        for (int j = i; j < m_indexes.length; j++) {
            accumulators.add(new StorelessCovariance(2));
        }
    }
    return accumulators;
}

/** Feeds every row of the table into the pair accumulators, skipping pairs with a missing cell. */
private void accumulateRows(final ExecutionMonitor computingProgress, final DataTable inTable, final long tableSize, final List<StorelessCovariance> accumulators) throws CanceledExecutionException {
    // long, not int: tableSize is a long, and an int counter would overflow on very large tables.
    long rowCount = 0;
    // Reused for every increment to avoid allocating a new array per value pair.
    final double[] buffer = new double[2];
    for (DataRow dataRow : inTable) {
        for (int i = 0; i < m_indexes.length; i++) {
            final DataCell outerCell = dataRow.getCell(m_indexes[i]);
            if (outerCell.isMissing()) {
                // skip missing values
                continue;
            }
            final double outerDouble = ((DoubleValue) outerCell).getDoubleValue();
            for (int j = i; j < m_indexes.length; j++) {
                final DataCell innerCell = dataRow.getCell(m_indexes[j]);
                if (innerCell.isMissing()) {
                    // skip missing values
                    continue;
                }
                buffer[0] = outerDouble;
                buffer[1] = ((DoubleValue) innerCell).getDoubleValue();
                accumulators.get(index(m_indexes.length, i, j)).increment(buffer);
            }
        }
        computingProgress.setProgress(rowCount++ / (double) tableSize, "Calculate covariance values, processing row: '" + dataRow.getKey() + "'");
        computingProgress.checkCanceled();
    }
}

/** Copies the accumulated pair covariances into a full matrix, mirroring each value across the diagonal. */
private RealMatrix copyToSymmetricMatrix(final DataTable inTable, final List<StorelessCovariance> accumulators) {
    final RealMatrix covMatrix = new Array2DRowRealMatrix(m_indexes.length, m_indexes.length);
    for (int i = 0; i < m_indexes.length; i++) {
        for (int j = i; j < m_indexes.length; j++) {
            final StorelessCovariance accumulator = accumulators.get(index(m_indexes.length, i, j));
            double covValue;
            try {
                // On the diagonal both buffer slots held the same column, so (1, 1) is that column's variance.
                covValue = i == j ? accumulator.getCovariance(1, 1) : accumulator.getCovariance(0, 1);
            } catch (NumberIsTooSmallException e) {
                throw new IllegalArgumentException(String.format("There were not enough valid values to " + "compute covariance between columns: '%s' and '%s'.", inTable.getDataTableSpec().getColumnSpec(m_indexes[i]).getName(), inTable.getDataTableSpec().getColumnSpec(m_indexes[j]).getName()), e);
            }
            covMatrix.setEntry(i, j, covValue);
            covMatrix.setEntry(j, i, covValue);
        }
    }
    return covMatrix;
}

/** Writes the matrix row by row into the container, using the container's column names as row keys. */
private void writeMatrixToContainer(final ExecutionMonitor exec, final RealMatrix covMatrix, final DataContainer resultDataContainer) throws CanceledExecutionException {
    exec.setProgress("Writing matrix to data table");
    final ExecutionMonitor writingProgress = exec.createSubProgress(0.2);
    final int rowDimension = covMatrix.getRowDimension();
    for (int i = 0; i < rowDimension; i++) {
        final String columnName = resultDataContainer.getTableSpec().getColumnSpec(i).getName();
        resultDataContainer.addRowToTable(new DefaultRow(RowKey.toRowKeys(columnName)[0], covMatrix.getRow(i)));
        exec.checkCanceled();
        writingProgress.setProgress((double) i / rowDimension, "Writing row: " + columnName);
    }
}
use of org.apache.commons.math3.stat.correlation.Covariance in project knime-core by knime.
the class CovarianceOperator method getResultInternal.
/**
 * {@inheritDoc}
 * <p>
 * Returns a missing cell when the two value buffers hold a different number of elements; otherwise
 * returns the covariance of their contents as a {@link DoubleCell}.
 */
@Override
protected DataCell getResultInternal() {
    final boolean sameLength = m_cells.getNumElements() == add_cells.getNumElements();
    if (!sameLength) {
        // The two columns cannot be paired up value by value.
        return DataType.getMissingCell();
    }
    // NOTE(review): buffers with fewer than two values are passed on unchecked -- confirm how
    // Covariance#covariance reacts to that.
    final Covariance estimator = new Covariance();
    final double[] firstValues = m_cells.getElements();
    final double[] secondValues = add_cells.getElements();
    return new DoubleCell(estimator.covariance(firstValues, secondValues));
}
Aggregations