Usage of org.apache.commons.math3.stat.correlation.Covariance in project knime-core (by KNIME):
class IrlsLearner, method learn.
/**
 * {@inheritDoc}
 *
 * Trains a (multinomial) logistic regression model via iteratively reweighted
 * least squares (IRLS). The coefficient vector {@code beta} is refined until
 * either {@code m_maxIter} iterations are reached or the relative change of
 * every coefficient falls below {@code m_eps}. If the log-likelihood decreases,
 * the step is repeatedly halved (line search) before convergence is re-tested.
 *
 * @param trainingData the classification training rows
 * @param exec monitor used for progress reporting and cancellation
 * @return the learned coefficients, optional covariance matrix, iteration
 *         count and final log-likelihood
 * @throws CanceledExecutionException if the user cancelled
 * @throws InvalidSettingsException if settings are inconsistent with the data
 */
@Override
public LogRegLearnerResult learn(final TrainingData<ClassificationTrainingRow> trainingData,
    final ExecutionMonitor exec) throws CanceledExecutionException, InvalidSettingsException {
    exec.checkCanceled();
    int iter = 0;
    boolean converged = false;
    final int tcC = trainingData.getTargetDimension() + 1;
    final int rC = trainingData.getFeatureCount() - 1;
    // One flat row vector holding (tcC - 1) * (rC + 1) coefficients.
    final RealMatrix beta = MatrixUtils.createRealMatrix(1, (tcC - 1) * (rC + 1));
    Double loglike = 0.0;
    Double loglikeOld = 0.0;
    exec.setMessage("Iterative optimization. Processing iteration 1.");
    // main loop
    while (iter < m_maxIter && !converged) {
        RealMatrix betaOld = beta.copy();
        loglikeOld = loglike;
        // Do heavy work in a separate thread which allows to interrupt it
        // note the queue may block if no more threads are available (e.g. thread count = 1)
        // as soon as we stall in 'get' this thread reduces the number of running thread
        Future<Double> future = ThreadPool.currentPool().enqueue(new Callable<Double>() {
            @Override
            public Double call() throws Exception {
                final ExecutionMonitor progMon = exec.createSubProgress(1.0 / m_maxIter);
                irlsRls(trainingData, beta, rC, tcC, progMon);
                progMon.setProgress(1.0);
                return likelihood(trainingData.iterator(), beta, rC, tcC, exec);
            }
        });
        try {
            loglike = future.get();
        } catch (InterruptedException e) {
            future.cancel(true);
            // Restore the interrupt status so callers up the stack can still
            // observe the interruption (Java concurrency best practice).
            Thread.currentThread().interrupt();
            exec.checkCanceled();
            throw new RuntimeException(e);
        } catch (ExecutionException e) {
            if (e.getCause() instanceof RuntimeException) {
                throw (RuntimeException)e.getCause();
            } else {
                throw new RuntimeException(e.getCause());
            }
        }
        if (Double.isInfinite(loglike) || Double.isNaN(loglike)) {
            throw new RuntimeException(FAILING_MSG);
        }
        exec.checkCanceled();
        // test for decreasing likelihood: halve the step until the likelihood
        // no longer decreases or beta has effectively stopped moving
        while ((Double.isInfinite(loglike) || Double.isNaN(loglike) || loglike < loglikeOld) && iter > 0) {
            converged = true;
            for (int k = 0; k < beta.getColumnDimension(); k++) {
                if (abs(beta.getEntry(0, k) - betaOld.getEntry(0, k)) > m_eps * abs(betaOld.getEntry(0, k))) {
                    converged = false;
                    break;
                }
            }
            if (converged) {
                break;
            }
            // half the step size of beta
            beta.setSubMatrix((beta.add(betaOld)).scalarMultiply(0.5).getData(), 0, 0);
            exec.checkCanceled();
            loglike = likelihood(trainingData.iterator(), beta, rC, tcC, exec);
            exec.checkCanceled();
        }
        // test for convergence: relative coefficient change below m_eps
        converged = true;
        for (int k = 0; k < beta.getColumnDimension(); k++) {
            if (abs(beta.getEntry(0, k) - betaOld.getEntry(0, k)) > m_eps * abs(betaOld.getEntry(0, k))) {
                converged = false;
                break;
            }
        }
        iter++;
        LOGGER.debug("#Iterations: " + iter);
        LOGGER.debug("Log Likelihood: " + loglike);
        StringBuilder betaBuilder = new StringBuilder();
        for (int i = 0; i < beta.getColumnDimension() - 1; i++) {
            betaBuilder.append(Double.toString(beta.getEntry(0, i)));
            betaBuilder.append(", ");
        }
        if (beta.getColumnDimension() > 0) {
            betaBuilder.append(Double.toString(beta.getEntry(0, beta.getColumnDimension() - 1)));
        }
        LOGGER.debug("beta: " + betaBuilder.toString());
        exec.checkCanceled();
        exec.setMessage("Iterative optimization. #Iterations: " + iter + " | Log-likelihood: "
            + DoubleFormat.formatDouble(loglike) + ". Processing iteration " + (iter + 1) + ".");
    }
    StringBuilder warnBuilder = new StringBuilder();
    if (iter >= m_maxIter) {
        warnBuilder.append("The algorithm did not reach convergence after the specified number of epochs. "
            + "Setting the epoch limit higher might result in a better model.");
    }
    // The covariance matrix of the coefficient estimates.
    RealMatrix covMat = null;
    if (m_calcCovMatrix) {
        try {
            // NOTE(review): 'A' is not defined in this method — presumably a field
            // holding the observed Fisher information matrix filled by irlsRls();
            // confirm against the full class. Its negated inverse is the covariance.
            covMat = new QRDecomposition(A).getSolver().getInverse().scalarMultiply(-1);
        } catch (SingularMatrixException sme) {
            if (warnBuilder.length() > 0) {
                warnBuilder.append("\n");
            }
            warnBuilder.append("The covariance matrix could not be calculated because the"
                + " observed fisher information matrix was singular.");
        }
    }
    // Reshape the flat coefficient row vector into (tcC - 1) x (rC + 1).
    RealMatrix betaMat = MatrixUtils.createRealMatrix(tcC - 1, rC + 1);
    for (int i = 0; i < beta.getColumnDimension(); i++) {
        int r = i / (rC + 1);
        int c = i % (rC + 1);
        betaMat.setEntry(r, c, beta.getEntry(0, i));
    }
    m_warning = warnBuilder.length() > 0 ? warnBuilder.toString() : null;
    return new LogRegLearnerResult(betaMat, covMat, iter, loglike);
}
Usage of org.apache.commons.math3.stat.correlation.Covariance in project knime-core (by KNIME):
class CovarianceMatrixCalculatorTest, method computeCovarianceOfRandomData.
/**
 * Computes the covariance matrix of a randomly generated data set and checks
 * it against Commons Math's {@link Covariance} as the reference implementation.
 *
 * @throws InvalidSettingsException
 * @throws CanceledExecutionException
 */
@Test
public void computeCovarianceOfRandomData() throws InvalidSettingsException, CanceledExecutionException {
    // Time-based seed, logged so a failing run can be reproduced.
    final long seed = System.currentTimeMillis();
    System.out.println("Mahalanobis test random seed: " + seed);
    final Random random = new Random(seed);
    final double[][] data = new double[TEST_TABLE_SIZE][];
    final BufferedDataContainer inputContainer = generateData(random, data, SPEC_4);
    inputContainer.close();
    final BufferedDataTable inputTable = inputContainer.getTable();
    // Run the calculator under test and capture both matrix and table output.
    final CovarianceMatrixCalculator calculator =
        new CovarianceMatrixCalculator(SPEC_4, SPEC_4.getColumnNames());
    final BufferedDataContainer resultContainer =
        m_exec.createDataContainer(calculator.getResultSpec());
    final RealMatrix actualMatrix =
        calculator.computeCovarianceMatrix(m_exec, inputTable, resultContainer);
    resultContainer.close();
    final BufferedDataTable actualTable = resultContainer.getTable();
    // Reference result straight from Commons Math.
    final RealMatrix expectedMatrix = new Covariance(data).getCovarianceMatrix();
    assertCovarianceMatrixEquality(actualMatrix, expectedMatrix, actualTable, SPEC_4, true);
}
Usage of org.apache.commons.math3.stat.correlation.Covariance in project knime-core (by KNIME):
class CovarianceMatrixCalculatorTest, method computeCovarianceOfRandomDataWithMissingValues.
/**
 * Tests the covariance computation on data with missing values: two rows that
 * each contain a missing cell are appended and must be ignored by the
 * calculator.
 *
 * @throws InvalidSettingsException
 * @throws CanceledExecutionException
 */
@Test
public void computeCovarianceOfRandomDataWithMissingValues()
    throws InvalidSettingsException, CanceledExecutionException {
    // Fixed seed for reproducibility. The original printed
    // System.currentTimeMillis() as "the random seed" while actually seeding
    // with 47, which made the log message misleading — log the real seed.
    final long seed = 47;
    System.out.println("Mahalanobis test random seed: " + seed);
    final Random random = new Random(seed);
    double[][] data = new double[10][];
    BufferedDataContainer inTableCont = generateData(random, data, SPEC_2);
    // add two rows with missing values, at the end both should be ignored
    DataCell[] row = new DataCell[2];
    row[0] = new DoubleCell(random.nextDouble());
    row[1] = DataType.getMissingCell();
    inTableCont.addRowToTable(new DefaultRow(new RowKey("Missing!1"), row));
    row[1] = new DoubleCell(random.nextDouble());
    row[0] = DataType.getMissingCell();
    inTableCont.addRowToTable(new DefaultRow(new RowKey("Missing!2"), row));
    inTableCont.close();
    BufferedDataTable inTable = inTableCont.getTable();
    // As the missing rows should be ignored, the covariance matrix computation
    // must match the reference computed on the complete rows only.
    CovarianceMatrixCalculator covMatrixCalculator =
        new CovarianceMatrixCalculator(SPEC_2, SPEC_2.getColumnNames());
    BufferedDataContainer covDataContainer = m_exec.createDataContainer(covMatrixCalculator.getResultSpec());
    RealMatrix covMatrixUnderTest = covMatrixCalculator.computeCovarianceMatrix(m_exec, inTable, covDataContainer);
    covDataContainer.close();
    Covariance covariance = new Covariance(data);
    RealMatrix referenceCovarianceMatrix = covariance.getCovarianceMatrix();
    BufferedDataTable covTableUnderTest = covDataContainer.getTable();
    // The diagonal is the variance which also changes considering missing values...
    // but we check only the part of the covariance matrix at the top right triangle.
    assertCovarianceMatrixEquality(covMatrixUnderTest, referenceCovarianceMatrix, covTableUnderTest, SPEC_2, false);
}
Usage of org.apache.commons.math3.stat.correlation.Covariance in project knime-core (by KNIME):
class Learner, method perform.
/**
 * Trains a logistic regression model via iteratively reweighted least squares
 * (IRLS) and assembles the resulting {@link LogisticRegressionContent}.
 *
 * @param data The data table.
 * @param exec The execution context used for reporting progress.
 * @return An object which holds the results.
 * @throws CanceledExecutionException when method is cancelled
 * @throws InvalidSettingsException When settings are inconsistent with the data
 */
public LogisticRegressionContent perform(final BufferedDataTable data, final ExecutionContext exec)
    throws CanceledExecutionException, InvalidSettingsException {
    exec.checkCanceled();
    int iter = 0;
    boolean converged = false;
    final RegressionTrainingData trainingData = new RegressionTrainingData(data, m_outSpec, true,
        m_targetReferenceCategory, m_sortTargetCategories, m_sortFactorsCategories);
    int targetIndex = data.getDataTableSpec().findColumnIndex(m_outSpec.getTargetCols().get(0).getName());
    final int tcC = trainingData.getDomainValues().get(targetIndex).size();
    final int rC = trainingData.getRegressorCount();
    // One flat ROW vector (1 x (tcC - 1) * (rC + 1)) holding all coefficients.
    final RealMatrix beta = new Array2DRowRealMatrix(1, (tcC - 1) * (rC + 1));
    Double loglike = 0.0;
    Double loglikeOld = 0.0;
    exec.setMessage("Iterative optimization. Processing iteration 1.");
    // main loop
    while (iter < m_maxIter && !converged) {
        RealMatrix betaOld = beta.copy();
        loglikeOld = loglike;
        // Do heavy work in a separate thread which allows to interrupt it
        // note the queue may block if no more threads are available (e.g. thread count = 1)
        // as soon as we stall in 'get' this thread reduces the number of running thread
        Future<Double> future = ThreadPool.currentPool().enqueue(new Callable<Double>() {
            @Override
            public Double call() throws Exception {
                final ExecutionMonitor progMon = exec.createSubProgress(1.0 / m_maxIter);
                irlsRls(trainingData, beta, rC, tcC, progMon);
                progMon.setProgress(1.0);
                return likelihood(trainingData.iterator(), beta, rC, tcC, exec);
            }
        });
        try {
            loglike = future.get();
        } catch (InterruptedException e) {
            future.cancel(true);
            // Restore the interrupt status so callers up the stack can still
            // observe the interruption (Java concurrency best practice).
            Thread.currentThread().interrupt();
            exec.checkCanceled();
            throw new RuntimeException(e);
        } catch (ExecutionException e) {
            if (e.getCause() instanceof RuntimeException) {
                throw (RuntimeException)e.getCause();
            } else {
                throw new RuntimeException(e.getCause());
            }
        }
        if (Double.isInfinite(loglike) || Double.isNaN(loglike)) {
            throw new RuntimeException(FAILING_MSG);
        }
        exec.checkCanceled();
        // test for decreasing likelihood: halve the step until it recovers.
        // BUGFIX: beta is a 1-row matrix, so iterating getRowDimension() with
        // getEntry(k, 0) only ever inspected the first coefficient; iterate the
        // columns with getEntry(0, k) instead (consistent with IrlsLearner.learn).
        while ((Double.isInfinite(loglike) || Double.isNaN(loglike) || loglike < loglikeOld) && iter > 0) {
            converged = true;
            for (int k = 0; k < beta.getColumnDimension(); k++) {
                if (abs(beta.getEntry(0, k) - betaOld.getEntry(0, k)) > m_eps * abs(betaOld.getEntry(0, k))) {
                    converged = false;
                    break;
                }
            }
            if (converged) {
                break;
            }
            // half the step size of beta
            beta.setSubMatrix((beta.add(betaOld)).scalarMultiply(0.5).getData(), 0, 0);
            exec.checkCanceled();
            loglike = likelihood(trainingData.iterator(), beta, rC, tcC, exec);
            exec.checkCanceled();
        }
        // test for convergence over ALL coefficients (see BUGFIX note above)
        converged = true;
        for (int k = 0; k < beta.getColumnDimension(); k++) {
            if (abs(beta.getEntry(0, k) - betaOld.getEntry(0, k)) > m_eps * abs(betaOld.getEntry(0, k))) {
                converged = false;
                break;
            }
        }
        iter++;
        LOGGER.debug("#Iterations: " + iter);
        LOGGER.debug("Log Likelihood: " + loglike);
        StringBuilder betaBuilder = new StringBuilder();
        for (int i = 0; i < beta.getColumnDimension() - 1; i++) {
            betaBuilder.append(Double.toString(beta.getEntry(0, i)));
            betaBuilder.append(", ");
        }
        if (beta.getColumnDimension() > 0) {
            betaBuilder.append(Double.toString(beta.getEntry(0, beta.getColumnDimension() - 1)));
        }
        LOGGER.debug("beta: " + betaBuilder.toString());
        exec.checkCanceled();
        exec.setMessage("Iterative optimization. #Iterations: " + iter + " | Log-likelihood: "
            + DoubleFormat.formatDouble(loglike) + ". Processing iteration " + (iter + 1) + ".");
    }
    // The covariance matrix of the coefficient estimates.
    // NOTE(review): 'A' is not defined in this method — presumably a field
    // holding the observed Fisher information matrix filled by irlsRls();
    // confirm against the full class. Its negated inverse is the covariance.
    RealMatrix covMat = new QRDecomposition(A).getSolver().getInverse().scalarMultiply(-1);
    // Partition the active columns into factors (nominal) and covariates.
    List<String> factorList = new ArrayList<String>();
    List<String> covariateList = new ArrayList<String>();
    Map<String, List<DataCell>> factorDomainValues = new HashMap<String, List<DataCell>>();
    for (int i : trainingData.getActiveCols()) {
        if (trainingData.getIsNominal().get(i)) {
            String factor = data.getDataTableSpec().getColumnSpec(i).getName();
            factorList.add(factor);
            List<DataCell> values = trainingData.getDomainValues().get(i);
            factorDomainValues.put(factor, values);
        } else {
            covariateList.add(data.getDataTableSpec().getColumnSpec(i).getName());
        }
    }
    // Convert to JAMA matrices as expected by LogisticRegressionContent.
    Matrix betaJama = new Matrix(beta.getData());
    Matrix covMatJama = new Matrix(covMat.getData());
    // create content
    LogisticRegressionContent content = new LogisticRegressionContent(m_outSpec, factorList,
        covariateList, m_targetReferenceCategory, m_sortTargetCategories, m_sortFactorsCategories,
        betaJama, loglike, covMatJama, iter);
    return content;
}
Aggregations