use of Jama.Matrix in project gatk by broadinstitute.
the class VariantRecalibrator method makeCovariancesTable.
/**
 * Builds a report table that lists the covariance matrix of every Gaussian in the model.
 *
 * <p>The table has a "Gaussian" column, an "Annotation" column and one column per entry of
 * {@code annotationList}. Every Gaussian contributes one table row per annotation: row {@code j}
 * of Gaussian {@code i} is written to table row {@code j + i * annotationList.size()} and holds
 * row {@code j} of that Gaussian's {@code sigma} matrix.
 *
 * @param tableName        name handed to the report table
 * @param tableDescription description handed to the report table
 * @param annotationList   annotation names, used both as column names and as row labels
 * @param model            mixture model whose Gaussians are reported
 * @param formatString     format assigned to every annotation column
 * @return the populated table
 * @throws IllegalStateException if a covariance matrix is not square with side
 *                               {@code annotationList.size()}
 */
private GATKReportTable makeCovariancesTable(final String tableName, final String tableDescription, final List<String> annotationList, final GaussianMixtureModel model, final String formatString) {
    final int annotationCount = annotationList.size();

    // Two extra columns hold the Gaussian index and the annotation label of each row.
    final GATKReportTable covariancesTable = new GATKReportTable(tableName, tableDescription, annotationCount + 2, GATKReportTable.Sorting.DO_NOT_SORT);
    covariancesTable.addColumn("Gaussian", "");
    covariancesTable.addColumn("Annotation", "");
    for (final String columnName : annotationList) {
        covariancesTable.addColumn(columnName, formatString);
    }

    int gaussianIndex = 0;
    for (final MultivariateGaussian gaussian : model.getModelGaussians()) {
        final Matrix sigma = gaussian.sigma;
        final boolean sizeMatches = sigma.getRowDimension() == annotationCount && sigma.getColumnDimension() == annotationCount;
        if (!sizeMatches) {
            throw new IllegalStateException("Gaussian covariance matrix does not have the same size as the list of annotations");
        }

        // Rows of consecutive Gaussians are stacked below each other.
        final int rowOffset = gaussianIndex * annotationCount;
        for (int matrixRow = 0; matrixRow < annotationCount; matrixRow++) {
            final int tableRow = rowOffset + matrixRow;
            covariancesTable.set(tableRow, "Gaussian", gaussianIndex);
            covariancesTable.set(tableRow, "Annotation", annotationList.get(matrixRow));
            for (int matrixCol = 0; matrixCol < annotationCount; matrixCol++) {
                covariancesTable.set(tableRow, annotationList.get(matrixCol), sigma.get(matrixRow, matrixCol));
            }
        }
        gaussianIndex++;
    }
    return covariancesTable;
}
use of Jama.Matrix in project knime-core by knime.
the class PCANodeModel method execute.
/**
 * Runs the principal component analysis on the input table and appends the projected columns.
 *
 * <p>Computes the column means and the covariance matrix of the selected columns, decomposes the
 * covariance matrix, picks the requested number of eigenvectors and appends one projected column
 * per selected dimension. The original columns are removed when configured.
 *
 * {@inheritDoc}
 */
@Override
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    final BufferedDataTable inputTable = (BufferedDataTable) inData[DATA_INPORT];
    final long inputRowCount = inputTable.size();
    if (inputRowCount == 0) {
        throw new IllegalArgumentException("Input table is empty!");
    }
    if (inputRowCount == 1) {
        throw new IllegalArgumentException("Input table has only one row!");
    }

    // Column means first, then the covariance matrix of the selected columns.
    final ExecutionContext meanContext = exec.createSubExecutionContext(0.2);
    final double[] meanVector = getMeanVector(inputTable, m_inputColumnIndices, false, meanContext);
    final int selectedColumnCount = m_inputColumnIndices.length;
    final double[][] covarianceValues = new double[selectedColumnCount][selectedColumnCount];
    final ExecutionContext covarianceContext = exec.createSubExecutionContext(0.2);
    final int skippedRows = getCovarianceMatrix(covarianceContext, inputTable, m_inputColumnIndices, meanVector, covarianceValues);
    final Matrix covarianceMatrix = new Matrix(covarianceValues);
    if (skippedRows > 0) {
        if (m_failOnMissingValues.getBooleanValue()) {
            throw new IllegalArgumentException("missing, infinite or impossible values in table");
        }
        setWarningMessage(skippedRows + " rows ignored because of missing, infinite or impossible values");
    }

    // Eigen decomposition of the covariance matrix.
    final ExecutionContext decompositionContext = exec.createSubExecutionContext(0.2);
    decompositionContext.setMessage("computing spectral decomposition");
    final EigenvalueDecomposition decomposition = covarianceMatrix.eig();
    exec.checkCanceled();
    decompositionContext.setProgress(0.8);

    final double[] eigenvalues = EigenValue.extractEVVector(decomposition);
    m_dimSelection.setEigenValues(eigenvalues);
    final int targetDimensions = m_dimSelection.getNeededDimensions();
    // The eigenvalues belong to this input only; clear them so they are not kept if the input changes.
    m_dimSelection.setEigenValues(null);

    final boolean dimensionsValid = targetDimensions >= 1 && targetDimensions <= m_inputColumnIndices.length;
    if (!dimensionsValid) {
        throw new IllegalArgumentException("invalid number of dimensions to reduce to: " + targetDimensions);
    }
    exec.checkCanceled();
    decompositionContext.setProgress(0.9);
    final Matrix eigenvectors = EigenValue.getSortedEigenVectors(decomposition.getV().getArray(), eigenvalues, targetDimensions);
    exec.checkCanceled();
    decompositionContext.setProgress(1);
    exec.checkCanceled();

    // Append one projected column per selected dimension.
    final DataTableSpec inputSpec = (DataTableSpec) inData[DATA_INPORT].getSpec();
    final DataColumnSpec[] appendedSpecs = createAddTableSpec(inputSpec, targetDimensions);
    final CellFactory projectionFactory = new CellFactory() {
        @Override
        public DataColumnSpec[] getColumnSpecs() {
            return appendedSpecs;
        }

        @Override
        public DataCell[] getCells(final DataRow row) {
            return convertInputRow(eigenvectors, row, meanVector, m_inputColumnIndices, targetDimensions, false);
        }

        @Override
        public void setProgress(final int curRowNr, final int rowCount, final RowKey lastKey, final ExecutionMonitor texec) {
            final double fraction = (double) curRowNr / rowCount;
            texec.setProgress(fraction, "processing " + curRowNr + " of " + rowCount);
        }
    };

    final ColumnRearranger rearranger = new ColumnRearranger((DataTableSpec) inData[0].getSpec());
    rearranger.append(projectionFactory);
    if (m_removeOriginalCols.getBooleanValue()) {
        rearranger.remove(m_inputColumnIndices);
    }
    final BufferedDataTable projectedTable = exec.createColumnRearrangeTable((BufferedDataTable) inData[0], rearranger, exec.createSubProgress(0.4));

    final PortObject[] outputs = new PortObject[1];
    outputs[DATA_OUTPORT] = projectedTable;
    return outputs;
}
use of Jama.Matrix in project knime-core by knime.
the class PCANodeModel method convertInputRow.
/**
 * Reduces a single input row to the principal components.
 *
 * <p>The selected cells are centred by subtracting {@code means}; the resulting
 * {@code 1 x inputColumnIndices.length} row vector is then multiplied from the left onto
 * {@code eigenvectors}. If any selected cell is missing, every returned cell is a missing cell,
 * unless {@code failOnMissing} is set, in which case an exception is thrown.
 *
 * @param eigenvectors
 *            projection matrix; because the centred row vector is multiplied by it, it needs
 *            {@code inputColumnIndices.length} rows and at least {@code resultDimensions}
 *            columns
 * @param row
 *            the row to convert
 * @param means
 *            mean values of the columns, indexed like {@code inputColumnIndices}
 * @param inputColumnIndices
 *            indices of the input columns
 * @param resultDimensions
 *            number of dimensions to project to
 * @param failOnMissing
 *            throw exception if missing values are encountered
 * @return array of {@code resultDimensions} data cells to be added to the row
 * @throws IllegalArgumentException
 *             if a selected cell is missing and {@code failOnMissing} is {@code true}
 */
protected static DataCell[] convertInputRow(final Matrix eigenvectors, final DataRow row, final double[] means, final int[] inputColumnIndices, final int resultDimensions, final boolean failOnMissing) {
    // A single missing cell invalidates the whole row, so stop scanning at the first one.
    boolean missingValues = false;
    for (int i = 0; i < inputColumnIndices.length; i++) {
        if (row.getCell(inputColumnIndices[i]).isMissing()) {
            missingValues = true;
            break;
        }
    }
    if (missingValues && failOnMissing) {
        throw new IllegalArgumentException("table contains missing values");
    }
    // put each cell of a pca row into the row to append
    final DataCell[] cells = new DataCell[resultDimensions];
    if (missingValues) {
        for (int i = 0; i < resultDimensions; i++) {
            cells[i] = DataType.getMissingCell();
        }
    } else {
        // centre the input values before projecting them
        final double[][] rowVec = new double[1][inputColumnIndices.length];
        for (int i = 0; i < rowVec[0].length; i++) {
            rowVec[0][i] = ((DoubleValue) row.getCell(inputColumnIndices[i])).getDoubleValue() - means[i];
        }
        final double[][] newRow = new Matrix(rowVec).times(eigenvectors).getArray();
        for (int i = 0; i < resultDimensions; i++) {
            cells[i] = new DoubleCell(newRow[0][i]);
        }
    }
    return cells;
}
use of Jama.Matrix in project knime-core by knime.
the class Learner method perform.
/**
 * Runs the iterative optimization of the regression coefficients and collects the results.
 *
 * <p>Each iteration runs {@code irlsRls} followed by the likelihood evaluation on the current
 * thread pool so that the heavy work can be interrupted. If the log-likelihood decreased, the
 * step is halved until the likelihood no longer decreases or the coefficients stop moving. The
 * loop ends after {@code m_maxIter} iterations, or as soon as every coefficient changed by at
 * most {@code m_eps} relative to its previous value.
 *
 * @param data The data table.
 * @param exec The execution context used for reporting progress.
 * @return An object which holds the results.
 * @throws CanceledExecutionException when method is cancelled
 * @throws InvalidSettingsException When settings are inconsistent with the data
 * @throws RuntimeException if the log-likelihood becomes infinite or NaN, or if the worker
 *             task fails or is interrupted without the execution being cancelled
 */
public LogisticRegressionContent perform(final BufferedDataTable data, final ExecutionContext exec) throws CanceledExecutionException, InvalidSettingsException {
    exec.checkCanceled();
    int iter = 0;
    boolean converged = false;
    final RegressionTrainingData trainingData = new RegressionTrainingData(data, m_outSpec, true, m_targetReferenceCategory, m_sortTargetCategories, m_sortFactorsCategories);
    int targetIndex = data.getDataTableSpec().findColumnIndex(m_outSpec.getTargetCols().get(0).getName());
    final int tcC = trainingData.getDomainValues().get(targetIndex).size();
    final int rC = trainingData.getRegressorCount();
    // beta is a ROW vector: one row, (tcC - 1) * (rC + 1) columns
    final RealMatrix beta = new Array2DRowRealMatrix(1, (tcC - 1) * (rC + 1));
    Double loglike = 0.0;
    Double loglikeOld = 0.0;
    exec.setMessage("Iterative optimization. Processing iteration 1.");
    // main loop
    while (iter < m_maxIter && !converged) {
        RealMatrix betaOld = beta.copy();
        loglikeOld = loglike;
        // Do heavy work in a separate thread which allows to interrupt it
        // note the queue may block if no more threads are available (e.g. thread count = 1)
        // as soon as we stall in 'get' this thread reduces the number of running thread
        Future<Double> future = ThreadPool.currentPool().enqueue(new Callable<Double>() {
            @Override
            public Double call() throws Exception {
                final ExecutionMonitor progMon = exec.createSubProgress(1.0 / m_maxIter);
                irlsRls(trainingData, beta, rC, tcC, progMon);
                progMon.setProgress(1.0);
                return likelihood(trainingData.iterator(), beta, rC, tcC, exec);
            }
        });
        try {
            loglike = future.get();
        } catch (InterruptedException e) {
            future.cancel(true);
            // surfaces as CanceledExecutionException when the user cancelled
            exec.checkCanceled();
            throw new RuntimeException(e);
        } catch (ExecutionException e) {
            if (e.getCause() instanceof RuntimeException) {
                throw (RuntimeException) e.getCause();
            } else {
                throw new RuntimeException(e.getCause());
            }
        }
        if (Double.isInfinite(loglike) || Double.isNaN(loglike)) {
            throw new RuntimeException(FAILING_MSG);
        }
        exec.checkCanceled();
        // test for decreasing likelihood
        while ((Double.isInfinite(loglike) || Double.isNaN(loglike) || loglike < loglikeOld) && iter > 0) {
            converged = coefficientsConverged(beta, betaOld);
            if (converged) {
                break;
            }
            // half the step size of beta
            beta.setSubMatrix((beta.add(betaOld)).scalarMultiply(0.5).getData(), 0, 0);
            exec.checkCanceled();
            loglike = likelihood(trainingData.iterator(), beta, rC, tcC, exec);
            exec.checkCanceled();
        }
        // test for convergence
        converged = coefficientsConverged(beta, betaOld);
        iter++;
        LOGGER.debug("#Iterations: " + iter);
        LOGGER.debug("Log Likelihood: " + loglike);
        // dump every coefficient of the row vector, comma separated
        StringBuilder betaBuilder = new StringBuilder();
        for (int i = 0; i < beta.getColumnDimension(); i++) {
            if (i > 0) {
                betaBuilder.append(", ");
            }
            betaBuilder.append(Double.toString(beta.getEntry(0, i)));
        }
        LOGGER.debug("beta: " + betaBuilder.toString());
        exec.checkCanceled();
        exec.setMessage("Iterative optimization. #Iterations: " + iter + " | Log-likelihood: " + DoubleFormat.formatDouble(loglike) + ". Processing iteration " + (iter + 1) + ".");
    }
    // The covariance matrix
    // NOTE(review): A is not declared in this method; presumably a field filled by irlsRls - confirm.
    RealMatrix covMat = new QRDecomposition(A).getSolver().getInverse().scalarMultiply(-1);
    List<String> factorList = new ArrayList<String>();
    List<String> covariateList = new ArrayList<String>();
    Map<String, List<DataCell>> factorDomainValues = new HashMap<String, List<DataCell>>();
    for (int i : trainingData.getActiveCols()) {
        if (trainingData.getIsNominal().get(i)) {
            String factor = data.getDataTableSpec().getColumnSpec(i).getName();
            factorList.add(factor);
            List<DataCell> values = trainingData.getDomainValues().get(i);
            factorDomainValues.put(factor, values);
        } else {
            covariateList.add(data.getDataTableSpec().getColumnSpec(i).getName());
        }
    }
    Matrix betaJama = new Matrix(beta.getData());
    Matrix covMatJama = new Matrix(covMat.getData());
    // create content
    LogisticRegressionContent content = new LogisticRegressionContent(m_outSpec, factorList, covariateList, m_targetReferenceCategory, m_sortTargetCategories, m_sortFactorsCategories, betaJama, loglike, covMatJama, iter);
    return content;
}

/**
 * Checks whether every coefficient of the row vector {@code beta} differs from the matching
 * entry of {@code betaOld} by at most {@code m_eps} times the magnitude of the old entry.
 *
 * @param beta current coefficients (single row)
 * @param betaOld coefficients of the previous iteration (single row, same width)
 * @return {@code true} if no coefficient moved by more than the relative tolerance
 */
private boolean coefficientsConverged(final RealMatrix beta, final RealMatrix betaOld) {
    for (int k = 0; k < beta.getColumnDimension(); k++) {
        final double previous = betaOld.getEntry(0, k);
        if (abs(beta.getEntry(0, k) - previous) > m_eps * abs(previous)) {
            return false;
        }
    }
    return true;
}
use of Jama.Matrix in project knime-core by knime.
the class LogisticRegressionContent method toMatrix.
/**
 * Reshapes a flat array into a matrix with {@code colCount} columns, filling it row by row.
 *
 * <p>The number of rows is {@code array.length / colCount}. That the array length is an exact
 * multiple of {@code colCount} is only checked by an {@code assert}.
 *
 * @param array    the values, one matrix row after the other
 * @param colCount number of columns of the resulting matrix
 * @return the matrix holding the values of {@code array}
 */
private static Matrix toMatrix(final double[] array, final int colCount) {
    final int rowCount = array.length / colCount;
    final int cellCount = rowCount * colCount;
    assert array.length == cellCount;

    final Matrix result = new Matrix(rowCount, colCount);
    // Walk the flat array once and derive the row and column from the running index.
    for (int index = 0; index < cellCount; index++) {
        result.set(index / colCount, index % colCount, array[index]);
    }
    return result;
}
Aggregations