Use of cern.colt.matrix.DoubleMatrix1D in the project Gemma by PavlidisLab — class ComBat, method checkForProblems.
/**
 * Check sdata for problems. If the design is not of full rank, standardization yields NaN for every
 * value, in which case batch correction cannot proceed.
 * <p>
 * Short-circuits on the first non-NaN value found, since only the all-NaN case is an error; this is
 * behaviorally identical to counting all missing values and comparing to the total, but avoids
 * scanning the whole matrix in the common (healthy) case.
 *
 * @param sdata standardized data matrix
 * @throws ComBatException if every value in sdata is NaN (model not of full rank)
 */
private void checkForProblems(DoubleMatrix2D sdata) throws ComBatException {
    for (int i = 0; i < sdata.rows(); i++) {
        DoubleMatrix1D row = sdata.viewRow(i);
        for (int j = 0; j < sdata.columns(); j++) {
            if (!Double.isNaN(row.getQuick(j))) {
                return; // at least one usable value: not the degenerate all-NaN case
            }
        }
    }
    /*
     * Every value is NaN (including the vacuous empty-matrix case, matching the original
     * total == numMissing test). Alternative that can help in some cases: back out and drop factors.
     * There are definitely strategies for doing this (drop factors that have no major PC loadings,
     * for example), but it might be bad to do this "automagically".
     */
    throw new ComBatException("Could not complete batch correction: model must not be of full rank.");
}
Use of cern.colt.matrix.DoubleMatrix1D in the project Gemma by PavlidisLab — class ComBat, method runNonParametric.
/**
 * Run the nonparametric batch-effect parameter estimation, distributing batches over worker threads.
 * Results are copied into gammastar and deltastar, one row per batch, in batch-key order.
 *
 * @param sdata standardized data matrix
 * @param gammastar output matrix for the estimated gamma* values (written in place)
 * @param deltastar output matrix for the estimated delta* values (written in place)
 */
private void runNonParametric(final DoubleMatrix2D sdata, DoubleMatrix2D gammastar, DoubleMatrix2D deltastar) {
    final ConcurrentHashMap<String, DoubleMatrix1D[]> results = new ConcurrentHashMap<>();
    int numThreads = Math.min(batches.size(), Runtime.getRuntime().availableProcessors());
    ComBat.log.info("Running nonparametric estimation on " + numThreads + " threads");
    Future<?>[] futures = new Future[numThreads];
    // Exactly numThreads tasks are submitted, so a fixed-size pool is the right fit.
    ExecutorService service = Executors.newFixedThreadPool(numThreads);
    /*
     * Divvy up batches over threads; the last thread also picks up any remainder batches.
     */
    int batchesPerThread = batches.size() / numThreads;
    final String[] batchIds = batches.keySet().toArray(new String[] {});
    for (int i = 0; i < numThreads; i++) {
        final int firstBatch = i * batchesPerThread;
        final int lastBatch = i == (numThreads - 1) ? batches.size() : firstBatch + batchesPerThread;
        futures[i] = service.submit(new Runnable() {
            @Override
            public void run() {
                for (int k = firstBatch; k < lastBatch; k++) {
                    String batchId = batchIds[k];
                    DoubleMatrix2D batchData = ComBat.this.getBatchData(sdata, batchId);
                    DoubleMatrix1D[] batchResults = ComBat.this.nonParametricFit(batchData, gammaHat.viewRow(k), deltaHat.viewRow(k));
                    results.put(batchId, batchResults);
                }
            }
        });
    }
    service.shutdown();
    /*
     * Block until all tasks finish. Unlike the previous busy-wait over isDone(), Future.get() parks
     * the calling thread instead of spinning, and it surfaces any exception thrown inside a worker —
     * previously a failed task was silently "done" and caused an NPE when reading 'results' below.
     */
    try {
        for (Future<?> f : futures) {
            f.get();
        }
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt(); // restore the interrupt flag for callers
        throw new RuntimeException("Interrupted while waiting for nonparametric estimation", e);
    } catch (java.util.concurrent.ExecutionException e) {
        throw new RuntimeException("Nonparametric estimation failed", e.getCause());
    }
    // Copy per-batch results into the output matrices, one row per batch.
    for (int i = 0; i < batchIds.length; i++) {
        String batchId = batchIds[i];
        DoubleMatrix1D[] batchResults = results.get(batchId);
        for (int j = 0; j < batchResults[0].size(); j++) {
            gammastar.set(i, j, batchResults[0].get(j));
        }
        for (int j = 0; j < batchResults[1].size(); j++) {
            deltastar.set(i, j, batchResults[1].get(j));
        }
    }
}
Use of cern.colt.matrix.DoubleMatrix1D in the project Gemma by PavlidisLab — class ExpressionDataDoubleMatrixUtil, method filterAndLog2Transform.
/**
 * Log2 transform if necessary, do any required filtering prior to analysis. Count data is converted to log2CPM (but
 * we store log2cpm as the processed data, so that is what would generally be used).
 *
 * @param quantitationType QT
 * @param dmatrix matrix
 * @return ee data double matrix
 */
public static ExpressionDataDoubleMatrix filterAndLog2Transform(QuantitationType quantitationType, ExpressionDataDoubleMatrix dmatrix) {
    ScaleType scale = ExpressionDataDoubleMatrixUtil.findScale(quantitationType, dmatrix.getMatrix());
    // Bring the data onto a log2 scale, dispatching on the detected scale type.
    switch (scale) {
        case LOG2:
            ExpressionDataDoubleMatrixUtil.log.info("Data is already on a log2 scale");
            break;
        case LN:
            ExpressionDataDoubleMatrixUtil.log.info(" **** Converting from ln to log2 **** ");
            MatrixStats.convertToLog2(dmatrix.getMatrix(), Math.E);
            break;
        case LOG10:
            ExpressionDataDoubleMatrixUtil.log.info(" **** Converting from log10 to log2 **** ");
            MatrixStats.convertToLog2(dmatrix.getMatrix(), 10);
            break;
        case LINEAR:
            ExpressionDataDoubleMatrixUtil.log.info(" **** LOG TRANSFORMING **** ");
            MatrixStats.logTransform(dmatrix.getMatrix());
            break;
        case COUNT:
            /*
             * Since we store log2cpm this shouldn't be reached any more. We don't do it in place.
             */
            ExpressionDataDoubleMatrixUtil.log.info(" **** Converting from count to log2 counts per million **** ");
            DoubleMatrix1D librarySize = MatrixStats.colSums(dmatrix.getMatrix());
            DoubleMatrix<CompositeSequence, BioMaterial> log2cpm = MatrixStats.convertToLog2Cpm(dmatrix.getMatrix(), librarySize);
            dmatrix = new ExpressionDataDoubleMatrix(dmatrix, log2cpm);
            break;
        default:
            throw new UnknownLogScaleException("Can't figure out what scale the data are on");
    }
    /*
     * We do this second because doing it first causes some kind of subtle problem ... (round off? I could not
     * really track this down).
     *
     * Remove zero-variance rows, but also rows that have lots of equal values even if variance is non-zero. This
     * happens when data is "clipped" (e.g., all values under 10 set to 10).
     */
    int rowsBefore = dmatrix.rows();
    dmatrix = ExpressionExperimentFilter.zeroVarianceFilter(dmatrix);
    if (dmatrix.rows() < rowsBefore) {
        ExpressionDataDoubleMatrixUtil.log.info((rowsBefore - dmatrix.rows()) + " rows removed due to low variance");
    }
    rowsBefore = dmatrix.rows();
    if (dmatrix.columns() > ExpressionDataDoubleMatrixUtil.COLUMNS_LIMIT) {
        dmatrix = ExpressionExperimentFilter.tooFewDistinctValues(dmatrix, ExpressionDataDoubleMatrixUtil.VALUES_LIMIT);
        if (dmatrix.rows() < rowsBefore) {
            ExpressionDataDoubleMatrixUtil.log.info((rowsBefore - dmatrix.rows()) + " rows removed due to too many identical values");
        }
    }
    return dmatrix;
}
Use of cern.colt.matrix.DoubleMatrix1D in the project Gemma by PavlidisLab — class DataUpdater, method log2cpmFromCounts.
/**
 * For back filling log2cpm when only counts are available. This wouldn't be used routinely, because new experiments
 * get log2cpm computed when loaded.
 * <p>
 * Side effects: marks the count QT non-preferred, creates a new preferred log2cpm QT, and adds the
 * converted data to the experiment. On any failure the count QT's preferred flag is restored so the
 * experiment is not left without preferred data. Only single-platform experiments are supported.
 *
 * @param ee ee
 * @param qt qt (the count quantitation type; assumed to be preferred on entry)
 */
public void log2cpmFromCounts(ExpressionExperiment ee, QuantitationType qt) {
ee = experimentService.thawLite(ee);
/*
 * Get the count data; Make sure it is currently preferred (so we don't do this twice by accident)
 * We need to do this from the Raw data, not the data that has been normalized etc.
 */
Collection<RawExpressionDataVector> counts = rawExpressionDataVectorService.find(qt);
ExpressionDataDoubleMatrix countMatrix = new ExpressionDataDoubleMatrix(counts);
try {
/*
 * Get the count data quantitation type and make it non-preferred
 */
qt.setIsPreferred(false);
qtService.update(qt);
// so updated QT is attached.
ee = experimentService.thawLite(ee);
// New QT for the derived log2cpm data; presumably created as preferred (the assert below relies on it).
QuantitationType log2cpmQt = this.makelog2cpmQt();
// Library size = per-sample column sums of the raw counts; needed for the CPM normalization.
DoubleMatrix1D librarySize = MatrixStats.colSums(countMatrix.getMatrix());
DoubleMatrix<CompositeSequence, BioMaterial> log2cpmMatrix = MatrixStats.convertToLog2Cpm(countMatrix.getMatrix(), librarySize);
ExpressionDataDoubleMatrix log2cpmEEMatrix = new ExpressionDataDoubleMatrix(ee, log2cpmQt, log2cpmMatrix);
assert log2cpmEEMatrix.getQuantitationTypes().iterator().next().getIsPreferred();
Collection<ArrayDesign> platforms = experimentService.getArrayDesignsUsed(ee);
// Multi-platform experiments are not supported here; bail out (handled by the recovery below).
if (platforms.size() > 1)
throw new IllegalArgumentException("Cannot apply to multiplatform data sets");
this.addData(ee, platforms.iterator().next(), log2cpmEEMatrix);
} catch (Exception e) {
DataUpdater.log.error(e, e);
// try to recover: restore the count QT as preferred so the experiment still has preferred data.
qt.setIsPreferred(true);
qtService.update(qt);
}
}
Use of cern.colt.matrix.DoubleMatrix1D in the project beast-mcmc by beast-dev — class ComplexSubstitutionModel, method setupMatrix.
/**
 * Rebuild the rate matrix (amat) from the current parameters and compute its eigendecomposition,
 * storing eigenvalues (Eval, EvalImag), eigenvectors (Evec) and their inverse (Ievc) for later use
 * in computing transition probabilities. Sets wellConditioned = false and returns early if the
 * decomposition fails to converge, is numerically ill-conditioned, cannot be inverted, or yields
 * NaN/infinite eigenvalues; on success sets wellConditioned = true and clears updateMatrix.
 */
public void setupMatrix() {
// Lazily allocate eigendecomposition working storage on first use.
if (!eigenInitialised) {
initialiseEigen();
storedEvalImag = new double[stateCount];
}
int i = 0;
// Fill amat from the model parameters; makeValid presumably normalizes rows so the matrix is a
// proper rate matrix (rows sum to zero) -- NOTE(review): confirm against makeValid's implementation.
storeIntoAmat();
makeValid(amat, stateCount);
// compute eigenvalues and eigenvectors
// EigenvalueDecomposition eigenDecomp = new EigenvalueDecomposition(new DenseDoubleMatrix2D(amat));
// RobustEigenDecomposition bounds the iteration count so a non-converging decomposition throws
// instead of hanging; failure is reported by flagging the model as ill-conditioned.
RobustEigenDecomposition eigenDecomp;
try {
eigenDecomp = new RobustEigenDecomposition(new DenseDoubleMatrix2D(amat), maxIterations);
} catch (ArithmeticException ae) {
System.err.println(ae.getMessage());
wellConditioned = false;
System.err.println("amat = \n" + new Matrix(amat));
return;
}
DoubleMatrix2D eigenV = eigenDecomp.getV();
DoubleMatrix1D eigenVReal = eigenDecomp.getRealEigenvalues();
// A non-reversible (complex) model can have complex eigenvalues, hence the imaginary parts.
DoubleMatrix1D eigenVImag = eigenDecomp.getImagEigenvalues();
DoubleMatrix2D eigenVInv;
if (checkConditioning) {
// Use the singular values of the eigenvector matrix to estimate its condition number;
// a huge condition number means inverting it would be numerically unreliable.
RobustSingularValueDecomposition svd;
try {
svd = new RobustSingularValueDecomposition(eigenV, maxIterations);
} catch (ArithmeticException ae) {
System.err.println(ae.getMessage());
wellConditioned = false;
return;
}
if (svd.cond() > maxConditionNumber) {
wellConditioned = false;
return;
}
}
try {
eigenVInv = alegbra.inverse(eigenV);
} catch (IllegalArgumentException e) {
// Singular eigenvector matrix: cannot form the inverse needed for P(t) = V exp(Dt) V^-1.
wellConditioned = false;
return;
}
// Cache the decomposition as plain arrays for fast access elsewhere.
Ievc = eigenVInv.toArray();
Evec = eigenV.toArray();
Eval = eigenVReal.toArray();
EvalImag = eigenVImag.toArray();
// Check for valid decomposition
for (i = 0; i < stateCount; i++) {
if (Double.isNaN(Eval[i]) || Double.isNaN(EvalImag[i]) || Double.isInfinite(Eval[i]) || Double.isInfinite(EvalImag[i])) {
wellConditioned = false;
return;
// Snap near-zero real eigenvalues to exactly zero to suppress numerical noise.
} else if (Math.abs(Eval[i]) < 1e-10) {
Eval[i] = 0.0;
}
}
updateMatrix = false;
wellConditioned = true;
// compute normalization and rescale eigenvalues
computeStationaryDistribution();
if (doNormalization) {
// Expected substitution rate at stationarity: sum over states of -q_ii * pi_i. Dividing the
// eigenvalues by it rescales the process so branch lengths are in expected substitutions.
double subst = 0.0;
for (i = 0; i < stateCount; i++) subst += -amat[i][i] * stationaryDistribution[i];
for (i = 0; i < stateCount; i++) {
Eval[i] /= subst;
EvalImag[i] /= subst;
}
}
}
Aggregations