use of ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix in project Gemma by PavlidisLab.
the class LinearModelAnalyzer method makeSubSets.
private Map<FactorValue, ExpressionDataDoubleMatrix> makeSubSets(DifferentialExpressionAnalysisConfig config, ExpressionDataDoubleMatrix dmatrix, List<BioMaterial> samplesUsed, ExperimentalFactor subsetFactor) {
if (subsetFactor.getType().equals(FactorType.CONTINUOUS)) {
throw new IllegalArgumentException("You cannot subset on a continuous factor (has a Measurement)");
}
if (config.getFactorsToInclude().contains(subsetFactor)) {
throw new IllegalArgumentException("You cannot analyze a factor and use it for subsetting at the same time.");
}
Map<FactorValue, List<BioMaterial>> subSetSamples = new HashMap<>();
for (FactorValue fv : subsetFactor.getFactorValues()) {
assert fv.getMeasurement() == null;
subSetSamples.put(fv, new ArrayList<BioMaterial>());
}
for (BioMaterial sample : samplesUsed) {
boolean ok = false;
for (FactorValue fv : sample.getFactorValues()) {
if (fv.getExperimentalFactor().equals(subsetFactor)) {
subSetSamples.get(fv).add(sample);
ok = true;
break;
}
}
if (!ok) {
throw new IllegalArgumentException("Cannot subset on a factor unless each sample has a value for it. Missing value for: " + sample + " " + sample.getBioAssaysUsedIn());
}
}
Map<FactorValue, ExpressionDataDoubleMatrix> subMatrices = new HashMap<>();
for (FactorValue fv : subSetSamples.keySet()) {
List<BioMaterial> samplesInSubset = subSetSamples.get(fv);
if (samplesInSubset.isEmpty()) {
throw new IllegalArgumentException("The subset was empty for fv: " + fv);
}
assert samplesInSubset.size() < samplesUsed.size();
samplesInSubset = ExpressionDataMatrixColumnSort.orderByExperimentalDesign(samplesInSubset, config.getFactorsToInclude());
ExpressionDataDoubleMatrix subMatrix = new ExpressionDataDoubleMatrix(samplesInSubset, dmatrix);
subMatrices.put(fv, subMatrix);
}
return subMatrices;
}
use of ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix in project Gemma by PavlidisLab.
the class MeanVarianceServiceImpl method create.
@Override
public MeanVarianceRelation create(ExpressionExperiment ee, boolean forceRecompute) {
if (ee == null) {
log.warn("Experiment is null");
return null;
}
log.info("Starting mean-variance computation");
MeanVarianceRelation mvr = ee.getMeanVarianceRelation();
if (forceRecompute || mvr == null) {
log.info("Recomputing mean-variance");
ee = expressionExperimentService.thawLiter(ee);
mvr = ee.getMeanVarianceRelation();
ExpressionDataDoubleMatrix intensities = meanVarianceServiceHelper.getIntensities(ee);
if (intensities == null) {
throw new IllegalStateException("Could not locate intensity matrix for " + ee.getShortName());
}
Collection<QuantitationType> qtList = expressionExperimentService.getPreferredQuantitationType(ee);
QuantitationType qt;
if (qtList.size() == 0) {
log.error("Did not find any preferred quantitation type. Mean-variance relation was not computed.");
} else {
qt = qtList.iterator().next();
if (qtList.size() > 1) {
// Really this should be an error condition.
log.warn("Found more than one preferred quantitation type. Only the first preferred quantitation type (" + qt + ") will be used.");
}
try {
intensities = ExpressionDataDoubleMatrixUtil.filterAndLog2Transform(qt, intensities);
} catch (UnknownLogScaleException e) {
log.warn("Problem log transforming data. Check that the appropriate log scale is used. Mean-variance will be computed as is.");
}
mvr = calculateMeanVariance(intensities, mvr);
meanVarianceServiceHelper.createMeanVariance(ee, mvr);
}
}
log.info("Mean-variance computation is complete");
return mvr;
}
use of ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix in project Gemma by PavlidisLab.
the class PreprocessorServiceImpl method batchCorrect.
@Override
public void batchCorrect(ExpressionExperiment ee, boolean force) throws PreprocessingException {
String note = "ComBat batch correction";
String detail = null;
/*
* This leaves the raw data alone; it updates the processed data.
*/
this.checkArrayDesign(ee);
ee = expressionExperimentService.thawLite(ee);
this.checkCorrectable(ee, force);
/*
* If there are predicted outliers, but which we decide are okay, we just go ahead.
*/
if (!force) {
this.checkOutliers(ee);
} else {
note = "[Forced]" + note;
detail = "Batch correction skipped outlier check.";
PreprocessorServiceImpl.log.warn(detail);
}
try {
Collection<ProcessedExpressionDataVector> vecs = this.getProcessedExpressionDataVectors(ee);
// TODO log-transform if not already, update QT. See https://github.com/PavlidisLab/Gemma/issues/50
ExpressionDataDoubleMatrix correctedData = this.getCorrectedData(ee, vecs);
// Convert to vectors
processedExpressionDataVectorService.createProcessedDataVectors(ee, correctedData.toProcessedDataVectors());
AuditEventType eventType = BatchCorrectionEvent.Factory.newInstance();
String bConf = expressionExperimentService.getBatchConfound(ee);
if (bConf != null && force) {
String add = "Batch correction forced over a detected confound: " + bConf;
// noinspection ConstantConditions // That is simply false.
detail = (detail == null) ? add : detail + "\n" + add;
}
auditTrailService.addUpdateEvent(ee, eventType, note, detail);
this.removeInvalidatedData(ee);
this.processExceptForVectorCreate(ee);
} catch (Exception e) {
throw new PreprocessingException(e);
}
}
use of ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix in project Gemma by PavlidisLab.
the class ExpressionDataSVD method equalize.
/**
* Implements the method described in the SPELL paper, alternative interpretation as related by Q. Morris. Set all
* components to have equal weight (set all singular values to 1)
*
* @return the reconstructed matrix; values that were missing before are re-masked.
*/
public ExpressionDataDoubleMatrix equalize() {
DoubleMatrix<Integer, Integer> copy = svd.getS().copy();
for (int i = 0; i < copy.columns(); i++) {
copy.set(i, i, 1.0);
}
double[][] rawU = svd.getU().getRawMatrix();
double[][] rawS = copy.getRawMatrix();
double[][] rawV = svd.getV().getRawMatrix();
DoubleMatrix2D u = new DenseDoubleMatrix2D(rawU);
DoubleMatrix2D s = new DenseDoubleMatrix2D(rawS);
DoubleMatrix2D v = new DenseDoubleMatrix2D(rawV);
Algebra a = new Algebra();
DoubleMatrix<CompositeSequence, BioMaterial> reconstructed = new DenseDoubleMatrix<>(a.mult(a.mult(u, s), a.transpose(v)).toArray());
reconstructed.setRowNames(this.expressionData.getMatrix().getRowNames());
reconstructed.setColumnNames(this.expressionData.getMatrix().getColNames());
// re-mask the missing values.
for (int i = 0; i < reconstructed.rows(); i++) {
for (int j = 0; j < reconstructed.columns(); j++) {
if (Double.isNaN(this.missingValueInfo.get(i, j))) {
reconstructed.set(i, j, Double.NaN);
}
}
}
return new ExpressionDataDoubleMatrix(this.expressionData, reconstructed);
}
use of ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix in project Gemma by PavlidisLab.
the class ExpressionDataSVD method winnow.
/**
* Implements method described in Skillicorn et al., "Strategies for winnowing microarray data" (also section 3.5.5
* of his book)
*
* @param thresholdQuantile Enter 0.5 for median. Value must be > 0 and < 1.
* @return a filtered matrix
*/
public ExpressionDataDoubleMatrix winnow(double thresholdQuantile) {
if (thresholdQuantile <= 0 || thresholdQuantile >= 1) {
throw new IllegalArgumentException("Threshold quantile should be a value between 0 and 1 exclusive");
}
class NormCmp implements Comparable<NormCmp> {
private Double norm;
private int rowIndex;
private NormCmp(int rowIndex, Double norm) {
super();
this.rowIndex = rowIndex;
this.norm = norm;
}
@Override
public int compareTo(NormCmp o) {
return this.norm.compareTo(o.norm);
}
public int getRowIndex() {
return rowIndex;
}
@Override
public int hashCode() {
final int prime = 31;
int result = 1;
result = prime * result + ((norm == null) ? 0 : norm.hashCode());
return result;
}
@Override
public boolean equals(Object obj) {
if (this == obj)
return true;
if (obj == null)
return false;
if (this.getClass() != obj.getClass())
return false;
NormCmp other = (NormCmp) obj;
if (norm == null) {
return other.norm == null;
} else
return norm.equals(other.norm);
}
}
// order rows by distance from the origin. This is proportional to the 1-norm.
Algebra a = new Algebra();
List<NormCmp> os = new ArrayList<>();
for (int i = 0; i < this.expressionData.rows(); i++) {
double[] row = this.getU().getRow(i);
DoubleMatrix1D rom = new DenseDoubleMatrix1D(row);
double norm1 = a.norm1(rom);
os.add(new NormCmp(i, norm1));
}
Collections.sort(os);
int quantileLimit = (int) Math.floor(this.expressionData.rows() * thresholdQuantile);
quantileLimit = Math.max(0, quantileLimit);
List<CompositeSequence> keepers = new ArrayList<>();
for (int i = 0; i < quantileLimit; i++) {
NormCmp x = os.get(i);
CompositeSequence d = this.expressionData.getDesignElementForRow(x.getRowIndex());
keepers.add(d);
}
// remove genes which are near the origin in SVD space. FIXME: make sure the missing values are still masked.
return new ExpressionDataDoubleMatrix(this.expressionData, keepers);
}
Aggregations