use of ubic.gemma.model.expression.biomaterial.BioMaterial in project Gemma by PavlidisLab.
the class LinearModelAnalyzer method run.
/**
 * Runs the differential expression analysis on a single subset of an experiment.
 * <p>
 * The processed data matrix of the subset's source experiment is fetched, the samples belonging to the subset
 * are collected and ordered by the experimental design, and the matrix is sliced down to those samples before
 * the analysis is delegated to {@code doAnalysis}.
 *
 * @param subset the subset to analyze
 * @param config the analysis configuration; its subset factor and factors to include are read here
 * @return the analysis, or {@code null} if {@code fixFactorsForSubset} yields no factors for this subset
 * @throws IllegalStateException if the samples carry more than one distinct value of the subset factor, or if
 *                               {@code doAnalysis} returns {@code null}
 */
@Override
public DifferentialExpressionAnalysis run(ExpressionExperimentSubSet subset, DifferentialExpressionAnalysisConfig config) {
    // Set up as for the full experiment: begin with the matrix of the source experiment.
    ExpressionDataDoubleMatrix fullMatrix = expressionDataMatrixService.getProcessedExpressionDataMatrix(subset.getSourceExperiment());
    ExperimentalFactor subsetFactor = config.getSubsetFactor();

    // Collect through a set so a sample shared by several bioassays is only listed once.
    Collection<BioMaterial> distinctSamples = new HashSet<>();
    for (BioAssay assay : subset.getBioAssays()) {
        distinctSamples.add(assay.getSampleUsed());
    }
    List<BioMaterial> samplesInSubset = new ArrayList<>(distinctSamples);

    // Every sample must agree on the value it has for the subset factor.
    FactorValue subsetFactorValue = null;
    for (BioMaterial sample : samplesInSubset) {
        for (FactorValue candidate : sample.getFactorValues()) {
            if (!candidate.getExperimentalFactor().equals(subsetFactor)) {
                continue;
            }
            if (subsetFactorValue == null) {
                subsetFactorValue = candidate;
                continue;
            }
            if (!subsetFactorValue.equals(candidate)) {
                throw new IllegalStateException("This subset has more than one factor value for the supposed subset factor: " + candidate + " and " + subsetFactorValue);
            }
        }
    }

    samplesInSubset = ExpressionDataMatrixColumnSort.orderByExperimentalDesign(samplesInSubset, config.getFactorsToInclude());

    // Slice the full matrix down to the ordered subset samples.
    ExpressionDataDoubleMatrix subsetMatrix = new ExpressionDataDoubleMatrix(samplesInSubset, fullMatrix);

    Collection<ExperimentalFactor> usableFactors = this.fixFactorsForSubset(fullMatrix, subset, config.getFactorsToInclude());
    if (usableFactors.isEmpty()) {
        LinearModelAnalyzer.log.warn("Experimental design is not valid for subset: " + subsetFactorValue + "; skipping");
        return null;
    }

    DifferentialExpressionAnalysisConfig subsetConfig = this.fixConfigForSubset(config.getFactorsToInclude(), config, subsetFactorValue);
    DifferentialExpressionAnalysis result = this.doAnalysis(subset, subsetConfig, subsetMatrix, samplesInSubset, config.getFactorsToInclude(), subsetFactorValue);
    if (result != null) {
        return result;
    }
    throw new IllegalStateException("Subset could not be analyzed with config: " + config);
}
use of ubic.gemma.model.expression.biomaterial.BioMaterial in project Gemma by PavlidisLab.
the class LinearModelAnalyzer method makeSubSets.
/**
 * Splits a data matrix into one sub-matrix per value of the given subset factor.
 *
 * @param config       analysis configuration; its factors to include are used for ordering the samples
 * @param dmatrix      the matrix to slice
 * @param samplesUsed  the samples to distribute over the subsets
 * @param subsetFactor the factor whose values define the subsets
 * @return a map from each value of the subset factor to the matrix restricted to the samples carrying it
 * @throws IllegalArgumentException if the factor is continuous, is also one of the factors to include, if a
 *                                  sample has no value for it, or if a factor value ends up with no samples
 */
private Map<FactorValue, ExpressionDataDoubleMatrix> makeSubSets(DifferentialExpressionAnalysisConfig config, ExpressionDataDoubleMatrix dmatrix, List<BioMaterial> samplesUsed, ExperimentalFactor subsetFactor) {
    if (subsetFactor.getType().equals(FactorType.CONTINUOUS)) {
        throw new IllegalArgumentException("You cannot subset on a continuous factor (has a Measurement)");
    }
    if (config.getFactorsToInclude().contains(subsetFactor)) {
        throw new IllegalArgumentException("You cannot analyze a factor and use it for subsetting at the same time.");
    }

    // One (initially empty) bucket of samples per value of the subset factor.
    Map<FactorValue, List<BioMaterial>> samplesByValue = new HashMap<>();
    for (FactorValue value : subsetFactor.getFactorValues()) {
        assert value.getMeasurement() == null;
        List<BioMaterial> bucket = new ArrayList<>();
        samplesByValue.put(value, bucket);
    }

    // Assign each sample to the bucket of the first of its values that belongs to the subset factor.
    for (BioMaterial sample : samplesUsed) {
        FactorValue matched = null;
        for (FactorValue value : sample.getFactorValues()) {
            if (value.getExperimentalFactor().equals(subsetFactor)) {
                matched = value;
                break;
            }
        }
        if (matched == null) {
            throw new IllegalArgumentException("Cannot subset on a factor unless each sample has a value for it. Missing value for: " + sample + " " + sample.getBioAssaysUsedIn());
        }
        samplesByValue.get(matched).add(sample);
    }

    // Slice the matrix once per bucket, with the samples ordered by the experimental design.
    Map<FactorValue, ExpressionDataDoubleMatrix> subMatrices = new HashMap<>();
    for (Map.Entry<FactorValue, List<BioMaterial>> entry : samplesByValue.entrySet()) {
        FactorValue value = entry.getKey();
        List<BioMaterial> bucket = entry.getValue();
        if (bucket.isEmpty()) {
            throw new IllegalArgumentException("The subset was empty for fv: " + value);
        }
        assert bucket.size() < samplesUsed.size();
        List<BioMaterial> ordered = ExpressionDataMatrixColumnSort.orderByExperimentalDesign(bucket, config.getFactorsToInclude());
        subMatrices.put(value, new ExpressionDataDoubleMatrix(ordered, dmatrix));
    }
    return subMatrices;
}
use of ubic.gemma.model.expression.biomaterial.BioMaterial in project Gemma by PavlidisLab.
the class ExpressionDataSVD method equalize.
/**
 * Implements the method described in the SPELL paper, alternative interpretation as related by Q. Morris: every
 * component is given equal weight by setting all singular values to 1, and the matrix is rebuilt as
 * U * S * V^T from the stored decomposition.
 *
 * @return the reconstructed matrix; values that were missing before are re-masked.
 */
public ExpressionDataDoubleMatrix equalize() {
    // Work on a copy of S so the stored decomposition is left untouched.
    DoubleMatrix<Integer, Integer> unitS = svd.getS().copy();
    for (int d = 0; d < unitS.columns(); d++) {
        unitS.set(d, d, 1.0);
    }

    DoubleMatrix2D left = new DenseDoubleMatrix2D(svd.getU().getRawMatrix());
    DoubleMatrix2D middle = new DenseDoubleMatrix2D(unitS.getRawMatrix());
    DoubleMatrix2D right = new DenseDoubleMatrix2D(svd.getV().getRawMatrix());

    Algebra algebra = new Algebra();
    DoubleMatrix2D product = algebra.mult(algebra.mult(left, middle), algebra.transpose(right));

    DoubleMatrix<CompositeSequence, BioMaterial> rebuilt = new DenseDoubleMatrix<>(product.toArray());
    rebuilt.setRowNames(this.expressionData.getMatrix().getRowNames());
    rebuilt.setColumnNames(this.expressionData.getMatrix().getColNames());

    // Restore NaN wherever the missing-value info holds NaN.
    int rowCount = rebuilt.rows();
    for (int row = 0; row < rowCount; row++) {
        int colCount = rebuilt.columns();
        for (int col = 0; col < colCount; col++) {
            boolean wasMissing = Double.isNaN(this.missingValueInfo.get(row, col));
            if (wasMissing) {
                rebuilt.set(row, col, Double.NaN);
            }
        }
    }
    return new ExpressionDataDoubleMatrix(this.expressionData, rebuilt);
}
use of ubic.gemma.model.expression.biomaterial.BioMaterial in project Gemma by PavlidisLab.
the class ProcessedExpressionDataVectorCreateHelperServiceImpl method reorderByDesign.
/**
 * Reorders the processed data vectors of an experiment so that their columns follow the experimental design.
 * <p>
 * For every bioassay dimension of the experiment a new dimension is created with the bioassays in design order,
 * and each processed vector's data is permuted accordingly and re-pointed to the new dimension. Nothing is done
 * if the experiment has no experimental factors or no processed data vectors.
 *
 * @param eeId id of the experiment to reorder
 * @throws IllegalStateException if a bioassay's sample is missing from the design ordering, or a data vector
 *                               has a column with no known position in the new ordering
 */
@Override
@Transactional
public void reorderByDesign(Long eeId) {
    ExpressionExperiment ee = expressionExperimentDao.load(eeId);
    if (ee.getExperimentalDesign().getExperimentalFactors().size() == 0) {
        ProcessedExpressionDataVectorCreateHelperServiceImpl.log.info(ee.getShortName() + " does not have a populated experimental design, skipping");
        return;
    }
    Collection<ProcessedExpressionDataVector> processedDataVectors = ee.getProcessedExpressionDataVectors();
    if (processedDataVectors.size() == 0) {
        ProcessedExpressionDataVectorCreateHelperServiceImpl.log.info(ee.getShortName() + " does not have processed data");
        return;
    }
    Collection<BioAssayDimension> dims = this.eeService.getBioAssayDimensions(ee);
    if (dims.size() > 1) {
        this.checkAllBioAssayDimensionsMatch(dims);
    }
    BioAssayDimension bioassaydim = dims.iterator().next();
    List<BioMaterial> start = new ArrayList<>();
    for (BioAssay ba : bioassaydim.getBioAssays()) {
        start.add(ba.getSampleUsed());
    }
    /*
     * Get the ordering we want.
     */
    List<BioMaterial> orderByExperimentalDesign = ExpressionDataMatrixColumnSort.orderByExperimentalDesign(start, ee.getExperimentalDesign().getExperimentalFactors());
    /*
     * Map of biomaterials to the new order index.
     */
    final Map<BioMaterial, Integer> ordering = new HashMap<>();
    int i = 0;
    for (BioMaterial bioMaterial : orderByExperimentalDesign) {
        ordering.put(bioMaterial, i);
        i++;
    }
    /*
     * Map of the original order to new order of bioassays (old index -> new index).
     */
    Map<Integer, Integer> indexes = new HashMap<>();
    Map<BioAssayDimension, BioAssayDimension> old2new = new HashMap<>();
    for (BioAssayDimension bioAssayDimension : dims) {
        Collection<BioAssay> bioAssays = bioAssayDimension.getBioAssays();
        assert bioAssays != null;
        /*
         * Initialize the new bioassay list with placeholders so positions can be set in any order.
         */
        List<BioAssay> resorted = new ArrayList<>(bioAssays.size());
        for (int m = 0; m < bioAssays.size(); m++) {
            resorted.add(null);
        }
        // Iterate with a running index instead of casting the collection to a List.
        int oldIndex = 0;
        for (BioAssay bioAssay : bioAssays) {
            BioMaterial sam1 = bioAssay.getSampleUsed();
            Integer newIndex = ordering.get(sam1);
            if (newIndex == null) {
                throw new IllegalStateException("Sample " + sam1 + " of bioassay " + bioAssay + " is not in the experimental design ordering");
            }
            resorted.set(newIndex, bioAssay);
            /*
             * Should be the same for all dimensions....
             */
            assert !indexes.containsKey(oldIndex) || indexes.get(oldIndex).equals(newIndex);
            indexes.put(oldIndex, newIndex);
            oldIndex++;
        }
        BioAssayDimension newBioAssayDimension = BioAssayDimension.Factory.newInstance();
        newBioAssayDimension.setBioAssays(resorted);
        newBioAssayDimension.setName("Processed data of ee " + ee.getShortName() + " ordered by design");
        newBioAssayDimension.setDescription("Data was reordered based on the experimental design.");
        newBioAssayDimension = bioAssayDimensionService.create(newBioAssayDimension);
        old2new.put(bioAssayDimension, newBioAssayDimension);
    }
    ByteArrayConverter converter = new ByteArrayConverter();
    for (ProcessedExpressionDataVector v : processedDataVectors) {
        BioAssayDimension revisedBioAssayDimension = old2new.get(v.getBioAssayDimension());
        assert revisedBioAssayDimension != null;
        double[] data = converter.byteArrayToDoubles(v.getData());
        /*
         * Put the data in the order of the new bioAssayDimension. The bioassay that was at old position k now
         * sits at indexes.get(k), so its value must be written to that position. (Reading from
         * data[indexes.get(k)] instead would apply the inverse permutation and misalign values and bioassays.)
         */
        Double[] resortedData = new Double[data.length];
        for (int k = 0; k < data.length; k++) {
            Integer newIndex = indexes.get(k);
            if (newIndex == null) {
                throw new IllegalStateException("No new position known for column " + k + " of vector " + v);
            }
            resortedData[newIndex] = data[k];
        }
        v.setData(converter.toBytes(resortedData));
        v.setBioAssayDimension(revisedBioAssayDimension);
    }
    ProcessedExpressionDataVectorCreateHelperServiceImpl.log.info("Updating bioassay ordering of " + processedDataVectors.size() + " vectors");
    this.auditTrailService.addUpdateEvent(ee, "Reordered the data vectors by experimental design");
}
use of ubic.gemma.model.expression.biomaterial.BioMaterial in project Gemma by PavlidisLab.
the class ProcessedExpressionDataVectorCreateHelperServiceImpl method checkAllBioAssayDimensionsMatch.
/**
 * Make sure we have only one ordering!!! If the sample matching is botched, there will be problems.
 * <p>
 * The sample order of the first dimension is taken as the reference; every other dimension must list exactly
 * the same samples in the same positions. A dimension with more or fewer bioassays than the reference is
 * treated as a mismatch too.
 *
 * @param dims the bioassay dimensions to compare
 * @throws IllegalStateException if any dimension differs from the first in sample order or in size
 */
private void checkAllBioAssayDimensionsMatch(Collection<BioAssayDimension> dims) {
    ProcessedExpressionDataVectorCreateHelperServiceImpl.log.info("Data set has more than one bioassaydimension for its processed data vectors");
    List<BioMaterial> ordering = null;
    for (BioAssayDimension dim : dims) {
        if (ordering == null) {
            // First dimension: record its sample order as the reference.
            ordering = new ArrayList<>();
            for (BioAssay ba : dim.getBioAssays()) {
                ordering.add(ba.getSampleUsed());
            }
            continue;
        }
        int j = 0;
        for (BioAssay ba : dim.getBioAssays()) {
            BioMaterial sample = ba.getSampleUsed();
            // The bounds check catches a dimension longer than the reference before indexing into it.
            if (j >= ordering.size() || !ordering.get(j).equals(sample)) {
                throw new IllegalStateException("Two dimensions didn't have the same BioMaterial ordering for the same data set.");
            }
            j++;
        }
        // A dimension shorter than the reference does not have the same ordering either.
        if (j != ordering.size()) {
            throw new IllegalStateException("Two dimensions didn't have the same BioMaterial ordering for the same data set.");
        }
    }
}
Aggregations