use of ubic.gemma.model.expression.bioAssayData.BioAssayDimension in project Gemma by PavlidisLab.
the class ProcessedExpressionDataVectorCreateHelperServiceImpl method reorderByDesign.
/**
 * Reorders the processed expression data vectors of an experiment so that their samples follow the ordering
 * derived from the experimental design.
 * <p>
 * For every existing bioassay dimension a new, reordered dimension is created through the
 * {@code bioAssayDimensionService}; each processed vector then gets its values permuted to match and is pointed
 * at the new dimension. Nothing is done (apart from logging) when the experiment has no experimental factors or
 * no processed data vectors.
 *
 * @param eeId identifier of the expression experiment to reorder
 * @throws IllegalStateException if the experiment has processed vectors but no bioassay dimension, if the
 *                               dimensions disagree on sample ordering, if a sample is missing from the
 *                               design-based ordering, or if a vector's length does not match the number of
 *                               bioassays
 */
@Override
@Transactional
public void reorderByDesign(Long eeId) {
    ExpressionExperiment ee = expressionExperimentDao.load(eeId);
    if (ee.getExperimentalDesign().getExperimentalFactors().isEmpty()) {
        ProcessedExpressionDataVectorCreateHelperServiceImpl.log.info(ee.getShortName() + " does not have a populated experimental design, skipping");
        return;
    }
    Collection<ProcessedExpressionDataVector> processedDataVectors = ee.getProcessedExpressionDataVectors();
    if (processedDataVectors.isEmpty()) {
        ProcessedExpressionDataVectorCreateHelperServiceImpl.log.info(ee.getShortName() + " does not have processed data");
        return;
    }
    Collection<BioAssayDimension> dims = this.eeService.getBioAssayDimensions(ee);
    if (dims.isEmpty()) {
        // Fail with a meaningful message instead of a NoSuchElementException from iterator().next() below.
        throw new IllegalStateException(ee.getShortName() + " has processed data vectors but no bioassay dimensions");
    }
    if (dims.size() > 1) {
        this.checkAllBioAssayDimensionsMatch(dims);
    }
    // Any dimension can serve as the reference, since all of them were just checked to agree.
    BioAssayDimension bioassaydim = dims.iterator().next();
    List<BioMaterial> start = new ArrayList<>();
    for (BioAssay ba : bioassaydim.getBioAssays()) {
        start.add(ba.getSampleUsed());
    }
    /*
     * Get the ordering we want.
     */
    List<BioMaterial> orderByExperimentalDesign = ExpressionDataMatrixColumnSort.orderByExperimentalDesign(start, ee.getExperimentalDesign().getExperimentalFactors());
    /*
     * Map of biomaterials to the new order index.
     */
    final Map<BioMaterial, Integer> ordering = new HashMap<>();
    int i = 0;
    for (BioMaterial bioMaterial : orderByExperimentalDesign) {
        ordering.put(bioMaterial, i);
        i++;
    }
    /*
     * Map of the original order to new order of bioassays (old index -> new index).
     */
    Map<Integer, Integer> indexes = new HashMap<>();
    Map<BioAssayDimension, BioAssayDimension> old2new = new HashMap<>();
    for (BioAssayDimension bioAssayDimension : dims) {
        Collection<BioAssay> bioAssays = bioAssayDimension.getBioAssays();
        assert bioAssays != null;
        /*
         * Initialize the new bioassay list with placeholders so positions can be set in arbitrary order.
         */
        List<BioAssay> resorted = new ArrayList<>(bioAssays.size());
        for (int m = 0; m < bioAssays.size(); m++) {
            resorted.add(null);
        }
        // Iterate with a counter rather than casting the collection to List.
        int oldIndex = 0;
        for (BioAssay bioAssay : bioAssays) {
            BioMaterial sam1 = bioAssay.getSampleUsed();
            Integer newIndex = ordering.get(sam1);
            if (newIndex == null) {
                throw new IllegalStateException("Sample " + sam1 + " used by bioassay " + bioAssay + " is missing from the design-based ordering");
            }
            resorted.set(newIndex, bioAssay);
            /*
             * Should be the same for all dimensions....
             */
            assert !indexes.containsKey(oldIndex) || indexes.get(oldIndex).equals(newIndex);
            indexes.put(oldIndex, newIndex);
            oldIndex++;
        }
        BioAssayDimension newBioAssayDimension = BioAssayDimension.Factory.newInstance();
        newBioAssayDimension.setBioAssays(resorted);
        newBioAssayDimension.setName("Processed data of ee " + ee.getShortName() + " ordered by design");
        newBioAssayDimension.setDescription("Data was reordered based on the experimental design.");
        newBioAssayDimension = bioAssayDimensionService.create(newBioAssayDimension);
        old2new.put(bioAssayDimension, newBioAssayDimension);
    }
    ByteArrayConverter converter = new ByteArrayConverter();
    for (ProcessedExpressionDataVector v : processedDataVectors) {
        BioAssayDimension revisedBioAssayDimension = old2new.get(v.getBioAssayDimension());
        assert revisedBioAssayDimension != null;
        double[] data = converter.byteArrayToDoubles(v.getData());
        if (data.length != indexes.size()) {
            throw new IllegalStateException("Vector has " + data.length + " values but " + indexes.size() + " bioassay positions were mapped");
        }
        /*
         * Put the data in the order of the new bioAssayDimension. 'indexes' maps old position -> new position,
         * so the value found at old position k has to be written to position indexes.get(k); reading from
         * indexes.get(k) instead would apply the inverse permutation.
         */
        Double[] resortedData = new Double[data.length];
        for (int k = 0; k < data.length; k++) {
            resortedData[indexes.get(k)] = data[k];
        }
        v.setData(converter.toBytes(resortedData));
        v.setBioAssayDimension(revisedBioAssayDimension);
    }
    ProcessedExpressionDataVectorCreateHelperServiceImpl.log.info("Updating bioassay ordering of " + processedDataVectors.size() + " vectors");
    this.auditTrailService.addUpdateEvent(ee, "Reordered the data vectors by experimental design");
}
use of ubic.gemma.model.expression.bioAssayData.BioAssayDimension in project Gemma by PavlidisLab.
the class ProcessedExpressionDataVectorCreateHelperServiceImpl method checkAllBioAssayDimensionsMatch.
/**
 * Make sure we have only one ordering!!! If the sample matching is botched, there will be problems.
 * <p>
 * The first dimension encountered defines the reference sequence of biomaterials; every other dimension must
 * list exactly the same biomaterials, in the same positions and in the same number.
 *
 * @param dims the bioassay dimensions to compare
 * @throws IllegalStateException if any dimension differs from the first one in sample order or in number of
 *                               bioassays
 */
private void checkAllBioAssayDimensionsMatch(Collection<BioAssayDimension> dims) {
    ProcessedExpressionDataVectorCreateHelperServiceImpl.log.info("Data set has more than one bioassaydimension for its processed data vectors");
    List<BioMaterial> ordering = null;
    for (BioAssayDimension dim : dims) {
        if (ordering == null) {
            // First dimension: record the reference ordering.
            ordering = new ArrayList<>();
            for (BioAssay ba : dim.getBioAssays()) {
                ordering.add(ba.getSampleUsed());
            }
            continue;
        }
        int j = 0;
        for (BioAssay ba : dim.getBioAssays()) {
            if (j >= ordering.size()) {
                // More bioassays than the reference; report it instead of running off the end of the list.
                throw new IllegalStateException("Two dimensions didn't have the same number of bioassays for the same data set.");
            }
            if (!ordering.get(j).equals(ba.getSampleUsed())) {
                throw new IllegalStateException("Two dimensions didn't have the same BioMaterial ordering for the same data set.");
            }
            j++;
        }
        if (j != ordering.size()) {
            // Fewer bioassays than the reference would otherwise pass unnoticed.
            throw new IllegalStateException("Two dimensions didn't have the same number of bioassays for the same data set.");
        }
    }
}
use of ubic.gemma.model.expression.bioAssayData.BioAssayDimension in project Gemma by PavlidisLab.
the class SVDServiceHelperImpl method getTopLoadedVectors.
/**
 * Looks up the processed data vectors belonging to the most highly loaded probes of a principal component.
 *
 * @param ee        the experiment whose stored PCA is consulted
 * @param component forwarded to the PCA service when asking for the top loaded probes
 * @param count     forwarded to the PCA service; the number of probes obtained is asserted not to exceed it
 * @return the matching vectors keyed by their probe loading; an empty map when there is no PCA, no probes, or
 *         no vectors could be retrieved
 */
@Override
public Map<ProbeLoading, DoubleVectorValueObject> getTopLoadedVectors(ExpressionExperiment ee, int component, int count) {
    PrincipalComponentAnalysis pca = principalComponentAnalysisService.loadForExperiment(ee);
    Map<ProbeLoading, DoubleVectorValueObject> vectorsByLoading = new HashMap<>();
    if (pca == null) {
        return vectorsByLoading;
    }

    List<ProbeLoading> topLoadedProbes = principalComponentAnalysisService.getTopLoadedProbes(ee, component, count);
    if (topLoadedProbes == null) {
        SVDServiceHelperImpl.log.warn("No probes?");
        return vectorsByLoading;
    }

    // Index the loadings by probe id, and gather the probes themselves for the vector query.
    Map<Long, ProbeLoading> loadingsByProbeId = new LinkedHashMap<>();
    Set<CompositeSequence> probesToFetch = new HashSet<>();
    for (ProbeLoading loading : topLoadedProbes) {
        CompositeSequence cs = loading.getProbe();
        loadingsByProbeId.put(cs.getId(), loading);
        probesToFetch.add(cs);
    }
    if (loadingsByProbeId.isEmpty()) {
        return vectorsByLoading;
    }
    assert loadingsByProbeId.size() <= count;

    Collection<ExpressionExperiment> experiments = new HashSet<>();
    experiments.add(ee);
    Collection<DoubleVectorValueObject> fetchedVectors = processedExpressionDataVectorService.getProcessedDataArraysByProbe(experiments, probesToFetch);
    if (fetchedVectors.isEmpty()) {
        SVDServiceHelperImpl.log.warn("No vectors came back from the call; check the Gene2CS table?");
        return vectorsByLoading;
    }

    BioAssayDimension pcaDimension = pca.getBioAssayDimension();
    assert pcaDimension != null;
    assert !pcaDimension.getBioAssays().isEmpty();

    for (DoubleVectorValueObject vector : fetchedVectors) {
        ProbeLoading loading = loadingsByProbeId.get(vector.getDesignElement().getId());
        if (loading != null) {
            assert pcaDimension.getBioAssays().size() == vector.getData().length;
            vector.setRank(loading.getLoadingRank().doubleValue());
            vector.setExpressionExperiment(new ExpressionExperimentValueObject(ee));
            vectorsByLoading.put(loading, vector);
        }
        /*
         * Otherwise skip it: that is okay, it was another probe for a gene that _was_ highly loaded.
         */
    }

    if (vectorsByLoading.isEmpty()) {
        SVDServiceHelperImpl.log.warn("No results, something went wrong; there were " + fetchedVectors.size() + " vectors to start but they all got filtered out.");
    }
    return vectorsByLoading;
}
use of ubic.gemma.model.expression.bioAssayData.BioAssayDimension in project Gemma by PavlidisLab.
the class SVDServiceHelperImpl method svd.
/**
 * Performs a singular value decomposition on the processed data of an experiment, stores the outcome via
 * {@code updatePca} and returns the result of the subsequent factor analysis.
 *
 * @param ee the experiment to analyse; must not be null (asserted)
 * @return whatever {@code svdFactorAnalysis} produces for the updated PCA
 * @throws IllegalArgumentException if the experiment has no processed data vectors
 */
@Override
public SVDValueObject svd(ExpressionExperiment ee) {
    assert ee != null;

    Collection<ProcessedExpressionDataVector> processedVectors = processedExpressionDataVectorService.getProcessedDataVectors(ee);
    if (processedVectors.isEmpty()) {
        throw new IllegalArgumentException("Experiment must have processed data already to do SVD");
    }
    processedExpressionDataVectorService.thaw(processedVectors);

    ExpressionDataDoubleMatrix dataMatrix = new ExpressionDataDoubleMatrix(processedVectors);
    SVDServiceHelperImpl.log.info("Starting SVD");
    ExpressionDataSVD decomposition = new ExpressionDataSVD(dataMatrix);
    SVDServiceHelperImpl.log.info("SVD done, postprocessing and storing results.");

    // Save the results, then run the factor analysis on what was stored.
    DoubleMatrix<Integer, BioMaterial> vMatrix = decomposition.getV();
    BioAssayDimension bestDimension = dataMatrix.getBestBioAssayDimension();
    return this.svdFactorAnalysis(this.updatePca(ee, decomposition, vMatrix, bestDimension));
}
use of ubic.gemma.model.expression.bioAssayData.BioAssayDimension in project Gemma by PavlidisLab.
the class GeoDatasetServiceTest method testMatrixValue.
/**
 * Asserts that the matrix holds the expected value for a given probe and sample.
 * <p>
 * The probe and bioassay are located by scanning the raw expression data vectors of the experiment: the probe
 * by design element name, the bioassay by accession. The whole collection is scanned without stopping early, so
 * the last match found is the one used.
 *
 * @param exp           experiment whose raw vectors are searched
 * @param matrix        matrix to read the value from
 * @param probeToTest   name of the design element to look for
 * @param sampleToTest  accession of the bioassay to look for
 * @param expectedValue value expected in the matrix, compared with a tolerance of 0.00001
 */
private void testMatrixValue(ExpressionExperiment exp, ExpressionDataMatrix<Double> matrix, String probeToTest, String sampleToTest, double expectedValue) {
    CompositeSequence matchedProbe = null;
    BioAssay matchedAssay = null;

    Collection<RawExpressionDataVector> rawVectors = exp.getRawExpressionDataVectors();
    for (DesignElementDataVector rawVector : rawVectors) {
        CompositeSequence candidateProbe = rawVector.getDesignElement();
        if (candidateProbe.getName().equals(probeToTest)) {
            matchedProbe = candidateProbe;
        }

        BioAssayDimension dimension = rawVector.getBioAssayDimension();
        for (BioAssay candidateAssay : dimension.getBioAssays()) {
            String accession = candidateAssay.getAccession().getAccession();
            if (accession.equals(sampleToTest)) {
                matchedAssay = candidateAssay;
            }
        }
    }

    boolean foundBoth = matchedProbe != null && matchedAssay != null;
    if (!foundBoth) {
        fail("didn't find values for " + sampleToTest);
    }

    Double actualValue = matrix.get(matchedProbe, matchedAssay);
    assertNotNull("No value for " + matchedAssay, actualValue);
    assertEquals(expectedValue, actualValue, 0.00001);
}
Aggregations