use of ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject in project Gemma by PavlidisLab.
the class DEDVController method getDEDVForDiffExVisualization.
/**
 * AJAX exposed method - for ProbeLevelDiffExGrid, VisualizationDifferentialWindow,
 * DifferentialExpressionAnalysesSummaryTree.
 * <p>
 * Loads the requested experiments and genes, fetches the processed data vectors for them, lays the vectors out by
 * experimental design, looks up the differential expression validation for the genes, and hands all of it to
 * makeDiffVisCollection.
 *
 * @param eeIds     FIXME accommodate ExpressionExperimentSubSets. Currently we pass in the "source experiment" so we
 *                  don't get the slice.
 * @param geneIds   (could be just one)
 * @param threshold for 'significance'
 * @param factorMap Collection of DiffExpressionSelectedFactorCommand showing which factors to use.
 * @return the result of makeDiffVisCollection, or null when either id collection is null or empty, or when no
 *         experiments or no genes could be loaded for the given ids.
 */
public VisualizationValueObject[] getDEDVForDiffExVisualization(Collection<Long> eeIds, Collection<Long> geneIds, Double threshold, Collection<DiffExpressionSelectedFactorCommand> factorMap) {
    // Treat a missing collection like an empty one instead of failing with a NullPointerException.
    if (eeIds == null || eeIds.isEmpty() || geneIds == null || geneIds.isEmpty())
        return null;

    // Step 1: load the entities and fetch the vectors (timed together).
    StopWatch watch = new StopWatch();
    watch.start();
    Collection<? extends BioAssaySet> ees = expressionExperimentService.load(eeIds);
    if (ees == null || ees.isEmpty())
        return null;
    Collection<Gene> genes = geneService.load(geneIds);
    if (genes == null || genes.isEmpty())
        return null;
    Collection<DoubleVectorValueObject> dedvs = processedExpressionDataVectorService.getProcessedDataArrays(ees, geneIds);
    watch.stop();
    long time = watch.getTime();
    log.info("Retrieved " + dedvs.size() + " DEDVs for " + eeIds.size() + " EEs and " + geneIds.size() + " genes in " + time + " ms.");

    // Step 2: compute the experimental design layouts for the vectors.
    watch = new StopWatch();
    watch.start();
    Map<Long, LinkedHashMap<BioAssayValueObject, LinkedHashMap<ExperimentalFactor, Double>>> layouts = experimentalDesignVisualizationService.sortVectorDataByDesign(dedvs);
    watch.stop();
    time = watch.getTime();
    if (time > 100) {
        log.info("Ran sortVectorDataByDesign on " + dedvs.size() + " DEDVs for " + eeIds.size() + " EEs in " + time + " ms (times <100ms not reported).");
    }
    // NOTE: the follow-up call
    // layouts = experimentalDesignVisualizationService.sortLayoutSamplesByFactor( layouts );
    // is currently disabled (original note: "required? yes, see GSE11859"). Its timing log was removed along with
    // it, because it reported a step that does not run.

    // Step 3: differential expression validation for the genes.
    watch = new StopWatch();
    watch.start();
    Map<Long, Collection<DifferentialExpressionValueObject>> validatedProbes = getProbeDiffExValidation(genes, threshold, factorMap);
    watch.stop();
    time = watch.getTime();
    log.info("Retrieved " + validatedProbes.size() + " valid probes in " + time + " ms.");

    return makeDiffVisCollection(dedvs, new ArrayList<>(geneIds), validatedProbes, layouts);
}
use of ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject in project Gemma by PavlidisLab.
the class DEDVController method format4File.
/**
 * Renders the given vectors as tab-delimited text.
 * <p>
 * The output starts with a "Generated by Gemma" comment carrying the current date, followed by the
 * ExpressionDataFileService disclaimer and the header produced by makeHeader for the first vector. Each vector then
 * becomes one line: gene symbols joined by "|", gene names joined by "|", the design element name, and the data
 * values formatted with three decimals.
 *
 * @param vectors the vectors to write; genes are resolved through getGeneValueObjectsUsed
 * @return the formatted text; a vector whose data array is null or empty gets a line without any values
 */
private String format4File(Collection<DoubleVectorValueObject> vectors) {
    StringBuilder converted = new StringBuilder();
    converted.append("# Generated by Gemma\n# ").append(new Date()).append("\n");
    converted.append(ExpressionDataFileService.DISCLAIMER + "#\n");
    boolean didHeader = false;
    Map<Long, GeneValueObject> gmap = getGeneValueObjectsUsed(vectors);
    for (DoubleVectorValueObject vec : vectors) {
        // The header is derived from the first vector only.
        if (!didHeader) {
            converted.append(makeHeader(vec));
            didHeader = true;
        }
        List<String> geneSymbols = new ArrayList<>();
        List<String> geneNames = new ArrayList<>();
        for (Long g : vec.getGenes()) {
            GeneValueObject gene = gmap.get(g);
            assert gene != null;
            geneSymbols.add(gene.getOfficialSymbol());
            geneNames.add(gene.getOfficialName());
        }
        converted.append(StringUtils.join(geneSymbols, "|")).append("\t").append(StringUtils.join(geneNames, "|")).append("\t");
        converted.append(vec.getDesignElement().getName());
        // Prefix each value with its separator so there is never a trailing tab to strip. The previous check
        // (data != null || data.length != 0) dereferenced a null array.
        double[] data = vec.getData();
        if (data != null) {
            for (double value : data) {
                converted.append("\t").append(String.format("%.3f", value));
            }
        }
        converted.append("\n");
    }
    return converted.toString();
}
use of ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject in project Gemma by PavlidisLab.
the class DEDVController method getProbeLinkValidation.
/**
 * Identify which probes were 'responsible' for the coexpression links.
 * FIXME change this to actually compute the correlations.
 * <p>
 * As written, the probe-level link lookup is commented out, so nothing is ever put into the returned map: the
 * method only groups probe ids per experiment and warns about experiments that lack probes for either gene.
 *
 * @param ees             experiments to check
 * @param queryGene       the query gene
 * @param coexpressedGene the gene coexpressed with the query gene
 * @param dedvs           vectors whose design elements are sorted into per-experiment groups by gene
 * @return map of EEID -> collection ProbeIDs which underlie the stored coexpression links (currently always empty).
 */
private Map<Long, Collection<Long>> getProbeLinkValidation(Collection<ExpressionExperiment> ees, Gene queryGene, Gene coexpressedGene, Collection<DoubleVectorValueObject> dedvs) {
    StopWatch timer = new StopWatch();
    timer.start();

    Map<Long, Collection<Long>> queryProbesByEe = new HashMap<>();
    Map<Long, Collection<Long>> coexpressedProbesByEe = new HashMap<>();

    /*
     * Get the probes for the vectors, organize by ee. A vector that carries the query gene is filed under the query
     * gene only, even if it also carries the coexpressed gene.
     */
    for (DoubleVectorValueObject vector : dedvs) {
        ExpressionExperimentValueObject vectorEe = vector.getExpressionExperiment();

        Map<Long, Collection<Long>> bucket;
        if (vector.getGenes().contains(queryGene.getId())) {
            bucket = queryProbesByEe;
        } else if (vector.getGenes().contains(coexpressedGene.getId())) {
            bucket = coexpressedProbesByEe;
        } else {
            log.error("Dedv doesn't belong to coexpressed or query gene. QueryGene= " + queryGene + "CoexpressedGene= " + coexpressedGene + "DEDV " + vector.getId() + " has genes: " + vector.getGenes());
            continue;
        }

        Long eeId = vectorEe.getId();
        Collection<Long> probeIds = bucket.get(eeId);
        if (probeIds == null) {
            probeIds = new HashSet<>();
            bucket.put(eeId, probeIds);
        }
        probeIds.add(vector.getDesignElement().getId());
    }

    Map<Long, Collection<Long>> validatedProbes = new HashMap<>();
    for (ExpressionExperiment experiment : ees) {
        Long eeId = experiment.getId();
        Collection<Long> queryProbeIds = queryProbesByEe.get(eeId);
        Collection<Long> coexpressedProbeIds = coexpressedProbesByEe.get(eeId);

        boolean noQueryProbes = queryProbeIds == null || queryProbeIds.isEmpty();
        if (noQueryProbes) {
            log.warn("Unexpectedly no probes for " + queryGene + " in " + experiment);
            continue;
        }

        boolean noCoexpressedProbes = coexpressedProbeIds == null || coexpressedProbeIds.isEmpty();
        if (noCoexpressedProbes) {
            log.warn("Unexpectedly no probes for " + coexpressedGene + " in " + experiment);
        }

        /*
         * Note: this does a probe-level query FIXME if we don't store data at probe-level we can't do this.
         */
        // Collection<Long> probesInLinks = this.geneCoexpressionService.getCoexpressedProbes( queryProbeIds,
        // coexpressedProbeIds, ee, queryGene.getTaxon().getCommonName() );
        // if ( probesInLinks.isEmpty() ) {
        // log.warn( "Unexpectedly no probes for link between " + queryGene + " -and- " + coexpressedGene + " in "
        // + ee );
        // }
        //
        // validatedProbes.put( ee.getId(), probesInLinks );
        // FIXME FIXME
    }

    timer.stop();
    long elapsedMs = timer.getTime();
    // Only slow runs (over one second) are worth a log line.
    if (elapsedMs > 1000) {
        log.info("Validation of probes for " + ees.size() + " experiments in " + elapsedMs + "ms.");
    }
    return validatedProbes;
}
use of ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject in project Gemma by PavlidisLab.
the class ProcessedExpressionDataCreateServiceTest method testComputeDevRankForExpressionExperimentMultiArrayWithGaps.
/**
 * Three platforms, one sample was not run on GPL81. It's 'Norm-1a', but the name we use for the sample is random.
 * <p>
 * Loads GSE482 from the local "gse482short" test files (or reuses the copy already in the system), computes the
 * processed data, and checks twice that NaN shows up only for the ten listed probes in the column of the sample
 * that was not run: once through an ExpressionDataDoubleMatrix built from the processed vectors, and once through
 * getProcessedDataArrays.
 */
@SuppressWarnings("unchecked")
@Test
public void testComputeDevRankForExpressionExperimentMultiArrayWithGaps() throws Exception {
    try {
        geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGeneratorLocal(this.getTestFileBasePath("gse482short")));
        Collection<ExpressionExperiment> loaded = (Collection<ExpressionExperiment>) geoService.fetchAndLoad("GSE482", false, true, false);
        this.ee = loaded.iterator().next();
    } catch (AlreadyExistsInSystemException e) {
        // Already loaded by an earlier run: the exception carries the existing experiment(s).
        Collection<ExpressionExperiment> existing = (Collection<ExpressionExperiment>) e.getData();
        this.ee = existing.iterator().next();
    }
    ee = this.eeService.thawLite(ee);
    processedExpressionDataVectorService.computeProcessedExpressionData(ee);
    Collection<ProcessedExpressionDataVector> preferredVectors = this.processedExpressionDataVectorService.getProcessedDataVectors(ee);
    ee = eeService.load(ee.getId());
    ee = this.eeService.thawLite(ee);
    processedExpressionDataVectorService.thaw(preferredVectors);

    ExpressionDataDoubleMatrix mat = new ExpressionDataDoubleMatrix(preferredVectors);
    assertEquals(10, mat.columns());

    boolean sawExpectedGap = false;
    for (int r = 0; r < mat.rows(); r++) {
        Double[] values = mat.getRow(r);

        // debugging: sample names once, then every row of values
        if (r == 0) {
            for (int c = 0; c < values.length; c++) {
                BioAssay assay = mat.getBioAssaysForColumn(c).iterator().next();
                System.err.println(assay.getName());
            }
        }
        System.err.print(mat.getRowElement(r).getDesignElement().getName() + "\t");
        for (double value : values) {
            System.err.printf("%4.2f\t", value);
        }
        System.err.print("\n");

        CompositeSequence probe = mat.getDesignElementForRow(r);
        for (int c = 0; c < values.length; c++) {
            BioAssay assay = mat.getBioAssaysForColumn(c).iterator().next();

            // A gap is expected only for the sample that was not run, and only for these ten probes.
            boolean gapExpected = false;
            if (assay.getName().matches("PGA-MurLungHyper-Norm-1a[ABC]v2-s2")) {
                String probeName = probe.getName();
                gapExpected = probeName.equals("100001_at")
                        || probeName.equals("100002_at")
                        || probeName.equals("100003_at")
                        || probeName.equals("100004_at")
                        || probeName.equals("100005_at")
                        || probeName.equals("100006_at")
                        || probeName.equals("100007_at")
                        || probeName.equals("100009_r_at")
                        || probeName.equals("100010_at")
                        || probeName.equals("100011_at");
            }

            if (gapExpected) {
                assertEquals(Double.NaN, values[c], 0.0001);
                sawExpectedGap = true;
            } else {
                assertTrue("Got unexpected NA value for " + assay.getName() + " for " + probe.getName(), !Double.isNaN(values[c]));
            }
        }
    }
    assertTrue(sawExpectedGap);

    /*
     * Now do this through the processedExpressionDataVectorService
     */
    Collection<DoubleVectorValueObject> da = this.processedExpressionDataVectorService.getProcessedDataArrays(ee);
    assertEquals(30, da.size());

    sawExpectedGap = false;
    boolean namesPrinted = false;
    for (DoubleVectorValueObject vector : da) {
        CompositeSequenceValueObject probe = vector.getDesignElement();
        double[] values = vector.getData();

        // debugging: sample names once, then every row of values
        if (!namesPrinted) {
            for (int c = 0; c < values.length; c++) {
                BioAssayValueObject assay = vector.getBioAssays().get(c);
                System.err.println(assay.getName());
            }
            namesPrinted = true;
        }
        System.err.print(probe.getName() + "\t");
        for (double value : values) {
            System.err.printf("%4.2f\t", value);
        }
        System.err.print("\n");

        assertEquals(10, values.length);
        for (int c = 0; c < values.length; c++) {
            assertNotNull(vector.getBioAssays());
            BioAssayValueObject assay = vector.getBioAssays().get(c);

            // Here the sample that was not run is identified by its placeholder name.
            boolean gapExpected = false;
            if (assay.getName().startsWith("Missing bioassay for biomaterial")) {
                String probeName = probe.getName();
                gapExpected = probeName.equals("100001_at")
                        || probeName.equals("100002_at")
                        || probeName.equals("100003_at")
                        || probeName.equals("100004_at")
                        || probeName.equals("100005_at")
                        || probeName.equals("100006_at")
                        || probeName.equals("100007_at")
                        || probeName.equals("100009_r_at")
                        || probeName.equals("100010_at")
                        || probeName.equals("100011_at");
            }

            if (gapExpected) {
                assertEquals(Double.NaN, values[c], 0.0001);
                sawExpectedGap = true;
            } else {
                assertTrue("Got unexpected NA value for " + assay.getName() + " for " + probe.getName(), !Double.isNaN(values[c]));
            }
        }
    }
    assertTrue(sawExpectedGap);
}
use of ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject in project Gemma by PavlidisLab.
the class VectorMergingServiceTest method test.
/**
 * End-to-end check of vector merging: load a multi-platform experiment, merge its platforms, switch the experiment
 * to the merged platform, merge the vectors, and verify the counts asserted at each stage.
 */
@Test
public final void test() throws Exception {
    /*
     * Need a persistent experiment that uses multiple array designs. Then merge the designs, switch the vectors,
     * and merge the vectors. GSE3443
     */
    /*
     * The experiment uses the following GPLs
     *
     * GPL2868, GPL2933, GPL2934, GPL2935, GPL2936, GPL2937, GPL2938
     *
     * Example of a sequence appearing on more than one platform: N57553
     */
    geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGeneratorLocal(this.getTestFileBasePath("gse3443merge")));
    Collection<?> loaded = geoService.fetchAndLoad("GSE3443", false, false, false);
    ee = (ExpressionExperiment) loaded.iterator().next();
    ee = this.eeService.thawLite(ee);

    Collection<ArrayDesign> platformsUsed = eeService.getArrayDesignsUsed(ee);
    assertEquals(7, platformsUsed.size());

    /*
     * Check number of sequences across all platforms. This is how many elements we need on the new platform, plus
     * extras for duplicated sequences (e.g. elements that don't have a sequence...)
     */
    Collection<ArrayDesign> thawedPlatforms = new HashSet<>();
    Set<BioSequence> sequencesBeforeMerge = new HashSet<>();
    for (ArrayDesign platform : platformsUsed) {
        ArrayDesign thawed = arrayDesignService.thaw(platform);
        thawedPlatforms.add(thawed);
        for (CompositeSequence element : thawed.getCompositeSequences()) {
            log.info(element + " " + element.getBiologicalCharacteristic());
            sequencesBeforeMerge.add(element.getBiologicalCharacteristic());
        }
    }
    assertEquals(63, sequencesBeforeMerge.size());

    /*
     * Check total size of elements across all 7 platforms.
     */
    int elementCount = 0;
    for (ArrayDesign thawed : thawedPlatforms) {
        elementCount = elementCount + thawed.getCompositeSequences().size();
    }
    assertEquals(140, elementCount);

    ArrayDesign firstPlatform = thawedPlatforms.iterator().next();
    platformsUsed.remove(firstPlatform);
    assertEquals(null, firstPlatform.getMergedInto());

    // Random suffixes keep the merged platform's names unique between runs.
    String mergedName = "testMerge" + RandomStringUtils.randomAlphabetic(5);
    String mergedShortName = "merged" + RandomStringUtils.randomAlphabetic(5);
    mergedAA = arrayDesignMergeService.merge(firstPlatform, thawedPlatforms, mergedName, mergedShortName, false);
    assertEquals(72, mergedAA.getCompositeSequences().size());

    Set<BioSequence> sequencesAfterMerge = new HashSet<>();
    for (CompositeSequence element : mergedAA.getCompositeSequences()) {
        sequencesAfterMerge.add(element.getBiologicalCharacteristic());
    }
    assertEquals(63, sequencesAfterMerge.size());
    // just to make this explicit. The new array design has to contain all the old sequences.
    assertEquals(sequencesBeforeMerge.size(), sequencesAfterMerge.size());

    ee = eeService.thaw(ee);
    assertEquals(1828, ee.getRawExpressionDataVectors().size());
    ee = eePlatformSwitchService.switchExperimentToArrayDesign(ee, mergedAA);
    ee = eeService.thaw(ee);

    // check we actually got switched over.
    for (BioAssay assay : ee.getBioAssays()) {
        assertEquals(mergedAA, assay.getArrayDesignUsed());
    }
    for (RawExpressionDataVector rawVector : ee.getRawExpressionDataVectors()) {
        assertEquals(mergedAA, rawVector.getDesignElement().getArrayDesign());
    }
    assertEquals(15, ee.getQuantitationTypes().size());
    assertEquals(1828, ee.getRawExpressionDataVectors().size());

    ee = vectorMergingService.mergeVectors(ee);

    // check we got the right processed data
    Collection<ProcessedExpressionDataVector> processedVectors = processedExpressionDataVectorService.getProcessedDataVectors(ee);
    assertEquals(72, processedVectors.size());

    ee = eeService.thaw(ee);
    Collection<DoubleVectorValueObject> limitedArrays = processedExpressionDataVectorService.getProcessedDataArrays(ee, 50);
    assertEquals(50, limitedArrays.size());
}
Aggregations