Search in sources :

Example 6 with BioAssayDimension

use of ubic.gemma.model.expression.bioAssayData.BioAssayDimension in project Gemma by PavlidisLab.

the class ProcessedExpressionDataVectorCreateHelperServiceImpl method reorderByDesign.

/**
 * Reorders the processed expression data of an experiment so that the samples follow the experimental design.
 * <p>
 * For each existing {@link BioAssayDimension} a new dimension holding the bioassays in design order is created, and
 * the values of every processed vector are permuted to match before the vector is pointed at its new dimension.
 * Nothing is changed when the experiment has no experimental factors or no processed data.
 *
 * @param eeId id of the expression experiment to reorder
 * @throws IllegalStateException if no bioassay dimension is found, a sample is missing from the design ordering, a
 *                               vector refers to an unknown dimension, or a vector's length does not match the
 *                               number of bioassays
 */
@Override
@Transactional
public void reorderByDesign(Long eeId) {
    ExpressionExperiment ee = expressionExperimentDao.load(eeId);
    if (ee.getExperimentalDesign().getExperimentalFactors().size() == 0) {
        ProcessedExpressionDataVectorCreateHelperServiceImpl.log.info(ee.getShortName() + " does not have a populated experimental design, skipping");
        return;
    }
    Collection<ProcessedExpressionDataVector> processedDataVectors = ee.getProcessedExpressionDataVectors();
    if (processedDataVectors.size() == 0) {
        ProcessedExpressionDataVectorCreateHelperServiceImpl.log.info(ee.getShortName() + " does not have processed data");
        return;
    }
    Collection<BioAssayDimension> dims = this.eeService.getBioAssayDimensions(ee);
    if (dims.isEmpty()) {
        // Fail with a readable message instead of a bare NoSuchElementException from iterator().next().
        throw new IllegalStateException(ee.getShortName() + " has processed data but no bioassay dimension");
    }
    if (dims.size() > 1) {
        this.checkAllBioAssayDimensionsMatch(dims);
    }
    BioAssayDimension bioassaydim = dims.iterator().next();
    List<BioMaterial> start = new ArrayList<>();
    for (BioAssay ba : bioassaydim.getBioAssays()) {
        start.add(ba.getSampleUsed());
    }
    /*
     * Get the ordering we want.
     */
    List<BioMaterial> orderByExperimentalDesign = ExpressionDataMatrixColumnSort.orderByExperimentalDesign(start, ee.getExperimentalDesign().getExperimentalFactors());
    /*
     * Map of biomaterials to the new order index.
     */
    final Map<BioMaterial, Integer> ordering = new HashMap<>();
    int i = 0;
    for (BioMaterial bioMaterial : orderByExperimentalDesign) {
        ordering.put(bioMaterial, i);
        i++;
    }
    /*
     * Map of the original position of each bioassay to its new position.
     */
    Map<Integer, Integer> indexes = new HashMap<>();
    Map<BioAssayDimension, BioAssayDimension> old2new = new HashMap<>();
    for (BioAssayDimension bioAssayDimension : dims) {
        Collection<BioAssay> bioAssays = bioAssayDimension.getBioAssays();
        assert bioAssays != null;
        /*
         * Initialize the new bioassay list with placeholders so positions can be filled in any order.
         */
        List<BioAssay> resorted = new ArrayList<>(bioAssays.size());
        for (int m = 0; m < bioAssays.size(); m++) {
            resorted.add(null);
        }
        // Iterate rather than cast to List; the iteration position is the old index.
        int oldIndex = 0;
        for (BioAssay bioAssay : bioAssays) {
            BioMaterial sam1 = bioAssay.getSampleUsed();
            Integer newIndex = ordering.get(sam1);
            if (newIndex == null) {
                throw new IllegalStateException("Sample " + sam1 + " of bioassay " + bioAssay + " is missing from the design ordering of " + ee.getShortName());
            }
            resorted.set(newIndex, bioAssay);
            /*
             * Should be the same for all dimensions....
             */
            assert !indexes.containsKey(oldIndex) || indexes.get(oldIndex).equals(newIndex);
            indexes.put(oldIndex, newIndex);
            oldIndex++;
        }
        BioAssayDimension newBioAssayDimension = BioAssayDimension.Factory.newInstance();
        newBioAssayDimension.setBioAssays(resorted);
        newBioAssayDimension.setName("Processed data of ee " + ee.getShortName() + " ordered by design");
        newBioAssayDimension.setDescription("Data was reordered based on the experimental design.");
        newBioAssayDimension = bioAssayDimensionService.create(newBioAssayDimension);
        old2new.put(bioAssayDimension, newBioAssayDimension);
    }
    ByteArrayConverter converter = new ByteArrayConverter();
    for (ProcessedExpressionDataVector v : processedDataVectors) {
        BioAssayDimension revisedBioAssayDimension = old2new.get(v.getBioAssayDimension());
        if (revisedBioAssayDimension == null) {
            // Previously only an assert; with assertions off the vector would have been given a null dimension.
            throw new IllegalStateException("Vector uses a bioassay dimension that is not among the dimensions of " + ee.getShortName());
        }
        double[] data = converter.byteArrayToDoubles(v.getData());
        if (data.length != indexes.size()) {
            throw new IllegalStateException("Vector has " + data.length + " values but there are " + indexes.size() + " bioassays in " + ee.getShortName());
        }
        /*
         * Put the data in the order of the new bioAssayDimension: the value that was at old position k belongs at
         * the position its bioassay was moved to. (Reading data[indexes.get(k)] into slot k would apply the inverse
         * permutation and scramble any ordering that is not its own inverse.)
         */
        Double[] resortedData = new Double[data.length];
        for (int k = 0; k < data.length; k++) {
            Integer newIndex = indexes.get(k);
            if (newIndex == null) {
                throw new IllegalStateException("No new position recorded for column " + k + " in " + ee.getShortName());
            }
            resortedData[newIndex] = data[k];
        }
        v.setData(converter.toBytes(resortedData));
        v.setBioAssayDimension(revisedBioAssayDimension);
    }
    ProcessedExpressionDataVectorCreateHelperServiceImpl.log.info("Updating bioassay ordering of " + processedDataVectors.size() + " vectors");
    this.auditTrailService.addUpdateEvent(ee, "Reordered the data vectors by experimental design");
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) ByteArrayConverter(ubic.basecode.io.ByteArrayConverter) ProcessedExpressionDataVector(ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector) ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment) BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) DoubleArrayList(cern.colt.list.DoubleArrayList) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay) Transactional(org.springframework.transaction.annotation.Transactional)

Example 7 with BioAssayDimension

use of ubic.gemma.model.expression.bioAssayData.BioAssayDimension in project Gemma by PavlidisLab.

the class ProcessedExpressionDataVectorCreateHelperServiceImpl method checkAllBioAssayDimensionsMatch.

/**
 * Make sure we have only one ordering!!! If the sample matching is botched, there will be problems.
 * <p>
 * The biomaterials of the first dimension are taken as the reference; every other dimension must list exactly the
 * same biomaterials in the same positions, which also means it must have the same number of bioassays.
 *
 * @param dims the bioassay dimensions used by the processed data vectors of one data set
 * @throws IllegalStateException if any dimension differs from the first in biomaterial order or count
 */
private void checkAllBioAssayDimensionsMatch(Collection<BioAssayDimension> dims) {
    ProcessedExpressionDataVectorCreateHelperServiceImpl.log.info("Data set has more than one bioassaydimension for its processed data vectors");
    List<BioMaterial> reference = null;
    for (BioAssayDimension dim : dims) {
        List<BioMaterial> current = new ArrayList<>();
        for (BioAssay ba : dim.getBioAssays()) {
            current.add(ba.getSampleUsed());
        }
        if (reference == null) {
            reference = current;
            continue;
        }
        // List.equals compares size as well as each position, so a longer or shorter dimension is rejected
        // with the same exception instead of an IndexOutOfBoundsException or a silent pass.
        if (!reference.equals(current)) {
            throw new IllegalStateException("Two dimensions didn't have the same BioMaterial ordering for the same data set.");
        }
    }
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) DoubleArrayList(cern.colt.list.DoubleArrayList) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay)

Example 8 with BioAssayDimension

use of ubic.gemma.model.expression.bioAssayData.BioAssayDimension in project Gemma by PavlidisLab.

the class SVDServiceHelperImpl method getTopLoadedVectors.

/**
 * Collects the processed data vectors of the probes with the top loadings on one principal component.
 *
 * @param ee        experiment whose stored principal component analysis is consulted
 * @param component component passed through to the top-loaded-probe lookup
 * @param count     number of probes requested from the top-loaded-probe lookup
 * @return vectors keyed by their probe loading; empty when there is no analysis, no probes or no vectors
 */
@Override
public Map<ProbeLoading, DoubleVectorValueObject> getTopLoadedVectors(ExpressionExperiment ee, int component, int count) {
    Map<ProbeLoading, DoubleVectorValueObject> vectorsByLoading = new HashMap<>();
    PrincipalComponentAnalysis pca = principalComponentAnalysisService.loadForExperiment(ee);
    if (pca == null) {
        return vectorsByLoading;
    }

    List<ProbeLoading> loadings = principalComponentAnalysisService.getTopLoadedProbes(ee, component, count);
    if (loadings == null) {
        SVDServiceHelperImpl.log.warn("No probes?");
        return vectorsByLoading;
    }

    // Index the loadings by probe id (in the order returned) and gather the probes for the vector query.
    Map<Long, ProbeLoading> loadingsByProbeId = new LinkedHashMap<>();
    Set<CompositeSequence> probeSet = new HashSet<>();
    for (ProbeLoading loading : loadings) {
        CompositeSequence probe = loading.getProbe();
        loadingsByProbeId.put(probe.getId(), loading);
        probeSet.add(probe);
    }
    if (loadingsByProbeId.isEmpty()) {
        return vectorsByLoading;
    }
    assert loadingsByProbeId.size() <= count;

    Collection<ExpressionExperiment> experiments = new HashSet<>();
    experiments.add(ee);
    Collection<DoubleVectorValueObject> dvVos = processedExpressionDataVectorService.getProcessedDataArraysByProbe(experiments, probeSet);
    if (dvVos.isEmpty()) {
        SVDServiceHelperImpl.log.warn("No vectors came back from the call; check the Gene2CS table?");
        return vectorsByLoading;
    }

    // note that the vectors might have come from a cache.
    BioAssayDimension pcaDimension = pca.getBioAssayDimension();
    assert pcaDimension != null;
    assert !pcaDimension.getBioAssays().isEmpty();

    for (DoubleVectorValueObject vector : dvVos) {
        ProbeLoading loading = loadingsByProbeId.get(vector.getDesignElement().getId());
        /*
         * A vector without a loading is okay and is skipped; this is expected because we go through the genes,
         * so it was another probe for a gene that _was_ highly loaded.
         */
        if (loading != null) {
            assert pcaDimension.getBioAssays().size() == vector.getData().length;
            vector.setRank(loading.getLoadingRank().doubleValue());
            vector.setExpressionExperiment(new ExpressionExperimentValueObject(ee));
            vectorsByLoading.put(loading, vector);
        }
    }

    if (vectorsByLoading.isEmpty()) {
        SVDServiceHelperImpl.log.warn("No results, something went wrong; there were " + dvVos.size() + " vectors to start but they all got filtered out.");
    }
    return vectorsByLoading;
}
Also used : ProbeLoading(ubic.gemma.model.analysis.expression.pca.ProbeLoading) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment) BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) PrincipalComponentAnalysis(ubic.gemma.model.analysis.expression.pca.PrincipalComponentAnalysis) ExpressionExperimentValueObject(ubic.gemma.model.expression.experiment.ExpressionExperimentValueObject) DoubleVectorValueObject(ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject)

Example 9 with BioAssayDimension

use of ubic.gemma.model.expression.bioAssayData.BioAssayDimension in project Gemma by PavlidisLab.

the class SVDServiceHelperImpl method svd.

/**
 * Runs a singular value decomposition on the processed data of an experiment.
 * <p>
 * The decomposition, its V matrix and the matrix's best bioassay dimension are handed to {@code updatePca}; the
 * resulting analysis is then passed to {@code svdFactorAnalysis}, whose result is returned.
 *
 * @param ee the experiment to analyse; must already have processed data vectors
 * @return the value object produced by {@code svdFactorAnalysis}
 * @throws IllegalArgumentException if the experiment has no processed data vectors
 */
@Override
public SVDValueObject svd(ExpressionExperiment ee) {
    assert ee != null;

    Collection<ProcessedExpressionDataVector> processedVectors = processedExpressionDataVectorService.getProcessedDataVectors(ee);
    if (processedVectors.isEmpty()) {
        throw new IllegalArgumentException("Experiment must have processed data already to do SVD");
    }
    processedExpressionDataVectorService.thaw(processedVectors);

    ExpressionDataDoubleMatrix dataMatrix = new ExpressionDataDoubleMatrix(processedVectors);
    SVDServiceHelperImpl.log.info("Starting SVD");
    ExpressionDataSVD decomposition = new ExpressionDataSVD(dataMatrix);
    SVDServiceHelperImpl.log.info("SVD done, postprocessing and storing results.");

    /*
     * Save the results
     */
    PrincipalComponentAnalysis storedPca = this.updatePca(ee, decomposition, decomposition.getV(), dataMatrix.getBestBioAssayDimension());
    return this.svdFactorAnalysis(storedPca);
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) PrincipalComponentAnalysis(ubic.gemma.model.analysis.expression.pca.PrincipalComponentAnalysis) ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix) ProcessedExpressionDataVector(ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector)

Example 10 with BioAssayDimension

use of ubic.gemma.model.expression.bioAssayData.BioAssayDimension in project Gemma by PavlidisLab.

the class GeoDatasetServiceTest method testMatrixValue.

/**
 * Asserts that the matrix holds the expected value for a given probe and sample.
 * <p>
 * The probe and bioassay are looked up by scanning the experiment's raw data vectors: the probe by design element
 * name, the bioassay by accession. The test fails if either is not found or the matrix has no value for the pair.
 *
 * @param exp           experiment whose raw vectors are scanned
 * @param matrix        matrix under test
 * @param probeToTest   name of the design element to look up
 * @param sampleToTest  accession of the bioassay to look up
 * @param expectedValue value expected in the matrix, compared with a tolerance of 0.00001
 */
private void testMatrixValue(ExpressionExperiment exp, ExpressionDataMatrix<Double> matrix, String probeToTest, String sampleToTest, double expectedValue) {
    CompositeSequence matchedProbe = null;
    BioAssay matchedAssay = null;

    // No early exit: the last matching probe and bioassay seen are the ones used.
    for (DesignElementDataVector rawVector : exp.getRawExpressionDataVectors()) {
        CompositeSequence candidateProbe = rawVector.getDesignElement();
        if (candidateProbe.getName().equals(probeToTest)) {
            matchedProbe = candidateProbe;
        }
        for (BioAssay candidateAssay : rawVector.getBioAssayDimension().getBioAssays()) {
            if (candidateAssay.getAccession().getAccession().equals(sampleToTest)) {
                matchedAssay = candidateAssay;
            }
        }
    }

    boolean foundBoth = matchedProbe != null && matchedAssay != null;
    if (!foundBoth) {
        fail("didn't find values for " + sampleToTest);
    }

    Double actualValue = matrix.get(matchedProbe, matchedAssay);
    assertNotNull("No value for " + matchedAssay, actualValue);
    assertEquals(expectedValue, actualValue, 0.00001);
}
Also used : BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) RawExpressionDataVector(ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector) DesignElementDataVector(ubic.gemma.model.expression.bioAssayData.DesignElementDataVector) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay)

Aggregations

BioAssayDimension (ubic.gemma.model.expression.bioAssayData.BioAssayDimension): 59; BioAssay (ubic.gemma.model.expression.bioAssay.BioAssay): 29; QuantitationType (ubic.gemma.model.common.quantitationtype.QuantitationType): 20; RawExpressionDataVector (ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector): 16; DesignElementDataVector (ubic.gemma.model.expression.bioAssayData.DesignElementDataVector): 15; BioMaterial (ubic.gemma.model.expression.biomaterial.BioMaterial): 15; CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence): 15; ByteArrayConverter (ubic.basecode.io.ByteArrayConverter): 11; StandardQuantitationType (ubic.gemma.model.common.quantitationtype.StandardQuantitationType): 10; ArrayDesign (ubic.gemma.model.expression.arrayDesign.ArrayDesign): 9; ProcessedExpressionDataVector (ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector): 9; ExpressionExperiment (ubic.gemma.model.expression.experiment.ExpressionExperiment): 6; HashSet (java.util.HashSet): 4; Test (org.junit.Test): 4; Transactional (org.springframework.transaction.annotation.Transactional): 4; BioSequence (ubic.gemma.model.genome.biosequence.BioSequence): 4; StopWatch (org.apache.commons.lang3.time.StopWatch): 3; ExpressionDataDoubleMatrix (ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix): 3; ExpressionExperimentValueObject (ubic.gemma.model.expression.experiment.ExpressionExperimentValueObject): 3; DoubleArrayList (cern.colt.list.DoubleArrayList): 2