Search in sources :

Example 16 with BioAssayDimension

use of ubic.gemma.model.expression.bioAssayData.BioAssayDimension in project Gemma by PavlidisLab.

the class ExpressionExperimentPlatformSwitchService method processVector.

/**
 * Re-points a data vector at a probe (design element) of the new platform that shares the biosequence of the
 * vector's current probe, and optionally rewrites the vector to match a different BioAssayDimension.
 * <p>
 * A candidate probe may be reused by several vectors, but only once per BioAssayDimension; the first candidate
 * that has not yet been used for this vector's BioAssayDimension is chosen.
 *
 * @param designElementMap   Mapping of sequences to probes for the platform that is being switched to. Probes
 *                           without a sequence are expected under the NULL_BIOSEQUENCE key. This is used to
 *                           identify new candidates.
 * @param usedDesignElements probes from the new design that have already been assigned to probes from the old
 *                           design. If things are done correctly (the old design was merged into the new) then
 *                           there should be enough. Map is of the new design probe to the BioAssayDimensions it
 *                           has already been used for; it is updated by this method.
 * @param vector             the vector to switch; its design element is replaced
 * @param bad                BioAssayDimension to use, if necessary. If this is null or already the one used, it's
 *                           ignored. Otherwise the vector data will be rewritten to match it.
 * @throws IllegalStateException if there is no candidate design element for the vector's biosequence, or if every
 *                               candidate has already been used for the vector's BioAssayDimension
 */
private void processVector(Map<BioSequence, Collection<CompositeSequence>> designElementMap, Map<CompositeSequence, Collection<BioAssayDimension>> usedDesignElements, DesignElementDataVector vector, BioAssayDimension bad) {
    CompositeSequence oldDe = vector.getDesignElement();
    BioSequence seq = oldDe.getBiologicalCharacteristic();

    // Probes without a sequence are matched against the new platform's sequence-less probes.
    Collection<CompositeSequence> newElCandidates;
    if (seq == null) {
        newElCandidates = designElementMap.get(ExpressionExperimentPlatformSwitchService.NULL_BIOSEQUENCE);
    } else {
        newElCandidates = designElementMap.get(seq);
    }
    if (newElCandidates == null || newElCandidates.isEmpty()) {
        throw new IllegalStateException("There are no candidates probes for sequence: " + seq + "('null' should be okay)");
    }

    BioAssayDimension vectorBad = vector.getBioAssayDimension();
    boolean assigned = false;
    for (CompositeSequence newEl : newElCandidates) {
        Collection<BioAssayDimension> usedFor = usedDesignElements.get(newEl);
        if (usedFor == null) {
            // First use of this probe: start tracking the dimensions it serves.
            usedFor = new HashSet<>();
            usedDesignElements.put(newEl, usedFor);
        }
        if (!usedFor.contains(vectorBad)) {
            // Not yet used for this BioAssayDimension, so it's okay to use it.
            vector.setDesignElement(newEl);
            usedFor.add(vectorBad);
            assigned = true;
            break;
        }
    }
    if (!assigned) {
        // Previously this case fell through silently, leaving the vector attached to the old platform's probe.
        throw new IllegalStateException("All candidate probes for sequence: " + seq + " (old probe: " + oldDe + ") were already used for another vector in the same BioAssayDimension");
    }

    if (bad != null && !vectorBad.equals(bad)) {
        /*
         * The vector uses a different BioAssayDimension than the desired one, so its data has to be rewritten
         * to match the desired BAD.
         */
        this.vectorReWrite(vector, bad);
    }
}
Also used : BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) BioSequence(ubic.gemma.model.genome.biosequence.BioSequence) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence)

Example 17 with BioAssayDimension

use of ubic.gemma.model.expression.bioAssayData.BioAssayDimension in project Gemma by PavlidisLab.

the class ExpressionExperimentPlatformSwitchService method doMultiSample.

/**
 * Collects every BioAssayDimension attached to the experiment's bioassays and picks the one holding the most
 * bioassays, provided it covers the samples of all bioassays in the experiment.
 *
 * @param ee         the experiment whose bioassays are inspected
 * @param unusedBADs receives every BioAssayDimension found, except the one that was picked as the largest
 * @param maxSize    a dimension must contain strictly more bioassays than this to be picked
 * @return the largest BioAssayDimension, or null if none exceeded maxSize or the largest one does not include
 *         the sample of every bioassay in the experiment
 */
private BioAssayDimension doMultiSample(ExpressionExperiment ee, Collection<BioAssayDimension> unusedBADs, int maxSize) {
    BioAssayDimension largest = null;
    int largestSize = maxSize;
    for (BioAssay assay : ee.getBioAssays()) {
        for (BioAssayDimension candidate : bioAssayService.findBioAssayDimensions(assay)) {
            unusedBADs.add(candidate);
            int candidateSize = candidate.getBioAssays().size();
            // Strict comparison: on ties the dimension seen first is kept.
            if (candidateSize > largestSize) {
                largestSize = candidateSize;
                largest = candidate;
            }
        }
    }

    // otherwise we shouldn't be here.
    assert unusedBADs.size() > 1;
    unusedBADs.remove(largest);

    if (largest == null) {
        return null;
    }

    /*
     * Make sure all biomaterials in the study are included in the chosen bioassaydimension. If not, we'd have
     * to make a new BAD. That case is not implemented.
     */
    Collection<BioMaterial> coveredSamples = new HashSet<>();
    for (BioAssay assay : largest.getBioAssays()) {
        coveredSamples.add(assay.getSampleUsed());
    }
    for (BioAssay assay : ee.getBioAssays()) {
        if (coveredSamples.contains(assay.getSampleUsed())) {
            continue;
        }
        ExpressionExperimentPlatformSwitchService.log.warn("This experiment looked like it had samples run on more than one platform, " + "but it also has no BioAssayDimension that is eligible to accomodate all samples (Example: " + assay.getSampleUsed() + ") The experiment will be switched to the merged platform, but no BioAssayDimension switch will be done.");
        return null;
    }
    return largest;
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay)

Example 18 with BioAssayDimension

use of ubic.gemma.model.expression.bioAssayData.BioAssayDimension in project Gemma by PavlidisLab.

the class ExpressionExperimentPlatformSwitchService method switchExperimentToArrayDesign.

/**
 * Switches an experiment to use the given array design. Processed data vectors, the sample coexpression matrix
 * and the experiment's subsets are removed first, since they would be in the way.
 * <p>
 * If you know the arraydesigns are already in a merged state, you should use switchExperimentToMergedPlatform
 *
 * @param ee          ee
 * @param arrayDesign The array design to switch to. If some samples already use that array design, nothing will be
 *                    changed for them.
 * @return the (thawed) experiment after the switch
 * @throws IllegalArgumentException if arrayDesign is null; thrown before anything is removed
 */
public ExpressionExperiment switchExperimentToArrayDesign(ExpressionExperiment ee, ArrayDesign arrayDesign) {
    // Validate explicitly rather than with an assert: assertions are disabled by default, and the removals below
    // are destructive, so a bad argument must be rejected before any of them run.
    if (arrayDesign == null) {
        throw new IllegalArgumentException("arrayDesign must not be null");
    }
    // remove stuff that will be in the way.
    processedExpressionDataVectorService.removeProcessedDataVectors(ee);
    sampleCoexpressionMatrixService.delete(ee);
    for (ExpressionExperimentSubSet subset : expressionExperimentService.getSubSets(ee)) {
        subsetService.remove(subset);
    }
    // get relation between sequence and designelements.
    Map<BioSequence, Collection<CompositeSequence>> designElementMap = new HashMap<>();
    Collection<CompositeSequence> elsWithNoSeq = new HashSet<>();
    this.populateCSeq(arrayDesign, designElementMap, elsWithNoSeq);
    ee = expressionExperimentService.thaw(ee);
    ExpressionExperimentPlatformSwitchService.log.info(elsWithNoSeq.size() + " elements on the new platform have no associated sequence.");
    // Sequence-less probes are filed under a placeholder key so they can still be matched.
    designElementMap.put(ExpressionExperimentPlatformSwitchService.NULL_BIOSEQUENCE, elsWithNoSeq);
    boolean multiPlatformPerSample = this.checkMultiPerSample(ee, arrayDesign);
    /*
     * For a multiplatform-per-sample case: (note that some samples might just be on one platform...)
     * 1. Pick a BAD that can be used for all DataVectors (it has all BioAssays in it).
     * 2. Switch vectors to use it - may require adding NaNs and reordering the vectors
     * 3. Delete the Bioassays that are using other BADs
     */
    /*
     * Now we have to get the BADs. Problem to watch out for: they might not be the same length, we need one that
     * includes all BioMaterials.
     */
    Collection<BioAssayDimension> unusedBADs = new HashSet<>();
    BioAssayDimension maxBAD = null;
    int maxSize = 0;
    if (multiPlatformPerSample) {
        maxBAD = this.doMultiSample(ee, unusedBADs, maxSize);
    }
    Collection<ArrayDesign> oldArrayDesigns = expressionExperimentService.getArrayDesignsUsed(ee);
    Map<CompositeSequence, Collection<BioAssayDimension>> usedDesignElements = new HashMap<>();
    for (ArrayDesign oldAd : oldArrayDesigns) {
        this.runOldAd(ee, arrayDesign, designElementMap, maxBAD, usedDesignElements, oldAd);
    }
    // Avoid writing the literal text "null" into the description when the experiment had none.
    String previousDescription = ee.getDescription();
    ee.setDescription((previousDescription == null ? "" : previousDescription) + " [Switched to use " + arrayDesign.getShortName() + " by Gemma]");
    helperService.persist(ee, arrayDesign);
    /*
     * This might need to be done inside the transaction we're using to make the switch.
     */
    if (maxBAD != null && !unusedBADs.isEmpty()) {
        this.checkUnused(unusedBADs, maxBAD);
    }
    return ee;
}
Also used : BioSequence(ubic.gemma.model.genome.biosequence.BioSequence) ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) ExpressionExperimentSubSet(ubic.gemma.model.expression.experiment.ExpressionExperimentSubSet) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension)

Example 19 with BioAssayDimension

use of ubic.gemma.model.expression.bioAssayData.BioAssayDimension in project Gemma by PavlidisLab.

the class SimpleExpressionDataLoaderServiceImpl method convertBioAssayDimension.

/**
 * Builds a new BioAssayDimension holding one freshly created BioAssay (with its own new BioMaterial) per column
 * of the data matrix. Both the assay and its biomaterial are named after the column.
 *
 * @param ee          experiment whose short name is used in the generated names and descriptions
 * @param arrayDesign platform assigned to every generated bioassay
 * @param taxon       source taxon assigned to every generated biomaterial
 * @param matrix      data matrix; each column yields one bioassay
 * @return BioAssayDimension
 */
private BioAssayDimension convertBioAssayDimension(ExpressionExperiment ee, ArrayDesign arrayDesign, Taxon taxon, DoubleMatrix<String, String> matrix) {
    BioAssayDimension dimension = BioAssayDimension.Factory.newInstance();
    dimension.setName("For " + ee.getShortName());
    dimension.setDescription("Generated from flat file");

    final int columnCount = matrix.columns();
    final String materialDescription = "Generated by Gemma for: " + ee.getShortName();
    for (int col = 0; col < columnCount; col++) {
        String sampleName = matrix.getColName(col);

        // One biomaterial per column.
        BioMaterial sample = BioMaterial.Factory.newInstance();
        sample.setName(sampleName);
        sample.setDescription(materialDescription);
        sample.setSourceTaxon(taxon);

        // One bioassay per column, linked to the biomaterial above.
        BioAssay bioAssay = BioAssay.Factory.newInstance();
        bioAssay.setName(sampleName);
        bioAssay.setArrayDesignUsed(arrayDesign);
        bioAssay.setSampleUsed(sample);
        bioAssay.setIsOutlier(false);
        bioAssay.setSequencePairedReads(false);

        dimension.getBioAssays().add(bioAssay);
    }

    SimpleExpressionDataLoaderServiceImpl.log.info("Generated " + dimension.getBioAssays().size() + " bioAssays");
    return dimension;
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay)

Example 20 with BioAssayDimension

use of ubic.gemma.model.expression.bioAssayData.BioAssayDimension in project Gemma by PavlidisLab.

the class ExpressionExperimentServiceImpl method getBioAssayDimensions.

/**
 * Fetches the BioAssayDimensions of an experiment from the DAO and passes each one through
 * {@code bioAssayDimensionService.thaw}.
 *
 * @param expressionExperiment the experiment to look up
 * @return a new set containing the result of thawing each of the experiment's BioAssayDimensions
 */
@Override
@Transactional(readOnly = true)
public Collection<BioAssayDimension> getBioAssayDimensions(ExpressionExperiment expressionExperiment) {
    Collection<BioAssayDimension> thawed = new HashSet<>();
    for (BioAssayDimension dimension : this.expressionExperimentDao.getBioAssayDimensions(expressionExperiment)) {
        thawed.add(this.bioAssayDimensionService.thaw(dimension));
    }
    return thawed;
}
Also used : BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) Transactional(org.springframework.transaction.annotation.Transactional)

Aggregations

BioAssayDimension (ubic.gemma.model.expression.bioAssayData.BioAssayDimension): 59, BioAssay (ubic.gemma.model.expression.bioAssay.BioAssay): 29, QuantitationType (ubic.gemma.model.common.quantitationtype.QuantitationType): 20, RawExpressionDataVector (ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector): 16, DesignElementDataVector (ubic.gemma.model.expression.bioAssayData.DesignElementDataVector): 15, BioMaterial (ubic.gemma.model.expression.biomaterial.BioMaterial): 15, CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence): 15, ByteArrayConverter (ubic.basecode.io.ByteArrayConverter): 11, StandardQuantitationType (ubic.gemma.model.common.quantitationtype.StandardQuantitationType): 10, ArrayDesign (ubic.gemma.model.expression.arrayDesign.ArrayDesign): 9, ProcessedExpressionDataVector (ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector): 9, ExpressionExperiment (ubic.gemma.model.expression.experiment.ExpressionExperiment): 6, HashSet (java.util.HashSet): 4, Test (org.junit.Test): 4, Transactional (org.springframework.transaction.annotation.Transactional): 4, BioSequence (ubic.gemma.model.genome.biosequence.BioSequence): 4, StopWatch (org.apache.commons.lang3.time.StopWatch): 3, ExpressionDataDoubleMatrix (ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix): 3, ExpressionExperimentValueObject (ubic.gemma.model.expression.experiment.ExpressionExperimentValueObject): 3, DoubleArrayList (cern.colt.list.DoubleArrayList): 2