Search in sources :

Example 56 with BioAssay

use of ubic.gemma.model.expression.bioAssay.BioAssay in project Gemma by PavlidisLab.

the class ExpressionExperimentDaoImpl method removeBioAssays.

/**
 * Detaches each bioassay from the biomaterial it uses, remembering the original pairing and collecting the
 * biomaterials so the caller can delete them afterwards.
 * <p>
 * Relations to files cascade, so only biomaterials need attention here: they are not cascaded from anywhere.
 * BioAssay -> BioMaterial is many-to-one, but the bioassay set (the experiment) owns the bioassay.
 *
 * @param session              session used to re-associate each biomaterial before its collections are touched
 * @param copyOfRelations      receives one bioassay -> biomaterial entry for every link that is severed
 * @param bioMaterialsToDelete receives every biomaterial that was detached from a bioassay
 * @param bioAssays            the bioassays to process; those without a biomaterial are skipped
 */
private void removeBioAssays(Session session, Map<BioAssay, BioMaterial> copyOfRelations, Collection<BioMaterial> bioMaterialsToDelete, Collection<BioAssay> bioAssays) {
    for (BioAssay assay : bioAssays) {
        BioMaterial sample = assay.getSampleUsed();
        // A bioassay without a sample shouldn't happen, but there is nothing to detach if it does.
        if (sample != null) {
            bioMaterialsToDelete.add(sample);
            copyOfRelations.put(assay, sample);

            // Lock and initialize before touching the collections: this can easily end up with an
            // unattached object otherwise.
            session.buildLockRequest(LockOptions.NONE).lock(sample);
            Hibernate.initialize(sample);
            Hibernate.initialize(sample.getBioAssaysUsedIn());

            // Sever the biomaterial's own associations, then the bioassay's reference to it.
            sample.getFactorValues().clear();
            sample.getBioAssaysUsedIn().clear();
            assay.setSampleUsed(null);
        }
    }
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay)

Example 57 with BioAssay

use of ubic.gemma.model.expression.bioAssay.BioAssay in project Gemma by PavlidisLab.

the class PersistentDummyObjectHelper method getTestPersistentExpressionExperiment.

/**
 * Creates and persists a minimal ExpressionExperiment for tests that need one to satisfy a non-nullable
 * association. Apart from a single persistent BioMaterial/BioAssay pair (needed so that database taxon lookups
 * for the experiment work), an experimental design, an owner and quantitation types, no further associations
 * are filled in; the raw data vector collection is left empty.
 *
 * @param taxon the experiment will have this taxon
 * @return the persisted experiment
 */
public ExpressionExperiment getTestPersistentExpressionExperiment(Taxon taxon) {
    // Persistent sample, platform and assay, created in that order.
    BioMaterial sample = this.getTestPersistentBioMaterial(taxon);
    ArrayDesign platform = this.getTestPersistentArrayDesign(4, true, true);
    BioAssay assay = this.getTestPersistentBioAssay(platform, sample);
    Set<BioAssay> assays = new HashSet<>();
    assays.add(assay);

    ExpressionExperiment experiment = ExpressionExperiment.Factory.newInstance();
    String randomName = RandomStringUtils.randomNumeric(PersistentDummyObjectHelper.RANDOM_STRING_LENGTH) + "_testee";
    experiment.setName(randomName);
    String randomShortName = RandomStringUtils.randomNumeric(PersistentDummyObjectHelper.RANDOM_STRING_LENGTH) + "_testee";
    experiment.setShortName(randomShortName);
    experiment.setBioAssays(assays);

    Collection<FactorValue> allFactorValues = new HashSet<>();
    ExperimentalDesign design = this.getExperimentalDesign(allFactorValues);
    experiment.setExperimentalDesign(design);
    experiment.setOwner(this.getTestPersistentContact());

    log.debug("expression experiment => design element data vectors");
    Collection<RawExpressionDataVector> emptyVectors = new HashSet<>();
    Collection<QuantitationType> quantitationTypes = this.addQuantitationTypes(new HashSet<QuantitationType>());
    assert !quantitationTypes.isEmpty();
    experiment.setQuantitationTypes(quantitationTypes);
    experiment.setRawExpressionDataVectors(emptyVectors);

    ArrayDesignsForExperimentCache cache = persisterHelper.prepare(experiment);
    return persisterHelper.persist(experiment, cache);
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) ArrayDesignsForExperimentCache(ubic.gemma.persistence.util.ArrayDesignsForExperimentCache) RawExpressionDataVector(ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay)

Example 58 with BioAssay

use of ubic.gemma.model.expression.bioAssay.BioAssay in project Gemma by PavlidisLab.

the class ExperimentalDesignVisualizationServiceImpl method getExperimentalDesignLayout.

/**
 * Computes the experimental design "layout" for an experiment: for every bioassay, the numeric position of each
 * of its factors. Bioassays are visited in the column order chosen by
 * {@link ExpressionDataMatrixColumnSort#orderByExperimentalDesign}, and the returned map preserves that order.
 * <p>
 * When the design has no factors, every bioassay is given a single placeholder factor named "No factors" with
 * value 0.0. Otherwise the value for a factor is the factor value's measurement when it parses as a number, and
 * the factor value's id (as a double) in every other case.
 *
 * @param experiment the experiment whose experimental design supplies the factors; neither it nor its design
 *                   may be null
 * @param bds        a BioAssayDimension that represents the BioAssayDimensionValueObject. This is only needed to
 *                   avoid making ExpressionMatrix use value objects, otherwise we could use the
 *                   BioAssayDimensionValueObject
 * @return A "Layout": a map of bioassays to map of factors to doubles that represent the position in the layout.
 */
private LinkedHashMap<BioAssayValueObject, LinkedHashMap<ExperimentalFactor, Double>> getExperimentalDesignLayout(ExpressionExperiment experiment, Collection<BioAssayDimension> bds) {
    LinkedHashMap<BioAssayValueObject, LinkedHashMap<ExperimentalFactor, Double>> result = new LinkedHashMap<>();
    ExpressionDataMatrix<Object> mat = new EmptyExpressionMatrix(bds);
    // This is the place the actual sort order is determined.
    List<BioMaterial> bms = ExpressionDataMatrixColumnSort.orderByExperimentalDesign(mat);
    Map<Long, Double> fvV = new HashMap<>();
    assert experiment != null;
    assert experiment.getExperimentalDesign() != null;
    if (experiment.getExperimentalDesign().getExperimentalFactors().isEmpty()) {
        // Case of no experimental design; just put in a dummy factor.
        ExperimentalFactor dummyFactor = ExperimentalFactor.Factory.newInstance();
        dummyFactor.setName("No factors");
        for (BioMaterial bm : bms) {
            int j = mat.getColumnIndex(bm);
            Collection<BioAssay> bas = mat.getBioAssaysForColumn(j);
            for (BioAssay ba : bas) {
                BioAssayValueObject baVo = new BioAssayValueObject(ba, false);
                LinkedHashMap<ExperimentalFactor, Double> positions = new LinkedHashMap<>();
                positions.put(dummyFactor, 0.0);
                result.put(baVo, positions);
            }
        }
        return result;
    }
    // Default position for each factor value: its id is just used as a convenience.
    for (ExperimentalFactor ef : experiment.getExperimentalDesign().getExperimentalFactors()) {
        if (ef.getFactorValues().isEmpty()) {
            // this can happen if the design isn't complete.
            continue;
        }
        for (FactorValue fv : ef.getFactorValues()) {
            assert fv.getId() != null;
            // doubleValue() replaces the deprecated boxing constructor new Double(...).
            fvV.put(fv.getId(), fv.getId().doubleValue());
        }
    }
    assert !fvV.isEmpty();
    assert !bms.isEmpty();
    // Walk the samples in sorted order; every bioassay on a sample gets that sample's factor values.
    for (BioMaterial bm : bms) {
        int j = mat.getColumnIndex(bm);
        Collection<BioAssay> bas = mat.getBioAssaysForColumn(j);
        Collection<FactorValue> fvs = bm.getFactorValues();
        for (BioAssay ba : bas) {
            BioAssayValueObject baVo = new BioAssayValueObject(ba, false);
            LinkedHashMap<ExperimentalFactor, Double> positions = new LinkedHashMap<>(fvs.size());
            result.put(baVo, positions);
            for (FactorValue fv : fvs) {
                assert fv.getId() != null;
                assert fvV.containsKey(fv.getId());
                ExperimentalFactor ef = fv.getExperimentalFactor();
                Double value;
                if (fv.getMeasurement() != null) {
                    try {
                        value = Double.parseDouble(fv.getMeasurement().getValue());
                    } catch (NumberFormatException e) {
                        // not good: the measurement is not numeric, so fall back to the id-based position.
                        value = fvV.get(fv.getId());
                    }
                } else {
                    value = fvV.get(fv.getId());
                }
                assert result.containsKey(baVo);
                assert value != null;
                positions.put(ef, value);
            }
        }
    }
    return result;
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) EmptyExpressionMatrix(ubic.gemma.core.datastructure.matrix.EmptyExpressionMatrix) BioAssayValueObject(ubic.gemma.model.expression.bioAssay.BioAssayValueObject) DoubleVectorValueObject(ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject) BioAssayValueObject(ubic.gemma.model.expression.bioAssay.BioAssayValueObject) BioAssayDimensionValueObject(ubic.gemma.model.expression.bioAssayData.BioAssayDimensionValueObject) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay)

Example 59 with BioAssay

use of ubic.gemma.model.expression.bioAssay.BioAssay in project Gemma by PavlidisLab.

the class AffyPowerToolsProbesetSummarize method processData.

/**
 * Reads the output of apt-probeset-summarize and converts it to raw data vectors for the bioassays of the
 * experiment that were run on the given platform. For either 3' or Exon arrays.
 * <p>
 * Data columns are matched to bioassays by name: the column name has a trailing ".CEL"/".cel" extension removed
 * and is then matched either by its leading GEO sample accession (names like GSM476194_SK_09-BALBcJ_622) or, for
 * other names, by the whole remaining string against the bioassay's accession or name. Columns that match no
 * bioassay are dropped, which is only tolerated when the file has more columns than there are bioassays.
 *
 * @param ee                  ee
 * @param aptOutputFileToRead file
 * @param targetPlatform      deal with data from this platform (call multiple times if there is more than one platform)
 * @return raw data vectors
 * @throws IOException           io problem
 * @throws FileNotFoundException file not found
 * @throws IllegalStateException if the matrix is empty or has fewer columns than bioassays, if two bioassays
 *                               share an accession or name, or if some bioassay has no matching data column
 */
public Collection<RawExpressionDataVector> processData(ExpressionExperiment ee, String aptOutputFileToRead, ArrayDesign targetPlatform) throws IOException {
    AffyPowerToolsProbesetSummarize.log.info("Parsing " + aptOutputFileToRead);
    try (InputStream is = new FileInputStream(aptOutputFileToRead)) {
        DoubleMatrix<String, String> matrix = this.parse(is);
        if (matrix.rows() == 0) {
            throw new IllegalStateException("Matrix from APT had no rows");
        }
        if (matrix.columns() == 0) {
            throw new IllegalStateException("Matrix from APT had no columns");
        }
        // Only the bioassays run on the target platform take part.
        Collection<BioAssay> allBioAssays = ee.getBioAssays();
        Collection<BioAssay> bioAssaysToUse = new HashSet<>();
        for (BioAssay bioAssay : allBioAssays) {
            if (bioAssay.getArrayDesignUsed().equals(targetPlatform)) {
                bioAssaysToUse.add(bioAssay);
            }
        }
        if (allBioAssays.size() > bioAssaysToUse.size()) {
            AffyPowerToolsProbesetSummarize.log.info("Using " + bioAssaysToUse.size() + "/" + allBioAssays.size() + " bioassays (those on " + targetPlatform.getShortName() + ")");
        }
        if (matrix.columns() < bioAssaysToUse.size()) {
            // having > is okay, there can be extra.
            throw new IllegalStateException("Matrix from APT had the wrong number of colummns: expected " + bioAssaysToUse.size() + ", got " + matrix.columns());
        }
        AffyPowerToolsProbesetSummarize.log.info("Read " + matrix.rows() + " x " + matrix.columns() + ", matching with " + bioAssaysToUse.size() + " samples on " + targetPlatform);
        BioAssayDimension bad = BioAssayDimension.Factory.newInstance();
        bad.setName("For " + ee.getShortName() + " on " + targetPlatform);
        bad.setDescription("Generated from output of apt-probeset-summarize");
        /*
         * Index each bioassay under both its accession and its name, so a column can be matched by either.
         */
        Map<String, BioAssay> bmap = new HashMap<>();
        for (BioAssay bioAssay : bioAssaysToUse) {
            assert bioAssay.getArrayDesignUsed().equals(targetPlatform);
            if (bmap.containsKey(bioAssay.getAccession().getAccession()) || bmap.containsKey(bioAssay.getName())) {
                throw new IllegalStateException("Duplicate");
            }
            bmap.put(bioAssay.getAccession().getAccession(), bioAssay);
            bmap.put(bioAssay.getName(), bioAssay);
        }
        if (AffyPowerToolsProbesetSummarize.log.isDebugEnabled())
            AffyPowerToolsProbesetSummarize.log.debug("Will match result data file columns to bioassays referred to by any of the following strings:\n" + StringUtils.join(bmap.keySet(), "\n"));
        int found = 0;
        List<String> columnsToKeep = new ArrayList<>();
        for (int i = 0; i < matrix.columns(); i++) {
            String columnName = matrix.getColName(i);
            // Strip only a literal ".CEL"/".cel" extension. The dot must be escaped: unescaped it matches any
            // character, so a name such as "sample_CEL" would wrongly be cut down to "sample".
            String sampleName = columnName.replaceAll("\\.(CEL|cel)$", "");
            /*
             * Look for patterns like GSM476194_SK_09-BALBcJ_622.CEL
             */
            BioAssay assay = null;
            if (sampleName.matches("^GSM[0-9]+_.+")) {
                String geoAcc = sampleName.split("_")[0];
                AffyPowerToolsProbesetSummarize.log.info("Found column for " + geoAcc);
                if (bmap.containsKey(geoAcc)) {
                    assay = bmap.get(geoAcc);
                } else {
                    AffyPowerToolsProbesetSummarize.log.warn("No bioassay for " + geoAcc);
                }
            } else {
                /*
                 * Sometimes column names are like Aud_19L.CEL; match those on the whole name.
                 */
                assay = bmap.get(sampleName);
            }
            if (assay == null) {
                /*
                 * This is okay, if we have extras
                 */
                if (matrix.columns() == bioAssaysToUse.size()) {
                    throw new IllegalStateException("No bioassay could be matched to CEL file identified by " + sampleName);
                }
                AffyPowerToolsProbesetSummarize.log.warn("No bioassay for " + sampleName);
                continue;
            }
            AffyPowerToolsProbesetSummarize.log.info("Matching CEL sample " + sampleName + " to bioassay " + assay + " [" + assay.getAccession().getAccession() + "]");
            columnsToKeep.add(columnName);
            assert assay.getArrayDesignUsed().equals(targetPlatform);
            bad.getBioAssays().add(assay);
            found++;
        }
        if (found != bioAssaysToUse.size()) {
            throw new IllegalStateException("Failed to find a data column for every bioassay on the given platform " + targetPlatform);
        }
        // Drop the unmatched extra columns so the matrix lines up with the bioassay dimension.
        if (columnsToKeep.size() < matrix.columns()) {
            matrix = matrix.subsetColumns(columnsToKeep);
        }
        if (quantitationType == null) {
            quantitationType = AffyPowerToolsProbesetSummarize.makeAffyQuantitationType();
        }
        return this.convertDesignElementDataVectors(ee, bad, targetPlatform, matrix);
    }
}
Also used : BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay)

Example 60 with BioAssay

use of ubic.gemma.model.expression.bioAssay.BioAssay in project Gemma by PavlidisLab.

the class ExperimentalDesignWriter method write.

/**
 * Writes the experimental design as tab-delimited text: one row per biomaterial, holding the row name, the
 * external id, and then one column per experimental factor containing the biomaterial's value for that factor
 * (left empty when the biomaterial has no value for it). The writer is flushed but not closed.
 *
 * @param writer          destination for the text
 * @param ee              experiment whose experimental design is written
 * @param bioAssays       bioassays to include; they are grouped by the biomaterial they use
 * @param writeBaseHeader passed on to the header writer (comments)
 * @param writeHeader     whether a header (column names) is written before the rows
 * @throws IOException when the write failed
 */
public void write(Writer writer, ExpressionExperiment ee, Collection<BioAssay> bioAssays, boolean writeBaseHeader, boolean writeHeader) throws IOException {
    ExperimentalDesign design = ee.getExperimentalDesign();

    /*
     * Group the bioassays by sample. See BaseExpressionDataMatrix.setUpColumnElements() for how this is
     * constructed for the DataMatrix, and for some notes about complications.
     */
    Map<BioMaterial, Collection<BioAssay>> assaysBySample = new HashMap<>();
    for (BioAssay assay : bioAssays) {
        BioMaterial sample = assay.getSampleUsed();
        Collection<BioAssay> group = assaysBySample.get(sample);
        if (group == null) {
            group = new HashSet<BioAssay>();
            assaysBySample.put(sample, group);
        }
        group.add(assay);
    }

    List<ExperimentalFactor> orderedFactors = new ArrayList<>(design.getExperimentalFactors());
    StringBuffer buf = new StringBuffer();
    if (writeHeader) {
        this.writeHeader(ee, orderedFactors, writeBaseHeader, buf);
    }

    for (Map.Entry<BioMaterial, Collection<BioAssay>> row : assaysBySample.entrySet()) {
        BioMaterial sample = row.getKey();
        Collection<BioAssay> assaysOfSample = row.getValue();

        /* column 0 of the design matrix */
        buf.append(ExpressionDataWriterUtils.constructBioAssayName(sample, assaysOfSample)).append("\t");
        /* column 1 */
        buf.append(ExpressionDataWriterUtils.getExternalId(sample, assaysOfSample));

        /* columns 2 ... n where n+1 is the number of factors */
        Collection<FactorValue> sampleValues = sample.getFactorValues();
        for (ExperimentalFactor factor : orderedFactors) {
            buf.append("\t");
            // Only the first value belonging to this factor is written.
            for (FactorValue candidate : sampleValues) {
                if (!candidate.getExperimentalFactor().equals(factor)) {
                    log.debug(candidate.getExperimentalFactor() + " didn't match ... trying the next factor.");
                    continue;
                }
                log.debug(candidate.getExperimentalFactor() + " matched.");
                buf.append(ExpressionDataWriterUtils.constructFactorValueName(candidate));
                break;
            }
        }
        buf.append("\n");
    }

    String output = buf.toString();
    if (log.isDebugEnabled()) {
        log.debug(output);
    }
    writer.write(output);
    writer.flush();
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay)

Aggregations

BioAssay (ubic.gemma.model.expression.bioAssay.BioAssay)144 BioMaterial (ubic.gemma.model.expression.biomaterial.BioMaterial)67 Test (org.junit.Test)29 BioAssayDimension (ubic.gemma.model.expression.bioAssayData.BioAssayDimension)29 ArrayDesign (ubic.gemma.model.expression.arrayDesign.ArrayDesign)24 ExpressionExperiment (ubic.gemma.model.expression.experiment.ExpressionExperiment)20 CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence)18 RawExpressionDataVector (ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector)16 AbstractGeoServiceTest (ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest)15 BaseSpringContextTest (ubic.gemma.core.testing.BaseSpringContextTest)14 ExperimentalFactor (ubic.gemma.model.expression.experiment.ExperimentalFactor)14 InputStream (java.io.InputStream)11 ByteArrayConverter (ubic.basecode.io.ByteArrayConverter)10 HashSet (java.util.HashSet)9 AlreadyExistsInSystemException (ubic.gemma.core.loader.util.AlreadyExistsInSystemException)8 DesignElementDataVector (ubic.gemma.model.expression.bioAssayData.DesignElementDataVector)8 FactorValue (ubic.gemma.model.expression.experiment.FactorValue)8 RequestMapping (org.springframework.web.bind.annotation.RequestMapping)7 ModelAndView (org.springframework.web.servlet.ModelAndView)7 ExpressionDataDoubleMatrix (ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix)7