Search in sources :

Example 46 with BioAssay

use of ubic.gemma.model.expression.bioAssay.BioAssay in project Gemma by PavlidisLab.

the class ExperimentalDesignImporterImpl method mapBioMaterialsToNamePossibilities.

/**
 * Builds a lookup from every identifier that might appear in a design import file to the biomaterial it denotes.
 * Keys registered for each biomaterial, in this order: its own name; for each of its bioassays the non-blank
 * external accession (if any) and the bioassay name; and finally the composite name produced by
 * {@code ExpressionDataWriterUtils.constructBioAssayName}.
 *
 * @param bioMaterials bio materials
 * @return a map of various strings that we might find in a design importing file to the biomaterials. If two
 *         entries share a key, the one registered last wins.
 */
private Map<String, BioMaterial> mapBioMaterialsToNamePossibilities(Collection<BioMaterial> bioMaterials) {
    // Rebuilt from scratch on every call; nothing is cached.
    Map<String, BioMaterial> lookup = new HashMap<>();
    for (BioMaterial material : bioMaterials) {
        lookup.put(material.getName(), material);

        // A biomaterial may be used in several bioassays; each one contributes its own keys.
        for (BioAssay assay : material.getBioAssaysUsedIn()) {
            // Be flexible: accept the external accession of the bioassay when it has a usable one.
            String externalId = assay.getAccession() == null ? null : assay.getAccession().getAccession();
            if (StringUtils.isNotBlank(externalId)) {
                lookup.put(externalId, material);
            }

            // Likewise accept the bioassay name itself.
            lookup.put(assay.getName(), material);
        }

        // Also register the composite name built from the biomaterial and all of its bioassays.
        String compositeName = ExpressionDataWriterUtils.constructBioAssayName(material, material.getBioAssaysUsedIn());
        lookup.put(compositeName, material);
    }
    return lookup;
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay)

Example 47 with BioAssay

use of ubic.gemma.model.expression.bioAssay.BioAssay in project Gemma by PavlidisLab.

the class SimpleExpressionDataLoaderServiceImpl method convertBioAssayDimension.

/**
 * Creates a new bioassay dimension holding one freshly created bioassay (with its own new biomaterial) per column
 * of the supplied matrix. Both the bioassay and the biomaterial are named after the column.
 *
 * @param ee          experiment whose short name is used in the generated names and descriptions
 * @param arrayDesign platform assigned to every generated bioassay
 * @param taxon       source taxon assigned to every generated biomaterial
 * @param matrix      data matrix whose column names drive the generation
 * @return BioAssayDimension
 */
private BioAssayDimension convertBioAssayDimension(ExpressionExperiment ee, ArrayDesign arrayDesign, Taxon taxon, DoubleMatrix<String, String> matrix) {
    BioAssayDimension dimension = BioAssayDimension.Factory.newInstance();
    dimension.setName("For " + ee.getShortName());
    dimension.setDescription("Generated from flat file");

    // Same for every column, so build it once.
    String materialDescription = "Generated by Gemma for: " + ee.getShortName();
    int columnCount = matrix.columns();

    for (int col = 0; col < columnCount; col++) {
        String label = matrix.getColName(col);

        BioMaterial material = BioMaterial.Factory.newInstance();
        material.setName(label);
        material.setDescription(materialDescription);
        material.setSourceTaxon(taxon);

        BioAssay bioAssay = BioAssay.Factory.newInstance();
        bioAssay.setName(label);
        bioAssay.setArrayDesignUsed(arrayDesign);
        bioAssay.setSampleUsed(material);
        bioAssay.setIsOutlier(false);
        bioAssay.setSequencePairedReads(false);

        dimension.getBioAssays().add(bioAssay);
    }

    SimpleExpressionDataLoaderServiceImpl.log.info("Generated " + dimension.getBioAssays().size() + " bioAssays");
    return dimension;
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay)

Example 48 with BioAssay

use of ubic.gemma.model.expression.bioAssay.BioAssay in project Gemma by PavlidisLab.

the class GeoConverterImpl method convertSeriesSingle.

/**
 * Converts one GEO series into an ExpressionExperiment: copies the descriptive metadata, builds an experimental
 * design from the series variables and replicates, creates one BioMaterial per non-empty set of corresponding
 * samples with a BioAssay for each matching sample, and finally converts the data (from the series itself when
 * there are no datasets, otherwise from each dataset that is not skipped).
 *
 * @param series the GEO series to convert; may be null
 * @return the converted experiment, or null if the series is null, if it has datasets and all of them are to be
 *         skipped, or if the series is not usable
 * @throws IllegalArgumentException if the series has no sample correspondence
 */
private ExpressionExperiment convertSeriesSingle(GeoSeries series) {
    if (series == null)
        return null;
    GeoConverterImpl.log.info("Converting series: " + series.getGeoAccession());
    Collection<GeoDataset> dataSets = series.getDatasets();
    // Both collections are filled in by checkForDataToSkip below.
    Collection<String> dataSetsToSkip = new HashSet<>();
    Collection<GeoSample> samplesToSkip = new HashSet<>();
    this.checkForDataToSkip(series, dataSetsToSkip, samplesToSkip);
    // Nothing left to convert if there are datasets and every one of them is skipped.
    if (dataSets.size() > 0 && dataSetsToSkip.size() == dataSets.size()) {
        return null;
    }
    if (!this.isUsable(series)) {
        GeoConverterImpl.log.warn("Series was not usable: types=" + StringUtils.join(series.getSeriesTypes(), " "));
        return null;
    }
    ExpressionExperiment expExp = ExpressionExperiment.Factory.newInstance();
    // NOTE(review): this empty description is overwritten by the very next statement.
    expExp.setDescription("");
    // Description is the series summaries, guaranteed to end with a newline.
    expExp.setDescription(series.getSummaries() + (series.getSummaries().endsWith("\n") ? "" : "\n"));
    if (series.getLastUpdateDate() != null) {
        expExp.setDescription(expExp.getDescription() + "Last Updated (by provider): " + series.getLastUpdateDate() + "\n");
    }
    expExp.setName(series.getTitle());
    expExp.setShortName(series.getGeoAccession());
    this.convertContacts(series, expExp);
    this.convertPubMedIds(series, expExp);
    expExp.setAccession(this.convertDatabaseEntry(series));
    LocalFile expExpRawDataFile = this.convertSupplementaryFileToLocalFile(series);
    expExp.setRawDataFile(expExpRawDataFile);
    ExperimentalDesign design = ExperimentalDesign.Factory.newInstance();
    design.setDescription("");
    design.setName("");
    // Each series variable becomes an experimental factor; the design name accumulates the variable
    // descriptions, with the most recently processed variable first.
    Collection<GeoVariable> variables = series.getVariables().values();
    for (GeoVariable variable : variables) {
        GeoConverterImpl.log.debug("Adding variable " + variable);
        ExperimentalFactor ef = this.convertVariableToFactor(variable);
        this.convertVariableToFactorValue(variable, ef);
        design.getExperimentalFactors().add(ef);
        design.setName(variable.getDescription() + " " + design.getName());
    }
    if (series.getKeyWords().size() > 0) {
        for (String keyWord : series.getKeyWords()) {
            // design.setDescription( design.getDescription() + " Keyword: " + keyWord );
            // NOTE(review): the Characteristic built here is never attached to the design or the experiment
            // within this method, so it is discarded at the end of each iteration. The first description
            // ("GEO Keyword") is also overwritten by the second setDescription call. Confirm whether the
            // keywords were meant to be stored somewhere.
            Characteristic o = Characteristic.Factory.newInstance();
            o.setDescription("GEO Keyword");
            o.setValue(keyWord);
            o.setEvidenceCode(GOEvidenceCode.IIA);
            o.setDescription("Keyword from GEO series definition file.");
        }
    }
    if (series.getOverallDesign() != null) {
        design.setDescription(design.getDescription() + " Overall design: " + series.getOverallDesign());
    }
    // Replicates are likewise turned into experimental factors.
    Collection<GeoReplication> replication = series.getReplicates().values();
    for (GeoReplication replicate : replication) {
        GeoConverterImpl.log.debug("Adding replication " + replicate);
        ExperimentalFactor ef = this.convertReplicationToFactor(replicate);
        this.convertReplicationToFactorValue(replicate, ef);
        design.getExperimentalFactors().add(ef);
    }
    expExp.setExperimentalDesign(design);
    // GEO does not have the concept of a biomaterial.
    Collection<GeoSample> allSeriesSamples = series.getSamples();
    GeoConverterImpl.log.info("Series has " + series.getSamples().size() + " samples");
    if (samplesToSkip.size() > 0) {
        GeoConverterImpl.log.info(samplesToSkip.size() + " samples will be skipped");
    }
    expExp.setBioAssays(new HashSet<BioAssay>());
    if (series.getSampleCorrespondence().size() == 0) {
        throw new IllegalArgumentException("No sample correspondence!");
    }
    // spits out a big summary of the correspondence.
    if (GeoConverterImpl.log.isDebugEnabled())
        GeoConverterImpl.log.debug(series.getSampleCorrespondence());
    // Incremented for every non-empty correspondence set, even one for which no sample ends up being matched.
    int numBioMaterials = 0;
    /*
         * For each _set_ of "corresponding" samples (from the same RNA, or so we think) we make up a new BioMaterial.
         */
    // Accessions already converted; used only to log an error when one shows up in more than one set.
    Collection<String> seen = new HashSet<>();
    for (Iterator<Set<String>> iter = series.getSampleCorrespondence().iterator(); iter.hasNext(); ) {
        Set<String> correspondingSamples = iter.next();
        if (correspondingSamples.isEmpty())
            // can happen after removing samples (multitaxon)
            continue;
        BioMaterial bioMaterial = BioMaterial.Factory.newInstance();
        String bioMaterialName = this.getBiomaterialPrefix(series, ++numBioMaterials);
        StringBuilder bioMaterialDescription = new StringBuilder(GeoConverterImpl.BIOMATERIAL_DESCRIPTION_PREFIX + series.getGeoAccession());
        // From the series samples, find the sample that corresponds and convert it.
        for (String cSample : correspondingSamples) {
            boolean found = false;
            for (GeoSample sample : allSeriesSamples) {
                if (sample == null || sample.getGeoAccession() == null) {
                    GeoConverterImpl.log.warn("Null sample or no accession for " + sample);
                    continue;
                }
                if (samplesToSkip.contains(sample)) {
                    continue;
                }
                String accession = sample.getGeoAccession();
                if (accession.equals(cSample)) {
                    // A repeated accession is only reported; the sample is still converted again below.
                    if (seen.contains(accession)) {
                        GeoConverterImpl.log.error("Got " + accession + " twice, this time in set " + correspondingSamples);
                    }
                    seen.add(accession);
                    BioAssay ba = this.convertSample(sample, bioMaterial, expExp.getExperimentalDesign());
                    // Java assertions: these checks only run when assertions are enabled (-ea).
                    assert (ba != null);
                    LocalFile rawDataFile = this.convertSupplementaryFileToLocalFile(sample);
                    // deal with null at UI
                    ba.setRawDataFile(rawDataFile);
                    ba.setDescription(ba.getDescription() + "\nSource GEO sample is " + sample.getGeoAccession() + "\nLast updated (according to GEO): " + sample.getLastUpdateDate());
                    // presumably convertSample links the bioassay to the biomaterial passed in - confirm
                    assert ba.getSampleUsed() != null;
                    bioMaterial.getBioAssaysUsedIn().add(ba);
                    // Appends the sample's string form (StringBuilder.append(Object)), not just its accession.
                    bioMaterialDescription.append(",").append(sample);
                    expExp.getBioAssays().add(ba);
                    found = true;
                    // Only the first matching series sample is used for this accession.
                    break;
                }
            }
            if (!found) {
                if (GeoConverterImpl.log.isDebugEnabled())
                    GeoConverterImpl.log.debug("No sample found in " + series + " to match " + cSample + "; this can happen if some samples were not run on all platforms.");
            }
        }
        // The biomaterial is reachable from the experiment only through the bioassays added above.
        bioMaterial.setName(bioMaterialName);
        bioMaterial.setDescription(bioMaterialDescription.toString());
    }
    GeoConverterImpl.log.info("Expression Experiment from " + series + " has " + expExp.getBioAssays().size() + " bioassays and " + numBioMaterials + " biomaterials.");
    // Warn when fewer bioassays were created than there are non-skipped samples in the series.
    int expectedNumSamples = series.getSamples().size() - samplesToSkip.size();
    int actualNumSamples = expExp.getBioAssays().size();
    if (expectedNumSamples > actualNumSamples) {
        GeoConverterImpl.log.warn((expectedNumSamples - actualNumSamples) + " samples were not in the 'sample correspondence'" + " and have been omitted. Possibly they were in the Series (GSE) but not in the corresponding Dataset (GDS)?");
    }
    if (dataSets.size() == 0) {
        // we miss extra description and the subset information.
        if (series.getValues().hasData())
            this.convertSeriesDataVectors(series, expExp);
    } else {
        // Convert every dataset that was not marked to be skipped.
        for (GeoDataset dataset : dataSets) {
            if (dataSetsToSkip.contains(dataset.getGeoAccession()))
                continue;
            this.convertDataset(dataset, expExp);
        }
    }
    return expExp;
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay)

Example 49 with BioAssay

use of ubic.gemma.model.expression.bioAssay.BioAssay in project Gemma by PavlidisLab.

the class ExpressionExperimentServiceImpl method checkHasBatchInfo.

/**
 * Reports whether the experiment carries batch information. This is the case when any of its experimental factors
 * is a batch factor or, failing that, when no bioassay of the thawed experiment lacks a processing date (which is
 * vacuously true for an experiment without bioassays).
 *
 * @param ee the experiment to inspect
 * @return true if batch information is available by either criterion
 */
@Override
public boolean checkHasBatchInfo(ExpressionExperiment ee) {
    // Criterion 1: an explicit batch factor in the design. No thawing is needed in this case.
    for (ExperimentalFactor factor : ee.getExperimentalDesign().getExperimentalFactors()) {
        if (BatchInfoPopulationServiceImpl.isBatchFactor(factor)) {
            return true;
        }
    }

    // Criterion 2: every bioassay has a processing date.
    ExpressionExperiment thawed = this.thawBioAssays(ee);
    for (BioAssay assay : thawed.getBioAssays()) {
        if (assay.getProcessingDate() == null) {
            return false;
        }
    }
    return true;
}
Also used : BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay)

Example 50 with BioAssay

use of ubic.gemma.model.expression.bioAssay.BioAssay in project Gemma by PavlidisLab.

the class ExpressionExperimentServiceImpl method addRawVectors.

/**
 * Adds a set of raw expression data vectors to an experiment. All vectors must share one bioassay dimension and
 * one quantitation type. The dimension is resolved through {@code bioAssayDimensionService.findOrCreate}, the
 * quantitation type is created if it has no ID yet, and both are then set on every vector before the vectors are
 * handed to the vector DAO.
 *
 * @param ee         the experiment to add the vectors to; only its ID is used to look it up
 * @param newVectors the vectors to add; must not be null or empty
 * @return the experiment as returned by the vector DAO, with the quantitation type added to its collection
 * @throws IllegalArgumentException      if no vectors are supplied, or if the vectors do not share a single
 *                                       bioassay dimension
 * @throws UnsupportedOperationException if the vectors use more than one quantitation type
 */
@Override
@Transactional
public ExpressionExperiment addRawVectors(ExpressionExperiment ee, Collection<RawExpressionDataVector> newVectors) {
    // Fail with a clear message instead of an NPE or a NoSuchElementException from iterator().next() below.
    if (newVectors == null || newVectors.isEmpty()) {
        throw new IllegalArgumentException("No vectors were provided");
    }
    Collection<BioAssayDimension> BADs = new HashSet<>();
    Collection<QuantitationType> qts = new HashSet<>();
    for (RawExpressionDataVector vec : newVectors) {
        BADs.add(vec.getBioAssayDimension());
        qts.add(vec.getQuantitationType());
    }
    if (BADs.size() > 1) {
        throw new IllegalArgumentException("Vectors must share a common bioassay dimension");
    }
    if (qts.size() > 1) {
        throw new UnsupportedOperationException("Can only replace with one type of vector (only one quantitation type)");
    }
    BioAssayDimension bad = BADs.iterator().next();
    bad = this.bioAssayDimensionService.findOrCreate(bad);
    assert bad.getBioAssays().size() > 0;
    QuantitationType newQt = qts.iterator().next();
    if (newQt.getId() == null) {
        newQt = this.quantitationTypeDao.create(newQt);
    } else {
        AbstractService.log.warn("Quantitation type already had an ID...:" + newQt);
    }
    /*
     * This is probably a more or less redundant setting, but doesn't hurt to make sure. The platform is taken
     * from the first vector's design element only.
     */
    ArrayDesign vectorAd = newVectors.iterator().next().getDesignElement().getArrayDesign();
    for (BioAssay ba : bad.getBioAssays()) {
        ba.setArrayDesignUsed(vectorAd);
    }
    // Point every vector at the resolved dimension and quantitation type.
    for (RawExpressionDataVector vec : newVectors) {
        vec.setBioAssayDimension(bad);
        vec.setQuantitationType(newQt);
    }
    ee = rawExpressionDataVectorDao.addVectors(ee.getId(), newVectors);
    // this is a denormalization; easy to forget to update this.
    ee.getQuantitationTypes().add(newQt);
    AbstractService.log.info(ee.getRawExpressionDataVectors().size() + " vectors for experiment");
    return ee;
}
Also used : BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) RawExpressionDataVector(ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector) ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) QuantitationType(ubic.gemma.model.common.quantitationtype.QuantitationType) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay) Transactional(org.springframework.transaction.annotation.Transactional)

Aggregations

BioAssay (ubic.gemma.model.expression.bioAssay.BioAssay)144 BioMaterial (ubic.gemma.model.expression.biomaterial.BioMaterial)67 Test (org.junit.Test)29 BioAssayDimension (ubic.gemma.model.expression.bioAssayData.BioAssayDimension)29 ArrayDesign (ubic.gemma.model.expression.arrayDesign.ArrayDesign)24 ExpressionExperiment (ubic.gemma.model.expression.experiment.ExpressionExperiment)20 CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence)18 RawExpressionDataVector (ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector)16 AbstractGeoServiceTest (ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest)15 BaseSpringContextTest (ubic.gemma.core.testing.BaseSpringContextTest)14 ExperimentalFactor (ubic.gemma.model.expression.experiment.ExperimentalFactor)14 InputStream (java.io.InputStream)11 ByteArrayConverter (ubic.basecode.io.ByteArrayConverter)10 HashSet (java.util.HashSet)9 AlreadyExistsInSystemException (ubic.gemma.core.loader.util.AlreadyExistsInSystemException)8 DesignElementDataVector (ubic.gemma.model.expression.bioAssayData.DesignElementDataVector)8 FactorValue (ubic.gemma.model.expression.experiment.FactorValue)8 RequestMapping (org.springframework.web.bind.annotation.RequestMapping)7 ModelAndView (org.springframework.web.servlet.ModelAndView)7 ExpressionDataDoubleMatrix (ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix)7