Search in sources :

Example 51 with RawExpressionDataVector

use of ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector in project Gemma by PavlidisLab.

In class SimpleExpressionDataLoaderServiceImpl, method convert.

/**
 * Builds a new {@link ExpressionExperiment} from flat-file metadata and a data matrix.
 * <p>
 * The experiment receives its name, short name, description and source from the metadata, an empty
 * {@link ExperimentalDesign}, a single quantitation type, and - when a PubMed ID is supplied - a primary
 * publication fetched through {@link PubMedXMLFetcher}. For every platform obtained from the metadata, the
 * matching sub-matrix is turned into a {@link BioAssayDimension} plus raw data vectors; the vectors and
 * bioassays of all platforms are then attached to the experiment.
 *
 * @param metaData description of the experiment; must not be null
 * @param matrix   the expression data; must not be null
 * @return the populated experiment
 * @throws IllegalArgumentException if either argument is null
 * @throws IllegalStateException    if no sub-matrix could be obtained for one of the platforms
 */
@Override
public ExpressionExperiment convert(SimpleExpressionExperimentMetaData metaData, DoubleMatrix<String, String> matrix) {
    if (matrix == null || metaData == null) {
        throw new IllegalArgumentException("One or all of method arguments was null");
    }
    ExpressionExperiment experiment = ExpressionExperiment.Factory.newInstance();
    Taxon taxon = this.convertTaxon(metaData.getTaxon());
    experiment.setName(metaData.getName());
    experiment.setShortName(metaData.getShortName());
    experiment.setDescription(metaData.getDescription());
    // The leading space keeps the two sentences apart when a source URL is present.
    experiment.setSource("Import via matrix flat file." + (StringUtils.isBlank(metaData.getSourceUrl()) ? "" : " Downloaded from " + metaData.getSourceUrl()));
    ExperimentalDesign ed = ExperimentalDesign.Factory.newInstance();
    experiment.setExperimentalDesign(ed);
    if (metaData.getPubMedId() != null) {
        PubMedXMLFetcher pubfetch = new PubMedXMLFetcher();
        BibliographicReference ref = pubfetch.retrieveByHTTP(metaData.getPubMedId());
        experiment.setPrimaryPublication(ref);
    }
    QuantitationType quantitationType = this.convertQuantitationType(metaData);
    /* set the quantitation types on the experiment */
    Collection<QuantitationType> qTypes = new HashSet<>();
    qTypes.add(quantitationType);
    experiment.setQuantitationTypes(qTypes);
    Collection<ArrayDesign> arrayDesigns = this.convertArrayDesigns(metaData, matrix);
    // Divide up multiple array designs into multiple BioAssayDimensions.
    Collection<RawExpressionDataVector> allVectors = new HashSet<>();
    Collection<BioAssay> allBioAssays = new HashSet<>();
    // Shared across platforms and passed to getSubMatrixForArrayDesign on every iteration; its final size is
    // compared with the row count below.
    Collection<Object> usedDesignElements = new HashSet<>();
    for (ArrayDesign design : arrayDesigns) {
        SimpleExpressionDataLoaderServiceImpl.log.info("Processing " + design);
        DoubleMatrix<String, String> subMatrix = this.getSubMatrixForArrayDesign(matrix, usedDesignElements, design);
        if (subMatrix == null) {
            throw new IllegalStateException("Got a null matrix for " + design);
        }
        BioAssayDimension bad = this.convertBioAssayDimension(experiment, design, taxon, subMatrix);
        Collection<RawExpressionDataVector> vectors = this.convertDesignElementDataVectors(experiment, bad, design, quantitationType, subMatrix);
        allVectors.addAll(vectors);
        allBioAssays.addAll(bad.getBioAssays());
    }
    // Sanity check: a mismatch is only reported, the experiment is still returned.
    if (usedDesignElements.size() != matrix.rows()) {
        SimpleExpressionDataLoaderServiceImpl.log.warn("Some rows of matrix were not matched to any of the given platforms (" + matrix.rows() + " rows, " + usedDesignElements.size() + " found)");
    }
    experiment.setRawExpressionDataVectors(allVectors);
    experiment.setBioAssays(allBioAssays);
    return experiment;
}
Also used : ExperimentalDesign(ubic.gemma.model.expression.experiment.ExperimentalDesign) ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) Taxon(ubic.gemma.model.genome.Taxon) ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment) BibliographicReference(ubic.gemma.model.common.description.BibliographicReference) BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) RawExpressionDataVector(ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector) PubMedXMLFetcher(ubic.gemma.core.loader.entrez.pubmed.PubMedXMLFetcher) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay)

Example 52 with RawExpressionDataVector

use of ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector in project Gemma by PavlidisLab.

In class SimpleExpressionDataLoaderServiceImpl, method convertDesignElementDataVectors.

/**
 * Creates one raw data vector for every matrix row whose name matches a probe of the given platform.
 * <p>
 * Rows whose name is not the name of any composite sequence on {@code arrayDesign} are skipped without
 * creating a vector.
 *
 * @param expressionExperiment the experiment set on each vector
 * @param bioAssayDimension    the dimension set on each vector
 * @param arrayDesign          the platform whose composite sequences are matched, by name, against the row names
 * @param quantitationType     the quantitation type set on each vector
 * @param matrix               the data; each matched row is stored on its vector as a byte array
 * @return the vectors created for the matched rows; empty if no row matched
 */
private Collection<RawExpressionDataVector> convertDesignElementDataVectors(ExpressionExperiment expressionExperiment, BioAssayDimension bioAssayDimension, ArrayDesign arrayDesign, QuantitationType quantitationType, DoubleMatrix<String, String> matrix) {
    ByteArrayConverter bArrayConverter = new ByteArrayConverter();
    Collection<RawExpressionDataVector> vectors = new HashSet<>();
    // Index the platform's probes by name so each row is resolved with a single lookup.
    Map<String, CompositeSequence> csMap = new HashMap<>();
    for (CompositeSequence cs : arrayDesign.getCompositeSequences()) {
        csMap.put(cs.getName(), cs);
    }
    for (int i = 0; i < matrix.rows(); i++) {
        // Resolve the probe first: unmatched rows need neither a byte conversion nor a vector instance.
        CompositeSequence cs = csMap.get(matrix.getRowName(i));
        if (cs == null) {
            continue;
        }
        RawExpressionDataVector vector = RawExpressionDataVector.Factory.newInstance();
        vector.setData(bArrayConverter.doubleArrayToBytes(matrix.getRow(i)));
        vector.setDesignElement(cs);
        vector.setQuantitationType(quantitationType);
        vector.setExpressionExperiment(expressionExperiment);
        vector.setBioAssayDimension(bioAssayDimension);
        vectors.add(vector);
    }
    SimpleExpressionDataLoaderServiceImpl.log.info("Created " + vectors.size() + " data vectors");
    return vectors;
}
Also used : ByteArrayConverter(ubic.basecode.io.ByteArrayConverter) RawExpressionDataVector(ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence)

Example 53 with RawExpressionDataVector

use of ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector in project Gemma by PavlidisLab.

In class ExpressionExperimentPrePersistServiceImpl, method prepare.

/**
 * Replaces the probes referenced by the experiment's raw data vectors with the instances held in the cache.
 * <p>
 * A first pass loads or persists the platform of every vector's probe and swaps the probe for its cached
 * counterpart when one exists. Probes missing from the cache are collected per platform and handed to
 * {@code addNewDesignElementToPersistentArrayDesigns}; whatever comes back is cached, and a second pass then
 * re-resolves the vectors that were left over.
 *
 * @param ee    the experiment whose raw data vectors are processed
 * @param cache the cache that is consulted and updated
 * @return the same {@code cache} instance that was passed in
 * @throws IllegalStateException if, after new probes were added, a left-over vector's probe is still missing
 *                               from the cache or is reported transient by the persister helper
 */
@Override
public ArrayDesignsForExperimentCache prepare(ExpressionExperiment ee, ArrayDesignsForExperimentCache cache) {
    Map<ArrayDesign, Collection<CompositeSequence>> uncachedByPlatform = new HashMap<>();
    Collection<DesignElementDataVector> awaitingProbes = new HashSet<>();

    Collection<RawExpressionDataVector> rawVectors = ee.getRawExpressionDataVectors();
    if (rawVectors.isEmpty()) {
        // That's okay; some data sets don't come with data.
        this.prepareWithoutData(ee, cache);
    }

    // First pass: use cached probes where available, remember the rest.
    for (DesignElementDataVector vec : rawVectors) {
        CompositeSequence probe = vec.getDesignElement();
        assert probe != null;
        ArrayDesign arrayDesign = probe.getArrayDesign();
        assert arrayDesign != null : probe + " does not have an array design";
        arrayDesign = this.loadOrPersistArrayDesignAndAddToCache(arrayDesign, cache);

        CompositeSequence known = cache.getFromCache(probe);
        if (known != null) {
            vec.setDesignElement(known);
            continue;
        }

        Collection<CompositeSequence> pending = uncachedByPlatform.get(arrayDesign);
        if (pending == null) {
            pending = new HashSet<>();
            uncachedByPlatform.put(arrayDesign, pending);
        }
        pending.add(probe);
        awaitingProbes.add(vec);
    }

    if (awaitingProbes.isEmpty()) {
        return cache;
    }

    // Second pass: fill in the vectors that still needed probes after the first pass.
    ExpressionExperimentPrePersistServiceImpl.log.info(awaitingProbes.size() + " vectors don't have probes, may add to the platform.");
    uncachedByPlatform = this.addNewDesignElementToPersistentArrayDesigns(uncachedByPlatform);
    if (uncachedByPlatform.isEmpty()) {
        // This is okay if there were none to add, but a problem otherwise.
        ExpressionExperimentPrePersistServiceImpl.log.info("No probes were added");
        return cache;
    }

    // Cache the returned probes before trying to resolve the vectors against the cache again.
    for (Collection<CompositeSequence> added : uncachedByPlatform.values()) {
        for (CompositeSequence cs : added) {
            cache.addToCache(cs);
        }
    }

    for (DesignElementDataVector vec : awaitingProbes) {
        CompositeSequence resolved = cache.getFromCache(vec.getDesignElement());
        if (resolved == null || persisterHelper.isTransient(resolved)) {
            throw new IllegalStateException("All probes should be persistent by now");
        }
        vec.setDesignElement(resolved);
    }
    return cache;
}
Also used : RawExpressionDataVector(ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector) HashMap(java.util.HashMap) ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) Collection(java.util.Collection) DesignElementDataVector(ubic.gemma.model.expression.bioAssayData.DesignElementDataVector) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) HashSet(java.util.HashSet)

Aggregations

RawExpressionDataVector (ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector)53 CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence)18 ArrayDesign (ubic.gemma.model.expression.arrayDesign.ArrayDesign)16 BioAssay (ubic.gemma.model.expression.bioAssay.BioAssay)16 BioAssayDimension (ubic.gemma.model.expression.bioAssayData.BioAssayDimension)16 Test (org.junit.Test)15 QuantitationType (ubic.gemma.model.common.quantitationtype.QuantitationType)13 ExpressionExperiment (ubic.gemma.model.expression.experiment.ExpressionExperiment)12 BaseSpringContextTest (ubic.gemma.core.testing.BaseSpringContextTest)9 BioMaterial (ubic.gemma.model.expression.biomaterial.BioMaterial)7 InputStream (java.io.InputStream)6 Collection (java.util.Collection)6 HashSet (java.util.HashSet)6 GZIPInputStream (java.util.zip.GZIPInputStream)6 ByteArrayConverter (ubic.basecode.io.ByteArrayConverter)6 AbstractGeoServiceTest (ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest)6 GeoSeries (ubic.gemma.core.loader.expression.geo.model.GeoSeries)6 DesignElementDataVector (ubic.gemma.model.expression.bioAssayData.DesignElementDataVector)6 ProcessedExpressionDataVector (ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector)5 Transactional (org.springframework.transaction.annotation.Transactional)4