Search in sources :

Example 46 with BioAssayDimension

use of ubic.gemma.model.expression.bioAssayData.BioAssayDimension in project Gemma by PavlidisLab.

the class GeoConverterImpl method convertVectorsForPlatform.

/**
 * For data coming from a single platform, create vectors.
 * <p>
 * The column names and descriptions of the first sample define the candidate quantitation types; the first column
 * (the ID or ID_REF) is skipped. For every remaining column the data vectors are built, a new
 * {@link QuantitationType} is created and its parameters guessed, and every vector that converts successfully is
 * added to the experiment. The quantitation type itself is only added to the experiment when at least one vector
 * was retained for it.
 * <p>
 * Nothing is converted when the platform reports that data from GEO should not be used.
 *
 * @param values         A GeoValues object holding the parsed results.
 * @param expExp         the experiment that receives the raw vectors and quantitation types
 * @param datasetSamples the samples to convert; must not be empty (checked by assertion)
 * @param geoPlatform    the platform the samples were run on
 * @throws IllegalStateException if the bioassay dimension built from the samples contains no bioassays
 */
private void convertVectorsForPlatform(GeoValues values, ExpressionExperiment expExp, List<GeoSample> datasetSamples, GeoPlatform geoPlatform) {
    assert datasetSamples.size() > 0 : "No samples in dataset";
    if (!geoPlatform.useDataFromGeo()) {
        // see bug 4181
        GeoConverterImpl.log.warn("Platform characteristics indicate data from GEO should be ignored or will not be present anyway (" + geoPlatform + ")");
        return;
    }
    GeoConverterImpl.log.info("Converting vectors for " + geoPlatform.getGeoAccession() + ", " + datasetSamples.size() + " samples.");
    BioAssayDimension bioAssayDimension = this.convertGeoSampleList(datasetSamples, expExp);
    if (bioAssayDimension.getBioAssays().size() == 0) {
        throw new IllegalStateException("No bioAssays in the BioAssayDimension");
    }
    this.sanityCheckQuantitationTypes(datasetSamples);
    List<String> quantitationTypes = datasetSamples.iterator().next().getColumnNames();
    List<String> quantitationTypeDescriptions = datasetSamples.iterator().next().getColumnDescriptions();
    // Position of the current column in the sample's column list. Tracked while iterating rather than recovered
    // with indexOf(), which always yields the first match and so picks the wrong description when a column name
    // is repeated.
    int columnAccordingToSample = -1;
    for (String quantitationType : quantitationTypes) {
        columnAccordingToSample++;
        // skip the first quantitationType, it's the ID or ID_REF.
        if (columnAccordingToSample == 0) {
            continue;
        }
        int quantitationTypeIndex = values.getQuantitationTypeIndex(geoPlatform, quantitationType);
        GeoConverterImpl.log.debug("Processing " + quantitationType + " (column=" + quantitationTypeIndex + " - according to sample, it's " + columnAccordingToSample + ")");
        Map<String, List<Object>> dataVectors = this.makeDataVectors(values, datasetSamples, quantitationTypeIndex);
        if (dataVectors == null || dataVectors.size() == 0) {
            GeoConverterImpl.log.debug("No data for " + quantitationType + " (column=" + quantitationTypeIndex + ")");
            continue;
        }
        GeoConverterImpl.log.info(dataVectors.size() + " data vectors for " + quantitationType);
        // The first value of the first vector is handed to the guesser as a representative example.
        Object exampleValue = dataVectors.values().iterator().next().iterator().next();
        QuantitationType qt = QuantitationType.Factory.newInstance();
        qt.setName(quantitationType);
        String description = quantitationTypeDescriptions.get(columnAccordingToSample);
        qt.setDescription(description);
        QuantitationTypeParameterGuesser.guessQuantitationTypeParameters(qt, quantitationType, description, exampleValue);
        int count = 0;
        int skipped = 0;
        for (Map.Entry<String, List<Object>> entry : dataVectors.entrySet()) {
            String designElementName = entry.getKey();
            List<Object> dataVector = entry.getValue();
            if (dataVector == null || dataVector.size() == 0) {
                continue;
            }
            RawExpressionDataVector vector = this.convertDesignElementDataVector(geoPlatform, expExp, bioAssayDimension, designElementName, dataVector, qt);
            if (vector == null) {
                skipped++;
                if (GeoConverterImpl.log.isDebugEnabled()) {
                    GeoConverterImpl.log.debug("Null vector for DE=" + designElementName + " QT=" + quantitationType);
                }
                continue;
            }
            if (GeoConverterImpl.log.isTraceEnabled()) {
                GeoConverterImpl.log.trace(designElementName + " " + qt.getName() + " " + qt.getRepresentation() + " " + dataVector.size() + " elements in vector");
            }
            expExp.getRawExpressionDataVectors().add(vector);
            if (++count % GeoConverterImpl.LOGGING_VECTOR_COUNT_UPDATE == 0 && GeoConverterImpl.log.isDebugEnabled()) {
                GeoConverterImpl.log.debug(count + " Data vectors added");
            }
        }
        if (count > 0) {
            // Only register the quantitation type when it actually has vectors attached.
            expExp.getQuantitationTypes().add(qt);
            if (GeoConverterImpl.log.isDebugEnabled() && count > 1000) {
                GeoConverterImpl.log.debug(count + " Data vectors added for '" + quantitationType + "'");
            }
        } else {
            GeoConverterImpl.log.info("No vectors were retained for " + quantitationType + " -- usually this is due to all values being missing.");
        }
        if (skipped > 0) {
            GeoConverterImpl.log.info("Skipped " + skipped + " vectors");
        }
    }
    GeoConverterImpl.log.info("Total of " + expExp.getRawExpressionDataVectors().size() + " vectors on platform " + geoPlatform + ", " + expExp.getQuantitationTypes().size() + " quantitation types.");
}
Also used : BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) RawExpressionDataVector(ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector) QuantitationType(ubic.gemma.model.common.quantitationtype.QuantitationType)

Example 47 with BioAssayDimension

use of ubic.gemma.model.expression.bioAssayData.BioAssayDimension in project Gemma by PavlidisLab.

the class DataUpdater method makeNewVectors.

/**
 * Creates one raw expression data vector for every row of the matrix that has a design element.
 * <p>
 * The matrix's best bioassay dimension is resolved through {@code assayDimensionService.findOrCreate} and shared by
 * all returned vectors. Rows whose design element is {@code null} are skipped.
 *
 * @param ee             the experiment the vectors are assigned to
 * @param targetPlatform the platform every design element in {@code data} must belong to
 * @param data           the matrix supplying the row values and design elements
 * @param qt             the quantitation type set on every vector
 * @return the newly created vectors
 * @throws IllegalArgumentException if a row's design element belongs to a platform other than
 *                                  {@code targetPlatform}
 */
private Collection<RawExpressionDataVector> makeNewVectors(ExpressionExperiment ee, ArrayDesign targetPlatform, ExpressionDataDoubleMatrix data, QuantitationType qt) {
    ByteArrayConverter bArrayConverter = new ByteArrayConverter();
    Collection<RawExpressionDataVector> vectors = new HashSet<>();
    BioAssayDimension bioAssayDimension = data.getBestBioAssayDimension();
    assert bioAssayDimension != null;
    assert !bioAssayDimension.getBioAssays().isEmpty();
    bioAssayDimension = assayDimensionService.findOrCreate(bioAssayDimension);
    assert !bioAssayDimension.getBioAssays().isEmpty();
    for (int i = 0; i < data.rows(); i++) {
        // Validate the row first so no conversion or allocation is done for rows that are skipped or rejected.
        CompositeSequence cs = data.getRowElement(i).getDesignElement();
        if (cs == null) {
            continue;
        }
        if (!cs.getArrayDesign().equals(targetPlatform)) {
            throw new IllegalArgumentException("Input data must use the target platform (was: " + cs.getArrayDesign() + ", expected: " + targetPlatform + ")");
        }
        RawExpressionDataVector vector = RawExpressionDataVector.Factory.newInstance();
        vector.setData(bArrayConverter.doubleArrayToBytes(data.getRow(i)));
        vector.setDesignElement(cs);
        vector.setQuantitationType(qt);
        vector.setExpressionExperiment(ee);
        vector.setBioAssayDimension(bioAssayDimension);
        vectors.add(vector);
    }
    return vectors;
}
Also used : BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) ByteArrayConverter(ubic.basecode.io.ByteArrayConverter) RawExpressionDataVector(ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence)

Example 48 with BioAssayDimension

use of ubic.gemma.model.expression.bioAssayData.BioAssayDimension in project Gemma by PavlidisLab.

the class MatrixConversionTest method getDesignElementDataVectors.

/**
 * Builds test vectors for an awkward (but realistic) layout: two bioassay dimensions of different sizes that
 * refer to the same set of biomaterials.
 *
 * @param quantTypes the quantitation types to create vectors for; both dimensions are rebuilt for each type
 * @return design element data vectors
 */
private Collection<DesignElementDataVector> getDesignElementDataVectors(Collection<QuantitationType> quantTypes) {
    Collection<DesignElementDataVector> result = new HashSet<>();

    ArrayDesign firstPlatform = ArrayDesign.Factory.newInstance();
    firstPlatform.setName("junk");
    List<CompositeSequence> firstProbes = this.getCompositeSequences(firstPlatform);

    ArrayDesign secondPlatform = ArrayDesign.Factory.newInstance();
    secondPlatform.setName("bjunk");
    // NOTE(review): this second probe list is also built from the first platform, not the second -- confirm
    // that is intended.
    List<CompositeSequence> secondProbes = this.getCompositeSequences(firstPlatform);

    // reused by both dimensions
    List<BioMaterial> sharedMaterials = this.getBioMaterials();

    for (QuantitationType quantType : quantTypes) {
        /*
         * Create two bioassay dimensions which overlap; "A" does not use all the biomaterials.
         */
        BioAssayDimension dimensionA = BioAssayDimension.Factory.newInstance();
        Iterator<BioMaterial> materialsForA = sharedMaterials.iterator();
        for (long id = 0; id < MatrixConversionTest.NUM_BIOMATERIALS - 20; id++) {
            BioAssay assay = BioAssay.Factory.newInstance();
            assay.setName(RandomStringUtils.randomNumeric(5) + "_testbioassay");
            assay.setSampleUsed(materialsForA.next());
            assay.setArrayDesignUsed(firstPlatform);
            assay.setId(id);
            dimensionA.getBioAssays().add(assay);
        }
        dimensionA.setName(RandomStringUtils.randomAlphanumeric(10));

        BioAssayDimension dimensionB = BioAssayDimension.Factory.newInstance();
        Iterator<BioMaterial> materialsForB = sharedMaterials.iterator();
        for (long offset = 0; offset < MatrixConversionTest.NUM_BIOMATERIALS; offset++) {
            BioAssay assay = BioAssay.Factory.newInstance();
            assay.setName(RandomStringUtils.randomNumeric(15) + "_testbioassay");
            assay.setSampleUsed(materialsForB.next());
            assay.setArrayDesignUsed(secondPlatform);
            // ids continue 20 past the offset
            assay.setId(offset + 20);
            dimensionB.getBioAssays().add(assay);
        }
        dimensionB.setName(RandomStringUtils.randomAlphanumeric(10));

        // bio.a gets cs 0-99, bio.b gets 100-199.
        long start = 0;
        long resumeAt = this.loopVectors(result, secondProbes, quantType, dimensionA, start, MatrixConversionTest.NUM_CS - 100);
        this.loopVectors(result, firstProbes, quantType, dimensionB, resumeAt, MatrixConversionTest.NUM_CS);
    }
    return result;
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) DesignElementDataVector(ubic.gemma.model.expression.bioAssayData.DesignElementDataVector) QuantitationType(ubic.gemma.model.common.quantitationtype.QuantitationType) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay)

Example 49 with BioAssayDimension

use of ubic.gemma.model.expression.bioAssayData.BioAssayDimension in project Gemma by PavlidisLab.

the class SimpleExpressionDataLoaderServiceImpl method convert.

/**
 * Converts a data matrix plus its metadata into a new, unpersisted-looking {@link ExpressionExperiment}.
 * <p>
 * Name, short name, description and source are copied from the metadata; a fresh experimental design is attached;
 * and, when a PubMed id is given, the primary publication is retrieved over HTTP. The matrix is then split per
 * array design, and for each design a bioassay dimension and its raw vectors are created. All vectors and
 * bioassays are collected onto the experiment.
 *
 * @param metaData description of the experiment, its platforms and its quantitation type
 * @param matrix   the expression values
 * @return the assembled experiment
 * @throws IllegalArgumentException if either argument is {@code null}
 * @throws IllegalStateException    if no sub-matrix could be obtained for one of the array designs
 */
@Override
public ExpressionExperiment convert(SimpleExpressionExperimentMetaData metaData, DoubleMatrix<String, String> matrix) {
    if (matrix == null || metaData == null) {
        throw new IllegalArgumentException("One or all of method arguments was null");
    }
    ExpressionExperiment experiment = ExpressionExperiment.Factory.newInstance();
    Taxon taxon = this.convertTaxon(metaData.getTaxon());
    experiment.setName(metaData.getName());
    experiment.setShortName(metaData.getShortName());
    experiment.setDescription(metaData.getDescription());
    // Leading space keeps the optional URL sentence from running into the first one.
    experiment.setSource("Import via matrix flat file." + (StringUtils.isBlank(metaData.getSourceUrl()) ? "" : " Downloaded from " + metaData.getSourceUrl()));
    ExperimentalDesign ed = ExperimentalDesign.Factory.newInstance();
    experiment.setExperimentalDesign(ed);
    if (metaData.getPubMedId() != null) {
        PubMedXMLFetcher pubfetch = new PubMedXMLFetcher();
        BibliographicReference ref = pubfetch.retrieveByHTTP(metaData.getPubMedId());
        experiment.setPrimaryPublication(ref);
    }
    QuantitationType quantitationType = this.convertQuantitationType(metaData);
    /* set the quantitation types on the experiment */
    Collection<QuantitationType> qTypes = new HashSet<>();
    qTypes.add(quantitationType);
    experiment.setQuantitationTypes(qTypes);
    Collection<ArrayDesign> arrayDesigns = this.convertArrayDesigns(metaData, matrix);
    // Divide up multiple array designs into multiple BioAssayDimensions.
    Collection<RawExpressionDataVector> allVectors = new HashSet<>();
    Collection<BioAssay> allBioAssays = new HashSet<>();
    // Passed to every getSubMatrixForArrayDesign call and compared against the row count afterwards.
    Collection<Object> usedDesignElements = new HashSet<>();
    for (ArrayDesign design : arrayDesigns) {
        SimpleExpressionDataLoaderServiceImpl.log.info("Processing " + design);
        DoubleMatrix<String, String> subMatrix = this.getSubMatrixForArrayDesign(matrix, usedDesignElements, design);
        if (subMatrix == null) {
            throw new IllegalStateException("Got a null matrix");
        }
        BioAssayDimension bad = this.convertBioAssayDimension(experiment, design, taxon, subMatrix);
        Collection<RawExpressionDataVector> vectors = this.convertDesignElementDataVectors(experiment, bad, design, quantitationType, subMatrix);
        allVectors.addAll(vectors);
        allBioAssays.addAll(bad.getBioAssays());
    }
    // sanity
    if (usedDesignElements.size() != matrix.rows()) {
        SimpleExpressionDataLoaderServiceImpl.log.warn("Some rows of matrix were not matched to any of the given platforms (" + matrix.rows() + " rows, " + usedDesignElements.size() + " found)");
    }
    experiment.setRawExpressionDataVectors(allVectors);
    experiment.setBioAssays(allBioAssays);
    return experiment;
}
Also used : ExperimentalDesign(ubic.gemma.model.expression.experiment.ExperimentalDesign) ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) Taxon(ubic.gemma.model.genome.Taxon) ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment) BibliographicReference(ubic.gemma.model.common.description.BibliographicReference) BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) RawExpressionDataVector(ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector) PubMedXMLFetcher(ubic.gemma.core.loader.entrez.pubmed.PubMedXMLFetcher) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay)

Example 50 with BioAssayDimension

use of ubic.gemma.model.expression.bioAssayData.BioAssayDimension in project Gemma by PavlidisLab.

the class ExpressionPersister method getBioAssayDimensionFromCacheOrCreate.

/**
 * Resolves the bioassay dimension of a vector to a non-transient instance.
 * <p>
 * A dimension that is already non-transient is returned as is. Otherwise the dimension is looked up by name in
 * {@code bioAssayDimensionCache}; on a miss its id is cleared, it is persisted, and the result is cached under
 * that name. In both of those cases the vector is re-pointed at the resolved dimension.
 *
 * @param vector the vector whose bioassay dimension should be resolved
 * @param c      cache handed through to {@code persistBioAssayDimension}
 * @return the non-transient bioassay dimension for the vector
 */
private BioAssayDimension getBioAssayDimensionFromCacheOrCreate(DesignElementDataVector vector, ArrayDesignsForExperimentCache c) {
    BioAssayDimension original = vector.getBioAssayDimension();
    if (!this.isTransient(original)) {
        return original;
    }

    // The cache is keyed by dimension name.
    String cacheKey = original.getName();
    BioAssayDimension resolved;
    if (!bioAssayDimensionCache.containsKey(cacheKey)) {
        original.setId(null);
        resolved = this.persistBioAssayDimension(original, c);
        bioAssayDimensionCache.put(cacheKey, resolved);
    } else {
        resolved = bioAssayDimensionCache.get(cacheKey);
    }
    vector.setBioAssayDimension(resolved);

    assert !this.isTransient(resolved);
    return resolved;
}
Also used : BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension)

Aggregations

BioAssayDimension (ubic.gemma.model.expression.bioAssayData.BioAssayDimension)59 BioAssay (ubic.gemma.model.expression.bioAssay.BioAssay)29 QuantitationType (ubic.gemma.model.common.quantitationtype.QuantitationType)20 RawExpressionDataVector (ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector)16 DesignElementDataVector (ubic.gemma.model.expression.bioAssayData.DesignElementDataVector)15 BioMaterial (ubic.gemma.model.expression.biomaterial.BioMaterial)15 CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence)15 ByteArrayConverter (ubic.basecode.io.ByteArrayConverter)11 StandardQuantitationType (ubic.gemma.model.common.quantitationtype.StandardQuantitationType)10 ArrayDesign (ubic.gemma.model.expression.arrayDesign.ArrayDesign)9 ProcessedExpressionDataVector (ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector)9 ExpressionExperiment (ubic.gemma.model.expression.experiment.ExpressionExperiment)6 HashSet (java.util.HashSet)4 Test (org.junit.Test)4 Transactional (org.springframework.transaction.annotation.Transactional)4 BioSequence (ubic.gemma.model.genome.biosequence.BioSequence)4 StopWatch (org.apache.commons.lang3.time.StopWatch)3 ExpressionDataDoubleMatrix (ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix)3 ExpressionExperimentValueObject (ubic.gemma.model.expression.experiment.ExpressionExperimentValueObject)3 DoubleArrayList (cern.colt.list.DoubleArrayList)2