Search in sources :

Example 96 with ArrayDesign

use of ubic.gemma.model.expression.arrayDesign.ArrayDesign in project Gemma by PavlidisLab.

the class VectorMergingServiceImpl method mergeVectors.

/**
 * Merges the raw expression data vectors of an experiment that are spread over several bioassay dimensions
 * into vectors that all use one newly created bioassay dimension.
 * <p>
 * For each quantitation type that has vectors, one new vector is built per design element by calling
 * {@code makeMergedData} with the old dimensions in sorted order. Vectors whose values are all missing are
 * not saved. New vectors are persisted, then the old vectors for that quantitation type are removed.
 * Afterwards the experiment is updated, cleaned up, audited and handed to the preprocessor; a
 * {@code PreprocessingException} from that last step is logged and not rethrown.
 *
 * @param ee the experiment whose vectors should be merged
 * @return the thawed experiment; returned without merging if only one (non-merged) bioassay dimension exists
 * @throws IllegalArgumentException if the experiment uses more than one platform
 * @throws IllegalStateException    if no bioassay dimensions are found, if a merged vector has the wrong
 *                                  number of values, or if no quantitation type was merged
 */
@Override
public ExpressionExperiment mergeVectors(ExpressionExperiment ee) {
    Collection<ArrayDesign> arrayDesigns = expressionExperimentService.getArrayDesignsUsed(ee);
    if (arrayDesigns.size() > 1) {
        throw new IllegalArgumentException("Cannot cope with more than one platform; switch experiment to use a (merged) platform first");
    }
    ee = expressionExperimentService.thaw(ee);
    Collection<QuantitationType> qts = expressionExperimentService.getQuantitationTypes(ee);
    VectorMergingServiceImpl.log.info(qts.size() + " quantitation types for potential merge");
    /*
     * Load all the bioassay dimensions, which will be merged.
     */
    Collection<BioAssayDimension> allOldBioAssayDims = new HashSet<>();
    for (BioAssay ba : ee.getBioAssays()) {
        Collection<BioAssayDimension> oldBioAssayDims = bioAssayService.findBioAssayDimensions(ba);
        for (BioAssayDimension bioAssayDim : oldBioAssayDims) {
            // A dimension without a description cannot carry the merged prefix, so it is kept
            // (guarding here avoids a NullPointerException on startsWith).
            String description = bioAssayDim.getDescription();
            if (description != null && description.startsWith(VectorMergingServiceImpl.MERGED_DIM_DESC_PREFIX)) {
                // not foolproof, but avoids some artifacts - e.g. if there were previous failed attempts at this.
                continue;
            }
            allOldBioAssayDims.add(bioAssayDim);
        }
    }
    if (allOldBioAssayDims.isEmpty()) {
        throw new IllegalStateException("No bioAssayDimensions found to merge (previously merged ones are filtered, data may be corrupt?");
    }
    if (allOldBioAssayDims.size() == 1) {
        VectorMergingServiceImpl.log.warn("Experiment already has only a single bioAssayDimension, nothing seems to need merging. Bailing");
        return ee;
    }
    VectorMergingServiceImpl.log.info(allOldBioAssayDims.size() + " bioAssayDimensions to merge");
    List<BioAssayDimension> sortedOldDims = this.sortedBioAssayDimensions(allOldBioAssayDims);
    BioAssayDimension newBioAd = this.getNewBioAssayDimension(sortedOldDims);
    int totalBioAssays = newBioAd.getBioAssays().size();
    assert totalBioAssays == ee.getBioAssays().size() : "experiment has " + ee.getBioAssays().size() + " but new bioAssayDimension has " + totalBioAssays;
    Map<QuantitationType, Collection<RawExpressionDataVector>> qt2Vec = this.getVectors(ee, qts, allOldBioAssayDims);
    /*
     * This will run into problems if there are excess quantitation types
     */
    int numSuccessfulMergers = 0;
    for (Map.Entry<QuantitationType, Collection<RawExpressionDataVector>> qtEntry : qt2Vec.entrySet()) {
        QuantitationType type = qtEntry.getKey();
        Collection<RawExpressionDataVector> oldVecs = qtEntry.getValue();
        if (oldVecs.isEmpty()) {
            VectorMergingServiceImpl.log.warn("No vectors for " + type);
            continue;
        }
        Map<CompositeSequence, Collection<RawExpressionDataVector>> deVMap = this.getDevMap(oldVecs);
        if (deVMap == null) {
            VectorMergingServiceImpl.log.info("Vector merging will not be done for " + type + " as there is only one vector per element already");
            continue;
        }
        VectorMergingServiceImpl.log.info("Processing " + oldVecs.size() + " vectors  for " + type);
        Collection<RawExpressionDataVector> newVectors = new HashSet<>();
        int numAllMissing = 0;
        int missingValuesForQt = 0;
        for (Map.Entry<CompositeSequence, Collection<RawExpressionDataVector>> deEntry : deVMap.entrySet()) {
            CompositeSequence de = deEntry.getKey();
            RawExpressionDataVector vector = this.initializeNewVector(ee, newBioAd, type, de);
            Collection<RawExpressionDataVector> dedvs = deEntry.getValue();
            /*
             * these ugly nested loops are to ENSURE that we get the vector reconstructed properly. For each of the
             * old bioassayDimensions, find the designElementDataVector that uses it. If there isn't one, fill in
             * the values for that dimension with missing data. We go through the dimensions in the same order that
             * we joined them up.
             */
            List<Object> data = new ArrayList<>();
            int totalMissingInVector = this.makeMergedData(sortedOldDims, newBioAd, type, de, dedvs, data);
            missingValuesForQt += totalMissingInVector;
            if (totalMissingInVector == totalBioAssays) {
                numAllMissing++;
                // we don't save data that is all missing.
                continue;
            }
            if (data.size() != totalBioAssays) {
                throw new IllegalStateException("Wrong number of values for " + de + " / " + type + ", expected " + totalBioAssays + ", got " + data.size());
            }
            byte[] newDataAr = converter.toBytes(data.toArray());
            vector.setData(newDataAr);
            newVectors.add(vector);
        }
        // TRANSACTION
        vectorMergingHelperService.persist(ee, type, newVectors);
        if (numAllMissing > 0) {
            VectorMergingServiceImpl.log.info(numAllMissing + " vectors had all missing values and were junked for " + type);
        }
        if (missingValuesForQt > 0) {
            VectorMergingServiceImpl.log.info(missingValuesForQt + " total missing values: " + type);
        }
        VectorMergingServiceImpl.log.info("Removing " + oldVecs.size() + " old vectors for " + type);
        rawExpressionDataVectorService.remove(oldVecs);
        ee.getRawExpressionDataVectors().removeAll(oldVecs);
        numSuccessfulMergers++;
    }
    if (numSuccessfulMergers == 0) {
        /*
         * Try to clean up
         */
        this.bioAssayDimensionService.remove(newBioAd);
        throw new IllegalStateException("Nothing was merged. Maybe all the vectors are effectively merged already");
    }
    expressionExperimentService.update(ee);
    // Several transactions
    this.cleanUp(ee, allOldBioAssayDims, newBioAd);
    // transaction
    // Report the number of merged dimensions, not the collection's toString().
    this.audit(ee, "Vector merging performed, merged " + allOldBioAssayDims.size() + " old bioassay dimensions for " + qts.size() + " quantitation types.");
    // several transactions
    try {
        preprocessorService.process(ee);
    } catch (PreprocessingException e) {
        VectorMergingServiceImpl.log.error("Error during postprocessing", e);
    }
    return ee;
}
Also used : ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) RawExpressionDataVector(ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector) QuantitationType(ubic.gemma.model.common.quantitationtype.QuantitationType) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay)

Example 97 with ArrayDesign

use of ubic.gemma.model.expression.arrayDesign.ArrayDesign in project Gemma by PavlidisLab.

the class ArrayDesignProbeCleanupCLI method doWork.

/**
 * Removes the probes named in a file from a single platform.
 * <p>
 * Each non-blank line of the file is split on tabs and the first field is used as the probe name. For every
 * probe found on the platform, its raw and processed data are removed and then the probe itself is removed.
 *
 * @param args command line arguments, handed to {@code processCommandLine}
 * @return the error from command line processing, an {@link IOException} raised while reading the file, or
 *         {@code null} when the run completes
 * @throws IllegalArgumentException if the '-a' option did not select exactly one platform
 */
@Override
protected Exception doWork(String[] args) {
    Exception err = this.processCommandLine(args);
    if (err != null)
        return err;
    File f = new File(file);
    if (!f.canRead()) {
        AbstractCLI.log.fatal("Cannot read from " + file);
        this.bail(ErrorCode.INVALID_OPTION);
    }
    if (this.arrayDesignsToProcess.size() > 1) {
        throw new IllegalArgumentException("Cannot be applied to more than one platform given to the '-a' option");
    }
    // Fail with a clear message instead of a NoSuchElementException from iterator().next() below.
    if (this.arrayDesignsToProcess.isEmpty()) {
        throw new IllegalArgumentException("A platform must be given to the '-a' option");
    }
    ArrayDesign arrayDesign = this.arrayDesignsToProcess.iterator().next();
    try (InputStream is = new FileInputStream(f);
        BufferedReader br = new BufferedReader(new InputStreamReader(is))) {
        String line;
        int count = 0;
        while ((line = br.readLine()) != null) {
            if (StringUtils.isBlank(line)) {
                continue;
            }
            // Only the first tab-separated field is used; any further columns are ignored.
            String[] fields = line.split("\t");
            String probe = fields[0];
            CompositeSequence cs = compositeSequenceService.findByName(arrayDesign, probe);
            if (cs != null) {
                AbstractCLI.log.info("Removing: " + cs);
                // Data vectors are removed before the probe they refer to.
                rawExpressionDataVectorService.removeDataForCompositeSequence(cs);
                processedExpressionDataVectorService.removeDataForCompositeSequence(cs);
                compositeSequenceService.remove(cs);
                count++;
            }
        }
        AbstractCLI.log.info("Deleted " + count + " probes");
    } catch (IOException e) {
        return e;
    }
    return null;
}
Also used : ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence)

Example 98 with ArrayDesign

use of ubic.gemma.model.expression.arrayDesign.ArrayDesign in project Gemma by PavlidisLab.

the class ArrayDesignSequenceAssociationCli method doWork.

/**
 * Associates sequences with the selected platforms.
 * <p>
 * With option 's' a single accession is processed and the method returns. Otherwise, for each platform the
 * sequences come from a sequence file (option 'f'), from a file of identifiers (option 'i'), or, when neither
 * is given, from the BLAST databases. Option 't' names a taxon that is looked up and passed along in the two
 * file-based modes.
 *
 * @param args command line arguments, handed to {@code processCommandLine}
 * @return the error from command line processing, any exception caught while working, or {@code null}
 */
@Override
protected Exception doWork(String[] args) {
    try {
        Exception parseFailure = this.processCommandLine(args);
        if (parseFailure != null) {
            return parseFailure;
        }

        // this is kind of an oddball function of this tool.
        if (this.hasOption('s')) {
            BioSequence result = arrayDesignSequenceProcessingService.processSingleAccession(this.sequenceId, new String[] { "nt", "est_others", "est_human", "est_mouse" }, null, force);
            if (result != null) {
                AbstractCLI.log.info("Updated or created " + result);
            }
            return null;
        }

        for (ArrayDesign platform : this.arrayDesignsToProcess) {
            ArrayDesign thawed = this.thaw(platform);

            SequenceType parsedType = SequenceType.fromString(sequenceType);
            if (parsedType == null) {
                AbstractCLI.log.error("No sequenceType " + sequenceType + " found");
                this.bail(ErrorCode.INVALID_OPTION);
            }

            // Mode 1: sequences supplied in a file.
            if (this.hasOption('f')) {
                try (InputStream sequenceStream = FileTools.getInputStreamFromPlainOrCompressedFile(sequenceFile)) {
                    if (sequenceStream == null) {
                        AbstractCLI.log.error("No file " + sequenceFile + " was readable");
                        this.bail(ErrorCode.INVALID_OPTION);
                        return null;
                    }
                    Taxon selectedTaxon = null;
                    if (this.hasOption('t')) {
                        selectedTaxon = taxonService.findByCommonName(this.taxonName);
                        if (selectedTaxon == null) {
                            throw new IllegalArgumentException("No taxon named " + taxonName);
                        }
                    }
                    AbstractCLI.log.info("Processing ArrayDesign...");
                    arrayDesignSequenceProcessingService.processArrayDesign(thawed, sequenceStream, parsedType, selectedTaxon);
                    this.audit(thawed, "Sequences read from file: " + sequenceFile);
                }
                continue;
            }

            // Mode 2: sequence identifiers supplied in a file.
            if (this.hasOption('i')) {
                try (InputStream identifierStream = FileTools.getInputStreamFromPlainOrCompressedFile(idFile)) {
                    if (identifierStream == null) {
                        AbstractCLI.log.error("No file " + idFile + " was readable");
                        this.bail(ErrorCode.INVALID_OPTION);
                    }
                    Taxon selectedTaxon = null;
                    if (this.hasOption('t')) {
                        selectedTaxon = taxonService.findByCommonName(this.taxonName);
                        if (selectedTaxon == null) {
                            throw new IllegalArgumentException("No taxon named " + taxonName);
                        }
                    }
                    AbstractCLI.log.info("Processing ArrayDesign...");
                    arrayDesignSequenceProcessingService.processArrayDesign(thawed, identifierStream, new String[] { "nt", "est_others", "est_human", "est_mouse" }, null, selectedTaxon, force);
                    this.audit(thawed, "Sequences identifiers from file: " + idFile);
                }
                continue;
            }

            // Mode 3: no input file given.
            AbstractCLI.log.info("Retrieving sequences from BLAST databases");
            arrayDesignSequenceProcessingService.processArrayDesign(thawed, new String[] { "nt", "est_others", "est_human", "est_mouse" }, null, force);
            this.audit(thawed, "Sequence looked up from BLAST databases");
        }
    } catch (Exception e) {
        AbstractCLI.log.error(e, e);
        return e;
    }
    return null;
}
Also used : BioSequence(ubic.gemma.model.genome.biosequence.BioSequence) ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) InputStream(java.io.InputStream) Taxon(ubic.gemma.model.genome.Taxon) SequenceType(ubic.gemma.model.genome.biosequence.SequenceType)

Example 99 with ArrayDesign

use of ubic.gemma.model.expression.arrayDesign.ArrayDesign in project Gemma by PavlidisLab.

the class PreprocessorServiceImpl method processForMissingValues.

/**
 * Computes missing values for the experiment when its platform's technology type is
 * {@code TWOCOLOR} or {@code DUALMODE}; does nothing for other technology types.
 *
 * @param ee the experiment to process
 * @throws UnsupportedOperationException if the experiment uses more than one platform
 */
private void processForMissingValues(ExpressionExperiment ee) {
    Collection<ArrayDesign> platforms = expressionExperimentService.getArrayDesignsUsed(ee);
    if (platforms.size() > 1) {
        throw new UnsupportedOperationException("Skipping postprocessing because experiment uses " + "multiple platform types. Please check valid entry and run postprocessing separately.");
    }

    TechnologyType technology = platforms.iterator().next().getTechnologyType();
    boolean needsMissingValues = technology == TechnologyType.TWOCOLOR || technology == TechnologyType.DUALMODE;
    if (!needsMissingValues) {
        return;
    }

    PreprocessorServiceImpl.log.info(ee + " uses a two-color array design, processing for missing values ...");
    ExpressionExperiment thawed = expressionExperimentService.thawLite(ee);
    twoChannelMissingValueService.computeMissingValues(thawed);
}
Also used : TechnologyType(ubic.gemma.model.expression.arrayDesign.TechnologyType) ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign)

Example 100 with ArrayDesign

use of ubic.gemma.model.expression.arrayDesign.ArrayDesign in project Gemma by PavlidisLab.

the class ProcessedExpressionDataVectorCreateHelperServiceImpl method loadIntensities.

/**
 * Builds the intensity matrix for an experiment, choosing the approach from the technology type of the
 * first platform the experiment uses.
 * <p>
 * For {@code ONECOLOR} or {@code NONE} the matrix is built directly from the processed vectors. For any other
 * technology type the intensities are computed from the vectors of the "useful" quantitation types (raw
 * vectors first, processed vectors if no raw ones are found) and then masked with the missing-value matrix.
 *
 * @param ee               the experiment
 * @param processedVectors the experiment's processed vectors
 * @return the intensity matrix; the unmasked result of {@code builder.getIntensity()} (possibly
 *         {@code null}) if no missing-value matrix is found, and {@code null} if no intensity matrix is found
 * @throws IllegalStateException if there are no useful quantitation types, or no vectors for them
 */
private ExpressionDataDoubleMatrix loadIntensities(ExpressionExperiment ee, Collection<ProcessedExpressionDataVector> processedVectors) {
    Collection<ArrayDesign> platforms = this.eeService.getArrayDesignsUsed(ee);
    assert !platforms.isEmpty();
    ArrayDesign platform = platforms.iterator().next();
    assert platform != null && platform.getTechnologyType() != null;

    // Simple case first: the processed data are used as the intensities.
    if (platform.getTechnologyType().equals(TechnologyType.ONECOLOR) || platform.getTechnologyType().equals(TechnologyType.NONE)) {
        ProcessedExpressionDataVectorCreateHelperServiceImpl.log.info("Computing intensities directly from processed data");
        return new ExpressionDataDoubleMatrix(processedVectors);
    }

    ProcessedExpressionDataVectorCreateHelperServiceImpl.log.info("Computing intensities for two-color data from underlying data");

    /*
     * Get vectors needed to compute intensities.
     */
    Collection<QuantitationType> allTypes = eeService.getQuantitationTypes(ee);
    Collection<QuantitationType> usefulTypes = ExpressionDataMatrixBuilder.getUsefulQuantitationTypes(allTypes);
    if (usefulTypes.isEmpty()) {
        throw new IllegalStateException("No useful quantitation types for " + ee.getShortName());
    }

    // Raw vectors are tried first; processed vectors are the fallback.
    Collection<? extends DesignElementDataVector> sourceVectors = rawExpressionDataVectorService.find(usefulTypes);
    if (sourceVectors.isEmpty()) {
        sourceVectors = processedExpressionDataVectorService.find(usefulTypes);
    }
    if (sourceVectors.isEmpty()) {
        throw new IllegalStateException("No vectors for useful quantitation types for " + ee.getShortName());
    }
    ProcessedExpressionDataVectorCreateHelperServiceImpl.log.info("Vectors loaded ...");

    Collection<DesignElementDataVector> toThaw = new HashSet<>(sourceVectors);
    rawExpressionDataVectorService.thawRawAndProcessed(toThaw);

    ExpressionDataMatrixBuilder matrixBuilder = new ExpressionDataMatrixBuilder(processedVectors, sourceVectors);
    ExpressionDataDoubleMatrix result = matrixBuilder.getIntensity();
    ExpressionDataBooleanMatrix mask = matrixBuilder.getMissingValueData();

    if (mask == null) {
        ProcessedExpressionDataVectorCreateHelperServiceImpl.log.warn("Could not locate missing value matrix for " + ee + ", rank computation skipped (needed for two-color data)");
        return result;
    }
    if (result == null) {
        ProcessedExpressionDataVectorCreateHelperServiceImpl.log.warn("Could not locate intensity matrix for " + ee + ", rank computation skipped (needed for two-color data)");
        return null;
    }

    ProcessedExpressionDataVectorCreateHelperServiceImpl.log.info("Masking ...");
    this.maskMissingValues(result, mask);
    return result;
}
Also used : ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) DesignElementDataVector(ubic.gemma.model.expression.bioAssayData.DesignElementDataVector) QuantitationType(ubic.gemma.model.common.quantitationtype.QuantitationType)

Aggregations

ArrayDesign (ubic.gemma.model.expression.arrayDesign.ArrayDesign)186 CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence)43 Test (org.junit.Test)32 ExpressionExperiment (ubic.gemma.model.expression.experiment.ExpressionExperiment)26 InputStream (java.io.InputStream)25 BioAssay (ubic.gemma.model.expression.bioAssay.BioAssay)24 BioSequence (ubic.gemma.model.genome.biosequence.BioSequence)24 Taxon (ubic.gemma.model.genome.Taxon)23 BaseSpringContextTest (ubic.gemma.core.testing.BaseSpringContextTest)19 HashSet (java.util.HashSet)16 RawExpressionDataVector (ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector)16 Collection (java.util.Collection)14 AbstractGeoServiceTest (ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest)13 StopWatch (org.apache.commons.lang3.time.StopWatch)12 Before (org.junit.Before)12 BioMaterial (ubic.gemma.model.expression.biomaterial.BioMaterial)12 BioAssayDimension (ubic.gemma.model.expression.bioAssayData.BioAssayDimension)9 GZIPInputStream (java.util.zip.GZIPInputStream)8 SimpleExpressionExperimentMetaData (ubic.gemma.core.loader.expression.simple.model.SimpleExpressionExperimentMetaData)8 File (java.io.File)7