Search in sources:

Example 1 with ProcessedExpressionDataVector

use of ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector in project Gemma by PavlidisLab.

the class ProcessedExpressionDataVectorCreateTaskImpl method execute.

/**
 * Makes processed expression data vectors available for the experiment named in the task command and
 * then creates the coexpression matrix for that experiment.
 * <p>
 * When the command asks for the correlation matrix only, vectors already returned by the service are
 * reused and are computed only if that collection is empty; otherwise they are always computed.
 *
 * @return a task result built from the command and the number of processed vectors
 */
@Override
public TaskResult execute() {
    final ExpressionExperiment experiment = taskCommand.getExpressionExperiment();
    final Collection<ProcessedExpressionDataVector> vectors;
    if (!taskCommand.isCorrelationMatrixOnly()) {
        vectors = processedExpressionDataVectorService.computeProcessedExpressionData(experiment);
    } else {
        // Matrix-only mode shares this task rather than having one of its own, so existing vectors are
        // kept and computation happens only when there are none.
        final Collection<ProcessedExpressionDataVector> existing = processedExpressionDataVectorService.getProcessedDataVectors(experiment);
        vectors = existing.isEmpty()
                ? processedExpressionDataVectorService.computeProcessedExpressionData(experiment)
                : existing;
    }
    coexpressionMatrixService.create(experiment);
    return new TaskResult(taskCommand, vectors.size());
}
Also used : ProcessedExpressionDataVector(ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector) TaskResult(ubic.gemma.core.job.TaskResult) ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment)

Example 2 with ProcessedExpressionDataVector

use of ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector in project Gemma by PavlidisLab.

the class ExpressionDataDoubleMatrixTest method testMatrixConversion.

/**
 * Loads GSE8294 through the GEO service, checks the dimensions of the preferred raw data matrix, then
 * exercises data file writing/deletion and marking/unmarking of a bioassay as missing, verifying the
 * effect on the processed data matrix each time.
 *
 * @throws Exception if any of the called services fails
 */
@Test
public void testMatrixConversion() throws Exception {
    try {
        // Point the GEO service at a generator built from the test file base path before loading.
        geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGeneratorLocal(this.getTestFileBasePath("")));
        Collection<?> results = geoService.fetchAndLoad("GSE8294", false, true, false);
        newee = (ExpressionExperiment) results.iterator().next();
    } catch (AlreadyExistsInSystemException e) {
        // The experiment is already in the system: reuse the instance carried by the exception.
        newee = (ExpressionExperiment) ((List<?>) e.getData()).iterator().next();
    }
    newee = expressionExperimentService.thaw(newee);
    // Make sure the raw vectors really are thawed, so we can get the design element sequences.
    Collection<RawExpressionDataVector> vectors = newee.getRawExpressionDataVectors();
    rawExpressionDataVectorService.thaw(vectors);
    ExpressionDataMatrixBuilder builder = new ExpressionDataMatrixBuilder(vectors);
    ExpressionDataDoubleMatrix matrix = builder.getPreferredData();
    // The preferred data matrix is expected to be 66 rows by 9 columns with a real value at (10, 0).
    assertTrue(!Double.isNaN(matrix.get(10, 0)));
    assertEquals(66, matrix.rows());
    assertEquals(9, matrix.columns());
    /*
     * Additional tests for files and outlier marking.
     */
    processedDataVectorService.computeProcessedExpressionData(newee);
    // The data file must exist after writeOrLocateDataFile and be gone after deleteAllFiles.
    File f1 = expressionDataFileService.writeOrLocateDataFile(expressionExperimentService.load(newee.getId()), true, true);
    assertNotNull(f1);
    assertTrue(f1.exists());
    expressionDataFileService.deleteAllFiles(newee);
    assertTrue(!f1.exists());
    /*
     * Outlier removal: mark one bioassay as missing and check its processed values become NaN.
     */
    BioAssay tba = newee.getBioAssays().iterator().next();
    Collection<BioAssay> ol = new HashSet<>();
    ol.add(tba);
    sampleRemoveService.markAsMissing(ol);
    assertTrue(tba.getIsOutlier());
    newee = expressionExperimentService.thaw(newee);
    Collection<ProcessedExpressionDataVector> vecs = newee.getProcessedExpressionDataVectors();
    this.processedDataVectorService.thaw(vecs);
    assertTrue(!vecs.isEmpty());
    ExpressionDataMatrixBuilder matrixBuilder = new ExpressionDataMatrixBuilder(vecs);
    ExpressionDataDoubleMatrix data = matrixBuilder.getProcessedData();
    assertNotNull(data);
    // Index 10 of the marked bioassay's column is used as the probe value.
    assertTrue(Double.isNaN(data.getColumn(tba)[10]));
    // Undo the marking, reload the experiment, and check the same probe value is a real number again.
    sampleRemoveService.unmarkAsMissing(ol);
    newee = expressionExperimentService.load(newee.getId());
    newee = expressionExperimentService.thaw(newee);
    vecs = newee.getProcessedExpressionDataVectors();
    this.processedDataVectorService.thaw(vecs);
    assertTrue(!vecs.isEmpty());
    matrixBuilder = new ExpressionDataMatrixBuilder(vecs);
    data = matrixBuilder.getProcessedData();
    assertTrue(!tba.getIsOutlier());
    assertTrue(!Double.isNaN(data.getColumn(tba)[10]));
}
Also used : ExpressionDataMatrixBuilder(ubic.gemma.core.analysis.preprocess.ExpressionDataMatrixBuilder) ProcessedExpressionDataVector(ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector) ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment) RawExpressionDataVector(ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector) AlreadyExistsInSystemException(ubic.gemma.core.loader.util.AlreadyExistsInSystemException) File(java.io.File) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay) GeoDomainObjectGeneratorLocal(ubic.gemma.core.loader.expression.geo.GeoDomainObjectGeneratorLocal) AbstractGeoServiceTest(ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest) Test(org.junit.Test)

Example 3 with ProcessedExpressionDataVector

use of ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector in project Gemma by PavlidisLab.

the class PreprocessorServiceImpl method batchCorrect.

/**
 * Runs ComBat batch correction on the processed data of an experiment and records an audit event.
 * The raw data are left alone; only the processed data vectors are replaced.
 *
 * @param ee    the experiment to correct
 * @param force when {@code true}, the outlier check is skipped and the audit note is prefixed with
 *              "[Forced]"; a detected batch confound is appended to the audit detail
 * @throws PreprocessingException wrapping any exception raised while correcting, storing or
 *                                post-processing the data
 */
@Override
public void batchCorrect(ExpressionExperiment ee, boolean force) throws PreprocessingException {
    this.checkArrayDesign(ee);
    ee = expressionExperimentService.thawLite(ee);
    this.checkCorrectable(ee, force);

    String note = "ComBat batch correction";
    String detail = null;
    if (force) {
        note = "[Forced]" + note;
        detail = "Batch correction skipped outlier check.";
        PreprocessorServiceImpl.log.warn(detail);
    } else {
        // Predicted outliers are only checked when the correction is not forced.
        this.checkOutliers(ee);
    }

    try {
        Collection<ProcessedExpressionDataVector> currentVectors = this.getProcessedExpressionDataVectors(ee);
        // TODO log-transform if not already, update QT. See https://github.com/PavlidisLab/Gemma/issues/50
        ExpressionDataDoubleMatrix corrected = this.getCorrectedData(ee, currentVectors);

        // Store the corrected matrix back as processed vectors.
        processedExpressionDataVectorService.createProcessedDataVectors(ee, corrected.toProcessedDataVectors());

        AuditEventType correctionEvent = BatchCorrectionEvent.Factory.newInstance();
        String confound = expressionExperimentService.getBatchConfound(ee);
        if (force && confound != null) {
            String confoundNote = "Batch correction forced over a detected confound: " + confound;
            if (detail == null) {
                detail = confoundNote;
            } else {
                detail = detail + "\n" + confoundNote;
            }
        }
        auditTrailService.addUpdateEvent(ee, correctionEvent, note, detail);

        this.removeInvalidatedData(ee);
        this.processExceptForVectorCreate(ee);
    } catch (Exception e) {
        throw new PreprocessingException(e);
    }
}
Also used : AuditEventType(ubic.gemma.model.common.auditAndSecurity.eventType.AuditEventType) ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix) ProcessedExpressionDataVector(ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector)

Example 4 with ProcessedExpressionDataVector

use of ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector in project Gemma by PavlidisLab.

the class ProcessedExpressionDataVectorCreateHelperServiceImpl method reorderByDesign.

/**
 * Reorders the processed data vectors of an experiment so that their columns follow the ordering
 * produced by {@code ExpressionDataMatrixColumnSort.orderByExperimentalDesign}.
 * <p>
 * For every bioassay dimension of the experiment a new, reordered dimension is created; each processed
 * vector then has its values permuted the same way and is pointed at the new dimension. Nothing is done
 * when the experiment has no experimental factors or no processed vectors.
 *
 * @param eeId id of the experiment to reorder
 * @throws IllegalStateException if a bioassay's sample is not part of the computed ordering, or a
 *                               vector has a column for which no new position is known
 */
@Override
@Transactional
public void reorderByDesign(Long eeId) {
    ExpressionExperiment ee = expressionExperimentDao.load(eeId);
    if (ee.getExperimentalDesign().getExperimentalFactors().size() == 0) {
        ProcessedExpressionDataVectorCreateHelperServiceImpl.log.info(ee.getShortName() + " does not have a populated experimental design, skipping");
        return;
    }
    Collection<ProcessedExpressionDataVector> processedDataVectors = ee.getProcessedExpressionDataVectors();
    if (processedDataVectors.size() == 0) {
        ProcessedExpressionDataVectorCreateHelperServiceImpl.log.info(ee.getShortName() + " does not have processed data");
        return;
    }
    Collection<BioAssayDimension> dims = this.eeService.getBioAssayDimensions(ee);
    if (dims.size() > 1) {
        this.checkAllBioAssayDimensionsMatch(dims);
    }
    // The samples of the first dimension are the starting point for computing the design ordering.
    BioAssayDimension bioassaydim = dims.iterator().next();
    List<BioMaterial> start = new ArrayList<>();
    for (BioAssay ba : bioassaydim.getBioAssays()) {
        start.add(ba.getSampleUsed());
    }
    /*
     * Get the ordering we want.
     */
    List<BioMaterial> orderByExperimentalDesign = ExpressionDataMatrixColumnSort.orderByExperimentalDesign(start, ee.getExperimentalDesign().getExperimentalFactors());
    /*
     * Map of biomaterials to the new order index.
     */
    final Map<BioMaterial, Integer> ordering = new HashMap<>();
    int i = 0;
    for (BioMaterial bioMaterial : orderByExperimentalDesign) {
        ordering.put(bioMaterial, i);
        i++;
    }
    /*
     * Map of the original order to new order of bioassays (old column index -> new column index).
     */
    Map<Integer, Integer> indexes = new HashMap<>();
    Map<BioAssayDimension, BioAssayDimension> old2new = new HashMap<>();
    for (BioAssayDimension bioAssayDimension : dims) {
        Collection<BioAssay> bioAssays = bioAssayDimension.getBioAssays();
        assert bioAssays != null;
        /*
         * Initialize the new bioassay list with placeholders so positions can be set in any order.
         */
        List<BioAssay> resorted = new ArrayList<>(bioAssays.size());
        for (int m = 0; m < bioAssays.size(); m++) {
            resorted.add(null);
        }
        for (int oldIndex = 0; oldIndex < bioAssays.size(); oldIndex++) {
            BioAssay bioAssay = ((List<BioAssay>) bioAssays).get(oldIndex);
            BioMaterial sam1 = bioAssay.getSampleUsed();
            if (ordering.containsKey(sam1)) {
                Integer newIndex = ordering.get(sam1);
                resorted.set(newIndex, bioAssay);
                /*
                 * Should be the same for all dimensions....
                 */
                assert !indexes.containsKey(oldIndex) || indexes.get(oldIndex).equals(newIndex);
                indexes.put(oldIndex, newIndex);
            } else {
                throw new IllegalStateException("Sample of the bioassay at position " + oldIndex + " is not part of the design ordering for " + ee.getShortName());
            }
        }
        BioAssayDimension newBioAssayDimension = BioAssayDimension.Factory.newInstance();
        newBioAssayDimension.setBioAssays(resorted);
        newBioAssayDimension.setName("Processed data of ee " + ee.getShortName() + " ordered by design");
        newBioAssayDimension.setDescription("Data was reordered based on the experimental design.");
        newBioAssayDimension = bioAssayDimensionService.create(newBioAssayDimension);
        old2new.put(bioAssayDimension, newBioAssayDimension);
    }
    ByteArrayConverter converter = new ByteArrayConverter();
    for (ProcessedExpressionDataVector v : processedDataVectors) {
        BioAssayDimension revisedBioAssayDimension = old2new.get(v.getBioAssayDimension());
        assert revisedBioAssayDimension != null;
        double[] data = converter.byteArrayToDoubles(v.getData());
        /*
         * Put the data in the order of the new bioAssayDimension. The bioassay that was at oldIndex now
         * sits at indexes.get(oldIndex), so its value has to move to that same position. Reading from
         * data[indexes.get(k)] instead would apply the inverse permutation and misalign values with
         * bioassays for any reordering that is not its own inverse.
         */
        Double[] resortedData = new Double[data.length];
        for (int oldIndex = 0; oldIndex < data.length; oldIndex++) {
            Integer newIndex = indexes.get(oldIndex);
            if (newIndex == null) {
                throw new IllegalStateException("No new position known for column " + oldIndex + " of a vector with " + data.length + " values in " + ee.getShortName());
            }
            resortedData[newIndex] = data[oldIndex];
        }
        v.setData(converter.toBytes(resortedData));
        v.setBioAssayDimension(revisedBioAssayDimension);
    }
    ProcessedExpressionDataVectorCreateHelperServiceImpl.log.info("Updating bioassay ordering of " + processedDataVectors.size() + " vectors");
    this.auditTrailService.addUpdateEvent(ee, "Reordered the data vectors by experimental design");
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) ByteArrayConverter(ubic.basecode.io.ByteArrayConverter) DoubleArrayList(cern.colt.list.DoubleArrayList) ProcessedExpressionDataVector(ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector) ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment) BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) DoubleArrayList(cern.colt.list.DoubleArrayList) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay) Transactional(org.springframework.transaction.annotation.Transactional)

Example 5 with ProcessedExpressionDataVector

use of ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector in project Gemma by PavlidisLab.

the class ExpressionExperimentBatchCorrectionServiceImpl method comBat.

/**
 * Batch-corrects the processed data of an experiment by delegating to the matrix-based
 * {@code comBat} overload.
 *
 * @param ee the experiment whose processed data vectors are to be corrected
 * @return the result of the matrix-based {@code comBat} call, or {@code null} when no batch factor is
 *         found for the experiment
 */
@Override
public ExpressionDataDoubleMatrix comBat(ExpressionExperiment ee) {
    // Without a batch factor there is nothing to correct against.
    if (this.getBatchFactor(ee) == null) {
        ExpressionExperimentBatchCorrectionServiceImpl.log.warn("No batch factor found");
        return null;
    }

    // Fetch and thaw the processed vectors, then wrap them in a matrix for the correction.
    Collection<ProcessedExpressionDataVector> processedVectors = processedExpressionDataVectorService.getProcessedDataVectors(ee);
    processedExpressionDataVectorService.thaw(processedVectors);
    return this.comBat(new ExpressionDataDoubleMatrix(processedVectors));
}
Also used : ExperimentalFactor(ubic.gemma.model.expression.experiment.ExperimentalFactor) ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix) ProcessedExpressionDataVector(ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector)

Aggregations

ProcessedExpressionDataVector (ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector)26 QuantitationType (ubic.gemma.model.common.quantitationtype.QuantitationType)10 BioAssayDimension (ubic.gemma.model.expression.bioAssayData.BioAssayDimension)9 Test (org.junit.Test)8 AbstractGeoServiceTest (ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest)8 GeoDomainObjectGeneratorLocal (ubic.gemma.core.loader.expression.geo.GeoDomainObjectGeneratorLocal)8 CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence)7 AlreadyExistsInSystemException (ubic.gemma.core.loader.util.AlreadyExistsInSystemException)6 BioAssay (ubic.gemma.model.expression.bioAssay.BioAssay)6 ByteArrayConverter (ubic.basecode.io.ByteArrayConverter)5 ExpressionDataDoubleMatrix (ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix)5 DesignElementDataVector (ubic.gemma.model.expression.bioAssayData.DesignElementDataVector)5 RawExpressionDataVector (ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector)5 ExpressionExperiment (ubic.gemma.model.expression.experiment.ExpressionExperiment)5 HashSet (java.util.HashSet)4 BioMaterial (ubic.gemma.model.expression.biomaterial.BioMaterial)4 File (java.io.File)3 StandardQuantitationType (ubic.gemma.model.common.quantitationtype.StandardQuantitationType)3 ArrayDesign (ubic.gemma.model.expression.arrayDesign.ArrayDesign)3 DoubleArrayList (cern.colt.list.DoubleArrayList)2