Search in sources :

Example 31 with RawExpressionDataVector

use of ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector in project Gemma by PavlidisLab.

the class DataUpdater method replaceData.

/**
 * Swaps in a new set of raw data vectors for an experiment (or adds them if it has none). The single quantitation
 * type carried by the supplied matrix is flagged as preferred. Old analyses are deleted before the vectors are
 * replaced, and a platform-switch audit event is recorded when the target platform differs from the platform the
 * experiment was using.
 * Related code: AffyPowerToolsProbesetSummarize.convertDesignElementDataVectors and
 * SimpleExpressionDataLoaderService.
 *
 * @param ee             the experiment to be modified
 * @param targetPlatform the platform for the new data (this can only be used for single-platform data sets)
 * @param data           the data to be used; must have at least one row and exactly one quantitation type
 * @return the experiment as returned by post-processing
 * @throws IllegalArgumentException if the experiment uses more than one platform, the data has no rows, or the
 *                                  data does not carry exactly one quantitation type
 * @throws IllegalStateException    if no vectors were produced from the data
 */
public ExpressionExperiment replaceData(ExpressionExperiment ee, ArrayDesign targetPlatform, ExpressionDataDoubleMatrix data) {
    Collection<ArrayDesign> platformsInUse = experimentService.getArrayDesignsUsed(ee);
    if (platformsInUse.size() > 1) {
        throw new IllegalArgumentException(
                "Can only replace data for an experiment that uses one platform; "
                        + "you must switch/merge first and then provide appropriate replacement data.");
    }
    if (data.rows() == 0) {
        throw new IllegalArgumentException("Data had no rows");
    }
    ArrayDesign previousPlatform = platformsInUse.iterator().next();

    // Exactly one quantitation type is required; the two checks are mutually exclusive.
    Collection<QuantitationType> suppliedTypes = data.getQuantitationTypes();
    if (suppliedTypes.isEmpty()) {
        throw new IllegalArgumentException("Please supply a quantitation type with the data");
    }
    if (suppliedTypes.size() > 1) {
        throw new IllegalArgumentException("Only supports a single quantitation type");
    }
    QuantitationType preferredType = suppliedTypes.iterator().next();
    preferredType.setIsPreferred(true);

    Collection<RawExpressionDataVector> replacementVectors = this.makeNewVectors(ee, targetPlatform, data, preferredType);
    if (replacementVectors.isEmpty()) {
        throw new IllegalStateException("no vectors!");
    }

    // Remove all analyses, etc. before the vectors they were based on are replaced.
    analysisUtilService.deleteOldAnalyses(ee);
    ee = experimentService.replaceRawVectors(ee, replacementVectors);

    // Audit if we switched platforms.
    boolean platformSwitched = !targetPlatform.equals(previousPlatform);
    if (platformSwitched) {
        AuditEventType switchEvent = ExpressionExperimentPlatformSwitchEvent.Factory.newInstance();
        String switchNote = "Switched in course of updating vectors using data input (from "
                + previousPlatform.getShortName() + " to " + targetPlatform.getShortName() + ")";
        auditTrailService.addUpdateEvent(ee, switchEvent, switchNote);
    }

    this.audit(ee, "Data vector replacement for " + targetPlatform, true);
    experimentService.update(ee);
    ee = this.postprocess(ee);

    // Debug-only sanity checks (active only when assertions are enabled).
    assert ee.getNumberOfDataVectors() != null;
    for (BioAssay assay : ee.getBioAssays()) {
        assert assay.getArrayDesignUsed().equals(targetPlatform);
    }
    return ee;
}
Also used : RawExpressionDataVector(ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector) AuditEventType(ubic.gemma.model.common.auditAndSecurity.eventType.AuditEventType) ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay)

Example 32 with RawExpressionDataVector

use of ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector in project Gemma by PavlidisLab.

the class ExpressionPersister method fillInExpressionExperimentDataVectorAssociations.

/**
 * Fills in the associations of every raw expression data vector of the experiment and gathers the bioassays of the
 * bioassay dimensions encountered along the way. Progress is logged whenever more than five seconds have elapsed
 * since the previous progress message.
 *
 * @param ee the experiment whose raw vectors are processed
 * @param c  cache handed through to the per-vector fill-in call
 * @return the bioassays collected from all vectors, or null if the thread was interrupted part-way through
 */
private Collection<BioAssay> fillInExpressionExperimentDataVectorAssociations(ExpressionExperiment ee, ArrayDesignsForExperimentCache c) {
    AbstractPersister.log.info("Filling in DesignElementDataVectors...");

    StopWatch progressTimer = new StopWatch();
    progressTimer.start();
    Collection<BioAssay> collectedBioAssays = new HashSet<>();
    int processed = 0;

    for (RawExpressionDataVector vector : ee.getRawExpressionDataVectors()) {
        BioAssayDimension dimension = this.fillInDesignElementDataVectorAssociations(vector, c);

        if (progressTimer.getTime() > 5000) {
            // The first report covers setup time; later ones report the running count.
            String progressMessage = processed == 0
                    ? "Setup: " + progressTimer.getTime()
                    : "Filled in " + (processed) + " DesignElementDataVectors (" + progressTimer.getTime() + "ms since last check)";
            AbstractPersister.log.info(progressMessage);
            progressTimer.reset();
            progressTimer.start();
        }

        collectedBioAssays.addAll(dimension.getBioAssays());
        processed++;

        // Note: Thread.interrupted() clears the thread's interrupt status as a side effect.
        if (Thread.interrupted()) {
            AbstractPersister.log.info("Cancelled");
            return null;
        }
    }

    AbstractPersister.log.info("Filled in total of " + processed + " DesignElementDataVectors, " + collectedBioAssays.size() + " bioassays");
    return collectedBioAssays;
}
Also used : BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) RawExpressionDataVector(ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay) StopWatch(org.apache.commons.lang3.time.StopWatch)

Example 33 with RawExpressionDataVector

use of ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector in project Gemma by PavlidisLab.

the class TwoChannelMissingValuesImpl method examineVector.

/**
 * Builds one detection-call vector for a single design element and adds it to {@code results}. For each column of
 * the preferred matrix a call is computed from the available signal and background values; columns that
 * {@code checkMissingValue} reports as handled are skipped, and gaps (null calls) are filled in afterwards.
 *
 * @param count number of vectors examined before this call
 * @return {@code count + 1}
 */
private int examineVector(ExpressionExperiment source, ExpressionDataDoubleMatrix preferred, ExpressionDataDoubleMatrix signalChannelA, ExpressionDataDoubleMatrix signalChannelB, ExpressionDataDoubleMatrix bkgChannelA, ExpressionDataDoubleMatrix bkgChannelB, double signalToNoiseThreshold, Collection<Double> extraMissingValueIndicators, Collection<RawExpressionDataVector> results, ByteArrayConverter converter, int count, ExpressionDataDoubleMatrix baseChannel, Double signalThreshold, QuantitationType present, ExpressionDataMatrixRowElement element) {
    CompositeSequence probe = element.getDesignElement();

    RawExpressionDataVector callVector = RawExpressionDataVector.Factory.newInstance();
    callVector.setQuantitationType(present);
    callVector.setExpressionExperiment(source);
    callVector.setDesignElement(probe);
    assert baseChannel != null;
    callVector.setBioAssayDimension(baseChannel.getBioAssayDimension(probe));

    // Columns only for this design element!
    int columnCount = preferred.columns(probe);
    Boolean[] calls = new Boolean[columnCount];

    // Fetch the rows once; any channel may be absent.
    Double[] preferredRow = preferred.getRow(probe);
    Double[] signalRowA = signalChannelA == null ? null : signalChannelA.getRow(probe);
    Double[] signalRowB = signalChannelB == null ? null : signalChannelB.getRow(probe);
    Double[] backgroundRowA = bkgChannelA == null ? null : bkgChannelA.getRow(probe);
    Double[] backgroundRowB = bkgChannelB == null ? null : bkgChannelB.getRow(probe);

    // Tracks whether any call could not be determined.
    boolean hasGaps = false;
    for (int i = 0; i < columnCount; i++) {
        if (this.checkMissingValue(extraMissingValueIndicators, calls, preferredRow, i)) {
            continue;
        }

        // Background values are passed through as-is (possibly null) when the row exists.
        Double backgroundA = Double.NaN;
        if (backgroundRowA != null) {
            backgroundA = backgroundRowA[i];
        }
        Double backgroundB = Double.NaN;
        if (backgroundRowB != null) {
            backgroundB = backgroundRowB[i];
        }

        // Signal values fall back to NaN when the row or the individual entry is absent.
        Double signalA = Double.NaN;
        if (signalRowA != null && signalRowA[i] != null) {
            signalA = signalRowA[i];
        }
        Double signalB = Double.NaN;
        if (signalRowB != null && signalRowB[i] != null) {
            signalB = signalRowB[i];
        }

        /*
         * Missing values here wreak havoc. Sometimes in multiarray studies data are missing.
         */
        Boolean detected = this.computeCall(signalToNoiseThreshold, signalThreshold, signalA, signalB, backgroundA, backgroundB);
        hasGaps |= (detected == null);
        calls[i] = detected;
    }

    if (hasGaps) {
        this.fillGapsInCalls(calls);
    }

    callVector.setData(converter.booleanArrayToBytes(ArrayUtils.toPrimitive(calls)));
    results.add(callVector);

    int examined = count + 1;
    if (examined % 4000 == 0) {
        TwoChannelMissingValuesImpl.log.info(examined + " vectors examined for missing values, " + results.size() + " vectors generated so far.");
    }
    return examined;
}
Also used : RawExpressionDataVector(ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence)

Example 34 with RawExpressionDataVector

use of ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector in project Gemma by PavlidisLab.

the class TwoChannelMissingValuesImpl method computeMissingValues.

/**
 * Computes missing-value (present/absent) vectors for an experiment. The experiment is thawed, the raw vectors for
 * its useful quantitation types are loaded (falling back to processed vectors when no raw vectors are found), the
 * channel matrices are built from them, and the actual computation is delegated to the private overload.
 *
 * @param ee                          the experiment to analyse
 * @param signalToNoiseThreshold      signal-to-noise threshold passed on to the computation
 * @param extraMissingValueIndicators additional values to treat as missing; a null value is tolerated by the
 *                                    logging done here and is passed on unchanged
 * @return a new set containing the vectors produced by the computation
 */
@Override
public Collection<RawExpressionDataVector> computeMissingValues(ExpressionExperiment ee, double signalToNoiseThreshold, Collection<Double> extraMissingValueIndicators) {
    ee = expressionExperimentService.thawLite(ee);
    Collection<QuantitationType> usefulQuantitationTypes = ExpressionDataMatrixBuilder.getUsefulQuantitationTypes(ee);
    StopWatch timer = new StopWatch();
    timer.start();
    TwoChannelMissingValuesImpl.log.info("Loading vectors ...");
    Collection<RawExpressionDataVector> rawVectors = rawExpressionDataVectorService.find(usefulQuantitationTypes);
    Collection<ProcessedExpressionDataVector> procVectors = new HashSet<>();
    if (rawVectors.isEmpty()) {
        // No raw vectors: fall back to the processed vectors.
        procVectors = processedExpressionDataVectorService.find(usefulQuantitationTypes);
        processedExpressionDataVectorService.thaw(procVectors);
    } else {
        rawExpressionDataVectorService.thaw(rawVectors);
    }
    timer.stop();
    this.logTimeInfo(timer, procVectors.size() + rawVectors.size());
    Collection<? extends DesignElementDataVector> builderVectors = new HashSet<>(rawVectors.isEmpty() ? procVectors : rawVectors);
    // Routed through the class logger rather than standard output, like every other message in this method.
    TwoChannelMissingValuesImpl.log.info("Building matrix with vectors that I just thawed");
    ExpressionDataMatrixBuilder builder = new ExpressionDataMatrixBuilder(builderVectors);
    Collection<BioAssayDimension> dims = builder.getBioAssayDimensions();
    /*
     * Note we have to do this one array design at a time, because we are producing DesignElementDataVectors which
     * must be associated with the correct BioAssayDimension.
     */
    TwoChannelMissingValuesImpl.log.info("Study has " + dims.size() + " bioassaydimensions");
    if (extraMissingValueIndicators != null && !extraMissingValueIndicators.isEmpty()) {
        TwoChannelMissingValuesImpl.log.info("There are " + extraMissingValueIndicators.size() + " manually-set missing value indicators");
    }
    ExpressionDataDoubleMatrix preferredData = builder.getPreferredData();
    ExpressionDataDoubleMatrix bkgDataA = builder.getBackgroundChannelA();
    ExpressionDataDoubleMatrix bkgDataB = builder.getBackgroundChannelB();
    ExpressionDataDoubleMatrix signalDataA = builder.getSignalChannelA();
    ExpressionDataDoubleMatrix signalDataB = builder.getSignalChannelB();
    if (builder.isAnyMissing()) {
        // Report which of the channels are affected; order matches the original log output.
        this.warnIfChannelHasMissingValues(builder, bkgDataA, "bkgDataA");
        this.warnIfChannelHasMissingValues(builder, bkgDataB, "bkgDataB");
        this.warnIfChannelHasMissingValues(builder, signalDataA, "signalDataA");
        this.warnIfChannelHasMissingValues(builder, signalDataB, "signalDataB");
    }
    Collection<RawExpressionDataVector> dimRes = this.computeMissingValues(ee, preferredData, signalDataA, signalDataB, bkgDataA, bkgDataB, signalToNoiseThreshold, extraMissingValueIndicators);
    return new HashSet<>(dimRes);
}

/**
 * Logs a single warning if the builder reports missing values for any quantitation type of the given channel.
 *
 * @param builder     the builder queried for missing-value counts
 * @param channel     the channel matrix to check; nothing is done when it is null
 * @param channelName label used in the warning message
 */
private void warnIfChannelHasMissingValues(ExpressionDataMatrixBuilder builder, ExpressionDataDoubleMatrix channel, String channelName) {
    if (channel == null) {
        return;
    }
    for (QuantitationType qt : channel.getQuantitationTypes()) {
        if (builder.getNumMissingValues(qt) > 0) {
            TwoChannelMissingValuesImpl.log.warn("Missing values in " + channelName);
            return;
        }
    }
}
Also used : ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix) ProcessedExpressionDataVector(ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector) StopWatch(org.apache.commons.lang3.time.StopWatch) BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) RawExpressionDataVector(ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector) HashSet(java.util.HashSet)

Example 35 with RawExpressionDataVector

use of ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector in project Gemma by PavlidisLab.

the class TwoChannelMissingValuesImpl method computeMissingValues.

/**
 * Tries to derive 'missing value' information for a two-channel data set. The computation is attempted even when
 * background intensities or one of the intensity channels are unavailable, although having all four sets of
 * values is preferable. When validation of the inputs fails, a warning is logged and an empty collection is
 * returned. Otherwise the new quantitation type is added to the source experiment and the generated vectors are
 * persisted through the helper service.
 *
 * @param source                      the source
 * @param preferred                   preferred matrix
 * @param signalChannelA              signal channel A
 * @param signalChannelB              signal channel B
 * @param bkgChannelA                 background channel A
 * @param bkgChannelB                 background channel B
 * @param signalToNoiseThreshold      noise threshold
 * @param extraMissingValueIndicators extra missing value indicators
 * @return DesignElementDataVectors corresponding to a new PRESENTCALL quantitation type for the design elements and
 * biomaterial dimension represented in the inputs.
 */
private Collection<RawExpressionDataVector> computeMissingValues(ExpressionExperiment source, ExpressionDataDoubleMatrix preferred, ExpressionDataDoubleMatrix signalChannelA, ExpressionDataDoubleMatrix signalChannelB, ExpressionDataDoubleMatrix bkgChannelA, ExpressionDataDoubleMatrix bkgChannelB, double signalToNoiseThreshold, Collection<Double> extraMissingValueIndicators) {
    if (!this.validate(preferred, signalChannelA, signalChannelB, bkgChannelA, bkgChannelB, signalToNoiseThreshold)) {
        TwoChannelMissingValuesImpl.log.warn("Missing value computation cannot proceed");
        return new HashSet<>();
    }

    Collection<RawExpressionDataVector> generated = new HashSet<>();
    ByteArrayConverter byteConverter = new ByteArrayConverter();

    // Channel A drives the iteration when available, otherwise channel B.
    ExpressionDataDoubleMatrix baseChannel;
    if (signalChannelA == null) {
        baseChannel = signalChannelB;
    } else {
        baseChannel = signalChannelA;
    }

    // A signal threshold is only computed when neither background channel is available.
    Double signalThreshold = Double.NaN;
    boolean noBackground = bkgChannelA == null && bkgChannelB == null;
    if (noBackground) {
        signalThreshold = this.computeSignalThreshold(preferred, signalChannelA, signalChannelB, baseChannel);
    }

    QuantitationType presentCallType = this.getMissingDataQuantitationType(signalToNoiseThreshold, signalThreshold);
    source.getQuantitationTypes().add(presentCallType);

    int examined = 0;
    for (ExpressionDataMatrixRowElement row : baseChannel.getRowElements()) {
        examined = this.examineVector(source, preferred, signalChannelA, signalChannelB, bkgChannelA, bkgChannelB, signalToNoiseThreshold, extraMissingValueIndicators, generated, byteConverter, examined, baseChannel, signalThreshold, presentCallType, row);
    }
    TwoChannelMissingValuesImpl.log.info("Finished: " + examined + " vectors examined for missing values");

    return twoChannelMissingValueHelperService.persist(source, generated);
}
Also used : ExpressionDataMatrixRowElement(ubic.gemma.core.datastructure.matrix.ExpressionDataMatrixRowElement) ByteArrayConverter(ubic.basecode.io.ByteArrayConverter) RawExpressionDataVector(ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector) ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix) HashSet(java.util.HashSet)

Aggregations

RawExpressionDataVector (ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector)53 CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence)18 ArrayDesign (ubic.gemma.model.expression.arrayDesign.ArrayDesign)16 BioAssay (ubic.gemma.model.expression.bioAssay.BioAssay)16 BioAssayDimension (ubic.gemma.model.expression.bioAssayData.BioAssayDimension)16 Test (org.junit.Test)15 QuantitationType (ubic.gemma.model.common.quantitationtype.QuantitationType)13 ExpressionExperiment (ubic.gemma.model.expression.experiment.ExpressionExperiment)12 BaseSpringContextTest (ubic.gemma.core.testing.BaseSpringContextTest)9 BioMaterial (ubic.gemma.model.expression.biomaterial.BioMaterial)7 InputStream (java.io.InputStream)6 Collection (java.util.Collection)6 HashSet (java.util.HashSet)6 GZIPInputStream (java.util.zip.GZIPInputStream)6 ByteArrayConverter (ubic.basecode.io.ByteArrayConverter)6 AbstractGeoServiceTest (ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest)6 GeoSeries (ubic.gemma.core.loader.expression.geo.model.GeoSeries)6 DesignElementDataVector (ubic.gemma.model.expression.bioAssayData.DesignElementDataVector)6 ProcessedExpressionDataVector (ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector)5 Transactional (org.springframework.transaction.annotation.Transactional)4