Search in sources :

Example 21 with ExpressionDataDoubleMatrix

use of ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix in project Gemma by PavlidisLab.

the class LinkAnalysisServiceImpl method processVectors.

/**
 * Builds a filtered data matrix from the given vectors, normalizes it, runs the link analysis on it and hands the
 * result to {@code writeLinks} together with a writer on {@code System.out}.
 *
 * @param t                  taxon passed through to the analysis setup
 * @param dataVectors        processed vectors the data matrix is built from
 * @param filterConfig       filter settings used when building the matrix and when writing the links
 * @param linkAnalysisConfig analysis settings; also supplies the array name used when filtering
 * @return the analysis object, after {@code analyze()} has been called on it
 * @throws RuntimeException wrapping any {@link IOException} raised while writing the links
 */
@Override
public LinkAnalysis processVectors(Taxon t, Collection<ProcessedExpressionDataVector> dataVectors, FilterConfig filterConfig, LinkAnalysisConfig linkAnalysisConfig) {
    ExpressionDataDoubleMatrix datamatrix = expressionDataMatrixService.getFilteredMatrix(linkAnalysisConfig.getArrayName(), filterConfig, dataVectors);
    this.checkDatamatrix(datamatrix);
    LinkAnalysis la = new LinkAnalysis(linkAnalysisConfig);
    datamatrix = this.normalize(datamatrix, linkAnalysisConfig);
    this.setUpForAnalysis(t, la, dataVectors, datamatrix);
    la.analyze();
    // The writer wraps System.out, which must stay open: no try-with-resources here. A PrintWriter built this way
    // does not autoflush, so flush explicitly to make sure buffered output is not lost, even on failure.
    PrintWriter out = new PrintWriter(System.out);
    try {
        this.writeLinks(la, filterConfig, out);
    } catch (IOException e) {
        throw new RuntimeException(e);
    } finally {
        out.flush();
    }
    return la;
}
Also used : ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix) IOException(java.io.IOException) PrintWriter(java.io.PrintWriter)

Example 22 with ExpressionDataDoubleMatrix

use of ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix in project Gemma by PavlidisLab.

the class DataUpdater method log2cpmFromCounts.

/**
 * Back-fills log2cpm data for an experiment for which only counts are available. Not intended for routine use,
 * since new experiments get log2cpm computed when they are loaded.
 * <p>
 * The count quantitation type is switched to non-preferred before the log2cpm data is added. If any step after
 * that fails, the error is logged and the count quantitation type is switched back to preferred.
 *
 * @param ee the experiment to add log2cpm data to
 * @param qt the quantitation type of the existing count data
 */
public void log2cpmFromCounts(ExpressionExperiment ee, QuantitationType qt) {
    ee = experimentService.thawLite(ee);

    // The counts must come from the raw vectors, not from data that has already been normalized etc.
    // The quantitation type is expected to be the currently preferred one (so this isn't done twice by
    // accident); note that no such check is performed in this method.
    Collection<RawExpressionDataVector> rawCountVectors = rawExpressionDataVectorService.find(qt);
    ExpressionDataDoubleMatrix rawCounts = new ExpressionDataDoubleMatrix(rawCountVectors);

    try {
        // Demote the count quantitation type.
        qt.setIsPreferred(false);
        qtService.update(qt);

        // Re-thaw so that the updated quantitation type is attached.
        ee = experimentService.thawLite(ee);

        QuantitationType cpmQt = this.makelog2cpmQt();
        DoubleMatrix1D columnTotals = MatrixStats.colSums(rawCounts.getMatrix());
        DoubleMatrix<CompositeSequence, BioMaterial> cpmValues = MatrixStats.convertToLog2Cpm(rawCounts.getMatrix(), columnTotals);
        ExpressionDataDoubleMatrix cpmData = new ExpressionDataDoubleMatrix(ee, cpmQt, cpmValues);
        assert cpmData.getQuantitationTypes().iterator().next().getIsPreferred();

        Collection<ArrayDesign> usedPlatforms = experimentService.getArrayDesignsUsed(ee);
        if (usedPlatforms.size() > 1) {
            throw new IllegalArgumentException("Cannot apply to multiplatform data sets");
        }
        ArrayDesign platform = usedPlatforms.iterator().next();
        this.addData(ee, platform, cpmData);
    } catch (Exception e) {
        DataUpdater.log.error(e, e);
        // Best-effort recovery: make the counts preferred again.
        qt.setIsPreferred(true);
        qtService.update(qt);
    }
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) RawExpressionDataVector(ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector) ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix) DoubleMatrix1D(cern.colt.matrix.DoubleMatrix1D) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) ConfigurationException(org.apache.commons.configuration.ConfigurationException) PreprocessingException(ubic.gemma.core.analysis.preprocess.PreprocessingException) IOException(java.io.IOException)

Example 23 with ExpressionDataDoubleMatrix

use of ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix in project Gemma by PavlidisLab.

the class SampleCoexpressionMatrixServiceImpl method create.

/**
 * Computes the sample correlation matrix for the experiment, verifies that its size agrees with the best
 * bioassay dimension of the underlying data matrix, and persists it.
 *
 * @param ee             the experiment whose vectors are loaded
 * @param useRegression  passed through to the data matrix loading step
 * @param removeOutliers passed through to the correlation matrix loading step
 * @return the correlation matrix that was persisted
 * @throws IllegalStateException if the number of rows in the correlation matrix differs from the number of
 *                               bioassays in the best bioassay dimension
 */
@Override
public DoubleMatrix<BioAssay, BioAssay> create(ExpressionExperiment ee, boolean useRegression, boolean removeOutliers) {
    // Load the data and derive the correlation matrix from it.
    ExpressionDataDoubleMatrix dataMatrix = this.loadDataMatrix(ee, useRegression, this.loadVectors(ee));
    DoubleMatrix<BioAssay, BioAssay> correlations = this.loadCorMat(removeOutliers, dataMatrix);

    // Consistency check: one row per bioassay of the best dimension.
    BioAssayDimension bestDimension = dataMatrix.getBestBioAssayDimension();
    int matrixRows = correlations.rows();
    int dimensionSize = bestDimension.getBioAssays().size();
    if (matrixRows != dimensionSize) {
        String message = "Number of bioassays doesn't match length of the best bioAssayDimension. BAs in dimension: " + dimensionSize + ", rows in cormat: " + matrixRows;
        throw new IllegalStateException(message);
    }

    // Persist and hand the matrix back to the caller.
    sampleCoexpressionMatrixHelperService.create(correlations, bestDimension, dataMatrix.getExpressionExperiment());
    return correlations;
}
Also used : BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay)

Example 24 with ExpressionDataDoubleMatrix

use of ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix in project Gemma by PavlidisLab.

the class TwoChannelMissingValuesImpl method computeMissingValues.

/**
 * Loads the vectors for the experiment's useful quantitation types (raw vectors, or processed vectors when no raw
 * vectors are found), builds the preferred, signal and background matrices from them and delegates to the
 * matrix-based overload to compute the missing-value vectors.
 *
 * @param ee                          the experiment; it is thawed before use
 * @param signalToNoiseThreshold      threshold passed through to the matrix-based computation
 * @param extraMissingValueIndicators optional (may be null) manually-set missing value indicators
 * @return a new set holding the vectors returned by the matrix-based computation
 */
@Override
public Collection<RawExpressionDataVector> computeMissingValues(ExpressionExperiment ee, double signalToNoiseThreshold, Collection<Double> extraMissingValueIndicators) {
    ee = expressionExperimentService.thawLite(ee);
    Collection<QuantitationType> usefulQuantitationTypes = ExpressionDataMatrixBuilder.getUsefulQuantitationTypes(ee);
    StopWatch timer = new StopWatch();
    timer.start();
    TwoChannelMissingValuesImpl.log.info("Loading vectors ...");
    Collection<RawExpressionDataVector> rawVectors = rawExpressionDataVectorService.find(usefulQuantitationTypes);
    Collection<ProcessedExpressionDataVector> procVectors = new HashSet<>();
    if (rawVectors.isEmpty()) {
        // Fall back to processed vectors when there are no raw ones.
        procVectors = processedExpressionDataVectorService.find(usefulQuantitationTypes);
        processedExpressionDataVectorService.thaw(procVectors);
    } else {
        rawExpressionDataVectorService.thaw(rawVectors);
    }
    timer.stop();
    this.logTimeInfo(timer, procVectors.size() + rawVectors.size());
    Collection<? extends DesignElementDataVector> builderVectors = new HashSet<>(rawVectors.isEmpty() ? procVectors : rawVectors);
    // Report progress through the class logger like the rest of this method, not through standard output.
    TwoChannelMissingValuesImpl.log.info("Building matrix with vectors that I just thawed");
    ExpressionDataMatrixBuilder builder = new ExpressionDataMatrixBuilder(builderVectors);
    Collection<BioAssayDimension> dims = builder.getBioAssayDimensions();
    /*
     * Note we have to do this one array design at a time, because we are producing DesignElementDataVectors which
     * must be associated with the correct BioAssayDimension.
     */
    TwoChannelMissingValuesImpl.log.info("Study has " + dims.size() + " bioassaydimensions");
    if (extraMissingValueIndicators != null && !extraMissingValueIndicators.isEmpty()) {
        TwoChannelMissingValuesImpl.log.info("There are " + extraMissingValueIndicators.size() + " manually-set missing value indicators");
    }
    ExpressionDataDoubleMatrix preferredData = builder.getPreferredData();
    ExpressionDataDoubleMatrix bkgDataA = builder.getBackgroundChannelA();
    ExpressionDataDoubleMatrix bkgDataB = builder.getBackgroundChannelB();
    ExpressionDataDoubleMatrix signalDataA = builder.getSignalChannelA();
    ExpressionDataDoubleMatrix signalDataB = builder.getSignalChannelB();
    if (builder.isAnyMissing()) {
        // Same order as the channels were fetched above, so warnings appear in a stable order.
        warnIfMissingValues(builder, bkgDataA, "bkgDataA");
        warnIfMissingValues(builder, bkgDataB, "bkgDataB");
        warnIfMissingValues(builder, signalDataA, "signalDataA");
        warnIfMissingValues(builder, signalDataB, "signalDataB");
    }
    Collection<RawExpressionDataVector> dimRes = this.computeMissingValues(ee, preferredData, signalDataA, signalDataB, bkgDataA, bkgDataB, signalToNoiseThreshold, extraMissingValueIndicators);
    return new HashSet<>(dimRes);
}

/**
 * Logs a single warning if the builder reports missing values for any quantitation type of the given matrix.
 *
 * @param builder the builder that is asked for the number of missing values per quantitation type
 * @param data    the matrix whose quantitation types are checked; nothing is done when it is null
 * @param label   name of the matrix, appended to the warning message
 */
private static void warnIfMissingValues(ExpressionDataMatrixBuilder builder, ExpressionDataDoubleMatrix data, String label) {
    if (data == null) {
        return;
    }
    for (QuantitationType qt : data.getQuantitationTypes()) {
        if (builder.getNumMissingValues(qt) > 0) {
            TwoChannelMissingValuesImpl.log.warn("Missing values in " + label);
            // One warning per matrix is enough.
            return;
        }
    }
}
Also used : ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix) ProcessedExpressionDataVector(ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector) StopWatch(org.apache.commons.lang3.time.StopWatch) BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) RawExpressionDataVector(ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector) HashSet(java.util.HashSet)

Example 25 with ExpressionDataDoubleMatrix

use of ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix in project Gemma by PavlidisLab.

the class TwoChannelMissingValuesImpl method computeMissingValues.

/**
 * Tries to derive 'missing value' information for a two-channel data set. The computation is attempted even when
 * background intensities or one of the signal channels are unavailable, although having all four sets of values
 * is obviously preferable.
 *
 * @param source                      the source experiment; the new quantitation type is added to it
 * @param preferred                   preferred matrix
 * @param signalChannelA              signal channel A
 * @param signalChannelB              signal channel B
 * @param bkgChannelA                 background channel A
 * @param bkgChannelB                 background channel B
 * @param signalToNoiseThreshold      noise threshold
 * @param extraMissingValueIndicators extra missing value indicators
 * @return DesignElementDataVectors corresponding to a new PRESENTCALL quantitation type for the design elements and
 * biomaterial dimension represented in the inputs; an empty collection if validation of the inputs fails.
 */
private Collection<RawExpressionDataVector> computeMissingValues(ExpressionExperiment source, ExpressionDataDoubleMatrix preferred, ExpressionDataDoubleMatrix signalChannelA, ExpressionDataDoubleMatrix signalChannelB, ExpressionDataDoubleMatrix bkgChannelA, ExpressionDataDoubleMatrix bkgChannelB, double signalToNoiseThreshold, Collection<Double> extraMissingValueIndicators) {
    // Guard: nothing can be computed from inputs that fail validation.
    if (!this.validate(preferred, signalChannelA, signalChannelB, bkgChannelA, bkgChannelB, signalToNoiseThreshold)) {
        TwoChannelMissingValuesImpl.log.warn("Missing value computation cannot proceed");
        return new HashSet<>();
    }

    Collection<RawExpressionDataVector> computedVectors = new HashSet<>();
    ByteArrayConverter byteConverter = new ByteArrayConverter();

    // Channel A drives the iteration when present, otherwise channel B.
    ExpressionDataDoubleMatrix referenceChannel = signalChannelA != null ? signalChannelA : signalChannelB;

    // A signal threshold is only computed when neither background channel is available.
    Double thresholdOnSignal = Double.NaN;
    boolean noBackground = bkgChannelA == null && bkgChannelB == null;
    if (noBackground) {
        thresholdOnSignal = this.computeSignalThreshold(preferred, signalChannelA, signalChannelB, referenceChannel);
    }

    QuantitationType presentCallQt = this.getMissingDataQuantitationType(signalToNoiseThreshold, thresholdOnSignal);
    source.getQuantitationTypes().add(presentCallQt);

    int examined = 0;
    for (ExpressionDataMatrixRowElement row : referenceChannel.getRowElements()) {
        examined = this.examineVector(source, preferred, signalChannelA, signalChannelB, bkgChannelA, bkgChannelB, signalToNoiseThreshold, extraMissingValueIndicators, computedVectors, byteConverter, examined, referenceChannel, thresholdOnSignal, presentCallQt, row);
    }
    TwoChannelMissingValuesImpl.log.info("Finished: " + examined + " vectors examined for missing values");

    return twoChannelMissingValueHelperService.persist(source, computedVectors);
}
Also used : ExpressionDataMatrixRowElement(ubic.gemma.core.datastructure.matrix.ExpressionDataMatrixRowElement) ByteArrayConverter(ubic.basecode.io.ByteArrayConverter) RawExpressionDataVector(ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector) ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix) HashSet(java.util.HashSet)

Aggregations

ExpressionDataDoubleMatrix (ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix)41 CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence)18 BioMaterial (ubic.gemma.model.expression.biomaterial.BioMaterial)12 Test (org.junit.Test)9 BioAssay (ubic.gemma.model.expression.bioAssay.BioAssay)7 ArrayList (java.util.ArrayList)6 ExpressionExperiment (ubic.gemma.model.expression.experiment.ExpressionExperiment)6 DenseDoubleMatrix (ubic.basecode.dataStructure.matrix.DenseDoubleMatrix)5 AbstractGeoServiceTest (ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest)5 AlreadyExistsInSystemException (ubic.gemma.core.loader.util.AlreadyExistsInSystemException)5 ProcessedExpressionDataVector (ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector)5 DoubleMatrix1D (cern.colt.matrix.DoubleMatrix1D)4 DoubleMatrix2D (cern.colt.matrix.DoubleMatrix2D)4 InputStream (java.io.InputStream)4 DoubleVectorValueObject (ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject)4 RawExpressionDataVector (ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector)4 Algebra (cern.colt.matrix.linalg.Algebra)3 GeoDomainObjectGeneratorLocal (ubic.gemma.core.loader.expression.geo.GeoDomainObjectGeneratorLocal)3 QuantitationType (ubic.gemma.model.common.quantitationtype.QuantitationType)3 BioAssayDimension (ubic.gemma.model.expression.bioAssayData.BioAssayDimension)3