Search in sources :

Example 11 with ExpressionDataDoubleMatrix

use of ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix in project Gemma by PavlidisLab.

the class ExpressionExperimentBatchCorrectionServiceImpl method comBat.

/**
 * Builds an expression data matrix from the processed data vectors of the given experiment and
 * hands it to the matrix-based {@code comBat} overload.
 *
 * @param ee the experiment whose processed data vectors are loaded
 * @return the result of the matrix-based {@code comBat} call, or {@code null} when
 *         {@code getBatchFactor(ee)} yields no batch factor
 */
@Override
public ExpressionDataDoubleMatrix comBat(ExpressionExperiment ee) {
    // The batch factor is only checked for presence here; without one we bail out early.
    final ExperimentalFactor batchFactor = this.getBatchFactor(ee);
    if (batchFactor == null) {
        ExpressionExperimentBatchCorrectionServiceImpl.log.warn("No batch factor found");
        return null;
    }

    // Fetch the processed vectors and thaw them before building the matrix from them.
    final Collection<ProcessedExpressionDataVector> processedVectors =
            processedExpressionDataVectorService.getProcessedDataVectors(ee);
    processedExpressionDataVectorService.thaw(processedVectors);

    return this.comBat(new ExpressionDataDoubleMatrix(processedVectors));
}
Also used : ExperimentalFactor(ubic.gemma.model.expression.experiment.ExperimentalFactor) ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix) ProcessedExpressionDataVector(ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector)

Example 12 with ExpressionDataDoubleMatrix

use of ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix in project Gemma by PavlidisLab.

the class AffyProbeNameFilter method filter.

/**
 * Returns a new matrix containing only the rows whose probe name passes the enabled Affymetrix
 * name rules.
 * <p>
 * The name tested is that of the design element's biological characteristic when one is present,
 * otherwise the design element's own name. A row is dropped when any enabled rule matches:
 * {@code skip_ST} ("_st"), {@code skip_AFFX} ("AFFX"), {@code skip_F} ("_f_at"),
 * {@code skip_X} ("_x_at") or {@code skip_G} ("_g_at").
 *
 * @param data the matrix to filter
 * @return a new matrix built from {@code data} and the retained design elements
 */
@Override
public ExpressionDataDoubleMatrix filter(ExpressionDataDoubleMatrix data) {
    final int rowCount = data.rows();
    final List<CompositeSequence> retained = new ArrayList<>();

    for (int row = 0; row < rowCount; row++) {
        final CompositeSequence probe = data.getDesignElementForRow(row);
        assert probe != null;

        final BioSequence bioSequence = probe.getBiologicalCharacteristic();
        final String probeName = (bioSequence != null) ? bioSequence.getName() : probe.getName();

        // Rules are evaluated in order and short-circuit; a disabled rule never inspects the name.
        final boolean excluded =
                // 'st' means sense strand.
                (skip_ST && probeName.contains("_st"))
                // control probes.
                || (skip_AFFX && probeName.contains("AFFX"))
                // gene family.
                || (skip_F && probeName.contains("_f_at"))
                || (skip_X && probeName.contains("_x_at"))
                || (skip_G && probeName.contains("_g_at"));

        if (!excluded) {
            retained.add(probe);
        }
    }

    AffyProbeNameFilter.log.info("There are " + retained.size() + " rows left after Affy probe name filtering.");
    return new ExpressionDataDoubleMatrix(data, retained);
}
Also used : BioSequence(ubic.gemma.model.genome.biosequence.BioSequence) ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix) ArrayList(java.util.ArrayList) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence)

Example 13 with ExpressionDataDoubleMatrix

use of ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix in project Gemma by PavlidisLab.

the class RowLevelFilter method filter.

/**
 * Filters rows of the matrix by a per-row criterion value, keeping rows whose value is strictly
 * above the effective low threshold and at most the effective high threshold.
 * <p>
 * When both {@code lowCut} and {@code highCut} are at their "unset" sentinels
 * ({@code -Double.MAX_VALUE} and {@code Double.MAX_VALUE}) the input is returned unchanged.
 *
 * @param data the matrix to filter
 * @return {@code data} itself when no filtering is requested, otherwise a new matrix built from
 *         {@code data} and the retained design elements
 * @throws IllegalStateException if the computed high threshold is NaN
 * @throws RuntimeException      if the computed high threshold is not greater than the low one
 */
@Override
public ExpressionDataDoubleMatrix filter(ExpressionDataDoubleMatrix data) {
    final boolean lowCutUnset = lowCut == -Double.MAX_VALUE;
    final boolean highCutUnset = highCut == Double.MAX_VALUE;
    if (lowCutUnset && highCutUnset) {
        RowLevelFilter.log.info("No filtering requested");
        return data;
    }

    final int totalRows = data.rows();
    final DoubleArrayList criteria = new DoubleArrayList(new double[totalRows]);
    final int allNegativeCount = this.computeCriteria(data, criteria);

    // Thresholds are derived from a sorted copy; the unsorted list is kept for per-row lookup.
    final DoubleArrayList ranked = criteria.copy();
    ranked.sort();

    final int rowsToConsider = removeAllNegative ? totalRows - allNegativeCount : totalRows;
    final int firstIndex = removeAllNegative ? allNegativeCount : 0;

    final double highThreshold = this.getHighThreshold(ranked, rowsToConsider);
    final double lowThreshold = this.getLowThreshold(totalRows, ranked, rowsToConsider, firstIndex);

    if (Double.isNaN(highThreshold)) {
        throw new IllegalStateException("High threshold cut is NaN");
    }

    RowLevelFilter.log.debug("Low cut = " + lowThreshold);
    RowLevelFilter.log.debug("High cut = " + highThreshold);

    if (highThreshold <= lowThreshold) {
        throw new RuntimeException("High cut " + highThreshold + " is lower or same as low cut " + lowThreshold);
    }

    final List<CompositeSequence> retained = new ArrayList<>();
    for (int row = 0; row < totalRows; row++) {
        final double score = criteria.get(row);
        // Kept in positive form on purpose: a NaN score fails both comparisons and is dropped.
        final boolean withinRange = score > lowThreshold && score <= highThreshold;
        if (withinRange) {
            retained.add(data.getDesignElementForRow(row));
        }
    }

    this.logInfo(totalRows, retained);
    return new ExpressionDataDoubleMatrix(data, retained);
}
Also used : ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix) DoubleArrayList(cern.colt.list.DoubleArrayList) ArrayList(java.util.ArrayList) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence)

Example 14 with ExpressionDataDoubleMatrix

use of ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix in project Gemma by PavlidisLab.

the class SVDServiceHelperImpl method svd.

/**
 * Computes an SVD over the processed data of the given experiment, stores it through
 * {@code updatePca}, and returns the outcome of {@code svdFactorAnalysis} on the stored analysis.
 *
 * @param ee the experiment to analyse; must not be null (checked by assertion only)
 * @return the value object produced by {@code svdFactorAnalysis}
 * @throws IllegalArgumentException if the experiment has no processed data vectors
 */
@Override
public SVDValueObject svd(ExpressionExperiment ee) {
    assert ee != null;

    final Collection<ProcessedExpressionDataVector> processedVectors =
            processedExpressionDataVectorService.getProcessedDataVectors(ee);
    if (processedVectors.isEmpty()) {
        throw new IllegalArgumentException("Experiment must have processed data already to do SVD");
    }
    processedExpressionDataVectorService.thaw(processedVectors);

    final ExpressionDataDoubleMatrix dataMatrix = new ExpressionDataDoubleMatrix(processedVectors);

    SVDServiceHelperImpl.log.info("Starting SVD");
    final ExpressionDataSVD decomposition = new ExpressionDataSVD(dataMatrix);
    SVDServiceHelperImpl.log.info("SVD done, postprocessing and storing results.");

    // Save the results: the V matrix and the matrix's best bioassay dimension go into the PCA.
    final PrincipalComponentAnalysis storedPca = this.updatePca(ee, decomposition,
            decomposition.getV(), dataMatrix.getBestBioAssayDimension());
    return this.svdFactorAnalysis(storedPca);
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) PrincipalComponentAnalysis(ubic.gemma.model.analysis.expression.pca.PrincipalComponentAnalysis) ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix) ProcessedExpressionDataVector(ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector)

Example 15 with ExpressionDataDoubleMatrix

use of ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix in project Gemma by PavlidisLab.

the class ExpressionDataSVDTest method testMatrixReconstruct.

/**
 * Removing zero components should leave a matrix that matches the test data within a tolerance
 * of 0.001.
 */
@Test
public void testMatrixReconstruct() {
    ExpressionDataDoubleMatrix svdNormalize = svd.removeHighestComponents(0);
    assertNotNull(svdNormalize);
    // The comparison result used to be discarded, so the test could never fail on a mismatch.
    // A plain AssertionError avoids depending on which JUnit assertions the file imports.
    if (!RegressionTesting.closeEnough(testData.getMatrix(), svdNormalize.getMatrix(), 0.001)) {
        throw new AssertionError("Reconstructed matrix differs from the test data by more than 0.001");
    }
}
Also used : ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix) Test(org.junit.Test)

Aggregations

ExpressionDataDoubleMatrix (ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix)41 CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence)18 BioMaterial (ubic.gemma.model.expression.biomaterial.BioMaterial)12 Test (org.junit.Test)9 BioAssay (ubic.gemma.model.expression.bioAssay.BioAssay)7 ArrayList (java.util.ArrayList)6 ExpressionExperiment (ubic.gemma.model.expression.experiment.ExpressionExperiment)6 DenseDoubleMatrix (ubic.basecode.dataStructure.matrix.DenseDoubleMatrix)5 AbstractGeoServiceTest (ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest)5 AlreadyExistsInSystemException (ubic.gemma.core.loader.util.AlreadyExistsInSystemException)5 ProcessedExpressionDataVector (ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector)5 DoubleMatrix1D (cern.colt.matrix.DoubleMatrix1D)4 DoubleMatrix2D (cern.colt.matrix.DoubleMatrix2D)4 InputStream (java.io.InputStream)4 DoubleVectorValueObject (ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject)4 RawExpressionDataVector (ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector)4 Algebra (cern.colt.matrix.linalg.Algebra)3 GeoDomainObjectGeneratorLocal (ubic.gemma.core.loader.expression.geo.GeoDomainObjectGeneratorLocal)3 QuantitationType (ubic.gemma.model.common.quantitationtype.QuantitationType)3 BioAssayDimension (ubic.gemma.model.expression.bioAssayData.BioAssayDimension)3