Search in sources :

Example 21 with FactorValue

use of ubic.gemma.model.expression.experiment.FactorValue in project Gemma by PavlidisLab.

the class SVDServiceHelperImpl method populateBMFMap.

/**
 * Adds one entry per factor value of the given biomaterial to the supplied map.
 * <p>
 * The outer map is keyed by experimental factor; the inner map is keyed by biomaterial ID. The stored number is
 * the factor value's measurement parsed as a double when a measurement is present ({@code NaN}, with a warning, if
 * it does not parse), and otherwise the factor value's own ID converted to a double.
 *
 * @param bioMaterialFactorMap map to populate; an inner map is created for any factor not yet present as a key
 * @param bm                   biomaterial whose factor values are recorded
 */
public static void populateBMFMap(Map<ExperimentalFactor, Map<Long, Double>> bioMaterialFactorMap, BioMaterial bm) {
    for (FactorValue factorValue : bm.getFactorValues()) {
        ExperimentalFactor factor = factorValue.getExperimentalFactor();
        if (!bioMaterialFactorMap.containsKey(factor)) {
            bioMaterialFactorMap.put(factor, new HashMap<Long, Double>());
        }

        double numericValue;
        if (fvHasNoMeasurement(factorValue)) {
            // Hack: categorical factor values are represented by their ID, stored as a double.
            numericValue = factorValue.getId().doubleValue();
        } else {
            try {
                numericValue = Double.parseDouble(factorValue.getMeasurement().getValue());
            } catch (NumberFormatException e) {
                SVDServiceHelperImpl.log.warn("Measurement wasn't a number for " + factorValue);
                numericValue = Double.NaN;
            }
        }

        Map<Long, Double> valuesByBioMaterial = bioMaterialFactorMap.get(factor);
        valuesByBioMaterial.put(bm.getId(), numericValue);
    }
}

/** Returns true when the factor value carries no measurement. */
private static boolean fvHasNoMeasurement(FactorValue factorValue) {
    return factorValue.getMeasurement() == null;
}
Also used : FactorValue(ubic.gemma.model.expression.experiment.FactorValue) ExperimentalFactor(ubic.gemma.model.expression.experiment.ExperimentalFactor)

Example 22 with FactorValue

use of ubic.gemma.model.expression.experiment.FactorValue in project Gemma by PavlidisLab.

the class BatchConfound method factorBatchConfoundTest.

/**
 * Tests every non-batch factor in the map for association with the batch factor.
 * <p>
 * The first factor for which {@code ExperimentalDesignUtils.isBatch} is true is used as the batch factor. Factors
 * reported as continuous by {@code ExperimentalDesignUtils.isContinuous} are tested with Kruskal-Wallis (values
 * grouped by batch); all other factors are tested with a chi-square test on the batch x factor-value count table.
 * <p>
 * The values in the inner maps are read as factor value IDs stored as doubles for the batch factor and for
 * non-continuous factors, and as plain numeric values for continuous factors.
 *
 * @param ee                   experiment the results are attributed to (used for logging and in the result objects)
 * @param bioMaterialFactorMap factor -> (biomaterial ID -> numeric value)
 * @return one summary per non-batch factor; empty if no batch factor is present in the map
 * @throws IllegalArgumentException declared by the signature; chi-square input errors are caught and reported as
 *                                  a {@code NaN} statistic rather than propagated
 */
private static Collection<BatchConfoundValueObject> factorBatchConfoundTest(ExpressionExperiment ee, Map<ExperimentalFactor, Map<Long, Double>> bioMaterialFactorMap) throws IllegalArgumentException {
    // biomaterial ID -> batch factor value ID
    Map<Long, Long> batchMembership = new HashMap<>();
    ExperimentalFactor batchFactor = null;
    // batch factor value ID -> 0-based batch index
    Map<Long, Integer> batchIndexes = new HashMap<>();
    for (ExperimentalFactor ef : bioMaterialFactorMap.keySet()) {
        if (ExperimentalDesignUtils.isBatch(ef)) {
            batchFactor = ef;
            Map<Long, Double> bmToFv = bioMaterialFactorMap.get(batchFactor);
            if (bmToFv == null) {
                log.warn("No biomaterial --> factor value map for batch factor: " + batchFactor);
                continue;
            }
            int index = 0;
            for (FactorValue fv : batchFactor.getFactorValues()) {
                batchIndexes.put(fv.getId(), index++);
            }
            for (Map.Entry<Long, Double> entry : bmToFv.entrySet()) {
                batchMembership.put(entry.getKey(), entry.getValue().longValue());
            }
            break;
        }
    }
    Set<BatchConfoundValueObject> result = new HashSet<>();
    if (batchFactor == null) {
        return result;
    }
    for (ExperimentalFactor ef : bioMaterialFactorMap.keySet()) {
        if (ef.equals(batchFactor))
            continue;
        Map<Long, Double> bmToFv = bioMaterialFactorMap.get(ef);
        int numBioMaterials = bmToFv.keySet().size();
        assert numBioMaterials > 0 : "No biomaterials for " + ef;
        double p = Double.NaN;
        double chiSquare;
        int df;
        int numBatches = batchFactor.getFactorValues().size();
        if (ExperimentalDesignUtils.isContinuous(ef)) {
            // Lists are grown with add() so that samples skipped below do not leave zero-filled padding behind.
            DoubleArrayList factorValues = new DoubleArrayList(numBioMaterials);
            IntArrayList batches = new IntArrayList(numBioMaterials);
            for (Map.Entry<Long, Double> entry : bmToFv.entrySet()) {
                Long bmId = entry.getKey();
                Long batch = batchMembership.get(bmId);
                if (batch == null) {
                    // Same handling as the categorical branch; previously this unboxed null and threw.
                    log.warn("No batch membership for : " + bmId);
                    continue;
                }
                factorValues.add(entry.getValue());
                batches.add(batchIndexes.get(batch));
            }
            p = KruskalWallis.test(factorValues, batches);
            df = KruskalWallis.dof(factorValues, batches);
            chiSquare = KruskalWallis.kwStatistic(factorValues, batches);
            log.debug("KWallis\t" + ee.getId() + "\t" + ee.getShortName() + "\t" + ef.getId() + "\t" + ef.getName() + "\t" + String.format("%.2f", chiSquare) + "\t" + df + "\t" + String.format("%.2g", p) + "\t" + numBatches);
        } else {
            // factor value ID -> 0-based column index
            Map<Long, Integer> factorValueIndexes = new HashMap<>();
            int index = 0;
            for (FactorValue fv : ef.getFactorValues()) {
                factorValueIndexes.put(fv.getId(), index++);
            }
            int numLevels = ef.getFactorValues().size();
            // Rows are batches, columns are factor values; Java zero-initialises the cells.
            long[][] counts = new long[numBatches][numLevels];
            for (Map.Entry<Long, Double> entry : bmToFv.entrySet()) {
                Long bm = entry.getKey();
                long fv = entry.getValue().longValue();
                Long batch = batchMembership.get(bm);
                if (batch == null) {
                    log.warn("No batch membership for : " + bm);
                    continue;
                }
                int batchIndex = batchIndexes.get(batch);
                int factorIndex = factorValueIndexes.get(fv);
                counts[batchIndex][factorIndex]++;
            }
            ChiSquareTest cst = new ChiSquareTest();
            try {
                chiSquare = cst.chiSquare(counts);
            } catch (IllegalArgumentException e) {
                log.warn("IllegalArgumentException exception computing ChiSq for : " + ef + "; Error was: " + e.getMessage());
                chiSquare = Double.NaN;
            }
            // Computed from the dimensions directly so an empty table is never indexed.
            df = (numBatches - 1) * (numLevels - 1);
            if (!Double.isNaN(chiSquare)) {
                // Only build the distribution when the statistic is usable: for degenerate tables (fewer than two
                // rows or columns) df is not positive and the constructor would throw, defeating the catch above.
                ChiSquaredDistribution distribution = new ChiSquaredDistribution(df);
                p = 1.0 - distribution.cumulativeProbability(chiSquare);
            }
            log.debug("ChiSq\t" + ee.getId() + "\t" + ee.getShortName() + "\t" + ef.getId() + "\t" + ef.getName() + "\t" + String.format("%.2f", chiSquare) + "\t" + df + "\t" + String.format("%.2g", p) + "\t" + numBatches);
        }
        BatchConfoundValueObject summary = new BatchConfoundValueObject(ee, ef, chiSquare, df, p, numBatches);
        result.add(summary);
    }
    return result;
}
Also used : ChiSquaredDistribution(org.apache.commons.math3.distribution.ChiSquaredDistribution) FactorValue(ubic.gemma.model.expression.experiment.FactorValue) ExperimentalFactor(ubic.gemma.model.expression.experiment.ExperimentalFactor) DoubleArrayList(cern.colt.list.DoubleArrayList) IntArrayList(cern.colt.list.IntArrayList) ChiSquareTest(org.apache.commons.math3.stat.inference.ChiSquareTest)

Example 23 with FactorValue

use of ubic.gemma.model.expression.experiment.FactorValue in project Gemma by PavlidisLab.

the class ExpressionExperimentBatchCorrectionServiceImpl method checkCorrectability.

/**
 * Decides whether batch correction can be run on the experiment.
 * <p>
 * Returns false (after logging the reason) when the experiment already has a batch-corrected quantitation type,
 * has no batch factor, has a reported batch confound while {@code force} is false, or has a batch containing
 * fewer than two samples.
 *
 * @param ee    experiment to check
 * @param force when true, a reported batch confound does not block correction
 * @return true if none of the blocking conditions above applies
 */
@Override
public boolean checkCorrectability(ExpressionExperiment ee, boolean force) {
    for (QuantitationType quantitationType : expressionExperimentService.getQuantitationTypes(ee)) {
        if (quantitationType.getIsBatchCorrected()) {
            ExpressionExperimentBatchCorrectionServiceImpl.log.warn("Experiment already has a batch-corrected quantitation type: " + ee + ": " + quantitationType);
            return false;
        }
    }

    ExperimentalFactor batchFactor = this.getBatchFactor(ee);
    if (batchFactor == null) {
        ExpressionExperimentBatchCorrectionServiceImpl.log.warn("No batch factor found: " + ee);
        return false;
    }

    String confound = expressionExperimentService.getBatchConfound(ee);
    if (!force && confound != null) {
        ExpressionExperimentBatchCorrectionServiceImpl.log.warn("Experiment can not be batch corrected: " + confound);
        ExpressionExperimentBatchCorrectionServiceImpl.log.info("To force batch-correction of a confounded experiment, use the force option (note, that this option also allows outliers while batch correcting).");
        return false;
    }

    // Count samples per batch, visiting each biomaterial once even if several bioassays use it.
    // Undersized batches generally won't occur if the batches were defined by Gemma.
    Map<Long, Integer> samplesPerBatch = new HashMap<>();
    Set<BioMaterial> visited = new HashSet<>();
    for (BioAssay assay : ee.getBioAssays()) {
        BioMaterial sample = assay.getSampleUsed();
        if (!visited.add(sample)) {
            continue;
        }
        for (FactorValue factorValue : sample.getFactorValues()) {
            if (!factorValue.getExperimentalFactor().equals(batchFactor)) {
                continue;
            }
            Long batchId = factorValue.getId();
            Integer countSoFar = samplesPerBatch.get(batchId);
            samplesPerBatch.put(batchId, countSoFar == null ? 1 : countSoFar + 1);
        }
    }

    // Merging small batches could be considered here, but that is already done when the batch factor is
    // created, so batches should normally have at least 2 samples.
    for (Map.Entry<Long, Integer> batch : samplesPerBatch.entrySet()) {
        if (batch.getValue() < 2) {
            ExpressionExperimentBatchCorrectionServiceImpl.log.info("Batch with only one sample detected, correction not possible: " + ee + ", batchId=" + batch.getKey());
            return false;
        }
    }
    return true;
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) FactorValue(ubic.gemma.model.expression.experiment.FactorValue) ExperimentalFactor(ubic.gemma.model.expression.experiment.ExperimentalFactor) QuantitationType(ubic.gemma.model.common.quantitationtype.QuantitationType) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay)

Example 24 with FactorValue

use of ubic.gemma.model.expression.experiment.FactorValue in project Gemma by PavlidisLab.

the class TTestAnalyzerTest method testOneSampleTtest.

/**
 * Runs the analyzer on a design reduced to a single factor value and checks the resulting p-values for a handful
 * of probes against fixed reference numbers. Skipped (with a warning) when no R connection is available.
 */
@Test
public void testOneSampleTtest() throws Exception {
    if (!connected) {
        log.warn("Could not establish R connection.  Skipping test ...");
        return;
    }

    this.configureVectors(super.biomaterials, "/data/stat-tests/onesample-ttest-data.txt");
    this.configureMocks();

    // Reduce the factor to one level: the second factor value is dropped from the factor and from every sample,
    // and the first one is assigned to every sample.
    Iterator<FactorValue> levels = experimentalFactorA_Area.getFactorValues().iterator();
    FactorValue kept = levels.next();
    FactorValue dropped = levels.next();
    experimentalFactorA_Area.getFactorValues().remove(dropped);
    for (BioMaterial sample : super.biomaterials) {
        sample.getFactorValues().remove(dropped);
        sample.getFactorValues().add(kept);
    }

    // must be for one-sample to make sense.
    quantitationType.setIsRatio(true);
    quantitationType.setScale(ScaleType.LOG2);

    Collection<ExperimentalFactor> factors = new HashSet<>();
    factors.add(super.experimentalFactorA_Area);
    DifferentialExpressionAnalysisConfig config = new DifferentialExpressionAnalysisConfig();
    config.setFactorsToInclude(factors);

    DifferentialExpressionAnalysis analysis = analyzer.run(expressionExperiment, config).iterator().next();
    ExpressionAnalysisResultSet resultSet = analysis.getResultSets().iterator().next();

    assertEquals(null, resultSet.getBaselineGroup());
    assertEquals(BaseAnalyzerConfigurationTest.NUM_DESIGN_ELEMENTS - 4, resultSet.getResults().size());

    // Spot-check p-values; the effect-size checks for probe_75 (-0.1507) and probe_94 (6.6087) are not active.
    for (DifferentialExpressionAnalysisResult result : resultSet.getResults()) {
        CompositeSequence probe = result.getProbe();
        Double pvalue = result.getPvalue();
        log.debug("probe: " + probe + "; p-value: " + pvalue);
        assertNotNull(pvalue);
        switch(probe.getName()) {
            case "probe_0":
                assertEquals(0.03505, pvalue, 0.00001);
                break;
            case "probe_16":
                assertEquals(0.03476, pvalue, 0.0001);
                break;
            case "probe_17":
                assertEquals(0.03578, pvalue, 0.0001);
                break;
            case "probe_75":
                assertEquals(0.8897, pvalue, 0.0001);
                break;
            case "probe_94":
                assertEquals(0.002717, pvalue, 0.0001);
                break;
            default:
                // Remaining probes only need a non-null p-value.
                break;
        }
    }
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) FactorValue(ubic.gemma.model.expression.experiment.FactorValue) ExperimentalFactor(ubic.gemma.model.expression.experiment.ExperimentalFactor) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 25 with FactorValue

use of ubic.gemma.model.expression.experiment.FactorValue in project Gemma by PavlidisLab.

the class CharacteristicServiceTest method setup.

/**
 * Builds the fixture: a persistent test experiment with two characteristics, one characteristic on the sample of
 * one of its bioassays, the helper-generated factors and factor values, and one characteristic on a factor value.
 */
@Before
public void setup() {
    // Experiment-level characteristics; the two created are kept in fields.
    ee = this.getTestPersistentBasicExpressionExperiment();
    ee.setCharacteristics(this.getTestPersistentCharacteristics(2));
    Characteristic[] experimentCharacteristics = ee.getCharacteristics().toArray(new Characteristic[0]);
    eeChar1 = experimentCharacteristics[0];
    eeChar2 = experimentCharacteristics[1];
    eeService.update(ee);

    // Biomaterial-level characteristic on the sample of the first bioassay returned.
    BioMaterial sample = ee.getBioAssays().toArray(new BioAssay[0])[0].getSampleUsed();
    sample.setCharacteristics(this.getTestPersistentCharacteristics(1));
    bmService.update(sample);

    // Attach the helper-generated factors, then factor values for one of them.
    for (ExperimentalFactor generatedFactor : testHelper.getExperimentalFactors(ee.getExperimentalDesign())) {
        eeService.addFactor(ee, generatedFactor);
    }
    ExperimentalFactor chosenFactor = ee.getExperimentalDesign().getExperimentalFactors().iterator().next();
    for (FactorValue generatedValue : testHelper.getFactorValues(chosenFactor)) {
        eeService.addFactorValue(ee, generatedValue);
    }

    // Factor-value-level characteristic.
    FactorValue annotatedValue = chosenFactor.getFactorValues().iterator().next();
    annotatedValue.setCharacteristics(this.getTestPersistentCharacteristics(1));
    fvService.update(annotatedValue);
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) FactorValue(ubic.gemma.model.expression.experiment.FactorValue) ExperimentalFactor(ubic.gemma.model.expression.experiment.ExperimentalFactor) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay) Before(org.junit.Before)

Aggregations

FactorValue (ubic.gemma.model.expression.experiment.FactorValue)55 ExperimentalFactor (ubic.gemma.model.expression.experiment.ExperimentalFactor)30 BioMaterial (ubic.gemma.model.expression.biomaterial.BioMaterial)27 Test (org.junit.Test)12 VocabCharacteristic (ubic.gemma.model.common.description.VocabCharacteristic)8 BioAssay (ubic.gemma.model.expression.bioAssay.BioAssay)8 ExpressionExperiment (ubic.gemma.model.expression.experiment.ExpressionExperiment)8 HashSet (java.util.HashSet)7 Characteristic (ubic.gemma.model.common.description.Characteristic)6 DifferentialExpressionAnalysis (ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysis)5 ArrayList (java.util.ArrayList)4 AbstractGeoServiceTest (ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest)4 CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence)4 FactorValueValueObject (ubic.gemma.model.expression.experiment.FactorValueValueObject)4 StopWatch (org.apache.commons.lang3.time.StopWatch)3 DifferentialExpressionAnalysisResult (ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisResult)3 ExpressionAnalysisResultSet (ubic.gemma.model.analysis.expression.diff.ExpressionAnalysisResultSet)3 AnnotationValueObject (ubic.gemma.model.common.description.AnnotationValueObject)3 Measurement (ubic.gemma.model.common.measurement.Measurement)3 BioAssaySet (ubic.gemma.model.expression.experiment.BioAssaySet)3