use of ubic.gemma.model.expression.experiment.FactorValue in project Gemma by PavlidisLab.
the class SVDServiceHelperImpl method populateBMFMap.
/**
 * Adds one entry per factor value of {@code bm} to {@code bioMaterialFactorMap}, keyed first by the
 * factor value's experimental factor and then by the biomaterial's ID.
 * <p>
 * The stored number is:
 * <ul>
 * <li>the parsed measurement value, when the factor value has a measurement;</li>
 * <li>{@link Double#NaN} when the measurement value is missing or is not a parseable number
 * (a warning is logged);</li>
 * <li>otherwise the factor value's ID converted to a double.</li>
 * </ul>
 *
 * @param bioMaterialFactorMap map to populate; an inner map is created for a factor on first use
 * @param bm                   biomaterial whose factor values are recorded
 */
public static void populateBMFMap(Map<ExperimentalFactor, Map<Long, Double>> bioMaterialFactorMap, BioMaterial bm) {
    for (FactorValue fv : bm.getFactorValues()) {
        ExperimentalFactor experimentalFactor = fv.getExperimentalFactor();

        // Single lookup instead of containsKey + get.
        Map<Long, Double> valuesByBioMaterial = bioMaterialFactorMap.get(experimentalFactor);
        if (valuesByBioMaterial == null) {
            valuesByBioMaterial = new HashMap<Long, Double>();
            bioMaterialFactorMap.put(experimentalFactor, valuesByBioMaterial);
        }

        double valueToStore;
        if (fv.getMeasurement() != null) {
            String rawValue = fv.getMeasurement().getValue();
            if (rawValue == null) {
                // Double.parseDouble(null) throws NullPointerException rather than
                // NumberFormatException, so a missing value has to be handled explicitly.
                SVDServiceHelperImpl.log.warn("Measurement had no value for " + fv);
                valueToStore = Double.NaN;
            } else {
                try {
                    valueToStore = Double.parseDouble(rawValue);
                } catch (NumberFormatException e) {
                    SVDServiceHelperImpl.log.warn("Measurement wasn't a number for " + fv);
                    valueToStore = Double.NaN;
                }
            }
        } else {
            /*
             * This is a hack. We're storing the ID but as a double.
             */
            valueToStore = fv.getId().doubleValue();
        }
        valuesByBioMaterial.put(bm.getId(), valueToStore);
    }
}
use of ubic.gemma.model.expression.experiment.FactorValue in project Gemma by PavlidisLab.
the class BatchConfound method factorBatchConfoundTest.
/**
 * Tests every non-batch factor in {@code bioMaterialFactorMap} for association with the batch factor.
 * <p>
 * The batch factor is the first key for which {@code ExperimentalDesignUtils.isBatch} returns true. For
 * each other factor:
 * <ul>
 * <li>if {@code ExperimentalDesignUtils.isContinuous} is true, a Kruskal-Wallis test is run on the
 * factor's values grouped by batch;</li>
 * <li>otherwise a chi-square test is run on the batch x factor-value contingency table.</li>
 * </ul>
 * Biomaterials that have no batch assignment, or whose stored batch / factor-value ID is not among the
 * corresponding factor's values, are skipped with a warning in both branches.
 *
 * @param ee                   experiment being tested; used for the result objects and debug logging
 * @param bioMaterialFactorMap factor -> (biomaterial ID -> value); for categorical factors and the batch
 *                             factor the value is read back as a factor value ID via {@code longValue()}
 * @return one summary per non-batch factor; empty if no batch factor is present
 * @throws IllegalArgumentException declared by the original signature
 */
private static Collection<BatchConfoundValueObject> factorBatchConfoundTest(ExpressionExperiment ee, Map<ExperimentalFactor, Map<Long, Double>> bioMaterialFactorMap) throws IllegalArgumentException {
    Map<Long, Long> batchMembership = new HashMap<>();
    ExperimentalFactor batchFactor = null;
    Map<Long, Integer> batchIndexes = new HashMap<>();
    for (ExperimentalFactor ef : bioMaterialFactorMap.keySet()) {
        if (ExperimentalDesignUtils.isBatch(ef)) {
            batchFactor = ef;
            Map<Long, Double> bmToFv = bioMaterialFactorMap.get(batchFactor);
            if (bmToFv == null) {
                log.warn("No biomaterial --> factor value map for batch factor: " + batchFactor);
                continue;
            }
            // Give each batch (factor value of the batch factor) a dense 0-based index.
            int index = 0;
            for (FactorValue fv : batchFactor.getFactorValues()) {
                batchIndexes.put(fv.getId(), index++);
            }
            // The stored double is the batch factor value ID (see the map's contract).
            for (Map.Entry<Long, Double> entry : bmToFv.entrySet()) {
                batchMembership.put(entry.getKey(), entry.getValue().longValue());
            }
            break;
        }
    }

    Set<BatchConfoundValueObject> result = new HashSet<>();
    if (batchFactor == null) {
        return result;
    }

    for (ExperimentalFactor ef : bioMaterialFactorMap.keySet()) {
        if (ef.equals(batchFactor))
            continue;

        Map<Long, Double> bmToFv = bioMaterialFactorMap.get(ef);
        int numBioMaterials = bmToFv.size();
        assert numBioMaterials > 0 : "No biomaterials for " + ef;

        double p = Double.NaN;
        double chiSquare;
        int df;
        int numBatches = batchFactor.getFactorValues().size();

        if (ExperimentalDesignUtils.isContinuous(ef)) {
            // Lists are grown with add() so that skipped biomaterials leave no placeholder entries.
            DoubleArrayList factorValues = new DoubleArrayList(numBioMaterials);
            IntArrayList batches = new IntArrayList(numBioMaterials);
            for (Map.Entry<Long, Double> entry : bmToFv.entrySet()) {
                Long bmId = entry.getKey();
                Long batch = batchMembership.get(bmId);
                if (batch == null) {
                    // Same handling as the categorical branch; previously this unboxed null and threw.
                    log.warn("No batch membership for : " + bmId);
                    continue;
                }
                Integer batchIndex = batchIndexes.get(batch);
                if (batchIndex == null) {
                    log.warn("Batch " + batch + " is not a value of the batch factor, skipping biomaterial: " + bmId);
                    continue;
                }
                factorValues.add(entry.getValue());
                batches.add(batchIndex);
            }

            p = KruskalWallis.test(factorValues, batches);
            df = KruskalWallis.dof(factorValues, batches);
            chiSquare = KruskalWallis.kwStatistic(factorValues, batches);
            log.debug("KWallis\t" + ee.getId() + "\t" + ee.getShortName() + "\t" + ef.getId() + "\t" + ef.getName() + "\t" + String.format("%.2f", chiSquare) + "\t" + df + "\t" + String.format("%.2g", p) + "\t" + numBatches);
        } else {
            Map<Long, Integer> factorValueIndexes = new HashMap<>();
            int index = 0;
            for (FactorValue fv : ef.getFactorValues()) {
                factorValueIndexes.put(fv.getId(), index++);
            }
            int numLevels = ef.getFactorValues().size();

            // Contingency table; Java zero-initialises the array, so no explicit clearing is needed.
            long[][] counts = new long[numBatches][numLevels];
            for (Map.Entry<Long, Double> entry : bmToFv.entrySet()) {
                Long bm = entry.getKey();
                Long batch = batchMembership.get(bm);
                if (batch == null) {
                    log.warn("No batch membership for : " + bm);
                    continue;
                }
                Integer batchIndex = batchIndexes.get(batch);
                Integer factorIndex = factorValueIndexes.get(entry.getValue().longValue());
                if (batchIndex == null || factorIndex == null) {
                    log.warn("Batch or factor value not recognized for " + ef + ", skipping biomaterial: " + bm);
                    continue;
                }
                counts[batchIndex][factorIndex]++;
            }

            ChiSquareTest cst = new ChiSquareTest();
            try {
                chiSquare = cst.chiSquare(counts);
            } catch (IllegalArgumentException e) {
                log.warn("IllegalArgumentException exception computing ChiSq for : " + ef + "; Error was: " + e.getMessage());
                chiSquare = Double.NaN;
            }

            // Computed from the dimensions directly so an empty table cannot trigger counts[0].
            df = (numBatches - 1) * (numLevels - 1);

            // Only build the distribution when the statistic is usable: a table with a single row or
            // column yields NaN above and df == 0, which ChiSquaredDistribution rejects.
            if (!Double.isNaN(chiSquare) && df > 0) {
                ChiSquaredDistribution distribution = new ChiSquaredDistribution(df);
                p = 1.0 - distribution.cumulativeProbability(chiSquare);
            }
            log.debug("ChiSq\t" + ee.getId() + "\t" + ee.getShortName() + "\t" + ef.getId() + "\t" + ef.getName() + "\t" + String.format("%.2f", chiSquare) + "\t" + df + "\t" + String.format("%.2g", p) + "\t" + numBatches);
        }

        BatchConfoundValueObject summary = new BatchConfoundValueObject(ee, ef, chiSquare, df, p, numBatches);
        result.add(summary);
    }
    return result;
}
use of ubic.gemma.model.expression.experiment.FactorValue in project Gemma by PavlidisLab.
the class ExpressionExperimentBatchCorrectionServiceImpl method checkCorrectability.
/**
 * Decides whether {@code ee} can be batch corrected.
 * <p>
 * Returns {@code false} (after logging the reason) when any of the following holds, checked in this order:
 * <ol>
 * <li>one of the experiment's quantitation types is already flagged as batch-corrected;</li>
 * <li>no batch factor is found;</li>
 * <li>a batch confound is reported and {@code force} is not set;</li>
 * <li>some batch has fewer than two samples.</li>
 * </ol>
 *
 * @param ee    experiment to check
 * @param force when true, a reported batch confound does not block correction
 * @return true if none of the conditions above applies
 */
@Override
public boolean checkCorrectability(ExpressionExperiment ee, boolean force) {
    for (QuantitationType quantitationType : expressionExperimentService.getQuantitationTypes(ee)) {
        if (qtIsCorrected(quantitationType)) {
            ExpressionExperimentBatchCorrectionServiceImpl.log.warn("Experiment already has a batch-corrected quantitation type: " + ee + ": " + quantitationType);
            return false;
        }
    }

    ExperimentalFactor batch = this.getBatchFactor(ee);
    if (batch == null) {
        ExpressionExperimentBatchCorrectionServiceImpl.log.warn("No batch factor found: " + ee);
        return false;
    }

    String confoundDescription = expressionExperimentService.getBatchConfound(ee);
    boolean confoundIgnorable = force || confoundDescription == null;
    if (!confoundIgnorable) {
        ExpressionExperimentBatchCorrectionServiceImpl.log.warn("Experiment can not be batch corrected: " + confoundDescription);
        ExpressionExperimentBatchCorrectionServiceImpl.log.info("To force batch-correction of a confounded experiment, use the force option (note, that this option also allows outliers while batch correcting).");
        return false;
    }

    // Count samples per batch; each biomaterial is counted once even if several bioassays use it.
    Map<Long, Integer> samplesPerBatch = new HashMap<>();
    Set<BioMaterial> visited = new HashSet<>();
    for (BioAssay assay : ee.getBioAssays()) {
        BioMaterial sample = assay.getSampleUsed();
        if (!visited.add(sample)) {
            continue;
        }
        for (FactorValue value : sample.getFactorValues()) {
            if (!value.getExperimentalFactor().equals(batch)) {
                continue;
            }
            Long id = value.getId();
            Integer soFar = samplesPerBatch.get(id);
            samplesPerBatch.put(id, soFar == null ? 1 : soFar + 1);
        }
    }

    // A batch with a single sample makes correction impossible.
    for (Map.Entry<Long, Integer> tally : samplesPerBatch.entrySet()) {
        if (tally.getValue() < 2) {
            ExpressionExperimentBatchCorrectionServiceImpl.log.info("Batch with only one sample detected, correction not possible: " + ee + ", batchId=" + tally.getKey());
            return false;
        }
    }
    return true;
}

/** Reads the batch-corrected flag of a quantitation type. */
private static boolean qtIsCorrected(QuantitationType quantitationType) {
    return quantitationType.getIsBatchCorrected();
}
use of ubic.gemma.model.expression.experiment.FactorValue in project Gemma by PavlidisLab.
the class TTestAnalyzerTest method testOneSampleTtest.
/**
 * Runs the analyzer on a design reduced to a single factor value and checks the resulting p-values.
 * <p>
 * The test returns early (with a warning) when {@code connected} is false. Otherwise it loads
 * {@code /data/stat-tests/onesample-ttest-data.txt}, drops one of the factor's values so every
 * biomaterial carries the same remaining value, marks the quantitation type as a log2 ratio, and
 * verifies: no baseline group, {@code NUM_DESIGN_ELEMENTS - 4} results, a non-null p-value for every
 * result, and specific p-values for five named probes.
 */
@Test
public void testOneSampleTtest() throws Exception {
    if (!connected) {
        log.warn("Could not establish R connection. Skipping test ...");
        return;
    }

    this.configureVectors(super.biomaterials, "/data/stat-tests/onesample-ttest-data.txt");
    this.configureMocks();

    Collection<ExperimentalFactor> factorsUnderTest = new HashSet<>();
    factorsUnderTest.add(super.experimentalFactorA_Area);

    // Collapse the factor to a single level: keep the first value, drop the second everywhere.
    Iterator<FactorValue> levels = experimentalFactorA_Area.getFactorValues().iterator();
    FactorValue kept = levels.next();
    FactorValue dropped = levels.next();
    experimentalFactorA_Area.getFactorValues().remove(dropped);
    for (BioMaterial sample : super.biomaterials) {
        sample.getFactorValues().remove(dropped);
        sample.getFactorValues().add(kept);
    }

    // A one-sample test only makes sense on (log) ratios.
    quantitationType.setIsRatio(true);
    quantitationType.setScale(ScaleType.LOG2);

    DifferentialExpressionAnalysisConfig analysisConfig = new DifferentialExpressionAnalysisConfig();
    analysisConfig.setFactorsToInclude(factorsUnderTest);

    Collection<DifferentialExpressionAnalysis> analyses = analyzer.run(expressionExperiment, analysisConfig);
    DifferentialExpressionAnalysis firstAnalysis = analyses.iterator().next();
    ExpressionAnalysisResultSet firstResultSet = firstAnalysis.getResultSets().iterator().next();

    assertEquals(null, firstResultSet.getBaselineGroup());
    int resultCount = firstResultSet.getResults().size();
    assertEquals(BaseAnalyzerConfigurationTest.NUM_DESIGN_ELEMENTS - 4, resultCount);

    // Spot-check selected probes; effect-size checks were already disabled in the original test.
    for (DifferentialExpressionAnalysisResult result : firstResultSet.getResults()) {
        CompositeSequence probe = result.getProbe();
        Double pvalue = result.getPvalue();
        log.debug("probe: " + probe + "; p-value: " + pvalue);
        assertNotNull(pvalue);

        switch (probe.getName()) {
            case "probe_0":
                assertEquals(0.03505, pvalue, 0.00001);
                break;
            case "probe_16":
                assertEquals(0.03476, pvalue, 0.0001);
                break;
            case "probe_17":
                assertEquals(0.03578, pvalue, 0.0001);
                break;
            case "probe_75":
                assertEquals(0.8897, pvalue, 0.0001);
                break;
            case "probe_94":
                assertEquals(0.002717, pvalue, 0.0001);
                break;
        }
    }
}
use of ubic.gemma.model.expression.experiment.FactorValue in project Gemma by PavlidisLab.
the class CharacteristicServiceTest method setup.
/**
 * Builds the fixture used by the tests in this class.
 * <p>
 * Creates a test experiment with two characteristics (kept in {@code eeChar1} / {@code eeChar2}),
 * gives the biomaterial of its first bioassay one characteristic, adds the helper-generated
 * experimental factors and the factor values of the first factor to the experiment, and finally
 * attaches one characteristic to the first factor value. Each modified entity is saved through its
 * service.
 */
@Before
public void setup() {
    // Experiment with two characteristics.
    ee = this.getTestPersistentBasicExpressionExperiment();
    ee.setCharacteristics(this.getTestPersistentCharacteristics(2));
    Characteristic[] experimentCharacteristics = ee.getCharacteristics().toArray(new Characteristic[0]);
    eeChar1 = experimentCharacteristics[0];
    eeChar2 = experimentCharacteristics[1];
    eeService.update(ee);

    // One characteristic on the sample behind the first bioassay.
    BioAssay[] assays = ee.getBioAssays().toArray(new BioAssay[0]);
    BioMaterial sample = assays[0].getSampleUsed();
    sample.setCharacteristics(this.getTestPersistentCharacteristics(1));
    bmService.update(sample);

    // Factors, then the factor values of the first factor.
    for (ExperimentalFactor factor : testHelper.getExperimentalFactors(ee.getExperimentalDesign())) {
        eeService.addFactor(ee, factor);
    }
    ExperimentalFactor firstFactor = ee.getExperimentalDesign().getExperimentalFactors().iterator().next();
    for (FactorValue value : testHelper.getFactorValues(firstFactor)) {
        eeService.addFactorValue(ee, value);
    }

    // One characteristic on the first factor value.
    FactorValue firstValue = firstFactor.getFactorValues().iterator().next();
    firstValue.setCharacteristics(this.getTestPersistentCharacteristics(1));
    fvService.update(firstValue);
}
Aggregations