Use of ubic.gemma.model.expression.experiment.ExperimentalFactor in project Gemma by PavlidisLab.
Class SVDServiceHelperImpl, method getImportantFactors.
/**
 * Collects the experimental factors whose p-value, for at least one component reported by the SVD factor
 * analysis of the experiment, is below the given threshold.
 *
 * @param ee                  the experiment whose SVD factor analysis is consulted
 * @param experimentalFactors the candidate factors; only factors from this collection can be returned
 * @param importanceThreshold p-value cutoff; a factor is retained when its p-value is strictly below it
 *                            (must not be null, it is unboxed for the comparison)
 * @return the retained factors; empty when no candidates were supplied or no SVD analysis is available
 */
@Override
public Set<ExperimentalFactor> getImportantFactors(ExpressionExperiment ee, Collection<ExperimentalFactor> experimentalFactors, Double importanceThreshold) {
    Set<ExperimentalFactor> importantFactors = new HashSet<>();
    if (experimentalFactors.isEmpty()) {
        return importantFactors;
    }
    Map<Long, ExperimentalFactor> factors = EntityUtils.getIdMap(experimentalFactors);
    SVDValueObject svdFactorAnalysis = this.svdFactorAnalysis(ee);
    if (svdFactorAnalysis == null) {
        return importantFactors;
    }
    Map<Integer, Map<Long, Double>> factorPVals = svdFactorAnalysis.getFactorPvals();
    for (Map.Entry<Integer, Map<Long, Double>> componentEntry : factorPVals.entrySet()) {
        Integer cmp = componentEntry.getKey();
        for (Map.Entry<Long, Double> factorEntry : componentEntry.getValue().entrySet()) {
            ExperimentalFactor ef = factors.get(factorEntry.getKey());
            if (ef == null) {
                // The analysis may cover factors the caller did not ask about; never put null in the result.
                continue;
            }
            Double pvalue = factorEntry.getValue();
            if (pvalue == null) {
                // No p-value recorded for this factor/component pair; unboxing it would throw.
                continue;
            }
            if (pvalue < importanceThreshold) {
                SVDServiceHelperImpl.log.info(ef + " retained at p=" + String.format("%.2g", pvalue) + " for PC" + cmp);
                importantFactors.add(ef);
            } else {
                SVDServiceHelperImpl.log.info(ef + " not retained at p=" + String.format("%.2g", pvalue) + " for PC" + cmp);
            }
        }
    }
    return importantFactors;
}
Use of ubic.gemma.model.expression.experiment.ExperimentalFactor in project Gemma by PavlidisLab.
Class SVDServiceHelperImpl, method svdFactorAnalysis.
/**
 * Builds an {@link SVDValueObject} for the given PCA and, when date or factor information is available for
 * its bioassays, analyzes up to {@code MAX_EIGEN_GENES_TO_TEST} components against that information.
 *
 * @param pca the principal component analysis to evaluate
 * @return the value object (without factor/date comparisons when no such information was found), or
 *         {@code null} if the value object could not be constructed from the PCA
 */
@Override
public SVDValueObject svdFactorAnalysis(PrincipalComponentAnalysis pca) {
    BioAssayDimension bad = pca.getBioAssayDimension();
    List<BioAssay> bioAssays = bad.getBioAssays();
    SVDValueObject svo;
    try {
        svo = new SVDValueObject(pca);
    } catch (Exception e) {
        // Pass the exception itself so the stack trace is kept; the message alone may even be null.
        SVDServiceHelperImpl.log.error(e.getLocalizedMessage(), e);
        return null;
    }
    Map<Long, Date> bioMaterialDates = new HashMap<>();
    Map<ExperimentalFactor, Map<Long, Double>> bioMaterialFactorMap = new HashMap<>();
    // Presumably fills the two maps from the bioassays' samples -- the helper is not visible here.
    this.prepareForFactorComparisons(svo, bioAssays, bioMaterialDates, bioMaterialFactorMap);
    if (bioMaterialDates.isEmpty() && bioMaterialFactorMap.isEmpty()) {
        SVDServiceHelperImpl.log.warn("No factor or date information to compare to the eigenGenes");
        return svo;
    }
    Long[] svdBioMaterials = svo.getBioMaterialIds();
    // Drop any previously stored comparison results before the per-component analysis runs.
    svo.getDateCorrelations().clear();
    svo.getFactorCorrelations().clear();
    svo.getDates().clear();
    svo.getFactors().clear();
    for (int componentNumber = 0; componentNumber < Math.min(svo.getvMatrix().columns(), SVDServiceHelperImpl.MAX_EIGEN_GENES_TO_TEST); componentNumber++) {
        this.analyzeComponent(svo, componentNumber, svo.getvMatrix(), bioMaterialDates, bioMaterialFactorMap, svdBioMaterials);
    }
    return svo;
}
Use of ubic.gemma.model.expression.experiment.ExperimentalFactor in project Gemma by PavlidisLab.
Class SVDServiceHelperImpl, method populateBMFMap.
/**
 * Records, for every factor value of the given biomaterial, a numeric value under that factor value's
 * experimental factor, keyed by the biomaterial's ID.
 * <p>
 * When the factor value has a measurement, the parsed measurement is stored ({@code NaN} if it is missing or
 * not a number). Otherwise the factor value's ID is stored as a double.
 *
 * @param bioMaterialFactorMap map to update: experimental factor -> (biomaterial ID -> value); inner maps
 *                             are created on demand
 * @param bm                   the biomaterial whose factor values are recorded
 */
public static void populateBMFMap(Map<ExperimentalFactor, Map<Long, Double>> bioMaterialFactorMap, BioMaterial bm) {
    for (FactorValue fv : bm.getFactorValues()) {
        ExperimentalFactor experimentalFactor = fv.getExperimentalFactor();
        Map<Long, Double> valuesByBioMaterial = bioMaterialFactorMap.get(experimentalFactor);
        if (valuesByBioMaterial == null) {
            valuesByBioMaterial = new HashMap<Long, Double>();
            bioMaterialFactorMap.put(experimentalFactor, valuesByBioMaterial);
        }
        double valueToStore;
        if (fv.getMeasurement() != null) {
            String rawValue = fv.getMeasurement().getValue();
            if (rawValue == null) {
                // Double.parseDouble(null) throws NullPointerException, which the catch below would not handle.
                SVDServiceHelperImpl.log.warn("Measurement wasn't a number for " + fv);
                valueToStore = Double.NaN;
            } else {
                try {
                    valueToStore = Double.parseDouble(rawValue);
                } catch (NumberFormatException e) {
                    SVDServiceHelperImpl.log.warn("Measurement wasn't a number for " + fv);
                    valueToStore = Double.NaN;
                }
            }
        } else {
            /*
             * This is a hack. We're storing the ID but as a double.
             */
            valueToStore = fv.getId().doubleValue();
        }
        valuesByBioMaterial.put(bm.getId(), valueToStore);
    }
}
Use of ubic.gemma.model.expression.experiment.ExperimentalFactor in project Gemma by PavlidisLab.
Class BatchConfound, method getBioMaterialFactorMap.
/**
 * Builds the experimental factor -> (biomaterial ID -> value) map for an experiment by feeding the sample
 * of each of its bioassays to {@code SVDServiceHelperImpl.populateBMFMap}.
 *
 * @param ee the experiment whose bioassays are scanned
 * @return a new map, populated in place by the helper
 */
private static Map<ExperimentalFactor, Map<Long, Double>> getBioMaterialFactorMap(ExpressionExperiment ee) {
    final Map<ExperimentalFactor, Map<Long, Double>> valuesByFactor = new HashMap<>();
    for (BioAssay assay : ee.getBioAssays()) {
        SVDServiceHelperImpl.populateBMFMap(valuesByFactor, assay.getSampleUsed());
    }
    return valuesByFactor;
}
Use of ubic.gemma.model.expression.experiment.ExperimentalFactor in project Gemma by PavlidisLab.
Class BatchConfound, method factorBatchConfoundTest.
/**
 * Tests every non-batch experimental factor for association with the batch factor.
 * <p>
 * The first factor recognized by {@code ExperimentalDesignUtils.isBatch} is used as the batch factor; if
 * there is none, an empty collection is returned. Continuous factors are tested with Kruskal-Wallis,
 * all others with a chi-square test on the batch x factor-value contingency table. Biomaterials without a
 * batch membership are logged and left out of either test.
 *
 * @param ee                   the experiment, used for logging and for the result objects
 * @param bioMaterialFactorMap experimental factor -> (biomaterial ID -> value); for non-continuous factors
 *                             the value is read as a factor value ID stored as a double
 * @return one result per non-batch factor; the p-value is {@code NaN} when the chi-square statistic could
 *         not be computed
 * @throws IllegalArgumentException declared by the original signature; may still propagate from the
 *                                  statistics libraries
 */
private static Collection<BatchConfoundValueObject> factorBatchConfoundTest(ExpressionExperiment ee, Map<ExperimentalFactor, Map<Long, Double>> bioMaterialFactorMap) throws IllegalArgumentException {
    Map<Long, Long> batchMembership = new HashMap<>();
    ExperimentalFactor batchFactor = null;
    Map<Long, Integer> batchIndexes = new HashMap<>();
    for (ExperimentalFactor ef : bioMaterialFactorMap.keySet()) {
        if (ExperimentalDesignUtils.isBatch(ef)) {
            batchFactor = ef;
            Map<Long, Double> bmToFv = bioMaterialFactorMap.get(batchFactor);
            if (bmToFv == null) {
                log.warn("No biomaterial --> factor value map for batch factor: " + batchFactor);
                continue;
            }
            // Give each batch (factor value of the batch factor) a dense index.
            int index = 0;
            for (FactorValue fv : batchFactor.getFactorValues()) {
                batchIndexes.put(fv.getId(), index++);
            }
            // The stored double is the batch factor value's ID; recover it as a long.
            for (Map.Entry<Long, Double> membership : bmToFv.entrySet()) {
                batchMembership.put(membership.getKey(), membership.getValue().longValue());
            }
            break;
        }
    }
    Set<BatchConfoundValueObject> result = new HashSet<>();
    if (batchFactor == null) {
        return result;
    }
    // Loop-invariant: the batch factor does not change below.
    int numBatches = batchFactor.getFactorValues().size();
    for (Map.Entry<ExperimentalFactor, Map<Long, Double>> factorEntry : bioMaterialFactorMap.entrySet()) {
        ExperimentalFactor ef = factorEntry.getKey();
        if (ef.equals(batchFactor))
            continue;
        Map<Long, Double> bmToFv = factorEntry.getValue();
        int numBioMaterials = bmToFv.keySet().size();
        assert numBioMaterials > 0 : "No biomaterials for " + ef;
        double p = Double.NaN;
        double chiSquare;
        int df;
        if (ExperimentalDesignUtils.isContinuous(ef)) {
            // First pass: count the biomaterials that have a batch, so the lists hold only usable samples.
            int numWithBatch = 0;
            for (Long bmId : bmToFv.keySet()) {
                if (batchMembership.get(bmId) == null) {
                    log.warn("No batch membership for : " + bmId);
                } else {
                    numWithBatch++;
                }
            }
            DoubleArrayList factorValues = new DoubleArrayList(numWithBatch);
            factorValues.setSize(numWithBatch);
            IntArrayList batches = new IntArrayList(numWithBatch);
            batches.setSize(numWithBatch);
            // Second pass: fill value and batch index side by side, skipping samples without a batch.
            int j = 0;
            for (Map.Entry<Long, Double> sample : bmToFv.entrySet()) {
                Long batch = batchMembership.get(sample.getKey());
                if (batch == null) {
                    continue;
                }
                factorValues.set(j, sample.getValue());
                batches.set(j, batchIndexes.get(batch));
                j++;
            }
            p = KruskalWallis.test(factorValues, batches);
            df = KruskalWallis.dof(factorValues, batches);
            chiSquare = KruskalWallis.kwStatistic(factorValues, batches);
            log.debug("KWallis\t" + ee.getId() + "\t" + ee.getShortName() + "\t" + ef.getId() + "\t" + ef.getName() + "\t" + String.format("%.2f", chiSquare) + "\t" + df + "\t" + String.format("%.2g", p) + "\t" + numBatches);
        } else {
            // Give each level (factor value) of this factor a dense index.
            Map<Long, Integer> factorValueIndexes = new HashMap<>();
            int index = 0;
            for (FactorValue fv : ef.getFactorValues()) {
                factorValueIndexes.put(fv.getId(), index++);
            }
            int numLevels = ef.getFactorValues().size();
            // Contingency table, batches x levels; Java zero-initializes it.
            long[][] counts = new long[numBatches][numLevels];
            for (Map.Entry<Long, Double> sample : bmToFv.entrySet()) {
                Long bm = sample.getKey();
                long fv = sample.getValue().longValue();
                Long batch = batchMembership.get(bm);
                if (batch == null) {
                    log.warn("No batch membership for : " + bm);
                    continue;
                }
                int batchIndex = batchIndexes.get(batch);
                int factorIndex = factorValueIndexes.get(fv);
                counts[batchIndex][factorIndex]++;
            }
            ChiSquareTest cst = new ChiSquareTest();
            try {
                chiSquare = cst.chiSquare(counts);
            } catch (IllegalArgumentException e) {
                log.warn("IllegalArgumentException exception computing ChiSq for : " + ef + "; Error was: " + e.getMessage());
                chiSquare = Double.NaN;
            }
            // Same value as (rows - 1) * (cols - 1), but computed without indexing into a possibly empty table.
            df = Math.max(0, numBatches - 1) * Math.max(0, numLevels - 1);
            if (!Double.isNaN(chiSquare)) {
                // Build the distribution only when there is a statistic to evaluate: a degenerate table
                // has df == 0, which the distribution constructor presumably rejects (TODO confirm).
                ChiSquaredDistribution distribution = new ChiSquaredDistribution(df);
                p = 1.0 - distribution.cumulativeProbability(chiSquare);
            }
            log.debug("ChiSq\t" + ee.getId() + "\t" + ee.getShortName() + "\t" + ef.getId() + "\t" + ef.getName() + "\t" + String.format("%.2f", chiSquare) + "\t" + df + "\t" + String.format("%.2g", p) + "\t" + numBatches);
        }
        BatchConfoundValueObject summary = new BatchConfoundValueObject(ee, ef, chiSquare, df, p, numBatches);
        result.add(summary);
    }
    return result;
}
Aggregations