use of ubic.gemma.model.common.quantitationtype.QuantitationType in project Gemma by PavlidisLab.
the class LinearModelAnalyzer method fixFactorsForSubset.
/**
* Remove factors which are no longer usable, based on the subset.
*/
private Collection<ExperimentalFactor> fixFactorsForSubset(ExpressionDataDoubleMatrix dmatrix, ExpressionExperimentSubSet eesubSet, List<ExperimentalFactor> factors) {
List<ExperimentalFactor> result = new ArrayList<>();
QuantitationType quantitationType = dmatrix.getQuantitationTypes().iterator().next();
ExperimentalFactor interceptFactor = this.determineInterceptFactor(factors, quantitationType);
/*
* Remove any constant factors, unless they are that intercept.
*/
for (ExperimentalFactor f : factors) {
if (f.getType().equals(FactorType.CONTINUOUS)) {
result.add(f);
} else if (interceptFactor != null && interceptFactor.equals(f)) {
result.add(f);
} else {
Collection<FactorValue> levels = new HashSet<>();
LinearModelAnalyzer.populateFactorValuesFromBASet(eesubSet, f, levels);
if (levels.size() > 1) {
result.add(f);
} else {
LinearModelAnalyzer.log.info("Dropping " + f + " from subset");
}
}
}
return result;
}
use of ubic.gemma.model.common.quantitationtype.QuantitationType in project Gemma by PavlidisLab.
the class LinearModelAnalyzer method doAnalysis.
/**
* @param bioAssaySet source data, could be a SubSet
* @param dmatrix data
* @param samplesUsed analyzed
* @param factors included in the model
* @param subsetFactorValue null unless analyzing a subset (only used for book-keeping)
* @return analysis, or null if there was a problem.
*/
private DifferentialExpressionAnalysis doAnalysis(BioAssaySet bioAssaySet, DifferentialExpressionAnalysisConfig config, ExpressionDataDoubleMatrix dmatrix, List<BioMaterial> samplesUsed, List<ExperimentalFactor> factors, FactorValue subsetFactorValue) {
// We may want to change this to fall back to running normally, though the real fix is to just finish the ebayes implementation.
if (config.getModerateStatistics() && dmatrix.hasMissingValues()) {
throw new UnsupportedOperationException("Ebayes cannot be used when there are values missing in the data");
}
if (factors.isEmpty()) {
LinearModelAnalyzer.log.error("Must provide at least one factor");
return null;
}
if (samplesUsed.size() <= factors.size()) {
LinearModelAnalyzer.log.error("Must have more samples than factors");
return null;
}
final Map<String, Collection<ExperimentalFactor>> label2Factors = this.getRNames(factors);
Map<ExperimentalFactor, FactorValue> baselineConditions = ExperimentalDesignUtils.getBaselineConditions(samplesUsed, factors);
this.dropIncompleteFactors(samplesUsed, factors, baselineConditions);
if (factors.isEmpty()) {
LinearModelAnalyzer.log.error("Must provide at least one factor; they were all removed due to incomplete values");
return null;
}
QuantitationType quantitationType = dmatrix.getQuantitationTypes().iterator().next();
ExperimentalFactor interceptFactor = this.determineInterceptFactor(factors, quantitationType);
/*
* Build our factor terms, with interactions handled specially
*/
List<String[]> interactionFactorLists = new ArrayList<>();
ObjectMatrix<String, String, Object> designMatrix = ExperimentalDesignUtils.buildDesignMatrix(factors, samplesUsed, baselineConditions);
config.setBaseLineFactorValues(baselineConditions);
boolean oneSampleTTest = interceptFactor != null && factors.size() == 1;
if (!oneSampleTTest) {
this.buildModelFormula(config, label2Factors, interactionFactorLists);
}
// calculate library size before log transformation (FIXME we compute this twice)
DoubleMatrix1D librarySize = MatrixStats.colSums(dmatrix.getMatrix());
/*
* FIXME: remove columns that are marked as outliers.
*/
dmatrix = ExpressionDataDoubleMatrixUtil.filterAndLog2Transform(quantitationType, dmatrix);
DoubleMatrix<CompositeSequence, BioMaterial> namedMatrix = dmatrix.getMatrix();
if (LinearModelAnalyzer.log.isDebugEnabled())
this.outputForDebugging(dmatrix, designMatrix);
/*
* PREPARATION FOR 'NATIVE' FITTING
*/
DoubleMatrix<String, String> sNamedMatrix = this.makeDataMatrix(designMatrix, namedMatrix);
DesignMatrix properDesignMatrix = this.makeDesignMatrix(designMatrix, interactionFactorLists, baselineConditions);
/*
* Run the analysis NOTE this can be simplified if we strip out R code.
*/
final Map<String, LinearModelSummary> rawResults = this.runAnalysis(namedMatrix, sNamedMatrix, properDesignMatrix, librarySize, config);
if (rawResults.size() == 0) {
LinearModelAnalyzer.log.error("Got no results from the analysis");
return null;
}
/*
* Initialize data structures we need to hold results.
*/
// this used to be a Set, but a List is much faster.
Map<String, List<DifferentialExpressionAnalysisResult>> resultLists = new HashMap<>();
Map<String, List<Double>> pvaluesForQvalue = new HashMap<>();
for (String factorName : label2Factors.keySet()) {
resultLists.put(factorName, new ArrayList<DifferentialExpressionAnalysisResult>());
pvaluesForQvalue.put(factorName, new ArrayList<Double>());
}
for (String[] fs : interactionFactorLists) {
String intF = StringUtils.join(fs, ":");
resultLists.put(intF, new ArrayList<DifferentialExpressionAnalysisResult>());
pvaluesForQvalue.put(intF, new ArrayList<Double>());
}
if (pvaluesForQvalue.isEmpty()) {
LinearModelAnalyzer.log.warn("No results were obtained for the current stage of analysis.");
return null;
}
/*
* Create result objects for each model fit. Keeping things in order is important.
*/
final Transformer rowNameExtractor = TransformerUtils.invokerTransformer("getId");
boolean warned = false;
int notUsable = 0;
int processed = 0;
for (CompositeSequence el : namedMatrix.getRowNames()) {
if (++processed % 15000 == 0) {
LinearModelAnalyzer.log.info("Processed results for " + processed + " elements ...");
}
LinearModelSummary lm = rawResults.get(rowNameExtractor.transform(el).toString());
if (LinearModelAnalyzer.log.isDebugEnabled())
LinearModelAnalyzer.log.debug(el.getName() + "\n" + lm);
if (lm == null) {
if (!warned) {
LinearModelAnalyzer.log.warn("No result for " + el + ", further warnings suppressed");
warned = true;
}
notUsable++;
continue;
}
for (String factorName : label2Factors.keySet()) {
if (!pvaluesForQvalue.containsKey(factorName)) {
// was dropped.
continue;
}
Double overallPValue;
DifferentialExpressionAnalysisResult probeAnalysisResult = DifferentialExpressionAnalysisResult.Factory.newInstance();
probeAnalysisResult.setProbe(el);
if (lm.getCoefficients() == null) {
// probeAnalysisResult.setPvalue( null );
// pvaluesForQvalue.get( factorName ).add( overallPValue );
// resultLists.get( factorName ).add( probeAnalysisResult );
notUsable++;
continue;
}
Collection<ExperimentalFactor> factorsForName = label2Factors.get(factorName);
if (factorsForName.size() > 1) {
/*
* Interactions
*/
if (factorsForName.size() > 2) {
LinearModelAnalyzer.log.error("Handling more than two-way interactions is not implemented");
return null;
}
assert factorName.contains(":");
String[] factorNames = StringUtils.split(factorName, ":");
assert factorNames.length == factorsForName.size();
overallPValue = lm.getInteractionEffectP(factorNames);
if (overallPValue != null && !Double.isNaN(overallPValue)) {
Map<String, Double> interactionContrastTStats = lm.getContrastTStats(factorName);
Map<String, Double> interactionContrastCoeffs = lm.getContrastCoefficients(factorName);
Map<String, Double> interactionContrastPValues = lm.getContrastPValues(factorName);
for (String term : interactionContrastPValues.keySet()) {
Double contrastPvalue = interactionContrastPValues.get(term);
this.makeContrast(probeAnalysisResult, factorsForName, term, factorName, contrastPvalue, interactionContrastTStats, interactionContrastCoeffs);
}
} else {
if (!warned) {
LinearModelAnalyzer.log.warn("Interaction could not be computed for " + el + ", further warnings suppressed");
warned = true;
}
if (LinearModelAnalyzer.log.isDebugEnabled())
LinearModelAnalyzer.log.debug("Interaction could not be computed for " + el + ", further warnings suppressed");
// will over count?
notUsable++;
continue;
}
} else {
/*
* Main effect
*/
assert factorsForName.size() == 1;
ExperimentalFactor experimentalFactor = factorsForName.iterator().next();
if (interceptFactor != null && factorsForName.size() == 1 && experimentalFactor.equals(interceptFactor)) {
overallPValue = lm.getInterceptP();
} else {
overallPValue = lm.getMainEffectP(factorName);
}
/*
* Add contrasts unless overall pvalue is NaN
*/
if (overallPValue != null && !Double.isNaN(overallPValue)) {
Map<String, Double> mainEffectContrastTStats = lm.getContrastTStats(factorName);
Map<String, Double> mainEffectContrastPvalues = lm.getContrastPValues(factorName);
Map<String, Double> mainEffectContrastCoeffs = lm.getContrastCoefficients(factorName);
for (String term : mainEffectContrastPvalues.keySet()) {
Double contrastPvalue = mainEffectContrastPvalues.get(term);
this.makeContrast(probeAnalysisResult, factorsForName, term, factorName, contrastPvalue, mainEffectContrastTStats, mainEffectContrastCoeffs);
}
} else {
if (!warned) {
LinearModelAnalyzer.log.warn("ANOVA could not be done for " + experimentalFactor + " on " + el + ", further warnings suppressed");
warned = true;
}
if (LinearModelAnalyzer.log.isDebugEnabled())
LinearModelAnalyzer.log.debug("ANOVA could not be done for " + experimentalFactor + " on " + el);
// will over count?
notUsable++;
continue;
}
}
assert !Double.isNaN(overallPValue) : "We should not be keeping non-number pvalues (null or NaNs)";
probeAnalysisResult.setPvalue(this.nan2Null(overallPValue));
pvaluesForQvalue.get(factorName).add(overallPValue);
resultLists.get(factorName).add(probeAnalysisResult);
}
// over terms
}
if (notUsable > 0) {
LinearModelAnalyzer.log.info(notUsable + " elements or results were not usable - model could not be fit, etc.");
}
this.getRanksAndQvalues(resultLists, pvaluesForQvalue);
DifferentialExpressionAnalysis expressionAnalysis = this.makeAnalysisEntity(bioAssaySet, config, label2Factors, baselineConditions, interceptFactor, interactionFactorLists, oneSampleTTest, resultLists, subsetFactorValue);
LinearModelAnalyzer.log.info("Analysis processing phase done ...");
return expressionAnalysis;
}
use of ubic.gemma.model.common.quantitationtype.QuantitationType in project Gemma by PavlidisLab.
the class MeanVarianceServiceImpl method create.
@Override
public MeanVarianceRelation create(ExpressionExperiment ee, boolean forceRecompute) {
if (ee == null) {
log.warn("Experiment is null");
return null;
}
log.info("Starting mean-variance computation");
MeanVarianceRelation mvr = ee.getMeanVarianceRelation();
if (forceRecompute || mvr == null) {
log.info("Recomputing mean-variance");
ee = expressionExperimentService.thawLiter(ee);
mvr = ee.getMeanVarianceRelation();
ExpressionDataDoubleMatrix intensities = meanVarianceServiceHelper.getIntensities(ee);
if (intensities == null) {
throw new IllegalStateException("Could not locate intensity matrix for " + ee.getShortName());
}
Collection<QuantitationType> qtList = expressionExperimentService.getPreferredQuantitationType(ee);
QuantitationType qt;
if (qtList.size() == 0) {
log.error("Did not find any preferred quantitation type. Mean-variance relation was not computed.");
} else {
qt = qtList.iterator().next();
if (qtList.size() > 1) {
// Really this should be an error condition.
log.warn("Found more than one preferred quantitation type. Only the first preferred quantitation type (" + qt + ") will be used.");
}
try {
intensities = ExpressionDataDoubleMatrixUtil.filterAndLog2Transform(qt, intensities);
} catch (UnknownLogScaleException e) {
log.warn("Problem log transforming data. Check that the appropriate log scale is used. Mean-variance will be computed as is.");
}
mvr = calculateMeanVariance(intensities, mvr);
meanVarianceServiceHelper.createMeanVariance(ee, mvr);
}
}
log.info("Mean-variance computation is complete");
return mvr;
}
use of ubic.gemma.model.common.quantitationtype.QuantitationType in project Gemma by PavlidisLab.
the class VectorMergingServiceImpl method getVectors.
/**
* Get the current set of vectors that need to be updated.
*
* @param expExp ee
* @param qts - only used to check for problems.
* @param allOldBioAssayDims old BA dims
* @return map
*/
private Map<QuantitationType, Collection<RawExpressionDataVector>> getVectors(ExpressionExperiment expExp, Collection<QuantitationType> qts, Collection<BioAssayDimension> allOldBioAssayDims) {
Collection<RawExpressionDataVector> oldVectors = new HashSet<>();
for (BioAssayDimension dim : allOldBioAssayDims) {
oldVectors.addAll(rawExpressionDataVectorService.find(dim));
}
if (oldVectors.isEmpty()) {
throw new IllegalStateException("No vectors");
}
rawExpressionDataVectorService.thaw(oldVectors);
Map<QuantitationType, Collection<RawExpressionDataVector>> qt2Vec = new HashMap<>();
Collection<QuantitationType> qtsToAdd = new HashSet<>();
for (RawExpressionDataVector v : oldVectors) {
QuantitationType qt = v.getQuantitationType();
if (!qts.contains(qt)) {
/*
* Guard against QTs that are broken. Sometimes the QTs for the EE don't include the ones that the DEDVs
* have, due to corruption.
*/
qtsToAdd.add(qt);
}
if (!qt2Vec.containsKey(qt)) {
qt2Vec.put(qt, new HashSet<RawExpressionDataVector>());
}
qt2Vec.get(qt).add(v);
}
if (!qtsToAdd.isEmpty()) {
expExp.getQuantitationTypes().addAll(qtsToAdd);
VectorMergingServiceImpl.log.info("Adding " + qtsToAdd.size() + " missing quantitation types to experiment");
expressionExperimentService.update(expExp);
}
return qt2Vec;
}
use of ubic.gemma.model.common.quantitationtype.QuantitationType in project Gemma by PavlidisLab.
the class VectorMergingServiceImpl method print.
@SuppressWarnings("unused")
private void print(Collection<DesignElementDataVector> newVectors) {
StringBuilder buf = new StringBuilder();
ByteArrayConverter conv = new ByteArrayConverter();
for (DesignElementDataVector vector : newVectors) {
buf.append(vector.getDesignElement());
QuantitationType qtype = vector.getQuantitationType();
if (qtype.getRepresentation().equals(PrimitiveType.DOUBLE)) {
double[] vals = conv.byteArrayToDoubles(vector.getData());
for (double d : vals) {
buf.append("\t").append(d);
}
} else if (qtype.getRepresentation().equals(PrimitiveType.INT)) {
int[] vals = conv.byteArrayToInts(vector.getData());
for (int i : vals) {
buf.append("\t").append(i);
}
} else if (qtype.getRepresentation().equals(PrimitiveType.BOOLEAN)) {
boolean[] vals = conv.byteArrayToBooleans(vector.getData());
for (boolean d : vals) {
buf.append("\t").append(d);
}
} else if (qtype.getRepresentation().equals(PrimitiveType.STRING)) {
String[] vals = conv.byteArrayToStrings(vector.getData());
for (String d : vals) {
buf.append("\t").append(d);
}
}
buf.append("\n");
}
VectorMergingServiceImpl.log.info("\n" + buf);
}
Aggregations