use of ubic.gemma.model.expression.designElement.CompositeSequence in project Gemma by PavlidisLab.
the class AffyProbeReaderTest method getProbeMatchingName.
private CompositeSequence getProbeMatchingName(String name) {
Collection<CompositeSequence> keySet = apr.getKeySet();
CompositeSequence cs = null;
for (CompositeSequence compositeSequence : keySet) {
if (compositeSequence.getName().equals(name)) {
cs = compositeSequence;
break;
}
}
return cs;
}
use of ubic.gemma.model.expression.designElement.CompositeSequence in project Gemma by PavlidisLab.
the class TwoWayAnovaWithInteractionsAnalyzerTest method checkResults.
/**
* @param resultSet the result set to check
*/
private void checkResults(ExpressionAnalysisResultSet resultSet) {
Collection<ExperimentalFactor> factors = resultSet.getExperimentalFactors();
for (DifferentialExpressionAnalysisResult r : resultSet.getResults()) {
CompositeSequence probe = r.getProbe();
Double pvalue = r.getPvalue();
pvalue = pvalue == null ? Double.NaN : pvalue;
Double qvalue = r.getCorrectedPvalue();
assertNotNull(probe);
if (factors.size() == 1) {
ExperimentalFactor f = factors.iterator().next();
if (f.equals(super.experimentalFactorA_Area)) {
assertEquals(factorValueA2, resultSet.getBaselineGroup());
switch(probe.getName()) {
case "probe_98":
assertEquals(0.8769, pvalue, 0.001);
assertNotNull(qvalue);
break;
case "probe_10":
assertEquals(5.158e-10, pvalue, 1e-12);
break;
case "probe_4":
assertEquals(0.0048, pvalue, 0.0001);
break;
}
} else {
assertEquals(factorValueB2, resultSet.getBaselineGroup());
if (probe.getName().equals("probe_98")) {
assertEquals(0.6888, pvalue, 0.001);
} else if (probe.getName().equals("probe_10")) {
assertEquals(0.07970, pvalue, 0.00001);
}
}
} else {
assertEquals(null, resultSet.getBaselineGroup());
switch(probe.getName()) {
case "probe_98":
assertEquals(0.7893, pvalue, 0.001);
break;
case "probe_10":
assertEquals(0.04514, pvalue, 0.00001);
break;
case "probe_4":
assertEquals(Double.NaN, pvalue, 0.00001);
break;
}
}
}
}
use of ubic.gemma.model.expression.designElement.CompositeSequence in project Gemma by PavlidisLab.
the class TwoWayAnovaWithoutInteractionsAnalyzerTest method checkResults.
/**
* @param resultSet the result set to check
*/
private void checkResults(ExpressionAnalysisResultSet resultSet) {
Collection<ExperimentalFactor> factors = resultSet.getExperimentalFactors();
assertEquals("Should not have an interaction term", 1, factors.size());
ExperimentalFactor f = factors.iterator().next();
boolean found = false;
for (DifferentialExpressionAnalysisResult r : resultSet.getResults()) {
CompositeSequence probe = r.getProbe();
Double pvalue = r.getPvalue();
if (f.equals(super.experimentalFactorB) && probe.getName().equals("probe_1")) {
assertEquals(0.501040, pvalue, 0.001);
found = true;
}
Collection<ContrastResult> contrasts = r.getContrasts();
Double stat;
if (contrasts.isEmpty()) {
continue;
}
stat = contrasts.iterator().next().getTstat();
assertNotNull(probe);
if (f.equals(super.experimentalFactorA_Area)) {
assertEquals(factorValueA2, resultSet.getBaselineGroup());
switch(probe.getName()) {
case // id=1001
"probe_1":
assertEquals(0.001814, pvalue, 0.00001);
assertNotNull(stat);
assertEquals(-287.061, stat, 0.001);
found = true;
break;
case // id 1097
"probe_97":
assertEquals(0.3546, pvalue, 0.001);
break;
case "probe_0":
assertEquals(1.36e-12, pvalue, 1e-10);
assertNotNull(stat);
assertEquals(-425.3, stat, 0.1);
break;
}
} else {
assertEquals(factorValueB2, resultSet.getBaselineGroup());
if (probe.getName().equals("probe_97")) {
assertEquals(0.4449, pvalue, 0.001);
}
}
}
assertTrue("Did not find expected results for probe_1", found);
}
use of ubic.gemma.model.expression.designElement.CompositeSequence in project Gemma by PavlidisLab.
the class ArrayDesignSequenceProcessorTest method testFetchAndLoadWithIdentifiers.
@Test
public void testFetchAndLoadWithIdentifiers() throws Exception {
String fastacmdExe = Settings.getString(SimpleFastaCmd.FASTA_CMD_ENV_VAR);
if (fastacmdExe == null) {
log.warn("No fastacmd executable is configured, skipping test");
return;
}
File fi = new File(fastacmdExe);
if (!fi.canRead()) {
log.warn(fastacmdExe + " not found, skipping test");
return;
}
GeoService geoService = this.getBean(GeoService.class);
geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGeneratorLocal(this.getTestFileBasePath()));
@SuppressWarnings("unchecked") final Collection<ArrayDesign> ads = (Collection<ArrayDesign>) geoService.fetchAndLoad("GPL226", true, true, false, true, true);
result = ads.iterator().next();
result = arrayDesignService.thaw(result);
// have to specify taxon as this has two taxons in it
try (InputStream f = this.getClass().getResourceAsStream("/data/loader/expression/arrayDesign/identifierTest.txt")) {
Collection<BioSequence> res = app.processArrayDesign(result, f, new String[] { "testblastdb", "testblastdbPartTwo" }, FileTools.resourceToPath("/data/loader/genome/blast"), taxon, true);
assertNotNull(res);
for (BioSequence sequence : res) {
assertNotNull(sequence.getSequence());
}
for (CompositeSequence cs : result.getCompositeSequences()) {
assert cs.getBiologicalCharacteristic() != null;
}
}
}
use of ubic.gemma.model.expression.designElement.CompositeSequence in project Gemma by PavlidisLab.
the class LinearModelAnalyzer method doAnalysis.
/**
* @param bioAssaySet source data, could be a SubSet
* @param dmatrix data
* @param samplesUsed analyzed
* @param factors included in the model
* @param subsetFactorValue null unless analyzing a subset (only used for book-keeping)
* @return analysis, or null if there was a problem.
*/
private DifferentialExpressionAnalysis doAnalysis(BioAssaySet bioAssaySet, DifferentialExpressionAnalysisConfig config, ExpressionDataDoubleMatrix dmatrix, List<BioMaterial> samplesUsed, List<ExperimentalFactor> factors, FactorValue subsetFactorValue) {
// We may want to change this to fall back to running normally, though the real fix is to just finish the ebayes implementation.
if (config.getModerateStatistics() && dmatrix.hasMissingValues()) {
throw new UnsupportedOperationException("Ebayes cannot be used when there are values missing in the data");
}
if (factors.isEmpty()) {
LinearModelAnalyzer.log.error("Must provide at least one factor");
return null;
}
if (samplesUsed.size() <= factors.size()) {
LinearModelAnalyzer.log.error("Must have more samples than factors");
return null;
}
final Map<String, Collection<ExperimentalFactor>> label2Factors = this.getRNames(factors);
Map<ExperimentalFactor, FactorValue> baselineConditions = ExperimentalDesignUtils.getBaselineConditions(samplesUsed, factors);
this.dropIncompleteFactors(samplesUsed, factors, baselineConditions);
if (factors.isEmpty()) {
LinearModelAnalyzer.log.error("Must provide at least one factor; they were all removed due to incomplete values");
return null;
}
QuantitationType quantitationType = dmatrix.getQuantitationTypes().iterator().next();
ExperimentalFactor interceptFactor = this.determineInterceptFactor(factors, quantitationType);
/*
* Build our factor terms, with interactions handled specially
*/
List<String[]> interactionFactorLists = new ArrayList<>();
ObjectMatrix<String, String, Object> designMatrix = ExperimentalDesignUtils.buildDesignMatrix(factors, samplesUsed, baselineConditions);
config.setBaseLineFactorValues(baselineConditions);
boolean oneSampleTTest = interceptFactor != null && factors.size() == 1;
if (!oneSampleTTest) {
this.buildModelFormula(config, label2Factors, interactionFactorLists);
}
// calculate library size before log transformation (FIXME we compute this twice)
DoubleMatrix1D librarySize = MatrixStats.colSums(dmatrix.getMatrix());
/*
* FIXME: remove columns that are marked as outliers.
*/
dmatrix = ExpressionDataDoubleMatrixUtil.filterAndLog2Transform(quantitationType, dmatrix);
DoubleMatrix<CompositeSequence, BioMaterial> namedMatrix = dmatrix.getMatrix();
if (LinearModelAnalyzer.log.isDebugEnabled())
this.outputForDebugging(dmatrix, designMatrix);
/*
* PREPARATION FOR 'NATIVE' FITTING
*/
DoubleMatrix<String, String> sNamedMatrix = this.makeDataMatrix(designMatrix, namedMatrix);
DesignMatrix properDesignMatrix = this.makeDesignMatrix(designMatrix, interactionFactorLists, baselineConditions);
/*
* Run the analysis NOTE this can be simplified if we strip out R code.
*/
final Map<String, LinearModelSummary> rawResults = this.runAnalysis(namedMatrix, sNamedMatrix, properDesignMatrix, librarySize, config);
if (rawResults.size() == 0) {
LinearModelAnalyzer.log.error("Got no results from the analysis");
return null;
}
/*
* Initialize data structures we need to hold results.
*/
// this used to be a Set, but a List is much faster.
Map<String, List<DifferentialExpressionAnalysisResult>> resultLists = new HashMap<>();
Map<String, List<Double>> pvaluesForQvalue = new HashMap<>();
for (String factorName : label2Factors.keySet()) {
resultLists.put(factorName, new ArrayList<DifferentialExpressionAnalysisResult>());
pvaluesForQvalue.put(factorName, new ArrayList<Double>());
}
for (String[] fs : interactionFactorLists) {
String intF = StringUtils.join(fs, ":");
resultLists.put(intF, new ArrayList<DifferentialExpressionAnalysisResult>());
pvaluesForQvalue.put(intF, new ArrayList<Double>());
}
if (pvaluesForQvalue.isEmpty()) {
LinearModelAnalyzer.log.warn("No results were obtained for the current stage of analysis.");
return null;
}
/*
* Create result objects for each model fit. Keeping things in order is important.
*/
final Transformer rowNameExtractor = TransformerUtils.invokerTransformer("getId");
boolean warned = false;
int notUsable = 0;
int processed = 0;
for (CompositeSequence el : namedMatrix.getRowNames()) {
if (++processed % 15000 == 0) {
LinearModelAnalyzer.log.info("Processed results for " + processed + " elements ...");
}
LinearModelSummary lm = rawResults.get(rowNameExtractor.transform(el).toString());
if (LinearModelAnalyzer.log.isDebugEnabled())
LinearModelAnalyzer.log.debug(el.getName() + "\n" + lm);
if (lm == null) {
if (!warned) {
LinearModelAnalyzer.log.warn("No result for " + el + ", further warnings suppressed");
warned = true;
}
notUsable++;
continue;
}
for (String factorName : label2Factors.keySet()) {
if (!pvaluesForQvalue.containsKey(factorName)) {
// was dropped.
continue;
}
Double overallPValue;
DifferentialExpressionAnalysisResult probeAnalysisResult = DifferentialExpressionAnalysisResult.Factory.newInstance();
probeAnalysisResult.setProbe(el);
if (lm.getCoefficients() == null) {
// probeAnalysisResult.setPvalue( null );
// pvaluesForQvalue.get( factorName ).add( overallPValue );
// resultLists.get( factorName ).add( probeAnalysisResult );
notUsable++;
continue;
}
Collection<ExperimentalFactor> factorsForName = label2Factors.get(factorName);
if (factorsForName.size() > 1) {
/*
* Interactions
*/
if (factorsForName.size() > 2) {
LinearModelAnalyzer.log.error("Handling more than two-way interactions is not implemented");
return null;
}
assert factorName.contains(":");
String[] factorNames = StringUtils.split(factorName, ":");
assert factorNames.length == factorsForName.size();
overallPValue = lm.getInteractionEffectP(factorNames);
if (overallPValue != null && !Double.isNaN(overallPValue)) {
Map<String, Double> interactionContrastTStats = lm.getContrastTStats(factorName);
Map<String, Double> interactionContrastCoeffs = lm.getContrastCoefficients(factorName);
Map<String, Double> interactionContrastPValues = lm.getContrastPValues(factorName);
for (String term : interactionContrastPValues.keySet()) {
Double contrastPvalue = interactionContrastPValues.get(term);
this.makeContrast(probeAnalysisResult, factorsForName, term, factorName, contrastPvalue, interactionContrastTStats, interactionContrastCoeffs);
}
} else {
if (!warned) {
LinearModelAnalyzer.log.warn("Interaction could not be computed for " + el + ", further warnings suppressed");
warned = true;
}
if (LinearModelAnalyzer.log.isDebugEnabled())
LinearModelAnalyzer.log.debug("Interaction could not be computed for " + el + ", further warnings suppressed");
// will over count?
notUsable++;
continue;
}
} else {
/*
* Main effect
*/
assert factorsForName.size() == 1;
ExperimentalFactor experimentalFactor = factorsForName.iterator().next();
if (interceptFactor != null && factorsForName.size() == 1 && experimentalFactor.equals(interceptFactor)) {
overallPValue = lm.getInterceptP();
} else {
overallPValue = lm.getMainEffectP(factorName);
}
/*
* Add contrasts unless overall pvalue is NaN
*/
if (overallPValue != null && !Double.isNaN(overallPValue)) {
Map<String, Double> mainEffectContrastTStats = lm.getContrastTStats(factorName);
Map<String, Double> mainEffectContrastPvalues = lm.getContrastPValues(factorName);
Map<String, Double> mainEffectContrastCoeffs = lm.getContrastCoefficients(factorName);
for (String term : mainEffectContrastPvalues.keySet()) {
Double contrastPvalue = mainEffectContrastPvalues.get(term);
this.makeContrast(probeAnalysisResult, factorsForName, term, factorName, contrastPvalue, mainEffectContrastTStats, mainEffectContrastCoeffs);
}
} else {
if (!warned) {
LinearModelAnalyzer.log.warn("ANOVA could not be done for " + experimentalFactor + " on " + el + ", further warnings suppressed");
warned = true;
}
if (LinearModelAnalyzer.log.isDebugEnabled())
LinearModelAnalyzer.log.debug("ANOVA could not be done for " + experimentalFactor + " on " + el);
// will over count?
notUsable++;
continue;
}
}
assert !Double.isNaN(overallPValue) : "We should not be keeping non-number pvalues (null or NaNs)";
probeAnalysisResult.setPvalue(this.nan2Null(overallPValue));
pvaluesForQvalue.get(factorName).add(overallPValue);
resultLists.get(factorName).add(probeAnalysisResult);
}
// over terms
}
if (notUsable > 0) {
LinearModelAnalyzer.log.info(notUsable + " elements or results were not usable - model could not be fit, etc.");
}
this.getRanksAndQvalues(resultLists, pvaluesForQvalue);
DifferentialExpressionAnalysis expressionAnalysis = this.makeAnalysisEntity(bioAssaySet, config, label2Factors, baselineConditions, interceptFactor, interactionFactorLists, oneSampleTTest, resultLists, subsetFactorValue);
LinearModelAnalyzer.log.info("Analysis processing phase done ...");
return expressionAnalysis;
}
Aggregations