use of ubic.gemma.core.loader.util.AlreadyExistsInSystemException in project Gemma by PavlidisLab.
the class DiffExTest method testGSE35930.
/**
 * Test where probes have constant values. See bug 3177.
 * <p>
 * Fetches GSE35930 through a {@code GeoDomainObjectGeneratorLocal} (or reuses the experiment reported by
 * {@link AlreadyExistsInSystemException}), imports the experimental design when the experiment has no factors,
 * runs the analysis, and verifies that no result is reported for the constant probe {@code 1622910_at} while at
 * least one other result exists.
 *
 * @throws Exception if loading, design import or analysis fails
 */
@Test
public void testGSE35930() throws Exception {
    ExpressionExperiment ee;
    // To force a fresh load, remove the existing experiment first:
    // eeService.remove( eeService.findByShortName( "GSE35930" ) );
    try {
        geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGeneratorLocal(this.getTestFileBasePath("GSE35930")));
        Collection<?> results = geoService.fetchAndLoad("GSE35930", false, true, false);
        ee = (ExpressionExperiment) results.iterator().next();
    } catch (AlreadyExistsInSystemException e) {
        // OK: reuse what is already there. The payload may be a single experiment or a collection of them;
        // test for Collection (not just List) so that any collection type is unwrapped correctly.
        Object existing = e.getData();
        if (existing instanceof Collection) {
            ee = (ExpressionExperiment) ((Collection<?>) existing).iterator().next();
        } else {
            ee = (ExpressionExperiment) existing;
        }
    }
    ee = this.eeService.thawLite(ee);
    processedExpressionDataVectorService.computeProcessedExpressionData(ee);
    if (ee.getExperimentalDesign().getExperimentalFactors().isEmpty()) {
        ee = eeService.load(ee.getId());
        ee = this.eeService.thawLite(ee);
        try (InputStream is = this.getClass().getResourceAsStream("/data/loader/expression/geo/GSE35930/design.txt")) {
            // getResourceAsStream returns null for a missing resource; fail here rather than inside the importer
            assertNotNull(is);
            experimentalDesignImporter.importDesign(ee, is);
        }
        // load and thaw again after the import (presumably so the new factors are visible -- confirm)
        ee = eeService.load(ee.getId());
        ee = this.eeService.thawLite(ee);
    }
    DifferentialExpressionAnalysisConfig config = new DifferentialExpressionAnalysisConfig();
    config.setFactorsToInclude(ee.getExperimentalDesign().getExperimentalFactors());
    Collection<DifferentialExpressionAnalysis> analyses = analyzer.run(ee, config);
    assertNotNull(analyses);
    assertEquals(1, analyses.size());
    DifferentialExpressionAnalysis results = analyses.iterator().next();
    boolean found = false;
    ExpressionAnalysisResultSet resultSet = results.getResultSets().iterator().next();
    for (DifferentialExpressionAnalysisResult r : resultSet.getResults()) {
        // this probe has a constant value
        if (r.getProbe().getName().equals("1622910_at")) {
            fail("Should not have found a result for constant probe");
        } else {
            // got to have something...
            found = true;
        }
    }
    assertTrue(found);
}
use of ubic.gemma.core.loader.util.AlreadyExistsInSystemException in project Gemma by PavlidisLab.
the class DiffExTest method testCountData.
/**
 * Test differential expression analysis on RNA-seq data. See bug 3383. R code in voomtest.R
 * <p>
 * Removes any existing GSE29006, reloads it, imports the design, adds count data read from a flat file, and
 * then runs the analysis twice (without and with weights), checking the statistics reported for probe
 * {@code ENSG00000000938} in each run.
 *
 * @throws Exception if loading, design import, data update or analysis fails
 */
@Test
public void testCountData() throws Exception {
    geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGenerator());
    ExpressionExperiment ee = eeService.findByShortName("GSE29006");
    if (ee != null) {
        eeService.remove(ee);
    }
    assertTrue(eeService.findByShortName("GSE29006") == null);
    try {
        Collection<?> results = geoService.fetchAndLoad("GSE29006", false, false, false);
        ee = (ExpressionExperiment) results.iterator().next();
    } catch (AlreadyExistsInSystemException e) {
        // keep the original exception as the cause so the failure can be diagnosed
        throw new IllegalStateException("Need to remove this data set before test is run", e);
    }
    ee = eeService.thaw(ee);
    try (InputStream is = this.getClass().getResourceAsStream("/data/loader/expression/flatfileload/GSE29006_design.txt")) {
        assertNotNull(is);
        experimentalDesignImporter.importDesign(ee, is);
    }
    // Load the data from a text file.
    DoubleMatrixReader reader = new DoubleMatrixReader();
    ArrayDesign targetArrayDesign;
    try (InputStream countData = this.getClass().getResourceAsStream("/data/loader/expression/flatfileload/GSE29006_expression_count.test.txt")) {
        // getResourceAsStream returns null for a missing resource; fail here rather than inside the reader
        assertNotNull(countData);
        DoubleMatrix<String, String> countMatrix = reader.read(countData);
        Collection<ExperimentalFactor> experimentalFactors = ee.getExperimentalDesign().getExperimentalFactors();
        assertEquals(1, experimentalFactors.size());
        List<String> probeNames = countMatrix.getRowNames();
        assertEquals(199, probeNames.size());
        // we have to find the right generic platform to use.
        targetArrayDesign = this.getTestPersistentArrayDesign(probeNames, taxonService.findByCommonName("human"));
        targetArrayDesign = arrayDesignService.thaw(targetArrayDesign);
        // the experiment has 8 samples but the data has 4 columns so allow missing samples
        // GSM718707 GSM718708 GSM718709 GSM718710
        dataUpdater.addCountData(ee, targetArrayDesign, countMatrix, null, 36, true, true);
    }
    // thaw again to get the addCountData() updates
    ee = eeService.thaw(ee);
    // verify rows and columns
    Collection<DoubleVectorValueObject> processedDataArrays = processedExpressionDataVectorService.getProcessedDataArrays(ee);
    assertEquals(199, processedDataArrays.size());
    for (DoubleVectorValueObject v : processedDataArrays) {
        assertEquals(4, v.getBioAssays().size());
    }
    // I confirmed that log2cpm is working same as voom here; not bothering to test directly.
    TestUtils.assertBAs(ee, targetArrayDesign, "GSM718709", 320383);
    // DE analysis without weights to assist comparison to R
    DifferentialExpressionAnalysisConfig config = new DifferentialExpressionAnalysisConfig();
    config.setUseWeights(false);
    config.setFactorsToInclude(ee.getExperimentalDesign().getExperimentalFactors());
    Collection<DifferentialExpressionAnalysis> analyses = analyzer.run(ee, config);
    assertNotNull(analyses);
    assertEquals(1, analyses.size());
    DifferentialExpressionAnalysis results = analyses.iterator().next();
    boolean found = false;
    ExpressionAnalysisResultSet resultSet = results.getResultSets().iterator().next();
    for (DifferentialExpressionAnalysisResult r : resultSet.getResults()) {
        if (r.getProbe().getName().equals("ENSG00000000938")) {
            found = true;
            ContrastResult contrast = r.getContrasts().iterator().next();
            // R: 0.006190738; coeff = 2.2695215; t=12.650422; R with our weights: 0.009858270, 2.2317534; t=9.997007
            assertEquals(0.007055717, r.getPvalue(), 0.00001);
            // up to sign
            assertEquals(2.2300049, Math.abs(contrast.getCoefficient()), 0.001);
            break;
        }
    }
    assertTrue(found);
    // With weights
    config = new DifferentialExpressionAnalysisConfig();
    config.setUseWeights(true);
    config.setFactorsToInclude(ee.getExperimentalDesign().getExperimentalFactors());
    analyses = analyzer.run(ee, config);
    results = analyses.iterator().next();
    resultSet = results.getResultSets().iterator().next();
    // reset the flag so the weighted-run assertions cannot be skipped silently
    found = false;
    for (DifferentialExpressionAnalysisResult r : resultSet.getResults()) {
        if (r.getProbe().getName().equals("ENSG00000000938")) {
            found = true;
            assertEquals(1, r.getContrasts().size());
            ContrastResult contrast = r.getContrasts().iterator().next();
            // yes!
            assertEquals(2.232816, Math.abs(contrast.getCoefficient()), 0.001);
            assertEquals(0.000311, contrast.getPvalue(), 0.00001);
            assertEquals(56.66342, Math.abs(contrast.getTstat()), 0.001);
            assertEquals(0.007068, r.getPvalue(), 0.00001);
            break;
        }
    }
    assertTrue(found);
}
use of ubic.gemma.core.loader.util.AlreadyExistsInSystemException in project Gemma by PavlidisLab.
the class ExonArrayDataAddIntegrationTest method testAddAffyExonArrayDataExpressionExperiment.
/**
 * Fetches GSE12135 (or reuses the experiment reported by {@link AlreadyExistsInSystemException}) and adds
 * Affymetrix exon array raw data to it. Returns early with a warning when {@code hasApt} is false.
 *
 * @throws Exception if loading or the data update fails
 */
@Test
public void testAddAffyExonArrayDataExpressionExperiment() throws Exception {
    if (!hasApt) {
        log.warn("Test skipped due to lack of Affy Power Tools executable");
        return;
    }
    ExpressionExperiment ee;
    try {
        geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGeneratorLocal(this.getTestFileBasePath()));
        Collection<?> results = geoService.fetchAndLoad("GSE12135", false, true, false);
        ee = (ExpressionExperiment) results.iterator().next();
    } catch (AlreadyExistsInSystemException e) {
        // The payload may be a single experiment or any collection of them (not necessarily a List),
        // so do not cast blindly to List.
        Object existing = e.getData();
        if (existing instanceof Collection) {
            ee = (ExpressionExperiment) ((Collection<?>) existing).iterator().next();
        } else {
            ee = (ExpressionExperiment) existing;
        }
    }
    /*
     * Add the raw data.
     */
    dataUpdater.addAffyExonArrayData(ee);
    experimentService.load(ee.getId());
}
use of ubic.gemma.core.loader.util.AlreadyExistsInSystemException in project Gemma by PavlidisLab.
the class GeoServiceImpl method checkSamplesAreNew.
/**
 * Another common case, typified by samples in GSE3193. We must confirm that all samples included in the data set
 * are not included in other data sets. In GEO this primarily occurs in 'superseries' that combine other series.
 * <p>
 * Samples that appear in multiple series and match an existing GEO bioassay accession are removed from the
 * series, from its sample correspondence and from every dataset subset; a note is appended to the series
 * summaries.
 *
 * @param series the series to check; it is modified in place
 * @throws AlreadyExistsInSystemException if no samples remain after the check
 * @throws IllegalStateException          if fewer than two samples remain after the check
 */
private void checkSamplesAreNew(GeoSeries series) {
    Collection<GeoSample> alreadyLoaded = new HashSet<>();
    for (GeoSample candidate : series.getSamples()) {
        // Samples confined to a single series need no check: if this series is not loaded, then we're
        // guaranteed to be new.
        if (candidate.appearsInMultipleSeries()) {
            Collection<BioAssay> matchingAssays = bioAssayService.findByAccession(candidate.getGeoAccession());
            for (BioAssay assay : matchingAssays) {
                DatabaseEntry entry = assay.getAccession();
                if (entry != null) {
                    String geoId = candidate.getGeoAccession();
                    String storedId = entry.getAccession();
                    boolean sameAccession = storedId.equals(geoId);
                    // only look at the external database once the accession strings agree
                    if (sameAccession && assay.getAccession().getExternalDatabase().getName().equals(GeoServiceImpl.GEO_DB_NAME)) {
                        AbstractGeoService.log.debug(geoId + " appears in an expression experiment already in the system, skipping");
                        alreadyLoaded.add(candidate);
                    }
                }
            }
        }
    }

    boolean anySkipped = !alreadyLoaded.isEmpty();
    if (anySkipped) {
        AbstractGeoService.log.info("Found " + alreadyLoaded.size() + " samples that are already in the system; they will be removed from the new set (example: " + alreadyLoaded.iterator().next().getGeoAccession() + ")");
    }

    // strip the duplicates from the series itself and from its sample correspondence
    for (GeoSample duplicate : alreadyLoaded) {
        series.getSamples().remove(duplicate);
        series.getSampleCorrespondence().removeSample(duplicate.getGeoAccession());
    }

    // ... and from every subset of every dataset
    for (GeoDataset dataset : series.getDatasets()) {
        for (GeoSubset subset : dataset.getSubsets()) {
            for (GeoSample duplicate : alreadyLoaded) {
                subset.getSamples().remove(duplicate);
            }
        }
    }

    // update the description, so we keep some kind of record.
    if (anySkipped) {
        series.setSummaries(series.getSummaries() + "\nNote: " + alreadyLoaded.size() + " samples from this series, which appear in other Expression Experiments in Gemma, " + "were not imported from the GEO source. The following samples were removed: " + StringUtils.join(alreadyLoaded, ","));
    }

    int remaining = series.getSamples().size();
    if (remaining == 0) {
        throw new AlreadyExistsInSystemException("All the samples in " + series + " are in the system already (in other ExpressionExperiments)");
    }
    // we don't really have a lower limit set anywhere else
    if (remaining < 2) {
        throw new IllegalStateException("After removing samples already in the system, this data set is too small to load: " + remaining + " left (removed " + alreadyLoaded.size() + ")");
    }
    AbstractGeoService.log.info("Series now contains " + remaining + " (removed " + alreadyLoaded.size() + ")");
}
use of ubic.gemma.core.loader.util.AlreadyExistsInSystemException in project Gemma by PavlidisLab.
the class GeoServiceImpl method checkForExisting.
/**
 * Verifies that none of the given accessions matches an expression experiment found by
 * {@code expressionExperimentService.findByAccession}.
 *
 * @param projectedAccessions accessions to check; may be null or empty, in which case nothing is checked
 * @throws AlreadyExistsInSystemException on the first accession with a match; the matching experiments are
 *                                        passed to the exception
 */
private void checkForExisting(Collection<DatabaseEntry> projectedAccessions) {
    boolean nothingToCheck = projectedAccessions == null || projectedAccessions.isEmpty();
    if (nothingToCheck) {
        // that's okay, it might have been a GPL.
        return;
    }
    for (DatabaseEntry accession : projectedAccessions) {
        Collection<ExpressionExperiment> matches = expressionExperimentService.findByAccession(accession);
        if (matches.isEmpty()) {
            continue;
        }
        String message = "There is already an expression experiment that matches " + accession.getAccession();
        AbstractGeoService.log.info(message);
        throw new AlreadyExistsInSystemException(message, matches);
    }
}
Aggregations