use of ubic.gemma.core.loader.expression.geo.GeoDomainObjectGenerator in project Gemma by PavlidisLab.
the class LoadExpressionDataCli method doWork.
/**
 * Runs the loader: parses the command line, then loads every requested GEO accession, taken from the
 * comma-separated command-line list and/or from a file containing one accession per line.
 * <p>
 * When {@code platformOnly} is set, accessions given on the command line are loaded as platforms only and
 * each loaded platform is recorded in {@code successObjects}; otherwise each accession is handed to
 * {@code processAccession}. Accessions read from the file are always handed to {@code processAccession}.
 *
 * @param args the raw command-line arguments
 * @return {@code null} if processing ran to completion; otherwise the exception describing why it stopped
 *         (a command-line problem, missing input, or any exception raised while loading)
 */
@Override
protected Exception doWork(String[] args) {
    Exception err = this.processCommandLine(args);
    if (err != null) {
        return err;
    }
    try {
        GeoService geoService = this.getBean(GeoService.class);
        geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGenerator());
        if (accessions == null && accessionFile == null) {
            return new IllegalArgumentException("You must specify either a file or accessions on the command line");
        }
        if (accessions != null) {
            AbstractCLI.log.info("Got accession(s) from command line " + accessions);
            String[] accsToRun = StringUtils.split(accessions, ',');
            for (String accession : accsToRun) {
                accession = StringUtils.strip(accession);
                if (StringUtils.isBlank(accession)) {
                    continue;
                }
                if (platformOnly) {
                    Collection<?> designs = geoService.fetchAndLoad(accession, true, true, false, true, true);
                    if (designs == null) {
                        // fetchAndLoad returns null when nothing could be generated for the accession;
                        // skip it instead of failing the whole run with a NullPointerException.
                        AbstractCLI.log.warn("No platforms were loaded for " + accession);
                        continue;
                    }
                    ArrayDesignService ads = this.getBean(ArrayDesignService.class);
                    for (Object object : designs) {
                        assert object instanceof ArrayDesign;
                        ArrayDesign ad = (ArrayDesign) object;
                        ad = ads.thawLite(ad);
                        successObjects.add(ad.getName() + " (" + ad.getExternalReferences().iterator().next().getAccession() + ")");
                    }
                } else {
                    this.processAccession(geoService, accession);
                }
            }
        }
        if (accessionFile != null) {
            AbstractCLI.log.info("Loading accessions from " + accessionFile);
            // Both the stream and the reader are managed resources, so the file handle is released
            // even if wrapping the stream fails.
            try (InputStream is = new FileInputStream(accessionFile);
                    BufferedReader br = new BufferedReader(new InputStreamReader(is))) {
                String line;
                while ((line = br.readLine()) != null) {
                    // Strip surrounding whitespace, as is done for command-line accessions.
                    String accession = StringUtils.strip(line);
                    if (StringUtils.isBlank(accession)) {
                        continue;
                    }
                    this.processAccession(geoService, accession);
                }
            }
        }
        this.summarizeProcessing();
    } catch (Exception e) {
        AbstractCLI.log.error(e);
        return e;
    }
    return null;
}
use of ubic.gemma.core.loader.expression.geo.GeoDomainObjectGenerator in project Gemma by PavlidisLab.
the class GeoServiceImpl method addElements.
/**
 * Populates an existing, empty platform with the elements (composite sequences) obtained from GEO for the
 * accession of the platform's first external reference.
 *
 * @param targetPlatform the platform to fill in; it must not have any composite sequences yet
 * @return the same {@code targetPlatform}, after its elements were added and it was updated
 * @throws IllegalArgumentException if the platform already has composite sequences
 * @throws IllegalStateException    if no GEO data could be generated for the platform's accession
 */
@Override
public ArrayDesign addElements(ArrayDesign targetPlatform) {
    if (!targetPlatform.getCompositeSequences().isEmpty()) {
        throw new IllegalArgumentException("Only call this if you are filling in an empty platform");
    }
    String geoAccession = targetPlatform.getExternalReferences().iterator().next().getAccession();

    // The generator has to exist and be configured BEFORE it is asked to generate anything; this is
    // the same sequence fetchAndLoad uses.
    if (this.geoDomainObjectGenerator == null) {
        this.geoDomainObjectGenerator = new GeoDomainObjectGenerator();
    } else {
        this.geoDomainObjectGenerator.initialize();
    }
    geoDomainObjectGenerator.setProcessPlatformsOnly(true);

    Collection<? extends GeoData> platforms = geoDomainObjectGenerator.generate(geoAccession);
    if (platforms.size() == 0) {
        throw new IllegalStateException("No GEO platform data could be generated for " + geoAccession);
    }

    /*
     * We do this to get a fresh instantiation of GeoConverter (prototype scope)
     */
    GeoConverter geoConverter = (GeoConverter) this.beanFactory.getBean("geoConverter");
    geoConverter.setForceConvertElements(true);
    Collection<Object> arrayDesigns = geoConverter.convert(platforms);

    // Only the elements of the first converted platform are used.
    Collection<CompositeSequence> els = ((ArrayDesign) arrayDesigns.iterator().next()).getCompositeSequences();
    for (CompositeSequence cs : els) {
        // Re-parent each element onto the target platform and persist its sequence before attaching it.
        cs.setArrayDesign(targetPlatform);
        cs.setBiologicalCharacteristic((BioSequence) persisterHelper.persist(cs.getBiologicalCharacteristic()));
    }
    AbstractGeoService.log.info("Adding " + els.size() + " elements to " + targetPlatform);
    targetPlatform.getCompositeSequences().addAll(els);
    arrayDesignService.update(targetPlatform);
    this.arrayDesignReportService.generateArrayDesignReport(targetPlatform.getId());
    return targetPlatform;
}
use of ubic.gemma.core.loader.expression.geo.GeoDomainObjectGenerator in project Gemma by PavlidisLab.
the class GeoServiceImpl method fetchAndLoad.
/**
 * Given a GEO GSE or GDS (or GPL, but support might not be complete)
 * <ol>
 * <li>Check that it doesn't already exist in the system</li>
 * <li>Download and parse GDS files and GSE file needed</li>
 * <li>Convert the GDS and GSE into a ExpressionExperiment (or just the ArrayDesigns)</li>
 * <li>Load the resulting ExpressionExperiment and/or ArrayDesigns into Gemma</li>
 * </ol>
 *
 * @param geoAccession           the GEO accession to load; an accession starting with "GPL" is always
 *                               generated as platforms only
 * @param loadPlatformOnly       if true, only the platforms are converted and persisted
 * @param doSampleMatching       whether to match samples; only honoured when {@code splitByPlatform} is false
 * @param splitByPlatform        passed to the converter to split the result by platform
 * @param allowSuperSeriesImport if false, a super-series is rejected
 * @param allowSubSeriesImport   if false, a sub-series is rejected
 * @return the persisted platforms or expression experiments, or {@code null} if GEO yielded no results
 * @throws IllegalStateException if a super-series or sub-series is detected and the matching flag is false
 * @throws RuntimeException      if the first generated object is not a {@code GeoSeries} when loading an
 *                               experiment
 */
@Override
public Collection<?> fetchAndLoad(String geoAccession, boolean loadPlatformOnly, boolean doSampleMatching, boolean splitByPlatform, boolean allowSuperSeriesImport, boolean allowSubSeriesImport) {
    /*
     * We do this to get a fresh instantiation of GeoConverter (prototype scope)
     */
    GeoConverter geoConverter = (GeoConverter) this.beanFactory.getBean("geoConverter");
    if (this.geoDomainObjectGenerator == null) {
        this.geoDomainObjectGenerator = new GeoDomainObjectGenerator();
    } else {
        this.geoDomainObjectGenerator.initialize();
    }
    geoDomainObjectGenerator.setProcessPlatformsOnly(geoAccession.startsWith("GPL") || loadPlatformOnly);
    geoDomainObjectGenerator.setDoSampleMatching(doSampleMatching && !splitByPlatform);

    Collection<DatabaseEntry> projectedAccessions = geoDomainObjectGenerator.getProjectedAccessions(geoAccession);
    this.checkForExisting(projectedAccessions);

    // Generation and the empty-result check are the same for platforms and experiments.
    Collection<? extends GeoData> parseResult = geoDomainObjectGenerator.generate(geoAccession);
    if (parseResult.size() == 0) {
        AbstractGeoService.log.warn("Got no results");
        return null;
    }

    if (loadPlatformOnly) {
        geoConverter.setForceConvertElements(true);
        Collection<Object> arrayDesigns = geoConverter.convert(parseResult);
        return persisterHelper.persist(arrayDesigns);
    }

    AbstractGeoService.log.debug("Generated GEO domain objects for " + geoAccession);
    Object obj = parseResult.iterator().next();
    if (!(obj instanceof GeoSeries)) {
        throw new RuntimeException("Got a " + obj.getClass().getName() + " instead of a " + GeoSeries.class.getName() + " (you may need to load platforms only).");
    }
    GeoSeries series = (GeoSeries) obj;
    String seriesAccession = series.getGeoAccession();

    if (series.isSuperSeries()) {
        if (allowSuperSeriesImport) {
            AbstractGeoService.log.info(" ========= SuperSeries Detected! =========");
            AbstractGeoService.log.info("Please make sure you want to import this as a superseries and not the individual subseries");
        } else {
            throw new IllegalStateException("SuperSeries detected, set 'allowSuperSeriesImport' to 'true' to allow this dataset to load");
        }
    }
    if (series.isSubSeries()) {
        if (allowSubSeriesImport) {
            AbstractGeoService.log.info(" ========= Subseries Detected! =========");
            AbstractGeoService.log.info("Please make sure you want to import this as a subseries and not the superseries");
        } else {
            throw new IllegalStateException("SubSeries detected, set 'allowSubSeriesImport' to 'true' to allow this dataset to load");
        }
    }

    this.confirmPlatformUniqueness(series, doSampleMatching && !splitByPlatform);
    ArrayDesignsForExperimentCache c = new ArrayDesignsForExperimentCache();
    this.matchToExistingPlatforms(geoConverter, series, c);
    this.checkSamplesAreNew(series);
    this.getSubSeriesInformation(series);

    geoConverter.clear();
    geoConverter.setSplitByPlatform(splitByPlatform);
    // noinspection unchecked
    Collection<ExpressionExperiment> result = (Collection<ExpressionExperiment>) geoConverter.convert(series);
    this.check(result);
    this.getPubMedInfo(result);
    AbstractGeoService.log.debug("Converted " + seriesAccession);

    assert persisterHelper != null;
    Collection<ExpressionExperiment> persistedResult = new HashSet<>();
    for (ExpressionExperiment ee : result) {
        // The cache returned by prepare() is carried forward to the persist call and to later iterations.
        c = expressionExperimentPrePersistService.prepare(ee, c);
        ee = persisterHelper.persist(ee, c);
        persistedResult.add(ee);
        AbstractGeoService.log.debug("Persisted " + seriesAccession);
    }
    this.updateReports(persistedResult);
    return persistedResult;
}
use of ubic.gemma.core.loader.expression.geo.GeoDomainObjectGenerator in project Gemma by PavlidisLab.
the class MeanVarianceServiceTest method testServiceCreateCountData.
/**
 * Loads GSE29006 from GEO, attaches count and RPKM data read from test resources, then checks the
 * mean-variance relation computed for the experiment (array lengths plus the values at two positions of
 * the sorted arrays).
 */
@Test
public final void testServiceCreateCountData() throws Exception {
    // so it doesn't look for soft files
    geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGenerator());

    // Start from a clean slate: drop any copy of the experiment left behind by an earlier run.
    ee = eeService.findByShortName("GSE29006");
    if (ee != null) {
        eeService.remove(ee);
    }
    assertNull(eeService.findByShortName("GSE29006"));

    try {
        Collection<?> results = geoService.fetchAndLoad("GSE29006", false, false, false);
        ee = (ExpressionExperiment) results.iterator().next();
    } catch (AlreadyExistsInSystemException e) {
        // Keep the original exception as the cause so the failure can be diagnosed.
        throw new IllegalStateException("Need to remove this data set before test is run", e);
    }
    ee = eeService.thaw(ee);
    qt = this.createOrUpdateQt(ScaleType.COUNT);

    // Load the data from a text file.
    DoubleMatrixReader reader = new DoubleMatrixReader();
    try (InputStream countData = this.getClass().getResourceAsStream("/data/loader/expression/flatfileload/GSE29006_expression_count.test.txt");
            InputStream rpkmData = this.getClass().getResourceAsStream("/data/loader/expression/flatfileload/GSE29006_expression_RPKM.test.txt")) {
        DoubleMatrix<String, String> countMatrix = reader.read(countData);
        DoubleMatrix<String, String> rpkmMatrix = reader.read(rpkmData);
        List<String> probeNames = countMatrix.getRowNames();

        // we have to find the right generic platform to use.
        ArrayDesign targetArrayDesign = this.getTestPersistentArrayDesign(probeNames, taxonService.findByCommonName("human"));
        targetArrayDesign = arrayDesignService.thaw(targetArrayDesign);

        // With the last flag false the call is expected to be rejected ...
        try {
            dataUpdater.addCountData(ee, targetArrayDesign, countMatrix, rpkmMatrix, 36, true, false);
            fail("Should have gotten an exception");
        } catch (IllegalArgumentException e) {
            // Expected
        }
        // ... and with the last flag true it is expected to go through.
        dataUpdater.addCountData(ee, targetArrayDesign, countMatrix, rpkmMatrix, 36, true, true);
    }

    ee = eeService.thaw(this.ee);
    assertNotNull(ee.getId());
    MeanVarianceRelation mvr = meanVarianceService.create(ee, true);

    // convert byte[] to array[]
    double[] means = MeanVarianceServiceTest.bac.byteArrayToDoubles(mvr.getMeans());
    double[] variances = MeanVarianceServiceTest.bac.byteArrayToDoubles(mvr.getVariances());

    // Fail with a proper test assertion (not the 'assert' keyword, which can be disabled) before use.
    assertNotNull(means);
    assertNotNull(variances);

    // warning: order may have changed, so compare sorted values
    Arrays.sort(means);
    Arrays.sort(variances);

    // check sizes
    int expectedMeanVarianceLength = 199;
    // NAs removed
    int expectedLowessLength = 197;
    assertEquals(expectedMeanVarianceLength, means.length);
    assertEquals(expectedMeanVarianceLength, variances.length);

    int idx = 0;
    assertEquals(1.037011, means[idx], 0.0001);
    assertEquals(0.00023724336, variances[idx], 0.000001);
    idx = expectedLowessLength - 1;
    assertEquals(15.23313, means[idx], 0.0001);
    assertEquals(4.84529, variances[idx], 0.0001);
}
use of ubic.gemma.core.loader.expression.geo.GeoDomainObjectGenerator in project Gemma by PavlidisLab.
the class DataUpdaterTest method testLoadRNASeqData.
/*
 * A more realistic RNA-seq scenario based on GSE19166: the experiment is fetched (or reused if it is
 * already in the system), count and RPKM matrices are attached from test resources, and the resulting
 * quantitation types, platform assignment and data vectors are verified.
 */
@Test
public void testLoadRNASeqData() throws Exception {
    geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGenerator());

    ExpressionExperiment ee;
    try {
        Collection<?> loaded = geoService.fetchAndLoad("GSE19166", false, false, false);
        ee = (ExpressionExperiment) loaded.iterator().next();
    } catch (AlreadyExistsInSystemException alreadyThere) {
        // Reuse the experiment carried by the exception.
        List<?> existing = (List<?>) alreadyThere.getData();
        ee = (ExpressionExperiment) existing.get(0);
    }
    ee = experimentService.thaw(ee);

    // Read both matrices from the bundled text files.
    DoubleMatrixReader matrixReader = new DoubleMatrixReader();
    try (InputStream countStream = this.getClass().getResourceAsStream("/data/loader/expression/flatfileload/GSE19166_expression_count.test.txt");
            InputStream rpkmStream = this.getClass().getResourceAsStream("/data/loader/expression/flatfileload/GSE19166_expression_RPKM.test.txt")) {
        DoubleMatrix<String, String> counts = matrixReader.read(countStream);
        DoubleMatrix<String, String> rpkm = matrixReader.read(rpkmStream);

        List<String> probeIds = counts.getRowNames();
        assertEquals(199, probeIds.size());

        // A generic platform matching the probe names is needed as the target.
        targetArrayDesign = this.getTestPersistentArrayDesign(probeIds, taxonService.findByCommonName("human"));
        targetArrayDesign = arrayDesignService.thaw(targetArrayDesign);
        assertEquals(199, targetArrayDesign.getCompositeSequences().size());

        // The step under test.
        dataUpdater.addCountData(ee, targetArrayDesign, counts, rpkm, 36, true, false);
    }
    ee = experimentService.thaw(ee);

    // Expected quantitation types: log2cpm, counts, rpkm, and counts-masked ('preferred')
    assertEquals(4, ee.getQuantitationTypes().size());

    for (BioAssay assay : ee.getBioAssays()) {
        assertEquals(targetArrayDesign, assay.getArrayDesignUsed());
    }

    assertNotNull(ee.getNumberOfDataVectors());
    assertEquals(199, ee.getNumberOfDataVectors().intValue());

    // GSM475204 GSM475205 GSM475206 GSM475207 GSM475208 GSM475209
    // 3949585 3929008 3712314 3693219 3574068 3579631
    ExpressionDataDoubleMatrix processedMatrix = dataMatrixService.getProcessedExpressionDataMatrix(ee);
    assertEquals(199, processedMatrix.rows());
    TestUtils.assertBAs(ee, targetArrayDesign, "GSM475204", 3949585);

    assertEquals(3 * 199, ee.getRawExpressionDataVectors().size());
    assertEquals(199, ee.getProcessedExpressionDataVectors().size());

    Collection<DoubleVectorValueObject> vectors = dataVectorService.getProcessedDataArrays(ee);
    assertEquals(199, vectors.size());
    for (DoubleVectorValueObject vector : vectors) {
        assertEquals(6, vector.getBioAssays().size());
    }

    assertTrue(!dataVectorService.getProcessedDataVectors(experimentService.load(ee.getId())).isEmpty());
}
Aggregations