use of ubic.gemma.model.common.description.LocalFile in project Gemma by PavlidisLab.
the class BatchInfoParser method matchBioAssaysToRawDataFiles.
/**
 * From the file names, match to the bioassays. GEO names things consistently (??) so this should work but not
 * ideal.
 * <p>
 * A file is skipped when its name does not start with "GSM", when its name contains one of the extensions .CHP,
 * .DAT, .EXP, .RPT or .TIF (compared case-insensitively), or when its GSM accession is not a key of
 * {@code assayAccessions}. When several files match the same bioassay, a file whose name contains ".CEL" is never
 * displaced; otherwise the file seen last replaces the earlier one.
 *
 * @param files           candidate raw data files
 * @param assayAccessions bioassays keyed by their GSM accession
 * @return map of bioassay to the file selected for it; bioassays for which no file matched are absent
 */
private Map<BioAssay, File> matchBioAssaysToRawDataFiles(Collection<LocalFile> files, Map<String, BioAssay> assayAccessions) {
    Pattern regex = Pattern.compile("(GSM[0-9]+).+");
    Map<BioAssay, File> bioAssays2Files = new HashMap<>();
    for (LocalFile file : files) {
        File f = file.asFile();
        String n = f.getName();
        /*
         * We only support the newer style of storing these.
         */
        if (!n.startsWith("GSM")) {
            continue;
        }
        // Upper-case once, with a fixed locale: the default-locale variant maps 'i' to a dotted capital under
        // Turkish/Azeri locales, which would make the ".TIF" check miss.
        String upperName = n.toUpperCase(java.util.Locale.ROOT);
        if (upperName.contains(".CHP") || upperName.contains(".DAT") || upperName.contains(".EXP")
                || upperName.contains(".RPT") || upperName.contains(".TIF")) {
            continue;
        }
        /*
         * keep just the GSMNNNNNN part. FIXME: only works with GEO
         */
        Matcher matcher = regex.matcher(n);
        if (!matcher.matches()) {
            continue;
        }
        String acc = matcher.group(1);
        BioAssay ba = assayAccessions.get(acc);
        if (ba == null) {
            /*
             * Warn? Throw exception?
             */
            continue;
        }
        File previous = bioAssays2Files.get(ba);
        if (previous != null) {
            /*
             * Don't clobber a valid file. For affymetrix, CEL is what we want. Other cases harder to predict, but
             * .txt files can be either good or bad. (We could do this check earlier)
             */
            if (previous.getName().toUpperCase(java.util.Locale.ROOT).contains(".CEL")) {
                BatchInfoParser.log.debug("Retaining CEL file, ignoring " + f.getName());
                continue;
            }
            // A CEL file displaces the old file silently; any other replacement is worth a warning.
            if (!upperName.contains(".CEL")) {
                BatchInfoParser.log.warn("Multiple files matching " + ba + ": " + previous + "; using new file: " + f);
            }
        }
        bioAssays2Files.put(ba, f);
    }
    return bioAssays2Files;
}
use of ubic.gemma.model.common.description.LocalFile in project Gemma by PavlidisLab.
the class DataUpdater method reprocessAffyThreePrimeArrayData.
/**
 * This replaces the existing raw data with the CEL file data. CEL file(s) must be found by configuration.
 * <p>
 * The raw files are fetched using the experiment's accession, one set of vectors is computed per platform used
 * by the experiment (all sharing a single newly created quantitation type), and the combined vectors replace
 * the experiment's raw vectors. Postprocessing is only run when the experiment uses exactly one platform.
 *
 * @param ee the experiment to reprocess; it is thawed (lite) before use
 * @return the experiment as returned by the raw vector replacement
 * @throws RuntimeException      if no raw data files could be fetched
 * @throws IllegalStateException if processing produced no vectors
 */
// Possible external use
@SuppressWarnings("UnusedReturnValue")
public ExpressionExperiment reprocessAffyThreePrimeArrayData(ExpressionExperiment ee) {
    Collection<ArrayDesign> arrayDesignsUsed = this.experimentService.getArrayDesignsUsed(ee);
    ee = experimentService.thawLite(ee);
    RawDataFetcher f = new RawDataFetcher();
    Collection<LocalFile> files = f.fetch(ee.getAccession().getAccession());
    // Treat a null result like an empty one so the failure is reported clearly instead of as a bare NPE.
    if (files == null || files.isEmpty()) {
        throw new RuntimeException("Data was apparently not available");
    }
    Collection<RawExpressionDataVector> vectors = new HashSet<>();
    // Use the same QT for each one
    QuantitationType qt = AffyPowerToolsProbesetSummarize.makeAffyQuantitationType();
    qt = quantitationTypeService.create(qt);
    for (ArrayDesign ad : arrayDesignsUsed) {
        DataUpdater.log.info("Processing data for " + ad);
        // The CDF is looked up on the platform as obtained from the service, before it is thawed.
        String cdfFileName = this.findCdf(ad).getAbsolutePath();
        ad = arrayDesignService.thaw(ad);
        AffyPowerToolsProbesetSummarize apt = new AffyPowerToolsProbesetSummarize(qt);
        vectors.addAll(apt.processThreeprimeArrayData(ee, cdfFileName, ad, files));
    }
    if (vectors.isEmpty()) {
        throw new IllegalStateException("No vectors were returned for " + ee);
    }
    ee = experimentService.replaceRawVectors(ee, vectors);
    this.audit(ee, "Data vector computation from CEL files using AffyPowerTools for " + StringUtils.join(arrayDesignsUsed, "; "), true);
    if (arrayDesignsUsed.size() == 1) {
        this.postprocess(ee);
    } else {
        DataUpdater.log.warn("Skipping postprocessing for mult-platform experiment");
    }
    return ee;
}
use of ubic.gemma.model.common.description.LocalFile in project Gemma by PavlidisLab.
the class GeoDomainObjectGenerator method processPlatform.
/**
 * Fetches the file for a single GEO platform, parses it with the parser switched to platforms-only mode, and
 * returns the platform registered under the given accession in the first parse result.
 *
 * @param geoAccession accession passed to the platform fetcher and used as the key into the parsed platform map
 * @return the parsed platform stored under {@code geoAccession}
 * @throws RuntimeException if no platform file was fetched, or if parsing fails with an I/O error (wrapped)
 */
private GeoPlatform processPlatform(String geoAccession) {
    assert platformFetcher != null;
    Collection<LocalFile> platforms = platformFetcher.fetch(geoAccession);
    // An empty result is as unusable as a null one; report both explicitly rather than letting
    // iterator().next() fail with an unexplained NoSuchElementException.
    if (platforms == null || platforms.isEmpty()) {
        throw new RuntimeException("No platform file found for " + geoAccession);
    }
    // Only the first fetched file is used.
    LocalFile platformFile = platforms.iterator().next();
    String platformPath = platformFile.getLocalURL().getPath();
    parser.setProcessPlatformsOnly(true);
    try {
        parser.parse(platformPath);
    } catch (IOException e1) {
        throw new RuntimeException(e1);
    }
    return ((GeoParseResult) parser.getResults().iterator().next()).getPlatformMap().get(geoAccession);
}
use of ubic.gemma.model.common.description.LocalFile in project Gemma by PavlidisLab.
the class GeoDomainObjectGenerator method fetchDataSetToLocalFile.
/**
 * Fetches the file for a GEO data set and reports the path of its local copy.
 *
 * @param geoDataSetAccession accession handed to the data set fetcher
 * @return path component of the fetched file's local URL, or null when the fetcher returned null
 * @throws IllegalStateException if the fetcher returned anything other than exactly one file
 */
private String fetchDataSetToLocalFile(String geoDataSetAccession) {
    Collection<LocalFile> fetched = datasetFetcher.fetch(geoDataSetAccession);
    if (fetched == null) {
        return null;
    }
    int fileCount = fetched.size();
    if (fileCount != 1) {
        throw new IllegalStateException("Got " + fileCount + " files for " + geoDataSetAccession + ", expected only one.");
    }
    // Exactly one element is present at this point.
    return fetched.iterator().next().getLocalURL().getPath();
}
use of ubic.gemma.model.common.description.LocalFile in project Gemma by PavlidisLab.
the class GeoDomainObjectGenerator method processSeriesPlatforms.
/**
 * Fetches the file for a GEO series, parses it, and returns the platforms found in the first parse result. The
 * parser's platforms-only mode is set from this object's {@code processPlatformsOnly} setting before parsing.
 *
 * @param seriesAccession series accession
 * @return the values of the platform map of the first parse result
 * @throws RuntimeException if no series file was fetched, or if parsing fails with an I/O error (wrapped)
 */
private Collection<GeoPlatform> processSeriesPlatforms(String seriesAccession) {
    Collection<LocalFile> fullSeries = seriesFetcher.fetch(seriesAccession);
    // An empty result is as unusable as a null one; report both explicitly rather than letting
    // iterator().next() fail with an unexplained NoSuchElementException.
    if (fullSeries == null || fullSeries.isEmpty()) {
        throw new RuntimeException("No series file found for " + seriesAccession);
    }
    // Only the first fetched file is used.
    LocalFile seriesFile = fullSeries.iterator().next();
    String seriesPath = seriesFile.getLocalURL().getPath();
    parser.setProcessPlatformsOnly(this.processPlatformsOnly);
    try {
        parser.parse(seriesPath);
    } catch (IOException e1) {
        throw new RuntimeException(e1);
    }
    return ((GeoParseResult) parser.getResults().iterator().next()).getPlatformMap().values();
}
Aggregations