use of ubic.gemma.model.common.description.LocalFile in project Gemma by PavlidisLab.
the class GeoDomainObjectGenerator method processSeries.
/**
 * Download and parse a GEO series, process any GEO datasets associated with it, and attach the sample
 * correspondence.
 *
 * @param seriesAccession series accession
 * @return the parsed series with its sample correspondence set, or null if the fetcher supplied no series file
 * @throws RuntimeException if the series file cannot be read, or if parsing yields no series for the accession
 */
private GeoSeries processSeries(String seriesAccession) {
    Collection<LocalFile> fullSeries = seriesFetcher.fetch(seriesAccession);
    // An empty collection is handled like null; otherwise iterator().next() below would throw
    // NoSuchElementException.
    if (fullSeries == null || fullSeries.isEmpty()) {
        GeoDomainObjectGenerator.log.warn("No series file found for " + seriesAccession);
        return null;
    }

    // Only the first fetched file is parsed.
    LocalFile seriesFile = fullSeries.iterator().next();
    String seriesPath = seriesFile.getLocalURL().getPath();
    parser.setProcessPlatformsOnly(this.processPlatformsOnly);
    try {
        parser.parse(seriesPath);
    } catch (IOException e1) {
        throw new RuntimeException(e1);
    }

    // Guard against a parse that produced nothing, so the caller gets a descriptive error rather than a
    // NoSuchElementException / NullPointerException.
    Collection<?> parseResults = parser.getResults();
    if (parseResults == null || parseResults.isEmpty()) {
        throw new RuntimeException("No series was parsed for " + seriesAccession);
    }

    // Only allow one series...
    GeoSeries series = ((GeoParseResult) parseResults.iterator().next()).getSeriesMap().get(seriesAccession);
    if (series == null) {
        throw new RuntimeException("No series was parsed for " + seriesAccession);
    }

    Collection<String> datasetsToProcess = DatasetCombiner.findGDSforGSE(seriesAccession);
    if (datasetsToProcess != null) {
        for (String dataSetAccession : datasetsToProcess) {
            this.processDataSet(series, dataSetAccession);
        }
    }

    DatasetCombiner datasetCombiner = new DatasetCombiner(this.doSampleMatching);
    GeoSampleCorrespondence correspondence = datasetCombiner.findGSECorrespondence(series);
    assert correspondence != null;
    series.setSampleCorrespondence(correspondence);
    return series;
}
use of ubic.gemma.model.common.description.LocalFile in project Gemma by PavlidisLab.
the class AffyPowerToolsProbesetSummarize method getCelFiles.
/**
 * Collects the absolute paths of the readable CEL files (names ending in .CEL or .CEL.GZ, case-insensitive)
 * among the given files. Gzipped files are unzipped and the path returned by the unzip step is used instead.
 *
 * @param files                files to inspect
 * @param accessionsOfInterest Used for multiplatform studies; if null, ignored. Otherwise a file is kept only
 *                             if the GSM accession extracted from its name is contained in this collection.
 * @return paths of the usable CEL files, without duplicates, in no particular order
 * @throws IllegalArgumentException if no usable CEL file was found
 * @throws RuntimeException         if a file's URL cannot be converted to a URI or unzipping fails
 */
private List<String> getCelFiles(Collection<LocalFile> files, Collection<String> accessionsOfInterest) {
    Set<String> paths = new HashSet<>();

    for (LocalFile localFile : files) {
        File candidate;
        try {
            candidate = new File(localFile.getLocalURL().toURI());
        } catch (URISyntaxException e) {
            throw new RuntimeException(e);
        }

        // Packed and unpacked copies of the same file may both be present and are both examined; per the
        // original author's note the duplicates get resolved, it is just a little ugly.
        if (!candidate.canRead()) {
            continue;
        }
        String upperCaseName = candidate.getName().toUpperCase();
        if (!upperCaseName.endsWith(".CEL") && !upperCaseName.endsWith(".CEL.GZ")) {
            continue;
        }

        if (accessionsOfInterest != null) {
            // Reduce the file name to its leading GSM accession.
            String accession = candidate.getName().replaceAll("(GSM[0-9]+).+", "$1");
            if (!accessionsOfInterest.contains(accession)) {
                continue;
            }
        }

        if (!FileTools.isGZipped(candidate.getName())) {
            AffyPowerToolsProbesetSummarize.log.info("Found CEL file " + candidate);
            paths.add(candidate.getAbsolutePath());
            continue;
        }

        AffyPowerToolsProbesetSummarize.log.info("Found CEL file " + candidate + ", unzipping");
        try {
            paths.add(FileTools.unGzipFile(candidate.getAbsolutePath()));
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    if (paths.isEmpty()) {
        throw new IllegalArgumentException("No valid CEL files were found");
    }
    return new ArrayList<>(paths);
}
use of ubic.gemma.model.common.description.LocalFile in project Gemma by PavlidisLab.
the class AffyPowerToolsProbesetSummarize method tryRun.
/**
 * Runs the probeset summarization command as an external process on the CEL files found among {@code files},
 * periodically logs progress while waiting for it to terminate, and then parses the summary file it wrote.
 *
 * @param ee                   experiment the data belongs to; determines the output directory
 * @param targetPlatform       platform passed to the command builder and to the result parser
 * @param files                candidate files; only usable CEL files are passed to the command
 * @param accessionsOfInterest sample accessions to restrict to; passed through to the CEL file selection
 * @param threePrime           if true the three-prime summarization command is used, otherwise the default one
 * @param cdfFileName          CDF file name, only used when {@code threePrime} is true
 * @return the vectors parsed from the summary output
 * @throws RuntimeException if the process cannot be started, reading fails, or the waiting thread is
 *                          interrupted (the interrupt flag is restored before throwing)
 */
private Collection<RawExpressionDataVector> tryRun(ExpressionExperiment ee, ArrayDesign targetPlatform, Collection<LocalFile> files, Collection<String> accessionsOfInterest, boolean threePrime, String cdfFileName) {
    List<String> celFiles = this.getCelFiles(files, accessionsOfInterest);
    AffyPowerToolsProbesetSummarize.log.info("Located " + celFiles.size() + " cel files");
    String outputPath = this.getOutputFilePath(ee);
    String cmd;
    if (threePrime) {
        cmd = this.getThreePrimeSummarizationCommand(targetPlatform, cdfFileName, celFiles, outputPath);
    } else {
        cmd = this.getCommand(targetPlatform, celFiles, outputPath);
    }
    AffyPowerToolsProbesetSummarize.log.info("Running: " + cmd);

    StopWatch overallWatch = new StopWatch();
    overallWatch.start();
    try {
        final Process run = Runtime.getRuntime().exec(cmd);
        // Drain both output streams on separate threads while we wait.
        GenericStreamConsumer gscErr = new GenericStreamConsumer(run.getErrorStream());
        GenericStreamConsumer gscIn = new GenericStreamConsumer(run.getInputStream());
        gscErr.start();
        gscIn.start();

        // The tool's own log file; its size is reported as a rough progress indicator.
        File outputFile = new File(outputPath + File.separator + "apt-probeset-summarize.log");
        int exitVal;
        while (true) {
            try {
                exitVal = run.exitValue();
                break; // finished: do not sleep for another interval
            } catch (IllegalThreadStateException ignored) {
                // Process has not terminated yet; wait and report progress below.
            }
            Thread.sleep(AffyPowerToolsProbesetSummarize.AFFY_UPDATE_INTERVAL_MS);
            long size = outputFile.length();
            String minutes = TimeUtil.getMinutesElapsed(overallWatch);
            AffyPowerToolsProbesetSummarize.log.info(String.format("apt-probeset-summarize logging output so far: %.2f", size / 1024.0) + " kb (" + minutes + " minutes elapsed)");
        }

        overallWatch.stop();
        String minutes = TimeUtil.getMinutesElapsed(overallWatch);
        AffyPowerToolsProbesetSummarize.log.info("apt-probeset-summarize took a total of " + minutes + " minutes");
        if (exitVal != 0) {
            // Previously the exit status was ignored entirely; surface it, but still try to read the output
            // so existing callers see the same control flow as before.
            AffyPowerToolsProbesetSummarize.log.warn("apt-probeset-summarize exited with non-zero status " + exitVal);
        }
        return this.processData(ee, outputPath + File.separator + AffyPowerToolsProbesetSummarize.METHOD + ".summary.txt", targetPlatform);
    } catch (InterruptedException e) {
        // Restore the interrupt flag so code further up the stack can observe the interruption.
        Thread.currentThread().interrupt();
        throw new RuntimeException(e);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
use of ubic.gemma.model.common.description.LocalFile in project Gemma by PavlidisLab.
the class GeoFetcher method getFile.
/**
 * Wraps the local file obtained for {@code seekFileName} in a new, mutable single-element collection and logs
 * that it was found.
 *
 * @param accession    accession the file belongs to; only used in the log message
 * @param seekFileName name of the file, passed to {@code fetchedFile}
 * @return a new collection containing the one local file
 */
Collection<LocalFile> getFile(String accession, String seekFileName) {
    Collection<LocalFile> files = new HashSet<>();
    LocalFile fetched = this.fetchedFile(seekFileName);
    AbstractFetcher.log.info("Found " + seekFileName + " for experiment(set) " + accession + ".");
    files.add(fetched);
    return files;
}
use of ubic.gemma.model.common.description.LocalFile in project Gemma by PavlidisLab.
the class LocalDatasetFetcher method fetch.
/**
 * Looks for the series file of the given accession under the configured local path.
 *
 * @param accession accession whose file name is built as {@code localPath + "/" + accession + SOFT_GZ}
 * @return collection holding the local file, as produced by {@code getFile}
 * @throws RuntimeException if the expected file is not readable
 */
@Override
public Collection<LocalFile> fetch(String accession) {
    log.info("Seeking GSE file for " + accession);
    assert localPath != null;

    String seekFileName = localPath + "/" + accession + SOFT_GZ;
    if (!new File(seekFileName).canRead()) {
        throw new RuntimeException("Failed to find " + seekFileName);
    }
    return getFile(accession, seekFileName);
}
Aggregations