use of ubic.gemma.model.common.description.LocalFile in project Gemma by PavlidisLab.
the class RawDataFetcher method fetch.
/**
 * Downloads the raw data archive for the given identifier over FTP into a "rawDataFiles"
 * subdirectory of the directory created for that identifier.
 *
 * @param identifier The url for the supplementary file.
 * @return the local files that were obtained, or {@code null} when the remote archive does not exist
 * @throws RuntimeException wrapping the underlying {@link IOException} when the subdirectory cannot be
 *                          created, the FTP connection is lost, or no files were obtained
 */
@Override
public Collection<LocalFile> fetch(String identifier) {
    try {
        // (Re)connect only when there is no usable connection.
        boolean needsConnection = this.ftpClient == null || !this.ftpClient.isConnected();
        if (needsConnection) {
            this.ftpClient = (new GeoUtil()).connect(FTP.BINARY_FILE_TYPE);
        }
        assert this.ftpClient != null;

        File rawDataDir = new File(this.mkdir(identifier), "rawDataFiles");
        // mkdir() is attempted only when the directory is not already readable.
        if (!(rawDataDir.canRead() || rawDataDir.mkdir())) {
            throw new IOException("Could not create the raw data subdirectory");
        }

        final String localPath = this.formLocalFilePath(identifier, rawDataDir);
        final String remotePath = this.formRemoteFilePath(identifier);

        try {
            NetUtils.checkForFile(this.ftpClient, remotePath);
        } catch (FileNotFoundException e) {
            // A missing archive is not an error: clean up, disconnect and report "nothing".
            AbstractFetcher.log.info("There is apparently no raw data archive for " + identifier + "(sought: " + remotePath + ")");
            EntityUtils.deleteFile(rawDataDir);
            // important to do this!
            this.ftpClient.disconnect();
            return null;
        }

        boolean stillConnected = this.ftpClient != null && this.ftpClient.isConnected();
        if (!stillConnected) {
            throw new IOException("Lost FTP connection");
        }

        long expectedSize = this.getExpectedSize(remotePath);
        FutureTask<Boolean> downloadTask = this.defineTask(localPath, remotePath);
        Collection<LocalFile> downloaded = this.doTask(downloadTask, expectedSize, remotePath, localPath);
        if (downloaded != null && !downloaded.isEmpty()) {
            return downloaded;
        }
        throw new IOException("Files were not obtained, or download was cancelled.");
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
use of ubic.gemma.model.common.description.LocalFile in project Gemma by PavlidisLab.
the class NcbiGeneDomainObjectGenerator method processLocalFiles.
/**
 * Parses the gene history, (optional) Ensembl and gene info files synchronously, then starts a
 * background thread that parses the gene2accession file and pushes its results onto the queue.
 * <p>
 * Side effects: replaces {@code supportedTaxaWithNCBIGenes} with the supported taxa for which at
 * least one gene was parsed, and sets {@code producerDone} to true once the background parse has
 * ended, whether it succeeded or failed.
 *
 * @param geneInfoFile       gene info file (plain or compressed)
 * @param gene2AccessionFile gene2accession file, parsed on the background thread
 * @param geneHistoryFile    gene history file
 * @param geneEnsemblFile    gene-to-Ensembl file; may be null, in which case Ensembl ids are not set
 * @param geneDataQueue      queue that receives the parsed gene data
 * @throws IllegalStateException if filtering is enabled but no supported taxa are available
 * @throws RuntimeException      wrapping an {@link IOException} raised while parsing the history,
 *                               Ensembl or gene info files
 */
private void processLocalFiles(final LocalFile geneInfoFile, final LocalFile gene2AccessionFile, LocalFile geneHistoryFile, LocalFile geneEnsemblFile, final BlockingQueue<NcbiGeneData> geneDataQueue) {
    final NcbiGeneInfoParser infoParser = new NcbiGeneInfoParser();
    infoParser.setFilter(this.filter);
    if (this.filter) {
        // Fail with a clear message instead of a bare NullPointerException; the taxa map is
        // treated as optional further down, so it can legitimately be null here.
        if (supportedTaxa == null) {
            throw new IllegalStateException("Taxon filtering is enabled but no supported taxa have been set");
        }
        infoParser.setSupportedTaxa(supportedTaxa.keySet());
    }
    final NcbiGeneEnsemblFileParser ensemblParser = new NcbiGeneEnsemblFileParser();
    final NcbiGene2AccessionParser accParser = new NcbiGene2AccessionParser();
    accParser.setStartingNbiId(startingNcbiId);
    final File gene2accessionFileHandle = gene2AccessionFile.asFile();
    final NcbiGeneHistoryParser historyParser = new NcbiGeneHistoryParser();
    try {
        NcbiGeneDomainObjectGenerator.log.debug("Parsing gene history");
        historyParser.parse(geneHistoryFile.asFile());
        if (geneEnsemblFile != null) {
            NcbiGeneDomainObjectGenerator.log.debug("Parsing ensembl");
            ensemblParser.parse(geneEnsemblFile.asFile());
        }
        final String geneInfoPath = geneInfoFile.asFile().getAbsolutePath();
        NcbiGeneDomainObjectGenerator.log.debug("Parsing GeneInfo =" + geneInfoPath);
        try (InputStream is = FileTools.getInputStreamFromPlainOrCompressedFile(geneInfoPath)) {
            infoParser.parse(is);
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    Collection<NCBIGeneInfo> geneInfoList = infoParser.getResults();
    // Index gene info by gene id and count genes per taxon while attaching history/Ensembl data.
    final Map<String, NCBIGeneInfo> geneInfoMap = new HashMap<>();
    Map<Integer, Integer> taxaCount = new HashMap<>();
    for (NCBIGeneInfo geneInfo : geneInfoList) {
        NcbiGeneHistory history = historyParser.get(geneInfo.getGeneId());
        geneInfo.setHistory(history);
        if (history == null) {
            String discontinuedIdForGene = historyParser.discontinuedIdForSymbol(geneInfo.getDefaultSymbol(), geneInfo.getTaxId());
            geneInfo.setDiscontinuedId(discontinuedIdForGene);
        }
        if (geneEnsemblFile != null) {
            String ensemblId = ensemblParser.get(geneInfo.getGeneId());
            geneInfo.setEnsemblId(ensemblId);
        }
        int taxId = geneInfo.getTaxId();
        Integer seenSoFar = taxaCount.get(taxId);
        taxaCount.put(taxId, seenSoFar == null ? 1 : seenSoFar + 1);
        geneInfoMap.put(geneInfo.getGeneId(), geneInfo);
    }
    supportedTaxaWithNCBIGenes = new HashSet<>();
    if (supportedTaxa != null) {
        // Every key in taxaCount has a count of at least one, so no count check is needed.
        for (Map.Entry<Integer, Integer> entry : taxaCount.entrySet()) {
            Integer taxId = entry.getKey();
            NcbiGeneDomainObjectGenerator.log.debug("Taxon " + taxId + ": " + entry.getValue() + " genes");
            Taxon t = supportedTaxa.get(taxId);
            if (t == null) {
                // Genes were parsed for a taxon that is not in the supported map; do not
                // pollute the result set with a null element.
                continue;
            }
            supportedTaxaWithNCBIGenes.add(t);
        }
    }
    // 1) use a producer-consumer model for Gene2Accession conversion
    // 1a) Parse Gene2Accession until the gene id changes. This means that
    // all accessions for the gene are done.
    // 1b) Create a Collection<Gene2Accession>, and push into BlockingQueue
    Thread parseThread = new Thread(new Runnable() {
        @Override
        public void run() {
            try {
                NcbiGeneDomainObjectGenerator.log.debug("Parsing gene2accession=" + gene2AccessionFile.asFile().getAbsolutePath());
                accParser.parse(gene2accessionFileHandle, geneDataQueue, geneInfoMap);
                NcbiGeneDomainObjectGenerator.log.debug("Domain object generator done");
            } catch (IOException e) {
                NcbiGeneDomainObjectGenerator.log.error("Parsing gene2accession failed", e);
                throw new RuntimeException(e);
            } finally {
                // Always signal completion: if this flag were left unset after a failure,
                // anything waiting on it would never learn that the producer has stopped.
                producerDone.set(true);
            }
        }
    }, "gene2accession parser");
    parseThread.start();
    // 1c) As elements get added to BlockingQueue, NCBIGeneConverter
    // consumes
    // and creates Gene/GeneProduct/DatabaseEntry objects.
    // 1d) Push Gene to another BlockingQueue genePersistence
    // 2) use producer-consumer model for Gene persistence
    // 2a) as elements get added to genePersistence, persist Gene and
    // associated entries.
}
use of ubic.gemma.model.common.description.LocalFile in project Gemma by PavlidisLab.
the class NcbiGeneDomainObjectGenerator method generateLocal.
/**
 * Builds {@code LocalFile} descriptors for files that are already on disk and hands them to
 * {@code processLocalFiles}.
 *
 * @param geneInfoFilePath      path to the gene info file
 * @param gene2AccesionFilePath path to the gene2accession file; asserted to be non-null
 * @param geneHistoryFilePath   path to the gene history file
 * @param geneEnsemblFilePath   path to the gene-to-Ensembl file; when null, a null Ensembl file is passed on
 * @param queue                 queue passed through to {@code processLocalFiles}
 * @throws RuntimeException wrapping any {@link IOException} raised while converting paths to URLs
 */
public void generateLocal(String geneInfoFilePath, String gene2AccesionFilePath, String geneHistoryFilePath, String geneEnsemblFilePath, BlockingQueue<NcbiGeneData> queue) {
    assert gene2AccesionFilePath != null;
    try {
        // Resolve every path to a URL first, in the same order as the parameters.
        final URL infoUrl = new File(geneInfoFilePath).toURI().toURL();
        final URL accessionUrl = new File(gene2AccesionFilePath).toURI().toURL();
        final URL historyUrl = new File(geneHistoryFilePath).toURI().toURL();
        final URL ensemblUrl = geneEnsemblFilePath == null ? null : new File(geneEnsemblFilePath).toURI().toURL();

        final LocalFile infoFile = LocalFile.Factory.newInstance();
        infoFile.setLocalURL(infoUrl);

        final LocalFile accessionFile = LocalFile.Factory.newInstance();
        accessionFile.setLocalURL(accessionUrl);

        final LocalFile historyFile = LocalFile.Factory.newInstance();
        historyFile.setLocalURL(historyUrl);

        // The Ensembl file is optional; it stays null when no path was supplied.
        LocalFile ensemblFile = null;
        if (geneEnsemblFilePath != null) {
            ensemblFile = LocalFile.Factory.newInstance();
            ensemblFile.setLocalURL(ensemblUrl);
        }

        this.processLocalFiles(infoFile, accessionFile, historyFile, ensemblFile, queue);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
use of ubic.gemma.model.common.description.LocalFile in project Gemma by PavlidisLab.
the class NcbiGeneDomainObjectGenerator method generate.
/**
 * Fetches the gene info, gene2accession, gene history and gene-to-Ensembl files (in that order)
 * and passes the first file of each fetch result to {@code processLocalFiles}.
 *
 * @param queue queue passed through to {@code processLocalFiles}
 */
public void generate(final BlockingQueue<NcbiGeneData> queue) {
    NcbiGeneDomainObjectGenerator.log.info("Fetching...");

    final NCBIGeneFileFetcher geneFileFetcher = new NCBIGeneFileFetcher();
    geneFileFetcher.setDoDownload(this.doDownload);

    // Each fetch is followed immediately by taking its first element, so a problem with one
    // file surfaces before the next one is requested.
    Collection<LocalFile> fetched = geneFileFetcher.fetch(NcbiGeneDomainObjectGenerator.GENEINFO_FILE);
    final LocalFile infoFile = fetched.iterator().next();

    fetched = geneFileFetcher.fetch(NcbiGeneDomainObjectGenerator.GENE2ACCESSION_FILE);
    final LocalFile accessionFile = fetched.iterator().next();

    fetched = geneFileFetcher.fetch(NcbiGeneDomainObjectGenerator.GENEHISTORY_FILE);
    final LocalFile historyFile = fetched.iterator().next();

    fetched = geneFileFetcher.fetch(NcbiGeneDomainObjectGenerator.GENEENSEMBL_FILE);
    final LocalFile ensemblFile = fetched.iterator().next();

    this.processLocalFiles(infoFile, accessionFile, historyFile, ensemblFile, queue);
}
use of ubic.gemma.model.common.description.LocalFile in project Gemma by PavlidisLab.
the class StringProteinFileFetcher method unPackFile.
/**
 * Un-gzips every downloaded file and verifies that the unpacked result is readable.
 *
 * @param localFile Collection of File details relating to string download
 * @return the unpacked file for the last entry in the collection, or {@code null} when the
 *         collection is empty
 * @throws RuntimeException if un-gzipping fails or an unpacked file cannot be read
 */
@Override
public File unPackFile(Collection<LocalFile> localFile) {
    File unpacked = null;
    for (LocalFile archive : localFile) {
        final String archivePath = archive.getLocalURL().getFile();
        try {
            FileTools.unGzipFile(archivePath);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        // The unpacked file is expected at the archive path minus its extension.
        unpacked = new File(FileTools.chompExtension(archivePath));
        if (unpacked.canRead()) {
            continue;
        }
        throw new RuntimeException("Problem unpacking file: not readable: " + unpacked.getName());
    }
    return unpacked;
}
Aggregations