Search in sources:

Example 31 with LocalFile

use of ubic.gemma.model.common.description.LocalFile in project Gemma by PavlidisLab.

the class RawDataFetcher method fetch.

/**
 * Looks for the raw data archive for the given identifier on the FTP server and, if it is there, runs the
 * fetch task that places it in a "rawDataFiles" subdirectory.
 *
 * @param identifier The url for the supplementary file.
 * @return local files, or null if the remote archive was not found (the FTP client is disconnected and the
 *         subdirectory is deleted in that case)
 * @throws RuntimeException wrapping any IOException, e.g. when the subdirectory cannot be created, the FTP
 *                          connection is lost, or no files were obtained
 */
@Override
public Collection<LocalFile> fetch(String identifier) {
    try {
        // Connect lazily: only when there is no live client yet.
        boolean haveConnection = this.ftpClient != null && this.ftpClient.isConnected();
        if (!haveConnection) {
            this.ftpClient = (new GeoUtil()).connect(FTP.BINARY_FILE_TYPE);
        }
        assert this.ftpClient != null;

        File rawDataDir = new File(this.mkdir(identifier), "rawDataFiles");
        if (!rawDataDir.canRead() && !rawDataDir.mkdir()) {
            throw new IOException("Could not create the raw data subdirectory");
        }

        final String localPath = this.formLocalFilePath(identifier, rawDataDir);
        final String remotePath = this.formRemoteFilePath(identifier);

        try {
            NetUtils.checkForFile(this.ftpClient, remotePath);
        } catch (FileNotFoundException e) {
            // A missing archive is not an error: clean up and report "nothing" to the caller.
            AbstractFetcher.log.info("There is apparently no raw data archive for " + identifier + "(sought: " + remotePath + ")");
            EntityUtils.deleteFile(rawDataDir);
            // important to do this!
            this.ftpClient.disconnect();
            return null;
        }

        boolean stillConnected = this.ftpClient != null && this.ftpClient.isConnected();
        if (!stillConnected) {
            throw new IOException("Lost FTP connection");
        }

        long expectedSize = this.getExpectedSize(remotePath);
        FutureTask<Boolean> downloadTask = this.defineTask(localPath, remotePath);
        Collection<LocalFile> fetched = this.doTask(downloadTask, expectedSize, remotePath, localPath);
        if (fetched == null || fetched.isEmpty()) {
            throw new IOException("Files were not obtained, or download was cancelled.");
        }
        return fetched;
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
Also used : LocalFile(ubic.gemma.model.common.description.LocalFile) FileNotFoundException(java.io.FileNotFoundException) GeoUtil(ubic.gemma.core.loader.expression.geo.util.GeoUtil) IOException(java.io.IOException) File(java.io.File) LocalFile(ubic.gemma.model.common.description.LocalFile)

Example 32 with LocalFile

use of ubic.gemma.model.common.description.LocalFile in project Gemma by PavlidisLab.

the class NcbiGeneDomainObjectGenerator method processLocalFiles.

/**
 * Parses the gene history, optional Ensembl and gene info files synchronously, links the parsed gene info
 * records to their history / Ensembl ids, records which supported taxa have genes, and then starts a
 * background thread that parses the gene2accession file into the supplied queue.
 *
 * @param geneInfoFile       gene info file; read as plain or compressed
 * @param gene2AccessionFile gene2accession file; parsed on the "gene2accession parser" thread
 * @param geneHistoryFile    gene history file
 * @param geneEnsemblFile    Ensembl mapping file; may be null, in which case no Ensembl ids are set
 * @param geneDataQueue      queue handed to the gene2accession parser
 * @throws RuntimeException wrapping an IOException raised while parsing the history, Ensembl or gene info files
 */
private void processLocalFiles(final LocalFile geneInfoFile, final LocalFile gene2AccessionFile, LocalFile geneHistoryFile, LocalFile geneEnsemblFile, final BlockingQueue<NcbiGeneData> geneDataQueue) {
    final NcbiGeneInfoParser infoParser = new NcbiGeneInfoParser();
    infoParser.setFilter(this.filter);
    if (this.filter) {
        infoParser.setSupportedTaxa(supportedTaxa.keySet());
    }
    final NcbiGeneEnsemblFileParser ensemblParser = new NcbiGeneEnsemblFileParser();
    final NcbiGene2AccessionParser accParser = new NcbiGene2AccessionParser();
    accParser.setStartingNbiId(startingNcbiId);
    final File gene2accessionFileHandle = gene2AccessionFile.asFile();
    final NcbiGeneHistoryParser historyParser = new NcbiGeneHistoryParser();
    try {
        NcbiGeneDomainObjectGenerator.log.debug("Parsing gene history");
        historyParser.parse(geneHistoryFile.asFile());
        if (geneEnsemblFile != null) {
            NcbiGeneDomainObjectGenerator.log.debug("Parsing ensembl");
            ensemblParser.parse(geneEnsemblFile.asFile());
        }
        NcbiGeneDomainObjectGenerator.log.debug("Parsing GeneInfo =" + geneInfoFile.asFile().getAbsolutePath());
        try (InputStream is = FileTools.getInputStreamFromPlainOrCompressedFile(geneInfoFile.asFile().getAbsolutePath())) {
            infoParser.parse(is);
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    Collection<NCBIGeneInfo> geneInfoList = infoParser.getResults();
    // Index gene info by gene id and count genes per taxon id.
    final Map<String, NCBIGeneInfo> geneInfoMap = new HashMap<>();
    Map<Integer, Integer> taxaCount = new HashMap<>();
    for (NCBIGeneInfo geneInfo : geneInfoList) {
        NcbiGeneHistory history = historyParser.get(geneInfo.getGeneId());
        geneInfo.setHistory(history);
        if (history == null) {
            // No history record for this id: look up a discontinued id by symbol and taxon instead.
            String discontinuedIdForGene = historyParser.discontinuedIdForSymbol(geneInfo.getDefaultSymbol(), geneInfo.getTaxId());
            geneInfo.setDiscontinuedId(discontinuedIdForGene);
        }
        if (geneEnsemblFile != null) {
            String ensemblId = ensemblParser.get(geneInfo.getGeneId());
            geneInfo.setEnsemblId(ensemblId);
        }
        int taxId = geneInfo.getTaxId();
        Integer countSoFar = taxaCount.get(taxId);
        taxaCount.put(taxId, countSoFar == null ? 1 : countSoFar + 1);
        geneInfoMap.put(geneInfo.getGeneId(), geneInfo);
    }
    supportedTaxaWithNCBIGenes = new HashSet<>();
    if (supportedTaxa != null) {
        // Every entry in taxaCount has a count of at least one, so each key is a taxon that has genes.
        for (Map.Entry<Integer, Integer> entry : taxaCount.entrySet()) {
            NcbiGeneDomainObjectGenerator.log.debug("Taxon " + entry.getKey() + ": " + entry.getValue() + " genes");
            Taxon t = supportedTaxa.get(entry.getKey());
            if (t == null) {
                // The taxon id is not in supportedTaxa; do not add a null element to the set.
                continue;
            }
            supportedTaxaWithNCBIGenes.add(t);
        }
    }
    // 1) use a producer-consumer model for Gene2Accession conversion
    // 1a) Parse Gene2Accession until the gene id changes. This means that
    // all accessions for the gene are done.
    // 1b) Create a Collection<Gene2Accession>, and push into BlockingQueue
    Thread parseThread = new Thread(new Runnable() {

        @Override
        public void run() {
            try {
                NcbiGeneDomainObjectGenerator.log.debug("Parsing gene2accession=" + gene2AccessionFile.asFile().getAbsolutePath());
                accParser.setStartingNbiId(startingNcbiId);
                accParser.parse(gene2accessionFileHandle, geneDataQueue, geneInfoMap);
                NcbiGeneDomainObjectGenerator.log.debug("Domain object generator done");
            } catch (IOException e) {
                throw new RuntimeException(e);
            } finally {
                // Always flag completion, even when parsing fails; otherwise the flag would stay unset
                // after the parser thread has died.
                producerDone.set(true);
            }
        }
    }, "gene2accession parser");
    parseThread.start();
    // 1c) As elements get added to BlockingQueue, NCBIGeneConverter
    // consumes
    // and creates Gene/GeneProduct/DatabaseEntry objects.
    // 1d) Push Gene to another BlockingQueue genePersistence
    // 2) use producer-consumer model for Gene persistence
    // 2a) as elements get added to genePersistence, persist Gene and
    // associated entries.
}
Also used : NCBIGeneInfo(ubic.gemma.core.loader.genome.gene.ncbi.model.NCBIGeneInfo) NcbiGeneHistory(ubic.gemma.core.loader.genome.gene.ncbi.model.NcbiGeneHistory) HashMap(java.util.HashMap) InputStream(java.io.InputStream) Taxon(ubic.gemma.model.genome.Taxon) IOException(java.io.IOException) File(java.io.File) LocalFile(ubic.gemma.model.common.description.LocalFile)

Example 33 with LocalFile

use of ubic.gemma.model.common.description.LocalFile in project Gemma by PavlidisLab.

the class NcbiGeneDomainObjectGenerator method generateLocal.

/**
 * Wraps the given local file paths in LocalFile objects (one per path, with its local URL set) and passes
 * them to processLocalFiles.
 *
 * @param geneInfoFilePath      path of the gene info file
 * @param gene2AccesionFilePath path of the gene2accession file; asserted to be non-null
 * @param geneHistoryFilePath   path of the gene history file
 * @param geneEnsemblFilePath   path of the Ensembl file; may be null, in which case a null LocalFile is passed on
 * @param queue                 queue passed through to processLocalFiles
 * @throws RuntimeException wrapping an IOException raised while converting a path to a URL
 */
public void generateLocal(String geneInfoFilePath, String gene2AccesionFilePath, String geneHistoryFilePath, String geneEnsemblFilePath, BlockingQueue<NcbiGeneData> queue) {
    assert gene2AccesionFilePath != null;
    try {
        // Resolve every path to a URL first, then build the LocalFile wrappers.
        final URL infoUrl = new File(geneInfoFilePath).toURI().toURL();
        final URL accessionUrl = new File(gene2AccesionFilePath).toURI().toURL();
        final URL historyUrl = new File(geneHistoryFilePath).toURI().toURL();
        final URL ensemblUrl = geneEnsemblFilePath == null ? null : new File(geneEnsemblFilePath).toURI().toURL();

        LocalFile infoFile = LocalFile.Factory.newInstance();
        infoFile.setLocalURL(infoUrl);

        LocalFile accessionFile = LocalFile.Factory.newInstance();
        accessionFile.setLocalURL(accessionUrl);

        LocalFile historyFile = LocalFile.Factory.newInstance();
        historyFile.setLocalURL(historyUrl);

        // The Ensembl file is optional; leave it null when no path was supplied.
        LocalFile ensemblFile = null;
        if (ensemblUrl != null) {
            ensemblFile = LocalFile.Factory.newInstance();
            ensemblFile.setLocalURL(ensemblUrl);
        }

        this.processLocalFiles(infoFile, accessionFile, historyFile, ensemblFile, queue);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
Also used : LocalFile(ubic.gemma.model.common.description.LocalFile) IOException(java.io.IOException) File(java.io.File) LocalFile(ubic.gemma.model.common.description.LocalFile) URL(java.net.URL)

Example 34 with LocalFile

use of ubic.gemma.model.common.description.LocalFile in project Gemma by PavlidisLab.

the class NcbiGeneDomainObjectGenerator method generate.

/**
 * Fetches the gene info, gene2accession, gene history and gene Ensembl files with an NCBIGeneFileFetcher,
 * takes the first LocalFile of each fetch result, and passes them to processLocalFiles.
 *
 * @param queue queue passed through to processLocalFiles
 */
public void generate(final BlockingQueue<NcbiGeneData> queue) {
    log.info("Fetching...");

    final NCBIGeneFileFetcher geneFileFetcher = new NCBIGeneFileFetcher();
    geneFileFetcher.setDoDownload(this.doDownload);

    // Only the first file of each fetch result is used.
    final LocalFile infoFile = geneFileFetcher.fetch(GENEINFO_FILE).iterator().next();
    final LocalFile accessionFile = geneFileFetcher.fetch(GENE2ACCESSION_FILE).iterator().next();
    final LocalFile historyFile = geneFileFetcher.fetch(GENEHISTORY_FILE).iterator().next();
    final LocalFile ensemblFile = geneFileFetcher.fetch(GENEENSEMBL_FILE).iterator().next();

    this.processLocalFiles(infoFile, accessionFile, historyFile, ensemblFile, queue);
}
Also used : LocalFile(ubic.gemma.model.common.description.LocalFile)

Example 35 with LocalFile

use of ubic.gemma.model.common.description.LocalFile in project Gemma by PavlidisLab.

the class StringProteinFileFetcher method unPackFile.

/**
 * Un-gzips each downloaded file and checks that the resulting file (the original path with its extension
 * removed by FileTools.chompExtension) is readable.
 *
 * @param localFile Collection of File details relating to string download
 * @return the unpacked file for the last entry iterated, or null if the collection is empty
 * @throws RuntimeException if un-gzipping fails with an IOException, or an unpacked file is not readable
 */
@Override
public File unPackFile(Collection<LocalFile> localFile) {
    File unpacked = null;
    for (LocalFile archive : localFile) {
        final String archivePath = archive.getLocalURL().getFile();
        try {
            FileTools.unGzipFile(archivePath);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        unpacked = new File(FileTools.chompExtension(archivePath));
        // Verify the unpacked file actually exists and can be read.
        if (!unpacked.canRead()) {
            throw new RuntimeException("Problem unpacking file: not readable: " + unpacked.getName());
        }
    }
    return unpacked;
}
Also used : LocalFile(ubic.gemma.model.common.description.LocalFile) IOException(java.io.IOException) LocalFile(ubic.gemma.model.common.description.LocalFile) File(java.io.File)

Aggregations

LocalFile (ubic.gemma.model.common.description.LocalFile): 40, File (java.io.File): 17, IOException (java.io.IOException): 11, MalformedURLException (java.net.MalformedURLException): 4, HashSet (java.util.HashSet): 4, ArrayDesign (ubic.gemma.model.expression.arrayDesign.ArrayDesign): 4, BioAssay (ubic.gemma.model.expression.bioAssay.BioAssay): 4, URL (java.net.URL): 3, SimpleDateFormat (java.text.SimpleDateFormat): 3, Date (java.util.Date): 3, BioMaterial (ubic.gemma.model.expression.biomaterial.BioMaterial): 3, Taxon (ubic.gemma.model.genome.Taxon): 3, StopWatch (org.apache.commons.lang3.time.StopWatch): 2, AffyPowerToolsProbesetSummarize (ubic.gemma.core.loader.expression.AffyPowerToolsProbesetSummarize): 2, RawDataFetcher (ubic.gemma.core.loader.expression.geo.fetcher.RawDataFetcher): 2, HttpFetcher (ubic.gemma.core.loader.util.fetcher.HttpFetcher): 2, RawExpressionDataVector (ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector): 2, FileNotFoundException (java.io.FileNotFoundException): 1, InputStream (java.io.InputStream): 1, URISyntaxException (java.net.URISyntaxException): 1