Search in sources :

Example 1 with SimpleFastaCmd

Use of ubic.gemma.core.loader.genome.SimpleFastaCmd in the Gemma project by PavlidisLab.

In class ArrayDesignSequenceProcessingServiceImpl, method processArrayDesign.

/**
 * Retrieves sequences for the accessions of an array design by searching the given BLAST databases.
 * <p>
 * Accessions that are not found are retried with an explicit version suffix ({@code .1}, {@code .2}, ...):
 * any existing trailing {@code .<digits>} is stripped and the current version number appended. Retrying
 * stops as soon as nothing is missing or the version number reaches
 * {@code ArrayDesignSequenceProcessingService.MAX_VERSION_NUMBER}. Accessions still missing at that point
 * are handed to {@code logMissingSequences}, and the array design report is regenerated before returning.
 *
 * @param arrayDesign   the array design to process
 * @param databaseNames database names, passed through to the BLAST database search
 * @param blastDbHome   BLAST database location, passed through to the BLAST database search
 * @param force         passed through to the fetch-list initialisation and to the sequence update step
 * @param fc            the FastaCmd used for the search; when {@code null} a new {@link SimpleFastaCmd} is used
 * @return the sequences that were found, or {@code null} when there are no accessions to fetch
 * @throws IllegalArgumentException if no taxon is found for the array design
 */
@Override
public Collection<BioSequence> processArrayDesign(ArrayDesign arrayDesign, String[] databaseNames, String blastDbHome, boolean force, FastaCmd fc) {
    Map<String, BioSequence> accessionsToFetch = this.initializeFetchList(arrayDesign, force);
    if (accessionsToFetch.isEmpty()) {
        ArrayDesignSequenceProcessingServiceImpl.log.info("No accessions to fetch, no processing will be done");
        // Callers of the original implementation receive null (not an empty collection) here; keep that contract.
        return null;
    }
    Collection<Taxon> taxaOnArray = arrayDesignService.getTaxa(arrayDesign.getId());
    // no taxon found
    if (taxaOnArray.isEmpty()) {
        throw new IllegalArgumentException(taxaOnArray.size() + " taxon found for " + arrayDesign + ", please specify which taxon to run");
    }
    Collection<String> notFound = accessionsToFetch.keySet();
    Collection<BioSequence> finalResult = new HashSet<>();
    int versionNumber = 1;
    if (fc == null) {
        fc = new SimpleFastaCmd();
    }
    while (versionNumber < ArrayDesignSequenceProcessingService.MAX_VERSION_NUMBER) {
        Collection<BioSequence> retrievedSequences = this.searchBlastDbs(databaseNames, blastDbHome, notFound, fc);
        // All taxa on the array are passed along with the retrieved sequences.
        // NOTE(review): presumably sequences whose taxon does not match the array design are ignored there - confirm.
        Map<String, BioSequence> found = this.findOrUpdateSequences(accessionsToFetch, retrievedSequences, taxaOnArray, force);
        finalResult.addAll(found.values());
        notFound = this.getUnFound(notFound, found);
        if (notFound.isEmpty()) {
            break;
        }
        for (String accession : notFound) {
            if (ArrayDesignSequenceProcessingServiceImpl.log.isTraceEnabled()) {
                ArrayDesignSequenceProcessingServiceImpl.log.trace(accession + " not found, increasing version number to " + versionNumber);
            }
            // Re-key the entry: drop the old accession, then register the same sequence under the versioned one.
            BioSequence bs = accessionsToFetch.remove(accession);
            // Strip any existing trailing ".<digits>" and append the version to try next.
            String versionedAccession = accession.replaceFirst("\\.\\d+$", "") + "." + versionNumber;
            accessionsToFetch.put(versionedAccession, bs);
        }
        // The next search uses whatever keys are now in the fetch map.
        notFound = accessionsToFetch.keySet();
        ++versionNumber;
    }
    if (!notFound.isEmpty()) {
        this.logMissingSequences(arrayDesign, notFound);
    }
    ArrayDesignSequenceProcessingServiceImpl.log.info(finalResult.size() + " sequences found");
    arrayDesignReportService.generateArrayDesignReport(arrayDesign.getId());
    return finalResult;
}
Also used : BioSequence(ubic.gemma.model.genome.biosequence.BioSequence) Taxon(ubic.gemma.model.genome.Taxon) SimpleFastaCmd(ubic.gemma.core.loader.genome.SimpleFastaCmd)

Aggregations

SimpleFastaCmd (ubic.gemma.core.loader.genome.SimpleFastaCmd): 1, Taxon (ubic.gemma.model.genome.Taxon): 1, BioSequence (ubic.gemma.model.genome.biosequence.BioSequence): 1