Use of ubic.gemma.core.loader.genome.SimpleFastaCmd in project Gemma by PavlidisLab.
From the class ArrayDesignSequenceProcessingServiceImpl, method processArrayDesign.
/**
 * Fetches sequences for the accessions associated with an array design by searching BLAST databases,
 * retrying accessions that were not found with successive version suffixes ({@code .1}, {@code .2}, ...)
 * while the version number is below {@link ArrayDesignSequenceProcessingService#MAX_VERSION_NUMBER}.
 * <p>
 * When the search loop finishes, any accessions still outstanding are passed to
 * {@code logMissingSequences} and the array design report is regenerated.
 *
 * @param arrayDesign   the array design whose accessions are processed
 * @param databaseNames names of the BLAST databases, forwarded to {@code searchBlastDbs}
 * @param blastDbHome   location of the BLAST databases, forwarded to {@code searchBlastDbs}
 * @param force         forwarded to {@code initializeFetchList} and {@code findOrUpdateSequences}
 * @param fc            command used to retrieve sequences; when {@code null} a {@link SimpleFastaCmd} is used
 * @return the sequences collected over all search rounds, or {@code null} when the initial fetch list is
 *         empty (in which case no processing is done)
 * @throws IllegalArgumentException if no taxon is returned for the array design
 */
@Override
public Collection<BioSequence> processArrayDesign(ArrayDesign arrayDesign, String[] databaseNames, String blastDbHome, boolean force, FastaCmd fc) {
    Map<String, BioSequence> accessionsToFetch = this.initializeFetchList(arrayDesign, force);
    if (accessionsToFetch.isEmpty()) {
        ArrayDesignSequenceProcessingServiceImpl.log.info("No accessions to fetch, no processing will be done");
        // Callers receive null (not an empty collection) in this case; kept for compatibility.
        return null;
    }

    Collection<Taxon> taxaOnArray = arrayDesignService.getTaxa(arrayDesign.getId());
    // no taxon found
    if (taxaOnArray.isEmpty()) {
        throw new IllegalArgumentException(taxaOnArray.size() + " taxon found for " + arrayDesign + "; please specify which taxon to run");
    }

    if (fc == null) {
        fc = new SimpleFastaCmd();
    }

    Collection<String> notFound = accessionsToFetch.keySet();
    Collection<BioSequence> finalResult = new HashSet<>();
    int versionNumber = 1;
    while (versionNumber < ArrayDesignSequenceProcessingService.MAX_VERSION_NUMBER) {
        Collection<BioSequence> retrievedSequences = this.searchBlastDbs(databaseNames, blastDbHome, notFound, fc);

        // All taxa of the array are handed over together; per the original note, retrieved sequences whose
        // taxon does not match the array design can be ignored (presumably inside findOrUpdateSequences).
        Map<String, BioSequence> found = this.findOrUpdateSequences(accessionsToFetch, retrievedSequences, taxaOnArray, force);
        finalResult.addAll(found.values());

        notFound = this.getUnFound(notFound, found);
        if (notFound.isEmpty()) {
            break;
        }

        // Re-key every missing accession under the next version suffix so the following round searches
        // for "<accession>.<versionNumber>" instead.
        // NOTE(review): this mutates accessionsToFetch while iterating notFound, which is only safe if
        // getUnFound returns a collection that is not a view of accessionsToFetch - confirm.
        for (String accession : notFound) {
            if (ArrayDesignSequenceProcessingServiceImpl.log.isTraceEnabled()) {
                ArrayDesignSequenceProcessingServiceImpl.log.trace(accession + " not found, increasing version number to " + versionNumber);
            }
            BioSequence bs = accessionsToFetch.remove(accession);
            // Strip any existing trailing ".<digits>" version, then append the current one.
            String versionedAccession = accession.replaceFirst("\\.\\d+$", "") + "." + versionNumber;
            accessionsToFetch.put(versionedAccession, bs);
        }
        notFound = accessionsToFetch.keySet();
        ++versionNumber;
    }

    if (!notFound.isEmpty()) {
        this.logMissingSequences(arrayDesign, notFound);
    }
    ArrayDesignSequenceProcessingServiceImpl.log.info(finalResult.size() + " sequences found");
    arrayDesignReportService.generateArrayDesignReport(arrayDesign.getId());
    return finalResult;
}
Aggregations