Search in sources :

Example 1 with ProbeSequenceParser

Use of ubic.gemma.core.loader.genome.ProbeSequenceParser in project Gemma by PavlidisLab.

From the class ArrayDesignSequenceProcessingServiceImpl, method processOligoDesign.

/**
 * Attaches sequences read from a probe-sequence file to the composite sequences of an array design.
 * <p>
 * The stream is parsed with a {@link ProbeSequenceParser}; each composite sequence of the array design is then
 * looked up in the parser by name. A match is marked as a DNA oligo of the validated taxon, persisted, and set as
 * the composite sequence's biological characteristic. Composite sequences without a match are counted and
 * reported. Finally the array design is updated through the array design service.
 *
 * @param arrayDesign  the array design whose composite sequences are processed
 * @param sequenceFile stream holding the probe sequences; the expected format is described in
 *                     {@link ProbeSequenceParser}
 * @param taxon        taxon to assign to the sequences; it is passed through {@code validateTaxon} first and the
 *                     result of that call is what gets used
 * @return the sequences returned by {@code persistSequence} for every composite sequence that had a match
 * @throws IOException presumably when the sequence file cannot be read by the parser
 * @see ProbeSequenceParser
 */
private Collection<BioSequence> processOligoDesign(ArrayDesign arrayDesign, InputStream sequenceFile, Taxon taxon) throws IOException {
    this.checkForCompositeSequences(arrayDesign);

    ProbeSequenceParser probeParser = new ProbeSequenceParser();
    probeParser.parse(sequenceFile);

    final int expectedCount = arrayDesign.getCompositeSequences().size();
    taxon = this.validateTaxon(taxon, arrayDesign);
    ArrayDesignSequenceProcessingServiceImpl.log.info("Sequences done, updating composite sequences");

    Collection<BioSequence> matched = new HashSet<>();
    int processed = 0;
    int lastPercent = 0;
    int missing = 0;

    for (CompositeSequence probe : arrayDesign.getCompositeSequences()) {
        if (ArrayDesignSequenceProcessingServiceImpl.log.isTraceEnabled()) {
            ArrayDesignSequenceProcessingServiceImpl.log.trace("Looking for sequence for: " + probe.getName());
        }

        BioSequence found = probeParser.get(probe.getName());
        if (found == null) {
            // Nothing in the file for this probe: count it and report it.
            missing++;
            this.notifyAboutMissingSequences(missing, probe);
        } else {
            // Replaces whatever biological characteristic the probe had before.
            assert found.getSequence() != null;
            found.setType(SequenceType.OLIGO);
            found.setPolymerType(PolymerType.DNA);
            found.setTaxon(taxon);

            // Keep working with the instance handed back by the persistence step.
            BioSequence persisted = this.persistSequence(found);
            probe.setBiologicalCharacteristic(persisted);
            probe.setArrayDesign(arrayDesign);
            matched.add(persisted);
        }

        // Refresh the progress indicator once per thousand probes handled.
        processed++;
        if (processed % 1000 == 0) {
            lastPercent = this.updateProgress(expectedCount, processed, lastPercent);
        }
    }

    if (missing > 0) {
        ArrayDesignSequenceProcessingServiceImpl.log.info("There were " + missing + "/"
                + arrayDesign.getCompositeSequences().size()
                + " composite sequences with no associated biological characteristic");
    }

    ArrayDesignSequenceProcessingServiceImpl.log.info("Updating sequences on arrayDesign");
    arrayDesignService.update(arrayDesign);
    return matched;
}
Also used : BioSequence(ubic.gemma.model.genome.biosequence.BioSequence) ProbeSequenceParser(ubic.gemma.core.loader.genome.ProbeSequenceParser) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence)

Aggregations

ProbeSequenceParser (ubic.gemma.core.loader.genome.ProbeSequenceParser): 1 · CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence): 1 · BioSequence (ubic.gemma.model.genome.biosequence.BioSequence): 1