Search in sources :

Example 86 with BioSequence

use of ubic.gemma.model.genome.biosequence.BioSequence in project Gemma by PavlidisLab.

the class NcbiGeneConverter method getChromosomeDetails.

/**
 * Builds a whole-chromosome {@link BioSequence} from the genomic nucleotide accession of {@code acc}
 * and writes it into the {@code sequence} field of the chromosome at the gene's physical location.
 *
 * @param acc  record supplying the genomic nucleotide accession and its version
 * @param gene gene whose physical location provides the chromosome, and whose taxon is copied onto
 *             the new sequence
 */
private void getChromosomeDetails(NCBIGene2Accession acc, Gene gene) {
    Chromosome chromosome = gene.getPhysicalLocation().getChromosome();

    // Database entry describing the genomic accession (accession + version) in GenBank.
    DatabaseEntry genomicEntry = DatabaseEntry.Factory.newInstance();
    genomicEntry.setExternalDatabase(NcbiGeneConverter.genBank);
    genomicEntry.setAccession(acc.getGenomicNucleotideAccession());
    genomicEntry.setAccessionVersion(acc.getGenomicNucleotideAccessionVersion());

    // The sequence itself is named after the same genomic accession.
    BioSequence chromosomeSequence = BioSequence.Factory.newInstance();
    chromosomeSequence.setName(acc.getGenomicNucleotideAccession());
    chromosomeSequence.setType(SequenceType.WHOLE_CHROMOSOME);
    chromosomeSequence.setTaxon(gene.getTaxon());
    chromosomeSequence.setSequenceDatabaseEntry(genomicEntry);

    try {
        // The field is written reflectively with forced access rather than through a setter.
        FieldUtils.writeField(chromosome, "sequence", chromosomeSequence, true);
    } catch (IllegalAccessException accessFailure) {
        // NOTE(review): the failure is only printed, so the chromosome is left without the
        // sequence in that case - confirm this best-effort behaviour is intended.
        accessFailure.printStackTrace();
    }
}
Also used : BioSequence(ubic.gemma.model.genome.biosequence.BioSequence) Chromosome(ubic.gemma.model.genome.Chromosome) DatabaseEntry(ubic.gemma.model.common.description.DatabaseEntry)

Example 87 with BioSequence

use of ubic.gemma.model.genome.biosequence.BioSequence in project Gemma by PavlidisLab.

the class FastaParser method parseOneRecord.

/**
 * Parses a single record into the sequences described by its header.
 * <p>
 * The header is handed to {@code parseHeader}; every remaining match of the record pattern is
 * appended to the residue string, except matches starting with {@code ';'}, which are treated as
 * comments. All sequences produced by the header receive the same residues and exact length.
 *
 * @param record text of one record; may be blank
 * @return the collection of {@link BioSequence}s for this record, or {@code null} when the record
 *         is blank, the header yields no sequences, or no residues were found
 */
@Override
public Object parseOneRecord(String record) {
    if (StringUtils.isBlank(record)) {
        return null;
    }

    Matcher matcher = pattern.matcher(record);
    Collection<BioSequence> bioSequences = this.parseHeader(matcher);
    if (bioSequences.isEmpty()) {
        return null;
    }

    StringBuilder sequence = new StringBuilder();
    while (matcher.find()) {
        String line = matcher.group(1);
        // skip comments.
        if (line.startsWith(";")) {
            continue;
        }
        sequence.append(line);
    }

    if (sequence.length() == 0) {
        return null;
    }

    // Materialise the residues once: StringBuilder.toString() copies the whole buffer, so calling it
    // per BioSequence would allocate one full copy of the sequence for every header entry.
    String residues = sequence.toString();
    long residueCount = residues.length();
    for (BioSequence bioSequence : bioSequences) {
        bioSequence.setLength(residueCount);
        bioSequence.setIsApproximateLength(false);
        bioSequence.setSequence(residues);
    }
    return bioSequences;
}
Also used : Matcher(java.util.regex.Matcher) BioSequence(ubic.gemma.model.genome.biosequence.BioSequence)

Example 88 with BioSequence

use of ubic.gemma.model.genome.biosequence.BioSequence in project Gemma by PavlidisLab.

the class SimpleFastaCmd method getSequencesFromFastaCmdOutput.

/**
 * Collects the sequences written by a running fastacmd process.
 * <p>
 * The process's standard output is fed to a {@link FastaParser} through a
 * {@link ParsingStreamConsumer}, while its standard error is drained by a
 * {@link GenericStreamConsumer}. The method then waits for the process to exit and returns whatever
 * the parser has collected.
 *
 * @param pr the started process whose output is to be parsed
 * @return the sequences accumulated by the parser
 * @throws IOException      if closing the process streams fails
 * @throws RuntimeException wrapping the {@link InterruptedException} if the calling thread is
 *                          interrupted while waiting; the thread's interrupt status is restored first
 */
private Collection<BioSequence> getSequencesFromFastaCmdOutput(Process pr) throws IOException {
    try (final InputStream is = new BufferedInputStream(pr.getInputStream());
        InputStream err = pr.getErrorStream()) {
        final FastaParser parser = new FastaParser();
        ParsingStreamConsumer<BioSequence> sg = new ParsingStreamConsumer<>(parser, is);
        GenericStreamConsumer gsc = new GenericStreamConsumer(err);
        sg.start();
        gsc.start();
        try {
            int exitVal = pr.waitFor();
            // Makes sure results are flushed.
            Thread.sleep(200);
            // often nonzero if some sequences are not found.
            SimpleFastaCmd.log.debug("fastacmd exit value=" + exitVal);
            return parser.getResults();
        } catch (InterruptedException e) {
            // Restore the interrupt status so callers further up the stack can still observe it;
            // catching InterruptedException clears the flag.
            Thread.currentThread().interrupt();
            throw new RuntimeException(e);
        }
    }
}
Also used : BioSequence(ubic.gemma.model.genome.biosequence.BioSequence) ParsingStreamConsumer(ubic.gemma.core.util.concurrent.ParsingStreamConsumer) GenericStreamConsumer(ubic.gemma.core.util.concurrent.GenericStreamConsumer)

Example 89 with BioSequence

use of ubic.gemma.model.genome.biosequence.BioSequence in project Gemma by PavlidisLab.

the class ProbeSequenceParser method parse.

/**
 * Reads the stream line by line and passes each line to {@code parseOneLine}.
 * <p>
 * The parsed items are not retained by this method; it only counts the lines read and how many of
 * them produced a {@code null} result, logging progress periodically and a summary at the end.
 * The stream is closed when parsing finishes.
 *
 * @param is the stream to read; must not be {@code null}
 * @throws IllegalArgumentException if {@code is} is {@code null}
 * @throws IOException              if reading from the stream fails
 */
@Override
public void parse(InputStream is) throws IOException {
    if (is == null) {
        throw new IllegalArgumentException("InputStream was null");
    }

    try (BufferedReader reader = new BufferedReader(new InputStreamReader(is))) {
        StopWatch sinceLastAlert = new StopWatch();
        sinceLastAlert.start();

        int lineCount = 0;
        int emptyResults = 0;
        for (String current = reader.readLine(); current != null; current = reader.readLine()) {
            BioSequence parsed = this.parseOneLine(current);
            lineCount++;

            // Progress is reported only on a line-count boundary AND once enough time has elapsed
            // since the previous report.
            boolean onAlertBoundary = lineCount % Parser.PARSE_ALERT_FREQUENCY == 0;
            if (onAlertBoundary && sinceLastAlert.getTime() > LineParser.PARSE_ALERT_TIME_FREQUENCY_MS) {
                log.info("Parsed " + lineCount + " lines ");
                sinceLastAlert.reset();
                sinceLastAlert.start();
            }

            if (parsed == null) {
                emptyResults++;
            }
        }

        String emptySummary = "";
        if (emptyResults > 0) {
            emptySummary = emptyResults + " yielded no parse result (they may have been filtered).";
        }
        log.info("Parsed " + lineCount + " lines. " + emptySummary);
    }
}
Also used : InputStreamReader(java.io.InputStreamReader) BioSequence(ubic.gemma.model.genome.biosequence.BioSequence) BufferedReader(java.io.BufferedReader) StopWatch(org.apache.commons.lang3.time.StopWatch)

Example 90 with BioSequence

use of ubic.gemma.model.genome.biosequence.BioSequence in project Gemma by PavlidisLab.

the class ArrayDesignPersister method persistArrayDesignCompositeSequenceAssociations.

/**
 * Persists the biological characteristic of every transient composite sequence of the given array
 * design and links each such composite sequence back to the design.
 * <p>
 * Composite sequences that are not transient are left untouched. Progress is logged at intervals for
 * designs with more than 1000 elements, and a total is logged when at least one sequence was handled.
 *
 * @param arrayDesign the design whose composite sequences are processed
 * @return the same {@code arrayDesign} instance that was passed in
 */
private ArrayDesign persistArrayDesignCompositeSequenceAssociations(ArrayDesign arrayDesign) {
    int elementCount = arrayDesign.getCompositeSequences().size();
    if (elementCount == 0) {
        return arrayDesign;
    }

    AbstractPersister.log.info("Filling in or updating sequences in composite seqences for " + arrayDesign);

    int reportInterval = this.numElementsPerUpdate(arrayDesign.getCompositeSequences());
    int examined = 0;
    for (CompositeSequence element : arrayDesign.getCompositeSequences()) {
        // Only transient elements are processed; others are skipped in case of retry (not used?)
        if (this.isTransient(element)) {
            element.setId(null);
            element.setArrayDesign(arrayDesign);
            element.setBiologicalCharacteristic(this.persistBioSequence(element.getBiologicalCharacteristic()));

            examined++;
            if (examined % reportInterval == 0 && elementCount > 1000) {
                AbstractPersister.log.info(examined + "/" + elementCount + " compositeSequence sequences examined for " + arrayDesign);
            }
        }
    }

    if (examined > 0) {
        AbstractPersister.log.info("Total of " + examined + " compositeSequence sequences examined for " + arrayDesign);
    }
    return arrayDesign;
}
Also used : BioSequence(ubic.gemma.model.genome.biosequence.BioSequence) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence)

Aggregations

BioSequence (ubic.gemma.model.genome.biosequence.BioSequence): 105; CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence): 40; ArrayDesign (ubic.gemma.model.expression.arrayDesign.ArrayDesign): 24; Test (org.junit.Test): 18; HashSet (java.util.HashSet): 17; Taxon (ubic.gemma.model.genome.Taxon): 15; BlatResult (ubic.gemma.model.genome.sequenceAnalysis.BlatResult): 12; InputStream (java.io.InputStream): 11; Collection (java.util.Collection): 11; HashMap (java.util.HashMap): 10; BaseSpringContextTest (ubic.gemma.core.testing.BaseSpringContextTest): 10; GZIPInputStream (java.util.zip.GZIPInputStream): 7; Gene (ubic.gemma.model.genome.Gene): 7; GeoPlatform (ubic.gemma.core.loader.expression.geo.model.GeoPlatform): 6; DatabaseEntry (ubic.gemma.model.common.description.DatabaseEntry): 6; StopWatch (org.apache.commons.lang3.time.StopWatch): 5; GeneProduct (ubic.gemma.model.genome.gene.GeneProduct): 5; BioSequenceValueObject (ubic.gemma.model.genome.sequenceAnalysis.BioSequenceValueObject): 5; BlatAssociation (ubic.gemma.model.genome.sequenceAnalysis.BlatAssociation): 5; AbstractGeoServiceTest (ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest): 4