Search in sources :

Example 51 with BioSequence

use of ubic.gemma.model.genome.biosequence.BioSequence in project Gemma by PavlidisLab.

the class ArrayDesignSequenceAssociationCli method doWork.

/**
 * Attaches sequences to the selected array designs, or (with the 's' option) processes a single accession.
 * <p>
 * For each array design the sequence source is chosen by option: 'f' reads a sequence file, 'i' reads a file of
 * sequence identifiers, and otherwise the sequences are looked up in the BLAST databases. An audit entry is
 * recorded for each array design once it has been processed.
 *
 * @param args the command-line arguments, handed to processCommandLine
 * @return the exception that stopped processing (it is also logged), or null if the run finished
 */
@Override
protected Exception doWork(String[] args) {
    // The same set of BLAST databases is used by every lookup in this tool.
    final String[] blastDatabases = { "nt", "est_others", "est_human", "est_mouse" };
    try {
        Exception err = this.processCommandLine(args);
        if (err != null) {
            return err;
        }
        // this is kind of an oddball function of this tool: handle one accession and stop.
        if (this.hasOption('s')) {
            BioSequence updated = arrayDesignSequenceProcessingService.processSingleAccession(this.sequenceId, blastDatabases, null, force);
            if (updated != null) {
                AbstractCLI.log.info("Updated or created " + updated);
            }
            return null;
        }
        for (ArrayDesign arrayDesign : this.arrayDesignsToProcess) {
            arrayDesign = this.thaw(arrayDesign);
            SequenceType sequenceTypeEn = SequenceType.fromString(sequenceType);
            if (sequenceTypeEn == null) {
                AbstractCLI.log.error("No sequenceType " + sequenceType + " found");
                this.bail(ErrorCode.INVALID_OPTION);
                // NOTE(review): bail presumably does not return normally; the explicit return keeps a null
                // sequence type from reaching the service if it ever does (same pattern as the file check below).
                return null;
            }
            if (this.hasOption('f')) {
                // A null resource is legal in try-with-resources (it is simply not closed), so the
                // readability check can live inside the block.
                try (InputStream sequenceFileIs = FileTools.getInputStreamFromPlainOrCompressedFile(sequenceFile)) {
                    if (sequenceFileIs == null) {
                        AbstractCLI.log.error("No file " + sequenceFile + " was readable");
                        this.bail(ErrorCode.INVALID_OPTION);
                        return null;
                    }
                    Taxon taxon = this.findRequestedTaxon();
                    AbstractCLI.log.info("Processing ArrayDesign...");
                    arrayDesignSequenceProcessingService.processArrayDesign(arrayDesign, sequenceFileIs, sequenceTypeEn, taxon);
                    this.audit(arrayDesign, "Sequences read from file: " + sequenceFile);
                }
            } else if (this.hasOption('i')) {
                try (InputStream idFileIs = FileTools.getInputStreamFromPlainOrCompressedFile(idFile)) {
                    if (idFileIs == null) {
                        AbstractCLI.log.error("No file " + idFile + " was readable");
                        this.bail(ErrorCode.INVALID_OPTION);
                        // Mirror the sequence-file branch: never hand a null stream to the service.
                        return null;
                    }
                    Taxon taxon = this.findRequestedTaxon();
                    AbstractCLI.log.info("Processing ArrayDesign...");
                    arrayDesignSequenceProcessingService.processArrayDesign(arrayDesign, idFileIs, blastDatabases, null, taxon, force);
                    this.audit(arrayDesign, "Sequences identifiers from file: " + idFile);
                }
            } else {
                AbstractCLI.log.info("Retrieving sequences from BLAST databases");
                arrayDesignSequenceProcessingService.processArrayDesign(arrayDesign, blastDatabases, null, force);
                this.audit(arrayDesign, "Sequence looked up from BLAST databases");
            }
        }
    } catch (Exception e) {
        AbstractCLI.log.error(e, e);
        return e;
    }
    return null;
}

/**
 * Looks up the taxon named by the 't' option, if that option was given.
 *
 * @return the matching taxon, or null when the 't' option is absent
 * @throws IllegalArgumentException if the option was given but no taxon has that common name
 */
private Taxon findRequestedTaxon() {
    if (!this.hasOption('t')) {
        return null;
    }
    Taxon taxon = taxonService.findByCommonName(this.taxonName);
    if (taxon == null) {
        throw new IllegalArgumentException("No taxon named " + taxonName);
    }
    return taxon;
}
Also used : BioSequence(ubic.gemma.model.genome.biosequence.BioSequence) ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) InputStream(java.io.InputStream) Taxon(ubic.gemma.model.genome.Taxon) SequenceType(ubic.gemma.model.genome.biosequence.SequenceType)

Example 52 with BioSequence

use of ubic.gemma.model.genome.biosequence.BioSequence in project Gemma by PavlidisLab.

the class VectorMergingServiceTest method test.

/**
 * End-to-end check of vector merging on GSE3443: load the experiment from local test files, merge its
 * platforms into one, switch the experiment onto the merged platform, merge the vectors, and verify the
 * element, sequence and vector counts at each step.
 */
@Test
public final void test() throws Exception {
    // GSE3443 is a persistent experiment spread over several platforms:
    // GPL2868, GPL2933, GPL2934, GPL2935, GPL2936, GPL2937, GPL2938.
    // N57553 is an example of a sequence that appears on more than one of them.
    GeoDomainObjectGeneratorLocal localGenerator = new GeoDomainObjectGeneratorLocal(this.getTestFileBasePath("gse3443merge"));
    geoService.setGeoDomainObjectGenerator(localGenerator);
    Collection<?> loaded = geoService.fetchAndLoad("GSE3443", false, false, false);
    ee = (ExpressionExperiment) loaded.iterator().next();
    ee = this.eeService.thawLite(ee);
    Collection<ArrayDesign> platforms = eeService.getArrayDesignsUsed(ee);
    assertEquals(7, platforms.size());

    // Gather the distinct sequences over all platforms. The merged platform needs one element per
    // sequence, plus extras for duplicated sequences (e.g. elements that don't have a sequence).
    Collection<ArrayDesign> thawedPlatforms = new HashSet<>();
    Set<BioSequence> originalSequences = new HashSet<>();
    for (ArrayDesign platform : platforms) {
        platform = arrayDesignService.thaw(platform);
        thawedPlatforms.add(platform);
        for (CompositeSequence element : platform.getCompositeSequences()) {
            BioSequence sequence = element.getBiologicalCharacteristic();
            log.info(element + " " + sequence);
            originalSequences.add(sequence);
        }
    }
    assertEquals(63, originalSequences.size());

    // Total number of elements over the 7 platforms.
    int elementCount = 0;
    for (ArrayDesign platform : thawedPlatforms) {
        elementCount += platform.getCompositeSequences().size();
    }
    assertEquals(140, elementCount);

    ArrayDesign firstPlatform = thawedPlatforms.iterator().next();
    platforms.remove(firstPlatform);
    assertEquals(null, firstPlatform.getMergedInto());
    String mergedName = "testMerge" + RandomStringUtils.randomAlphabetic(5);
    String mergedShortName = "merged" + RandomStringUtils.randomAlphabetic(5);
    mergedAA = arrayDesignMergeService.merge(firstPlatform, thawedPlatforms, mergedName, mergedShortName, false);
    assertEquals(72, mergedAA.getCompositeSequences().size());

    Set<BioSequence> mergedSequences = new HashSet<>();
    for (CompositeSequence element : mergedAA.getCompositeSequences()) {
        mergedSequences.add(element.getBiologicalCharacteristic());
    }
    assertEquals(63, mergedSequences.size());
    // Stated explicitly: the merged platform must carry every sequence the originals had.
    assertEquals(originalSequences.size(), mergedSequences.size());

    ee = eeService.thaw(ee);
    assertEquals(1828, ee.getRawExpressionDataVectors().size());
    ee = eePlatformSwitchService.switchExperimentToArrayDesign(ee, mergedAA);
    ee = eeService.thaw(ee);

    // Every assay and every raw vector must now point at the merged platform.
    for (BioAssay assay : ee.getBioAssays()) {
        assertEquals(mergedAA, assay.getArrayDesignUsed());
    }
    for (RawExpressionDataVector rawVector : ee.getRawExpressionDataVectors()) {
        assertEquals(mergedAA, rawVector.getDesignElement().getArrayDesign());
    }
    assertEquals(15, ee.getQuantitationTypes().size());
    assertEquals(1828, ee.getRawExpressionDataVectors().size());

    ee = vectorMergingService.mergeVectors(ee);
    // After merging, the processed data should hold one vector per element of the merged platform.
    Collection<ProcessedExpressionDataVector> processedVectors = processedExpressionDataVectorService.getProcessedDataVectors(ee);
    assertEquals(72, processedVectors.size());
    ee = eeService.thaw(ee);
    Collection<DoubleVectorValueObject> dataArrays = processedExpressionDataVectorService.getProcessedDataArrays(ee, 50);
    assertEquals(50, dataArrays.size());
}
Also used : BioSequence(ubic.gemma.model.genome.biosequence.BioSequence) ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) ProcessedExpressionDataVector(ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) RawExpressionDataVector(ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector) DoubleVectorValueObject(ubic.gemma.model.expression.bioAssayData.DoubleVectorValueObject) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay) GeoDomainObjectGeneratorLocal(ubic.gemma.core.loader.expression.geo.GeoDomainObjectGeneratorLocal) HashSet(java.util.HashSet) AbstractGeoServiceTest(ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest) Test(org.junit.Test)

Example 53 with BioSequence

use of ubic.gemma.model.genome.biosequence.BioSequence in project Gemma by PavlidisLab.

the class ProcessedExpressionDataVectorServiceTest method getGeneAssociatedWithEe.

/**
 * Builds test fixtures linking genes to the experiment's platforms: for every composite sequence on every
 * array design used by the experiment, a test gene and a new BioSequence are persisted, the sequence is
 * attached to the composite sequence, and a BlatAssociation tying the sequence to one of the gene's products
 * is persisted.
 *
 * @param ee the experiment whose array designs are populated
 * @return the genes created, one per composite sequence processed
 */
private Collection<Gene> getGeneAssociatedWithEe(ExpressionExperiment ee) {
    Collection<ArrayDesign> ads = this.expressionExperimentService.getArrayDesignsUsed(ee);
    Collection<Gene> genes = new HashSet<>();
    for (ArrayDesign ad : ads) {
        Taxon taxon = this.getTaxon("mouse");
        ad = this.arrayDesignService.thaw(ad);
        for (CompositeSequence cs : ad.getCompositeSequences()) {
            Gene g = this.getTestPersistentGene();
            BlatAssociation blata = BlatAssociation.Factory.newInstance();
            blata.setGeneProduct(g.getProducts().iterator().next());
            BlatResult br = BlatResult.Factory.newInstance();
            BioSequence bs = BioSequence.Factory.newInstance();
            // randomAlphabetic keeps the fixture name to plain letters; RandomStringUtils.random(int)
            // draws from the whole character set and can yield unprintable or unstorable names.
            bs.setName(RandomStringUtils.randomAlphabetic(10));
            bs.setTaxon(taxon);
            bs = (BioSequence) persisterHelper.persist(bs);
            assertNotNull(bs);
            cs.setBiologicalCharacteristic(bs);
            compositeSequenceService.update(cs);
            // Reload to confirm the sequence link survived the update.
            cs = compositeSequenceService.load(cs.getId());
            assertNotNull(cs.getBiologicalCharacteristic());
            br.setQuerySequence(bs);
            blata.setBlatResult(br);
            blata.setBioSequence(bs);
            persisterHelper.persist(blata);
            genes.add(g);
        }
    }
    return genes;
}
Also used : Gene(ubic.gemma.model.genome.Gene) BioSequence(ubic.gemma.model.genome.biosequence.BioSequence) ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) Taxon(ubic.gemma.model.genome.Taxon) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) BlatAssociation(ubic.gemma.model.genome.sequenceAnalysis.BlatAssociation) HashSet(java.util.HashSet) BlatResult(ubic.gemma.model.genome.sequenceAnalysis.BlatResult)

Example 54 with BioSequence

use of ubic.gemma.model.genome.biosequence.BioSequence in project Gemma by PavlidisLab.

the class ArrayDesignSequenceAlignmentandMappingTest method testProcessArrayDesign.

/**
 * Fetches sequences for the array design from the test BLAST databases using a mock fastacmd, then aligns
 * the design with a mock Blat and expects more than 200 alignment results.
 */
@Test
public final void testProcessArrayDesign() throws Exception {
    ad = arrayDesignService.thaw(ad);
    String[] blastDbNames = { "testblastdb", "testblastdbPartTwo" };
    Collection<BioSequence> sequences = app.processArrayDesign(ad, blastDbNames, FileTools.resourceToPath("/data/loader/genome/blast"), true, new MockFastaCmd(ad.getPrimaryTaxon()));
    assertNotNull(sequences);
    boolean foundSequences = !sequences.isEmpty();
    assertTrue(foundSequences);

    // The mock aligner is built before the design is thawed again, as in the sequence-fetch step.
    Blat blat = new MockBlat(ad.getPrimaryTaxon());
    ad = arrayDesignService.thaw(ad);
    Collection<BlatResult> alignments = aligner.processArrayDesign(ad, blat);
    int alignmentCount = alignments.size();
    assertTrue(alignmentCount > 200);
}
Also used : Blat(ubic.gemma.core.apps.Blat) BioSequence(ubic.gemma.model.genome.biosequence.BioSequence) BlatResult(ubic.gemma.model.genome.sequenceAnalysis.BlatResult) Test(org.junit.Test)

Example 55 with BioSequence

use of ubic.gemma.model.genome.biosequence.BioSequence in project Gemma by PavlidisLab.

the class ArrayDesignSequenceProcessorTest method testFetchAndLoadWithSequences.

/**
 * Loads platform GPL226 from local test files and checks that every sequence returned by processing it
 * against the test BLAST databases has sequence data. If processing fails with a message containing
 * "not found", the failure is logged and the test ends without failing.
 */
@Test
public void testFetchAndLoadWithSequences() throws Exception {
    GeoService geoService = this.getBean(GeoService.class);
    GeoDomainObjectGeneratorLocal localGenerator = new GeoDomainObjectGeneratorLocal(this.getTestFileBasePath());
    geoService.setGeoDomainObjectGenerator(localGenerator);
    @SuppressWarnings("unchecked") final Collection<ArrayDesign> platforms = (Collection<ArrayDesign>) geoService.fetchAndLoad("GPL226", true, true, false);
    result = platforms.iterator().next();
    result = arrayDesignService.thaw(result);
    try {
        String[] blastDbNames = { "testblastdb", "testblastdbPartTwo" };
        Collection<BioSequence> processed = app.processArrayDesign(result, blastDbNames, FileTools.resourceToPath("/data/loader/genome/blast"), false);
        assertNotNull(processed);
        for (BioSequence bioSequence : processed) {
            assertNotNull(bioSequence.getSequence());
        }
    } catch (Exception e) {
        String message = e.getMessage();
        boolean toolUnavailable = StringUtils.isNotBlank(message) && message.contains("not found");
        if (!toolUnavailable) {
            // Anything other than the "not found" case is a genuine failure.
            throw e;
        }
        log.error("fastacmd is not installed or is misconfigured.  Test skipped");
    }
}
Also used : BioSequence(ubic.gemma.model.genome.biosequence.BioSequence) ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) GeoService(ubic.gemma.core.loader.expression.geo.service.GeoService) Collection(java.util.Collection) GeoDomainObjectGeneratorLocal(ubic.gemma.core.loader.expression.geo.GeoDomainObjectGeneratorLocal) AbstractGeoServiceTest(ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest) Test(org.junit.Test)

Aggregations

BioSequence (ubic.gemma.model.genome.biosequence.BioSequence): 105; CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence): 40; ArrayDesign (ubic.gemma.model.expression.arrayDesign.ArrayDesign): 24; Test (org.junit.Test): 18; HashSet (java.util.HashSet): 17; Taxon (ubic.gemma.model.genome.Taxon): 15; BlatResult (ubic.gemma.model.genome.sequenceAnalysis.BlatResult): 12; InputStream (java.io.InputStream): 11; Collection (java.util.Collection): 11; HashMap (java.util.HashMap): 10; BaseSpringContextTest (ubic.gemma.core.testing.BaseSpringContextTest): 10; GZIPInputStream (java.util.zip.GZIPInputStream): 7; Gene (ubic.gemma.model.genome.Gene): 7; GeoPlatform (ubic.gemma.core.loader.expression.geo.model.GeoPlatform): 6; DatabaseEntry (ubic.gemma.model.common.description.DatabaseEntry): 6; StopWatch (org.apache.commons.lang3.time.StopWatch): 5; GeneProduct (ubic.gemma.model.genome.gene.GeneProduct): 5; BioSequenceValueObject (ubic.gemma.model.genome.sequenceAnalysis.BioSequenceValueObject): 5; BlatAssociation (ubic.gemma.model.genome.sequenceAnalysis.BlatAssociation): 5; AbstractGeoServiceTest (ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest): 4