Search in sources :

Example 1 with BioSequence

use of ubic.gemma.model.genome.biosequence.BioSequence in project Gemma by PavlidisLab.

the class SearchServiceImpl method compassBioSequenceSearch.

/**
 * A compass backed search that finds biosequences that match the search string. Runs a compass search using
 * {@code compassBiosequence}, then adds the biosequences associated with the genes that match the query.
 *
 * @param settings                  the search settings; the query is also used in log messages
 * @param previousGeneSearchResults Can be null, otherwise used to avoid a second search for genes. The biosequences
 *                                  for the genes are added to the final results.
 * @return the direct compass hits plus one converted result per biosequence found for the matching genes
 */
private Collection<SearchResult> compassBioSequenceSearch(SearchSettings settings, Collection<SearchResult> previousGeneSearchResults) {
    Collection<SearchResult> results = this.compassSearch(compassBiosequence, settings);
    Collection<SearchResult> geneResults;
    if (previousGeneSearchResults == null) {
        SearchServiceImpl.log.info("Biosequence Search:  running gene search with " + settings.getQuery());
        geneResults = this.compassGeneSearch(settings);
    } else {
        SearchServiceImpl.log.info("Biosequence Search:  using previous results");
        geneResults = previousGeneSearchResults;
    }
    // Remember which search result each gene came from so it can be passed along when converting its sequences.
    Map<Gene, SearchResult> genes = new HashMap<>();
    for (SearchResult sr : geneResults) {
        Object resultObject = sr.getResultObject();
        // instanceof is false for null, so a result without an object is reported below instead of throwing.
        if (resultObject instanceof Gene) {
            genes.put((Gene) resultObject, sr);
        } else {
            // see bug 1774 -- may not be happening any more.
            SearchServiceImpl.log.warn("Expected a Gene, got a " + (resultObject == null ? "null" : resultObject.getClass()) + " on query=" + settings.getQuery());
        }
    }
    Map<Gene, Collection<BioSequence>> seqsFromDb = bioSequenceService.findByGenes(genes.keySet());
    for (Map.Entry<Gene, Collection<BioSequence>> entry : seqsFromDb.entrySet()) {
        List<BioSequence> bs = new ArrayList<>(entry.getValue());
        // bioSequenceService.thawRawAndProcessed( bs );
        results.addAll(this.dbHitsToSearchResult(bs, genes.get(entry.getKey()), null));
    }
    return results;
}
Also used : Gene(ubic.gemma.model.genome.Gene) BioSequence(ubic.gemma.model.genome.biosequence.BioSequence) BibliographicReferenceValueObject(ubic.gemma.model.common.description.BibliographicReferenceValueObject) SearchSettingsValueObject(ubic.gemma.model.common.search.SearchSettingsValueObject) BioSequenceValueObject(ubic.gemma.model.genome.sequenceAnalysis.BioSequenceValueObject) GeneEvidenceValueObject(ubic.gemma.model.genome.gene.phenotype.valueObject.GeneEvidenceValueObject) CharacteristicValueObject(ubic.gemma.model.genome.gene.phenotype.valueObject.CharacteristicValueObject)

Example 2 with BioSequence

use of ubic.gemma.model.genome.biosequence.BioSequence in project Gemma by PavlidisLab.

the class ExpressionDataDoubleMatrixTest method testConstructExpressionDataDoubleMatrixWithGeoValues.

/**
 * This is a self-contained test. That is, it does not depend on the setup in onSetUpInTransaction. It tests
 * creating an {@link ExpressionDataDoubleMatrix} using real values from the Gene Expression Omnibus (GEO). That is,
 * we have obtained information from GSE994. The probe sets used are 218120_s_at and 121_at, and the samples used
 * are GSM15697 and GSM15744. Specifically, the Gemma objects that correspond to the GEO objects are:
 * <ul>
 * <li>DesignElement 1 = 218120_s_at, DesignElement 2 = 121_at</li>
 * <li>BioAssay 1 = "Current Smoker 73", BioAssay 2 = "Former Smoker 34"</li>
 * <li>BioMaterial 1 = "GSM15697", BioMaterial 2 = "GSM15744"</li>
 * <li>BioAssayDimension = "GSM15697, GSM15744" (the names of all the biomaterials).</li>
 * </ul>
 * The resulting matrix is expected to have two rows (one per vector) and two columns (one per bioassay).
 */
@Test
public void testConstructExpressionDataDoubleMatrixWithGeoValues() {
    ByteArrayConverter bac = new ByteArrayConverter();
    ee = ExpressionExperiment.Factory.newInstance();

    // Quantitation type shared by both vectors.
    QuantitationType qt = QuantitationType.Factory.newInstance();
    qt.setName("VALUE");
    qt.setIsBackgroundSubtracted(false);
    qt.setIsNormalized(false);
    qt.setIsBackground(false);
    qt.setIsRatio(false);
    qt.setIsPreferred(true);
    qt.setIsMaskedPreferred(false);
    qt.setRepresentation(PrimitiveType.DOUBLE);

    // Two bioassays, each using one biomaterial, grouped in a single dimension.
    BioAssayDimension bioAssayDimension = BioAssayDimension.Factory.newInstance();
    bioAssayDimension.setName("GSM15697, GSM15744");
    List<BioAssay> assays = new ArrayList<>();
    BioAssay assay1 = BioAssay.Factory.newInstance();
    assay1.setName("Current Smoker 73");
    BioMaterial sample1 = BioMaterial.Factory.newInstance();
    sample1.setName("GSM15697");
    assay1.setSampleUsed(sample1);
    assays.add(assay1);
    BioAssay assay2 = BioAssay.Factory.newInstance();
    assay2.setName("Former Smoker 34");
    BioMaterial sample2 = BioMaterial.Factory.newInstance();
    sample2.setName("GSM15744");
    assay2.setSampleUsed(sample2);
    assays.add(assay2);
    bioAssayDimension.setBioAssays(assays);

    // One raw data vector per probe set; each holds two values, one per bioassay.
    RawExpressionDataVector vector1 = RawExpressionDataVector.Factory.newInstance();
    double[] ddata1 = { 74.9, 101.7 };
    byte[] bdata1 = bac.doubleArrayToBytes(ddata1);
    vector1.setData(bdata1);
    vector1.setQuantitationType(qt);
    vector1.setBioAssayDimension(bioAssayDimension);
    RawExpressionDataVector vector2 = RawExpressionDataVector.Factory.newInstance();
    double[] ddata2 = { 404.6, 318.7 };
    byte[] bdata2 = bac.doubleArrayToBytes(ddata2);
    vector2.setData(bdata2);
    vector2.setQuantitationType(qt);
    vector2.setBioAssayDimension(bioAssayDimension);

    // Design elements (with their biosequences) on a common array design.
    ArrayDesign ad = ArrayDesign.Factory.newInstance();
    ad.setName("test ar");
    CompositeSequence de1 = CompositeSequence.Factory.newInstance();
    de1.setName("218120_s_at");
    vector1.setDesignElement(de1);
    BioSequence bs1 = BioSequence.Factory.newInstance();
    bs1.setName("test1");
    de1.setBiologicalCharacteristic(bs1);
    de1.setArrayDesign(ad);
    CompositeSequence de2 = CompositeSequence.Factory.newInstance();
    de2.setName("121_at");
    BioSequence bs2 = BioSequence.Factory.newInstance();
    bs2.setName("test2");
    de2.setBiologicalCharacteristic(bs2);
    de2.setArrayDesign(ad);
    vector2.setDesignElement(de2);

    Collection<RawExpressionDataVector> eeVectors = new LinkedHashSet<>();
    eeVectors.add(vector1);
    eeVectors.add(vector2);
    ee.setRawExpressionDataVectors(eeVectors);
    ExpressionDataDoubleMatrix expressionDataMatrix = new ExpressionDataDoubleMatrix(eeVectors);
    assertNotNull(expressionDataMatrix);
    // Expected value goes first so that failure messages report expected/actual the right way round.
    assertEquals(2, expressionDataMatrix.rows());
    assertEquals(2, expressionDataMatrix.columns());
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) ByteArrayConverter(ubic.basecode.io.ByteArrayConverter) BioSequence(ubic.gemma.model.genome.biosequence.BioSequence) ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) RawExpressionDataVector(ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay) AbstractGeoServiceTest(ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest) Test(org.junit.Test)

Example 3 with BioSequence

use of ubic.gemma.model.genome.biosequence.BioSequence in project Gemma by PavlidisLab.

the class ArrayDesignSequenceProcessorTest method testFetchAndLoadWithIdentifiers.

/**
 * Loads the GPL226 platform from local test files, then processes it against an identifier file and the test
 * blast databases. Checks that every returned biosequence has sequence data and that every composite sequence
 * of the array design ends up with a biological characteristic.
 * <p>
 * The test returns early (with a warning) when no readable fastacmd executable is configured.
 *
 * @throws Exception if loading or processing fails
 */
@Test
public void testFetchAndLoadWithIdentifiers() throws Exception {
    String fastacmdExe = Settings.getString(SimpleFastaCmd.FASTA_CMD_ENV_VAR);
    if (fastacmdExe == null) {
        log.warn("No fastacmd executable is configured, skipping test");
        return;
    }
    File fi = new File(fastacmdExe);
    if (!fi.canRead()) {
        log.warn(fastacmdExe + " not found, skipping test");
        return;
    }
    GeoService geoService = this.getBean(GeoService.class);
    geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGeneratorLocal(this.getTestFileBasePath()));
    @SuppressWarnings("unchecked") final Collection<ArrayDesign> ads = (Collection<ArrayDesign>) geoService.fetchAndLoad("GPL226", true, true, false, true, true);
    result = ads.iterator().next();
    result = arrayDesignService.thaw(result);
    // have to specify taxon as this has two taxons in it
    try (InputStream f = this.getClass().getResourceAsStream("/data/loader/expression/arrayDesign/identifierTest.txt")) {
        Collection<BioSequence> res = app.processArrayDesign(result, f, new String[] { "testblastdb", "testblastdbPartTwo" }, FileTools.resourceToPath("/data/loader/genome/blast"), taxon, true);
        assertNotNull(res);
        for (BioSequence sequence : res) {
            assertNotNull(sequence.getSequence());
        }
        for (CompositeSequence cs : result.getCompositeSequences()) {
            // Use a JUnit assertion: the 'assert' keyword is a no-op unless the JVM runs with -ea.
            assertNotNull("Composite sequence has no biological characteristic", cs.getBiologicalCharacteristic());
        }
    }
}
Also used : BioSequence(ubic.gemma.model.genome.biosequence.BioSequence) ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) InputStream(java.io.InputStream) GeoService(ubic.gemma.core.loader.expression.geo.service.GeoService) Collection(java.util.Collection) File(java.io.File) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) GeoDomainObjectGeneratorLocal(ubic.gemma.core.loader.expression.geo.GeoDomainObjectGeneratorLocal) AbstractGeoServiceTest(ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest) Test(org.junit.Test)

Example 4 with BioSequence

use of ubic.gemma.model.genome.biosequence.BioSequence in project Gemma by PavlidisLab.

the class IlluminaProbeReaderTest method testReadInputStream.

/**
 * Parses the test input stream with the reader under test and checks that the reporter GI_42655756-S is present
 * and that its immobilized characteristic carries the expected sequence (compared in upper case).
 *
 * @throws Exception when there is a problem
 */
public final void testReadInputStream() throws Exception {
    // assertNotNull with a message reports what was missing, unlike assertTrue(x != null).
    TestCase.assertNotNull("Reader under test was not initialized", apr);
    apr.parse(is);
    String expectedValue = "GTGGCTGCCTTCCCAGCAGTCTCTACTTCAGCATATCTGGGAGCCAGAAG";
    TestCase.assertTrue("Reporter GI_42655756-S not found", apr.containsKey("GI_42655756-S"));
    Reporter r = apr.get("GI_42655756-S");
    TestCase.assertNotNull("Reporter GI_42655756-S not found", r);
    BioSequence bs = r.getImmobilizedCharacteristic();
    TestCase.assertNotNull("Immobilized characteristic was null", bs);
    String actualValue = bs.getSequence().toUpperCase();
    TestCase.assertEquals("Wrong sequence returned", expectedValue, actualValue);
}
Also used : BioSequence(ubic.gemma.model.genome.biosequence.BioSequence)

Example 5 with BioSequence

use of ubic.gemma.model.genome.biosequence.BioSequence in project Gemma by PavlidisLab.

the class MockFastaCmd method makeSequence.

/**
 * Builds a mock DNA biosequence for the given identifier.
 * <p>
 * The sequence is created for the {@code taxon} field, named after {@code object.toString()}, given a length of
 * 100 and a random 100-character sequence drawn from the characters A, T, G and C. The same string is used to
 * obtain the Genbank database entry attached to the sequence.
 *
 * @param object the identifier; its string form is used as both the name and the accession
 * @return the populated mock sequence
 */
private BioSequence makeSequence(Object object) {
    final int mockLength = 100;
    BioSequence sequence = BioSequence.Factory.newInstance(taxon);
    String identifier = object.toString();

    // Descriptive attributes.
    sequence.setName(identifier);
    sequence.setLength((long) mockLength);
    sequence.setPolymerType(PolymerType.DNA);
    sequence.setIsApproximateLength(false);
    sequence.setIsCircular(false);
    sequence.setFractionRepeats(0.0);

    // Random bases, then the accession for the same identifier.
    sequence.setSequence(RandomStringUtils.random(mockLength, "ATGC"));
    sequence.setSequenceDatabaseEntry(ExternalDatabaseUtils.getGenbankAccession(identifier));
    return sequence;
}
Also used : BioSequence(ubic.gemma.model.genome.biosequence.BioSequence) DatabaseEntry(ubic.gemma.model.common.description.DatabaseEntry)

Aggregations

BioSequence (ubic.gemma.model.genome.biosequence.BioSequence): 105; CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence): 40; ArrayDesign (ubic.gemma.model.expression.arrayDesign.ArrayDesign): 24; Test (org.junit.Test): 18; HashSet (java.util.HashSet): 17; Taxon (ubic.gemma.model.genome.Taxon): 15; BlatResult (ubic.gemma.model.genome.sequenceAnalysis.BlatResult): 12; InputStream (java.io.InputStream): 11; Collection (java.util.Collection): 11; HashMap (java.util.HashMap): 10; BaseSpringContextTest (ubic.gemma.core.testing.BaseSpringContextTest): 10; GZIPInputStream (java.util.zip.GZIPInputStream): 7; Gene (ubic.gemma.model.genome.Gene): 7; GeoPlatform (ubic.gemma.core.loader.expression.geo.model.GeoPlatform): 6; DatabaseEntry (ubic.gemma.model.common.description.DatabaseEntry): 6; StopWatch (org.apache.commons.lang3.time.StopWatch): 5; GeneProduct (ubic.gemma.model.genome.gene.GeneProduct): 5; BioSequenceValueObject (ubic.gemma.model.genome.sequenceAnalysis.BioSequenceValueObject): 5; BlatAssociation (ubic.gemma.model.genome.sequenceAnalysis.BlatAssociation): 5; AbstractGeoServiceTest (ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest): 4