Search in sources :

Example 46 with Taxon

use of ubic.gemma.model.genome.Taxon in project Gemma by PavlidisLab.

the class GeneCoreServiceTest method testSearchGenes.

/**
 * Persists a throw-away gene (taxon looked up by the common name "human") with a physical location on a
 * freshly persisted chromosome, then checks that {@code geneCoreService.searchGenes} returns a non-null
 * collection whose first element is non-null for the gene's name.
 * <p>
 * The gene is removed in a {@code finally} block so that a failing assertion (or an empty result, which makes
 * {@code iterator().next()} throw) does not leave the test gene behind.
 */
@Test
public void testSearchGenes() {
    Gene gene = Gene.Factory.newInstance();
    // random 5-digit NCBI id for the throw-away gene
    Integer id = Integer.parseInt(RandomStringUtils.randomNumeric(5));
    gene.setNcbiGeneId(id);
    gene.setName("test_search");
    gene.setOfficialName("test_search");
    gene.setOfficialSymbol("test_search");
    Taxon human = taxonService.findByCommonName("human");
    gene.setTaxon(human);
    PhysicalLocation pl1 = PhysicalLocation.Factory.newInstance();
    Chromosome chromosome = new Chromosome("X", null, this.getTestPersistentBioSequence(), human);
    chromosome = (Chromosome) persisterHelper.persist(chromosome);
    pl1.setChromosome(chromosome);
    pl1.setNucleotide(10000010L);
    pl1.setNucleotideLength(1001);
    pl1.setStrand("-");
    gene.setPhysicalLocation(pl1);
    gene = geneDao.create(gene);
    try {
        // NOTE(review): the second argument is presumably the taxon id of human - confirm against searchGenes
        Collection<GeneValueObject> searchResults = geneCoreService.searchGenes("test_search", 1L);
        assertNotNull(searchResults);
        GeneValueObject gvo = searchResults.iterator().next();
        assertNotNull(gvo);
    } finally {
        // always clean up the gene created above, even when the checks fail
        geneDao.remove(gene);
    }
}
Also used : Gene(ubic.gemma.model.genome.Gene) Taxon(ubic.gemma.model.genome.Taxon) Chromosome(ubic.gemma.model.genome.Chromosome) PhysicalLocation(ubic.gemma.model.genome.PhysicalLocation) Test(org.junit.Test) BaseSpringContextTest(ubic.gemma.core.testing.BaseSpringContextTest)

Example 47 with Taxon

use of ubic.gemma.model.genome.Taxon in project Gemma by PavlidisLab.

the class ArrayDesignProbeMapperServiceImpl method processArrayDesign.

/**
 * Runs probe mapping for every composite sequence of the given platform and hands the resulting
 * associations to a loader through a bounded queue.
 * <p>
 * Steps, in order: validate the platform, resolve its taxon, start the loader via {@code load(...)}, optionally
 * delete old gene product associations, process each composite sequence, signal that generation is finished,
 * wait until the loader reports completion, then regenerate the platform report and delete old files.
 *
 * @param arrayDesign the platform to process; its technology type must not be {@code TechnologyType.NONE}
 * @param config      mapper configuration passed on to {@code processCompositeSequence}; must not be null
 * @param useDB       when true, old gene product associations are deleted first; also forwarded to
 *                    {@code load(...)}
 * @throws IllegalArgumentException if the platform has technology type NONE, or has sequences from more than
 *                                  one taxon and no primary taxon
 * @throws RuntimeException         wrapping an {@link InterruptedException} if the thread is interrupted while
 *                                  waiting for the loader; the interrupt flag is restored before throwing
 */
@Override
public void processArrayDesign(ArrayDesign arrayDesign, ProbeMapperConfig config, boolean useDB) {
    assert config != null;
    if (arrayDesign.getTechnologyType().equals(TechnologyType.NONE)) {
        throw new IllegalArgumentException("Do not use this service to process platforms that do not use an probe-based technology.");
    }
    Collection<Taxon> taxa = arrayDesignService.getTaxa(arrayDesign.getId());
    Taxon taxon = arrayDesign.getPrimaryTaxon();
    if (taxa.size() > 1 && taxon == null) {
        throw new IllegalArgumentException("Array design has sequence from multiple taxa and has no primary taxon set: " + arrayDesign);
    }
    // NOTE(review): taxon can still be null here when the platform has at most one taxon and no primary taxon
    // set - confirm that GoldenPathSequenceAnalysis accepts that.
    GoldenPathSequenceAnalysis goldenPathDb = new GoldenPathSequenceAnalysis(taxon);
    BlockingQueue<BACS> persistingQueue = new ArrayBlockingQueue<>(ArrayDesignProbeMapperServiceImpl.QUEUE_SIZE);
    AtomicBoolean generatorDone = new AtomicBoolean(false);
    AtomicBoolean loaderDone = new AtomicBoolean(false);
    // presumably starts the consumer that drains the queue and eventually sets loaderDone - confirm in load()
    this.load(persistingQueue, generatorDone, loaderDone, useDB);
    if (useDB) {
        ArrayDesignProbeMapperServiceImpl.log.info("Removing any old associations");
        arrayDesignService.deleteGeneProductAssociations(arrayDesign);
    }
    int count = 0;
    int hits = 0;
    ArrayDesignProbeMapperServiceImpl.log.info("Start processing " + arrayDesign.getCompositeSequences().size() + " probes ...");
    for (CompositeSequence compositeSequence : arrayDesign.getCompositeSequences()) {
        Map<String, Collection<BlatAssociation>> results = this.processCompositeSequence(config, taxon, goldenPathDb, compositeSequence);
        if (results == null) {
            continue;
        }
        for (Collection<BlatAssociation> col : results.values()) {
            for (BlatAssociation association : col) {
                if (ArrayDesignProbeMapperServiceImpl.log.isDebugEnabled()) {
                    ArrayDesignProbeMapperServiceImpl.log.debug(association);
                }
                // NOTE(review): add() on a bounded queue throws IllegalStateException when the queue is full;
                // put() would block instead - confirm which behaviour is wanted before changing it.
                persistingQueue.add(new BACS(compositeSequence, association));
            }
            // counted once per result collection, not once per association
            ++hits;
        }
        if (++count % 200 == 0) {
            ArrayDesignProbeMapperServiceImpl.log.info("Processed " + count + " composite sequences" + " with blat results; " + hits + " mappings found.");
        }
    }
    generatorDone.set(true);
    ArrayDesignProbeMapperServiceImpl.log.info("Waiting for loading to complete ...");
    while (!loaderDone.get()) {
        try {
            Thread.sleep(1000);
        } catch (InterruptedException e) {
            // restore the interrupt flag so callers further up can still observe the interruption
            Thread.currentThread().interrupt();
            throw new RuntimeException(e);
        }
    }
    ArrayDesignProbeMapperServiceImpl.log.info("Processed " + count + " composite sequences with blat results; " + hits + " mappings found.");
    arrayDesignReportService.generateArrayDesignReport(arrayDesign.getId());
    this.deleteOldFiles(arrayDesign);
}
Also used : GoldenPathSequenceAnalysis(ubic.gemma.core.externalDb.GoldenPathSequenceAnalysis) Taxon(ubic.gemma.model.genome.Taxon) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) ArrayBlockingQueue(java.util.concurrent.ArrayBlockingQueue) Collection(java.util.Collection) BlatAssociation(ubic.gemma.model.genome.sequenceAnalysis.BlatAssociation)

Example 48 with Taxon

use of ubic.gemma.model.genome.Taxon in project Gemma by PavlidisLab.

the class GeoBrowserServiceImpl method filterGeoRecords.

/**
 * Removes unusable records from the given list and annotates the remaining ones.
 * <p>
 * A record is removed when it has fewer than {@code MIN_SAMPLES} samples, or when one of its organisms cannot
 * be found by common or scientific name. Records without organisms are kept untouched. For every organism of
 * a kept record, the GEO accession (suffixed with {@code "." + index} when the record has several organisms)
 * is used to look up existing experiments, whose ids are added to the record, and to copy the previous-click
 * count and usability flag from {@code localInfo}.
 *
 * @param records the records to filter; this list is modified in place
 * @return the same list instance, with the rejected records removed
 */
private List<GeoRecord> filterGeoRecords(List<GeoRecord> records) {
    ExternalDatabase geo = externalDatabaseService.findByName("GEO");
    assert geo != null;
    Collection<GeoRecord> toRemove = new HashSet<>();
    rec: for (GeoRecord record : records) {
        if (record.getNumSamples() < GeoBrowserServiceImpl.MIN_SAMPLES) {
            toRemove.add(record);
            // the record is dropped anyway: skip the lookups below and do not mutate it after it has been
            // placed in the hash set
            continue;
        }
        Collection<String> organisms = record.getOrganisms();
        if (organisms == null || organisms.isEmpty()) {
            continue;
        }
        int i = 0;
        for (String string : organisms) {
            // try the common name first, then fall back to the scientific name
            Taxon t = taxonService.findByCommonName(string);
            if (t == null) {
                t = taxonService.findByScientificName(string);
                if (t == null) {
                    toRemove.add(record);
                    continue rec;
                }
            }
            String acc = record.getGeoAccession();
            if (organisms.size() > 1) {
                // multi-organism records are looked up under one accession per organism: <accession>.<index>
                acc = acc + "." + i;
            }
            DatabaseEntry de = DatabaseEntry.Factory.newInstance();
            de.setExternalDatabase(geo);
            de.setAccession(acc);
            Collection<ExpressionExperiment> ee = expressionExperimentService.findByAccession(de);
            for (ExpressionExperiment expressionExperiment : ee) {
                record.getCorrespondingExperiments().add(expressionExperiment.getId());
            }
            // defaults when nothing is stored for this accession: zero previous clicks, usable
            record.setPreviousClicks(localInfo.containsKey(acc) ? localInfo.get(acc).getPreviousClicks() : 0);
            record.setUsable(!localInfo.containsKey(acc) || localInfo.get(acc).isUsable());
            i++;
        }
    }
    records.removeAll(toRemove);
    return records;
}
Also used : GeoRecord(ubic.gemma.core.loader.expression.geo.model.GeoRecord) ExternalDatabase(ubic.gemma.model.common.description.ExternalDatabase) Taxon(ubic.gemma.model.genome.Taxon) DatabaseEntry(ubic.gemma.model.common.description.DatabaseEntry) ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment)

Example 49 with Taxon

use of ubic.gemma.model.genome.Taxon in project Gemma by PavlidisLab.

the class GeoConverterImpl method processId.

/**
 * Converts one platform element (row {@code i}, identifier {@code id}) into a composite sequence with a
 * minimal biosequence, unless strict selection causes the element to be skipped.
 * <p>
 * The returned value is always the index of the next row. This includes both skip paths; previously the
 * "no gene_assignment column" skip returned {@code i} unchanged, so the following identifier would have been
 * evaluated against the same row again.
 *
 * @param platform               source of column names, column data and the probe-name map
 * @param arrayDesign            platform the new composite sequence is attached to
 * @param probeOrganismColumn    when null, the probe takes {@code primaryTaxon}
 * @param externalDb             forwarded to {@code setBsProps}
 * @param sequences              forwarded to {@code setBsProps}
 * @param probeOrganism          per-row organism values, may be null
 * @param primaryTaxon           taxon used when there is no probe organism column
 * @param cloneIdentifiers       per-row clone identifiers, may be null
 * @param externalRefs           external reference columns, may be null
 * @param descIter               iterator over descriptions, may be null; one item is consumed per element
 *                               that is not skipped
 * @param refSeqAccessionPattern forwarded to {@code setBsProps}
 * @param strictSelection        when true, elements with a blank external accession are skipped unless the
 *                               platform has a non-blank "gene_assignment" value for the row
 * @param skipped                receives the ids skipped because of a blank "gene_assignment" value
 * @param compositeSequences     receives the created composite sequence
 * @param i                      index of the row being processed
 * @param id                     identifier of the element being processed
 * @return the index of the next row, i.e. {@code i + 1}
 */
private int processId(GeoPlatform platform, ArrayDesign arrayDesign, String probeOrganismColumn, ExternalDatabase externalDb, List<String> sequences, List<String> probeOrganism, Taxon primaryTaxon, List<String> cloneIdentifiers, List<List<String>> externalRefs, Iterator<String> descIter, Pattern refSeqAccessionPattern, boolean strictSelection, List<String> skipped, Collection<CompositeSequence> compositeSequences, int i, String id) {
    String externalAccession = null;
    if (externalRefs != null) {
        externalAccession = this.getExternalAccession(externalRefs, i);
    }
    if (strictSelection && StringUtils.isBlank(externalAccession)) {
        // currently this is crafted to deal with affymetrix exon arrays, but could be expanded.
        // mrna_assignment is less strict than gene_assignment
        // salvage it if it has a gene assignment.
        String filteringColumn = "gene_assignment";
        if (platform.getColumnNames().contains(filteringColumn)) {
            String cd = platform.getColumnData(filteringColumn).get(i);
            if (StringUtils.isBlank(cd) || cd.equals("---")) {
                skipped.add(id);
                if (skipped.size() % 10000 == 0) {
                    GeoConverterImpl.log.info("Skipped " + skipped.size() + " elements due to strict selection; last was " + id);
                }
                // NOTE(review): skipped rows do not consume an item from descIter - confirm that descriptions
                // stay aligned with the rows that follow.
                return i + 1;
            }
        // keep it.
        } else {
            // we just skip ones that don't have an external accession; the row index must still advance,
            // otherwise every following id would be checked against this same row.
            return i + 1;
        }
    // remaining case here: externalAccession is blank, but there is another column that we think saves it.
    }
    String cloneIdentifier = cloneIdentifiers == null ? null : cloneIdentifiers.get(i);
    String description = "";
    if (externalAccession != null) {
        // several comma-separated references: keep the first as the accession, record all in the description
        String[] refs = externalAccession.split(",");
        if (refs.length > 1) {
            description = "Multiple external sequence references: " + externalAccession + "; ";
            externalAccession = refs[0];
        }
    }
    if (descIter != null) {
        description = description + " " + descIter.next();
    }
    CompositeSequence cs = CompositeSequence.Factory.newInstance();
    String probeName = platform.getProbeNamesInGemma().get(id);
    if (probeName == null) {
        probeName = id;
        if (GeoConverterImpl.log.isDebugEnabled()) {
            GeoConverterImpl.log.debug("Probe retaining original name: " + probeName);
        }
        // must make sure this is populated.
        platform.getProbeNamesInGemma().put(id, id);
    } else {
        if (GeoConverterImpl.log.isDebugEnabled()) {
            GeoConverterImpl.log.debug("Found probe: " + probeName);
        }
    }
    cs.setName(probeName);
    cs.setDescription(description);
    cs.setArrayDesign(arrayDesign);
    // LMD:1647- If There is a Organism Column given for the probe then set taxon from that overwriting platform
    // if probeOrganismColumn is set but for this probe no taxon do not set probeTaxon and thus create no
    // biosequence
    Taxon probeTaxon = Taxon.Factory.newInstance();
    if (probeOrganism != null && StringUtils.isNotBlank(probeOrganism.get(i))) {
        probeTaxon = this.convertProbeOrganism(probeOrganism.get(i));
    }
    // if there are no probe taxons then all the probes should take the taxon from the primary taxon
    if (probeOrganismColumn == null) {
        probeTaxon = primaryTaxon;
    }
    BioSequence bs = this.createMinimalBioSequence(probeTaxon);
    this.setBsProps(platform, externalDb, sequences, refSeqAccessionPattern, i, id, externalAccession, cloneIdentifier, bs);
    this.checkCs(arrayDesign, externalAccession, cloneIdentifier, cs, probeTaxon, bs);
    compositeSequences.add(cs);
    platformDesignElementMap.get(arrayDesign.getShortName()).put(probeName, cs);
    return i + 1;
}
Also used : BioSequence(ubic.gemma.model.genome.biosequence.BioSequence) Taxon(ubic.gemma.model.genome.Taxon) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence)

Example 50 with Taxon

use of ubic.gemma.model.genome.Taxon in project Gemma by PavlidisLab.

the class GeoConverterImpl method convertChannel.

/**
 * GEO does not keep track of 'biomaterials' that make up different channels. Therefore the two channels effectively
 * make up a single biomaterial, as far as we're concerned. We're losing information here.
 * <p>
 * Copies the channel's information onto the biomaterial: appends the channel number to the description, adds one
 * treatment per non-blank protocol (growth, treatment, extraction, labeling), converts the channel's
 * characteristics, source name, molecule and label into characteristics, and sets the source taxon from the
 * channel's organism when the biomaterial has none yet.
 *
 * @param sample      sample the channel belongs to; its GEO accession is used in names and messages
 * @param channel     channel to convert
 * @param bioMaterial biomaterial to populate; nothing happens when it is null
 * @throws IllegalArgumentException if the biomaterial already has a source taxon whose scientific name differs
 *                                  from the channel's organism
 */
private void convertChannel(GeoSample sample, GeoChannel channel, BioMaterial bioMaterial) {
    if (bioMaterial == null)
        return;
    GeoConverterImpl.log.debug("Sample: " + sample.getGeoAccession() + " - Converting channel " + channel.getSourceName());
    bioMaterial.setDescription((bioMaterial.getDescription() == null ? "" : bioMaterial.getDescription() + ";") + "Channel " + channel.getChannelNumber());
    // One treatment per protocol. The growth and treatment labels used to be swapped (growth protocol text was
    // stored under "... treatment" and vice versa); each label now matches the protocol it describes.
    this.addProtocolTreatment(sample, channel, bioMaterial, "growth", channel.getGrowthProtocol());
    this.addProtocolTreatment(sample, channel, bioMaterial, "treatment", channel.getTreatmentProtocol());
    this.addProtocolTreatment(sample, channel, bioMaterial, "extraction", channel.getExtractProtocol());
    this.addProtocolTreatment(sample, channel, bioMaterial, "labeling", channel.getLabelProtocol());
    for (String characteristic : channel.getCharacteristics()) {
        characteristic = this.trimString(characteristic);
        /*
         * Sometimes values are like Age:8 weeks, so we can try to convert them.
         */
        String[] fields = characteristic.split(":");
        String defaultDescription = "GEO Sample characteristic";
        if (fields.length == 2) {
            String category = fields[0].trim();
            String value = fields[1].trim();
            try {
                Characteristic gemmaChar = Characteristic.Factory.newInstance();
                this.convertVariableType(gemmaChar, GeoVariable.convertStringToType(category));
                if (gemmaChar.getCategory() == null) {
                    // the conversion did not yield a category: this characteristic is dropped
                    continue;
                }
                gemmaChar.setDescription(defaultDescription);
                gemmaChar.setValue(value);
                gemmaChar.setEvidenceCode(GOEvidenceCode.IIA);
                bioMaterial.getCharacteristics().add(gemmaChar);
            } catch (Exception e) {
                // conversion didn't work, fall back.
                this.doFallback(bioMaterial, characteristic, defaultDescription);
            }
        } else {
            // not exactly one colon-separated pair, just use raw (same as fallback above)
            this.doFallback(bioMaterial, characteristic, defaultDescription);
        }
    }
    if (StringUtils.isNotBlank(channel.getSourceName())) {
        Characteristic sourceChar = Characteristic.Factory.newInstance();
        sourceChar.setDescription("GEO Sample source");
        String characteristic = this.trimString(channel.getSourceName());
        sourceChar.setCategory("BioSource");
        sourceChar.setCategoryUri("http://www.ebi.ac.uk/efo/EFO_0000635");
        sourceChar.setValue(characteristic);
        sourceChar.setEvidenceCode(GOEvidenceCode.IIA);
        bioMaterial.getCharacteristics().add(sourceChar);
    }
    if (StringUtils.isNotBlank(channel.getOrganism())) {
        // if we have a case where the two channels have different taxon throw an exception.
        String currentChannelTaxon = channel.getOrganism();
        if (bioMaterial.getSourceTaxon() != null) {
            String previousChannelTaxon = bioMaterial.getSourceTaxon().getScientificName();
            if (previousChannelTaxon != null && !(previousChannelTaxon.equals(currentChannelTaxon))) {
                throw new IllegalArgumentException("Channel 1 taxon is " + bioMaterial.getSourceTaxon().getScientificName() + " Channel 2 taxon is " + currentChannelTaxon + " Check that is expected for sample " + sample.getGeoAccession());
            }
        } else {
            // get it from the channel.
            Taxon taxon = Taxon.Factory.newInstance();
            taxon.setIsSpecies(true);
            taxon.setScientificName(channel.getOrganism());
            // plausible default, doesn't matter.
            taxon.setIsGenesUsable(true);
            bioMaterial.setSourceTaxon(taxon);
        }
    }
    if (channel.getMolecule() != null) {
        // this we can convert automatically pretty easily.
        Characteristic c = channel.getMoleculeAsCharacteristic();
        bioMaterial.getCharacteristics().add(c);
    }
    if (StringUtils.isNotBlank(channel.getLabel())) {
        String characteristic = this.trimString(channel.getLabel());
        // This is typically something like "biotin-labeled nucleotides", which we can convert later.
        Characteristic labelChar = Characteristic.Factory.newInstance();
        labelChar.setDescription("GEO Sample label");
        labelChar.setCategory("LabelCompound");
        labelChar.setCategoryUri("http://www.ebi.ac.uk/efo/EFO_0000562");
        labelChar.setValue(characteristic);
        labelChar.setEvidenceCode(GOEvidenceCode.IIA);
        bioMaterial.getCharacteristics().add(labelChar);
    }
}

/**
 * Adds a treatment named {@code "<GEO accession> channel <channel number> <kind>"} with the given protocol text
 * as its description to the biomaterial; does nothing when the protocol text is blank.
 *
 * @param sample      sample providing the GEO accession for the treatment name
 * @param channel     channel providing the channel number for the treatment name
 * @param bioMaterial biomaterial receiving the treatment
 * @param kind        suffix of the treatment name, e.g. "growth"
 * @param protocol    protocol text used as the treatment description, may be blank or null
 */
private void addProtocolTreatment(GeoSample sample, GeoChannel channel, BioMaterial bioMaterial, String kind, String protocol) {
    if (StringUtils.isBlank(protocol)) {
        return;
    }
    Treatment treatment = Treatment.Factory.newInstance();
    treatment.setName(sample.getGeoAccession() + " channel " + channel.getChannelNumber() + " " + kind);
    treatment.setDescription(protocol);
    bioMaterial.getTreatments().add(treatment);
}
Also used : Treatment(ubic.gemma.model.expression.biomaterial.Treatment) Taxon(ubic.gemma.model.genome.Taxon) MalformedURLException(java.net.MalformedURLException)

Aggregations

Taxon (ubic.gemma.model.genome.Taxon)161 Gene (ubic.gemma.model.genome.Gene)34 Test (org.junit.Test)31 BaseSpringContextTest (ubic.gemma.core.testing.BaseSpringContextTest)29 HashSet (java.util.HashSet)23 ArrayDesign (ubic.gemma.model.expression.arrayDesign.ArrayDesign)23 InputStream (java.io.InputStream)17 Before (org.junit.Before)16 BioSequence (ubic.gemma.model.genome.biosequence.BioSequence)15 ExpressionExperiment (ubic.gemma.model.expression.experiment.ExpressionExperiment)14 CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence)12 StopWatch (org.apache.commons.lang3.time.StopWatch)11 Transactional (org.springframework.transaction.annotation.Transactional)11 ArrayList (java.util.ArrayList)10 File (java.io.File)9 SimpleExpressionExperimentMetaData (ubic.gemma.core.loader.expression.simple.model.SimpleExpressionExperimentMetaData)9 Chromosome (ubic.gemma.model.genome.Chromosome)8 Collection (java.util.Collection)7 Element (org.w3c.dom.Element)7 PhysicalLocation (ubic.gemma.model.genome.PhysicalLocation)7