Search in sources :

Example 51 with Gene

use of ubic.gemma.model.genome.Gene in project Gemma by PavlidisLab.

the class ArrayDesignProbeMapperServiceImpl method printResult.

/**
 * Writes one tab-separated result row to standard output.
 * <p>
 * Columns, in order: probe name, name of the sequence attached to the BLAT association, gene product name, official
 * symbol of the product's gene, and the simple class name of the gene object.
 *
 * @param cs              the probe whose name goes in the first column
 * @param blatAssociation the association supplying the sequence, gene product and gene
 */
private void printResult(CompositeSequence cs, BlatAssociation blatAssociation) {
    GeneProduct product = blatAssociation.getGeneProduct();
    Gene productGene = product.getGene();
    StringBuilder row = new StringBuilder();
    row.append(cs.getName()).append('\t');
    row.append(blatAssociation.getBioSequence().getName()).append('\t');
    row.append(product.getName()).append('\t');
    row.append(productGene.getOfficialSymbol()).append("\t");
    row.append(productGene.getClass().getSimpleName());
    System.out.println(row.toString());
}
Also used : GeneProduct(ubic.gemma.model.genome.gene.GeneProduct) Gene(ubic.gemma.model.genome.Gene)

Example 52 with Gene

use of ubic.gemma.model.genome.Gene in project Gemma by PavlidisLab.

the class ArrayDesignProbeMapperServiceImpl method processArrayDesign.

/**
 * Replaces the probe-to-gene annotations of a platform with those read from a tab-delimited file.
 * <p>
 * Existing gene product associations for the platform are deleted first. The file is then read line by line; blank
 * lines and lines starting with '#' are ignored. Every other line must have exactly three tab-separated columns:
 * probe id, sequence name, and gene(s). The gene column may hold several entries separated by '|'; each entry is an
 * official symbol (looked up in {@code taxon}) or, when {@code ncbiIds} is true, a numeric NCBI gene id.
 * <p>
 * For each usable line an {@link AnnotationAssociation} is created between the probe's sequence and every product of
 * every gene found. If the probe has no sequence yet, a placeholder sequence is created from the given sequence name.
 * Lines that cannot be used are skipped with a log message and counted. Afterwards the platform report is regenerated
 * and {@code deleteOldFiles} is called for the platform.
 *
 * @param arrayDesign the platform to annotate; its technology type must not be {@code TechnologyType.NONE}
 * @param taxon       taxon used for symbol lookups and for newly created sequences; may be null only if
 *                    {@code ncbiIds} is true
 * @param source      the tab-delimited annotation file
 * @param sourceDB    recorded as the source of each created association
 * @param ncbiIds     true if the gene column contains NCBI gene ids rather than official symbols
 * @throws IOException              if the file cannot be read or a data line does not have three columns
 * @throws IllegalArgumentException if no taxon is given while {@code ncbiIds} is false, or the platform is not
 *                                  probe-based
 * @throws NumberFormatException    if {@code ncbiIds} is true and a gene entry is not an integer
 */
@Override
public void processArrayDesign(ArrayDesign arrayDesign, Taxon taxon, File source, ExternalDatabase sourceDB, boolean ncbiIds) throws IOException {
    if (taxon == null && !ncbiIds) {
        throw new IllegalArgumentException("You must provide a taxon unless passing ncbiIds = true");
    }
    if (arrayDesign.getTechnologyType().equals(TechnologyType.NONE)) {
        throw new IllegalArgumentException("Do not use this service to process platforms that do not use an probe-based technology.");
    }
    try (BufferedReader b = new BufferedReader(new FileReader(source))) {
        String line;
        // Counts skipped probes and also genes skipped for lack of products, so it is not strictly a line count.
        int numSkipped = 0;
        ArrayDesignProbeMapperServiceImpl.log.info("Removing any old associations");
        arrayDesignService.deleteGeneProductAssociations(arrayDesign);
        while ((line = b.readLine()) != null) {
            if (StringUtils.isBlank(line)) {
                continue;
            }
            if (line.startsWith("#")) {
                continue;
            }
            String[] fields = StringUtils.splitPreserveAllTokens(line, '\t');
            if (fields.length != 3) {
                throw new IOException("Illegal format, expected three columns, got " + fields.length);
            }
            String probeId = fields[0];
            String seqName = fields[1];
            // Third column: official symbol(s), or NCBI gene id(s) when ncbiIds is true; multiple entries are piped.
            String geneSymbol = fields[2];
            if (StringUtils.isBlank(geneSymbol)) {
                numSkipped++;
                continue;
            }
            CompositeSequence c = compositeSequenceService.findByName(arrayDesign, probeId);
            if (c == null) {
                if (ArrayDesignProbeMapperServiceImpl.log.isDebugEnabled())
                    ArrayDesignProbeMapperServiceImpl.log.debug("No probe found for '" + probeId + "' on " + arrayDesign + ", skipping");
                numSkipped++;
                continue;
            }
            // a probe can have more than one gene associated with it; if so they are separated by '|'
            Collection<Gene> geneListProbe = this.findGenesForAnnotationField(geneSymbol, taxon, ncbiIds, probeId);
            if (geneListProbe.isEmpty()) {
                ArrayDesignProbeMapperServiceImpl.log.warn("No gene(s) found for '" + geneSymbol + "' in " + taxon + ", skipping");
                numSkipped++;
                continue;
            } else if (geneListProbe.size() > 1) {
                // This is a common situation when the gene column holds |-separated genes, so it is only logged at
                // debug level.
                ArrayDesignProbeMapperServiceImpl.log.debug("More than one gene found for '" + geneSymbol + "' in " + taxon);
            }
            BioSequence bs = c.getBiologicalCharacteristic();
            if (bs != null) {
                if (StringUtils.isNotBlank(seqName)) {
                    bs = bioSequenceService.thaw(bs);
                    if (!bs.getName().equals(seqName)) {
                        ArrayDesignProbeMapperServiceImpl.log.warn("Sequence name '" + seqName + "' given for " + probeId + " does not match existing entry " + bs.getName() + ", skipping");
                        numSkipped++;
                        continue;
                    }
                }
                // otherwise we assume everything is okay.
            } else {
                // create one based on the text provided.
                if (StringUtils.isBlank(seqName)) {
                    ArrayDesignProbeMapperServiceImpl.log.warn("You must provide sequence names for probes which are not already mapped. probeName=" + probeId + " had no sequence associated and no name provided; skipping");
                    numSkipped++;
                    continue;
                }
                bs = BioSequence.Factory.newInstance();
                bs.setName(seqName);
                // NOTE(review): taxon may be null here when ncbiIds is true - confirm a null taxon is acceptable
                // for a new sequence.
                bs.setTaxon(taxon);
                bs.setDescription("Imported from annotation file");
                // Placeholder.
                bs.setType(SequenceType.OTHER);
                bs = bioSequenceService.create(bs);
                c.setBiologicalCharacteristic(bs);
                compositeSequenceService.update(c);
            }
            assert bs != null;
            assert bs.getId() != null;
            for (Gene gene : geneListProbe) {
                gene = geneService.thaw(gene);
                if (gene.getProducts().size() == 0) {
                    ArrayDesignProbeMapperServiceImpl.log.warn("There are no gene products for " + gene + ", it cannot be mapped to probes. Skipping");
                    numSkipped++;
                    continue;
                }
                for (GeneProduct gp : gene.getProducts()) {
                    AnnotationAssociation association = AnnotationAssociation.Factory.newInstance();
                    association.setBioSequence(bs);
                    association.setGeneProduct(gp);
                    association.setSource(sourceDB);
                    annotationAssociationService.create(association);
                }
            }
        }
        arrayDesignReportService.generateArrayDesignReport(arrayDesign.getId());
        this.deleteOldFiles(arrayDesign);
        ArrayDesignProbeMapperServiceImpl.log.info("Completed association processing for " + arrayDesign + ", " + numSkipped + " were skipped");
    }
}

/**
 * Resolves the '|'-separated entries of an annotation file's gene column to genes; entries that are blank or match
 * no gene are left out.
 *
 * @param geneField the raw gene column
 * @param taxon     taxon for official-symbol lookups; not used when {@code ncbiIds} is true
 * @param ncbiIds   true if entries are NCBI gene ids rather than official symbols
 * @param probeId   probe id of the line being processed, used only in error messages
 * @return the distinct genes found, possibly empty
 * @throws NumberFormatException if {@code ncbiIds} is true and an entry is not an integer
 */
private Collection<Gene> findGenesForAnnotationField(String geneField, Taxon taxon, boolean ncbiIds, String probeId) {
    Collection<Gene> genes = new HashSet<>();
    StringTokenizer st = new StringTokenizer(geneField, "|");
    while (st.hasMoreTokens()) {
        String geneToken = st.nextToken().trim();
        if (geneToken.isEmpty()) {
            // e.g. "A| |B" or a trailing "| "; nothing to look up, and parseInt would fail on it.
            continue;
        }
        Gene geneDetails;
        if (ncbiIds) {
            int ncbiId;
            try {
                ncbiId = Integer.parseInt(geneToken);
            } catch (NumberFormatException e) {
                // Same exception type as before, but say which probe and entry were at fault.
                NumberFormatException withContext = new NumberFormatException("Expected a numeric NCBI gene id for probe '" + probeId + "' but got '" + geneToken + "'");
                withContext.initCause(e);
                throw withContext;
            }
            geneDetails = geneService.findByNCBIId(ncbiId);
        } else {
            geneDetails = geneService.findByOfficialSymbol(geneToken, taxon);
        }
        if (geneDetails != null) {
            genes.add(geneDetails);
        }
    }
    return genes;
}
Also used : AnnotationAssociation(ubic.gemma.model.genome.sequenceAnalysis.AnnotationAssociation) BioSequence(ubic.gemma.model.genome.biosequence.BioSequence) IOException(java.io.IOException) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) GeneProduct(ubic.gemma.model.genome.gene.GeneProduct) StringTokenizer(java.util.StringTokenizer) Gene(ubic.gemma.model.genome.Gene) BufferedReader(java.io.BufferedReader) FileReader(java.io.FileReader) HashSet(java.util.HashSet)

Example 53 with Gene

use of ubic.gemma.model.genome.Gene in project Gemma by PavlidisLab.

the class NcbiGeneConverter method convert.

/**
 * Builds a new {@link Gene} from a parsed NCBI gene record.
 * <p>
 * Copies the NCBI id, symbol (used as both name and official symbol), description (as official name) and Ensembl id.
 * It records previous or discontinued NCBI ids, and attaches a freshly created taxon carrying only the NCBI taxon id,
 * plus a physical location carrying only the chromosome. It adds one alias per synonym and a database entry for each
 * cross-reference whose database name is "Ensembl" (case-insensitive).
 *
 * @param info the NCBI gene record to convert
 * @return the newly created gene
 */
public Gene convert(NCBIGeneInfo info) {
    Gene converted = Gene.Factory.newInstance();
    converted.setNcbiGeneId(Integer.parseInt(info.getGeneId()));
    String symbol = info.getDefaultSymbol();
    converted.setName(symbol);
    converted.setOfficialSymbol(symbol);
    converted.setOfficialName(info.getDescription());
    converted.setEnsemblId(info.getEnsemblId());

    /*
     * NOTE multiple discontinued or previous ids are allowed and stored as one comma-separated string. This is a
     * hack for cases uncovered recently; running this regularly keeps such cases to a minimum.
     */
    if (info.getHistory() == null) {
        if (StringUtils.isNotBlank(info.getDiscontinuedId())) {
            if (NcbiGeneConverter.log.isDebugEnabled()) {
                NcbiGeneConverter.log.debug("Gene matches a gene that was discontinued: " + converted + " matches gene that had id " + info.getDiscontinuedId());
            }
            converted.setPreviousNcbiId(info.getDiscontinuedId());
        }
    } else {
        assert info.getHistory().getCurrentId() == null || info.getGeneId().equals(info.getHistory().getCurrentId());
        assert info.getHistory().getPreviousIds() != null;
        if (!info.getHistory().getPreviousIds().isEmpty()) {
            converted.setPreviousNcbiId(StringUtils.join(info.getHistory().getPreviousIds(), ","));
        }
    }

    converted.setDescription("Imported from NCBI gene; Nomenclature status: " + info.getNomenclatureStatus());

    // Stub taxon identified only by its NCBI id.
    Taxon taxon = Taxon.Factory.newInstance();
    taxon.setNcbiId(info.getTaxId());
    taxon.setIsGenesUsable(false);
    taxon.setIsSpecies(true);
    converted.setTaxon(taxon);

    /*
     * We are going to stop maintaining this information
     */
    PhysicalLocation location = PhysicalLocation.Factory.newInstance();
    location.setChromosome(new Chromosome(info.getChromosome(), taxon));
    converted.setPhysicalLocation(location);

    // One alias per synonym.
    Collection<GeneAlias> geneAliases = converted.getAliases();
    for (String synonym : info.getSynonyms()) {
        GeneAlias geneAlias = GeneAlias.Factory.newInstance();
        geneAlias.setAlias(synonym);
        geneAliases.add(geneAlias);
    }

    // Only Ensembl cross-references are kept as accessions.
    for (String dbName : info.getDbXrefs().keySet()) {
        if (dbName.equalsIgnoreCase("Ensembl")) {
            String accession = info.getDbXrefs().get(dbName);
            DatabaseEntry ensemblEntry = DatabaseEntry.Factory.newInstance();
            ensemblEntry.setAccession(accession);
            ensemblEntry.setExternalDatabase(NcbiGeneConverter.getEnsembl());
            converted.getAccessions().add(ensemblEntry);
        }
    }
    return converted;
}
Also used : Gene(ubic.gemma.model.genome.Gene) GeneAlias(ubic.gemma.model.genome.gene.GeneAlias) Taxon(ubic.gemma.model.genome.Taxon) Chromosome(ubic.gemma.model.genome.Chromosome) DatabaseEntry(ubic.gemma.model.common.description.DatabaseEntry) PhysicalLocation(ubic.gemma.model.genome.PhysicalLocation)

Example 54 with Gene

use of ubic.gemma.model.genome.Gene in project Gemma by PavlidisLab.

the class NcbiGeneLoader method doLoad.

/**
 * Drains converted genes from the queue and persists them until the converter has signalled completion and the
 * queue is empty.
 * <p>
 * Progress is logged every 1000 genes, or when more than 30 seconds have passed since the last progress message.
 * {@code loaderDone} is set to true on normal completion and before any failure is rethrown.
 *
 * @param geneQueue queue of converted genes to persist
 * @throws RuntimeException wrapping any exception raised while loading a gene, or the interruption if this thread is
 *                          interrupted while waiting for the queue
 */
void doLoad(final BlockingQueue<Gene> geneQueue) {
    StopWatch timer = new StopWatch();
    timer.start();
    while (!(converterDone.get() && geneQueue.isEmpty())) {
        Gene gene = null;
        try {
            // Wait briefly for the next converted gene instead of spinning on a non-blocking poll(); the timeout
            // lets the loop condition be re-checked so we still stop once the converter is done.
            gene = geneQueue.poll(100, java.util.concurrent.TimeUnit.MILLISECONDS);
            if (gene == null) {
                continue;
            }
            persisterHelper.persistOrUpdate(gene);
            if (++loadedGeneCount % 1000 == 0 || timer.getTime() > 30 * 1000) {
                NcbiGeneLoader.log.info("Processed " + loadedGeneCount + " genes. Queue has " + geneQueue.size() + " items; last gene: " + gene);
                timer.reset();
                timer.start();
            }
        } catch (InterruptedException e) {
            // Restore the interrupt flag so code further up the stack can see the interruption.
            Thread.currentThread().interrupt();
            NcbiGeneLoader.log.error("Error while loading gene: " + gene + ": " + e.getMessage(), e);
            loaderDone.set(true);
            throw new RuntimeException(e);
        } catch (Exception e) {
            NcbiGeneLoader.log.error("Error while loading gene: " + gene + ": " + e.getMessage(), e);
            loaderDone.set(true);
            throw new RuntimeException(e);
        }
    }
    NcbiGeneLoader.log.info("Loaded " + loadedGeneCount + " genes. ");
    loaderDone.set(true);
}
Also used : Gene(ubic.gemma.model.genome.Gene) StopWatch(org.apache.commons.lang3.time.StopWatch)

Example 55 with Gene

use of ubic.gemma.model.genome.Gene in project Gemma by PavlidisLab.

the class StringProteinInteractionLoader method doLoad.

/**
 * Drains {@link Gene2GeneProteinAssociation}s from the queue and persists them until the converter has signalled
 * completion and the queue is empty.
 * <p>
 * Both genes of each association are looked up by NCBI id first. If either is unknown, the association is skipped
 * with a warning. Otherwise the association's genes are replaced by the looked-up genes before it is persisted.
 * Progress is logged every 1000 persisted associations. {@code loaderDone} is set to true on normal completion and
 * before any failure is rethrown.
 *
 * @param gene2GeneProteinAssociationQueue queue of Gene2GeneProteinAssociation to load
 * @throws RuntimeException wrapping any exception raised while loading, or the interruption if this thread is
 *                          interrupted while waiting for the queue
 */
private void doLoad(final BlockingQueue<Gene2GeneProteinAssociation> gene2GeneProteinAssociationQueue) {
    StringProteinInteractionLoader.log.info("starting processing ");
    while (!(converterDone.get() && gene2GeneProteinAssociationQueue.isEmpty())) {
        try {
            // Wait briefly for the next item instead of spinning on a non-blocking poll(); the timeout lets the
            // loop condition be re-checked so we still stop once the converter is done.
            Gene2GeneProteinAssociation gene2GeneProteinAssociation = gene2GeneProteinAssociationQueue.poll(100, java.util.concurrent.TimeUnit.MILLISECONDS);
            if (gene2GeneProteinAssociation == null) {
                continue;
            }
            // check they are genes gemma knows about
            Gene geneOne = geneService.findByNCBIId(gene2GeneProteinAssociation.getFirstGene().getNcbiGeneId());
            Gene geneTwo = geneService.findByNCBIId(gene2GeneProteinAssociation.getSecondGene().getNcbiGeneId());
            if (geneOne == null) {
                StringProteinInteractionLoader.log.warn("Gene with NCBI id=" + gene2GeneProteinAssociation.getFirstGene().getNcbiGeneId() + " not in Gemma");
                continue;
            }
            if (geneTwo == null) {
                StringProteinInteractionLoader.log.warn("Gene with NCBI id=" + gene2GeneProteinAssociation.getSecondGene().getNcbiGeneId() + " not in Gemma");
                continue;
            }
            // The gene fields are written reflectively, with forced access, to swap in the looked-up genes.
            FieldUtils.writeField(gene2GeneProteinAssociation, "firstGene", geneOne, true);
            FieldUtils.writeField(gene2GeneProteinAssociation, "secondGene", geneTwo, true);
            persisterHelper.persist(gene2GeneProteinAssociation);
            if (++loadedGeneCount % 1000 == 0) {
                StringProteinInteractionLoader.log.info("Proceesed " + loadedGeneCount + " protein protein interactions. " + "Current queue has " + gene2GeneProteinAssociationQueue.size() + " items.");
            }
        } catch (InterruptedException e) {
            // Restore the interrupt flag so code further up the stack can see the interruption.
            Thread.currentThread().interrupt();
            StringProteinInteractionLoader.log.error(e, e);
            loaderDone.set(true);
            throw new RuntimeException(e);
        } catch (Exception e) {
            StringProteinInteractionLoader.log.error(e, e);
            loaderDone.set(true);
            throw new RuntimeException(e);
        }
    }
    StringProteinInteractionLoader.log.info("Loaded " + loadedGeneCount + " protein protein interactions. ");
    loaderDone.set(true);
}
Also used : Gene(ubic.gemma.model.genome.Gene) Gene2GeneProteinAssociation(ubic.gemma.model.association.Gene2GeneProteinAssociation) IOException(java.io.IOException)

Aggregations

Gene (ubic.gemma.model.genome.Gene)186 Taxon (ubic.gemma.model.genome.Taxon)34 CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence)32 StopWatch (org.apache.commons.lang3.time.StopWatch)31 Test (org.junit.Test)24 HashSet (java.util.HashSet)23 GeneProduct (ubic.gemma.model.genome.gene.GeneProduct)20 BaseSpringContextTest (ubic.gemma.core.testing.BaseSpringContextTest)18 Element (org.w3c.dom.Element)16 ArrayList (java.util.ArrayList)13 Transactional (org.springframework.transaction.annotation.Transactional)12 ExpressionExperiment (ubic.gemma.model.expression.experiment.ExpressionExperiment)12 Collection (java.util.Collection)11 OntologyTerm (ubic.basecode.ontology.model.OntologyTerm)11 CharacteristicValueObject (ubic.gemma.model.genome.gene.phenotype.valueObject.CharacteristicValueObject)10 HashMap (java.util.HashMap)8 ArrayDesign (ubic.gemma.model.expression.arrayDesign.ArrayDesign)8 BioSequence2GeneProduct (ubic.gemma.model.association.BioSequence2GeneProduct)7 PhysicalLocation (ubic.gemma.model.genome.PhysicalLocation)7 BioSequence (ubic.gemma.model.genome.biosequence.BioSequence)7