Search in sources :

Example 6 with GeneProduct

use of ubic.gemma.model.genome.gene.GeneProduct in project Gemma by PavlidisLab.

the class CompositeSequenceMapSummary method toString.

/**
 * Renders this summary as one tab-delimited line with five columns: the composite sequence
 * name, the name of its biological characteristic (empty when there is none), the number of
 * BLAT results, the gene product names and the official gene symbols. Within the last two
 * columns the individual entries are separated by '|'.
 */
@Override
public String toString() {
    StringBuilder line = new StringBuilder();
    line.append(compositeSequence.getName());
    line.append("\t");
    // The second column stays empty when no biological characteristic is attached.
    if (compositeSequence.getBiologicalCharacteristic() != null) {
        line.append(compositeSequence.getBiologicalCharacteristic().getName());
    }
    line.append("\t");
    line.append(blatResults.size());
    line.append("\t");
    for (GeneProduct product : geneProducts) {
        line.append(product.getName());
        line.append("|");
    }
    line.append("\t");
    for (Gene gene : genes) {
        line.append(gene.getOfficialSymbol());
        line.append("|");
    }
    // Each entry above was written with a trailing '|'. Remove any '|' that directly precedes
    // a tab, then the one left at the end of the line.
    String raw = line.toString();
    String withoutInnerTrailers = raw.replaceAll("\\|\t", "\t");
    return withoutInnerTrailers.replaceFirst("\\|$", "");
}
Also used : GeneProduct(ubic.gemma.model.genome.gene.GeneProduct) Gene(ubic.gemma.model.genome.Gene)

Example 7 with GeneProduct

use of ubic.gemma.model.genome.gene.GeneProduct in project Gemma by PavlidisLab.

the class ArrayDesignProbeMapperServiceImpl method checkForAlias.

/**
 * Searches for an existing gene product that matches the given one through a gene alias.
 * Candidates are fetched from {@code geneProductService.findByName} using the product's name
 * and the taxon of its gene; a candidate matches when one of its gene's aliases equals
 * (ignoring case) the official symbol of the given product's gene.
 *
 * @param geneProduct the gene product to look up; its gene and that gene's taxon are read
 * @return the first matching candidate, or {@code null} when there are no candidates or none
 *         of them matches
 */
private GeneProduct checkForAlias(GeneProduct geneProduct) {
    Gene gene = geneProduct.getGene();
    Collection<GeneProduct> sameNameProducts = geneProductService.findByName(geneProduct.getName(), gene.getTaxon());
    // An empty candidate collection simply falls through both loops to the final return.
    for (GeneProduct candidate : sameNameProducts) {
        for (GeneAlias alias : candidate.getGene().getAliases()) {
            if (!alias.getAlias().equalsIgnoreCase(gene.getOfficialSymbol())) {
                continue;
            }
            // The gene products match by name and the genes match through an alias; that is
            // considered solid enough to reuse the existing product.
            ArrayDesignProbeMapperServiceImpl.log.info("Associated gene product " + geneProduct + " has a match in Gemma through an aliased gene: " + candidate);
            return candidate;
        }
    }
    return null;
}
Also used : GeneProduct(ubic.gemma.model.genome.gene.GeneProduct) Gene(ubic.gemma.model.genome.Gene) GeneAlias(ubic.gemma.model.genome.gene.GeneAlias)

Example 8 with GeneProduct

use of ubic.gemma.model.genome.gene.GeneProduct in project Gemma by PavlidisLab.

the class ArrayDesignProbeMapperServiceImpl method printResult.

/**
 * Writes one tab-delimited result line to STDOUT. The columns are: probe name, name of the
 * associated biosequence, gene product name, official gene symbol and the simple class name
 * of the gene object.
 *
 * @param cs              the probe whose name goes into the first column
 * @param blatAssociation the association supplying the biosequence and the gene product
 */
private void printResult(CompositeSequence cs, BlatAssociation blatAssociation) {
    GeneProduct product = blatAssociation.getGeneProduct();
    Gene productGene = product.getGene();
    StringBuilder row = new StringBuilder();
    row.append(cs.getName()).append('\t');
    row.append(blatAssociation.getBioSequence().getName()).append('\t');
    row.append(product.getName()).append('\t');
    row.append(productGene.getOfficialSymbol()).append("\t");
    row.append(productGene.getClass().getSimpleName());
    System.out.println(row.toString());
}
Also used : GeneProduct(ubic.gemma.model.genome.gene.GeneProduct) Gene(ubic.gemma.model.genome.Gene)

Example 9 with GeneProduct

use of ubic.gemma.model.genome.gene.GeneProduct in project Gemma by PavlidisLab.

the class ArrayDesignProbeMapperServiceImpl method processArrayDesign.

/**
 * Replaces the gene product associations of a platform with those described in a
 * tab-delimited annotation file.
 * <p>
 * Existing associations for the platform are deleted first. Each non-blank line that does not
 * start with '#' must then have exactly three columns: probe id, sequence name and gene. The
 * gene column may hold several '|'-separated entries, which are official symbols (looked up in
 * {@code taxon}) or NCBI gene ids when {@code ncbiIds} is true. For every gene product of every
 * gene found, an {@link AnnotationAssociation} to the probe's sequence is created. Lines that
 * cannot be used (unknown probe, no gene found, sequence name mismatch, ...) are logged,
 * counted and skipped rather than aborting the import.
 *
 * @param arrayDesign the platform whose probes are annotated
 * @param taxon       taxon used to resolve gene symbols; may be null only if {@code ncbiIds} is true
 * @param source      the annotation file to read
 * @param sourceDB    set as the source of every association that is created
 * @param ncbiIds     true if the gene column contains NCBI gene ids instead of official symbols
 * @throws IOException              if the file cannot be read or a line does not have three columns
 * @throws IllegalArgumentException if no taxon is given while {@code ncbiIds} is false, or the
 *                                  platform's technology type is {@code TechnologyType.NONE}
 */
@Override
public void processArrayDesign(ArrayDesign arrayDesign, Taxon taxon, File source, ExternalDatabase sourceDB, boolean ncbiIds) throws IOException {
    if (taxon == null && !ncbiIds) {
        throw new IllegalArgumentException("You must provide a taxon unless passing ncbiIds = true");
    }
    if (arrayDesign.getTechnologyType().equals(TechnologyType.NONE)) {
        throw new IllegalArgumentException("Do not use this service to process platforms that do not use an probe-based technology.");
    }
    try (BufferedReader b = new BufferedReader(new FileReader(source))) {
        String line;
        int numSkipped = 0;
        ArrayDesignProbeMapperServiceImpl.log.info("Removing any old associations");
        arrayDesignService.deleteGeneProductAssociations(arrayDesign);
        while ((line = b.readLine()) != null) {
            if (StringUtils.isBlank(line)) {
                continue;
            }
            // Lines starting with '#' are treated as comments.
            if (line.startsWith("#")) {
                continue;
            }
            String[] fields = StringUtils.splitPreserveAllTokens(line, '\t');
            if (fields.length != 3) {
                throw new IOException("Illegal format, expected three columns, got " + fields.length);
            }
            String probeId = fields[0];
            String seqName = fields[1];
            // Official symbol(s), or NCBI gene id(s) when ncbiIds is true.
            String geneSymbol = fields[2];
            if (StringUtils.isBlank(geneSymbol)) {
                numSkipped++;
                continue;
            }
            CompositeSequence c = compositeSequenceService.findByName(arrayDesign, probeId);
            if (c == null) {
                if (ArrayDesignProbeMapperServiceImpl.log.isDebugEnabled())
                    ArrayDesignProbeMapperServiceImpl.log.debug("No probe found for '" + probeId + "' on " + arrayDesign + ", skipping");
                numSkipped++;
                continue;
            }
            // A probe can have more than one gene associated with it; multiple genes are
            // indicated by separating them with a pipe (|).
            Collection<Gene> geneListProbe = new HashSet<>();
            StringTokenizer st = new StringTokenizer(geneSymbol, "|");
            while (st.hasMoreTokens()) {
                String geneToken = st.nextToken().trim();
                Gene geneDetails;
                if (ncbiIds) {
                    // A blank or non-numeric id must not abort the whole import: the old
                    // associations have already been deleted at this point. Treat it like any
                    // other gene that cannot be found.
                    int ncbiId;
                    try {
                        ncbiId = Integer.parseInt(geneToken);
                    } catch (NumberFormatException e) {
                        ArrayDesignProbeMapperServiceImpl.log.warn("Invalid NCBI gene id '" + geneToken + "' for probe '" + probeId + "', ignoring");
                        continue;
                    }
                    geneDetails = geneService.findByNCBIId(ncbiId);
                } else {
                    geneDetails = geneService.findByOfficialSymbol(geneToken, taxon);
                }
                if (geneDetails != null) {
                    geneListProbe.add(geneDetails);
                }
            }
            if (geneListProbe.size() == 0) {
                ArrayDesignProbeMapperServiceImpl.log.warn("No gene(s) found for '" + geneSymbol + "' in " + taxon + ", skipping");
                numSkipped++;
                continue;
            } else if (geneListProbe.size() > 1) {
                // This is a common situation when the gene column actually has |-separated
                // genes, so there is no need to make a lot of fuss.
                ArrayDesignProbeMapperServiceImpl.log.debug("More than one gene found for '" + geneSymbol + "' in " + taxon);
            }
            BioSequence bs = c.getBiologicalCharacteristic();
            if (bs != null) {
                // The probe already has a sequence. If a name was given it has to agree with
                // the existing one; otherwise we assume everything is okay.
                if (StringUtils.isNotBlank(seqName)) {
                    bs = bioSequenceService.thaw(bs);
                    if (!bs.getName().equals(seqName)) {
                        ArrayDesignProbeMapperServiceImpl.log.warn("Sequence name '" + seqName + "' given for " + probeId + " does not match existing entry " + bs.getName() + ", skipping");
                        numSkipped++;
                        continue;
                    }
                }
            } else {
                // The probe has no sequence yet: create one based on the text provided.
                if (StringUtils.isBlank(seqName)) {
                    ArrayDesignProbeMapperServiceImpl.log.warn("You must provide sequence names for probes which are not already mapped. probeName=" + probeId + " had no sequence associated and no name provided; skipping");
                    numSkipped++;
                    continue;
                }
                bs = BioSequence.Factory.newInstance();
                bs.setName(seqName);
                // NOTE(review): taxon may be null here when ncbiIds is true - confirm that a
                // sequence without a taxon is acceptable.
                bs.setTaxon(taxon);
                bs.setDescription("Imported from annotation file");
                // Placeholder.
                bs.setType(SequenceType.OTHER);
                bs = bioSequenceService.create(bs);
                c.setBiologicalCharacteristic(bs);
                compositeSequenceService.update(c);
            }
            assert bs != null;
            assert bs.getId() != null;
            // Link the sequence to every product of every gene found for this probe.
            for (Gene gene : geneListProbe) {
                gene = geneService.thaw(gene);
                if (gene.getProducts().size() == 0) {
                    ArrayDesignProbeMapperServiceImpl.log.warn("There are no gene products for " + gene + ", it cannot be mapped to probes. Skipping");
                    numSkipped++;
                    continue;
                }
                for (GeneProduct gp : gene.getProducts()) {
                    AnnotationAssociation association = AnnotationAssociation.Factory.newInstance();
                    association.setBioSequence(bs);
                    association.setGeneProduct(gp);
                    association.setSource(sourceDB);
                    annotationAssociationService.create(association);
                }
            }
        }
        arrayDesignReportService.generateArrayDesignReport(arrayDesign.getId());
        this.deleteOldFiles(arrayDesign);
        ArrayDesignProbeMapperServiceImpl.log.info("Completed association processing for " + arrayDesign + ", " + numSkipped + " were skipped");
    }
}
Also used : AnnotationAssociation(ubic.gemma.model.genome.sequenceAnalysis.AnnotationAssociation) BioSequence(ubic.gemma.model.genome.biosequence.BioSequence) IOException(java.io.IOException) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) GeneProduct(ubic.gemma.model.genome.gene.GeneProduct) StringTokenizer(java.util.StringTokenizer) Gene(ubic.gemma.model.genome.Gene) BufferedReader(java.io.BufferedReader) FileReader(java.io.FileReader) HashSet(java.util.HashSet)

Example 10 with GeneProduct

use of ubic.gemma.model.genome.gene.GeneProduct in project Gemma by PavlidisLab.

the class NcbiGeneConverter method convert.

/**
 * Builds the gene products described by one NCBI gene2accession record.
 * <p>
 * An RNA product is created when the record has an RNA nucleotide accession. A protein
 * product is created when the record has a protein accession and
 * {@code NcbiGeneConverter.retainProteinInformation} is set. Each product is linked to
 * {@code gene} and carries one accession entry whose external database is
 * {@code NcbiGeneConverter.genBank}.
 *
 * @param acc  the accession record to convert
 * @param gene the gene the created products are attached to
 * @return the created products; empty when the record yields neither an RNA nor a protein product
 */
public Collection<GeneProduct> convert(NCBIGene2Accession acc, Gene gene) {
    Collection<GeneProduct> geneProducts = new HashSet<>();
    // RNA section
    if (acc.getRnaNucleotideAccession() != null) {
        GeneProduct rna = GeneProduct.Factory.newInstance();
        // set available fields
        rna.setNcbiGi(acc.getRnaNucleotideGI());
        rna.setGene(gene);
        rna.setName(acc.getRnaNucleotideAccession());
        rna.setType(GeneProductType.RNA);
        String description = "Imported from NCBI Gene";
        if (acc.getStatus() != null) {
            description = description + " (Refseq status: " + acc.getStatus() + ").";
        }
        // The RNA accession is known to be non-null in this branch, so the accession entry
        // is always recorded.
        DatabaseEntry accession = DatabaseEntry.Factory.newInstance();
        accession.setAccession(acc.getRnaNucleotideAccession());
        accession.setAccessionVersion(acc.getRnaNucleotideAccessionVersion());
        accession.setExternalDatabase(NcbiGeneConverter.genBank);
        if (rna.getAccessions() == null) {
            rna.setAccessions(new HashSet<DatabaseEntry>());
        }
        rna.getAccessions().add(accession);
        /*
         * Fill in physical location details. Only done when the record has a genomic accession
         * and the gene already has a physical location.
         */
        if (acc.getGenomicNucleotideAccession() != null && gene.getPhysicalLocation() != null) {
            this.getChromosomeDetails(acc, gene);
            PhysicalLocation pl = this.getPhysicalLocation(acc, gene);
            rna.setPhysicalLocation(pl);
        }
        rna.setDescription(description);
        geneProducts.add(rna);
    }
    // Protein section
    if (NcbiGeneConverter.retainProteinInformation && acc.getProteinAccession() != null) {
        GeneProduct protein = GeneProduct.Factory.newInstance();
        // set available fields
        protein.setNcbiGi(acc.getProteinGI());
        protein.setGene(gene);
        protein.setName(acc.getProteinAccession());
        protein.setType(GeneProductType.PROTEIN);
        protein.setDescription("Imported from NCBI Gene" + (acc.getStatus() != null ? " (" + acc.getStatus() + ")" : ""));
        DatabaseEntry accession = DatabaseEntry.Factory.newInstance();
        accession.setAccession(acc.getProteinAccession());
        accession.setAccessionVersion(acc.getProteinAccessionVersion());
        accession.setExternalDatabase(NcbiGeneConverter.genBank);
        Collection<DatabaseEntry> accessions = new HashSet<>();
        accessions.add(accession);
        protein.setAccessions(accessions);
        geneProducts.add(protein);
    }
    return geneProducts;
}
Also used : GeneProduct(ubic.gemma.model.genome.gene.GeneProduct) DatabaseEntry(ubic.gemma.model.common.description.DatabaseEntry) HashSet(java.util.HashSet) PhysicalLocation(ubic.gemma.model.genome.PhysicalLocation)

Aggregations

GeneProduct (ubic.gemma.model.genome.gene.GeneProduct)41 Gene (ubic.gemma.model.genome.Gene)20 HashSet (java.util.HashSet)16 BioSequence2GeneProduct (ubic.gemma.model.association.BioSequence2GeneProduct)12 DatabaseEntry (ubic.gemma.model.common.description.DatabaseEntry)8 BlatAssociation (ubic.gemma.model.genome.sequenceAnalysis.BlatAssociation)8 Test (org.junit.Test)6 BaseSpringContextTest (ubic.gemma.core.testing.BaseSpringContextTest)5 BioSequence (ubic.gemma.model.genome.biosequence.BioSequence)5 AnnotationAssociation (ubic.gemma.model.genome.sequenceAnalysis.AnnotationAssociation)5 HashMap (java.util.HashMap)4 PhysicalLocation (ubic.gemma.model.genome.PhysicalLocation)4 Criteria (org.hibernate.Criteria)3 CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence)3 IOException (java.io.IOException)2 ArrayList (java.util.ArrayList)2 Collection (java.util.Collection)2 GeneProductValueObject (ubic.gemma.model.genome.gene.GeneProductValueObject)2 BufferedReader (java.io.BufferedReader)1 FileReader (java.io.FileReader)1