Search in sources :

Example 16 with PhysicalLocation

use of ubic.gemma.model.genome.PhysicalLocation in project Gemma by PavlidisLab.

the class NcbiGeneConverter method getPhysicalLocation.

/**
 * Creates a new {@link PhysicalLocation} for the given accession record, placed on the chromosome of the
 * supplied gene's own physical location.
 * <p>
 * The strand is copied only when the accession has an orientation. Nucleotide, length and bin are filled in only
 * when the accession has a start position; otherwise they are left unset.
 *
 * @param acc  accession record providing orientation and start/end positions
 * @param gene gene whose physical location supplies the chromosome
 * @return the newly created location
 */
private PhysicalLocation getPhysicalLocation(NCBIGene2Accession acc, Gene gene) {
    PhysicalLocation location = PhysicalLocation.Factory.newInstance();
    location.setChromosome(gene.getPhysicalLocation().getChromosome());

    if (acc.getOrientation() != null) {
        location.setStrand(acc.getOrientation());
    }

    // Without a start position there are no coordinates to record.
    if (acc.getStartPosition() == null) {
        return location;
    }

    location.setNucleotide(acc.getStartPosition());
    // Length is the absolute span, so the order of start and end does not matter here.
    location.setNucleotideLength((int) Math.abs(acc.getEndPosition() - acc.getStartPosition()));
    location.setBin(SequenceBinUtils.binFromRange(acc.getStartPosition().intValue(), acc.getEndPosition().intValue()));
    return location;
}
Also used : PhysicalLocation(ubic.gemma.model.genome.PhysicalLocation)

Example 17 with PhysicalLocation

use of ubic.gemma.model.genome.PhysicalLocation in project Gemma by PavlidisLab.

the class GoldenPathSequenceAnalysis method findClosestGene.

/**
 * Given a location, find the nearest gene on the same strand, including only "known", "refseq" or "ensembl"
 * transcripts.
 * <p>
 * The search runs in up to five rounds. The first round looks only inside the query region; each later round
 * widens the searched window by the same amount on both sides. The first round that yields any gene product
 * determines the result: the gene whose product lies at the smallest distance from the query region is returned
 * (overlapping products have distance zero; on ties the first one encountered wins).
 *
 * @param chromosome chromosome
 * @param queryStart start
 * @param queryEnd end
 * @param strand Either '+' or '-'
 * @param maxWindow the number of bases on each side to look, at most, in addition to looking inside the given
 *        region.
 * @return the Gene closest to the given location, or null if none was found in any round. This is a transient
 *         instance, not from Gemma's database.
 * @throws IllegalArgumentException if queryEnd is less than queryStart
 */
public Gene findClosestGene(String chromosome, Long queryStart, Long queryEnd, String strand, int maxWindow) {
    if (queryEnd < queryStart) {
        throw new IllegalArgumentException("End must not be less than start");
    }
    final int numRounds = 5;
    final int increment = (int) (maxWindow / (double) numRounds);
    for (int round = 0; round < numRounds; round++) {
        // Widen the window symmetrically: subtract on the left, add on the right.
        long padding = (long) round * increment;
        long left = queryStart - padding;
        long right = queryEnd + padding;
        Collection<GeneProduct> geneProducts = this.findRefGenesByLocation(chromosome, left, right, strand);
        geneProducts.addAll(this.findKnownGenesByLocation(chromosome, left, right, strand));
        Gene nearest = null;
        long closestSoFar = Long.MAX_VALUE;
        for (GeneProduct geneProduct : geneProducts) {
            PhysicalLocation gpl = geneProduct.getPhysicalLocation();
            long start = gpl.getNucleotide();
            long end = start + gpl.getNucleotideLength();
            // Distance between two intervals: the positive one of the two edge-to-edge differences, or zero
            // when they overlap. Measured against the query itself, not the widened window.
            long gap = Math.max(0L, Math.max(queryStart - end, start - queryEnd));
            if (gap < closestSoFar) {
                closestSoFar = gap;
                nearest = geneProduct.getGene();
            }
        }
        if (nearest != null) {
            return nearest;
        }
    }
    return null;
}
Also used : BioSequence2GeneProduct(ubic.gemma.model.association.BioSequence2GeneProduct) GeneProduct(ubic.gemma.model.genome.gene.GeneProduct) Gene(ubic.gemma.model.genome.Gene) PhysicalLocation(ubic.gemma.model.genome.PhysicalLocation)

Example 18 with PhysicalLocation

use of ubic.gemma.model.genome.PhysicalLocation in project Gemma by PavlidisLab.

the class GoldenPathSequenceAnalysis method getExons.

/**
 * Builds the exon locations for a gene from the raw start/end blobs of the GoldenPath database.
 * Be sure to pass the right Blob arguments!
 *
 * @param chrom chromosome assigned to every exon that is created
 * @param exonStarts starts
 * @param exonEnds ends
 * @return one location per start/end pair; empty when either blob is null
 * @throws SQLException sql problem
 */
private Collection<PhysicalLocation> getExons(Chromosome chrom, Blob exonStarts, Blob exonEnds) throws SQLException {
    final Collection<PhysicalLocation> result = new HashSet<>();
    if (exonStarts != null && exonEnds != null) {
        final String startText = SQLUtils.blobToString(exonStarts);
        final String endText = SQLUtils.blobToString(exonEnds);
        final int[] startCoords = SequenceManipulation.blatLocationsToIntArray(startText);
        final int[] endCoords = SequenceManipulation.blatLocationsToIntArray(endText);
        assert startCoords.length == endCoords.length;

        int index = 0;
        while (index < endCoords.length) {
            final int from = startCoords[index];
            final int to = endCoords[index];

            PhysicalLocation location = PhysicalLocation.Factory.newInstance();
            location.setChromosome(chrom);
            assert chrom.getTaxon() != null;
            location.setNucleotide((long) from);
            location.setNucleotideLength(to - from);
            location.setBin(SequenceBinUtils.binFromRange(from, to));
            result.add(location);

            index++;
        }
    }
    return result;
}
Also used : PhysicalLocation(ubic.gemma.model.genome.PhysicalLocation) HashSet(java.util.HashSet)

Example 19 with PhysicalLocation

use of ubic.gemma.model.genome.PhysicalLocation in project Gemma by PavlidisLab.

the class GoldenPathSequenceAnalysis method computeLocationInGene.

/**
 * Given a location and a gene product, compute the distance from the 3' end of the gene product as well as the
 * amount of overlap. If the location has low overlaps with known exons (threshold set by
 * RECHECK_OVERLAP_THRESHOLD), we optionally search for mRNAs in the region. If there are overlapping mRNAs, we use
 * the best overlap value. If the overlap is still not high enough we optionally check ESTs.
 *
 * @param chromosome chromosome
 * @param queryStart start
 * @param queryEnd end
 * @param starts Start locations of alignments of the query (target coordinates)
 * @param sizes Sizes of alignments of the query.
 * @param geneProduct GeneProduct with which the overlap and distance is to be computed.
 * @param method method
 * @param config The useEsts and useRNA options are relevant
 * @return a BlatAssociation carrying the gene product, the measurement method, the overlap and the 3' distance.
 * @throws IllegalArgumentException if the gene product's strand is neither '+' nor '-' (including when it is
 *         missing), or if the method is not recognised
 * @throws UnsupportedOperationException if the method is LEFT
 */
private BlatAssociation computeLocationInGene(String chromosome, Long queryStart, Long queryEnd, String starts, String sizes, GeneProduct geneProduct, ThreePrimeDistanceMethod method, ProbeMapperConfig config) {
    assert geneProduct != null : "GeneProduct is null";
    BlatAssociation blatAssociation = BlatAssociation.Factory.newInstance();
    blatAssociation.setGeneProduct(geneProduct);
    blatAssociation.setThreePrimeDistanceMeasurementMethod(method);
    PhysicalLocation geneLoc = geneProduct.getPhysicalLocation();
    assert geneLoc != null : "PhysicalLocation for GeneProduct " + geneProduct + " is null";
    assert geneLoc.getNucleotide() != null;
    int geneStart = geneLoc.getNucleotide().intValue();
    int geneEnd = geneLoc.getNucleotide().intValue() + geneLoc.getNucleotideLength();
    // Read the strand once so that every branch below works from the same value.
    final String strand = geneLoc.getStrand();
    int exonOverlap = 0;
    if (starts != null && sizes != null) {
        exonOverlap = SequenceManipulation.getGeneProductExonOverlap(starts, sizes, strand, geneProduct);
        int totalSize = SequenceManipulation.totalSize(sizes);
        if (config.isUseMrnas() && exonOverlap / (double) (totalSize) < GoldenPathSequenceAnalysis.RECHECK_OVERLAP_THRESHOLD) {
            int newOverlap = this.checkRNAs(chromosome, queryStart, queryEnd, starts, sizes, exonOverlap, strand, geneProduct.getGene());
            if (newOverlap > exonOverlap) {
                GoldenPath.log.debug("mRNA overlap was higher than primary transcript");
                exonOverlap = newOverlap;
            }
        }
        if (config.isUseEsts() && exonOverlap / (double) (totalSize) < GoldenPathSequenceAnalysis.RECHECK_OVERLAP_THRESHOLD) {
            int newOverlap = this.checkESTs(chromosome, queryStart, queryEnd, starts, sizes, exonOverlap, strand);
            if (newOverlap > exonOverlap) {
                GoldenPath.log.debug("EST overlap was higher than mRNA or primary transcript");
                exonOverlap = newOverlap;
            }
        }
        assert exonOverlap <= totalSize;
    }
    blatAssociation.setOverlap(exonOverlap);
    // Constant-first comparisons so that a missing strand reaches the IllegalArgumentException below instead
    // of failing with a NullPointerException.
    final boolean plusStrand = "+".equals(strand);
    final boolean minusStrand = "-".equals(strand);
    if (method == ThreePrimeDistanceMethod.MIDDLE) {
        int center = SequenceManipulation.findCenter(starts, sizes);
        if (plusStrand) {
            // then the 3' end is at the 'end'. : >>>>>>>>>>>>>>>>>>>>>*>>>>> (* is where we might be)
            blatAssociation.setThreePrimeDistance((long) Math.max(0, geneEnd - center));
        } else if (minusStrand) {
            // then the 3' end is at the 'start'. : <<<*<<<<<<<<<<<<<<<<<<<<<<<
            blatAssociation.setThreePrimeDistance((long) Math.max(0, center - geneStart));
        } else {
            throw new IllegalArgumentException("Strand wasn't '+' or '-'");
        }
    } else if (method == ThreePrimeDistanceMethod.RIGHT) {
        if (plusStrand) {
            // then the 3' end is at the 'end'. : >>>>>>>>>>>>>>>>>>>>>*>>>>> (* is where we might be)
            blatAssociation.setThreePrimeDistance(Math.max(0, geneEnd - queryEnd));
        } else if (minusStrand) {
            // then the 3' end is at the 'start'. : <<<*<<<<<<<<<<<<<<<<<<<<<<<
            blatAssociation.setThreePrimeDistance(Math.max(0, queryStart - geneStart));
        } else {
            throw new IllegalArgumentException("Strand wasn't '+' or '-'");
        }
    } else if (method == ThreePrimeDistanceMethod.LEFT) {
        throw new UnsupportedOperationException("Left edge measure not supported");
    } else {
        throw new IllegalArgumentException("Unknown method");
    }
    return blatAssociation;
}
Also used : BlatAssociation(ubic.gemma.model.genome.sequenceAnalysis.BlatAssociation) PhysicalLocation(ubic.gemma.model.genome.PhysicalLocation)

Example 20 with PhysicalLocation

use of ubic.gemma.model.genome.PhysicalLocation in project Gemma by PavlidisLab.

the class GeneServiceImpl method getPhysicalLocationsValueObjects.

/**
 * Collects the distinct physical locations of a gene's products as value objects.
 * <p>
 * The gene is thawed before its products are read. Products without a physical location are skipped with a
 * warning. A location is added only if an equal value object is not already in the result.
 *
 * @param gene the gene whose product locations are wanted; may be null
 * @return the distinct locations in encounter order; an empty collection (never null) when the gene is null or
 *         has no product collection
 */
@Override
@Transactional(readOnly = true)
public Collection<PhysicalLocationValueObject> getPhysicalLocationsValueObjects(Gene gene) {
    if (gene == null) {
        return Collections.emptyList();
    }
    gene = this.thaw(gene);
    Collection<GeneProduct> gpCollection = gene.getProducts();
    if (gpCollection == null) {
        // Same "nothing to report" answer as for a null gene, so callers never need a null check.
        return Collections.emptyList();
    }
    Collection<PhysicalLocationValueObject> locations = new LinkedList<>();
    for (GeneProduct gp : gpCollection) {
        PhysicalLocation physicalLocation = gp.getPhysicalLocation();
        if (physicalLocation == null) {
            AbstractService.log.warn(gene.getOfficialSymbol() + " product " + gp.getName() + " (id:" + gp.getId() + ") has no location.");
            continue;
        }
        // Only add if the physical location of the product is different from any we already know.
        PhysicalLocationValueObject vo = new PhysicalLocationValueObject(physicalLocation);
        if (!locations.contains(vo)) {
            locations.add(vo);
        }
    }
    return locations;
}
Also used : PhysicalLocationValueObject(ubic.gemma.model.genome.PhysicalLocationValueObject) PhysicalLocation(ubic.gemma.model.genome.PhysicalLocation) Transactional(org.springframework.transaction.annotation.Transactional)

Aggregations

PhysicalLocation (ubic.gemma.model.genome.PhysicalLocation)22 Chromosome (ubic.gemma.model.genome.Chromosome)9 Gene (ubic.gemma.model.genome.Gene)7 Taxon (ubic.gemma.model.genome.Taxon)7 HashSet (java.util.HashSet)5 GeneProduct (ubic.gemma.model.genome.gene.GeneProduct)4 BlatAssociation (ubic.gemma.model.genome.sequenceAnalysis.BlatAssociation)4 BlatResult (ubic.gemma.model.genome.sequenceAnalysis.BlatResult)4 Collection (java.util.Collection)3 Test (org.junit.Test)3 BaseSpringContextTest (ubic.gemma.core.testing.BaseSpringContextTest)3 BioSequence2GeneProduct (ubic.gemma.model.association.BioSequence2GeneProduct)3 HashMap (java.util.HashMap)2 DatabaseEntry (ubic.gemma.model.common.description.DatabaseEntry)2 ArrayList (java.util.ArrayList)1 Before (org.junit.Before)1 Transactional (org.springframework.transaction.annotation.Transactional)1 GeneServiceImpl (ubic.gemma.core.genome.gene.service.GeneServiceImpl)1 CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence)1 CompositeSequenceValueObject (ubic.gemma.model.expression.designElement.CompositeSequenceValueObject)1