use of ubic.gemma.model.genome.PhysicalLocation in project Gemma by PavlidisLab.
the class NcbiGeneConverter method getPhysicalLocation.
/**
 * Creates a new {@link PhysicalLocation} describing where an NCBI accession record places a gene.
 * <p>
 * The chromosome is taken from the gene's existing physical location. The strand is copied only when the
 * accession carries an orientation. Start, length and bin are filled in only when the accession carries a
 * start position; otherwise they are left as the factory created them.
 *
 * @param acc  accession record supplying the orientation and the start/end positions
 * @param gene gene whose physical location supplies the chromosome
 * @return the newly created location
 */
private PhysicalLocation getPhysicalLocation(NCBIGene2Accession acc, Gene gene) {
    PhysicalLocation location = PhysicalLocation.Factory.newInstance();
    location.setChromosome(gene.getPhysicalLocation().getChromosome());

    if (acc.getOrientation() != null) {
        location.setStrand(acc.getOrientation());
    }

    final Long startPosition = acc.getStartPosition();
    if (startPosition == null) {
        // No coordinates available: only chromosome (and possibly strand) are known.
        return location;
    }

    location.setNucleotide(startPosition);

    // NOTE(review): the end position is assumed to be present whenever the start position is - confirm.
    final long begin = startPosition;
    final long end = acc.getEndPosition();

    // The absolute value makes the length independent of which of the two coordinates is larger.
    location.setNucleotideLength((int) Math.abs(end - begin));
    location.setBin(SequenceBinUtils.binFromRange((int) begin, (int) end));
    return location;
}
use of ubic.gemma.model.genome.PhysicalLocation in project Gemma by PavlidisLab.
the class GoldenPathSequenceAnalysis method findClosestGene.
/**
 * Given a location, find the nearest gene on the same strand, including only "known", "refseq" or "ensembl"
 * transcripts.
 * <p>
 * The search starts with the query region itself and then widens it symmetrically, in steps of
 * {@code maxWindow / 5} bases per side. The first round that yields a gene ends the search. Within a round, the
 * gene product with the smallest gap to the <em>original</em> query region wins (a product overlapping the query
 * has a gap of zero); on ties the first product encountered is kept. Gene products without a usable physical
 * location are ignored.
 *
 * @param chromosome chromosome
 * @param queryStart start
 * @param queryEnd   end
 * @param strand     Either '+' or '-'
 * @param maxWindow  the number of bases on each side to look, at most, in addition to looking inside the given
 *                   region.
 * @return the Gene closest to the given location, or null if no gene was found in any round. This is a transient
 *         instance, not from Gemma's database.
 * @throws IllegalArgumentException if {@code queryEnd} is less than {@code queryStart}
 */
public Gene findClosestGene(String chromosome, Long queryStart, Long queryEnd, String strand, int maxWindow) {
    if (queryEnd < queryStart)
        throw new IllegalArgumentException("End must not be less than start");

    final int numRounds = 5;
    final int increment = (int) (maxWindow / (double) numRounds);
    // NOTE(review): rounds run 0..numRounds-1, so the widest search pads by 4/5 of maxWindow per side - confirm
    // whether a final round at the full maxWindow is wanted.

    for (long round = 0L; round < numRounds; round++) {
        final long padding = round * increment;
        // Widen on BOTH sides of the query; never search before the start of the chromosome.
        final long left = Math.max(0L, queryStart - padding);
        final long right = queryEnd + padding;

        Collection<GeneProduct> geneProducts = this.findRefGenesByLocation(chromosome, left, right, strand);
        geneProducts.addAll(this.findKnownGenesByLocation(chromosome, left, right, strand));

        Gene nearest = null;
        long closestSoFar = Long.MAX_VALUE;
        for (GeneProduct geneProduct : geneProducts) {
            PhysicalLocation gpl = geneProduct.getPhysicalLocation();
            if (gpl == null || gpl.getNucleotide() == null) {
                // Cannot measure a distance without coordinates.
                continue;
            }
            long start = gpl.getNucleotide();
            long end = start + gpl.getNucleotideLength();

            // Distance between two intervals: the larger of the two signed separations, or zero when they
            // overlap (both separations negative). Measured against the original query, not the padded window.
            long gap = Math.max(0L, Math.max(queryStart - end, start - queryEnd));
            if (gap < closestSoFar) {
                closestSoFar = gap;
                nearest = geneProduct.getGene();
            }
        }

        if (nearest != null)
            return nearest;
    }
    return null;
}
use of ubic.gemma.model.genome.PhysicalLocation in project Gemma by PavlidisLab.
the class GoldenPathSequenceAnalysis method getExons.
/**
 * Builds the exon locations of a gene from the raw exon start/end blobs read from the GoldenPath database.
 * Be sure to pass the right Blob arguments!
 * <p>
 * Each start/end pair becomes one {@link PhysicalLocation} on the given chromosome, with its nucleotide set to
 * the start, its length set to {@code end - start} and its bin computed from the same range. The result is a
 * set, so exons that compare equal are stored once.
 *
 * @param chrom      chromosome assigned to every exon
 * @param exonStarts starts
 * @param exonEnds   ends
 * @return the exon locations; empty if either blob is null
 * @throws SQLException sql problem
 */
private Collection<PhysicalLocation> getExons(Chromosome chrom, Blob exonStarts, Blob exonEnds) throws SQLException {
    final Collection<PhysicalLocation> result = new HashSet<>();
    if (exonStarts != null && exonEnds != null) {
        final String startText = SQLUtils.blobToString(exonStarts);
        final String endText = SQLUtils.blobToString(exonEnds);
        final int[] begins = SequenceManipulation.blatLocationsToIntArray(startText);
        final int[] finishes = SequenceManipulation.blatLocationsToIntArray(endText);
        assert begins.length == finishes.length;

        int index = 0;
        while (index < finishes.length) {
            final int from = begins[index];
            final int to = finishes[index];

            PhysicalLocation location = PhysicalLocation.Factory.newInstance();
            location.setChromosome(chrom);
            assert chrom.getTaxon() != null;
            location.setNucleotide((long) from);
            location.setNucleotideLength(to - from);
            location.setBin(SequenceBinUtils.binFromRange(from, to));
            result.add(location);

            index++;
        }
    }
    return result;
}
use of ubic.gemma.model.genome.PhysicalLocation in project Gemma by PavlidisLab.
the class GoldenPathSequenceAnalysis method computeLocationInGene.
/**
 * Given a location and a gene product, compute the distance from the 3' end of the gene product as well as the
 * amount of overlap. If the location has low overlaps with known exons (threshold set by
 * RECHECK_OVERLAP_THRESHOLD), we optionally search for mRNAs in the region. If there are overlapping mRNAs, we use
 * the best overlap value. If the overlap is still not high enough we optionally check ESTs.
 * <p>
 * The overlap is only computed when both {@code starts} and {@code sizes} are given; otherwise it is reported as
 * zero.
 *
 * @param chromosome  chromosome
 * @param queryStart  start
 * @param queryEnd    end
 * @param starts      Start locations of alignments of the query (target coordinates)
 * @param sizes       Sizes of alignments of the query.
 * @param geneProduct GeneProduct with which the overlap and distance is to be computed.
 * @param method      method used to measure the 3' distance; only MIDDLE and RIGHT are supported
 * @param config      The useEsts and useRNA options are relevant
 * @return a BlatAssociation for the gene product, carrying the overlap and the 3' distance.
 * @throws IllegalArgumentException      if the gene product's strand is not '+' or '-', or the method is unknown
 * @throws UnsupportedOperationException if the method is LEFT
 */
private BlatAssociation computeLocationInGene(String chromosome, Long queryStart, Long queryEnd, String starts, String sizes, GeneProduct geneProduct, ThreePrimeDistanceMethod method, ProbeMapperConfig config) {
    assert geneProduct != null : "GeneProduct is null";

    BlatAssociation blatAssociation = BlatAssociation.Factory.newInstance();
    blatAssociation.setGeneProduct(geneProduct);
    blatAssociation.setThreePrimeDistanceMeasurementMethod(method);

    PhysicalLocation geneLoc = geneProduct.getPhysicalLocation();
    assert geneLoc != null : "PhysicalLocation for GeneProduct " + geneProduct + " is null";
    assert geneLoc.getNucleotide() != null;
    int geneStart = geneLoc.getNucleotide().intValue();
    int geneEnd = geneLoc.getNucleotide().intValue() + geneLoc.getNucleotideLength();
    String strand = geneLoc.getStrand();

    int exonOverlap = 0;
    if (starts != null && sizes != null) {
        exonOverlap = SequenceManipulation.getGeneProductExonOverlap(starts, sizes, strand, geneProduct);
        int totalSize = SequenceManipulation.totalSize(sizes);

        // Low overlap with the primary transcript: see whether an mRNA in the region explains the query better.
        if (config.isUseMrnas() && exonOverlap / (double) (totalSize) < GoldenPathSequenceAnalysis.RECHECK_OVERLAP_THRESHOLD) {
            int newOverlap = this.checkRNAs(chromosome, queryStart, queryEnd, starts, sizes, exonOverlap, strand, geneProduct.getGene());
            if (newOverlap > exonOverlap) {
                GoldenPath.log.debug("mRNA overlap was higher than primary transcript");
                exonOverlap = newOverlap;
            }
        }

        // Still low: fall back to ESTs.
        if (config.isUseEsts() && exonOverlap / (double) (totalSize) < GoldenPathSequenceAnalysis.RECHECK_OVERLAP_THRESHOLD) {
            int newOverlap = this.checkESTs(chromosome, queryStart, queryEnd, starts, sizes, exonOverlap, strand);
            if (newOverlap > exonOverlap) {
                GoldenPath.log.debug("Exon overlap was higher than mrna or primary transcript");
                exonOverlap = newOverlap;
            }
        }
        assert exonOverlap <= totalSize;
    }
    blatAssociation.setOverlap(exonOverlap);

    // On the '+' strand the 3' end is at the 'end' :   >>>>>>>>>>>>>>>>>>>>>*>>>>> (* is where we might be)
    // On the '-' strand the 3' end is at the 'start' : <<<*<<<<<<<<<<<<<<<<<<<<<<<
    if (method == ThreePrimeDistanceMethod.MIDDLE) {
        // NOTE(review): findCenter is called even when starts/sizes are null - confirm it tolerates that.
        int center = SequenceManipulation.findCenter(starts, sizes);
        long distance = this.isPlusStrand(strand) ? geneEnd - center : center - geneStart;
        blatAssociation.setThreePrimeDistance(Math.max(0L, distance));
    } else if (method == ThreePrimeDistanceMethod.RIGHT) {
        long distance = this.isPlusStrand(strand) ? geneEnd - queryEnd : queryStart - geneStart;
        blatAssociation.setThreePrimeDistance(Math.max(0L, distance));
    } else if (method == ThreePrimeDistanceMethod.LEFT) {
        throw new UnsupportedOperationException("Left edge measure not supported");
    } else {
        throw new IllegalArgumentException("Unknown method");
    }
    return blatAssociation;
}

/**
 * Interprets a strand symbol.
 *
 * @param strand strand symbol, expected to be "+" or "-"
 * @return true for "+", false for "-"
 * @throws IllegalArgumentException for any other value, including null
 */
private boolean isPlusStrand(String strand) {
    if ("+".equals(strand)) {
        return true;
    }
    if ("-".equals(strand)) {
        return false;
    }
    throw new IllegalArgumentException("Strand wasn't '+' or '-'");
}
use of ubic.gemma.model.genome.PhysicalLocation in project Gemma by PavlidisLab.
the class GeneServiceImpl method getPhysicalLocationsValueObjects.
/**
 * Collects the distinct physical locations of a gene's products as value objects.
 * <p>
 * The gene is thawed first. Products without a physical location are logged and skipped. A location is added
 * only if no equal value object (per {@code PhysicalLocationValueObject.equals}) has been collected already;
 * encounter order is preserved.
 *
 * @param gene the gene to inspect; may be null
 * @return the distinct locations; an empty collection (never null) if the gene is null or has no product
 *         collection
 */
@Override
@Transactional(readOnly = true)
public Collection<PhysicalLocationValueObject> getPhysicalLocationsValueObjects(Gene gene) {
    if (gene == null) {
        return Collections.emptyList();
    }
    gene = this.thaw(gene);
    Collection<GeneProduct> gpCollection = gene.getProducts();
    if (gpCollection == null) {
        // Same result as for a null gene: callers can iterate without a null check.
        return Collections.emptyList();
    }

    Collection<PhysicalLocationValueObject> locations = new LinkedList<>();
    for (GeneProduct gp : gpCollection) {
        PhysicalLocation physicalLocation = gp.getPhysicalLocation();
        if (physicalLocation == null) {
            AbstractService.log.warn(gene.getOfficialSymbol() + " product " + gp.getName() + " (id:" + gp.getId() + ") has no location.");
            continue;
        }
        // Only add if the physical location of the product is different from any we already know.
        PhysicalLocationValueObject vo = new PhysicalLocationValueObject(physicalLocation);
        if (!locations.contains(vo)) {
            locations.add(vo);
        }
    }
    return locations;
}
Aggregations