use of ubic.gemma.model.association.BioSequence2GeneProduct in project Gemma by PavlidisLab.
the class ArrayDesignAnnotationServiceImpl method generateAnnotationFile.
/**
 * Writes one annotation line per composite sequence in {@code genesWithSpecificity}, then closes the writer.
 * <p>
 * Three cases are distinguished for each composite sequence:
 * <ul>
 * <li>no associations: a line with empty gene fields and no GO terms is written;</li>
 * <li>exactly one association: the fields are taken directly from its gene;</li>
 * <li>several associations: symbols, names, ids and NCBI ids are each joined with "|" and the GO terms of all
 * the genes are pooled.</li>
 * </ul>
 *
 * @param writer               destination of the annotation lines; it is closed before this method returns, also
 *                             when processing fails part-way
 * @param genesWithSpecificity composite sequences mapped to their gene product associations
 * @param ty                   output type, handed through to {@code getGoTerms}
 * @return the number of composite sequences processed
 * @throws IOException declared for failures while writing to or closing the writer
 */
@Override
public int generateAnnotationFile(Writer writer, Map<CompositeSequence, Collection<BioSequence2GeneProduct>> genesWithSpecificity, OutputType ty) throws IOException {
    // try-with-resources so that the writer is released even if an exception interrupts processing.
    try (Writer out = writer) {
        int compositeSequencesProcessed = 0;
        int simple = 0;
        int empty = 0;
        int complex = 0;

        // We use LinkedHashSets to keep everything in a predictable order - this is important for the gene symbols,
        // descriptions and NCBI ids (but not important for GO terms). When a probe maps to multiple genes, we list
        // those items for the genes in the same order. There is a feature request to make the order deterministic
        // (i.e., lexicographic sort); this could be done by using little gene objects or whatever.
        // These accumulators are only used for the multi-gene case and are cleared before each use.
        Collection<OntologyTerm> goTerms = new LinkedHashSet<>();
        Set<String> genes = new LinkedHashSet<>();
        Set<String> geneDescriptions = new LinkedHashSet<>();
        Set<String> geneIds = new LinkedHashSet<>();
        Set<String> ncbiIds = new LinkedHashSet<>();

        Map<Gene, Collection<VocabCharacteristic>> goMappings = this.getGOMappings(genesWithSpecificity);

        for (Map.Entry<CompositeSequence, Collection<BioSequence2GeneProduct>> entry : genesWithSpecificity.entrySet()) {
            CompositeSequence cs = entry.getKey();
            Collection<BioSequence2GeneProduct> geneclusters = entry.getValue();

            if (++compositeSequencesProcessed % 2000 == 0 && ArrayDesignAnnotationServiceImpl.log.isInfoEnabled()) {
                ArrayDesignAnnotationServiceImpl.log.info("Processed " + compositeSequencesProcessed + "/" + genesWithSpecificity.size() + " compositeSequences " + empty + " empty; " + simple + " simple; " + complex + " complex;");
            }

            if (geneclusters.isEmpty()) {
                this.writeAnnotationLine(out, cs.getName(), "", "", null, "", "");
                empty++;
                continue;
            }

            if (geneclusters.size() == 1) {
                // common case, do it quickly.
                BioSequence2GeneProduct b2g = geneclusters.iterator().next();
                Gene g = b2g.getGeneProduct().getGene();
                // Keep the returned collection in its own local: assigning it to the shared accumulator would make
                // the next multi-gene probe clear() and addAll() into a collection this method does not own.
                Collection<OntologyTerm> singleGeneGoTerms = this.getGoTerms(goMappings.get(g), ty);
                String gemmaId = g.getId() == null ? "" : g.getId().toString();
                String ncbiId = g.getNcbiGeneId() == null ? "" : g.getNcbiGeneId().toString();
                this.writeAnnotationLine(out, cs.getName(), g.getOfficialSymbol(), g.getOfficialName(), singleGeneGoTerms, gemmaId, ncbiId);
                simple++;
                continue;
            }

            goTerms.clear();
            genes.clear();
            geneDescriptions.clear();
            geneIds.clear();
            ncbiIds.clear();

            for (BioSequence2GeneProduct bioSequence2GeneProduct : geneclusters) {
                Gene g = bioSequence2GeneProduct.getGeneProduct().getGene();
                genes.add(g.getOfficialSymbol());
                geneDescriptions.add(g.getOfficialName());
                // A gene without an id is skipped rather than failing, mirroring the NCBI id handling below.
                if (g.getId() != null) {
                    geneIds.add(g.getId().toString());
                }
                Integer ncbiGeneId = g.getNcbiGeneId();
                if (ncbiGeneId != null) {
                    ncbiIds.add(ncbiGeneId.toString());
                }
                goTerms.addAll(this.getGoTerms(goMappings.get(g), ty));
            }

            String geneString = StringUtils.join(genes, "|");
            String geneDescriptionString = StringUtils.join(geneDescriptions, "|");
            String geneIdsString = StringUtils.join(geneIds, "|");
            String ncbiIdsString = StringUtils.join(ncbiIds, "|");
            this.writeAnnotationLine(out, cs.getName(), geneString, geneDescriptionString, goTerms, geneIdsString, ncbiIdsString);
            complex++;
        }

        return compositeSequencesProcessed;
    }
}
use of ubic.gemma.model.association.BioSequence2GeneProduct in project Gemma by PavlidisLab.
the class GoldenPathSequenceAnalysis method findAssociations.
/**
 * Given a physical location, collect the overlapping gene products and compute, for each of them, the
 * association between the queried region and that gene product.
 * <p>
 * Candidates come from {@code findRefGenesByLocation} and/or {@code findKnownGenesByLocation}, depending on which
 * sources the configuration enables. When the configured minimum exon overlap fraction is greater than zero,
 * associations whose overlap is 0 are left out; the actual threshold is checked later by other code.
 *
 * @param chromosome The chromosome name (the organism is set by the constructor)
 * @param queryStart The start base of the region to query (the start of the alignment to the genome)
 * @param queryEnd   The end base of the region to query (the end of the alignment to the genome)
 * @param starts     Locations of alignment block starts in target. (comma-delimited from blat)
 * @param sizes      Sizes of alignment blocks (comma-delimited from blat)
 * @param strand     Either + or - indicating the strand to look on, or null to search both strands.
 * @param method     The constant representing the method to use to locate the 3' distance.
 * @param config     configuration
 * @return A collection of BlatAssociation objects, or null if no gene products were found at the location. These
 * are transient instances, not from Gemma's database.
 * @throws IllegalArgumentException if {@code queryEnd} is less than {@code queryStart}
 */
public Collection<BlatAssociation> findAssociations(String chromosome, Long queryStart, Long queryEnd, String starts, String sizes, String strand, ThreePrimeDistanceMethod method, ProbeMapperConfig config) {
    if (GoldenPath.log.isDebugEnabled()) {
        GoldenPath.log.debug("Seeking gene overlaps with: chrom=" + chromosome + " start=" + queryStart + " end=" + queryEnd + " strand=" + strand);
    }

    if (queryEnd < queryStart) {
        throw new IllegalArgumentException("End must not be less than start");
    }

    // Transient instances only.
    Collection<GeneProduct> candidates = new HashSet<>();

    if (config.isUseRefGene()) {
        // Starting with refgene means we can get the correct transcript name etc.
        candidates.addAll(this.findRefGenesByLocation(chromosome, queryStart, queryEnd, strand));
    }

    if (config.isUseKnownGene()) {
        // Get known genes as well, in case all we got was an intron. Currently does not work with rat (rn6).
        candidates.addAll(this.findKnownGenesByLocation(chromosome, queryStart, queryEnd, strand));
    }

    if (candidates.isEmpty()) {
        return null;
    }

    Collection<BlatAssociation> associations = new HashSet<>();
    for (GeneProduct candidate : candidates) {
        if (GoldenPath.log.isDebugEnabled()) {
            GoldenPath.log.debug(candidate);
        }

        BlatAssociation association = this.computeLocationInGene(chromosome, queryStart, queryEnd, starts, sizes, candidate, method, config);

        // Only a zero overlap can be rejected here; the real threshold is applied later because not all the
        // slots of the association are populated yet.
        boolean lacksRequiredOverlap = config.getMinimumExonOverlapFraction() > 0.0 && association.getOverlap() == 0;
        if (lacksRequiredOverlap) {
            GoldenPath.log.debug("Result failed to meet exon overlap threshold (0)");
            continue;
        }

        associations.add(association);
    }

    return associations;
}
use of ubic.gemma.model.association.BioSequence2GeneProduct in project Gemma by PavlidisLab.
the class CompositeSequenceDaoImpl method thaw.
/**
 * Initializes the associations of a single composite sequence: its biological characteristic (with taxon,
 * parent taxon, sequence database entry and their external databases), every gene product association of that
 * sequence down to the gene's aliases and accessions, and finally the array design.
 *
 * @param compositeSequence the composite sequence to thaw; may be null
 * @return the same instance that was passed in, or null if the argument was null
 */
@Override
public CompositeSequence thaw(CompositeSequence compositeSequence) {
    if (compositeSequence == null) {
        return null;
    }

    BioSequence bioSequence = compositeSequence.getBiologicalCharacteristic();
    Hibernate.initialize(bioSequence);

    if (bioSequence != null) {
        // Taxon, its external database and its parent taxon.
        Hibernate.initialize(bioSequence.getTaxon());
        if (bioSequence.getTaxon() != null) {
            Hibernate.initialize(bioSequence.getTaxon().getExternalDatabase());
            Hibernate.initialize(bioSequence.getTaxon().getParentTaxon());
            if (bioSequence.getTaxon().getParentTaxon() != null) {
                Hibernate.initialize(bioSequence.getTaxon().getParentTaxon().getExternalDatabase());
            }
        }

        // Sequence database entry and the database it belongs to.
        DatabaseEntry sequenceEntry = bioSequence.getSequenceDatabaseEntry();
        Hibernate.initialize(sequenceEntry);
        if (sequenceEntry != null) {
            Hibernate.initialize(sequenceEntry.getExternalDatabase());
        }

        // Gene product associations, down to the gene's aliases and accessions.
        Hibernate.initialize(bioSequence.getBioSequence2GeneProduct());
        for (BioSequence2GeneProduct association : bioSequence.getBioSequence2GeneProduct()) {
            if (association == null) {
                continue;
            }
            Hibernate.initialize(association);

            GeneProduct geneProduct = association.getGeneProduct();
            if (geneProduct == null) {
                continue;
            }
            Hibernate.initialize(geneProduct);

            Gene gene = geneProduct.getGene();
            Hibernate.initialize(gene);
            if (gene != null) {
                Hibernate.initialize(gene.getAliases());
                Hibernate.initialize(gene.getAccessions());
            }
        }
    }

    Hibernate.initialize(compositeSequence.getArrayDesign());
    return compositeSequence;
}
use of ubic.gemma.model.association.BioSequence2GeneProduct in project Gemma by PavlidisLab.
the class CompositeSequenceDaoImpl method thaw.
/**
 * Thaws a collection of composite sequences inside a native Hibernate session.
 * <p>
 * For each composite sequence the array design, the biological characteristic, its taxon, its sequence database
 * entry (with external database) and the aliases of every associated gene are initialized. Entities are evicted
 * from the session as the loop proceeds and the session is cleared at the end. Composite sequences without a
 * biological characteristic, or whose biological characteristic has a null association collection, are skipped
 * after the initialization done up to that point.
 *
 * @param compositeSequences the composite sequences to thaw
 */
@Override
public void thaw(final Collection<CompositeSequence> compositeSequences) {
    HibernateTemplate templ = this.getHibernateTemplate();
    templ.executeWithNativeSession(new org.springframework.orm.hibernate3.HibernateCallback<Object>() {
        @Override
        public Object doInHibernate(org.hibernate.Session session) throws org.hibernate.HibernateException {
            int i = 0;
            int numToDo = compositeSequences.size();
            for (CompositeSequence cs : compositeSequences) {
                // Associate the instance with this session before touching its associations.
                session.buildLockRequest(LockOptions.NONE).lock(cs);
                Hibernate.initialize(cs.getArrayDesign());

                BioSequence bs = cs.getBiologicalCharacteristic();
                if (bs == null) {
                    continue;
                }

                session.buildLockRequest(LockOptions.NONE).lock(bs);
                Hibernate.initialize(bs);
                Hibernate.initialize(bs.getTaxon());

                DatabaseEntry dbEntry = bs.getSequenceDatabaseEntry();
                if (dbEntry != null) {
                    Hibernate.initialize(dbEntry);
                    Hibernate.initialize(dbEntry.getExternalDatabase());
                    session.evict(dbEntry);
                    session.evict(dbEntry.getExternalDatabase());
                }

                if (bs.getBioSequence2GeneProduct() == null) {
                    continue;
                }

                for (BioSequence2GeneProduct bs2gp : bs.getBioSequence2GeneProduct()) {
                    if (bs2gp == null) {
                        continue;
                    }
                    GeneProduct geneProduct = bs2gp.getGeneProduct();
                    if (geneProduct != null && geneProduct.getGene() != null) {
                        Gene g = geneProduct.getGene();
                        // Load the aliases explicitly, as the single-instance thaw does; tolerates a null collection.
                        Hibernate.initialize(g.getAliases());
                        session.evict(g);
                        session.evict(geneProduct);
                    }
                }

                if (++i % 2000 == 0) {
                    AbstractDao.log.info("Progress: " + i + "/" + numToDo + "...");
                    try {
                        // Brief pause between batches of 2000.
                        Thread.sleep(10);
                    } catch (InterruptedException e) {
                        // Do not lose the interruption: restore the flag so callers can still observe it.
                        Thread.currentThread().interrupt();
                    }
                }

                session.evict(bs);
            }
            session.clear();
            return null;
        }
    });
}
use of ubic.gemma.model.association.BioSequence2GeneProduct in project Gemma by PavlidisLab.
the class CompositeSequenceDaoImpl method batchGetGenesWithSpecificity.
/**
 * Fetches the gene product associations for one batch of composite sequences and groups them by composite
 * sequence in {@code results}. Every composite sequence of the batch ends up as a key of {@code results}; those
 * without any association are mapped to an empty collection.
 *
 * @param batch   of composite sequences to process; nothing happens if it is empty
 * @param results - adding to this
 */
private void batchGetGenesWithSpecificity(Collection<CompositeSequence> batch, Map<CompositeSequence, Collection<BioSequence2GeneProduct>> results) {
    if (batch.isEmpty()) {
        return;
    }

    // language=HQL
    final String queryString = "select cs,bas from CompositeSequence cs, BioSequence2GeneProduct bas inner join cs.biologicalCharacteristic bs "
            + "inner join fetch bas.geneProduct gp inner join fetch gp.gene gene "
            + "where bas.bioSequence=bs and cs in (:cs)";

    List<?> rows = this.getHibernateTemplate().findByNamedParam(queryString, "cs", batch);

    for (Object row : rows) {
        // Each row is a pair: [composite sequence, association].
        Object[] columns = (Object[]) row;
        CompositeSequence probe = (CompositeSequence) columns[0];
        BioSequence2GeneProduct association = (BioSequence2GeneProduct) columns[1];

        if (association instanceof BlatAssociation) {
            BlatResult blatResult = ((BlatAssociation) association).getBlatResult();
            PhysicalLocation alignedRegion = blatResult.getTargetAlignedRegion();

            // We didn't always used to fill in the targetAlignedRegion ... this is just in case.
            // NOTE(review): the location built here is never stored on the blat result or used afterwards -
            // confirm whether it was meant to be set on blatResult.
            if (alignedRegion == null) {
                alignedRegion = PhysicalLocation.Factory.newInstance();
                alignedRegion.setChromosome(blatResult.getTargetChromosome());
                alignedRegion.setNucleotide(blatResult.getTargetStart());
                alignedRegion.setNucleotideLength(blatResult.getTargetEnd().intValue() - blatResult.getTargetStart().intValue());
                alignedRegion.setStrand(blatResult.getStrand());
                // Note: not bothering to fill in the bin.
            }
        }

        if (!results.containsKey(probe)) {
            results.put(probe, new HashSet<BioSequence2GeneProduct>());
        }
        results.get(probe).add(association);
    }

    // This is kind of important: probes that do not have a mapping still get an (empty) collection.
    for (CompositeSequence probe : batch) {
        if (!results.containsKey(probe)) {
            results.put(probe, new HashSet<BioSequence2GeneProduct>());
        }
    }
}
Aggregations