use of ubic.gemma.model.genome.sequenceAnalysis.AnnotationAssociation in project Gemma by PavlidisLab.
the class ArrayDesignProbeMapperServiceImpl method processArrayDesign.
/**
 * Replaces the gene product associations of a platform with annotations read from a tab-delimited file.
 * <p>
 * Blank lines and lines starting with '#' are ignored. Every other line must have exactly three tab-separated
 * columns: probe name, sequence name, and one or more gene identifiers separated by '|'. Lines whose probe,
 * genes or sequence cannot be resolved are skipped and counted; the count is logged at the end.
 *
 * @param arrayDesign platform to annotate; its existing gene product associations are deleted before the file
 *                    is read
 * @param taxon       taxon used to look up official symbols and set on newly created sequences; may be null
 *                    only when {@code ncbiIds} is true
 * @param source      annotation file to read
 * @param sourceDB    set as the source of every association created
 * @param ncbiIds     if true the gene column holds NCBI gene ids, otherwise official gene symbols
 * @throws IOException              if the file cannot be read or a line does not have three columns
 * @throws IllegalArgumentException if no taxon is given while {@code ncbiIds} is false, or the platform's
 *                                  technology type is NONE
 * @throws NumberFormatException    if {@code ncbiIds} is true and a gene identifier is not an integer
 */
@Override
public void processArrayDesign(ArrayDesign arrayDesign, Taxon taxon, File source, ExternalDatabase sourceDB, boolean ncbiIds) throws IOException {
    if (taxon == null && !ncbiIds) {
        throw new IllegalArgumentException("You must provide a taxon unless passing ncbiIds = true");
    }
    if (arrayDesign.getTechnologyType().equals(TechnologyType.NONE)) {
        throw new IllegalArgumentException("Do not use this service to process platforms that do not use an probe-based technology.");
    }
    try (BufferedReader b = new BufferedReader(new FileReader(source))) {
        String line;
        int numSkipped = 0;
        ArrayDesignProbeMapperServiceImpl.log.info("Removing any old associations");
        arrayDesignService.deleteGeneProductAssociations(arrayDesign);
        while ((line = b.readLine()) != null) {
            if (StringUtils.isBlank(line) || line.startsWith("#")) {
                continue;
            }
            String[] fields = StringUtils.splitPreserveAllTokens(line, '\t');
            if (fields.length != 3) {
                throw new IOException("Illegal format, expected three columns, got " + fields.length);
            }
            String probeId = fields[0];
            String seqName = fields[1];
            // Official symbols or NCBI gene ids (depending on ncbiIds), '|'-separated when there are several.
            String geneSymbol = fields[2];
            if (StringUtils.isBlank(geneSymbol)) {
                numSkipped++;
                continue;
            }
            CompositeSequence c = compositeSequenceService.findByName(arrayDesign, probeId);
            if (c == null) {
                if (ArrayDesignProbeMapperServiceImpl.log.isDebugEnabled()) {
                    ArrayDesignProbeMapperServiceImpl.log.debug("No probe found for '" + probeId + "' on " + arrayDesign + ", skipping");
                }
                numSkipped++;
                continue;
            }
            Collection<Gene> geneListProbe = this.findAnnotatedGenes(probeId, geneSymbol, taxon, ncbiIds);
            if (geneListProbe.isEmpty()) {
                ArrayDesignProbeMapperServiceImpl.log.warn("No gene(s) found for '" + geneSymbol + "' in " + taxon + ", skipping");
                numSkipped++;
                continue;
            } else if (geneListProbe.size() > 1) {
                // Common when the gene column has |-separated genes, so no need to make a lot of fuss.
                ArrayDesignProbeMapperServiceImpl.log.debug("More than one gene found for '" + geneSymbol + "' in " + taxon);
            }
            BioSequence bs = this.resolveAnnotatedSequence(c, probeId, seqName, taxon);
            if (bs == null) {
                // The helper already logged why this probe cannot be used.
                numSkipped++;
                continue;
            }
            assert bs.getId() != null;
            numSkipped += this.createAnnotationAssociations(bs, geneListProbe, sourceDB);
        }
        arrayDesignReportService.generateArrayDesignReport(arrayDesign.getId());
        this.deleteOldFiles(arrayDesign);
        ArrayDesignProbeMapperServiceImpl.log.info("Completed association processing for " + arrayDesign + ", " + numSkipped + " were skipped");
    }
}

/**
 * Looks up the genes named in the '|'-separated gene column of one annotation line.
 *
 * @return the genes that were found; empty if none of the identifiers matched
 * @throws NumberFormatException if {@code ncbiIds} is true and an identifier is not an integer
 */
private Collection<Gene> findAnnotatedGenes(String probeId, String geneSymbol, Taxon taxon, boolean ncbiIds) {
    Collection<Gene> genes = new HashSet<>();
    StringTokenizer st = new StringTokenizer(geneSymbol, "|");
    while (st.hasMoreTokens()) {
        String geneToken = st.nextToken().trim();
        if (geneToken.isEmpty()) {
            // e.g. "123| ": whitespace between delimiters is not an identifier and must not reach parseInt.
            continue;
        }
        Gene geneDetails;
        if (ncbiIds) {
            int ncbiId;
            try {
                ncbiId = Integer.parseInt(geneToken);
            } catch (NumberFormatException e) {
                // Same exception type as before, but say which token and probe are at fault.
                NumberFormatException detailed = new NumberFormatException("Expected an NCBI gene id but got '" + geneToken + "' for probe '" + probeId + "'");
                detailed.initCause(e);
                throw detailed;
            }
            geneDetails = geneService.findByNCBIId(ncbiId);
        } else {
            geneDetails = geneService.findByOfficialSymbol(geneToken, taxon);
        }
        if (geneDetails != null) {
            genes.add(geneDetails);
        }
    }
    return genes;
}

/**
 * Returns the sequence to attach associations to for a probe: its existing sequence when the given name is
 * blank or matches, or a newly created placeholder sequence when the probe has none.
 *
 * @return the sequence, or null (after logging a warning) if the line must be skipped
 */
private BioSequence resolveAnnotatedSequence(CompositeSequence c, String probeId, String seqName, Taxon taxon) {
    BioSequence bs = c.getBiologicalCharacteristic();
    if (bs != null) {
        if (StringUtils.isNotBlank(seqName)) {
            bs = bioSequenceService.thaw(bs);
            if (!bs.getName().equals(seqName)) {
                ArrayDesignProbeMapperServiceImpl.log.warn("Sequence name '" + seqName + "' given for " + probeId + " does not match existing entry " + bs.getName() + ", skipping");
                return null;
            }
        }
        // No name given: assume the existing sequence is the right one.
        return bs;
    }
    if (StringUtils.isBlank(seqName)) {
        ArrayDesignProbeMapperServiceImpl.log.warn("You must provide sequence names for probes which are not already mapped. probeName=" + probeId + " had no sequence associated and no name provided; skipping");
        return null;
    }
    // Create one based on the text provided.
    bs = BioSequence.Factory.newInstance();
    bs.setName(seqName);
    // NOTE(review): taxon can be null here when ncbiIds is true and no taxon was passed; the sequence is then
    // created without a taxon - confirm that is acceptable.
    bs.setTaxon(taxon);
    bs.setDescription("Imported from annotation file");
    // Placeholder.
    bs.setType(SequenceType.OTHER);
    bs = bioSequenceService.create(bs);
    c.setBiologicalCharacteristic(bs);
    compositeSequenceService.update(c);
    return bs;
}

/**
 * Creates one annotation association between the sequence and every product of every given gene.
 *
 * @return the number of genes skipped because they have no products
 */
private int createAnnotationAssociations(BioSequence bs, Collection<Gene> genes, ExternalDatabase sourceDB) {
    int genesSkipped = 0;
    for (Gene gene : genes) {
        gene = geneService.thaw(gene);
        if (gene.getProducts().size() == 0) {
            ArrayDesignProbeMapperServiceImpl.log.warn("There are no gene products for " + gene + ", it cannot be mapped to probes. Skipping");
            genesSkipped++;
            continue;
        }
        for (GeneProduct gp : gene.getProducts()) {
            AnnotationAssociation association = AnnotationAssociation.Factory.newInstance();
            association.setBioSequence(bs);
            association.setGeneProduct(gp);
            association.setSource(sourceDB);
            annotationAssociationService.create(association);
        }
    }
    return genesSkipped;
}
use of ubic.gemma.model.genome.sequenceAnalysis.AnnotationAssociation in project Gemma by PavlidisLab.
the class AnnotationAssociationDaoImpl method thaw.
/**
 * Passes every persisted association in the collection to {@code thawAssociation}, all within one native
 * Hibernate session. Associations without an id are left untouched.
 *
 * @param anCollection associations to thaw; a null collection is a no-op
 */
@Override
public void thaw(final Collection<AnnotationAssociation> anCollection) {
    if (anCollection == null) {
        return;
    }
    this.getHibernateTemplate().executeWithNativeSession(new org.springframework.orm.hibernate3.HibernateCallback<Object>() {
        @Override
        public Object doInHibernate(org.hibernate.Session session) throws org.hibernate.HibernateException {
            for (AnnotationAssociation association : anCollection) {
                // Only entities that already have an id are handed to thawAssociation.
                if (association.getId() != null) {
                    AnnotationAssociationDaoImpl.this.thawAssociation(session, association);
                }
            }
            // The callback has no result; callers ignore the return value.
            return null;
        }
    });
}
use of ubic.gemma.model.genome.sequenceAnalysis.AnnotationAssociation in project Gemma by PavlidisLab.
the class CompositeSequenceServiceImpl method getGeneMappingSummary.
/**
 * Summarizes the gene mappings of a probe, grouped by the hash of the (real or dummy) BLAT result behind each
 * mapping.
 *
 * @param cs the probe to summarize
 * @return one summary per distinct BLAT result hash; empty if the probe has no sequence or the sequence has no
 *         gene product collection; otherwise at least one summary (a placeholder when nothing else was found)
 */
@Override
public Collection<GeneMappingSummary> getGeneMappingSummary(CompositeSequence cs) {
    Map<Integer, GeneMappingSummary> summariesByHash = new HashMap<>();
    BioSequence sequence = bioSequenceService.thaw(cs.getBiologicalCharacteristic());
    if (sequence == null || sequence.getBioSequence2GeneProduct() == null) {
        return summariesByHash.values();
    }

    for (BioSequence2GeneProduct association : sequence.getBioSequence2GeneProduct()) {
        GeneProductValueObject productVo = new GeneProductValueObject(geneProductService.thaw(association.getGeneProduct()));
        GeneValueObject geneVo = new GeneValueObject(association.getGeneProduct().getGene());

        BlatResultValueObject blatVo;
        if (association instanceof BlatAssociation) {
            blatVo = new BlatResultValueObject(blatResultService.thaw(((BlatAssociation) association).getBlatResult()));
        } else if (association instanceof AnnotationAssociation) {
            // Annotation-based mappings have no alignment, so stand in a dummy result keyed on the sequence.
            blatVo = new BlatResultValueObject();
            blatVo.setQuerySequence(BioSequenceValueObject.fromEntity(sequence));
            blatVo.setId(sequence.getId());
        } else {
            // Any other association type contributes nothing to the summary.
            continue;
        }

        Integer key = ProbeMapUtils.hashBlatResult(blatVo);
        GeneMappingSummary summary = summariesByHash.get(key);
        if (summary == null) {
            summary = new GeneMappingSummary();
            summary.addGene(productVo, geneVo);
            summary.setBlatResult(blatVo);
            summary.setCompositeSequence(this.loadValueObject(cs));
            summariesByHash.put(key, summary);
        } else {
            summary.addGene(productVo, geneVo);
        }
    }

    this.addBlatResultsLackingGenes(cs, summariesByHash);

    if (summariesByHash.isEmpty()) {
        // Nothing found: add a placeholder that at least carries the probe and its sequence. A bit of a hack.
        BlatResultValueObject placeholder = new BlatResultValueObject(-1L);
        placeholder.setQuerySequence(BioSequenceValueObject.fromEntity(sequence));
        GeneMappingSummary summary = new GeneMappingSummary();
        summary.setCompositeSequence(this.loadValueObject(cs));
        summary.setBlatResult(placeholder);
        summariesByHash.put(ProbeMapUtils.hashBlatResult(placeholder), summary);
    }
    return summariesByHash.values();
}
use of ubic.gemma.model.genome.sequenceAnalysis.AnnotationAssociation in project Gemma by PavlidisLab.
the class ArrayDesignDaoImpl method deleteGeneProductAssociations.
/**
 * Deletes the sequence-to-gene-product associations reachable from the probes of a platform.
 * <p>
 * The platform is locked first. Associations selected as BioSequence2GeneProduct are deleted and the session
 * flushed, then any associations selected as AnnotationAssociation are deleted.
 *
 * @param arrayDesign platform whose associations are removed
 */
@Override
public void deleteGeneProductAssociations(ArrayDesign arrayDesign) {
    this.getSessionFactory().getCurrentSession()
            .buildLockRequest(LockOptions.UPGRADE)
            .setLockMode(LockMode.PESSIMISTIC_WRITE)
            .lock(arrayDesign);

    // This query is polymorphic; it may already pick up the annotation associations.
    // language=HQL
    final String associationHql = "select ba from CompositeSequence cs "
            + "inner join cs.biologicalCharacteristic bs, BioSequence2GeneProduct ba "
            + "where ba.bioSequence = bs and cs.arrayDesign=:arrayDesign";
    List<?> associations = this.getSessionFactory().getCurrentSession()
            .createQuery(associationHql)
            .setParameter("arrayDesign", arrayDesign)
            .list();
    for (Object association : associations) {
        this.getSessionFactory().getCurrentSession().delete(association);
    }
    if (associations.size() > 0) {
        AbstractDao.log.info("Done deleting " + associations.size() + " blat associations for " + arrayDesign);
    }

    // Push the deletes out before running the second query.
    this.getSessionFactory().getCurrentSession().flush();

    // language=HQL
    final String annotationHql = "select ba from CompositeSequence cs "
            + " inner join cs.biologicalCharacteristic bs, AnnotationAssociation ba "
            + " where ba.bioSequence = bs and cs.arrayDesign=:arrayDesign";
    // noinspection unchecked
    List<AnnotationAssociation> annotationAssociations = this.getSessionFactory().getCurrentSession()
            .createQuery(annotationHql)
            .setParameter("arrayDesign", arrayDesign)
            .list();
    for (AnnotationAssociation association : annotationAssociations) {
        this.getSessionFactory().getCurrentSession().delete(association);
    }
    if (annotationAssociations.size() > 0) {
        AbstractDao.log.info("Done deleting " + annotationAssociations.size() + " AnnotationAssociations for " + arrayDesign);
    }
}
use of ubic.gemma.model.genome.sequenceAnalysis.AnnotationAssociation in project Gemma by PavlidisLab.
the class GeneProductServiceImpl method remove.
/**
 * Removes the given gene products together with the BLAT and annotation associations that refer to them.
 * <p>
 * Before a product is removed, any of its accessions that a sequence lookup by accession still finds is taken
 * out of the product's accession collection.
 *
 * @param toRemove gene products to delete
 */
@Override
@Transactional
public void remove(Collection<GeneProduct> toRemove) {
    Collection<BlatAssociation> blatAssociations = this.blatAssociationDao.find(toRemove);
    if (!blatAssociations.isEmpty()) {
        AbstractService.log.info("Removing " + blatAssociations.size() + " blat associations involving up to " + toRemove.size() + " products.");
        this.blatAssociationDao.remove(blatAssociations);
    }

    Collection<AnnotationAssociation> annotAssociations = this.annotationAssociationDao.find(toRemove);
    if (!annotAssociations.isEmpty()) {
        AbstractService.log.info("Removing " + annotAssociations.size() + " annotationAssociations involving up to " + toRemove.size() + " products.");
        this.annotationAssociationDao.remove(annotAssociations);
    }

    for (GeneProduct product : toRemove) {
        GeneProduct thawed = this.thaw(product);

        // Collect first, remove afterwards, so the accession collection is not modified while iterating.
        Collection<DatabaseEntry> usedBySequences = new HashSet<>();
        for (DatabaseEntry accession : thawed.getAccessions()) {
            if (this.bioSequenceDao.findByAccession(accession) != null) {
                usedBySequences.add(accession);
            }
        }

        // Detach the entries that a sequence still uses before removing the product.
        thawed.getAccessions().removeAll(usedBySequences);
        this.geneProductDao.remove(thawed);
    }
}
Aggregations