use of ubic.gemma.model.genome.gene.GeneProduct in project Gemma by PavlidisLab.
the class CompositeSequenceMapSummary method toString.
/**
 * Formats this summary as a single tab-separated line: the probe name, the name of its
 * biological characteristic (left empty when there is none), the number of BLAT results, the
 * gene product names and the official gene symbols. Entries inside the last two columns are
 * separated by '|'.
 *
 * @return the tab-separated representation of this summary
 */
@Override
public String toString() {
    StringBuilder line = new StringBuilder();

    line.append(compositeSequence.getName());
    line.append("\t");

    // The sequence column is left empty when the probe has no biological characteristic.
    if (compositeSequence.getBiologicalCharacteristic() == null) {
        line.append("\t");
    } else {
        line.append(compositeSequence.getBiologicalCharacteristic().getName());
        line.append("\t");
    }

    line.append(blatResults.size());
    line.append("\t");

    for (GeneProduct product : geneProducts) {
        line.append(product.getName());
        line.append("|");
    }
    line.append("\t");

    for (Gene gene : genes) {
        line.append(gene.getOfficialSymbol());
        line.append("|");
    }

    // Every list entry was written with a trailing '|': remove any '|' that directly precedes a
    // tab, then a '|' left at the very end of the line.
    String withoutPipeBeforeTab = line.toString().replaceAll("\\|\t", "\t");
    return withoutPipeBeforeTab.replaceFirst("\\|$", "");
}
use of ubic.gemma.model.genome.gene.GeneProduct in project Gemma by PavlidisLab.
the class ArrayDesignProbeMapperServiceImpl method checkForAlias.
/**
 * Searches for an existing gene product that has the same name as the given one (looked up in
 * the taxon of the given product's gene) and whose gene lists the given product's official gene
 * symbol among its aliases. The alias comparison ignores case.
 *
 * @param geneProduct the gene product to find a counterpart for
 * @return the first candidate matched through a gene alias, or {@code null} when there are no
 *         candidates or none of them matches
 */
private GeneProduct checkForAlias(GeneProduct geneProduct) {
    Collection<GeneProduct> sameNameProducts = geneProductService.findByName(geneProduct.getName(), geneProduct.getGene().getTaxon());

    if (!sameNameProducts.isEmpty()) {
        Gene queryGene = geneProduct.getGene();
        for (GeneProduct candidate : sameNameProducts) {
            for (GeneAlias alias : candidate.getGene().getAliases()) {
                boolean symbolIsAlias = alias.getAlias().equalsIgnoreCase(queryGene.getOfficialSymbol());
                if (!symbolIsAlias) {
                    continue;
                }
                // The product names agree and the genes agree through an alias: accept this candidate.
                ArrayDesignProbeMapperServiceImpl.log.info("Associated gene product " + geneProduct + " has a match in Gemma through an aliased gene: " + candidate);
                return candidate;
            }
        }
    }

    return null;
}
use of ubic.gemma.model.genome.gene.GeneProduct in project Gemma by PavlidisLab.
the class ArrayDesignProbeMapperServiceImpl method printResult.
/**
 * Writes one tab-separated result line to standard output: probe name, sequence name, gene
 * product name, official gene symbol and the simple class name of the gene.
 *
 * @param cs              the probe the result belongs to
 * @param blatAssociation the association supplying the sequence and gene product
 */
private void printResult(CompositeSequence cs, BlatAssociation blatAssociation) {
    GeneProduct product = blatAssociation.getGeneProduct();
    Gene productGene = product.getGene();

    StringBuilder row = new StringBuilder();
    row.append(cs.getName()).append('\t');
    row.append(blatAssociation.getBioSequence().getName()).append('\t');
    row.append(product.getName()).append('\t');
    row.append(productGene.getOfficialSymbol()).append("\t");
    row.append(productGene.getClass().getSimpleName());

    System.out.println(row.toString());
}
use of ubic.gemma.model.genome.gene.GeneProduct in project Gemma by PavlidisLab.
the class ArrayDesignProbeMapperServiceImpl method processArrayDesign.
/**
 * Replaces the probe-to-gene-product associations of a platform with those listed in a
 * tab-delimited annotation file.
 * <p>
 * Existing gene product associations of the platform are removed before the file is read. Blank
 * lines and lines starting with '#' are ignored. Every other line must have exactly three
 * columns: probe name, sequence name, and one or more gene identifiers separated by '|'. A line
 * is logged and counted as skipped when the gene column is blank, the probe is not found on the
 * platform, none of its genes can be found, the given sequence name disagrees with the probe's
 * existing sequence, or the probe has no sequence and no sequence name was given. A gene without
 * gene products is also counted as skipped. When a probe has no sequence yet, one is created from
 * the given name and attached to the probe. After the file is processed, the platform report is
 * regenerated and old files are deleted.
 *
 * @param arrayDesign the platform whose probes are being mapped; its technology type must not be
 *                    {@code TechnologyType.NONE}
 * @param taxon       taxon used to look up official gene symbols and assigned to sequences
 *                    created here; may be {@code null} only when {@code ncbiIds} is true
 * @param source      the annotation file to read
 * @param sourceDB    recorded as the source of every association created
 * @param ncbiIds     if true the gene column holds NCBI gene ids, otherwise official symbols
 * @throws IOException              if the file cannot be read or a line does not have exactly
 *                                  three columns
 * @throws IllegalArgumentException if no taxon is given while {@code ncbiIds} is false, or the
 *                                  platform's technology type is {@code NONE}
 * @throws NumberFormatException    if {@code ncbiIds} is true and a non-blank gene token is not
 *                                  an integer
 */
@Override
public void processArrayDesign(ArrayDesign arrayDesign, Taxon taxon, File source, ExternalDatabase sourceDB, boolean ncbiIds) throws IOException {
    if (taxon == null && !ncbiIds) {
        throw new IllegalArgumentException("You must provide a taxon unless passing ncbiIds = true");
    }
    if (arrayDesign.getTechnologyType().equals(TechnologyType.NONE)) {
        throw new IllegalArgumentException("Do not use this service to process platforms that do not use an probe-based technology.");
    }
    try (BufferedReader b = new BufferedReader(new FileReader(source))) {
        String line;
        int numSkipped = 0;
        ArrayDesignProbeMapperServiceImpl.log.info("Removing any old associations");
        arrayDesignService.deleteGeneProductAssociations(arrayDesign);
        while ((line = b.readLine()) != null) {
            if (StringUtils.isBlank(line)) {
                continue;
            }
            if (line.startsWith("#")) {
                continue;
            }
            String[] fields = StringUtils.splitPreserveAllTokens(line, '\t');
            if (fields.length != 3) {
                throw new IOException("Illegal format, expected three columns, got " + fields.length);
            }
            String probeId = fields[0];
            String seqName = fields[1];
            /*
             * FIXME. We have to allow NCBI gene ids here.
             */
            String geneSymbol = fields[2];
            if (StringUtils.isBlank(geneSymbol)) {
                numSkipped++;
                continue;
            }
            CompositeSequence c = compositeSequenceService.findByName(arrayDesign, probeId);
            if (c == null) {
                if (ArrayDesignProbeMapperServiceImpl.log.isDebugEnabled()) {
                    ArrayDesignProbeMapperServiceImpl.log.debug("No probe found for '" + probeId + "' on " + arrayDesign + ", skipping");
                }
                numSkipped++;
                continue;
            }
            // a probe can have more than one gene associated with it; if so they are piped |
            Collection<Gene> geneListProbe = new HashSet<>();
            StringTokenizer st = new StringTokenizer(geneSymbol, "|");
            while (st.hasMoreTokens()) {
                String geneToken = st.nextToken().trim();
                if (geneToken.isEmpty()) {
                    // Whitespace-only token (e.g. "1234| "): there is nothing to look up, and in
                    // NCBI-id mode it would otherwise fail in Integer.parseInt.
                    continue;
                }
                Gene geneDetails;
                if (ncbiIds) {
                    int ncbiId;
                    try {
                        ncbiId = Integer.parseInt(geneToken);
                    } catch (NumberFormatException e) {
                        // Same exception type as before, but say which token and probe were at fault.
                        NumberFormatException withContext = new NumberFormatException("Invalid NCBI gene id '" + geneToken + "' for probe '" + probeId + "'");
                        withContext.initCause(e);
                        throw withContext;
                    }
                    geneDetails = geneService.findByNCBIId(ncbiId);
                } else {
                    geneDetails = geneService.findByOfficialSymbol(geneToken, taxon);
                }
                if (geneDetails != null) {
                    geneListProbe.add(geneDetails);
                }
            }
            if (geneListProbe.size() == 0) {
                ArrayDesignProbeMapperServiceImpl.log.warn("No gene(s) found for '" + geneSymbol + "' in " + taxon + ", skipping");
                numSkipped++;
                continue;
            } else if (geneListProbe.size() > 1) {
                // this is a common situation, when the geneSymbol actually has |-separated genes, so
                // no need to make a lot of fuss.
                ArrayDesignProbeMapperServiceImpl.log.debug("More than one gene found for '" + geneSymbol + "' in " + taxon);
            }
            BioSequence bs = c.getBiologicalCharacteristic();
            if (bs != null) {
                if (StringUtils.isNotBlank(seqName)) {
                    bs = bioSequenceService.thaw(bs);
                    if (!bs.getName().equals(seqName)) {
                        ArrayDesignProbeMapperServiceImpl.log.warn("Sequence name '" + seqName + "' given for " + probeId + " does not match existing entry " + bs.getName() + ", skipping");
                        numSkipped++;
                        continue;
                    }
                }
                // otherwise we assume everything is okay.
            } else {
                // create one based on the text provided.
                if (StringUtils.isBlank(seqName)) {
                    ArrayDesignProbeMapperServiceImpl.log.warn("You must provide sequence names for probes which are not already mapped. probeName=" + probeId + " had no sequence associated and no name provided; skipping");
                    numSkipped++;
                    continue;
                }
                bs = BioSequence.Factory.newInstance();
                bs.setName(seqName);
                // NOTE(review): taxon can be null here when ncbiIds is true - confirm that a
                // sequence without a taxon is acceptable.
                bs.setTaxon(taxon);
                bs.setDescription("Imported from annotation file");
                // Placeholder.
                bs.setType(SequenceType.OTHER);
                bs = bioSequenceService.create(bs);
                c.setBiologicalCharacteristic(bs);
                compositeSequenceService.update(c);
            }
            assert bs != null;
            assert bs.getId() != null;
            // Associate the probe's sequence with every product of every gene found for it.
            for (Gene gene : geneListProbe) {
                gene = geneService.thaw(gene);
                if (gene.getProducts().size() == 0) {
                    ArrayDesignProbeMapperServiceImpl.log.warn("There are no gene products for " + gene + ", it cannot be mapped to probes. Skipping");
                    numSkipped++;
                    continue;
                }
                for (GeneProduct gp : gene.getProducts()) {
                    AnnotationAssociation association = AnnotationAssociation.Factory.newInstance();
                    association.setBioSequence(bs);
                    association.setGeneProduct(gp);
                    association.setSource(sourceDB);
                    annotationAssociationService.create(association);
                }
            }
        }
        arrayDesignReportService.generateArrayDesignReport(arrayDesign.getId());
        this.deleteOldFiles(arrayDesign);
        ArrayDesignProbeMapperServiceImpl.log.info("Completed association processing for " + arrayDesign + ", " + numSkipped + " were skipped");
    }
}
use of ubic.gemma.model.genome.gene.GeneProduct in project Gemma by PavlidisLab.
the class NcbiGeneConverter method convert.
/**
 * Builds the gene products described by one gene-to-accession record.
 * <p>
 * An RNA product is created when the record has an RNA nucleotide accession. A protein product
 * is created when protein information is retained
 * ({@code NcbiGeneConverter.retainProteinInformation}) and the record has a protein accession.
 * Each product is linked to the given gene and receives a GenBank accession entry built from
 * the record.
 *
 * @param acc  the record supplying accessions, GI numbers and status
 * @param gene the gene the created products belong to
 * @return the created products; possibly empty, never {@code null}
 */
public Collection<GeneProduct> convert(NCBIGene2Accession acc, Gene gene) {
    Collection<GeneProduct> geneProducts = new HashSet<>();

    // RNA section
    if (acc.getRnaNucleotideAccession() != null) {
        GeneProduct rna = GeneProduct.Factory.newInstance();
        // set available fields
        rna.setNcbiGi(acc.getRnaNucleotideGI());
        rna.setGene(gene);
        rna.setName(acc.getRnaNucleotideAccession());
        rna.setType(GeneProductType.RNA);

        String description = "Imported from NCBI Gene";
        if (acc.getStatus() != null) {
            description = description + " (Refseq status: " + acc.getStatus() + ").";
        }

        // The RNA accession is known to be non-null inside this branch, so no second check is needed.
        DatabaseEntry accession = DatabaseEntry.Factory.newInstance();
        accession.setAccession(acc.getRnaNucleotideAccession());
        accession.setAccessionVersion(acc.getRnaNucleotideAccessionVersion());
        accession.setExternalDatabase(NcbiGeneConverter.genBank);
        if (rna.getAccessions() == null) {
            rna.setAccessions(new HashSet<DatabaseEntry>());
        }
        rna.getAccessions().add(accession);

        /*
         * Fill in physical location details.
         */
        if (acc.getGenomicNucleotideAccession() != null && gene.getPhysicalLocation() != null) {
            this.getChromosomeDetails(acc, gene);
            PhysicalLocation pl = this.getPhysicalLocation(acc, gene);
            rna.setPhysicalLocation(pl);
        }

        rna.setDescription(description);
        geneProducts.add(rna);
    }

    // Protein section
    if (NcbiGeneConverter.retainProteinInformation && acc.getProteinAccession() != null) {
        GeneProduct protein = GeneProduct.Factory.newInstance();
        // set available fields
        protein.setNcbiGi(acc.getProteinGI());
        protein.setGene(gene);
        protein.setName(acc.getProteinAccession());
        protein.setType(GeneProductType.PROTEIN);
        protein.setDescription("Imported from NCBI Gene" + (acc.getStatus() != null ? " (" + acc.getStatus() + ")" : ""));

        DatabaseEntry accession = DatabaseEntry.Factory.newInstance();
        accession.setAccession(acc.getProteinAccession());
        accession.setAccessionVersion(acc.getProteinAccessionVersion());
        accession.setExternalDatabase(NcbiGeneConverter.genBank);

        Collection<DatabaseEntry> accessions = new HashSet<>();
        accessions.add(accession);
        protein.setAccessions(accessions);

        geneProducts.add(protein);
    }

    return geneProducts;
}
Aggregations