use of ubic.gemma.model.genome.Taxon in project Gemma by PavlidisLab.
the class GeneCoreServiceTest method testSearchGenes.
/**
 * Creates a throw-away human gene with a physical location on chromosome X, then checks that
 * {@code geneCoreService.searchGenes} returns at least one non-null value object for its name.
 * The gene is removed again even when an assertion fails, so a failing run does not leave it behind.
 */
@Test
public void testSearchGenes() {
    Taxon human = taxonService.findByCommonName("human");

    Gene gene = Gene.Factory.newInstance();
    Integer id = Integer.parseInt(RandomStringUtils.randomNumeric(5));
    gene.setNcbiGeneId(id);
    gene.setName("test_search");
    gene.setOfficialName("test_search");
    gene.setOfficialSymbol("test_search");
    gene.setTaxon(human);

    Chromosome chromosome = new Chromosome("X", null, this.getTestPersistentBioSequence(), human);
    chromosome = (Chromosome) persisterHelper.persist(chromosome);

    PhysicalLocation pl1 = PhysicalLocation.Factory.newInstance();
    pl1.setChromosome(chromosome);
    pl1.setNucleotide(10000010L);
    pl1.setNucleotideLength(1001);
    pl1.setStrand("-");
    gene.setPhysicalLocation(pl1);

    gene = geneDao.create(gene);
    try {
        // NOTE(review): 1L is presumably the id of the human taxon in the test database - confirm.
        Collection<GeneValueObject> searchResults = geneCoreService.searchGenes("test_search", 1L);
        assertNotNull(searchResults);
        if (searchResults.isEmpty()) {
            // Fail with a readable message instead of a NoSuchElementException from next().
            throw new AssertionError("Expected at least one search result for 'test_search'");
        }
        GeneValueObject gvo = searchResults.iterator().next();
        assertNotNull(gvo);
    } finally {
        // Always clean up the gene created above, including when an assertion fails.
        geneDao.remove(gene);
    }
}
use of ubic.gemma.model.genome.Taxon in project Gemma by PavlidisLab.
the class ArrayDesignProbeMapperServiceImpl method processArrayDesign.
/**
 * Runs probe mapping for every composite sequence of a platform and hands the resulting
 * associations to the loader started by {@code load(...)} through a bounded queue.
 * <p>
 * After all probes are processed the method blocks until the loader signals completion, then
 * regenerates the platform report and deletes old files for the platform.
 *
 * @param arrayDesign platform whose composite sequences are processed
 * @param config      mapping configuration forwarded to the per-probe processing; must not be null
 * @param useDB       when true, existing gene product associations of the platform are deleted
 *                    first; the flag is also forwarded to {@code load(...)}
 * @throws IllegalArgumentException if the platform's technology type is {@code NONE}, or if it has
 *                                  sequences from more than one taxon and no primary taxon
 * @throws RuntimeException         if the thread is interrupted while waiting for the loader; the
 *                                  interrupt status is restored before throwing
 */
@Override
public void processArrayDesign(ArrayDesign arrayDesign, ProbeMapperConfig config, boolean useDB) {
    assert config != null;
    if (arrayDesign.getTechnologyType().equals(TechnologyType.NONE)) {
        throw new IllegalArgumentException("Do not use this service to process platforms that do not use an probe-based technology.");
    }

    Collection<Taxon> taxa = arrayDesignService.getTaxa(arrayDesign.getId());
    Taxon taxon = arrayDesign.getPrimaryTaxon();
    if (taxa.size() > 1 && taxon == null) {
        throw new IllegalArgumentException("Array design has sequence from multiple taxa and has no primary taxon set: " + arrayDesign);
    }
    GoldenPathSequenceAnalysis goldenPathDb = new GoldenPathSequenceAnalysis(taxon);

    // Producer/consumer hand-off: this method fills the queue; load(...) is given the same queue
    // and the two completion flags.
    BlockingQueue<BACS> persistingQueue = new ArrayBlockingQueue<>(ArrayDesignProbeMapperServiceImpl.QUEUE_SIZE);
    AtomicBoolean generatorDone = new AtomicBoolean(false);
    AtomicBoolean loaderDone = new AtomicBoolean(false);
    this.load(persistingQueue, generatorDone, loaderDone, useDB);

    if (useDB) {
        ArrayDesignProbeMapperServiceImpl.log.info("Removing any old associations");
        arrayDesignService.deleteGeneProductAssociations(arrayDesign);
    }

    int count = 0;
    int hits = 0;
    ArrayDesignProbeMapperServiceImpl.log.info("Start processing " + arrayDesign.getCompositeSequences().size() + " probes ...");
    for (CompositeSequence compositeSequence : arrayDesign.getCompositeSequences()) {
        Map<String, Collection<BlatAssociation>> results = this.processCompositeSequence(config, taxon, goldenPathDb, compositeSequence);
        if (results == null) {
            continue;
        }
        for (Collection<BlatAssociation> col : results.values()) {
            for (BlatAssociation association : col) {
                if (ArrayDesignProbeMapperServiceImpl.log.isDebugEnabled()) {
                    ArrayDesignProbeMapperServiceImpl.log.debug(association);
                }
                // NOTE(review): add() throws IllegalStateException when the bounded queue is full;
                // put() would block instead. Left as-is because the consumer is not visible here.
                persistingQueue.add(new BACS(compositeSequence, association));
            }
            // Counted once per result collection, not once per association.
            ++hits;
        }
        // Only probes that produced a non-null result map are counted.
        if (++count % 200 == 0) {
            ArrayDesignProbeMapperServiceImpl.log.info("Processed " + count + " composite sequences with blat results; " + hits + " mappings found.");
        }
    }

    generatorDone.set(true);
    ArrayDesignProbeMapperServiceImpl.log.info("Waiting for loading to complete ...");
    while (!loaderDone.get()) {
        try {
            Thread.sleep(1000);
        } catch (InterruptedException e) {
            // Restore the interrupt status so callers further up can still observe the interruption.
            Thread.currentThread().interrupt();
            throw new RuntimeException(e);
        }
    }

    ArrayDesignProbeMapperServiceImpl.log.info("Processed " + count + " composite sequences with blat results; " + hits + " mappings found.");
    arrayDesignReportService.generateArrayDesignReport(arrayDesign.getId());
    this.deleteOldFiles(arrayDesign);
}
use of ubic.gemma.model.genome.Taxon in project Gemma by PavlidisLab.
the class GeoBrowserServiceImpl method filterGeoRecords.
/**
 * Removes GEO records that are too small or refer to an unknown organism, and annotates the
 * remaining ones with matching experiment ids and locally stored click/usability information.
 * <p>
 * A record is dropped when it has fewer than {@code MIN_SAMPLES} samples, or when any of its
 * organisms cannot be resolved by common or scientific name. Records without organisms are kept
 * untouched. For records with more than one organism the accession looked up is the GEO
 * accession suffixed with {@code "." + index}.
 *
 * @param records records to filter; the list is modified in place
 * @return the same list instance, with rejected records removed
 */
private List<GeoRecord> filterGeoRecords(List<GeoRecord> records) {
    ExternalDatabase geo = externalDatabaseService.findByName("GEO");
    Collection<GeoRecord> toRemove = new HashSet<>();
    assert geo != null;

    rec: for (GeoRecord record : records) {
        if (record.getNumSamples() < GeoBrowserServiceImpl.MIN_SAMPLES) {
            toRemove.add(record);
            // The record is discarded anyway, so skip the taxon and experiment lookups for it.
            continue rec;
        }

        Collection<String> organisms = record.getOrganisms();
        if (organisms == null || organisms.isEmpty()) {
            continue;
        }

        int i = 0;
        for (String string : organisms) {
            Taxon t = taxonService.findByCommonName(string);
            if (t == null) {
                t = taxonService.findByScientificName(string);
                if (t == null) {
                    // Unknown organism: reject the whole record.
                    toRemove.add(record);
                    continue rec;
                }
            }

            String acc = record.getGeoAccession();
            if (organisms.size() > 1) {
                acc = acc + "." + i;
            }

            DatabaseEntry de = DatabaseEntry.Factory.newInstance();
            de.setExternalDatabase(geo);
            de.setAccession(acc);
            Collection<ExpressionExperiment> ee = expressionExperimentService.findByAccession(de);
            for (ExpressionExperiment expressionExperiment : ee) {
                record.getCorrespondingExperiments().add(expressionExperiment.getId());
            }

            // With several organisms these two values end up reflecting the last accession visited.
            record.setPreviousClicks(localInfo.containsKey(acc) ? localInfo.get(acc).getPreviousClicks() : 0);
            record.setUsable(!localInfo.containsKey(acc) || localInfo.get(acc).isUsable());
            i++;
        }
    }

    records.removeAll(toRemove);
    return records;
}
use of ubic.gemma.model.genome.Taxon in project Gemma by PavlidisLab.
the class GeoConverterImpl method processId.
/**
 * Converts one GEO platform row (probe) into a {@code CompositeSequence} with a minimal
 * {@code BioSequence}, or skips it under strict selection.
 * <p>
 * {@code i} is the index of this probe in the per-probe lists ({@code externalRefs},
 * {@code cloneIdentifiers}, {@code probeOrganism}, platform column data). The method returns
 * {@code i + 1} on every path, including skips.
 *
 * @param platform                GEO platform being converted; its probe-name map is updated when the probe has no mapped name
 * @param arrayDesign             platform the new composite sequence is attached to
 * @param probeOrganismColumn     name of the per-probe organism column, or null if there is none
 * @param externalDb              external database, forwarded to {@code setBsProps}
 * @param sequences               forwarded to {@code setBsProps}
 * @param probeOrganism           per-probe organism values, or null
 * @param primaryTaxon            taxon used for every probe when {@code probeOrganismColumn} is null
 * @param cloneIdentifiers        per-probe clone identifiers, or null
 * @param externalRefs            per-probe external references, or null
 * @param descIter                iterator over descriptions, or null; advanced once for each probe that is kept
 * @param refSeqAccessionPattern  forwarded to {@code setBsProps}
 * @param strictSelection         when true, probes with a blank external accession are skipped unless the
 *                                {@code gene_assignment} column has a value for them
 * @param skipped                 receives the ids of probes skipped because of a blank/"---" gene assignment
 * @param compositeSequences      receives the created composite sequence
 * @param i                       index of the current probe
 * @param id                      identifier of the current probe
 * @return the index of the next probe ({@code i + 1})
 */
private int processId(GeoPlatform platform, ArrayDesign arrayDesign, String probeOrganismColumn, ExternalDatabase externalDb, List<String> sequences, List<String> probeOrganism, Taxon primaryTaxon, List<String> cloneIdentifiers, List<List<String>> externalRefs, Iterator<String> descIter, Pattern refSeqAccessionPattern, boolean strictSelection, List<String> skipped, Collection<CompositeSequence> compositeSequences, int i, String id) {
    String externalAccession = null;
    if (externalRefs != null) {
        externalAccession = this.getExternalAccession(externalRefs, i);
    }

    // NOTE(review): descIter is not advanced on the skip paths below; if it walks the same
    // per-probe rows as the other columns, descriptions would shift after a skip - confirm with caller.
    if (strictSelection && StringUtils.isBlank(externalAccession)) {
        // currently this is crafted to deal with affymetrix exon arrays, but could be expanded.
        // mrna_assignment is less strict than gene_assignment
        // salvage it if it has a gene assignment.
        String filteringColumn = "gene_assignment";
        if (!platform.getColumnNames().contains(filteringColumn)) {
            // we just skip ones that don't have an external accession.
            // The index must still advance, otherwise the next probe would read this probe's columns.
            return i + 1;
        }
        String cd = platform.getColumnData(filteringColumn).get(i);
        if (StringUtils.isBlank(cd) || cd.equals("---")) {
            skipped.add(id);
            if (skipped.size() % 10000 == 0) {
                GeoConverterImpl.log.info("Skipped " + skipped.size() + " elements due to strict selection; last was " + id);
            }
            return i + 1;
        }
        // remaining case here: externalAccession is blank, but there is another column that we think saves it.
    }

    String cloneIdentifier = cloneIdentifiers == null ? null : cloneIdentifiers.get(i);

    String description = "";
    if (externalAccession != null) {
        String[] refs = externalAccession.split(",");
        if (refs.length > 1) {
            // Keep the full list in the description, but use only the first reference as the accession.
            description = "Multiple external sequence references: " + externalAccession + "; ";
            externalAccession = refs[0];
        }
    }
    if (descIter != null) {
        description = description + " " + descIter.next();
    }

    CompositeSequence cs = CompositeSequence.Factory.newInstance();
    String probeName = platform.getProbeNamesInGemma().get(id);
    if (probeName == null) {
        probeName = id;
        if (GeoConverterImpl.log.isDebugEnabled()) {
            GeoConverterImpl.log.debug("Probe retaining original name: " + probeName);
        }
        // must make sure this is populated.
        platform.getProbeNamesInGemma().put(id, id);
    } else {
        if (GeoConverterImpl.log.isDebugEnabled()) {
            GeoConverterImpl.log.debug("Found probe: " + probeName);
        }
    }
    cs.setName(probeName);
    cs.setDescription(description);
    cs.setArrayDesign(arrayDesign);

    // LMD:1647- If there is an organism column given for the probe then set taxon from that, overriding the platform.
    // If probeOrganismColumn is set but this probe has no organism value, probeTaxon stays a blank
    // Taxon instance.
    Taxon probeTaxon = Taxon.Factory.newInstance();
    if (probeOrganism != null && StringUtils.isNotBlank(probeOrganism.get(i))) {
        probeTaxon = this.convertProbeOrganism(probeOrganism.get(i));
    }
    // if there are no probe taxons then all the probes should take the taxon from the primary taxon
    if (probeOrganismColumn == null) {
        probeTaxon = primaryTaxon;
    }

    BioSequence bs = this.createMinimalBioSequence(probeTaxon);
    this.setBsProps(platform, externalDb, sequences, refSeqAccessionPattern, i, id, externalAccession, cloneIdentifier, bs);
    this.checkCs(arrayDesign, externalAccession, cloneIdentifier, cs, probeTaxon, bs);

    compositeSequences.add(cs);
    platformDesignElementMap.get(arrayDesign.getShortName()).put(probeName, cs);
    return i + 1;
}
use of ubic.gemma.model.genome.Taxon in project Gemma by PavlidisLab.
the class GeoConverterImpl method convertChannel.
/**
 * GEO does not keep track of 'biomaterials' that make up different channels. Therefore the two channels effectively
 * make up a single biomaterial, as far as we're concerned. We're losing information here.
 * <p>
 * Adds to the biomaterial: a "Channel n" note in its description, one treatment per non-blank
 * protocol (growth, treatment, extraction, labeling), one characteristic per channel
 * characteristic, plus source, molecule and label characteristics when present. The source taxon
 * is set from the channel organism if the biomaterial has none yet.
 *
 * @param sample      sample the channel belongs to; its GEO accession is used in names and messages
 * @param channel     channel to convert
 * @param bioMaterial biomaterial to populate; if null the method does nothing
 * @throws IllegalArgumentException if the biomaterial already has a source taxon whose scientific
 *                                  name differs from this channel's organism
 */
private void convertChannel(GeoSample sample, GeoChannel channel, BioMaterial bioMaterial) {
    if (bioMaterial == null) {
        return;
    }
    GeoConverterImpl.log.debug("Sample: " + sample.getGeoAccession() + " - Converting channel " + channel.getSourceName());
    bioMaterial.setDescription((bioMaterial.getDescription() == null ? "" : bioMaterial.getDescription() + ";") + "Channel " + channel.getChannelNumber());

    // Each protocol is stored under a name ending in the kind of protocol it actually is.
    this.addChannelProtocolTreatment(sample, channel, bioMaterial, channel.getGrowthProtocol(), "growth");
    this.addChannelProtocolTreatment(sample, channel, bioMaterial, channel.getTreatmentProtocol(), "treatment");
    this.addChannelProtocolTreatment(sample, channel, bioMaterial, channel.getExtractProtocol(), "extraction");
    this.addChannelProtocolTreatment(sample, channel, bioMaterial, channel.getLabelProtocol(), "labeling");

    for (String characteristic : channel.getCharacteristics()) {
        characteristic = this.trimString(characteristic);
        /*
         * Sometimes values are like Age:8 weeks, so we can try to convert them.
         */
        String[] fields = characteristic.split(":");
        String defaultDescription = "GEO Sample characteristic";
        if (fields.length == 2) {
            String category = fields[0].trim();
            String value = fields[1].trim();
            try {
                Characteristic gemmaChar = Characteristic.Factory.newInstance();
                this.convertVariableType(gemmaChar, GeoVariable.convertStringToType(category));
                if (gemmaChar.getCategory() == null) {
                    // No category was assigned by the conversion: the characteristic is dropped.
                    continue;
                }
                gemmaChar.setDescription(defaultDescription);
                gemmaChar.setValue(value);
                gemmaChar.setEvidenceCode(GOEvidenceCode.IIA);
                bioMaterial.getCharacteristics().add(gemmaChar);
            } catch (Exception e) {
                // conversion didn't work, fall back.
                this.doFallback(bioMaterial, characteristic, defaultDescription);
            }
        } else {
            // not exactly one colon-separated pair, just use raw (same as fallback above)
            this.doFallback(bioMaterial, characteristic, defaultDescription);
        }
    }

    if (StringUtils.isNotBlank(channel.getSourceName())) {
        Characteristic sourceChar = Characteristic.Factory.newInstance();
        sourceChar.setDescription("GEO Sample source");
        String characteristic = this.trimString(channel.getSourceName());
        sourceChar.setCategory("BioSource");
        sourceChar.setCategoryUri("http://www.ebi.ac.uk/efo/EFO_0000635");
        sourceChar.setValue(characteristic);
        sourceChar.setEvidenceCode(GOEvidenceCode.IIA);
        bioMaterial.getCharacteristics().add(sourceChar);
    }

    if (StringUtils.isNotBlank(channel.getOrganism())) {
        // if we have a case where the two channels have different taxon throw an exception.
        String currentChannelTaxon = channel.getOrganism();
        if (bioMaterial.getSourceTaxon() != null) {
            String previousChannelTaxon = bioMaterial.getSourceTaxon().getScientificName();
            if (previousChannelTaxon != null && !(previousChannelTaxon.equals(currentChannelTaxon))) {
                throw new IllegalArgumentException("Channel 1 taxon is " + bioMaterial.getSourceTaxon().getScientificName() + " Channel 2 taxon is " + currentChannelTaxon + " Check that is expected for sample " + sample.getGeoAccession());
            }
        } else {
            // get it from the channel.
            Taxon taxon = Taxon.Factory.newInstance();
            taxon.setIsSpecies(true);
            taxon.setScientificName(channel.getOrganism());
            // plausible default, doesn't matter.
            taxon.setIsGenesUsable(true);
            bioMaterial.setSourceTaxon(taxon);
        }
    }

    if (channel.getMolecule() != null) {
        // this we can convert automatically pretty easily.
        Characteristic c = channel.getMoleculeAsCharacteristic();
        bioMaterial.getCharacteristics().add(c);
    }

    if (StringUtils.isNotBlank(channel.getLabel())) {
        String characteristic = this.trimString(channel.getLabel());
        // This is typically something like "biotin-labeled nucleotides", which we can convert later.
        Characteristic labelChar = Characteristic.Factory.newInstance();
        labelChar.setDescription("GEO Sample label");
        labelChar.setCategory("LabelCompound");
        labelChar.setCategoryUri("http://www.ebi.ac.uk/efo/EFO_0000562");
        labelChar.setValue(characteristic);
        labelChar.setEvidenceCode(GOEvidenceCode.IIA);
        bioMaterial.getCharacteristics().add(labelChar);
    }
}

/**
 * Adds a treatment named "&lt;accession&gt; channel &lt;n&gt; &lt;kind&gt;" describing the given protocol text to
 * the biomaterial; does nothing when the protocol text is blank.
 */
private void addChannelProtocolTreatment(GeoSample sample, GeoChannel channel, BioMaterial bioMaterial, String protocol, String kind) {
    if (StringUtils.isBlank(protocol)) {
        return;
    }
    Treatment treatment = Treatment.Factory.newInstance();
    treatment.setName(sample.getGeoAccession() + " channel " + channel.getChannelNumber() + " " + kind);
    treatment.setDescription(protocol);
    bioMaterial.getTreatments().add(treatment);
}
Aggregations