use of ubic.gemma.model.expression.designElement.CompositeSequence in project Gemma by PavlidisLab.
the class GeoConverterTest method testImageClones.
/*
 * We should no longer use IMAGE:XXXXX as the sequence name.
 *
 * Parses the GPL226 platform fixture, converts it to an ArrayDesign and checks, for every composite sequence, that
 * the biological characteristic either has a name not starting with "IMAGE" or has no sequence database entry.
 */
@Test
public void testImageClones() throws Exception {
    GeoFamilyParser parser = new GeoFamilyParser();
    // Close the fixture stream as soon as parsing is done so it is not leaked.
    try (InputStream is = new GZIPInputStream(
            this.getClass().getResourceAsStream("/data/loader/expression/geo/GPL226_family.soft.gz"))) {
        parser.parse(is);
    }
    GeoPlatform platform = ((GeoParseResult) parser.getResults().iterator().next()).getPlatformMap().get("GPL226");
    Object result = this.gc.convert(platform);
    ArrayDesign ad = (ArrayDesign) result;
    assertNotNull(ad);
    for (CompositeSequence cs : ad.getCompositeSequences()) {
        BioSequence bs = cs.getBiologicalCharacteristic();
        // An "IMAGE..." name is only tolerated when no database entry is attached to the sequence.
        assertTrue("Got: " + bs.getName(), !bs.getName().startsWith("IMAGE") || bs.getSequenceDatabaseEntry() == null);
    }
}
use of ubic.gemma.model.expression.designElement.CompositeSequence in project Gemma by PavlidisLab.
the class GeoConverterTest method testWithImages.
/*
 * Has image clones.
 *
 * Parses only the platform section of the GPL890 fixture, converts it, and succeeds as soon as one composite
 * sequence has a biological characteristic with a non-null sequence; otherwise the test fails.
 */
@Test
public final void testWithImages() throws Exception {
    GeoFamilyParser familyParser = new GeoFamilyParser();
    familyParser.setProcessPlatformsOnly(true);
    try (InputStream in = new GZIPInputStream(
            this.getClass().getResourceAsStream("/data/loader/expression/geo/GPL890_family.soft.gz"))) {
        familyParser.parse(in);
    }

    GeoParseResult parsed = (GeoParseResult) familyParser.getResults().iterator().next();
    GeoPlatform geoPlatform = parsed.getPlatformMap().get("GPL890");
    ArrayDesign arrayDesign = (ArrayDesign) this.gc.convert(geoPlatform);

    // One probe with an actual sequence is enough.
    boolean sequenceFound = false;
    for (CompositeSequence probe : arrayDesign.getCompositeSequences()) {
        BioSequence bioSequence = probe.getBiologicalCharacteristic();
        if (bioSequence == null || bioSequence.getSequence() == null) {
            continue;
        }
        sequenceFound = true;
        break;
    }

    if (!sequenceFound) {
        fail("No sequences!");
    }
}
use of ubic.gemma.model.expression.designElement.CompositeSequence in project Gemma by PavlidisLab.
the class GeoConverterTest method testMultipleTaxaIdentifiedBYAbbreviationsOnArrayWithOrganismColumn.
/*
 * Tests that an array design can have multiple taxa stored against it, and that abbreviations used as probe names
 * are mapped to the correct taxa when the abbreviation is stored in the DB.
 *
 * The four expected taxa are looked up by abbreviation through the taxon service, the GPL2899 platform fixture is
 * parsed and converted, and the taxa of all non-null biological characteristics are collected and compared.
 */
@Test
public void testMultipleTaxaIdentifiedBYAbbreviationsOnArrayWithOrganismColumn() throws Exception {
    Taxon rainbowTrout = taxonService.findByAbbreviation("omyk");
    Taxon whiteFish = taxonService.findByAbbreviation("cclu");
    Taxon rainbowSmelt = taxonService.findByAbbreviation("omor");
    Taxon atlanticSalm = taxonService.findByAbbreviation("ssal");
    // Fail early, with a clear cause, if any expected taxon is missing from the database. The collected set
    // only ever holds non-null taxa, so a missing one would otherwise show up as a bare contains() failure.
    assertNotNull(atlanticSalm);
    assertNotNull(rainbowTrout);
    assertNotNull(whiteFish);
    assertNotNull(rainbowSmelt);
    // prototype bean.
    gc = this.getBean(GeoConverter.class);
    GeoFamilyParser parser = new GeoFamilyParser();
    // parse only the platform
    parser.setProcessPlatformsOnly(true);
    // Close the fixture stream as soon as parsing is done so it is not leaked.
    try (InputStream is = new GZIPInputStream(
            this.getClass().getResourceAsStream("/data/loader/expression/geo/GPL2899_family.soft.gz"))) {
        parser.parse(is);
    }
    GeoPlatform platform = ((GeoParseResult) parser.getResults().iterator().next()).getPlatformMap().get("GPL2899");
    Object result = gc.convert(platform);
    ArrayDesign ad = (ArrayDesign) result;
    assertNotNull(ad);
    Set<Taxon> taxa = new HashSet<>();
    for (CompositeSequence cs : ad.getCompositeSequences()) {
        BioSequence bs = cs.getBiologicalCharacteristic();
        if (bs != null) {
            assertNotNull(bs.getTaxon());
            taxa.add(bs.getTaxon());
        }
    }
    assertEquals(4, taxa.size());
    // original file has five taxa, test file just kept four.
    assertTrue(taxa.contains(atlanticSalm));
    assertTrue(taxa.contains(rainbowTrout));
    assertTrue(taxa.contains(whiteFish));
    assertTrue(taxa.contains(rainbowSmelt));
}
use of ubic.gemma.model.expression.designElement.CompositeSequence in project Gemma by PavlidisLab.
the class CommonQueries method getCs2GeneMap.
/**
 * Runs a read-only, cacheable HQL query joining genes, their products, BioSequence2GeneProduct associations and
 * composite sequences, restricted to the given genes, and hands the scrolled results to
 * {@code CommonQueries.addGenes} to fill the returned map.
 *
 * @param genes   genes to look up; bound to the query's {@code genes} list parameter
 * @param session Hibernate session used to create and execute the query
 * @return map of probes to input genes they map to. Other genes those probes might detect are not included.
 */
public static Map<CompositeSequence, Collection<Gene>> getCs2GeneMap(Collection<Gene> genes, Session session) {
    StopWatch timer = new StopWatch();
    timer.start();
    final String csQueryString = "select distinct cs, gene from Gene as gene" + " inner join gene.products gp, BioSequence2GeneProduct ba, CompositeSequence cs " + " where ba.bioSequence=cs.biologicalCharacteristic and ba.geneProduct = gp" + " and gene in (:genes) ";
    Map<CompositeSequence, Collection<Gene>> cs2gene = new HashMap<>();
    org.hibernate.Query queryObject = session.createQuery(csQueryString);
    queryObject.setCacheable(true);
    queryObject.setParameterList("genes", genes);
    queryObject.setReadOnly(true);
    queryObject.setFlushMode(FlushMode.MANUAL);
    ScrollableResults results = queryObject.scroll(ScrollMode.FORWARD_ONLY);
    try {
        CommonQueries.addGenes(cs2gene, results);
    } finally {
        // Always release the scrollable results, even if populating the map throws.
        results.close();
    }
    // Only report queries that took longer than 200 ms.
    if (timer.getTime() > 200) {
        CommonQueries.log.info("Get cs2gene for " + genes.size() + " :" + timer.getTime() + "ms");
    }
    return cs2gene;
}
use of ubic.gemma.model.expression.designElement.CompositeSequence in project Gemma by PavlidisLab.
the class CommonQueries method getCs2GeneMap.
/**
 * Runs a read-only, cacheable HQL query joining genes, their products, BioSequence2GeneProduct associations and
 * composite sequences, restricted to the given genes and to composite sequences whose array design is one of
 * the given array designs. The scrolled results are handed to {@code CommonQueries.addGenes} to fill the
 * returned map.
 *
 * @param genes        genes to look up; bound to the query's {@code genes} list parameter
 * @param arrayDesigns array designs to restrict to; bound to the query's {@code ads} list parameter
 * @param session      Hibernate session used to create and execute the query
 * @return map of composite sequences to the input genes, as populated by {@code CommonQueries.addGenes}
 */
public static Map<CompositeSequence, Collection<Gene>> getCs2GeneMap(Collection<Gene> genes, Collection<ArrayDesign> arrayDesigns, Session session) {
    StopWatch timer = new StopWatch();
    timer.start();
    final String csQueryString = "select distinct cs, gene from Gene as gene" + " inner join gene.products gp, BioSequence2GeneProduct ba, CompositeSequence cs " + " where ba.bioSequence=cs.biologicalCharacteristic and ba.geneProduct = gp" + " and gene in (:genes) and cs.arrayDesign in (:ads) ";
    Map<CompositeSequence, Collection<Gene>> cs2gene = new HashMap<>();
    Query queryObject = session.createQuery(csQueryString);
    queryObject.setCacheable(true);
    queryObject.setParameterList("genes", genes);
    queryObject.setParameterList("ads", arrayDesigns);
    queryObject.setReadOnly(true);
    queryObject.setFlushMode(FlushMode.MANUAL);
    ScrollableResults results = queryObject.scroll(ScrollMode.FORWARD_ONLY);
    try {
        CommonQueries.addGenes(cs2gene, results);
    } finally {
        // Always release the scrollable results, even if populating the map throws.
        results.close();
    }
    // Only report queries that took longer than 200 ms.
    if (timer.getTime() > 200) {
        CommonQueries.log.info("Get cs2gene for " + genes.size() + " :" + timer.getTime() + "ms");
    }
    return cs2gene;
}
Aggregations