use of ubic.gemma.model.genome.biosequence.BioSequence in project Gemma by PavlidisLab.
the class MockBlat method blatQuery.
/**
 * Fabricates a single mock BLAT hit for the given query sequence.
 * <p>
 * The hit is placed at a random start on a dummy target chromosome named "XXX" whose backing sequence has
 * length 1e7. It covers the whole query (query start 0 to query length), reports one mismatch and no gaps
 * or repeat matches. The target end is simply {@code start + query length}; it is not clamped to the dummy
 * chromosome's length.
 *
 * @param b the query sequence; {@code b.getLength()} is unboxed, so it must not be null
 * @return a mutable collection containing exactly one mock result
 */
@Override
public Collection<BlatResult> blatQuery(BioSequence b) {
    // Dummy target: a non-persistent test sequence standing in for the chromosome's sequence.
    BioSequence chromseq = PersistentDummyObjectHelper.getTestNonPersistentBioSequence(taxon);
    chromseq.setLength((long) 1e7);

    BlatResult br = BlatResult.Factory.newInstance();
    br.setTargetChromosome(new Chromosome("XXX", null, chromseq, taxon));
    assert br.getTargetChromosome().getSequence() != null;

    // Random start anywhere on the dummy chromosome.
    long targetStart = MockBlat.RANDOM.nextInt(chromseq.getLength().intValue());
    br.setQuerySequence(b);
    br.setTargetStart(targetStart);

    // Read the query length once; every remaining coordinate is derived from it.
    long queryLength = b.getLength();
    br.setTargetEnd(targetStart + queryLength);

    // All bases match except one.
    br.setMatches((int) (queryLength - 1));
    br.setMismatches(1);
    br.setRepMatches(0);

    // Ungapped alignment over the full query.
    br.setQueryGapCount(0);
    br.setQueryGapBases(0);
    br.setQueryStart(0);
    br.setQueryEnd((int) queryLength);
    br.setTargetGapBases(0);
    br.setTargetGapCount(0);

    Collection<BlatResult> result = new HashSet<>();
    result.add(br);
    return result;
}
use of ubic.gemma.model.genome.biosequence.BioSequence in project Gemma by PavlidisLab.
the class GeoConverterTest method testArrayTaxonDifferentToSampleTaxon.
/*
 * GSE2388 is an example of a series where the array taxon and the sample taxon do not match. This test checks
 * that the biomaterial taxon and the array taxa are each set correctly.
 */
@SuppressWarnings("unchecked")
@Test
@Transactional
public void testArrayTaxonDifferentToSampleTaxon() throws Exception {
    Taxon rainbowTrout = taxonService.findByAbbreviation("omyk");
    assertNotNull(rainbowTrout);
    Taxon atlanticSalm = taxonService.findByAbbreviation("ssal");
    assertNotNull(atlanticSalm);

    GeoFamilyParser parser = new GeoFamilyParser();
    // try-with-resources so the fixture stream is released even if parsing fails
    try (InputStream is = new GZIPInputStream(
            this.getClass().getResourceAsStream("/data/loader/expression/geo/GSE2388_family.soft.gz"))) {
        parser.parse(is);
    }

    // Series and platform both come from the same (first) parse result.
    GeoParseResult parseResult = (GeoParseResult) parser.getResults().iterator().next();
    GeoSeries series = parseResult.getSeriesMap().get("GSE2388");
    GeoPlatform platform = parseResult.getPlatformMap().get("GPL966");

    DatasetCombiner datasetCombiner = new DatasetCombiner();
    GeoSampleCorrespondence correspondence = datasetCombiner.findGSECorrespondence(series);
    series.setSampleCorrespondence(correspondence);

    // assert that the biomaterials have been set as one taxon
    Object seriesResult = gc.convert(series);
    assertNotNull(seriesResult);
    Collection<ExpressionExperiment> ees = (Collection<ExpressionExperiment>) seriesResult;
    ExpressionExperiment exper = ees.iterator().next();
    Collection<BioAssay> bioassays = exper.getBioAssays();
    BioMaterial material = bioassays.iterator().next().getSampleUsed();
    Taxon taxon = material.getSourceTaxon();
    assertEquals("Oncorhynchus kisutch", taxon.getScientificName());

    // assert that the platform is another taxon
    Object resultPlatForm = gc.convert(platform);
    ArrayDesign ad = (ArrayDesign) resultPlatForm;
    assertNotNull(ad);

    Set<Taxon> taxa = new HashSet<>();
    for (CompositeSequence cs : ad.getCompositeSequences()) {
        BioSequence bs = cs.getBiologicalCharacteristic();
        if (bs != null) {
            assertNotNull(bs.getTaxon());
            log.info(bs.getTaxon());
            taxa.add(bs.getTaxon());
        }
    }

    // Probes without a sequence are skipped above, so they contribute no taxon; exactly the two taxa looked up
    // at the start of the test are expected.
    assertEquals(2, taxa.size());
    assertTrue(taxa.contains(rainbowTrout));
    assertTrue(taxa.contains(atlanticSalm));
}
use of ubic.gemma.model.genome.biosequence.BioSequence in project Gemma by PavlidisLab.
the class GeoConverterTest method testSingleTaxonOnArrayWithNoOrganismColumn.
/*
 * Ensure that if the platform has only one taxon, that taxon is still set correctly on the converted array
 * design's sequences.
 */
@Test
public void testSingleTaxonOnArrayWithNoOrganismColumn() throws Exception {
    GeoFamilyParser parser = new GeoFamilyParser();
    parser.setProcessPlatformsOnly(true);
    // try-with-resources so the fixture stream is released even if parsing fails
    try (InputStream is = new GZIPInputStream(
            this.getClass().getResourceAsStream("/data/loader/expression/geo/GPL226_family.soft.gz"))) {
        parser.parse(is);
    }

    GeoPlatform platform = ((GeoParseResult) parser.getResults().iterator().next()).getPlatformMap().get("GPL226");
    Object result = this.gc.convert(platform);
    ArrayDesign ad = (ArrayDesign) result;
    assertNotNull(ad);

    // Collect the distinct taxa over all probes that carry a sequence.
    Set<Taxon> distinctTaxa = new HashSet<>();
    for (CompositeSequence cs : ad.getCompositeSequences()) {
        BioSequence bs = cs.getBiologicalCharacteristic();
        if (bs != null) {
            distinctTaxa.add(bs.getTaxon());
        }
    }
    assertEquals(1, distinctTaxa.size());
}
use of ubic.gemma.model.genome.biosequence.BioSequence in project Gemma by PavlidisLab.
the class FastaParserTest method testParseInputStream.
/*
 * Parses the fixture held in the field f and checks that 172 sequences come back; each parsed sequence is
 * written to the debug log.
 */
public void testParseInputStream() throws Exception {
    FastaParser parser = new FastaParser();
    parser.parse(f);

    Collection<BioSequence> parsed = parser.getResults();
    TestCase.assertNotNull(parsed);
    TestCase.assertEquals(172, parsed.size());

    for (BioSequence sequence : parsed) {
        String summary = "NAME=" + sequence.getName() + " DESC=" + sequence.getDescription() + " SEQ="
                + sequence.getSequence();
        FastaParserTest.log.debug(summary);
    }
}
use of ubic.gemma.model.genome.biosequence.BioSequence in project Gemma by PavlidisLab.
the class FastaParserTest method testParsecodelink.
/*
 * Parses the codelink test-sequence resource and checks that 22 sequences come back; each parsed sequence is
 * written to the debug log.
 */
public void testParsecodelink() throws Exception {
    try (InputStream codelinkStream = FastaParserTest.class
            .getResourceAsStream("/data/loader/genome/codelink.testsequence.txt")) {
        FastaParser parser = new FastaParser();
        parser.parse(codelinkStream);

        Collection<BioSequence> parsed = parser.getResults();
        TestCase.assertNotNull(parsed);
        TestCase.assertEquals(22, parsed.size());

        for (BioSequence sequence : parsed) {
            String summary = "NAME=" + sequence.getName() + " DESC=" + sequence.getDescription() + " SEQ="
                    + sequence.getSequence();
            FastaParserTest.log.debug(summary);
        }
    }
}
Aggregations