use of ubic.gemma.model.genome.biosequence.BioSequence in project Gemma by PavlidisLab.
the class ArrayDesignSequenceAssociationCli method doWork.
/**
 * Runs the sequence-association tool.
 * <p>
 * After the command line has been processed, one of two things happens:
 * <ul>
 * <li>with option {@code s}, a single accession is processed and the method returns;</li>
 * <li>otherwise every platform in {@code arrayDesignsToProcess} is thawed and given sequences from a sequence
 * file (option {@code f}), from a file of identifiers (option {@code i}), or from the BLAST databases.</li>
 * </ul>
 * An audit entry is recorded for each platform once it has been processed.
 *
 * @param args raw command-line arguments, passed on to {@code processCommandLine}
 * @return the exception reported by {@code processCommandLine} or caught while processing, or {@code null} if
 *         neither occurred
 */
@Override
protected Exception doWork(String[] args) {
    try {
        Exception commandLineError = this.processCommandLine(args);
        if (commandLineError != null) {
            return commandLineError;
        }

        // Single-accession mode: handle just that one sequence and stop, without touching any platform.
        if (this.hasOption('s')) {
            BioSequence sequence = arrayDesignSequenceProcessingService.processSingleAccession(this.sequenceId,
                    new String[] { "nt", "est_others", "est_human", "est_mouse" }, null, force);
            if (sequence != null) {
                AbstractCLI.log.info("Updated or created " + sequence);
            }
            return null;
        }

        for (ArrayDesign candidate : this.arrayDesignsToProcess) {
            ArrayDesign thawedDesign = this.thaw(candidate);

            SequenceType parsedType = SequenceType.fromString(sequenceType);
            if (parsedType == null) {
                AbstractCLI.log.error("No sequenceType " + sequenceType + " found");
                // NOTE(review): no return follows bail here, unlike the unreadable-file case of the 'f'
                // branch - confirm bail never returns normally.
                this.bail(ErrorCode.INVALID_OPTION);
            }

            if (this.hasOption('f')) {
                // Sequences are supplied directly in a (plain or compressed) file.
                try (InputStream sequenceStream = FileTools.getInputStreamFromPlainOrCompressedFile(sequenceFile)) {
                    if (sequenceStream == null) {
                        AbstractCLI.log.error("No file " + sequenceFile + " was readable");
                        this.bail(ErrorCode.INVALID_OPTION);
                        return null;
                    }

                    // The taxon is optional, but when one is named it has to exist.
                    Taxon taxon = null;
                    if (this.hasOption('t')) {
                        taxon = taxonService.findByCommonName(this.taxonName);
                        if (taxon == null) {
                            throw new IllegalArgumentException("No taxon named " + taxonName);
                        }
                    }

                    AbstractCLI.log.info("Processing ArrayDesign...");
                    arrayDesignSequenceProcessingService.processArrayDesign(thawedDesign, sequenceStream, parsedType,
                            taxon);
                    this.audit(thawedDesign, "Sequences read from file: " + sequenceFile);
                }
            } else if (this.hasOption('i')) {
                // Only identifiers are supplied; the service is also given the BLAST database names.
                try (InputStream idStream = FileTools.getInputStreamFromPlainOrCompressedFile(idFile)) {
                    if (idStream == null) {
                        AbstractCLI.log.error("No file " + idFile + " was readable");
                        // NOTE(review): the 'f' branch returns after bail, this one does not - confirm that
                        // bail never returns normally, otherwise a null stream is passed on below.
                        this.bail(ErrorCode.INVALID_OPTION);
                    }

                    // The taxon is optional, but when one is named it has to exist.
                    Taxon taxon = null;
                    if (this.hasOption('t')) {
                        taxon = taxonService.findByCommonName(this.taxonName);
                        if (taxon == null) {
                            throw new IllegalArgumentException("No taxon named " + taxonName);
                        }
                    }

                    AbstractCLI.log.info("Processing ArrayDesign...");
                    arrayDesignSequenceProcessingService.processArrayDesign(thawedDesign, idStream,
                            new String[] { "nt", "est_others", "est_human", "est_mouse" }, null, taxon, force);
                    this.audit(thawedDesign, "Sequences identifiers from file: " + idFile);
                }
            } else {
                // No input file at all: rely on the BLAST databases alone.
                AbstractCLI.log.info("Retrieving sequences from BLAST databases");
                arrayDesignSequenceProcessingService.processArrayDesign(thawedDesign,
                        new String[] { "nt", "est_others", "est_human", "est_mouse" }, null, force);
                this.audit(thawedDesign, "Sequence looked up from BLAST databases");
            }
        }
    } catch (Exception e) {
        // Any failure is logged and handed back to the caller instead of being thrown.
        AbstractCLI.log.error(e, e);
        return e;
    }
    return null;
}
use of ubic.gemma.model.genome.biosequence.BioSequence in project Gemma by PavlidisLab.
the class VectorMergingServiceTest method test.
/**
 * Loads GSE3443 from local test files, merges its platforms into one, switches the experiment to the merged
 * platform, merges the vectors, and checks the expected counts at each stage.
 */
@Test
public final void test() throws Exception {
    /*
     * A persistent experiment that uses several array designs is needed: GSE3443. The designs are merged, the
     * vectors are switched to the merged design, and then the vectors are merged.
     *
     * The experiment uses these GPLs:
     *
     * GPL2868, GPL2933, GPL2934, GPL2935, GPL2936, GPL2937, GPL2938
     *
     * Example of a sequence appearing on more than one platform: N57553
     */
    geoService.setGeoDomainObjectGenerator(
            new GeoDomainObjectGeneratorLocal(this.getTestFileBasePath("gse3443merge")));
    Collection<?> loaded = geoService.fetchAndLoad("GSE3443", false, false, false);
    ee = (ExpressionExperiment) loaded.iterator().next();
    ee = this.eeService.thawLite(ee);

    Collection<ArrayDesign> platforms = eeService.getArrayDesignsUsed(ee);
    assertEquals(7, platforms.size());

    /*
     * Count the distinct sequences over all platforms. The merged platform needs that many elements, plus extras
     * for duplicated sequences (e.g. elements that don't have a sequence...).
     */
    Collection<ArrayDesign> thawedPlatforms = new HashSet<>();
    Set<BioSequence> sequencesBeforeMerge = new HashSet<>();
    for (ArrayDesign platform : platforms) {
        ArrayDesign thawed = arrayDesignService.thaw(platform);
        thawedPlatforms.add(thawed);
        for (CompositeSequence element : thawed.getCompositeSequences()) {
            BioSequence sequence = element.getBiologicalCharacteristic();
            log.info(element + " " + sequence);
            sequencesBeforeMerge.add(sequence);
        }
    }
    assertEquals(63, sequencesBeforeMerge.size());

    /*
     * Total number of elements over all 7 platforms.
     */
    int elementCount = 0;
    for (ArrayDesign thawed : thawedPlatforms) {
        elementCount += thawed.getCompositeSequences().size();
    }
    assertEquals(140, elementCount);

    ArrayDesign firstPlatform = thawedPlatforms.iterator().next();
    // NOTE(review): this removes from the collection that is not read again below; the set handed to merge
    // still contains firstPlatform - confirm that is intended.
    platforms.remove(firstPlatform);
    assertEquals(null, firstPlatform.getMergedInto());

    String mergedName = "testMerge" + RandomStringUtils.randomAlphabetic(5);
    String mergedShortName = "merged" + RandomStringUtils.randomAlphabetic(5);
    mergedAA = arrayDesignMergeService.merge(firstPlatform, thawedPlatforms, mergedName, mergedShortName, false);
    assertEquals(72, mergedAA.getCompositeSequences().size());

    Set<BioSequence> sequencesAfterMerge = new HashSet<>();
    for (CompositeSequence element : mergedAA.getCompositeSequences()) {
        sequencesAfterMerge.add(element.getBiologicalCharacteristic());
    }
    assertEquals(63, sequencesAfterMerge.size());
    // Stated explicitly: the merged design must carry as many distinct sequences as the old designs did.
    assertEquals(sequencesBeforeMerge.size(), sequencesAfterMerge.size());

    ee = eeService.thaw(ee);
    assertEquals(1828, ee.getRawExpressionDataVectors().size());

    ee = eePlatformSwitchService.switchExperimentToArrayDesign(ee, mergedAA);
    ee = eeService.thaw(ee);

    // Every assay and every raw vector must now point at the merged platform.
    for (BioAssay assay : ee.getBioAssays()) {
        assertEquals(mergedAA, assay.getArrayDesignUsed());
    }
    for (RawExpressionDataVector vector : ee.getRawExpressionDataVectors()) {
        assertEquals(mergedAA, vector.getDesignElement().getArrayDesign());
    }
    assertEquals(15, ee.getQuantitationTypes().size());
    assertEquals(1828, ee.getRawExpressionDataVectors().size());

    ee = vectorMergingService.mergeVectors(ee);

    // The processed data must have one vector per element of the merged platform (72, as asserted above).
    Collection<ProcessedExpressionDataVector> processedVectors = processedExpressionDataVectorService
            .getProcessedDataVectors(ee);
    assertEquals(72, processedVectors.size());

    ee = eeService.thaw(ee);
    Collection<DoubleVectorValueObject> limitedArrays = processedExpressionDataVectorService
            .getProcessedDataArrays(ee, 50);
    assertEquals(50, limitedArrays.size());
}
use of ubic.gemma.model.genome.biosequence.BioSequence in project Gemma by PavlidisLab.
the class ProcessedExpressionDataVectorServiceTest method getGeneAssociatedWithEe.
/**
 * For every composite sequence on the platforms used by the experiment, persists a new bio sequence, attaches
 * it to the composite sequence, and persists a BLAT association linking that sequence to a product of a test
 * gene.
 *
 * @param ee experiment whose platforms are looked up
 * @return the genes obtained from {@code getTestPersistentGene}, collected in a set
 */
private Collection<Gene> getGeneAssociatedWithEe(ExpressionExperiment ee) {
    Collection<Gene> createdGenes = new HashSet<>();
    Collection<ArrayDesign> platforms = this.expressionExperimentService.getArrayDesignsUsed(ee);

    for (ArrayDesign platform : platforms) {
        Taxon mouse = this.getTaxon("mouse");
        ArrayDesign thawed = this.arrayDesignService.thaw(platform);

        for (CompositeSequence element : thawed.getCompositeSequences()) {
            Gene gene = this.getTestPersistentGene();

            BlatAssociation association = BlatAssociation.Factory.newInstance();
            association.setGeneProduct(gene.getProducts().iterator().next());

            // A new sequence with a random name, persisted before it is attached to anything.
            BioSequence sequence = BioSequence.Factory.newInstance();
            sequence.setName(RandomStringUtils.random(10));
            sequence.setTaxon(mouse);
            sequence = (BioSequence) persisterHelper.persist(sequence);
            assertNotNull(sequence);

            // Attach the sequence to the element, then reload the element to check that the link is there.
            element.setBiologicalCharacteristic(sequence);
            compositeSequenceService.update(element);
            CompositeSequence reloaded = compositeSequenceService.load(element.getId());
            assertNotNull(reloaded.getBiologicalCharacteristic());

            // Wire the sequence to the gene product through a BLAT result and persist the association.
            BlatResult blatResult = BlatResult.Factory.newInstance();
            blatResult.setQuerySequence(sequence);
            association.setBlatResult(blatResult);
            association.setBioSequence(sequence);
            persisterHelper.persist(association);

            createdGenes.add(gene);
        }
    }
    return createdGenes;
}
use of ubic.gemma.model.genome.biosequence.BioSequence in project Gemma by PavlidisLab.
the class ArrayDesignSequenceAlignmentandMappingTest method testProcessArrayDesign.
@Test
public final void testProcessArrayDesign() throws Exception {
ad = arrayDesignService.thaw(ad);
Collection<BioSequence> seqs = app.processArrayDesign(ad, new String[] { "testblastdb", "testblastdbPartTwo" }, FileTools.resourceToPath("/data/loader/genome/blast"), true, new MockFastaCmd(ad.getPrimaryTaxon()));
assertNotNull(seqs);
assertTrue(!seqs.isEmpty());
Blat mockBlat = new MockBlat(ad.getPrimaryTaxon());
ad = arrayDesignService.thaw(ad);
Collection<BlatResult> blatResults = aligner.processArrayDesign(ad, mockBlat);
assertTrue(blatResults.size() > 200);
}
use of ubic.gemma.model.genome.biosequence.BioSequence in project Gemma by PavlidisLab.
the class ArrayDesignSequenceProcessorTest method testFetchAndLoadWithSequences.
@Test
public void testFetchAndLoadWithSequences() throws Exception {
GeoService geoService = this.getBean(GeoService.class);
geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGeneratorLocal(this.getTestFileBasePath()));
@SuppressWarnings("unchecked") final Collection<ArrayDesign> ads = (Collection<ArrayDesign>) geoService.fetchAndLoad("GPL226", true, true, false);
result = ads.iterator().next();
result = arrayDesignService.thaw(result);
try {
Collection<BioSequence> res = app.processArrayDesign(result, new String[] { "testblastdb", "testblastdbPartTwo" }, FileTools.resourceToPath("/data/loader/genome/blast"), false);
assertNotNull(res);
for (BioSequence sequence : res) {
assertNotNull(sequence.getSequence());
}
} catch (Exception e) {
if (StringUtils.isNotBlank(e.getMessage()) && e.getMessage().contains("not found")) {
log.error("fastacmd is not installed or is misconfigured. Test skipped");
return;
}
throw e;
}
}
Aggregations