use of ubic.gemma.model.genome.biosequence.BioSequence in project Gemma by PavlidisLab.
the class SearchServiceImpl method compassBioSequenceSearch.
/**
 * A compass backed search that finds biosequences that match the search string. Runs the compass search configured
 * by {@code compassBiosequence}, then converts gene hits (either supplied by the caller or obtained by running a gene
 * search) into their associated biosequences and appends those to the results.
 *
 * @param settings                  the search settings; the query string is read from here
 * @param previousGeneSearchResults Can be null, otherwise used to avoid a second search for genes. The biosequences
 *                                  for the genes are added to the final results.
 * @return the direct hits together with the biosequence hits derived from the genes
 */
private Collection<SearchResult> compassBioSequenceSearch(SearchSettings settings, Collection<SearchResult> previousGeneSearchResults) {
    Collection<SearchResult> results = this.compassSearch(compassBiosequence, settings);

    Collection<SearchResult> geneResults;
    if (previousGeneSearchResults == null) {
        SearchServiceImpl.log.info("Biosequence Search: running gene search with " + settings.getQuery());
        geneResults = this.compassGeneSearch(settings);
    } else {
        SearchServiceImpl.log.info("Biosequence Search: using previous results");
        geneResults = previousGeneSearchResults;
    }

    // Remember which search result each gene came from, so it can be passed along with that gene's sequences.
    Map<Gene, SearchResult> genes = new HashMap<>();
    for (SearchResult sr : geneResults) {
        Object resultObject = sr.getResultObject();
        if (resultObject instanceof Gene) {
            genes.put((Gene) resultObject, sr);
        } else {
            // see bug 1774 -- may not be happening any more.
            // A missing result object is reported the same way instead of failing with a NullPointerException.
            String actualType = resultObject == null ? "null" : resultObject.getClass().toString();
            SearchServiceImpl.log.warn("Expected a Gene, got a " + actualType + " on query=" + settings.getQuery());
        }
    }

    Map<Gene, Collection<BioSequence>> seqsFromDb = bioSequenceService.findByGenes(genes.keySet());
    for (Map.Entry<Gene, Collection<BioSequence>> entry : seqsFromDb.entrySet()) {
        List<BioSequence> bs = new ArrayList<>(entry.getValue());
        results.addAll(this.dbHitsToSearchResult(bs, genes.get(entry.getKey()), null));
    }
    return results;
}
use of ubic.gemma.model.genome.biosequence.BioSequence in project Gemma by PavlidisLab.
the class ExpressionDataDoubleMatrixTest method testConstructExpressionDataDoubleMatrixWithGeoValues.
/**
 * This is a self-contained test. That is, it does not depend on the setup in {@code onSetUpInTransaction}. It tests
 * creating an {@link ExpressionDataDoubleMatrix} using real values from the Gene Expression Omnibus (GEO). That is,
 * we have obtained information from GSE994. The probe sets used are 218120_s_at and 121_at, and the samples used
 * are GSM15697 and GSM15744. Specifically, the Gemma objects that correspond to the GEO objects are:
 * <ul>
 * <li>DesignElement 1 = 218120_s_at, DesignElement 2 = 121_at</li>
 * <li>BioAssay 1 = "Current Smoker 73", BioAssay 2 = "Former Smoker 34"</li>
 * <li>BioMaterial 1 = "GSM15697", BioMaterial 2 = "GSM15744"</li>
 * <li>BioAssayDimension = "GSM15697, GSM15744" (the names of all the biomaterials).</li>
 * </ul>
 * The resulting matrix is expected to have two rows (one per vector) and two columns (one per assay).
 */
@Test
public void testConstructExpressionDataDoubleMatrixWithGeoValues() {
    ByteArrayConverter bac = new ByteArrayConverter();
    ee = ExpressionExperiment.Factory.newInstance();

    // The single quantitation type shared by both vectors.
    QuantitationType qt = QuantitationType.Factory.newInstance();
    qt.setName("VALUE");
    qt.setIsBackgroundSubtracted(false);
    qt.setIsNormalized(false);
    qt.setIsBackground(false);
    qt.setIsRatio(false);
    qt.setIsPreferred(true);
    qt.setIsMaskedPreferred(false);
    qt.setRepresentation(PrimitiveType.DOUBLE);

    // Two assays, each using one sample, grouped in one dimension.
    BioAssayDimension bioAssayDimension = BioAssayDimension.Factory.newInstance();
    bioAssayDimension.setName("GSM15697, GSM15744");
    List<BioAssay> assays = new ArrayList<>();

    BioAssay assay1 = BioAssay.Factory.newInstance();
    assay1.setName("Current Smoker 73");
    BioMaterial sample1 = BioMaterial.Factory.newInstance();
    sample1.setName("GSM15697");
    assay1.setSampleUsed(sample1);
    assays.add(assay1);

    BioAssay assay2 = BioAssay.Factory.newInstance();
    assay2.setName("Former Smoker 34");
    BioMaterial sample2 = BioMaterial.Factory.newInstance();
    sample2.setName("GSM15744");
    assay2.setSampleUsed(sample2);
    assays.add(assay2);

    bioAssayDimension.setBioAssays(assays);

    // One raw vector per probe set, each holding two values (one per assay).
    RawExpressionDataVector vector1 = RawExpressionDataVector.Factory.newInstance();
    double[] ddata1 = { 74.9, 101.7 };
    byte[] bdata1 = bac.doubleArrayToBytes(ddata1);
    vector1.setData(bdata1);
    vector1.setQuantitationType(qt);
    vector1.setBioAssayDimension(bioAssayDimension);

    RawExpressionDataVector vector2 = RawExpressionDataVector.Factory.newInstance();
    double[] ddata2 = { 404.6, 318.7 };
    byte[] bdata2 = bac.doubleArrayToBytes(ddata2);
    vector2.setData(bdata2);
    vector2.setQuantitationType(qt);
    vector2.setBioAssayDimension(bioAssayDimension);

    // Design elements (probe sets) on a shared array design, each with its own biosequence.
    ArrayDesign ad = ArrayDesign.Factory.newInstance();
    ad.setName("test ar");

    CompositeSequence de1 = CompositeSequence.Factory.newInstance();
    de1.setName("218120_s_at");
    vector1.setDesignElement(de1);
    BioSequence bs1 = BioSequence.Factory.newInstance();
    bs1.setName("test1");
    de1.setBiologicalCharacteristic(bs1);
    de1.setArrayDesign(ad);

    CompositeSequence de2 = CompositeSequence.Factory.newInstance();
    de2.setName("121_at");
    BioSequence bs2 = BioSequence.Factory.newInstance();
    bs2.setName("test2");
    de2.setBiologicalCharacteristic(bs2);
    de2.setArrayDesign(ad);
    vector2.setDesignElement(de2);

    Collection<RawExpressionDataVector> eeVectors = new LinkedHashSet<>();
    eeVectors.add(vector1);
    eeVectors.add(vector2);
    ee.setRawExpressionDataVectors(eeVectors);

    ExpressionDataDoubleMatrix expressionDataMatrix = new ExpressionDataDoubleMatrix(eeVectors);
    assertNotNull(expressionDataMatrix);
    // Expected value goes first so that a failure reports "expected 2 but was N" correctly.
    assertEquals(2, expressionDataMatrix.rows());
    assertEquals(2, expressionDataMatrix.columns());
}
use of ubic.gemma.model.genome.biosequence.BioSequence in project Gemma by PavlidisLab.
the class ArrayDesignSequenceProcessorTest method testFetchAndLoadWithIdentifiers.
/**
 * Loads platform GPL226 from local test files, then processes the array design against the identifier test file and
 * the test blast databases. Checks that every returned biosequence has a sequence and that every composite sequence
 * on the design has a biological characteristic. The test is skipped (returns early) when no readable fastacmd
 * executable is configured.
 */
@Test
public void testFetchAndLoadWithIdentifiers() throws Exception {
    String fastacmdExe = Settings.getString(SimpleFastaCmd.FASTA_CMD_ENV_VAR);
    if (fastacmdExe == null) {
        log.warn("No fastacmd executable is configured, skipping test");
        return;
    }
    File fi = new File(fastacmdExe);
    if (!fi.canRead()) {
        log.warn(fastacmdExe + " not found, skipping test");
        return;
    }

    GeoService geoService = this.getBean(GeoService.class);
    geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGeneratorLocal(this.getTestFileBasePath()));
    @SuppressWarnings("unchecked") final Collection<ArrayDesign> ads = (Collection<ArrayDesign>) geoService.fetchAndLoad("GPL226", true, true, false, true, true);
    result = ads.iterator().next();
    result = arrayDesignService.thaw(result);

    // have to specify taxon as this has two taxons in it
    try (InputStream f = this.getClass().getResourceAsStream("/data/loader/expression/arrayDesign/identifierTest.txt")) {
        // getResourceAsStream returns null for a missing resource; fail here rather than deep inside processing.
        assertNotNull(f);
        Collection<BioSequence> res = app.processArrayDesign(result, f, new String[] { "testblastdb", "testblastdbPartTwo" }, FileTools.resourceToPath("/data/loader/genome/blast"), taxon, true);
        assertNotNull(res);
        for (BioSequence sequence : res) {
            assertNotNull(sequence.getSequence());
        }
        for (CompositeSequence cs : result.getCompositeSequences()) {
            // Use a test assertion, not the 'assert' keyword, so the check runs even without -ea.
            assertNotNull(cs.getBiologicalCharacteristic());
        }
    }
}
use of ubic.gemma.model.genome.biosequence.BioSequence in project Gemma by PavlidisLab.
the class IlluminaProbeReaderTest method testReadInputStream.
/**
 * Parses the test input stream with the reader under test and checks that reporter {@code GI_42655756-S} is present
 * and carries the expected immobilized sequence (compared in upper case).
 *
 * @throws Exception when there is a problem
 */
public final void testReadInputStream() throws Exception {
    TestCase.assertNotNull("Probe reader was not initialized", apr);
    apr.parse(is);

    String expectedValue = "GTGGCTGCCTTCCCAGCAGTCTCTACTTCAGCATATCTGGGAGCCAGAAG";
    TestCase.assertTrue(apr.containsKey("GI_42655756-S"));

    Reporter r = apr.get("GI_42655756-S");
    TestCase.assertNotNull("Reporter GI_42655756-S not found", r);

    BioSequence bs = r.getImmobilizedCharacteristic();
    TestCase.assertNotNull("Immobilized characteristic was null", bs);

    String actualValue = bs.getSequence().toUpperCase();
    TestCase.assertEquals("Wrong sequence returned", expectedValue, actualValue);
}
use of ubic.gemma.model.genome.biosequence.BioSequence in project Gemma by PavlidisLab.
the class MockFastaCmd method makeSequence.
/**
 * Creates a mock DNA biosequence for the given identifier. The sequence is named after {@code object.toString()},
 * is given a length of 100 with a randomly generated 100-character sequence drawn from the characters "ATGC", and
 * is linked to the Genbank accession entry built from the same identifier.
 *
 * @param object the identifier; its string form is used as both the name and the accession
 * @return the populated biosequence
 */
private BioSequence makeSequence(Object object) {
    final String identifier = object.toString();

    BioSequence sequence = BioSequence.Factory.newInstance(taxon);
    sequence.setName(identifier);

    // Fixed descriptive properties of the mock sequence.
    sequence.setPolymerType(PolymerType.DNA);
    sequence.setLength(100L);
    sequence.setIsApproximateLength(false);
    sequence.setIsCircular(false);
    sequence.setFractionRepeats(0.0);

    String residues = RandomStringUtils.random(100, "ATGC");
    sequence.setSequence(residues);

    DatabaseEntry accession = ExternalDatabaseUtils.getGenbankAccession(identifier);
    sequence.setSequenceDatabaseEntry(accession);
    return sequence;
}
Aggregations