use of ubic.gemma.model.genome.Taxon in project Gemma by PavlidisLab.
the class GoldenPathQueryTest method setUp.
/**
 * Creates the {@link GoldenPathQuery} used by the tests from the configured test database settings.
 * <p>
 * Whether the query object could be constructed is recorded in {@code hasDb}; a construction failure does
 * not fail the set-up itself.
 */
@Override
protected void setUp() throws Exception {
    super.setUp();
    try {
        String databaseHost = Settings.getString("gemma.testdb.host");
        String databaseUser = Settings.getString("gemma.testdb.user");
        String databasePassword = Settings.getString("gemma.testdb.password");
        queryer = new GoldenPathQuery(Settings.getString("gemma.goldenpath.db.human"), databaseHost,
                databaseUser, databasePassword);
        this.hasDb = true;
    } catch (Exception ignored) {
        // Deliberately best-effort: any failure while building the query is only recorded in the flag,
        // so the absence of a usable database does not abort the set-up.
        this.hasDb = false;
    }
}
use of ubic.gemma.model.genome.Taxon in project Gemma by PavlidisLab.
the class SimpleExpressionDataLoaderServiceTest method testLoad.
/**
 * Loads {@code /data/testdata.txt} through the simple expression data loader service and checks the number
 * of raw expression data vectors and bioassays on the thawed experiment.
 */
@Test
public final void testLoad() throws Exception {
    Taxon mouse = this.getTaxon("mouse");

    // Single one-colour platform with random five-letter names
    // (presumably so repeated runs do not clash -- confirm against the persistence layer).
    ArrayDesign platform = ArrayDesign.Factory.newInstance();
    platform.setShortName(RandomStringUtils.randomAlphabetic(5));
    platform.setName(RandomStringUtils.randomAlphabetic(5));
    platform.setPrimaryTaxon(mouse);
    platform.setTechnologyType(TechnologyType.ONECOLOR);

    Collection<ArrayDesign> platforms = new HashSet<>();
    platforms.add(platform);

    // Description of the experiment and of the quantitation type of the data being loaded.
    SimpleExpressionExperimentMetaData meta = new SimpleExpressionExperimentMetaData();
    meta.setArrayDesigns(platforms);
    meta.setTaxon(mouse);
    meta.setShortName(RandomStringUtils.randomAlphabetic(5));
    meta.setName(RandomStringUtils.randomAlphabetic(5));
    meta.setDescription("Simple expression data loader service test - load");
    meta.setQuantitationTypeName("testing");
    meta.setGeneralType(GeneralType.QUANTITATIVE);
    meta.setScale(ScaleType.LOG2);
    meta.setType(StandardQuantitationType.AMOUNT);
    meta.setIsRatio(true);

    try (InputStream testData = this.getClass().getResourceAsStream("/data/testdata.txt")) {
        ee = service.create(meta, testData);
    }

    ee = eeService.thaw(ee);
    assertNotNull(ee);
    assertEquals(30, ee.getRawExpressionDataVectors().size());
    assertEquals(12, ee.getBioAssays().size());
}
use of ubic.gemma.model.genome.Taxon in project Gemma by PavlidisLab.
the class StringBiomartProteinConverterTest method setUp.
/**
 * Builds the converter under test from the short mouse BioMart file and prepares three STRING
 * protein-protein interactions for the tests.
 * <p>
 * A missing test resource or a failure while generating the BioMart map fails the set-up with a
 * descriptive assertion error.
 */
@Before
public void setUp() {
    String fileNameBiomartmouse = "/data/loader/protein/biomart/biomartmmusculusShort.txt";
    URL fileNameBiomartmouseURL = this.getClass().getResource(fileNameBiomartmouse);
    if (fileNameBiomartmouseURL == null) {
        // getResource returns null for a missing resource; report that instead of an opaque NPE below.
        fail("Test resource not found on classpath: " + fileNameBiomartmouse);
    }
    File taxonBiomartFile = new File(fileNameBiomartmouseURL.getFile());

    Taxon taxon = Taxon.Factory.newInstance();
    taxon.setIsGenesUsable(true);
    taxon.setNcbiId(10090);
    taxon.setScientificName("Mus musculus");
    taxon.setIsSpecies(true);
    taxa.add(taxon);

    try {
        BiomartEnsemblNcbiObjectGenerator biomartEnsemblNcbiObjectGenerator = new BiomartEnsemblNcbiObjectGenerator();
        biomartEnsemblNcbiObjectGenerator.setBioMartFileName(taxonBiomartFile);
        Map<String, Ensembl2NcbiValueObject> map = biomartEnsemblNcbiObjectGenerator.generate(taxa);
        stringBiomartProteinConverter = new StringProteinProteinInteractionConverter(map);
    } catch (Exception e) {
        // Keep the original exception as the cause so the test report shows why the set-up failed.
        throw new AssertionError("Could not build the converter from " + taxonBiomartFile, e);
    }

    stringProteinProteinInteractionOne = new StringProteinProteinInteraction("ENSMUSP00000111623", "ENSMUSP00000100396");
    StringProteinProteinInteraction stringProteinProteinInteractionTwo = new StringProteinProteinInteraction("ENSMUSP00000100395", "ENSMUSP00000100396");
    StringProteinProteinInteraction stringProteinProteinInteractionThree = new StringProteinProteinInteraction("ENSMUSP00000100407", "ENSMUSP00000100395");

    // Collect the interactions for the tests.
    stringProteinProteinInteractions.add(stringProteinProteinInteractionOne);
    stringProteinProteinInteractions.add(stringProteinProteinInteractionTwo);
    stringProteinProteinInteractions.add(stringProteinProteinInteractionThree);
}
use of ubic.gemma.model.genome.Taxon in project Gemma by PavlidisLab.
the class DatabaseViewGeneratorImpl method generateDatasetView.
/**
 * Writes the dataset summary view: a gzip-compressed, tab-separated file with a header line followed by
 * one line per experiment (id, source, accession, short name, name, description, taxon, manufacturers).
 * <p>
 * Experiments for which no taxon can be determined are skipped and do not count towards the limit.
 *
 * @param limit       maximum number of experiment lines to write; {@code null} or a non-positive value
 *                    means no limit
 * @param experiments experiments to summarize
 * @throws IOException if the view file cannot be written
 */
private void generateDatasetView(Integer limit, Collection<ExpressionExperiment> experiments) throws IOException {
    DatabaseViewGeneratorImpl.log.info("Generating dataset summary view");

    /*
     * Get handle to output file
     */
    File file = this.getViewFile(DatabaseViewGeneratorImpl.DATASET_SUMMARY_VIEW_BASENAME);
    DatabaseViewGeneratorImpl.log.info("Writing to " + file);

    // Explicit UTF-8 so the file encoding does not depend on the platform default charset.
    try (Writer writer = new OutputStreamWriter(new GZIPOutputStream(new FileOutputStream(file)),
            java.nio.charset.StandardCharsets.UTF_8)) {
        writer.write("GemmaDsId\tSource\tSourceAccession\tShortName\tName\tDescription\ttaxon\tManufacturer\n");

        /*
         * Print out their names etc.
         */
        int i = 0;
        for (ExpressionExperiment ee : experiments) {
            ee = expressionExperimentService.thawLite(ee);
            DatabaseViewGeneratorImpl.log.info("Processing: " + ee.getShortName());

            String acc = "";
            String source = "";
            if (ee.getAccession() != null && ee.getAccession().getAccession() != null) {
                acc = ee.getAccession().getAccession();
                source = ee.getAccession().getExternalDatabase().getName();
            }

            Long gemmaId = ee.getId();
            String shortName = ee.getShortName();
            String name = ee.getName();

            // Tabs and line breaks in the description would break the one-line-per-experiment layout.
            String description = ee.getDescription();
            description = StringUtils.replaceChars(description, '\t', ' ');
            description = StringUtils.replaceChars(description, '\n', ' ');
            description = StringUtils.replaceChars(description, '\r', ' ');

            Taxon taxon = expressionExperimentService.getTaxon(ee);
            if (taxon == null) {
                continue;
            }

            Collection<ArrayDesign> ads = expressionExperimentService.getArrayDesignsUsed(ee);
            StringBuilder manufacturers = new StringBuilder();
            // TODO could cache the arrayDesigns to make faster, thawing ad is time consuming
            for (ArrayDesign ad : ads) {
                ad = arrayDesignService.thawLite(ad);
                if (ad.getDesignProvider() == null) {
                    DatabaseViewGeneratorImpl.log.debug("Array Design: " + ad.getShortName() + " has no design provoider assoicated with it. Skipping");
                    continue;
                }
                manufacturers.append(ad.getDesignProvider().getName()).append(",");
            }

            writer.write(String.format("%d\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n", gemmaId, source, acc, shortName, name,
                    description, taxon.getCommonName(), StringUtils.removeEnd(manufacturers.toString(), ",")));

            // Stop once exactly 'limit' lines have been written (the previous '>' check wrote limit + 1).
            if (limit != null && limit > 0 && ++i >= limit) {
                break;
            }
        }
    }
}
use of ubic.gemma.model.genome.Taxon in project Gemma by PavlidisLab.
the class RepeatScan method repeatScan.
/**
 * Run repeatmasker on the sequences. The sequence will be updated with the masked (lower-case) sequences and the
 * fraction of masked bases will be filled in.
 * <p>
 * The sequences are written to a temporary FASTA file, RepeatMasker is executed on that file using the taxon
 * of the first sequence, and the resulting {@code .masked} file is processed.
 *
 * @param sequences sequences
 * @return sequences that had repeats; the given collection itself if it is empty, or an empty set if
 *         RepeatMasker produced no output file.
 */
public Collection<BioSequence> repeatScan(Collection<BioSequence> sequences) {
    if (sequences.isEmpty()) {
        RepeatScan.log.warn("No sequences to test");
        return sequences;
    }

    try {
        File fastaFile = File.createTempFile("repmask", ".fa");
        SequenceWriter.writeSequencesToFile(sequences, fastaFile);

        // The taxon of the first sequence is the one passed to RepeatMasker for the whole batch.
        Taxon firstTaxon = sequences.iterator().next().getTaxon();
        this.execRepeatMasker(fastaFile, firstTaxon);

        // The masked output is looked for next to the query file, with ".masked" appended to its name.
        final String maskedPath = fastaFile.getParent() + File.separatorChar + fastaFile.getName() + ".masked";
        if (new File(maskedPath).exists()) {
            return this.processRepeatMaskerOutput(sequences, maskedPath);
        }

        this.handleNoOutputCondition(fastaFile, maskedPath);
        return new HashSet<>();
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
Aggregations