use of ubic.gemma.model.expression.bioAssay.BioAssay in project Gemma by PavlidisLab.
the class ExperimentalDesignImporterImpl method mapBioMaterialsToNamePossibilities.
/**
 * Builds a lookup from every identifier that might appear in a design import file to the biomaterial it denotes.
 * <p>
 * For each biomaterial the following keys are registered, in this order: the biomaterial name; for each bioassay the
 * biomaterial is used in, the bioassay's external accession (only when non-blank) and the bioassay name; and finally
 * the name produced by {@code ExpressionDataWriterUtils.constructBioAssayName}. If two entries produce the same key,
 * the one registered last wins.
 *
 * @param bioMaterials bio materials to index
 * @return a map of the various strings that we might find in a design importing file to the biomaterials.
 */
private Map<String, BioMaterial> mapBioMaterialsToNamePossibilities(Collection<BioMaterial> bioMaterials) {
    // Rebuilt from scratch on every call; acceptable, though it could be cached.
    Map<String, BioMaterial> lookup = new HashMap<>();

    for (BioMaterial material : bioMaterials) {
        // 1. The biomaterial's own name.
        lookup.put(material.getName(), material);

        // 2. Identifiers of each bioassay run on this biomaterial (there may be several, e.g. one per platform).
        for (BioAssay assay : material.getBioAssaysUsedIn()) {
            boolean hasUsableAccession = assay.getAccession() != null
                    && StringUtils.isNotBlank(assay.getAccession().getAccession());
            if (hasUsableAccession) {
                // External id of the bioassay, to be flexible about what the file may contain.
                lookup.put(assay.getAccession().getAccession(), material);
            }
            // The bioassay name is accepted as well.
            lookup.put(assay.getName(), material);
        }

        // 3. The mangled name used by the 'native' Gemma export format. It includes the ID, so it is not useful
        // for tests.
        lookup.put(ExpressionDataWriterUtils.constructBioAssayName(material, material.getBioAssaysUsedIn()),
                material);
    }

    return lookup;
}
use of ubic.gemma.model.expression.bioAssay.BioAssay in project Gemma by PavlidisLab.
the class SimpleExpressionDataLoaderServiceImpl method convertBioAssayDimension.
/**
 * Creates a bioassay dimension holding one new bioassay (backed by one new biomaterial) per column of the matrix.
 * Both the bioassay and its biomaterial are named after the column.
 *
 * @param ee          experiment whose short name is used in the generated names and descriptions
 * @param arrayDesign platform assigned to every generated bioassay
 * @param taxon       source taxon assigned to every generated biomaterial
 * @param matrix      data matrix whose column names define the samples
 * @return BioAssayDimension
 */
private BioAssayDimension convertBioAssayDimension(ExpressionExperiment ee, ArrayDesign arrayDesign, Taxon taxon, DoubleMatrix<String, String> matrix) {
    BioAssayDimension dimension = BioAssayDimension.Factory.newInstance();
    dimension.setDescription("Generated from flat file");
    dimension.setName("For " + ee.getShortName());

    String materialDescription = "Generated by Gemma for: " + ee.getShortName();
    int columnCount = matrix.columns();

    for (int col = 0; col < columnCount; col++) {
        String sampleName = matrix.getColName(col);

        // The sample itself.
        BioMaterial material = BioMaterial.Factory.newInstance();
        material.setSourceTaxon(taxon);
        material.setDescription(materialDescription);
        material.setName(sampleName);

        // The assay run on that sample.
        BioAssay bioAssay = BioAssay.Factory.newInstance();
        bioAssay.setSequencePairedReads(false);
        bioAssay.setIsOutlier(false);
        bioAssay.setSampleUsed(material);
        bioAssay.setArrayDesignUsed(arrayDesign);
        bioAssay.setName(sampleName);

        dimension.getBioAssays().add(bioAssay);
    }

    SimpleExpressionDataLoaderServiceImpl.log.info("Generated " + dimension.getBioAssays().size() + " bioAssays");
    return dimension;
}
use of ubic.gemma.model.expression.bioAssay.BioAssay in project Gemma by PavlidisLab.
the class GeoConverterImpl method convertSeriesSingle.
/**
 * Converts a single GEO series into an ExpressionExperiment.
 * <p>
 * Steps, in order: decide which datasets/samples to skip; bail out if everything is skipped or the series is not
 * usable; fill in the experiment's description, name, short name, contacts, publications, accession and raw data
 * file; build an ExperimentalDesign from the series variables and replicates; create one BioMaterial per non-empty
 * set of corresponding samples and one BioAssay per matching sample; finally convert the data, either directly from
 * the series (when it has no datasets) or from each dataset that is not skipped.
 *
 * @param series the GEO series to convert; may be null
 * @return the converted experiment, or null if the series is null, if it has datasets and all of them are to be
 *         skipped, or if the series is not usable
 * @throws IllegalArgumentException if the series has no sample correspondence
 */
private ExpressionExperiment convertSeriesSingle(GeoSeries series) {
if (series == null)
return null;
GeoConverterImpl.log.info("Converting series: " + series.getGeoAccession());
Collection<GeoDataset> dataSets = series.getDatasets();
Collection<String> dataSetsToSkip = new HashSet<>();
Collection<GeoSample> samplesToSkip = new HashSet<>();
// Populates the two "skip" collections in place.
this.checkForDataToSkip(series, dataSetsToSkip, samplesToSkip);
// Nothing left to convert if there are datasets and every one of them is to be skipped.
if (dataSets.size() > 0 && dataSetsToSkip.size() == dataSets.size()) {
return null;
}
if (!this.isUsable(series)) {
GeoConverterImpl.log.warn("Series was not usable: types=" + StringUtils.join(series.getSeriesTypes(), " "));
return null;
}
ExpressionExperiment expExp = ExpressionExperiment.Factory.newInstance();
// NOTE(review): this empty description is overwritten by the very next statement, so it looks redundant.
expExp.setDescription("");
// Description is the series summaries, guaranteed to end with a newline.
// NOTE(review): assumes series.getSummaries() is never null - confirm.
expExp.setDescription(series.getSummaries() + (series.getSummaries().endsWith("\n") ? "" : "\n"));
if (series.getLastUpdateDate() != null) {
expExp.setDescription(expExp.getDescription() + "Last Updated (by provider): " + series.getLastUpdateDate() + "\n");
}
expExp.setName(series.getTitle());
expExp.setShortName(series.getGeoAccession());
this.convertContacts(series, expExp);
this.convertPubMedIds(series, expExp);
expExp.setAccession(this.convertDatabaseEntry(series));
LocalFile expExpRawDataFile = this.convertSupplementaryFileToLocalFile(series);
expExp.setRawDataFile(expExpRawDataFile);
// Build the experimental design: one factor per GEO variable, then one per GEO replication (below).
ExperimentalDesign design = ExperimentalDesign.Factory.newInstance();
design.setDescription("");
design.setName("");
Collection<GeoVariable> variables = series.getVariables().values();
for (GeoVariable variable : variables) {
GeoConverterImpl.log.debug("Adding variable " + variable);
ExperimentalFactor ef = this.convertVariableToFactor(variable);
this.convertVariableToFactorValue(variable, ef);
design.getExperimentalFactors().add(ef);
// The design name accumulates the variable descriptions, most recent first.
design.setName(variable.getDescription() + " " + design.getName());
}
if (series.getKeyWords().size() > 0) {
for (String keyWord : series.getKeyWords()) {
// design.setDescription( design.getDescription() + " Keyword: " + keyWord );
// NOTE(review): the characteristic built here is never attached to the design or the experiment, so within
// this method the keywords are effectively dropped - confirm whether that is intended.
Characteristic o = Characteristic.Factory.newInstance();
// NOTE(review): this description is replaced by the second setDescription call below.
o.setDescription("GEO Keyword");
o.setValue(keyWord);
o.setEvidenceCode(GOEvidenceCode.IIA);
o.setDescription("Keyword from GEO series definition file.");
}
}
if (series.getOverallDesign() != null) {
design.setDescription(design.getDescription() + " Overall design: " + series.getOverallDesign());
}
Collection<GeoReplication> replication = series.getReplicates().values();
for (GeoReplication replicate : replication) {
GeoConverterImpl.log.debug("Adding replication " + replicate);
ExperimentalFactor ef = this.convertReplicationToFactor(replicate);
this.convertReplicationToFactorValue(replicate, ef);
design.getExperimentalFactors().add(ef);
}
expExp.setExperimentalDesign(design);
// GEO does not have the concept of a biomaterial.
Collection<GeoSample> allSeriesSamples = series.getSamples();
GeoConverterImpl.log.info("Series has " + series.getSamples().size() + " samples");
if (samplesToSkip.size() > 0) {
GeoConverterImpl.log.info(samplesToSkip.size() + " samples will be skipped");
}
expExp.setBioAssays(new HashSet<BioAssay>());
if (series.getSampleCorrespondence().size() == 0) {
throw new IllegalArgumentException("No sample correspondence!");
}
// spits out a big summary of the correspondence.
if (GeoConverterImpl.log.isDebugEnabled())
GeoConverterImpl.log.debug(series.getSampleCorrespondence());
// Counts every non-empty correspondence set, including sets for which no sample ends up being matched.
int numBioMaterials = 0;
/*
* For each _set_ of "corresponding" samples (from the same RNA, or so we think) we make up a new BioMaterial.
*/
// Accessions already converted, used only to report duplicates across correspondence sets.
Collection<String> seen = new HashSet<>();
for (Iterator<Set<String>> iter = series.getSampleCorrespondence().iterator(); iter.hasNext(); ) {
Set<String> correspondingSamples = iter.next();
if (correspondingSamples.isEmpty())
// can happen after removing samples (multitaxon)
continue;
BioMaterial bioMaterial = BioMaterial.Factory.newInstance();
String bioMaterialName = this.getBiomaterialPrefix(series, ++numBioMaterials);
StringBuilder bioMaterialDescription = new StringBuilder(GeoConverterImpl.BIOMATERIAL_DESCRIPTION_PREFIX + series.getGeoAccession());
// From the series samples, find the sample that corresponds and convert it.
for (String cSample : correspondingSamples) {
boolean found = false;
for (GeoSample sample : allSeriesSamples) {
if (sample == null || sample.getGeoAccession() == null) {
GeoConverterImpl.log.warn("Null sample or no accession for " + sample);
continue;
}
if (samplesToSkip.contains(sample)) {
continue;
}
String accession = sample.getGeoAccession();
if (accession.equals(cSample)) {
// A duplicate is only logged; the sample is still converted again below.
if (seen.contains(accession)) {
GeoConverterImpl.log.error("Got " + accession + " twice, this time in set " + correspondingSamples);
}
seen.add(accession);
BioAssay ba = this.convertSample(sample, bioMaterial, expExp.getExperimentalDesign());
assert (ba != null);
LocalFile rawDataFile = this.convertSupplementaryFileToLocalFile(sample);
// deal with null at UI
ba.setRawDataFile(rawDataFile);
ba.setDescription(ba.getDescription() + "\nSource GEO sample is " + sample.getGeoAccession() + "\nLast updated (according to GEO): " + sample.getLastUpdateDate());
assert ba.getSampleUsed() != null;
bioMaterial.getBioAssaysUsedIn().add(ba);
// NOTE(review): appends the sample object's string form rather than its accession - confirm this is intended.
bioMaterialDescription.append(",").append(sample);
expExp.getBioAssays().add(ba);
found = true;
// Stop at the first series sample whose accession matches.
break;
}
}
if (!found) {
if (GeoConverterImpl.log.isDebugEnabled())
GeoConverterImpl.log.debug("No sample found in " + series + " to match " + cSample + "; this can happen if some samples were not run on all platforms.");
}
}
// Name and description are set after the inner loop so the description lists every matched sample.
bioMaterial.setName(bioMaterialName);
bioMaterial.setDescription(bioMaterialDescription.toString());
}
GeoConverterImpl.log.info("Expression Experiment from " + series + " has " + expExp.getBioAssays().size() + " bioassays and " + numBioMaterials + " biomaterials.");
// Warn if fewer bioassays were created than there are non-skipped samples in the series.
int expectedNumSamples = series.getSamples().size() - samplesToSkip.size();
int actualNumSamples = expExp.getBioAssays().size();
if (expectedNumSamples > actualNumSamples) {
GeoConverterImpl.log.warn((expectedNumSamples - actualNumSamples) + " samples were not in the 'sample correspondence'" + " and have been omitted. Possibly they were in the Series (GSE) but not in the corresponding Dataset (GDS)?");
}
// Convert the data: straight from the series when it has no datasets, otherwise per non-skipped dataset.
if (dataSets.size() == 0) {
// we miss extra description and the subset information.
if (series.getValues().hasData())
this.convertSeriesDataVectors(series, expExp);
} else {
for (GeoDataset dataset : dataSets) {
if (dataSetsToSkip.contains(dataset.getGeoAccession()))
continue;
this.convertDataset(dataset, expExp);
}
}
return expExp;
}
use of ubic.gemma.model.expression.bioAssay.BioAssay in project Gemma by PavlidisLab.
the class ExpressionExperimentServiceImpl method checkHasBatchInfo.
/**
 * Reports whether batch information is available for the experiment.
 * <p>
 * The answer is true as soon as one experimental factor is recognised as a batch factor by
 * {@code BatchInfoPopulationServiceImpl.isBatchFactor}. Otherwise the bioassays are thawed and the answer is true
 * only if no bioassay has a null processing date. Note that an experiment without any bioassays therefore yields
 * true.
 *
 * @param ee the experiment to inspect
 * @return true if a batch factor exists or no bioassay lacks a processing date
 */
@Override
public boolean checkHasBatchInfo(ExpressionExperiment ee) {
    // First choice: an explicit batch factor in the design.
    for (ExperimentalFactor factor : ee.getExperimentalDesign().getExperimentalFactors()) {
        if (BatchInfoPopulationServiceImpl.isBatchFactor(factor)) {
            return true;
        }
    }

    // Fallback: every bioassay must carry a processing date. Thawing is only done when we get this far.
    ExpressionExperiment thawed = this.thawBioAssays(ee);
    for (BioAssay assay : thawed.getBioAssays()) {
        if (assay.getProcessingDate() == null) {
            return false;
        }
    }
    return true;
}
use of ubic.gemma.model.expression.bioAssay.BioAssay in project Gemma by PavlidisLab.
the class ExpressionExperimentServiceImpl method addRawVectors.
/**
 * Adds a set of raw expression data vectors to an experiment.
 * <p>
 * All vectors must share one bioassay dimension and one quantitation type. The dimension is resolved through
 * {@code bioAssayDimensionService.findOrCreate}; the quantitation type is created if it has no ID yet. Every bioassay
 * of the dimension is assigned the array design of the first vector's design element, and every vector is re-pointed
 * at the resolved dimension and quantitation type before being handed to the vector DAO.
 *
 * @param ee         the experiment to add the vectors to (its ID is used for the DAO call)
 * @param newVectors the vectors to add; must not be null or empty
 * @return the experiment as returned by the vector DAO, with the quantitation type added to its quantitation types
 * @throws IllegalArgumentException      if no vectors are given, or the vectors use more than one bioassay dimension
 * @throws UnsupportedOperationException if the vectors use more than one quantitation type
 */
@Override
@Transactional
public ExpressionExperiment addRawVectors(ExpressionExperiment ee, Collection<RawExpressionDataVector> newVectors) {
    // Fail with a clear message instead of an unexplained NoSuchElementException from iterator().next() below.
    if (newVectors == null || newVectors.isEmpty()) {
        throw new IllegalArgumentException("No vectors were provided");
    }

    Collection<BioAssayDimension> BADs = new HashSet<>();
    Collection<QuantitationType> qts = new HashSet<>();
    for (RawExpressionDataVector vec : newVectors) {
        BADs.add(vec.getBioAssayDimension());
        qts.add(vec.getQuantitationType());
    }

    if (BADs.size() > 1) {
        throw new IllegalArgumentException("Vectors must share a common bioassay dimension");
    }
    if (qts.size() > 1) {
        throw new UnsupportedOperationException("Can only replace with one type of vector (only one quantitation type)");
    }

    BioAssayDimension bad = BADs.iterator().next();
    bad = this.bioAssayDimensionService.findOrCreate(bad);
    assert bad.getBioAssays().size() > 0;

    QuantitationType newQt = qts.iterator().next();
    if (newQt.getId() == null) {
        newQt = this.quantitationTypeDao.create(newQt);
    } else {
        AbstractService.log.warn("Quantitation type already had an ID...:" + newQt);
    }

    /*
     * This is probably a more or less redundant setting, but doesn't hurt to make sure.
     */
    ArrayDesign vectorAd = newVectors.iterator().next().getDesignElement().getArrayDesign();
    for (BioAssay ba : bad.getBioAssays()) {
        ba.setArrayDesignUsed(vectorAd);
    }

    // Point every vector at the resolved dimension and quantitation type.
    for (RawExpressionDataVector vec : newVectors) {
        vec.setBioAssayDimension(bad);
        vec.setQuantitationType(newQt);
    }

    ee = rawExpressionDataVectorDao.addVectors(ee.getId(), newVectors);

    // this is a denormalization; easy to forget to update this.
    ee.getQuantitationTypes().add(newQt);

    AbstractService.log.info(ee.getRawExpressionDataVectors().size() + " vectors for experiment");
    return ee;
}
Aggregations