use of ubic.gemma.model.expression.bioAssay.BioAssay in project Gemma by PavlidisLab.
the class ExpressionExperimentDaoImpl method removeBioAssays.
/**
 * Detaches every given bioassay from the biomaterial it uses and queues that biomaterial for deletion.
 * <p>
 * Relations to files cascade, so only biomaterials need explicit handling here: they are not cascaded from
 * anywhere. BioAssay -&gt; BioMaterial is many-to-one, but the bioassay set (experiment) owns the bioassay.
 *
 * @param session              session used to re-associate each biomaterial before it is touched
 * @param copyOfRelations      receives one (bioassay, biomaterial) entry per bioassay that had a sample
 * @param bioMaterialsToDelete receives every biomaterial that was detached
 * @param bioAssays            the bioassays to process; those without a sample are skipped
 */
private void removeBioAssays(Session session, Map<BioAssay, BioMaterial> copyOfRelations, Collection<BioMaterial> bioMaterialsToDelete, Collection<BioAssay> bioAssays) {
    for (BioAssay assay : bioAssays) {
        BioMaterial sample = assay.getSampleUsed();
        // A bioassay without a sample shouldn't happen, but there is nothing to detach if it does.
        if (sample != null) {
            bioMaterialsToDelete.add(sample);
            copyOfRelations.put(assay, sample);

            // Re-associate with the session first; this can easily end up with an unattached object.
            session.buildLockRequest(LockOptions.NONE).lock(sample);
            Hibernate.initialize(sample);
            Hibernate.initialize(sample.getBioAssaysUsedIn());

            // Sever the biomaterial's own associations, then the bioassay's reference to it.
            sample.getFactorValues().clear();
            sample.getBioAssaysUsedIn().clear();
            assay.setSampleUsed(null);
        }
    }
}
use of ubic.gemma.model.expression.bioAssay.BioAssay in project Gemma by PavlidisLab.
the class PersistentDummyObjectHelper method getTestPersistentExpressionExperiment.
/**
 * Convenience method that builds and persists an ExpressionExperiment suitable for filling non-nullable
 * associations in test objects. A persistent BioMaterial (for the given taxon) and a persistent BioAssay using it
 * are created first so that database taxon lookups for the experiment will work. The experiment also gets an
 * experimental design, an owner, quantitation types and an empty collection of raw data vectors.
 *
 * @param taxon the taxon of the biomaterial attached to the experiment's single bioassay
 * @return the persisted experiment
 */
public ExpressionExperiment getTestPersistentExpressionExperiment(Taxon taxon) {
    // Persistent sample -> platform -> assay, created in that order.
    BioMaterial sample = this.getTestPersistentBioMaterial(taxon);
    ArrayDesign platform = this.getTestPersistentArrayDesign(4, true, true);
    BioAssay assay = this.getTestPersistentBioAssay(platform, sample);

    Set<BioAssay> assays = new HashSet<>();
    assays.add(assay);

    ExpressionExperiment experiment = ExpressionExperiment.Factory.newInstance();
    experiment.setName(RandomStringUtils.randomNumeric(PersistentDummyObjectHelper.RANDOM_STRING_LENGTH) + "_testee");
    experiment.setShortName(RandomStringUtils.randomNumeric(PersistentDummyObjectHelper.RANDOM_STRING_LENGTH) + "_testee");
    experiment.setBioAssays(assays);

    Collection<FactorValue> factorValues = new HashSet<>();
    ExperimentalDesign design = this.getExperimentalDesign(factorValues);
    experiment.setExperimentalDesign(design);
    experiment.setOwner(this.getTestPersistentContact());

    log.debug("expression experiment => design element data vectors");
    // The experiment is given an empty vector collection.
    Collection<RawExpressionDataVector> rawVectors = new HashSet<>();
    Collection<QuantitationType> types = this.addQuantitationTypes(new HashSet<QuantitationType>());
    assert !types.isEmpty();
    experiment.setQuantitationTypes(types);
    experiment.setRawExpressionDataVectors(rawVectors);

    ArrayDesignsForExperimentCache cache = persisterHelper.prepare(experiment);
    return persisterHelper.persist(experiment, cache);
}
use of ubic.gemma.model.expression.bioAssay.BioAssay in project Gemma by PavlidisLab.
the class ExperimentalDesignVisualizationServiceImpl method getExperimentalDesignLayout.
/**
 * Computes the experimental design "layout" of an experiment: for each bioassay, the position of its sample along
 * each experimental factor.
 * <p>
 * If the design has no factors, every bioassay is mapped to a single placeholder factor named "No factors" with
 * position 0.0. Otherwise, for each factor value of a sample, the position is the numeric value of its measurement
 * when it has one that parses as a double, and the factor value's id (as a double) in every other case.
 *
 * @param experiment the experiment to lay out; it must have an experimental design
 * @param bds        a BioAssayDimension that represents the BioAssayDimensionValueObject. This is only needed to
 *                   avoid making ExpressionMatrix use value objects, otherwise we could use the
 *                   BioAssayDimensionValueObject
 * @return A "Layout": a map of bioassays to map of factors to doubles that represent the position in the layout.
 *         Bioassays are inserted in the order returned by
 *         {@code ExpressionDataMatrixColumnSort.orderByExperimentalDesign}.
 */
private LinkedHashMap<BioAssayValueObject, LinkedHashMap<ExperimentalFactor, Double>> getExperimentalDesignLayout(ExpressionExperiment experiment, Collection<BioAssayDimension> bds) {
    LinkedHashMap<BioAssayValueObject, LinkedHashMap<ExperimentalFactor, Double>> result = new LinkedHashMap<>();
    ExpressionDataMatrix<Object> mat = new EmptyExpressionMatrix(bds);
    // This is the place the actual sort order is determined.
    List<BioMaterial> bms = ExpressionDataMatrixColumnSort.orderByExperimentalDesign(mat);

    assert experiment != null;
    assert experiment.getExperimentalDesign() != null;
    Collection<ExperimentalFactor> factors = experiment.getExperimentalDesign().getExperimentalFactors();

    if (factors.isEmpty()) {
        // Case of no experimental design; just put in a dummy factor.
        ExperimentalFactor dummyFactor = ExperimentalFactor.Factory.newInstance();
        dummyFactor.setName("No factors");
        for (BioMaterial bm : bms) {
            for (BioAssay ba : mat.getBioAssaysForColumn(mat.getColumnIndex(bm))) {
                LinkedHashMap<ExperimentalFactor, Double> positions = new LinkedHashMap<>();
                positions.put(dummyFactor, 0.0);
                result.put(new BioAssayValueObject(ba, false), positions);
            }
        }
        return result;
    }

    // Fallback position for each factor value: its id, used purely as a convenience.
    Map<Long, Double> fvV = new HashMap<>();
    for (ExperimentalFactor ef : factors) {
        // A factor may have no values if the design isn't complete; the loop is then simply a no-op.
        for (FactorValue fv : ef.getFactorValues()) {
            assert fv.getId() != null;
            fvV.put(fv.getId(), fv.getId().doubleValue());
        }
    }
    assert !fvV.isEmpty();
    assert !bms.isEmpty();

    for (BioMaterial bm : bms) {
        Collection<BioAssay> bas = mat.getBioAssaysForColumn(mat.getColumnIndex(bm));
        Collection<FactorValue> fvs = bm.getFactorValues();
        for (BioAssay ba : bas) {
            LinkedHashMap<ExperimentalFactor, Double> positions = new LinkedHashMap<>(fvs.size());
            result.put(new BioAssayValueObject(ba, false), positions);
            for (FactorValue fv : fvs) {
                assert fv.getId() != null;
                assert fvV.containsKey(fv.getId());

                // Start from the id-based fallback and override it only with a usable measurement.
                Double value = fvV.get(fv.getId());
                if (fv.getMeasurement() != null) {
                    String raw = fv.getMeasurement().getValue();
                    // A missing value is treated like an unparseable one instead of throwing a NullPointerException.
                    if (raw != null) {
                        try {
                            value = Double.parseDouble(raw);
                        } catch (NumberFormatException ignored) {
                            // not good, but not fatal: keep the id-based fallback.
                        }
                    }
                }
                assert value != null;
                positions.put(fv.getExperimentalFactor(), value);
            }
        }
    }
    return result;
}
use of ubic.gemma.model.expression.bioAssay.BioAssay in project Gemma by PavlidisLab.
the class AffyPowerToolsProbesetSummarize method processData.
/**
 * Reads the output of apt-probeset-summarize and converts it to raw data vectors. For either 3' or Exon arrays.
 * <p>
 * Only the bioassays of the experiment that use {@code targetPlatform} are considered. Each column of the parsed
 * matrix is matched to one of them: the ".CEL"/".cel" extension is stripped from the column name, and the result is
 * looked up either by its leading GEO accession (names like {@code GSM476194_SK_09-BALBcJ_622.CEL}) or as-is
 * against the bioassay accessions and names. Columns that match no bioassay are dropped, provided the file has
 * more columns than there are bioassays to use.
 * <p>
 * As a side effect, the quantitation type of this instance is initialized if it was not already set.
 *
 * @param ee                  ee
 * @param aptOutputFileToRead path of the file to read
 * @param targetPlatform      deal with data from this platform (call multiple times if there is more than one
 *                            platform)
 * @return raw data vectors
 * @throws IOException           io problem, including the file not being found
 * @throws IllegalStateException if the matrix is empty, has fewer columns than there are bioassays on the
 *                               platform, two bioassays share an accession or name, or a data column cannot be
 *                               found for every bioassay on the platform
 */
public Collection<RawExpressionDataVector> processData(ExpressionExperiment ee, String aptOutputFileToRead, ArrayDesign targetPlatform) throws IOException {
    AffyPowerToolsProbesetSummarize.log.info("Parsing " + aptOutputFileToRead);
    try (InputStream is = new FileInputStream(aptOutputFileToRead)) {
        DoubleMatrix<String, String> matrix = this.parse(is);
        if (matrix.rows() == 0) {
            throw new IllegalStateException("Matrix from APT had no rows");
        }
        if (matrix.columns() == 0) {
            throw new IllegalStateException("Matrix from APT had no columns");
        }

        // Restrict to the bioassays run on the requested platform.
        Collection<BioAssay> allBioAssays = ee.getBioAssays();
        Collection<BioAssay> bioAssaysToUse = new HashSet<>();
        for (BioAssay bioAssay : allBioAssays) {
            if (bioAssay.getArrayDesignUsed().equals(targetPlatform)) {
                bioAssaysToUse.add(bioAssay);
            }
        }
        if (allBioAssays.size() > bioAssaysToUse.size()) {
            AffyPowerToolsProbesetSummarize.log.info("Using " + bioAssaysToUse.size() + "/" + allBioAssays.size() + " bioassays (those on " + targetPlatform.getShortName() + ")");
        }
        if (matrix.columns() < bioAssaysToUse.size()) {
            // having > is okay, there can be extra.
            throw new IllegalStateException("Matrix from APT had the wrong number of colummns: expected " + bioAssaysToUse.size() + ", got " + matrix.columns());
        }
        AffyPowerToolsProbesetSummarize.log.info("Read " + matrix.rows() + " x " + matrix.columns() + ", matching with " + bioAssaysToUse.size() + " samples on " + targetPlatform);

        BioAssayDimension bad = BioAssayDimension.Factory.newInstance();
        bad.setName("For " + ee.getShortName() + " on " + targetPlatform);
        bad.setDescription("Generated from output of apt-probeset-summarize");

        // Index the bioassays by both accession and name, so a column can be matched through either.
        Map<String, BioAssay> bmap = new HashMap<>();
        for (BioAssay bioAssay : bioAssaysToUse) {
            assert bioAssay.getArrayDesignUsed().equals(targetPlatform);
            if (bmap.containsKey(bioAssay.getAccession().getAccession()) || bmap.containsKey(bioAssay.getName())) {
                throw new IllegalStateException("Duplicate");
            }
            bmap.put(bioAssay.getAccession().getAccession(), bioAssay);
            bmap.put(bioAssay.getName(), bioAssay);
        }
        if (AffyPowerToolsProbesetSummarize.log.isDebugEnabled()) {
            AffyPowerToolsProbesetSummarize.log.debug("Will match result data file columns to bioassays referred to by any of the following strings:\n" + StringUtils.join(bmap.keySet(), "\n"));
        }

        int found = 0;
        List<String> columnsToKeep = new ArrayList<>();
        for (int i = 0; i < matrix.columns(); i++) {
            String columnName = matrix.getColName(i);
            // The dot is escaped so that only a literal ".CEL"/".cel" extension is stripped; an unescaped dot
            // would also eat the last four characters of any name that merely ends in "CEL" or "cel".
            String sampleName = columnName.replaceAll("\\.(CEL|cel)$", "");

            BioAssay assay = null;
            if (sampleName.matches("^GSM[0-9]+_.+")) {
                // Patterns like GSM476194_SK_09-BALBcJ_622.CEL: the GEO accession is the part before the first '_'.
                String geoAcc = sampleName.split("_")[0];
                AffyPowerToolsProbesetSummarize.log.info("Found column for " + geoAcc);
                assay = bmap.get(geoAcc);
                if (assay == null) {
                    AffyPowerToolsProbesetSummarize.log.warn("No bioassay for " + geoAcc);
                }
            } else {
                // Sometimes column names are like Aud_19L.CEL; try the whole sample name as the key.
                assay = bmap.get(sampleName);
            }

            if (assay == null) {
                // An unmatched column is okay only if the file has extra columns.
                if (matrix.columns() == bioAssaysToUse.size()) {
                    throw new IllegalStateException("No bioassay could be matched to CEL file identified by " + sampleName);
                }
                AffyPowerToolsProbesetSummarize.log.warn("No bioassay for " + sampleName);
                continue;
            }

            AffyPowerToolsProbesetSummarize.log.info("Matching CEL sample " + sampleName + " to bioassay " + assay + " [" + assay.getAccession().getAccession() + "]");
            columnsToKeep.add(columnName);
            assert assay.getArrayDesignUsed().equals(targetPlatform);
            bad.getBioAssays().add(assay);
            found++;
        }

        if (found != bioAssaysToUse.size()) {
            throw new IllegalStateException("Failed to find a data column for every bioassay on the given platform " + targetPlatform);
        }
        // Drop the extra columns that matched no bioassay.
        if (columnsToKeep.size() < matrix.columns()) {
            matrix = matrix.subsetColumns(columnsToKeep);
        }
        if (quantitationType == null) {
            quantitationType = AffyPowerToolsProbesetSummarize.makeAffyQuantitationType();
        }
        return this.convertDesignElementDataVectors(ee, bad, targetPlatform, matrix);
    }
}
use of ubic.gemma.model.expression.bioAssay.BioAssay in project Gemma by PavlidisLab.
the class ExperimentalDesignWriter method write.
/**
 * Writes the experimental design as tab-delimited text: one row per biomaterial, holding the constructed bioassay
 * name, the external id, and then one column per experimental factor with the biomaterial's value for that factor
 * (left empty when the biomaterial has no value for it). The writer is flushed but not closed.
 *
 * @param writer          destination of the output
 * @param ee              experiment whose experimental design supplies the factors
 * @param bioAssays       bioassays to include; they are grouped by the biomaterial they use
 * @param writeBaseHeader whether the header should include the base (comment) part
 * @param writeHeader     whether a header (column names) is written at all
 * @throws IOException when the write failed
 */
public void write(Writer writer, ExpressionExperiment ee, Collection<BioAssay> bioAssays, boolean writeBaseHeader, boolean writeHeader) throws IOException {
    ExperimentalDesign design = ee.getExperimentalDesign();

    /*
     * Group the bioassays by sample. See BaseExpressionDataMatrix.setUpColumnElements() for how this is
     * constructed for the DataMatrix, and for some notes about complications.
     */
    Map<BioMaterial, Collection<BioAssay>> assaysBySample = new HashMap<>();
    for (BioAssay assay : bioAssays) {
        BioMaterial sample = assay.getSampleUsed();
        Collection<BioAssay> group = assaysBySample.get(sample);
        if (group == null) {
            group = new HashSet<BioAssay>();
            assaysBySample.put(sample, group);
        }
        group.add(assay);
    }

    List<ExperimentalFactor> orderedFactors = new ArrayList<>(design.getExperimentalFactors());

    StringBuffer buf = new StringBuffer();
    if (writeHeader) {
        this.writeHeader(ee, orderedFactors, writeBaseHeader, buf);
    }

    for (Map.Entry<BioMaterial, Collection<BioAssay>> row : assaysBySample.entrySet()) {
        BioMaterial sample = row.getKey();
        Collection<BioAssay> assays = row.getValue();

        /* columns 0 and 1 of the design matrix: row name and external id */
        String rowName = ExpressionDataWriterUtils.constructBioAssayName(sample, assays);
        buf.append(rowName).append("\t");
        String externalId = ExpressionDataWriterUtils.getExternalId(sample, assays);
        buf.append(externalId);

        /* columns 2 ... n where n+1 is the number of factors */
        Collection<FactorValue> sampleFactorValues = sample.getFactorValues();
        for (ExperimentalFactor factor : orderedFactors) {
            buf.append("\t");
            for (FactorValue candidate : sampleFactorValues) {
                if (!candidate.getExperimentalFactor().equals(factor)) {
                    log.debug(candidate.getExperimentalFactor() + " didn't match ... trying the next factor.");
                    continue;
                }
                log.debug(candidate.getExperimentalFactor() + " matched.");
                buf.append(ExpressionDataWriterUtils.constructFactorValueName(candidate));
                // Only the first matching factor value is written for a factor.
                break;
            }
        }
        buf.append("\n");
    }

    String text = buf.toString();
    if (log.isDebugEnabled()) {
        log.debug(text);
    }
    writer.write(text);
    writer.flush();
}
Aggregations