Use of ubic.gemma.model.expression.biomaterial.BioMaterial in project Gemma by PavlidisLab.
From the class BaseExpressionDataMatrix, method getBioMaterialGroupsForAssays.
/**
 * Groups the given bioassays by the biomaterial (sample) each one was run on, accumulating into the supplied map.
 * Existing groups in the map are extended; a new group is created the first time a biomaterial is encountered.
 *
 * @param bioMaterialMap map from biomaterial to the assays that used it; modified in place
 * @param bioAssays      the assays to add to the map
 */
private void getBioMaterialGroupsForAssays(Map<BioMaterial, Collection<BioAssay>> bioMaterialMap, List<BioAssay> bioAssays) {
    for (BioAssay assay : bioAssays) {
        if (BaseExpressionDataMatrix.log.isDebugEnabled()) {
            BaseExpressionDataMatrix.log.debug(" " + assay);
        }

        BioMaterial sample = assay.getSampleUsed();

        // Look up the group for this sample, creating and registering it on first sight.
        Collection<BioAssay> assaysForSample;
        if (bioMaterialMap.containsKey(sample)) {
            assaysForSample = bioMaterialMap.get(sample);
        } else {
            assaysForSample = new HashSet<>();
            bioMaterialMap.put(sample, assaysForSample);
        }
        assaysForSample.add(assay);
    }
}
Use of ubic.gemma.model.expression.biomaterial.BioMaterial in project Gemma by PavlidisLab.
From the class BaseExpressionDataMatrix, method getBestBioAssayDimension.
/**
 * Selects the BioAssayDimension that should represent the columns of this matrix.
 * <p>
 * With a single dimension, that dimension is returned. With several, the one with the most bioassays is chosen,
 * and it is then verified that it covers every biomaterial used by any of the dimensions.
 *
 * @return the chosen BioAssayDimension
 * @throws IllegalStateException            if there are multiple dimensions and the largest one does not include
 *                                          every biomaterial seen across all of them
 * @throws java.util.NoSuchElementException if no dimensions are registered
 */
@Override
public BioAssayDimension getBestBioAssayDimension() {
    Collection<BioAssayDimension> dims = new HashSet<>(this.bioAssayDimensions.values());
    BioAssayDimension b = dims.iterator().next();
    if (dims.size() > 1) {
        /*
         * Special complication if there is more than one BioAssayDimension
         */
        int s = -1;
        Collection<BioMaterial> allBioMaterials = new HashSet<>();
        // find the largest BioAssayDimension, and collect the biomaterials used by *every* dimension
        for (BioAssayDimension bioAssayDimension : dims) {
            if (bioAssayDimension.getBioAssays().size() > s) {
                s = bioAssayDimension.getBioAssays().size();
                b = bioAssayDimension;
            }
            // Must iterate the current dimension, not the best-so-far, or the coverage check below is vacuous.
            for (BioAssay ba : bioAssayDimension.getBioAssays()) {
                allBioMaterials.add(ba.getSampleUsed());
            }
        }

        Collection<BioMaterial> bioMaterialsInBest = new HashSet<>();
        for (BioAssay ba : b.getBioAssays()) {
            bioMaterialsInBest.add(ba.getSampleUsed());
        }

        if (!bioMaterialsInBest.containsAll(allBioMaterials)) {
            /*
             * In rare cases none of the usual ones has all the samples.
             *
             * This can also happen if the data are not sample-matched or vector-merged
             */
            throw new IllegalStateException("Could not find an appropriate BioAssayDimension to represent the data matrix; data might need to be matched or merged");
        }
    }
    return b;
}
Use of ubic.gemma.model.expression.biomaterial.BioMaterial in project Gemma by PavlidisLab.
From the class BaseExpressionDataMatrix, method setUpColumnElements.
/**
 * Assigns a matrix column to every biomaterial and records which bioassays map to which column.
 * <p>
 * Note: In the current versions of Gemma, we require that there can be only a single BioAssayDimension, so this
 * code is more general than strictly needed. If an experiment has multiple BioAssayDimensions (due to multiple
 * arrays), the vectors are merged (e.g., needed in the last case shown below). However, the issue of having
 * multiple "BioMaterials" per "BioAssay" still exists.
 * <p>
 * The bioassay dimensions can vary in size and need not overlap in the biomaterials used. When there is a single
 * BioAssayDimension this reduces to associating each column with a bioassay (though an integer is used under the
 * hood).
 * <p>
 * In the diagrams below "-" indicates a biomaterial and "*" indicates a bioassay. Each row of "*" indicates samples
 * run on a different microarray design. The examples assume there is just a single biomaterial dimension.
 * <pre>
 * ---------------
 * *****              -- only a few samples run on this platform
 * **********         -- ditto
 *            ****    -- these samples were not run on any of the other platforms .
 * </pre>
 * <p>
 * A simpler case:
 * </p>
 * <pre>
 * ---------------
 * ***************
 * ***********
 * *******
 * </pre>
 * <p>
 * A more typical and easy case (one microarray design used):
 * </p>
 * <pre>
 * ----------------
 * ****************
 * </pre>
 * <p>
 * If every sample was run on two different array designs:
 * </p>
 * <pre>
 * ----------------
 * ****************
 * ****************
 * </pre>
 * <p>
 * Every sample was run on a different array design:
 * </p>
 * <pre>
 * -----------------------
 * ******
 *       *********
 *                ********
 * </pre>
 * <p>
 * Because there can be limited or no overlap between the bioassay dimensions, the dimensions of the matrix cannot
 * be assumed to be defined by the longest BioAssayDimension. Later in processing this possible lack of overlap is
 * fixed by sample matching or vector merging; this class has to deal with the general case.
 * </p>
 *
 * @return the number of columns, i.e. the number of entries in the column-to-biomaterial map
 */
int setUpColumnElements() {
    BaseExpressionDataMatrix.log.debug("Setting up column elements");
    assert this.bioAssayDimensions != null && this.bioAssayDimensions.size() > 0 : "No bioAssayDimensions defined";

    // Insertion-ordered so that columns are numbered in the order biomaterials are first encountered.
    Map<BioMaterial, Collection<BioAssay>> assaysBySample = new LinkedHashMap<>();
    for (BioAssayDimension dim : this.bioAssayDimensions.values()) {
        List<BioAssay> assaysInDim = dim.getBioAssays();
        BaseExpressionDataMatrix.log.debug("Processing: " + dim + " with " + assaysInDim.size() + " assays");
        this.getBioMaterialGroupsForAssays(assaysBySample, assaysInDim);
    }

    if (BaseExpressionDataMatrix.log.isDebugEnabled()) {
        BaseExpressionDataMatrix.log.debug(assaysBySample.size() + " biomaterialGroups (correspond to columns)");
    }

    int nextColumn = 0;
    for (Map.Entry<BioMaterial, Collection<BioAssay>> group : assaysBySample.entrySet()) {
        BioMaterial sample = group.getKey();
        if (BaseExpressionDataMatrix.log.isDebugEnabled()) {
            BaseExpressionDataMatrix.log.debug("Column " + nextColumn + " **--->>>> " + sample);
        }

        for (BioAssay assay : group.getValue()) {
            int target;
            if (!this.columnBioMaterialMap.containsKey(sample)) {
                // First assay seen for this sample: claim the next column for it.
                target = nextColumn;
                if (BaseExpressionDataMatrix.log.isDebugEnabled()) {
                    BaseExpressionDataMatrix.log.debug(sample + " --> column " + target);
                    BaseExpressionDataMatrix.log.debug(assay + " --> column " + target);
                }
                this.columnBioMaterialMap.put(sample, target);
                this.columnAssayMap.put(assay, target);
                columnBioMaterialMapByInteger.put(target, sample);
            } else {
                // Sample already has a column: route this assay to the same one.
                target = columnBioMaterialMap.get(sample);
                this.columnAssayMap.put(assay, target);
                if (BaseExpressionDataMatrix.log.isDebugEnabled()) {
                    BaseExpressionDataMatrix.log.debug(assay + " --> column " + target);
                }
            }

            if (columnBioAssayMapByInteger.get(target) == null) {
                columnBioAssayMapByInteger.put(target, new HashSet<BioAssay>());
            }
            columnBioAssayMapByInteger.get(target).add(assay);
        }

        nextColumn++;
    }

    if (BaseExpressionDataMatrix.log.isDebugEnabled()) {
        for (BioAssay mapped : this.columnAssayMap.keySet()) {
            BaseExpressionDataMatrix.log.debug(mapped + " " + this.columnAssayMap.get(mapped));
        }
    }

    int columnCount = columnBioMaterialMapByInteger.keySet().size();
    assert assaysBySample.size() == columnCount;
    return columnCount;
}
Use of ubic.gemma.model.expression.biomaterial.BioMaterial in project Gemma by PavlidisLab.
From the class ExpressionExperimentPlatformSwitchService, method doMultiSample.
/**
 * Handles the case where the experiment's samples were run on more than one platform: gathers all
 * BioAssayDimensions associated with the experiment's bioassays and picks the largest one, provided it covers
 * every biomaterial in the experiment.
 *
 * @param ee         the experiment being switched
 * @param unusedBADs receives every BioAssayDimension found; the largest one is removed from it again
 * @param maxSize    size a dimension has to exceed to be considered
 * @return the largest BioAssayDimension, or null if none exceeded maxSize or the largest one does not include all
 *         of the experiment's biomaterials
 */
private BioAssayDimension doMultiSample(ExpressionExperiment ee, Collection<BioAssayDimension> unusedBADs, int maxSize) {
    BioAssayDimension largest = null;
    int largestSize = maxSize;

    for (BioAssay assay : ee.getBioAssays()) {
        Collection<BioAssayDimension> dimsForAssay = bioAssayService.findBioAssayDimensions(assay);
        for (BioAssayDimension candidate : dimsForAssay) {
            unusedBADs.add(candidate);
            int candidateSize = candidate.getBioAssays().size();
            if (candidateSize > largestSize) {
                largestSize = candidateSize;
                largest = candidate;
            }
        }
    }

    // otherwise we shouldn't be here.
    assert unusedBADs.size() > 1;
    unusedBADs.remove(largest);

    if (largest == null) {
        return null;
    }

    /*
     * Make sure all biomaterials in the study are included in the chosen bioassaydimension. If not, we'd have
     * to make a new BAD. That case is not implemented.
     */
    Collection<BioMaterial> coveredSamples = new HashSet<>();
    for (BioAssay assay : largest.getBioAssays()) {
        coveredSamples.add(assay.getSampleUsed());
    }

    for (BioAssay assay : ee.getBioAssays()) {
        if (coveredSamples.contains(assay.getSampleUsed())) {
            continue;
        }
        ExpressionExperimentPlatformSwitchService.log.warn("This experiment looked like it had samples run on more than one platform, " + "but it also has no BioAssayDimension that is eligible to accomodate all samples (Example: " + assay.getSampleUsed() + ") The experiment will be switched to the merged platform, but no BioAssayDimension switch will be done.");
        return null;
    }

    return largest;
}
Use of ubic.gemma.model.expression.biomaterial.BioMaterial in project Gemma by PavlidisLab.
From the class ExperimentalDesignImporterImpl, method addFactorValuesToBioMaterialsInExpressionExperiment.
/**
 * Add the factor values to the biomaterials.
 * <p>
 * Each line is tab-delimited: the first field identifies the biomaterial, optionally followed by an external id
 * (when the second header field is "ExternalId", compared case-insensitively), followed by one value per factor
 * column. Blank values are treated as missing and skipped. A value is matched, ignoring case, against the factor's
 * existing factor values, falling back to the value of the factor value's single characteristic when the factor
 * value itself has no value string. Factor values with no usable value string cannot be matched and are ignored.
 *
 * @param experimentBioMaterials Current expression experiment's biomaterials.
 * @param experimentalDesign     experimental design
 * @param factorValueLines       Lines from file containing factor values and biomaterial ids
 * @param headerFields           header fields
 * @return the biomaterials for which at least one non-blank factor column was processed
 * @throws IllegalStateException    if a line refers to an unknown biomaterial, or a column header matches no
 *                                  factor in the design
 * @throws IllegalArgumentException if a biomaterial occurs on more than one line
 */
private Collection<BioMaterial> addFactorValuesToBioMaterialsInExpressionExperiment(Collection<BioMaterial> experimentBioMaterials, ExperimentalDesign experimentalDesign, List<String> factorValueLines, String[] headerFields) {
    ExperimentalDesignImporterImpl.log.debug("Adding factors values to biomaterials: " + experimentalDesign.getId());
    Collection<ExperimentalFactor> experimentalFactorsInExperiment = experimentalDesign.getExperimentalFactors();
    Collection<BioMaterial> biomaterialsWithFactorValuesInExperiment = new HashSet<>();
    Collection<BioMaterial> seenBioMaterials = new HashSet<>();
    Map<ExperimentalFactor, Collection<BioMaterial>> factorsAssociatedWithBioMaterials = new HashMap<>();

    // The header is the same for every line, so decide once. equalsIgnoreCase avoids the default-locale
    // dependence of toUpperCase() (e.g. a Turkish locale upper-cases 'i' to a dotted capital).
    boolean hasExternalId = headerFields != null && headerFields.length > 1 && "EXTERNALID".equalsIgnoreCase(headerFields[1]);
    // Factor columns start after the biomaterial id and, if present, the external id.
    int start = hasExternalId ? 2 : 1;

    for (String factorValueLine : factorValueLines) {
        String[] factorValueFields = StringUtils.splitPreserveAllTokens(factorValueLine, "\t");
        String externalId = null;
        if (hasExternalId) {
            externalId = factorValueFields[1];
        }
        BioMaterial currentBioMaterial = this.getBioMaterialFromExpressionExperiment(experimentBioMaterials, factorValueFields[0], externalId);
        if (currentBioMaterial == null) {
            throw new IllegalStateException("No biomaterial for " + factorValueFields[0]);
        }
        if (seenBioMaterials.contains(currentBioMaterial)) {
            throw new IllegalArgumentException("A biomaterial occurred more than once in the file: " + currentBioMaterial);
        }
        seenBioMaterials.add(currentBioMaterial);

        for (int i = start; i < factorValueFields.length; i++) {
            ExperimentalFactor currentExperimentalFactor = null;
            String currentExperimentalFactorName = StringUtils.strip(headerFields[i]);
            FactorValue currentFactorValue = null;
            String currentFVtext = StringUtils.strip(factorValueFields[i]);
            if (StringUtils.isBlank(currentFVtext)) {
                // Missing value. Note that catching 'NA' etc. is hard, because they could be valid strings.
                continue;
            }
            // If several factors share the name, the last one wins.
            for (ExperimentalFactor experimentalFactor : experimentalFactorsInExperiment) {
                if (experimentalFactor.getName().equals(currentExperimentalFactorName)) {
                    currentExperimentalFactor = experimentalFactor;
                }
            }
            if (currentExperimentalFactor == null) {
                throw new IllegalStateException("No factor matches column " + currentExperimentalFactorName);
            }
            Collection<FactorValue> factorValuesInCurrentExperimentalFactor = currentExperimentalFactor.getFactorValues();
            for (FactorValue factorValue : factorValuesInCurrentExperimentalFactor) {
                String fvv = factorValue.getValue();
                if (StringUtils.isBlank(fvv) && factorValue.getCharacteristics().size() == 1) {
                    // try characteristics; this would be a mess if there are more than one.
                    fvv = factorValue.getCharacteristics().iterator().next().getValue();
                }
                if (StringUtils.isBlank(fvv)) {
                    // we can't match to factor values that lack a value string (this also guards against a
                    // null value, which previously caused a NullPointerException below).
                    continue;
                }
                if (fvv.trim().equalsIgnoreCase(currentFVtext)) {
                    currentFactorValue = factorValue;
                }
            }
            /*
             * If we can't find the factorvalue that matches this, we don't get a value for this biomaterial.
             */
            if (currentFactorValue == null) {
                ExperimentalDesignImporterImpl.log.error("No factor value for " + currentExperimentalFactor + " matches the text value=" + currentFVtext);
            } else if (!this.checkForDuplicateFactorOnBioMaterial(currentBioMaterial, currentFactorValue)) {
                currentBioMaterial.getFactorValues().add(currentFactorValue);
                // Only report an addition when one actually happened.
                ExperimentalDesignImporterImpl.log.debug("Added factor value " + currentFactorValue + " to biomaterial " + currentBioMaterial);
            }
            biomaterialsWithFactorValuesInExperiment.add(currentBioMaterial);
            if (!factorsAssociatedWithBioMaterials.containsKey(currentExperimentalFactor)) {
                factorsAssociatedWithBioMaterials.put(currentExperimentalFactor, new HashSet<BioMaterial>());
            }
            factorsAssociatedWithBioMaterials.get(currentExperimentalFactor).add(currentBioMaterial);
        }
    }
    /*
     * Check if every biomaterial got used. Worth a warning, at least.
     */
    for (ExperimentalFactor ef : factorsAssociatedWithBioMaterials.keySet()) {
        if (!factorsAssociatedWithBioMaterials.get(ef).containsAll(experimentBioMaterials)) {
            ExperimentalDesignImporterImpl.log.warn("File did not contain values for all factor - biomaterial combinations: Missing at least one for " + ef + " [populated " + factorsAssociatedWithBioMaterials.get(ef).size() + "/" + experimentBioMaterials.size() + " ]");
        }
    }
    return biomaterialsWithFactorValuesInExperiment;
}
Aggregations