use of ubic.gemma.model.expression.bioAssayData.BioAssayDimension in project Gemma by PavlidisLab.
the class GeoConverterImpl method convertVectorsForPlatform.
/**
 * For data coming from a single platform, create vectors.
 * <p>
 * Every column reported by the first sample, except the leading ID / ID_REF column, is treated as a candidate
 * quantitation type. For each one the data vectors are built, a {@link QuantitationType} is created and its
 * parameters guessed from an example value, and each non-empty vector is converted and added to the experiment.
 * The quantitation type is attached to the experiment only if at least one vector was retained.
 *
 * @param values         A GeoValues object holding the parsed results.
 * @param expExp         experiment that receives the converted vectors and quantitation types
 * @param datasetSamples samples for this platform; asserted to be non-empty
 * @param geoPlatform    platform the samples belong to; nothing is converted if it reports that data from GEO
 *                       should not be used
 * @throws IllegalStateException if the bioassay dimension built from the samples contains no bioassays
 */
private void convertVectorsForPlatform(GeoValues values, ExpressionExperiment expExp, List<GeoSample> datasetSamples, GeoPlatform geoPlatform) {
    assert datasetSamples.size() > 0 : "No samples in dataset";
    if (!geoPlatform.useDataFromGeo()) {
        // see bug 4181
        GeoConverterImpl.log.warn("Platform characteristics indicate data from GEO should be ignored or will not be present anyway (" + geoPlatform + ")");
        return;
    }
    GeoConverterImpl.log.info("Converting vectors for " + geoPlatform.getGeoAccession() + ", " + datasetSamples.size() + " samples.");
    BioAssayDimension bioAssayDimension = this.convertGeoSampleList(datasetSamples, expExp);
    if (bioAssayDimension.getBioAssays().size() == 0)
        throw new IllegalStateException("No bioAssays in the BioAssayDimension");
    this.sanityCheckQuantitationTypes(datasetSamples);
    List<String> quantitationTypes = datasetSamples.iterator().next().getColumnNames();
    List<String> quantitationTypeDescriptions = datasetSamples.iterator().next().getColumnDescriptions();
    // Track the column position while iterating instead of looking the name up again with indexOf: that avoids a
    // linear search per column and keeps the description aligned with the column even if a name is repeated.
    int columnAccordingToSample = -1;
    for (String quantitationType : quantitationTypes) {
        columnAccordingToSample++;
        // skip the first quantitationType, it's the ID or ID_REF.
        if (columnAccordingToSample == 0) {
            continue;
        }
        int quantitationTypeIndex = values.getQuantitationTypeIndex(geoPlatform, quantitationType);
        GeoConverterImpl.log.debug("Processing " + quantitationType + " (column=" + quantitationTypeIndex + " - according to sample, it's " + columnAccordingToSample + ")");
        Map<String, List<Object>> dataVectors = this.makeDataVectors(values, datasetSamples, quantitationTypeIndex);
        if (dataVectors == null || dataVectors.size() == 0) {
            GeoConverterImpl.log.debug("No data for " + quantitationType + " (column=" + quantitationTypeIndex + ")");
            continue;
        }
        GeoConverterImpl.log.info(dataVectors.size() + " data vectors for " + quantitationType);

        // Take the example from the first vector that actually has content. The conversion loop below tolerates
        // null/empty vectors, so the first map entry cannot be assumed to hold a value.
        Object exampleValue = null;
        boolean foundExample = false;
        for (List<Object> candidate : dataVectors.values()) {
            if (candidate != null && !candidate.isEmpty()) {
                exampleValue = candidate.iterator().next();
                foundExample = true;
                break;
            }
        }
        if (!foundExample) {
            // Every vector is null or empty, so none could be retained below either.
            GeoConverterImpl.log.info("No vectors were retained for " + quantitationType + " -- usually this is due to all values being missing.");
            continue;
        }

        QuantitationType qt = QuantitationType.Factory.newInstance();
        qt.setName(quantitationType);
        String description = quantitationTypeDescriptions.get(columnAccordingToSample);
        qt.setDescription(description);
        QuantitationTypeParameterGuesser.guessQuantitationTypeParameters(qt, quantitationType, description, exampleValue);
        int count = 0;
        int skipped = 0;
        for (String designElementName : dataVectors.keySet()) {
            List<Object> dataVector = dataVectors.get(designElementName);
            if (dataVector == null || dataVector.size() == 0)
                continue;
            RawExpressionDataVector vector = this.convertDesignElementDataVector(geoPlatform, expExp, bioAssayDimension, designElementName, dataVector, qt);
            if (vector == null) {
                skipped++;
                if (GeoConverterImpl.log.isDebugEnabled())
                    GeoConverterImpl.log.debug("Null vector for DE=" + designElementName + " QT=" + quantitationType);
                continue;
            }
            if (GeoConverterImpl.log.isTraceEnabled()) {
                GeoConverterImpl.log.trace(designElementName + " " + qt.getName() + " " + qt.getRepresentation() + " " + dataVector.size() + " elements in vector");
            }
            expExp.getRawExpressionDataVectors().add(vector);
            if (++count % GeoConverterImpl.LOGGING_VECTOR_COUNT_UPDATE == 0 && GeoConverterImpl.log.isDebugEnabled()) {
                GeoConverterImpl.log.debug(count + " Data vectors added");
            }
        }
        if (count > 0) {
            // Only quantitation types that produced at least one vector are attached to the experiment.
            expExp.getQuantitationTypes().add(qt);
            if (GeoConverterImpl.log.isDebugEnabled() && count > 1000) {
                GeoConverterImpl.log.debug(count + " Data vectors added for '" + quantitationType + "'");
            }
        } else {
            GeoConverterImpl.log.info("No vectors were retained for " + quantitationType + " -- usually this is due to all values being missing.");
        }
        if (skipped > 0) {
            GeoConverterImpl.log.info("Skipped " + skipped + " vectors");
        }
    }
    GeoConverterImpl.log.info("Total of " + expExp.getRawExpressionDataVectors().size() + " vectors on platform " + geoPlatform + ", " + expExp.getQuantitationTypes().size() + " quantitation types.");
}
use of ubic.gemma.model.expression.bioAssayData.BioAssayDimension in project Gemma by PavlidisLab.
the class DataUpdater method makeNewVectors.
/**
 * Builds one raw expression data vector for each row of the matrix that has a design element.
 * <p>
 * The matrix's best bioassay dimension is resolved through {@code assayDimensionService.findOrCreate} and shared
 * by all vectors created here. Rows whose design element is null are skipped.
 *
 * @param ee             experiment set on every created vector
 * @param targetPlatform platform that every row's design element must belong to
 * @param data           matrix supplying the row values and row design elements
 * @param qt             quantitation type set on every created vector
 * @return the newly created vectors (possibly empty)
 * @throws IllegalArgumentException if a row's design element belongs to a platform other than
 *                                  {@code targetPlatform}
 */
private Collection<RawExpressionDataVector> makeNewVectors(ExpressionExperiment ee, ArrayDesign targetPlatform, ExpressionDataDoubleMatrix data, QuantitationType qt) {
    ByteArrayConverter bArrayConverter = new ByteArrayConverter();
    Collection<RawExpressionDataVector> vectors = new HashSet<>();
    BioAssayDimension bioAssayDimension = data.getBestBioAssayDimension();
    assert bioAssayDimension != null;
    assert !bioAssayDimension.getBioAssays().isEmpty();
    bioAssayDimension = assayDimensionService.findOrCreate(bioAssayDimension);
    assert !bioAssayDimension.getBioAssays().isEmpty();
    for (int i = 0; i < data.rows(); i++) {
        // Validate the row first so no conversion or allocation is done for rows that are skipped or rejected.
        CompositeSequence cs = data.getRowElement(i).getDesignElement();
        if (cs == null) {
            continue;
        }
        if (!cs.getArrayDesign().equals(targetPlatform)) {
            throw new IllegalArgumentException("Input data must use the target platform (was: " + cs.getArrayDesign() + ", expected: " + targetPlatform + ")");
        }
        RawExpressionDataVector vector = RawExpressionDataVector.Factory.newInstance();
        vector.setData(bArrayConverter.doubleArrayToBytes(data.getRow(i)));
        vector.setDesignElement(cs);
        vector.setQuantitationType(qt);
        vector.setExpressionExperiment(ee);
        vector.setBioAssayDimension(bioAssayDimension);
        // Added only once fully populated, in case set membership depends on the fields set above.
        vectors.add(vector);
    }
    return vectors;
}
use of ubic.gemma.model.expression.bioAssayData.BioAssayDimension in project Gemma by PavlidisLab.
the class MatrixConversionTest method getDesignElementDataVectors.
/**
 * Builds test vectors for an ugly (but not unusual) layout: two bioassay dimensions of different sizes that draw
 * their samples from the same list of biomaterials.
 *
 * @param quantTypes quantitation types; a fresh pair of bioassay dimensions is built for each one
 * @return design element data vectors
 */
private Collection<DesignElementDataVector> getDesignElementDataVectors(Collection<QuantitationType> quantTypes) {
    Collection<DesignElementDataVector> result = new HashSet<>();

    ArrayDesign platformA = ArrayDesign.Factory.newInstance();
    platformA.setName("junk");
    List<CompositeSequence> probesA = this.getCompositeSequences(platformA);

    ArrayDesign platformB = ArrayDesign.Factory.newInstance();
    platformB.setName("bjunk");
    // The first platform is reused here; this second list is not built from platformB.
    List<CompositeSequence> probesReusingA = this.getCompositeSequences(platformA);

    List<BioMaterial> samples = this.getBioMaterials();

    for (QuantitationType quantType : quantTypes) {
        // Two overlapping bioassay dimensions: the smaller one leaves the last 20 biomaterials unused.
        BioAssayDimension smallDimension = BioAssayDimension.Factory.newInstance();
        Iterator<BioMaterial> smallSamples = samples.iterator();
        for (long id = 0; id < MatrixConversionTest.NUM_BIOMATERIALS - 20; id++) {
            BioAssay assay = BioAssay.Factory.newInstance();
            assay.setName(RandomStringUtils.randomNumeric(5) + "_testbioassay");
            assay.setSampleUsed(smallSamples.next());
            assay.setArrayDesignUsed(platformA);
            assay.setId(id);
            smallDimension.getBioAssays().add(assay);
        }
        smallDimension.setName(RandomStringUtils.randomAlphanumeric(10));

        // The full-size dimension walks the same biomaterials from the start; its assay ids are offset by 20.
        BioAssayDimension fullDimension = BioAssayDimension.Factory.newInstance();
        Iterator<BioMaterial> fullSamples = samples.iterator();
        for (long id = 20; id < MatrixConversionTest.NUM_BIOMATERIALS + 20; id++) {
            BioAssay assay = BioAssay.Factory.newInstance();
            assay.setName(RandomStringUtils.randomNumeric(15) + "_testbioassay");
            assay.setSampleUsed(fullSamples.next());
            assay.setArrayDesignUsed(platformB);
            assay.setId(id);
            fullDimension.getBioAssays().add(assay);
        }
        fullDimension.setName(RandomStringUtils.randomAlphanumeric(10));

        // bio.a gets cs 0-99, bio.b gets 100-199.
        long position = this.loopVectors(result, probesReusingA, quantType, smallDimension, 0L, MatrixConversionTest.NUM_CS - 100);
        // noinspection UnusedAssignment // Better readability
        position = this.loopVectors(result, probesA, quantType, fullDimension, position, MatrixConversionTest.NUM_CS);
    }
    return result;
}
use of ubic.gemma.model.expression.bioAssayData.BioAssayDimension in project Gemma by PavlidisLab.
the class SimpleExpressionDataLoaderServiceImpl method convert.
/**
 * Builds a new {@link ExpressionExperiment} from flat-file metadata and a data matrix. Nothing in this method
 * persists the result.
 * <p>
 * The matrix is divided among the array designs derived from the metadata: each design gets its own sub-matrix,
 * bioassay dimension and set of raw vectors, and the vectors and bioassays of all designs are collected onto the
 * experiment. A warning is logged if the number of matched design elements differs from the number of matrix
 * rows.
 *
 * @param metaData description of the experiment (name, taxon, optional PubMed id, source URL, ...)
 * @param matrix   the expression data
 * @return the populated experiment
 * @throws IllegalArgumentException if either argument is null
 * @throws IllegalStateException    if no sub-matrix could be obtained for one of the array designs
 */
@Override
public ExpressionExperiment convert(SimpleExpressionExperimentMetaData metaData, DoubleMatrix<String, String> matrix) {
    if (matrix == null || metaData == null) {
        throw new IllegalArgumentException("One or all of method arguments was null");
    }
    ExpressionExperiment experiment = ExpressionExperiment.Factory.newInstance();
    Taxon taxon = this.convertTaxon(metaData.getTaxon());
    experiment.setName(metaData.getName());
    experiment.setShortName(metaData.getShortName());
    experiment.setDescription(metaData.getDescription());
    // The leading space keeps the two sentences from running together ("...flat file.Downloaded from ...").
    experiment.setSource("Import via matrix flat file." + (StringUtils.isBlank(metaData.getSourceUrl()) ? "" : " Downloaded from " + metaData.getSourceUrl()));
    ExperimentalDesign ed = ExperimentalDesign.Factory.newInstance();
    experiment.setExperimentalDesign(ed);
    if (metaData.getPubMedId() != null) {
        PubMedXMLFetcher pubfetch = new PubMedXMLFetcher();
        BibliographicReference ref = pubfetch.retrieveByHTTP(metaData.getPubMedId());
        experiment.setPrimaryPublication(ref);
    }
    QuantitationType quantitationType = this.convertQuantitationType(metaData);
    /* set the quantitation types on the experiment */
    Collection<QuantitationType> qTypes = new HashSet<>();
    qTypes.add(quantitationType);
    experiment.setQuantitationTypes(qTypes);
    Collection<ArrayDesign> arrayDesigns = this.convertArrayDesigns(metaData, matrix);
    // Divide up multiple array designs into multiple BioAssayDimensions.
    Collection<RawExpressionDataVector> allVectors = new HashSet<>();
    Collection<BioAssay> allBioAssays = new HashSet<>();
    // Shared across designs and handed to getSubMatrixForArrayDesign on every pass; its final size is compared
    // with the matrix row count below.
    Collection<Object> usedDesignElements = new HashSet<>();
    for (ArrayDesign design : arrayDesigns) {
        SimpleExpressionDataLoaderServiceImpl.log.info("Processing " + design);
        DoubleMatrix<String, String> subMatrix = this.getSubMatrixForArrayDesign(matrix, usedDesignElements, design);
        if (subMatrix == null) {
            throw new IllegalStateException("Got a null matrix");
        }
        BioAssayDimension bad = this.convertBioAssayDimension(experiment, design, taxon, subMatrix);
        Collection<RawExpressionDataVector> vectors = this.convertDesignElementDataVectors(experiment, bad, design, quantitationType, subMatrix);
        allVectors.addAll(vectors);
        allBioAssays.addAll(bad.getBioAssays());
    }
    // sanity
    if (usedDesignElements.size() != matrix.rows()) {
        SimpleExpressionDataLoaderServiceImpl.log.warn("Some rows of matrix were not matched to any of the given platforms (" + matrix.rows() + " rows, " + usedDesignElements.size() + " found)");
    }
    experiment.setRawExpressionDataVectors(allVectors);
    experiment.setBioAssays(allBioAssays);
    return experiment;
}
use of ubic.gemma.model.expression.bioAssayData.BioAssayDimension in project Gemma by PavlidisLab.
the class ExpressionPersister method getBioAssayDimensionFromCacheOrCreate.
/**
 * Returns a non-transient bioassay dimension for the vector, reusing a cached one where possible.
 * <p>
 * If the vector's dimension is already non-transient it is returned untouched. Otherwise the cache is consulted
 * by dimension name: on a miss the dimension's id is cleared, the dimension is persisted and the result cached
 * under that name. In both cases the vector is re-pointed at the cached instance.
 *
 * @param vector vector whose bioassay dimension is to be resolved; updated in place when its dimension is
 *               transient
 * @param c      array design cache passed through to {@code persistBioAssayDimension}
 * @return the vector's own dimension if it is not transient, otherwise the cached dimension for its name
 */
private BioAssayDimension getBioAssayDimensionFromCacheOrCreate(DesignElementDataVector vector, ArrayDesignsForExperimentCache c) {
    BioAssayDimension original = vector.getBioAssayDimension();
    if (!this.isTransient(original)) {
        return original;
    }

    String cacheKey = original.getName();
    if (!bioAssayDimensionCache.containsKey(cacheKey)) {
        // Not seen under this name yet: clear the id, persist, and remember the result.
        original.setId(null);
        bioAssayDimensionCache.put(cacheKey, this.persistBioAssayDimension(original, c));
    }

    BioAssayDimension cached = bioAssayDimensionCache.get(cacheKey);
    vector.setBioAssayDimension(cached);
    assert !this.isTransient(cached);
    return cached;
}
Aggregations