Use of ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector in project Gemma by PavlidisLab.
Class DataUpdater, method replaceData.
/**
 * Replace the data associated with the experiment (or add it if there is none). These data become the 'preferred'
 * quantitation type. Note that this replaces the "raw" data.
 * <p>
 * Similar to AffyPowerToolsProbesetSummarize.convertDesignElementDataVectors and code in
 * SimpleExpressionDataLoaderService.
 * <p>
 * Besides swapping the raw vectors, this method marks the supplied quantitation type as preferred, deletes the
 * experiment's old analyses, records audit events (an extra platform-switch event when the target platform differs
 * from the one currently in use), updates the experiment and runs postprocessing.
 *
 * @param ee             the experiment to be modified
 * @param targetPlatform the platform for the new data (this can only be used for single-platform data sets)
 * @param data           the data to be used; must have at least one row and exactly one quantitation type
 * @return the experiment as returned by postprocessing
 * @throws IllegalArgumentException if the experiment uses more than one platform or no platform at all, if the
 *                                  data has no rows, or if the data does not carry exactly one quantitation type
 * @throws IllegalStateException    if no vectors could be built from the data
 */
public ExpressionExperiment replaceData(ExpressionExperiment ee, ArrayDesign targetPlatform, ExpressionDataDoubleMatrix data) {
    Collection<ArrayDesign> ads = experimentService.getArrayDesignsUsed(ee);
    if (ads.size() > 1) {
        throw new IllegalArgumentException("Can only replace data for an experiment that uses one platform; " + "you must switch/merge first and then provide appropriate replacement data.");
    }
    if (data.rows() == 0) {
        throw new IllegalArgumentException("Data had no rows");
    }
    // Without this guard an experiment with no platform would fail below with a message-less
    // NoSuchElementException from iterator().next().
    if (ads.isEmpty()) {
        throw new IllegalArgumentException("Experiment does not use any platform; cannot determine the original platform");
    }
    ArrayDesign originalArrayDesign = ads.iterator().next();

    Collection<QuantitationType> qts = data.getQuantitationTypes();
    if (qts.size() > 1) {
        throw new IllegalArgumentException("Only supports a single quantitation type");
    }
    if (qts.isEmpty()) {
        throw new IllegalArgumentException("Please supply a quantitation type with the data");
    }
    QuantitationType qt = qts.iterator().next();
    qt.setIsPreferred(true);

    Collection<RawExpressionDataVector> vectors = this.makeNewVectors(ee, targetPlatform, data, qt);
    if (vectors.isEmpty()) {
        throw new IllegalStateException("no vectors!");
    }

    /*
     * remove all analyses, etc.
     */
    analysisUtilService.deleteOldAnalyses(ee);
    ee = experimentService.replaceRawVectors(ee, vectors);

    // audit if we switched platforms.
    if (!targetPlatform.equals(originalArrayDesign)) {
        AuditEventType eventType = ExpressionExperimentPlatformSwitchEvent.Factory.newInstance();
        auditTrailService.addUpdateEvent(ee, eventType, "Switched in course of updating vectors using data input (from " + originalArrayDesign.getShortName() + " to " + targetPlatform.getShortName() + ")");
    }
    this.audit(ee, "Data vector replacement for " + targetPlatform, true);
    experimentService.update(ee);
    ee = this.postprocess(ee);

    assert ee.getNumberOfDataVectors() != null;
    // debug code: only evaluated when assertions are enabled.
    for (BioAssay ba : ee.getBioAssays()) {
        assert ba.getArrayDesignUsed().equals(targetPlatform);
    }
    return ee;
}
Use of ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector in project Gemma by PavlidisLab.
Class ExpressionPersister, method fillInExpressionExperimentDataVectorAssociations.
/**
 * Walks every raw expression data vector of the experiment, fills in its associations through
 * {@code fillInDesignElementDataVectorAssociations}, and gathers the bioassays of each returned dimension.
 * A progress line is logged whenever the stopwatch reads more than 5000 ms, after which it is restarted.
 *
 * @param ee the experiment whose raw vectors are processed
 * @param c  cache object handed through unchanged to the per-vector call
 * @return the distinct bioassays collected from all dimensions, or {@code null} if the current thread was found to
 * be interrupted after processing a vector (note that {@link Thread#interrupted()} clears the interrupt flag)
 */
private Collection<BioAssay> fillInExpressionExperimentDataVectorAssociations(ExpressionExperiment ee, ArrayDesignsForExperimentCache c) {
    AbstractPersister.log.info("Filling in DesignElementDataVectors...");

    Collection<BioAssay> collected = new HashSet<>();
    int processed = 0;
    StopWatch stopWatch = new StopWatch();
    stopWatch.start();

    for (RawExpressionDataVector vector : ee.getRawExpressionDataVectors()) {
        BioAssayDimension dimension = this.fillInDesignElementDataVectorAssociations(vector, c);

        if (stopWatch.getTime() > 5000) {
            // The very first report covers setup cost rather than per-vector progress.
            String progress = processed == 0
                    ? "Setup: " + stopWatch.getTime()
                    : "Filled in " + (processed) + " DesignElementDataVectors (" + stopWatch.getTime() + "ms since last check)";
            AbstractPersister.log.info(progress);
            stopWatch.reset();
            stopWatch.start();
        }

        collected.addAll(dimension.getBioAssays());
        processed++;

        if (Thread.interrupted()) {
            AbstractPersister.log.info("Cancelled");
            return null;
        }
    }

    AbstractPersister.log.info("Filled in total of " + processed + " DesignElementDataVectors, " + collected.size() + " bioassays");
    return collected;
}
Use of ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector in project Gemma by PavlidisLab.
Class TwoChannelMissingValuesImpl, method examineVector.
/**
 * Builds the detection-call vector for one row element and adds it to {@code results}.
 * <p>
 * A new vector is created for the element's design element, tied to {@code source}, {@code present} and the
 * bioassay dimension that {@code baseChannel} reports for that design element. For each column of the preferred
 * matrix, {@code checkMissingValue} is consulted first; columns it accepts are skipped here. For the remaining
 * columns a call is computed from the signal and background values, where an absent signal matrix or a null signal
 * entry is replaced by NaN and an absent background matrix is replaced by NaN. If any computed call is null, the
 * gaps are filled with {@code fillGapsInCalls} before the calls are converted to bytes.
 *
 * @param source                      experiment the new vector is attached to
 * @param preferred                   matrix supplying the column count and the preferred row
 * @param signalChannelA              signal matrix for channel A, may be null
 * @param signalChannelB              signal matrix for channel B, may be null
 * @param bkgChannelA                 background matrix for channel A, may be null
 * @param bkgChannelB                 background matrix for channel B, may be null
 * @param signalToNoiseThreshold      passed through to {@code computeCall}
 * @param extraMissingValueIndicators passed through to {@code checkMissingValue}
 * @param results                     collection the new vector is added to
 * @param converter                   used to serialize the boolean calls
 * @param count                       number of vectors examined before this call
 * @param baseChannel                 matrix providing the bioassay dimension; asserted non-null
 * @param signalThreshold             passed through to {@code computeCall}
 * @param present                     quantitation type assigned to the new vector
 * @param element                     the row element to examine
 * @return {@code count + 1}
 */
private int examineVector(ExpressionExperiment source, ExpressionDataDoubleMatrix preferred, ExpressionDataDoubleMatrix signalChannelA, ExpressionDataDoubleMatrix signalChannelB, ExpressionDataDoubleMatrix bkgChannelA, ExpressionDataDoubleMatrix bkgChannelB, double signalToNoiseThreshold, Collection<Double> extraMissingValueIndicators, Collection<RawExpressionDataVector> results, ByteArrayConverter converter, int count, ExpressionDataDoubleMatrix baseChannel, Double signalThreshold, QuantitationType present, ExpressionDataMatrixRowElement element) {
    CompositeSequence probe = element.getDesignElement();

    RawExpressionDataVector callVector = RawExpressionDataVector.Factory.newInstance();
    callVector.setQuantitationType(present);
    callVector.setExpressionExperiment(source);
    callVector.setDesignElement(probe);
    assert baseChannel != null;
    callVector.setBioAssayDimension(baseChannel.getBioAssayDimension(probe));

    // columns only for this design element!
    int width = preferred.columns(probe);
    Boolean[] calls = new Boolean[width];
    Double[] preferredRow = preferred.getRow(probe);
    Double[] sigRowA = signalChannelA == null ? null : signalChannelA.getRow(probe);
    Double[] sigRowB = signalChannelB == null ? null : signalChannelB.getRow(probe);
    Double[] bkgRowA = bkgChannelA == null ? null : bkgChannelA.getRow(probe);
    Double[] bkgRowB = bkgChannelB == null ? null : bkgChannelB.getRow(probe);

    // tracks whether any column ended up without a call
    boolean sawGap = false;
    for (int i = 0; i < width; i++) {
        if (!this.checkMissingValue(extraMissingValueIndicators, calls, preferredRow, i)) {
            // Background entries are taken as-is (possibly null). Deliberately not a ternary: mixing
            // double and Double operands there would unbox a null entry.
            Double backgroundA;
            if (bkgRowA == null) {
                backgroundA = Double.NaN;
            } else {
                backgroundA = bkgRowA[i];
            }
            Double backgroundB;
            if (bkgRowB == null) {
                backgroundB = Double.NaN;
            } else {
                backgroundB = bkgRowB[i];
            }

            Double signalA = Double.NaN;
            if (sigRowA != null && sigRowA[i] != null) {
                signalA = sigRowA[i];
            }
            Double signalB = Double.NaN;
            if (sigRowB != null && sigRowB[i] != null) {
                signalB = sigRowB[i];
            }

            /*
             * Missing values here wreak havoc. Sometimes in multiarray studies data are missing.
             */
            Boolean call = this.computeCall(signalToNoiseThreshold, signalThreshold, signalA, signalB, backgroundA, backgroundB);
            sawGap |= (call == null);
            calls[i] = call;
        }
    }

    if (sawGap) {
        this.fillGapsInCalls(calls);
    }

    callVector.setData(converter.booleanArrayToBytes(ArrayUtils.toPrimitive(calls)));
    results.add(callVector);

    int examined = count + 1;
    if (examined % 4000 == 0) {
        TwoChannelMissingValuesImpl.log.info(examined + " vectors examined for missing values, " + results.size() + " vectors generated so far.");
    }
    return examined;
}
Use of ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector in project Gemma by PavlidisLab.
Class TwoChannelMissingValuesImpl, method computeMissingValues.
/**
 * Computes missing-value vectors for an experiment.
 * <p>
 * The experiment is thawed, then vectors for its useful quantitation types are loaded: raw vectors if any exist,
 * otherwise processed vectors. A matrix builder over those vectors supplies the preferred, signal (A/B) and
 * background (A/B) matrices. If the builder reports any missing values, a warning is logged for each of the four
 * channel matrices that is affected. The actual computation is delegated to the private overload.
 *
 * @param ee                          the experiment to analyze
 * @param signalToNoiseThreshold      threshold passed through to the computation
 * @param extraMissingValueIndicators additional values to treat as missing; may be null or empty
 * @return a new set containing the vectors produced by the computation
 */
@Override
public Collection<RawExpressionDataVector> computeMissingValues(ExpressionExperiment ee, double signalToNoiseThreshold, Collection<Double> extraMissingValueIndicators) {
    ee = expressionExperimentService.thawLite(ee);
    Collection<QuantitationType> usefulQuantitationTypes = ExpressionDataMatrixBuilder.getUsefulQuantitationTypes(ee);

    StopWatch timer = new StopWatch();
    timer.start();
    TwoChannelMissingValuesImpl.log.info("Loading vectors ...");
    Collection<RawExpressionDataVector> rawVectors = rawExpressionDataVectorService.find(usefulQuantitationTypes);
    Collection<ProcessedExpressionDataVector> procVectors = new HashSet<>();
    if (rawVectors.isEmpty()) {
        // No raw vectors for these quantitation types: fall back to processed vectors.
        procVectors = processedExpressionDataVectorService.find(usefulQuantitationTypes);
        processedExpressionDataVectorService.thaw(procVectors);
    } else {
        rawExpressionDataVectorService.thaw(rawVectors);
    }
    timer.stop();
    this.logTimeInfo(timer, procVectors.size() + rawVectors.size());

    Collection<? extends DesignElementDataVector> builderVectors = new HashSet<>(rawVectors.isEmpty() ? procVectors : rawVectors);
    // Routed through the class logger instead of standard output, like every other message here.
    TwoChannelMissingValuesImpl.log.info("Building matrix with vectors that I just thawed");
    ExpressionDataMatrixBuilder builder = new ExpressionDataMatrixBuilder(builderVectors);
    Collection<BioAssayDimension> dims = builder.getBioAssayDimensions();

    /*
     * Note we have to do this one array design at a time, because we are producing DesignElementDataVectors which
     * must be associated with the correct BioAssayDimension.
     */
    TwoChannelMissingValuesImpl.log.info("Study has " + dims.size() + " bioassaydimensions");
    if (extraMissingValueIndicators != null && !extraMissingValueIndicators.isEmpty()) {
        TwoChannelMissingValuesImpl.log.info("There are " + extraMissingValueIndicators.size() + " manually-set missing value indicators");
    }

    ExpressionDataDoubleMatrix preferredData = builder.getPreferredData();
    ExpressionDataDoubleMatrix bkgDataA = builder.getBackgroundChannelA();
    ExpressionDataDoubleMatrix bkgDataB = builder.getBackgroundChannelB();
    ExpressionDataDoubleMatrix signalDataA = builder.getSignalChannelA();
    ExpressionDataDoubleMatrix signalDataB = builder.getSignalChannelB();

    if (builder.isAnyMissing()) {
        warnIfMissingValues(builder, bkgDataA, "bkgDataA");
        warnIfMissingValues(builder, bkgDataB, "bkgDataB");
        warnIfMissingValues(builder, signalDataA, "signalDataA");
        warnIfMissingValues(builder, signalDataB, "signalDataB");
    }

    Collection<RawExpressionDataVector> dimRes = this.computeMissingValues(ee, preferredData, signalDataA, signalDataB, bkgDataA, bkgDataB, signalToNoiseThreshold, extraMissingValueIndicators);
    return new HashSet<>(dimRes);
}

/**
 * Logs one warning naming {@code label} if the builder reports missing values for any quantitation type of
 * {@code matrix}. Does nothing when {@code matrix} is null.
 */
private static void warnIfMissingValues(ExpressionDataMatrixBuilder builder, ExpressionDataDoubleMatrix matrix, String label) {
    if (matrix == null) {
        return;
    }
    for (QuantitationType qt : matrix.getQuantitationTypes()) {
        if (builder.getNumMissingValues(qt) > 0) {
            TwoChannelMissingValuesImpl.log.warn("Missing values in " + label);
            return;
        }
    }
}
Use of ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector in project Gemma by PavlidisLab.
Class TwoChannelMissingValuesImpl, method computeMissingValues.
/**
 * Attempt to compute 'missing value' information for a two-channel data set. The computation is attempted even when
 * background intensities or one of the intensity channels are unavailable, although having all four sets of values
 * is obviously preferable.
 * <p>
 * When validation of the inputs fails, a warning is logged and an empty collection is returned. Otherwise the
 * missing-data quantitation type is added to the source experiment, every row element of the base channel (signal
 * channel A, or B when A is absent) is examined, and the resulting vectors are handed to the helper service for
 * persisting.
 *
 * @param source                      the source
 * @param preferred                   preferred matrix
 * @param signalChannelA              signal channel A
 * @param signalChannelB              signal channel B
 * @param bkgChannelA                 background channel A
 * @param bkgChannelB                 background channel B
 * @param signalToNoiseThreshold      noise threshold
 * @param extraMissingValueIndicators extra missing value indicators
 * @return DesignElementDataVectors corresponding to a new PRESENTCALL quantitation type for the design elements and
 * biomaterial dimension represented in the inputs.
 */
private Collection<RawExpressionDataVector> computeMissingValues(ExpressionExperiment source, ExpressionDataDoubleMatrix preferred, ExpressionDataDoubleMatrix signalChannelA, ExpressionDataDoubleMatrix signalChannelB, ExpressionDataDoubleMatrix bkgChannelA, ExpressionDataDoubleMatrix bkgChannelB, double signalToNoiseThreshold, Collection<Double> extraMissingValueIndicators) {
    boolean inputsUsable = this.validate(preferred, signalChannelA, signalChannelB, bkgChannelA, bkgChannelB, signalToNoiseThreshold);
    Collection<RawExpressionDataVector> generated = new HashSet<>();
    if (!inputsUsable) {
        TwoChannelMissingValuesImpl.log.warn("Missing value computation cannot proceed");
        return generated;
    }

    ByteArrayConverter byteConverter = new ByteArrayConverter();
    ExpressionDataDoubleMatrix baseChannel = signalChannelA != null ? signalChannelA : signalChannelB;

    // A signal threshold is only derived when there is no background information at all.
    Double signalThreshold;
    if (bkgChannelA != null || bkgChannelB != null) {
        signalThreshold = Double.NaN;
    } else {
        signalThreshold = this.computeSignalThreshold(preferred, signalChannelA, signalChannelB, baseChannel);
    }

    QuantitationType presentCallType = this.getMissingDataQuantitationType(signalToNoiseThreshold, signalThreshold);
    source.getQuantitationTypes().add(presentCallType);

    int examined = 0;
    for (ExpressionDataMatrixRowElement row : baseChannel.getRowElements()) {
        examined = this.examineVector(source, preferred, signalChannelA, signalChannelB, bkgChannelA, bkgChannelB, signalToNoiseThreshold, extraMissingValueIndicators, generated, byteConverter, examined, baseChannel, signalThreshold, presentCallType, row);
    }
    TwoChannelMissingValuesImpl.log.info("Finished: " + examined + " vectors examined for missing values");

    return twoChannelMissingValueHelperService.persist(source, generated);
}
Aggregations