use of ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix in project Gemma by PavlidisLab.
the class LinkAnalysisServiceImpl method processVectors.
/**
 * Runs a link analysis on the given vectors and writes the resulting links to standard output.
 * <p>
 * The vectors are turned into a filtered matrix, checked, normalized, and handed to a new {@link LinkAnalysis}
 * which is then analyzed.
 *
 * @param t                  taxon passed through to the analysis set-up
 * @param dataVectors        processed vectors to analyze
 * @param filterConfig       filtering configuration used to build the matrix and when writing links
 * @param linkAnalysisConfig configuration of the analysis; also supplies the array name used for filtering
 * @return the analysis object after {@code analyze()} has run
 * @throws RuntimeException wrapping any {@link IOException} raised while writing the links
 */
@Override
public LinkAnalysis processVectors(Taxon t, Collection<ProcessedExpressionDataVector> dataVectors, FilterConfig filterConfig, LinkAnalysisConfig linkAnalysisConfig) {
    ExpressionDataDoubleMatrix datamatrix = expressionDataMatrixService.getFilteredMatrix(linkAnalysisConfig.getArrayName(), filterConfig, dataVectors);
    this.checkDatamatrix(datamatrix);
    LinkAnalysis la = new LinkAnalysis(linkAnalysisConfig);
    datamatrix = this.normalize(datamatrix, linkAnalysisConfig);
    this.setUpForAnalysis(t, la, dataVectors, datamatrix);
    la.analyze();

    // Intentionally not closed (no try-with-resources): closing the writer would also close System.out.
    PrintWriter out = new PrintWriter(System.out);
    try {
        this.writeLinks(la, filterConfig, out);
    } catch (IOException e) {
        throw new RuntimeException(e);
    } finally {
        // This PrintWriter is buffered and has no auto-flush; flush so nothing written so far is lost.
        out.flush();
    }
    return la;
}
use of ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix in project Gemma by PavlidisLab.
the class DataUpdater method log2cpmFromCounts.
/**
 * Back-fills log2cpm data for an experiment for which only counts are available. This is not expected to be used
 * routinely, because new experiments get log2cpm computed when they are loaded.
 * <p>
 * The count quantitation type is switched to non-preferred before the log2cpm data are added. If anything in that
 * sequence throws, the exception is logged (not rethrown) and the count quantitation type is set back to
 * preferred.
 *
 * @param ee the experiment to update; it is thawed by this method
 * @param qt the quantitation type used to look up the raw count vectors
 */
public void log2cpmFromCounts(ExpressionExperiment ee, QuantitationType qt) {
    ee = experimentService.thawLite(ee);

    /*
     * The counts are taken from the raw vectors rather than from data that has been normalized etc.
     * NOTE(review): nothing here verifies that qt is currently the preferred type - confirm callers guarantee it.
     */
    Collection<RawExpressionDataVector> countVectors = rawExpressionDataVectorService.find(qt);
    ExpressionDataDoubleMatrix rawCounts = new ExpressionDataDoubleMatrix(countVectors);

    try {
        // Demote the count quantitation type so that it is no longer the preferred one.
        qt.setIsPreferred(false);
        qtService.update(qt);

        // Thaw again so that the updated quantitation type is attached.
        ee = experimentService.thawLite(ee);

        QuantitationType log2cpmQt = this.makelog2cpmQt();
        DoubleMatrix1D columnTotals = MatrixStats.colSums(rawCounts.getMatrix());
        DoubleMatrix<CompositeSequence, BioMaterial> log2cpmValues = MatrixStats.convertToLog2Cpm(rawCounts.getMatrix(), columnTotals);
        ExpressionDataDoubleMatrix log2cpmData = new ExpressionDataDoubleMatrix(ee, log2cpmQt, log2cpmValues);
        assert log2cpmData.getQuantitationTypes().iterator().next().getIsPreferred();

        Collection<ArrayDesign> usedPlatforms = experimentService.getArrayDesignsUsed(ee);
        if (usedPlatforms.size() > 1) {
            throw new IllegalArgumentException("Cannot apply to multiplatform data sets");
        }
        ArrayDesign onlyPlatform = usedPlatforms.iterator().next();
        this.addData(ee, onlyPlatform, log2cpmData);
    } catch (Exception e) {
        DataUpdater.log.error(e, e);
        // Best-effort recovery: make the count quantitation type preferred again.
        qt.setIsPreferred(true);
        qtService.update(qt);
    }
}
use of ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix in project Gemma by PavlidisLab.
the class SampleCoexpressionMatrixServiceImpl method create.
/**
 * Builds the sample-by-sample matrix (called the "cormat" in this code) for an experiment and persists it.
 *
 * @param ee             the experiment whose vectors are loaded
 * @param useRegression  passed through to the data matrix loading step
 * @param removeOutliers passed through to the cormat loading step
 * @return the matrix that was persisted
 * @throws IllegalStateException if the number of rows of the matrix differs from the number of bioassays in the
 *                               best bioassay dimension of the data matrix
 */
@Override
public DoubleMatrix<BioAssay, BioAssay> create(ExpressionExperiment ee, boolean useRegression, boolean removeOutliers) {
    // Load the expression data, then derive the sample matrix from it.
    ExpressionDataDoubleMatrix dataMatrix = this.loadDataMatrix(ee, useRegression, this.loadVectors(ee));
    DoubleMatrix<BioAssay, BioAssay> sampleMatrix = this.loadCorMat(removeOutliers, dataMatrix);

    // The matrix must have exactly one row per bioassay of the best dimension.
    BioAssayDimension dimension = dataMatrix.getBestBioAssayDimension();
    int matrixRows = sampleMatrix.rows();
    int dimensionSize = dimension.getBioAssays().size();
    if (matrixRows != dimensionSize) {
        throw new IllegalStateException("Number of bioassays doesn't match length of the best bioAssayDimension. BAs in dimension: " + dimensionSize + ", rows in cormat: " + matrixRows);
    }

    // Store the result against the experiment the data matrix reports.
    sampleCoexpressionMatrixHelperService.create(sampleMatrix, dimension, dataMatrix.getExpressionExperiment());
    return sampleMatrix;
}
use of ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix in project Gemma by PavlidisLab.
the class TwoChannelMissingValuesImpl method computeMissingValues.
/**
 * Loads the vectors for the useful quantitation types of an experiment, builds the preferred, signal and
 * background matrices from them, and delegates to the matrix-based missing value computation.
 * <p>
 * Raw vectors are used when any are found; otherwise processed vectors are loaded instead.
 *
 * @param ee                          the experiment; it is thawed by this method
 * @param signalToNoiseThreshold      threshold passed through to the missing value computation
 * @param extraMissingValueIndicators manually-set missing value indicators; may be null or empty
 * @return a new set holding the vectors returned by the matrix-based computation
 */
@Override
public Collection<RawExpressionDataVector> computeMissingValues(ExpressionExperiment ee, double signalToNoiseThreshold, Collection<Double> extraMissingValueIndicators) {
    ee = expressionExperimentService.thawLite(ee);
    Collection<QuantitationType> usefulQuantitationTypes = ExpressionDataMatrixBuilder.getUsefulQuantitationTypes(ee);

    StopWatch timer = new StopWatch();
    timer.start();
    TwoChannelMissingValuesImpl.log.info("Loading vectors ...");
    Collection<RawExpressionDataVector> rawVectors = rawExpressionDataVectorService.find(usefulQuantitationTypes);
    Collection<ProcessedExpressionDataVector> procVectors = new HashSet<>();
    if (rawVectors.isEmpty()) {
        // Fall back to processed vectors when there are no raw ones.
        procVectors = processedExpressionDataVectorService.find(usefulQuantitationTypes);
        processedExpressionDataVectorService.thaw(procVectors);
    } else {
        rawExpressionDataVectorService.thaw(rawVectors);
    }
    timer.stop();
    this.logTimeInfo(timer, procVectors.size() + rawVectors.size());

    Collection<? extends DesignElementDataVector> builderVectors = new HashSet<>(rawVectors.isEmpty() ? procVectors : rawVectors);
    // Reported through the class logger, like every other message in this method, instead of System.out.
    TwoChannelMissingValuesImpl.log.info("Building matrix with vectors that I just thawed");
    ExpressionDataMatrixBuilder builder = new ExpressionDataMatrixBuilder(builderVectors);
    Collection<BioAssayDimension> dims = builder.getBioAssayDimensions();

    /*
     * Note we have to do this one array design at a time, because we are producing DesignElementDataVectors which
     * must be associated with the correct BioAssayDimension.
     */
    TwoChannelMissingValuesImpl.log.info("Study has " + dims.size() + " bioassaydimensions");
    if (extraMissingValueIndicators != null && !extraMissingValueIndicators.isEmpty()) {
        TwoChannelMissingValuesImpl.log.info("There are " + extraMissingValueIndicators.size() + " manually-set missing value indicators");
    }

    ExpressionDataDoubleMatrix preferredData = builder.getPreferredData();
    ExpressionDataDoubleMatrix bkgDataA = builder.getBackgroundChannelA();
    ExpressionDataDoubleMatrix bkgDataB = builder.getBackgroundChannelB();
    ExpressionDataDoubleMatrix signalDataA = builder.getSignalChannelA();
    ExpressionDataDoubleMatrix signalDataB = builder.getSignalChannelB();

    if (builder.isAnyMissing()) {
        this.warnIfChannelHasMissingValues(builder, bkgDataA, "bkgDataA");
        this.warnIfChannelHasMissingValues(builder, bkgDataB, "bkgDataB");
        this.warnIfChannelHasMissingValues(builder, signalDataA, "signalDataA");
        this.warnIfChannelHasMissingValues(builder, signalDataB, "signalDataB");
    }

    Collection<RawExpressionDataVector> dimRes = this.computeMissingValues(ee, preferredData, signalDataA, signalDataB, bkgDataA, bkgDataB, signalToNoiseThreshold, extraMissingValueIndicators);
    return new HashSet<>(dimRes);
}

/**
 * Logs a single warning if any quantitation type of the given channel matrix has missing values according to the
 * builder. Does nothing when the channel matrix is null.
 *
 * @param builder     builder that reports the number of missing values per quantitation type
 * @param channel     channel matrix to inspect; may be null
 * @param channelName name of the channel used in the warning message
 */
private void warnIfChannelHasMissingValues(ExpressionDataMatrixBuilder builder, ExpressionDataDoubleMatrix channel, String channelName) {
    if (channel == null) {
        return;
    }
    for (QuantitationType qt : channel.getQuantitationTypes()) {
        if (builder.getNumMissingValues(qt) > 0) {
            TwoChannelMissingValuesImpl.log.warn("Missing values in " + channelName);
            // One warning per channel is enough.
            return;
        }
    }
}
use of ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix in project Gemma by PavlidisLab.
the class TwoChannelMissingValuesImpl method computeMissingValues.
/**
 * Attempts to compute 'missing value' information for a two-channel data set. The computation is attempted even
 * when background intensity information or one of the intensity channels is missing, though it is better to have
 * all four sets of values.
 * <p>
 * When neither background channel is available, a signal threshold is computed from the preferred and signal
 * matrices; otherwise the signal threshold is NaN. A quantitation type for the missing-value data is added to the
 * source experiment's quantitation types, every row element of the base channel (signal channel A, or B when A is
 * null) is examined, and the collected vectors are persisted.
 *
 * @param source                      the source
 * @param preferred                   preferred matrix
 * @param signalChannelA              signal channel A
 * @param signalChannelB              signal channel B
 * @param bkgChannelA                 background channel A
 * @param bkgChannelB                 background channel B
 * @param signalToNoiseThreshold      noise threshold
 * @param extraMissingValueIndicators extra missing value indicators
 * @return the vectors returned by the persistence helper, or an empty collection if validation of the inputs
 *         fails
 */
private Collection<RawExpressionDataVector> computeMissingValues(ExpressionExperiment source, ExpressionDataDoubleMatrix preferred, ExpressionDataDoubleMatrix signalChannelA, ExpressionDataDoubleMatrix signalChannelB, ExpressionDataDoubleMatrix bkgChannelA, ExpressionDataDoubleMatrix bkgChannelB, double signalToNoiseThreshold, Collection<Double> extraMissingValueIndicators) {
    boolean inputsValid = this.validate(preferred, signalChannelA, signalChannelB, bkgChannelA, bkgChannelB, signalToNoiseThreshold);
    Collection<RawExpressionDataVector> vectors = new HashSet<>();
    if (!inputsValid) {
        TwoChannelMissingValuesImpl.log.warn("Missing value computation cannot proceed");
        return vectors;
    }

    ByteArrayConverter converter = new ByteArrayConverter();
    int examined = 0;

    // Rows are taken from signal channel A when it exists, otherwise from signal channel B.
    ExpressionDataDoubleMatrix baseChannel;
    if (signalChannelA != null) {
        baseChannel = signalChannelA;
    } else {
        baseChannel = signalChannelB;
    }

    // A signal threshold is only computed when no background channel is available at all.
    Double signalThreshold;
    if (bkgChannelA != null || bkgChannelB != null) {
        signalThreshold = Double.NaN;
    } else {
        signalThreshold = this.computeSignalThreshold(preferred, signalChannelA, signalChannelB, baseChannel);
    }

    QuantitationType present = this.getMissingDataQuantitationType(signalToNoiseThreshold, signalThreshold);
    source.getQuantitationTypes().add(present);

    for (ExpressionDataMatrixRowElement rowElement : baseChannel.getRowElements()) {
        examined = this.examineVector(source, preferred, signalChannelA, signalChannelB, bkgChannelA, bkgChannelB, signalToNoiseThreshold, extraMissingValueIndicators, vectors, converter, examined, baseChannel, signalThreshold, present, rowElement);
    }
    TwoChannelMissingValuesImpl.log.info("Finished: " + examined + " vectors examined for missing values");

    return twoChannelMissingValueHelperService.persist(source, vectors);
}
Aggregations