Use of ubic.basecode.dataStructure.matrix.DenseDoubleMatrix in project Gemma by PavlidisLab.
From the class DataUpdaterTest, method testAddData.
/**
 * Loads GEO series GSE37646 from the local test files, attaches made-up data on a freshly created test platform,
 * and checks replacing data, adding non-preferred data, and the rejection of a second preferred data set.
 *
 * @throws Exception if any of the services involved fails
 */
@Test
public void testAddData() throws Exception {
    // Load a regular data set that has no data. Platform is (basically) irrelevant.
    geoService.setGeoDomainObjectGenerator(new GeoDomainObjectGeneratorLocal(this.getTestFileBasePath()));

    ExpressionExperiment ee;
    try {
        // RNA-seq data.
        Collection<?> loaded = geoService.fetchAndLoad("GSE37646", false, true, false);
        ee = (ExpressionExperiment) loaded.iterator().next();
    } catch (AlreadyExistsInSystemException alreadyLoaded) {
        // The experiment was not removed before this run; continue with the first object the exception carries.
        ee = (ExpressionExperiment) ((List<?>) alreadyLoaded.getData()).get(0);
    }
    ee = experimentService.thawLite(ee);

    List<BioAssay> assays = new ArrayList<>(ee.getBioAssays());
    assertEquals(31, assays.size());

    // One sample per assay, in assay order; these become the matrix columns.
    List<BioMaterial> samples = new ArrayList<>();
    for (BioAssay assay : assays) {
        samples.add(assay.getSampleUsed());
    }

    targetArrayDesign = this.getTestPersistentArrayDesign(100, true);
    DoubleMatrix<CompositeSequence, BioMaterial> fakeValues = new DenseDoubleMatrix<>(
            targetArrayDesign.getCompositeSequences().size(), samples.size());

    // Make up some fake data on another platform, and match it to those samples.
    for (int row = 0; row < fakeValues.rows(); row++) {
        for (int col = 0; col < fakeValues.columns(); col++) {
            int weight = (row + 1) * (col + 1);
            fakeValues.set(row, col, weight * Math.random() / 100.0);
        }
    }

    List<CompositeSequence> probes = new ArrayList<>(targetArrayDesign.getCompositeSequences());
    fakeValues.setRowNames(probes);
    fakeValues.setColumnNames(samples);

    QuantitationType preferredQt = this.makeQt(true);
    ExpressionDataDoubleMatrix preferredData = new ExpressionDataDoubleMatrix(ee, preferredQt, fakeValues);
    assertNotNull(preferredData.getBestBioAssayDimension());
    assertEquals(fakeValues.columns(), preferredData.getBestBioAssayDimension().getBioAssays().size());
    assertEquals(probes.size(), preferredData.getMatrix().rows());

    // Replace whatever data the experiment has with the fake data.
    ee = dataUpdater.replaceData(ee, targetArrayDesign, preferredData);
    for (BioAssay assay : ee.getBioAssays()) {
        assertEquals(targetArrayDesign, assay.getArrayDesignUsed());
    }

    // The platform switch must still be visible after a full thaw.
    ee = experimentService.thaw(ee);
    for (BioAssay assay : ee.getBioAssays()) {
        assertEquals(targetArrayDesign, assay.getArrayDesignUsed());
    }

    assertEquals(100, ee.getRawExpressionDataVectors().size());
    for (RawExpressionDataVector rawVector : ee.getRawExpressionDataVectors()) {
        assertTrue(rawVector.getQuantitationType().getIsPreferred());
    }

    assertEquals(100, ee.getProcessedExpressionDataVectors().size());
    for (DoubleVectorValueObject processedVector : dataVectorService.getProcessedDataArrays(ee)) {
        assertEquals(31, processedVector.getBioAssays().size());
    }

    // Test adding data (non-preferred).
    QuantitationType nonPreferredQt = this.makeQt(false);
    ExpressionDataDoubleMatrix moreData = new ExpressionDataDoubleMatrix(ee, nonPreferredQt, fakeValues);
    ee = dataUpdater.addData(ee, targetArrayDesign, moreData);
    ee = experimentService.thaw(ee);

    try {
        // Add preferred data twice; the test expects this to be refused.
        dataUpdater.addData(ee, targetArrayDesign, preferredData);
        fail("Should have gotten an exception");
    } catch (IllegalArgumentException expected) {
        // okay.
    }

    dataUpdater.deleteData(ee, nonPreferredQt);
}
Use of ubic.basecode.dataStructure.matrix.DenseDoubleMatrix in project Gemma by PavlidisLab.
From the class ExpressionDataSVD, method equalize.
/**
 * Implements the method described in the SPELL paper, alternative interpretation as related by Q. Morris: every
 * component is given equal weight by setting all singular values to 1 before the matrix is rebuilt as U * S * V^T.
 *
 * @return the reconstructed matrix; values that were missing before are re-masked.
 */
public ExpressionDataDoubleMatrix equalize() {
    // Work on a copy of S; the diagonal of the copy is overwritten with ones.
    DoubleMatrix<Integer, Integer> unitS = svd.getS().copy();
    for (int d = 0; d < unitS.columns(); d++) {
        unitS.set(d, d, 1.0);
    }

    DoubleMatrix2D u = new DenseDoubleMatrix2D(svd.getU().getRawMatrix());
    DoubleMatrix2D s = new DenseDoubleMatrix2D(unitS.getRawMatrix());
    DoubleMatrix2D v = new DenseDoubleMatrix2D(svd.getV().getRawMatrix());

    // Rebuild the data as (U * S) * V^T.
    Algebra algebra = new Algebra();
    DoubleMatrix2D uTimesS = algebra.mult(u, s);
    DoubleMatrix2D product = algebra.mult(uTimesS, algebra.transpose(v));

    DoubleMatrix<CompositeSequence, BioMaterial> rebuilt = new DenseDoubleMatrix<>(product.toArray());
    rebuilt.setRowNames(this.expressionData.getMatrix().getRowNames());
    rebuilt.setColumnNames(this.expressionData.getMatrix().getColNames());

    // Re-mask the missing values: wherever missingValueInfo holds NaN, the rebuilt cell becomes NaN as well.
    for (int row = 0; row < rebuilt.rows(); row++) {
        for (int col = 0; col < rebuilt.columns(); col++) {
            boolean wasMissing = Double.isNaN(this.missingValueInfo.get(row, col));
            if (wasMissing) {
                rebuilt.set(row, col, Double.NaN);
            }
        }
    }

    return new ExpressionDataDoubleMatrix(this.expressionData, rebuilt);
}
Use of ubic.basecode.dataStructure.matrix.DenseDoubleMatrix in project Gemma by PavlidisLab.
From the class SampleCoexpressionAnalysisDaoImpl, method load.
/**
 * Loads the sample coexpression matrix stored for an experiment and labels its rows and columns with the
 * bioassays of the stored bioassay dimension. If several matrices are found, a warning is logged and the first
 * one returned by the iterator is used.
 *
 * @param ee the experiment to look up
 * @return the matrix, or null when no matrix is found or when the stored bytes are rejected with an
 *         IllegalArgumentException during conversion. If setting the row or column names fails with an
 *         IllegalArgumentException, the error is logged and the matrix is still returned.
 * @throws IllegalArgumentException if the stored bioassay dimension contains no bioassays
 */
@Override
public DoubleMatrix<BioAssay, BioAssay> load(ExpressionExperiment ee) {
    Collection<SampleCoexpressionMatrix> stored = this.findByExperiment(ee);
    if (stored.isEmpty()) {
        return null;
    }
    if (stored.size() > 1) {
        AbstractDao.log.warn("More than one matrix was available, only the first is being returned.");
    }

    SampleCoexpressionMatrix first = stored.iterator().next();
    byte[] serialized = first.getCoexpressionMatrix();
    final List<BioAssay> samples = first.getBioAssayDimension().getBioAssays();

    int sampleCount = samples.size();
    if (sampleCount == 0) {
        throw new IllegalArgumentException("No bioassays in the bioassaydimension with id=" + first.getBioAssayDimension().getId());
    }

    // The sample count is handed to the converter together with the stored bytes.
    double[][] values;
    try {
        values = SampleCoexpressionAnalysisDaoImpl.bac.byteArrayToDoubleMatrix(serialized, sampleCount);
    } catch (IllegalArgumentException conversionFailure) {
        AbstractDao.log.error("EE id = " + ee.getId() + ": " + conversionFailure.getMessage());
        return null;
    }

    DoubleMatrix<BioAssay, BioAssay> labelled = new DenseDoubleMatrix<>(values);

    // Rows and columns are labelled independently, so a failure on one does not skip the other.
    try {
        labelled.setRowNames(samples);
    } catch (IllegalArgumentException rowNameFailure) {
        AbstractDao.log.error("EE id = " + ee.getId() + ": " + rowNameFailure.getLocalizedMessage());
    }
    try {
        labelled.setColumnNames(samples);
    } catch (IllegalArgumentException columnNameFailure) {
        AbstractDao.log.error("EE id = " + ee.getId() + ": " + columnNameFailure.getLocalizedMessage());
    }

    return labelled;
}
Use of ubic.basecode.dataStructure.matrix.DenseDoubleMatrix in project Gemma by PavlidisLab.
From the class SampleCoexpressionMatrixServiceImpl, method getMatrix.
/**
 * Builds the bioassay-by-bioassay correlation matrix for an expression data matrix. The data are transposed so
 * that samples are rows, each row is re-labelled with a bioassay of its biomaterial, and the result is passed to
 * {@code MatrixStats.correlationMatrix}.
 *
 * @param matrix the expression data
 * @return the matrix produced by {@code MatrixStats.correlationMatrix} for the re-labelled, transposed data
 */
private static DoubleMatrix<BioAssay, BioAssay> getMatrix(ExpressionDataDoubleMatrix matrix) {
    DoubleMatrix<BioMaterial, CompositeSequence> bySample = matrix.getMatrix().transpose();

    // Same values, but with rows keyed by bioassay rather than biomaterial.
    DoubleMatrix<BioAssay, CompositeSequence> byAssay = new DenseDoubleMatrix<>(bySample.getRawMatrix());
    byAssay.setColumnNames(bySample.getColNames());

    for (int row = 0; row < byAssay.rows(); row++) {
        BioMaterial sample = bySample.getRowName(row);
        // Takes whichever bioassay the collection's iterator yields first.
        BioAssay assay = sample.getBioAssaysUsedIn().iterator().next();
        byAssay.setRowName(assay, row);
    }

    return MatrixStats.correlationMatrix(byAssay);
}
Use of ubic.basecode.dataStructure.matrix.DenseDoubleMatrix in project Gemma by PavlidisLab.
From the class ExpressionExperimentBatchCorrectionServiceImpl, method doComBat.
/**
 * Runs ComBat on an expression data matrix and wraps the corrected values in a new
 * {@link ExpressionDataDoubleMatrix} carrying a newly made quantitation type.
 * <p>
 * If the scale of the first quantitation type is not one of LOG2, LOG10, LOGBASEUNKNOWN or LN, the data are
 * log-transformed before ComBat runs and the corrected values are un-logged afterwards.
 *
 * @param ee                 the experiment; its id and short name are used to label the ComBat plots
 * @param originalDataMatrix the data to correct
 * @param design             the design matrix of factor values per biomaterial
 * @return the corrected data matrix
 */
private ExpressionDataDoubleMatrix doComBat(ExpressionExperiment ee, ExpressionDataDoubleMatrix originalDataMatrix, ObjectMatrix<BioMaterial, ExperimentalFactor, Object> design) {
    ObjectMatrix<BioMaterial, String, Object> stringDesign = this.convertFactorValuesToStrings(design);
    DoubleMatrix<CompositeSequence, BioMaterial> values = originalDataMatrix.getMatrix();
    ObjectMatrix<BioMaterial, String, Object> orderedDesign = this.orderMatrix(values, stringDesign);

    ScaleType scale = originalDataMatrix.getQuantitationTypes().iterator().next().getScale();
    boolean onLogScale = scale.equals(ScaleType.LOG2)
            || scale.equals(ScaleType.LOG10)
            || scale.equals(ScaleType.LOGBASEUNKNOWN)
            || scale.equals(ScaleType.LN);
    boolean transformed = !onLogScale;
    if (transformed) {
        ExpressionExperimentBatchCorrectionServiceImpl.log.info(" *** COMBAT: LOG TRANSFORMING ***");
        // NOTE(review): the return value is not used, so this presumably modifies the matrix obtained from
        // originalDataMatrix in place - confirm whether the caller's data are affected.
        MatrixStats.logTransform(values);
    }

    // Process. Per the original note, passing false instead would select the NONPARAMETRIC variant.
    ComBat<CompositeSequence, BioMaterial> comBat = new ComBat<>(values, orderedDesign);
    DoubleMatrix2D results = comBat.run(true);

    // Note these plots always reflect the parametric setup. TEMPORARY?
    String plotLabel = ee.getId() + "." + FileTools.cleanForFileName(ee.getShortName());
    comBat.plot(plotLabel);

    // Postprocess. Results is a raw matrix, so the row and column names are copied over from the input.
    DoubleMatrix<CompositeSequence, BioMaterial> corrected = new DenseDoubleMatrix<>(results.toArray());
    corrected.setRowNames(values.getRowNames());
    corrected.setColumnNames(values.getColNames());
    if (transformed) {
        MatrixStats.unLogTransform(corrected);
    }

    ExpressionDataDoubleMatrix wrapped = new ExpressionDataDoubleMatrix(originalDataMatrix, corrected);
    assert wrapped.getQuantitationTypes().size() == 1;

    // It is easier if we make a new quantitation type and swap it in for the old one.
    QuantitationType newQt = this.makeNewQuantitationType(wrapped.getQuantitationTypes().iterator().next());
    wrapped.getQuantitationTypes().clear();
    wrapped.getQuantitationTypes().add(newQt);

    // Sanity check: column order must match the original matrix.
    for (int col = 0; col < wrapped.columns(); col++) {
        assert wrapped.getBioMaterialForColumn(col).equals(originalDataMatrix.getBioMaterialForColumn(col));
    }

    return wrapped;
}
Aggregations