use of ubic.gemma.model.expression.bioAssayData.BioAssayDimension in project Gemma by PavlidisLab.
the class ExpressionDataDoubleMatrix method createMatrix.
/**
 * Builds the dense matrix of double values for the supplied vectors.
 * <p>
 * Each cell is first set to {@link Double#NEGATIVE_INFINITY} as an "unset" sentinel. Once all vectors have been
 * written, any cell still equal to the sentinel is replaced by {@link Double#NaN}.
 *
 * @param vectors the data vectors to place in the matrix; the row of each vector is looked up in
 *                {@code rowElementMap} by its design element
 * @param maxSize the number of columns of the matrix
 * @return the filled matrix, with column names taken from {@code getBioMaterialForColumn} and row names taken from
 *         the design elements of the vectors
 * @throws IllegalStateException if the number of values decoded from a vector differs from the number of bioassays
 *                               in its BioAssayDimension
 */
private DoubleMatrix<CompositeSequence, BioMaterial> createMatrix(Collection<? extends DesignElementDataVector> vectors, int maxSize) {
    final int rowCount = this.rowDesignElementMapByInteger.keySet().size();
    DoubleMatrix<CompositeSequence, BioMaterial> result = new DenseDoubleMatrix<>(rowCount, maxSize);

    for (int col = 0; col < result.columns(); col++) {
        result.addColumnName(this.getBioMaterialForColumn(col));
    }

    // Mark every cell as "not yet written" using -Infinity.
    for (int row = 0; row < result.rows(); row++) {
        for (int col = 0; col < result.columns(); col++) {
            result.set(row, col, Double.NEGATIVE_INFINITY);
        }
    }

    ByteArrayConverter converter = new ByteArrayConverter();
    Map<Integer, CompositeSequence> elementsByRow = new TreeMap<>();
    for (DesignElementDataVector dataVector : vectors) {
        BioAssayDimension bioAssayDimension = dataVector.getBioAssayDimension();
        byte[] rawData = dataVector.getData();
        CompositeSequence element = dataVector.getDesignElement();
        assert element != null : "No design element for " + dataVector;

        Integer row = this.rowElementMap.get(element);
        assert row != null;
        elementsByRow.put(row, element);

        double[] values = converter.byteArrayToDoubles(rawData);
        Collection<BioAssay> assays = bioAssayDimension.getBioAssays();
        if (values.length != assays.size()) {
            throw new IllegalStateException("Mismatch: " + values.length + " values in vector ( " + rawData.length + " bytes) for " + element + " got " + assays.size() + " bioassays in the bioAssayDimension");
        }
        this.setMatBioAssayValues(result, row, ArrayUtils.toObject(values), assays, assays.iterator());
    }

    // The row names only matter when the bare matrix is used directly.
    for (int row = 0; row < result.rows(); row++) {
        result.addRowName(elementsByRow.get(row));
    }
    assert result.getRowNames().size() == result.rows();

    // Whatever still carries the sentinel was never written: turn it into a missing value.
    for (int row = 0; row < result.rows(); row++) {
        for (int col = 0; col < result.columns(); col++) {
            if (result.get(row, col) == Double.NEGATIVE_INFINITY) {
                result.set(row, col, Double.NaN);
            }
        }
    }

    ExpressionDataDoubleMatrix.log.debug("Created a " + result.rows() + " x " + result.columns() + " matrix");
    return result;
}
use of ubic.gemma.model.expression.bioAssayData.BioAssayDimension in project Gemma by PavlidisLab.
the class LinkAnalysisCli method doWork.
/**
 * Runs the link analysis command.
 * <p>
 * After processing the command line, one of the following happens:
 * <ul>
 * <li>if {@code initializeFromOldData} is set, links are initialized from old data for the taxon;</li>
 * <li>if {@code updateNodeDegree} is set, the taxon is loaded and node degrees are updated for it;</li>
 * <li>if a data file name was given, the matrix in that file is read, one vector is built per row whose name
 * matches a composite sequence of the configured array design, and the vectors are analyzed;</li>
 * <li>otherwise each expression experiment is processed, in decreasing order of bio material count when there is
 * more than one, and the processing is summarized.</li>
 * </ul>
 *
 * @param args the command line arguments
 * @return the exception that stopped the work, or {@code null} when the work completed
 * @throws UnsupportedOperationException if one of the data sets to process is not an ExpressionExperiment
 */
@Override
protected Exception doWork(String[] args) {
    Exception err = this.processCommandLine(args);
    if (err != null) {
        return err;
    }
    if (initializeFromOldData) {
        AbstractCLI.log.info("Initializing links from old data for " + this.taxon);
        LinkAnalysisPersister s = this.getBean(LinkAnalysisPersister.class);
        s.initializeLinksFromOldData(this.taxon);
        return null;
    } else if (updateNodeDegree) {
        // we waste some time here getting the experiments.
        this.loadTaxon();
        this.getBean(CoexpressionService.class).updateNodeDegrees(this.taxon);
        return null;
    }
    this.linkAnalysisService = this.getBean(LinkAnalysisService.class);
    if (this.dataFileName != null) {
        /*
         * Read vectors from file. Could provide as a matrix, but it's easier to provide vectors (less mess in later
         * code)
         */
        ArrayDesignService arrayDesignService = this.getBean(ArrayDesignService.class);
        ArrayDesign arrayDesign = arrayDesignService.findByShortName(this.linkAnalysisConfig.getArrayName());
        if (arrayDesign == null) {
            return new IllegalArgumentException("No such array design " + this.linkAnalysisConfig.getArrayName());
        }
        this.loadTaxon();
        arrayDesign = arrayDesignService.thawLite(arrayDesign);
        Collection<ProcessedExpressionDataVector> dataVectors = new HashSet<>();
        Map<String, CompositeSequence> csMap = new HashMap<>();
        for (CompositeSequence cs : arrayDesign.getCompositeSequences()) {
            csMap.put(cs.getName(), cs);
        }
        QuantitationType qtype = this.makeQuantitationType();
        SimpleExpressionDataLoaderService simpleExpressionDataLoaderService = this.getBean(SimpleExpressionDataLoaderService.class);
        ByteArrayConverter bArrayConverter = new ByteArrayConverter();
        try (InputStream data = new FileInputStream(new File(this.dataFileName))) {
            DoubleMatrix<String, String> matrix = simpleExpressionDataLoaderService.parse(data);
            BioAssayDimension bad = this.makeBioAssayDimension(arrayDesign, matrix);
            int skippedRows = 0;
            for (int i = 0; i < matrix.rows(); i++) {
                // Look the row up first so that no work is spent on rows that are going to be dropped.
                CompositeSequence cs = csMap.get(matrix.getRowName(i));
                if (cs == null) {
                    skippedRows++;
                    continue;
                }
                ProcessedExpressionDataVector vector = ProcessedExpressionDataVector.Factory.newInstance();
                vector.setData(bArrayConverter.doubleArrayToBytes(matrix.getRow(i)));
                vector.setDesignElement(cs);
                vector.setBioAssayDimension(bad);
                vector.setQuantitationType(qtype);
                dataVectors.add(vector);
            }
            if (skippedRows > 0) {
                AbstractCLI.log.warn("Skipped " + skippedRows + " rows with no matching element on " + this.linkAnalysisConfig.getArrayName());
            }
            AbstractCLI.log.info("Read " + dataVectors.size() + " data vectors");
        } catch (Exception e) {
            // Failures are reported to the caller through the return value.
            return e;
        }
        this.linkAnalysisService.processVectors(this.taxon, dataVectors, filterConfig, linkAnalysisConfig);
    } else {
        /*
         * Do in decreasing order of size, to help capture more links earlier - reduces fragmentation.
         */
        List<BioAssaySet> sees = new ArrayList<>(expressionExperiments);
        if (expressionExperiments.size() > 1) {
            AbstractCLI.log.info("Sorting data sets by number of samples, doing large data sets first.");
            Collection<ExpressionExperimentValueObject> vos = eeService.loadValueObjects(EntityUtils.getIds(expressionExperiments), true);
            final Map<Long, ExpressionExperimentValueObject> idMap = EntityUtils.getIdMap(vos);
            Collections.sort(sees, new Comparator<BioAssaySet>() {
                @Override
                public int compare(BioAssaySet o1, BioAssaySet o2) {
                    ExpressionExperimentValueObject e1 = idMap.get(o1.getId());
                    ExpressionExperimentValueObject e2 = idMap.get(o2.getId());
                    // Name the experiment that has no value object, not the other one.
                    assert e1 != null : "No valueobject: " + o1;
                    assert e2 != null : "No valueobject: " + o2;
                    // Descending order: compare the second against the first rather than negating the result.
                    return e2.getBioMaterialCount().compareTo(e1.getBioMaterialCount());
                }
            });
        }
        for (BioAssaySet ee : sees) {
            if (ee instanceof ExpressionExperiment) {
                this.processExperiment((ExpressionExperiment) ee);
            } else {
                throw new UnsupportedOperationException("Can't handle non-EE BioAssaySets yet");
            }
        }
        this.summarizeProcessing();
    }
    return null;
}
use of ubic.gemma.model.expression.bioAssayData.BioAssayDimension in project Gemma by PavlidisLab.
the class LinkAnalysisCli method makeBioAssayDimension.
/**
 * Builds a new BioAssayDimension holding one BioAssay per column of the given matrix.
 * <p>
 * Each BioAssay is named after its column, uses the given array design, is flagged as neither an outlier nor
 * paired-read sequencing, and gets a new BioMaterial of the same name whose source taxon is {@code this.taxon}.
 *
 * @param arrayDesign the array design assigned to every generated BioAssay
 * @param matrix      the matrix whose column names provide the sample names
 * @return the new dimension, named after {@code this.dataFileName}
 */
private BioAssayDimension makeBioAssayDimension(ArrayDesign arrayDesign, DoubleMatrix<String, String> matrix) {
    BioAssayDimension dimension = BioAssayDimension.Factory.newInstance();
    dimension.setName("For " + this.dataFileName);
    dimension.setDescription("Generated from flat file");

    final int columnCount = matrix.columns();
    for (int col = 0; col < columnCount; col++) {
        String sampleName = matrix.getColName(col).toString();

        BioMaterial sample = BioMaterial.Factory.newInstance();
        sample.setName(sampleName);
        sample.setSourceTaxon(this.taxon);

        BioAssay bioAssay = BioAssay.Factory.newInstance();
        bioAssay.setName(sampleName);
        bioAssay.setArrayDesignUsed(arrayDesign);
        bioAssay.setSampleUsed(sample);
        bioAssay.setIsOutlier(false);
        bioAssay.setSequencePairedReads(false);

        dimension.getBioAssays().add(bioAssay);
    }
    return dimension;
}
use of ubic.gemma.model.expression.bioAssayData.BioAssayDimension in project Gemma by PavlidisLab.
the class ExpressionDataBooleanMatrix method createMatrix.
/**
 * Builds the matrix of boolean values for the supplied vectors.
 * <p>
 * The matrix has one row per vector and {@code maxSize} columns. Every cell starts as {@link Boolean#FALSE} and
 * the columns are named by their index.
 *
 * @param vectors the data vectors to place in the matrix; the row of each vector is looked up in
 *                {@code rowElementMap} by its design element
 * @param maxSize the number of columns of the matrix
 * @return the filled matrix, with row names taken from the design elements of the vectors
 * @throws IllegalStateException if the number of values obtained for a vector differs from the number of bioassays
 *                               in its BioAssayDimension
 */
private ObjectMatrixImpl<CompositeSequence, Integer, Boolean> createMatrix(Collection<? extends DesignElementDataVector> vectors, int maxSize) {
    ObjectMatrixImpl<CompositeSequence, Integer, Boolean> result = new ObjectMatrixImpl<>(vectors.size(), maxSize);

    // Start with every cell set to false.
    for (int row = 0; row < result.rows(); row++) {
        for (int col = 0; col < result.columns(); col++) {
            result.set(row, col, Boolean.FALSE);
        }
    }

    // Columns are simply named by their position.
    for (int col = 0; col < result.columns(); col++) {
        result.addColumnName(col);
    }

    ByteArrayConverter converter = new ByteArrayConverter();
    Map<Integer, CompositeSequence> elementsByRow = new TreeMap<>();
    for (DesignElementDataVector dataVector : vectors) {
        BioAssayDimension bioAssayDimension = dataVector.getBioAssayDimension();
        byte[] rawData = dataVector.getData();
        CompositeSequence element = dataVector.getDesignElement();

        Integer row = this.rowElementMap.get(element);
        assert row != null;
        elementsByRow.put(row, element);

        boolean[] values = this.getVals(converter, dataVector, rawData);
        Collection<BioAssay> assays = bioAssayDimension.getBioAssays();
        if (values.length != assays.size()) {
            throw new IllegalStateException("Expected " + values.length + " bioassays at design element " + element + ", got " + assays.size());
        }
        this.setMatBioAssayValues(result, row, ArrayUtils.toObject(values), assays, assays.iterator());
    }

    for (int row = 0; row < result.rows(); row++) {
        result.addRowName(elementsByRow.get(row));
    }
    assert result.getRowNames().size() == result.rows();
    return result;
}
Aggregations