use of ubic.gemma.model.expression.bioAssayData.BioAssayDimension in project Gemma by PavlidisLab.
the class QuantitationTypeData method getBackgroundChannelB.
/**
 * Builds a matrix from the "background channel B" quantitation types of this object's bioassay dimensions.
 * <p>
 * The {@code dat} field is lazily populated from {@code getQuantitationTypesNeeded()} on first use. Each
 * dimension returned by {@code getBioAssayDimensions()} is then asked (via {@code dat}) for its background
 * channel B quantitation type; dimensions that yield {@code null} are skipped.
 *
 * @return the result of {@code makeMatrix} over the quantitation types found, or {@code null} when none of
 *         the dimensions has one
 */
public ExpressionDataDoubleMatrix getBackgroundChannelB() {
    if (dat == null) {
        dat = this.getQuantitationTypesNeeded();
    }

    List<QuantitationType> backgroundTypes = new ArrayList<>();
    for (BioAssayDimension bad : this.getBioAssayDimensions()) {
        QuantitationType candidate = dat.getBackgroundChannelB(bad);
        if (candidate == null) {
            continue;
        }
        backgroundTypes.add(candidate);
    }

    // Callers receive null (not an empty matrix) when there is nothing to build from.
    return backgroundTypes.isEmpty() ? null : this.makeMatrix(backgroundTypes);
}
use of ubic.gemma.model.expression.bioAssayData.BioAssayDimension in project Gemma by PavlidisLab.
the class QuantitationTypeData method getProcessedDataVectors.
/**
 * Selects the processed vectors among {@code vectors}.
 * <p>
 * If the {@code processedDataVectors} field is already set, it is returned unchanged. Otherwise a vector is
 * selected when it is a {@link ProcessedExpressionDataVector}, its bioassay dimension is one of
 * {@code getBioAssayDimensions()}, and its quantitation type is one of {@code getPreferredQTypes()}.
 * Note that the freshly computed collection is not stored back into the field by this method.
 *
 * @return Collection of <em>ProcessedExpressionDataVector</em>s.
 */
private Collection<ProcessedExpressionDataVector> getProcessedDataVectors() {
    if (this.processedDataVectors != null) {
        return this.processedDataVectors;
    }

    Collection<ProcessedExpressionDataVector> selected = new HashSet<>();
    List<BioAssayDimension> usableDimensions = this.getBioAssayDimensions();
    List<QuantitationType> preferredTypes = this.getPreferredQTypes();

    for (DesignElementDataVector candidate : vectors) {
        // Checks are applied in the same order as a short-circuiting && chain would apply them.
        if (!(candidate instanceof ProcessedExpressionDataVector)) {
            continue;
        }
        if (!usableDimensions.contains(candidate.getBioAssayDimension())) {
            continue;
        }
        if (!preferredTypes.contains(candidate.getQuantitationType())) {
            continue;
        }
        selected.add((ProcessedExpressionDataVector) candidate);
    }
    return selected;
}
use of ubic.gemma.model.expression.bioAssayData.BioAssayDimension in project Gemma by PavlidisLab.
the class AffyPowerToolsProbesetSummarize method processData.
/**
 * Parses an apt-probeset-summarize output file and converts it to raw data vectors for the bioassays of
 * {@code ee} that use {@code targetPlatform}. For either 3' or Exon arrays.
 * <p>
 * Matrix columns are matched to bioassays by name: a trailing {@code .CEL}/{@code .cel} extension is removed
 * from the column name; if what remains looks like {@code GSM<digits>_...} the GSM accession is used as the
 * lookup key, otherwise the whole remaining name is. Bioassays are looked up by accession (when they have
 * one) and by name. Columns that match no bioassay are dropped, which is tolerated only when the matrix has
 * more columns than there are bioassays to use.
 *
 * @param ee                  ee
 * @param aptOutputFileToRead file
 * @param targetPlatform      deal with data from this platform (call multiple times if there is more than one
 *                            platform)
 * @return raw data vectors
 * @throws IOException           io problem (including {@link java.io.FileNotFoundException} when the file is
 *                               missing)
 * @throws IllegalStateException if the matrix is empty, has fewer columns than bioassays on the platform, two
 *                               bioassays share an accession or name, a column cannot be matched when no
 *                               extras are expected, or not every bioassay gets a column
 */
public Collection<RawExpressionDataVector> processData(ExpressionExperiment ee, String aptOutputFileToRead, ArrayDesign targetPlatform) throws IOException {
    AffyPowerToolsProbesetSummarize.log.info("Parsing " + aptOutputFileToRead);
    try (InputStream is = new FileInputStream(aptOutputFileToRead)) {
        DoubleMatrix<String, String> matrix = this.parse(is);
        if (matrix.rows() == 0) {
            throw new IllegalStateException("Matrix from APT had no rows");
        }
        if (matrix.columns() == 0) {
            throw new IllegalStateException("Matrix from APT had no columns");
        }

        // Restrict to the bioassays that were run on the requested platform.
        Collection<BioAssay> allBioAssays = ee.getBioAssays();
        Collection<BioAssay> bioAssaysToUse = new HashSet<>();
        for (BioAssay bioAssay : allBioAssays) {
            if (bioAssay.getArrayDesignUsed().equals(targetPlatform)) {
                bioAssaysToUse.add(bioAssay);
            }
        }
        if (allBioAssays.size() > bioAssaysToUse.size()) {
            AffyPowerToolsProbesetSummarize.log.info("Using " + bioAssaysToUse.size() + "/" + allBioAssays.size() + " bioassays (those on " + targetPlatform.getShortName() + ")");
        }
        if (matrix.columns() < bioAssaysToUse.size()) {
            // having > is okay, there can be extra.
            throw new IllegalStateException("Matrix from APT had the wrong number of colummns: expected " + bioAssaysToUse.size() + ", got " + matrix.columns());
        }
        AffyPowerToolsProbesetSummarize.log.info("Read " + matrix.rows() + " x " + matrix.columns() + ", matching with " + bioAssaysToUse.size() + " samples on " + targetPlatform);

        BioAssayDimension bad = BioAssayDimension.Factory.newInstance();
        bad.setName("For " + ee.getShortName() + " on " + targetPlatform);
        bad.setDescription("Generated from output of apt-probeset-summarize");

        /*
         * Index the bioassays by every string a column name might refer to them by (accession and name).
         */
        Map<String, BioAssay> bmap = new HashMap<>();
        for (BioAssay bioAssay : bioAssaysToUse) {
            assert bioAssay.getArrayDesignUsed().equals(targetPlatform);
            // A bioassay without an accession can still be matched by name, so do not dereference blindly.
            String accession = bioAssay.getAccession() == null ? null : bioAssay.getAccession().getAccession();
            String name = bioAssay.getName();
            if ((accession != null && bmap.containsKey(accession)) || bmap.containsKey(name)) {
                throw new IllegalStateException("Duplicate");
            }
            if (accession != null) {
                bmap.put(accession, bioAssay);
            }
            bmap.put(name, bioAssay);
        }
        if (AffyPowerToolsProbesetSummarize.log.isDebugEnabled()) {
            AffyPowerToolsProbesetSummarize.log.debug("Will match result data file columns to bioassays referred to by any of the following strings:\n" + StringUtils.join(bmap.keySet(), "\n"));
        }

        int found = 0;
        List<String> columnsToKeep = new ArrayList<>();
        for (int i = 0; i < matrix.columns(); i++) {
            String columnName = matrix.getColName(i);
            // Strip only a real ".CEL"/".cel" extension; the dot must be escaped or it matches any character
            // (which would e.g. turn "sample_CEL" into "sample").
            String sampleName = columnName.replaceAll("\\.(CEL|cel)$", "");

            /*
             * Look for patterns like GSM476194_SK_09-BALBcJ_622.CEL
             */
            BioAssay assay = null;
            if (sampleName.matches("^GSM[0-9]+_.+")) {
                String geoAcc = sampleName.split("_")[0];
                AffyPowerToolsProbesetSummarize.log.info("Found column for " + geoAcc);
                if (bmap.containsKey(geoAcc)) {
                    assay = bmap.get(geoAcc);
                } else {
                    AffyPowerToolsProbesetSummarize.log.warn("No bioassay for " + geoAcc);
                }
            } else {
                /*
                 * Sometimes column names are like Aud_19L.CEL, i.e. they carry the bioassay name.
                 */
                assay = bmap.get(sampleName);
            }

            if (assay == null) {
                /*
                 * This is okay, if we have extras
                 */
                if (matrix.columns() == bioAssaysToUse.size()) {
                    throw new IllegalStateException("No bioassay could be matched to CEL file identified by " + sampleName);
                }
                AffyPowerToolsProbesetSummarize.log.warn("No bioassay for " + sampleName);
                continue;
            }

            String matchedAccession = assay.getAccession() == null ? null : assay.getAccession().getAccession();
            AffyPowerToolsProbesetSummarize.log.info("Matching CEL sample " + sampleName + " to bioassay " + assay + " [" + matchedAccession + "]");
            columnsToKeep.add(columnName);
            assert assay.getArrayDesignUsed().equals(targetPlatform);
            bad.getBioAssays().add(assay);
            found++;
        }

        if (found != bioAssaysToUse.size()) {
            throw new IllegalStateException("Failed to find a data column for every bioassay on the given platform " + targetPlatform);
        }
        if (columnsToKeep.size() < matrix.columns()) {
            // Drop the extra columns that did not correspond to any bioassay on this platform.
            matrix = matrix.subsetColumns(columnsToKeep);
        }
        if (quantitationType == null) {
            quantitationType = AffyPowerToolsProbesetSummarize.makeAffyQuantitationType();
        }
        return this.convertDesignElementDataVectors(ee, bad, targetPlatform, matrix);
    }
}
use of ubic.gemma.model.expression.bioAssayData.BioAssayDimension in project Gemma by PavlidisLab.
the class ExpressionDataIntegerMatrix method createMatrix.
/**
 * Fill in the data: builds an integer matrix with one row per entry of {@code rowDesignElementMapByInteger}
 * and {@code maxSize} columns, initialised to 0 and then populated from the given vectors.
 * <p>
 * Columns are named by their index. Each vector's bytes are decoded to ints and written into the row that
 * {@code rowElementMap} assigns to the vector's design element (the per-bioassay placement is delegated to
 * {@code setMatBioAssayValues}). Row names are added last, in row-index order, using the design elements
 * seen while reading the vectors.
 *
 * @param vectors the data vectors to read values from
 * @param maxSize the number of columns of the matrix
 * @return the populated integer matrix
 */
private IntegerMatrix<CompositeSequence, Integer> createMatrix(Collection<? extends DesignElementDataVector> vectors, int maxSize) {
    int rowCount = this.rowDesignElementMapByInteger.keySet().size();
    IntegerMatrix<CompositeSequence, Integer> matrix = new IntegerMatrix<>(rowCount, maxSize);

    for (int col = 0; col < matrix.columns(); col++) {
        matrix.addColumnName(col);
    }

    // initialize the matrix to 0
    for (int row = 0; row < matrix.rows(); row++) {
        for (int col = 0; col < matrix.columns(); col++) {
            matrix.set(row, col, 0);
        }
    }

    ByteArrayConverter converter = new ByteArrayConverter();
    // Sorted by row index; consulted below to name the rows in order.
    Map<Integer, CompositeSequence> elementByRow = new TreeMap<>();
    for (DesignElementDataVector vector : vectors) {
        CompositeSequence designElement = vector.getDesignElement();
        assert designElement != null : "No design element for " + vector;

        Integer rowIndex = this.rowElementMap.get(designElement);
        assert rowIndex != null;
        elementByRow.put(rowIndex, designElement);

        int[] vals = converter.byteArrayToInts(vector.getData());
        Collection<BioAssay> bioAssays = vector.getBioAssayDimension().getBioAssays();
        assert bioAssays.size() == vals.length : "Expected " + vals.length + " got " + bioAssays.size();

        this.setMatBioAssayValues(matrix, rowIndex, ArrayUtils.toObject(vals), bioAssays, bioAssays.iterator());
    }

    for (int row = 0; row < matrix.rows(); row++) {
        matrix.addRowName(elementByRow.get(row));
    }

    ExpressionDataIntegerMatrix.log.debug("Created a " + matrix.rows() + " x " + matrix.columns() + " matrix");
    return matrix;
}
use of ubic.gemma.model.expression.bioAssayData.BioAssayDimension in project Gemma by PavlidisLab.
the class ExpressionDataStringMatrix method createMatrix.
/**
 * Builds a string matrix with one row per entry of {@code rowDesignElementMapByInteger} and {@code maxSize}
 * columns, initialised to the empty string and then populated from the given vectors.
 * <p>
 * Columns are named by their index. For each vector, the row index that {@code rowElementMap} assigns to its
 * design element is added as a row name, the vector's bytes are decoded to strings, and the i-th value is
 * stored (by row/column key) in the column that {@code columnAssayMap} assigns to the i-th bioassay of the
 * vector's dimension.
 *
 * @param vectors the data vectors to read values from
 * @param maxSize the number of columns of the matrix
 * @return the populated string matrix
 */
private StringMatrix<Integer, Integer> createMatrix(Collection<? extends DesignElementDataVector> vectors, int maxSize) {
    int rowCount = this.rowDesignElementMapByInteger.keySet().size();
    StringMatrix<Integer, Integer> matrix = new StringMatrix<>(rowCount, maxSize);

    for (int col = 0; col < matrix.columns(); col++) {
        matrix.addColumnName(col);
    }

    // initialize the matrix to "";
    for (int row = 0; row < matrix.rows(); row++) {
        for (int col = 0; col < matrix.columns(); col++) {
            matrix.set(row, col, "");
        }
    }

    ByteArrayConverter converter = new ByteArrayConverter();
    for (DesignElementDataVector vector : vectors) {
        CompositeSequence designElement = vector.getDesignElement();
        assert designElement != null : "No designelement for " + vector;

        Integer rowIndex = this.rowElementMap.get(designElement);
        assert rowIndex != null;
        matrix.addRowName(rowIndex);

        String[] vals = converter.byteArrayToStrings(vector.getData());
        Collection<BioAssay> bioAssays = vector.getBioAssayDimension().getBioAssays();
        assert bioAssays.size() == vals.length : "Expected " + vals.length + " got " + bioAssays.size();

        // Values are stored in the same order in which the dimension iterates its bioassays.
        int valueIndex = 0;
        for (BioAssay bioAssay : bioAssays) {
            Integer column = this.columnAssayMap.get(bioAssay);
            assert column != null;
            matrix.setByKeys(rowIndex, column, vals[valueIndex]);
            valueIndex++;
        }
    }

    ExpressionDataStringMatrix.log.debug("Created a " + matrix.rows() + " x " + matrix.columns() + " matrix");
    return matrix;
}
Aggregations