Search in sources :

Example 31 with BioAssayDimension

use of ubic.gemma.model.expression.bioAssayData.BioAssayDimension in project Gemma by PavlidisLab.

the class QuantitationTypeData method getBackgroundChannelB.

/**
 * Builds a matrix from the quantitation types that {@code dat.getBackgroundChannelB} reports for each of this
 * object's bioassay dimensions.
 *
 * @return the result of {@code makeMatrix} over the quantitation types found, or {@code null} when no dimension
 *         yielded one
 */
public ExpressionDataDoubleMatrix getBackgroundChannelB() {
    // Populate the quantitation type lookup on first use.
    if (dat == null) {
        dat = this.getQuantitationTypesNeeded();
    }
    List<QuantitationType> backgroundTypes = new ArrayList<>();
    for (BioAssayDimension bioAssayDimension : this.getBioAssayDimensions()) {
        QuantitationType candidate = dat.getBackgroundChannelB(bioAssayDimension);
        if (candidate == null) {
            // nothing recorded for this dimension
            continue;
        }
        backgroundTypes.add(candidate);
    }
    return backgroundTypes.isEmpty() ? null : this.makeMatrix(backgroundTypes);
}
Also used : BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) QuantitationType(ubic.gemma.model.common.quantitationtype.QuantitationType) StandardQuantitationType(ubic.gemma.model.common.quantitationtype.StandardQuantitationType)

Example 32 with BioAssayDimension

use of ubic.gemma.model.expression.bioAssayData.BioAssayDimension in project Gemma by PavlidisLab.

the class QuantitationTypeData method getProcessedDataVectors.

/**
 * Returns the processed vectors held by this object. If {@code processedDataVectors} is already set it is returned
 * as is; otherwise {@code vectors} is filtered down to the {@link ProcessedExpressionDataVector}s whose bioassay
 * dimension and quantitation type are among this object's dimensions and preferred quantitation types.
 *
 * @return Collection of <em>ProcessedExpressionDataVector</em>s.
 */
private Collection<ProcessedExpressionDataVector> getProcessedDataVectors() {
    if (this.processedDataVectors != null) {
        return this.processedDataVectors;
    }
    Collection<ProcessedExpressionDataVector> matches = new HashSet<>();
    List<BioAssayDimension> wantedDimensions = this.getBioAssayDimensions();
    List<QuantitationType> wantedTypes = this.getPreferredQTypes();
    for (DesignElementDataVector candidate : vectors) {
        if (!(candidate instanceof ProcessedExpressionDataVector)) {
            continue;
        }
        if (!wantedDimensions.contains(candidate.getBioAssayDimension())) {
            continue;
        }
        if (!wantedTypes.contains(candidate.getQuantitationType())) {
            continue;
        }
        matches.add((ProcessedExpressionDataVector) candidate);
    }
    return matches;
}
Also used : BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) ProcessedExpressionDataVector(ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector) DesignElementDataVector(ubic.gemma.model.expression.bioAssayData.DesignElementDataVector) QuantitationType(ubic.gemma.model.common.quantitationtype.QuantitationType) StandardQuantitationType(ubic.gemma.model.common.quantitationtype.StandardQuantitationType)

Example 33 with BioAssayDimension

use of ubic.gemma.model.expression.bioAssayData.BioAssayDimension in project Gemma by PavlidisLab.

the class AffyPowerToolsProbesetSummarize method processData.

/**
 * Parses an apt-probeset-summarize output file and converts it into raw data vectors for the bioassays of the
 * experiment that use the target platform. For either 3' or Exon arrays.
 * <p>
 * Data columns are matched to bioassays by stripping a trailing {@code .CEL}/{@code .cel} extension from the column
 * name and then looking the result up by accession (for names of the form {@code GSMnnn_...}) or by bioassay name.
 * Unmatched columns are tolerated only when the file has more columns than there are bioassays to use; such
 * columns are dropped from the matrix.
 *
 * @param ee                  ee
 * @param aptOutputFileToRead file
 * @param targetPlatform      deal with data from this platform (call multiple times if there is more than one platform)
 * @return raw data vectors
 * @throws IOException           io problem
 * @throws FileNotFoundException file not found
 * @throws IllegalStateException if the matrix is empty, has fewer columns than bioassays to use, two bioassays
 *                               share an accession or name, or a bioassay cannot be matched to a data column
 */
public Collection<RawExpressionDataVector> processData(ExpressionExperiment ee, String aptOutputFileToRead, ArrayDesign targetPlatform) throws IOException {
    AffyPowerToolsProbesetSummarize.log.info("Parsing " + aptOutputFileToRead);
    try (InputStream is = new FileInputStream(aptOutputFileToRead)) {
        DoubleMatrix<String, String> matrix = this.parse(is);
        if (matrix.rows() == 0) {
            throw new IllegalStateException("Matrix from APT had no rows");
        }
        if (matrix.columns() == 0) {
            throw new IllegalStateException("Matrix from APT had no columns");
        }
        // Restrict to the bioassays that were run on the requested platform.
        Collection<BioAssay> allBioAssays = ee.getBioAssays();
        Collection<BioAssay> bioAssaysToUse = new HashSet<>();
        for (BioAssay bioAssay : allBioAssays) {
            if (bioAssay.getArrayDesignUsed().equals(targetPlatform)) {
                bioAssaysToUse.add(bioAssay);
            }
        }
        if (allBioAssays.size() > bioAssaysToUse.size()) {
            AffyPowerToolsProbesetSummarize.log.info("Using " + bioAssaysToUse.size() + "/" + allBioAssays.size() + " bioassays (those on " + targetPlatform.getShortName() + ")");
        }
        if (matrix.columns() < bioAssaysToUse.size()) {
            // having > is okay, there can be extra.
            throw new IllegalStateException("Matrix from APT had the wrong number of columns: expected " + bioAssaysToUse.size() + ", got " + matrix.columns());
        }
        AffyPowerToolsProbesetSummarize.log.info("Read " + matrix.rows() + " x " + matrix.columns() + ", matching with " + bioAssaysToUse.size() + " samples on " + targetPlatform);
        BioAssayDimension bad = BioAssayDimension.Factory.newInstance();
        bad.setName("For " + ee.getShortName() + " on " + targetPlatform);
        bad.setDescription("Generated from output of apt-probeset-summarize");
        /*
         * Index the bioassays by accession and by name so columns can be matched on either.
         */
        Map<String, BioAssay> bmap = new HashMap<>();
        for (BioAssay bioAssay : bioAssaysToUse) {
            assert bioAssay.getArrayDesignUsed().equals(targetPlatform);
            // Guard against a bioassay without an accession; it can still be matched by name.
            String accession = bioAssay.getAccession() == null ? null : bioAssay.getAccession().getAccession();
            if ((accession != null && bmap.containsKey(accession)) || bmap.containsKey(bioAssay.getName())) {
                throw new IllegalStateException("Duplicate");
            }
            if (accession != null) {
                bmap.put(accession, bioAssay);
            }
            bmap.put(bioAssay.getName(), bioAssay);
        }
        if (AffyPowerToolsProbesetSummarize.log.isDebugEnabled())
            AffyPowerToolsProbesetSummarize.log.debug("Will match result data file columns to bioassays referred to by any of the following strings:\n" + StringUtils.join(bmap.keySet(), "\n"));
        int found = 0;
        List<String> columnsToKeep = new ArrayList<>();
        for (int i = 0; i < matrix.columns(); i++) {
            String columnName = matrix.getColName(i);
            // The dot is escaped so that only a literal ".CEL"/".cel" extension is stripped.
            String sampleName = columnName.replaceAll("\\.(CEL|cel)$", "");
            /*
             * Look for patterns like GSM476194_SK_09-BALBcJ_622.CEL
             */
            BioAssay assay = null;
            if (sampleName.matches("^GSM[0-9]+_.+")) {
                String geoAcc = sampleName.split("_")[0];
                AffyPowerToolsProbesetSummarize.log.info("Found column for " + geoAcc);
                if (bmap.containsKey(geoAcc)) {
                    assay = bmap.get(geoAcc);
                } else {
                    AffyPowerToolsProbesetSummarize.log.warn("No bioassay for " + geoAcc);
                }
            } else {
                /*
                 * Sometimes column names are like Aud_19L.CEL, so fall back to the whole sample name.
                 */
                assay = bmap.get(sampleName);
            }
            if (assay == null) {
                /*
                 * This is okay, if we have extras
                 */
                if (matrix.columns() == bioAssaysToUse.size()) {
                    throw new IllegalStateException("No bioassay could be matched to CEL file identified by " + sampleName);
                }
                AffyPowerToolsProbesetSummarize.log.warn("No bioassay for " + sampleName);
                continue;
            }
            String matchedAccession = assay.getAccession() == null ? null : assay.getAccession().getAccession();
            AffyPowerToolsProbesetSummarize.log.info("Matching CEL sample " + sampleName + " to bioassay " + assay + " [" + matchedAccession + "]");
            columnsToKeep.add(columnName);
            assert assay.getArrayDesignUsed().equals(targetPlatform);
            bad.getBioAssays().add(assay);
            found++;
        }
        if (found != bioAssaysToUse.size()) {
            throw new IllegalStateException("Failed to find a data column for every bioassay on the given platform " + targetPlatform);
        }
        // Drop the extra columns that did not correspond to any bioassay.
        if (columnsToKeep.size() < matrix.columns()) {
            matrix = matrix.subsetColumns(columnsToKeep);
        }
        if (quantitationType == null) {
            quantitationType = AffyPowerToolsProbesetSummarize.makeAffyQuantitationType();
        }
        return this.convertDesignElementDataVectors(ee, bad, targetPlatform, matrix);
    }
}
Also used : BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay)

Example 34 with BioAssayDimension

use of ubic.gemma.model.expression.bioAssayData.BioAssayDimension in project Gemma by PavlidisLab.

the class ExpressionDataIntegerMatrix method createMatrix.

/**
 * Creates an integer matrix with one row per entry of {@code rowDesignElementMapByInteger} and {@code maxSize}
 * columns, zero-fills it, and then fills in the values decoded from each vector's byte data.
 *
 * @param vectors the vectors supplying the data; each must have a design element present in {@code rowElementMap}
 * @param maxSize number of columns in the matrix
 * @return the populated matrix, with rows named by design element and columns named by index
 */
private IntegerMatrix<CompositeSequence, Integer> createMatrix(Collection<? extends DesignElementDataVector> vectors, int maxSize) {
    int rowCount = this.rowDesignElementMapByInteger.keySet().size();
    IntegerMatrix<CompositeSequence, Integer> result = new IntegerMatrix<>(rowCount, maxSize);
    for (int col = 0; col < result.columns(); col++) {
        result.addColumnName(col);
    }
    // start from an all-zero matrix
    for (int row = 0; row < result.rows(); row++) {
        for (int col = 0; col < result.columns(); col++) {
            result.set(row, col, 0);
        }
    }
    ByteArrayConverter converter = new ByteArrayConverter();
    // Sorted by row index so the row names can be attached in order afterwards.
    Map<Integer, CompositeSequence> elementsByRow = new TreeMap<>();
    for (DesignElementDataVector dataVector : vectors) {
        CompositeSequence element = dataVector.getDesignElement();
        assert element != null : "No design element for " + dataVector;
        Integer row = this.rowElementMap.get(element);
        assert row != null;
        elementsByRow.put(row, element);
        int[] values = converter.byteArrayToInts(dataVector.getData());
        Collection<BioAssay> assays = dataVector.getBioAssayDimension().getBioAssays();
        assert assays.size() == values.length : "Expected " + values.length + " got " + assays.size();
        this.setMatBioAssayValues(result, row, ArrayUtils.toObject(values), assays, assays.iterator());
    }
    for (int row = 0; row < result.rows(); row++) {
        result.addRowName(elementsByRow.get(row));
    }
    ExpressionDataIntegerMatrix.log.debug("Created a " + result.rows() + " x " + result.columns() + " matrix");
    return result;
}
Also used : ByteArrayConverter(ubic.basecode.io.ByteArrayConverter) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) IntegerMatrix(ubic.basecode.dataStructure.matrix.IntegerMatrix) DesignElementDataVector(ubic.gemma.model.expression.bioAssayData.DesignElementDataVector) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay)

Example 35 with BioAssayDimension

use of ubic.gemma.model.expression.bioAssayData.BioAssayDimension in project Gemma by PavlidisLab.

the class ExpressionDataStringMatrix method createMatrix.

/**
 * Creates a string matrix with one row per entry of {@code rowDesignElementMapByInteger} and {@code maxSize}
 * columns, fills it with empty strings, and then writes in the values decoded from each vector's byte data.
 *
 * @param vectors the vectors supplying the data; each must have a design element present in {@code rowElementMap}
 * @param maxSize number of columns in the matrix
 * @return the populated matrix, with rows and columns named by integer index
 */
private StringMatrix<Integer, Integer> createMatrix(Collection<? extends DesignElementDataVector> vectors, int maxSize) {
    int rowCount = this.rowDesignElementMapByInteger.keySet().size();
    StringMatrix<Integer, Integer> result = new StringMatrix<>(rowCount, maxSize);
    for (int col = 0; col < result.columns(); col++) {
        result.addColumnName(col);
    }
    // every cell starts out as the empty string
    for (int row = 0; row < result.rows(); row++) {
        for (int col = 0; col < result.columns(); col++) {
            result.set(row, col, "");
        }
    }
    ByteArrayConverter converter = new ByteArrayConverter();
    for (DesignElementDataVector dataVector : vectors) {
        CompositeSequence element = dataVector.getDesignElement();
        assert element != null : "No designelement for " + dataVector;
        Integer row = this.rowElementMap.get(element);
        assert row != null;
        result.addRowName(row);
        String[] values = converter.byteArrayToStrings(dataVector.getData());
        Collection<BioAssay> assays = dataVector.getBioAssayDimension().getBioAssays();
        assert assays.size() == values.length : "Expected " + values.length + " got " + assays.size();
        // Values are stored in the iteration order of the dimension's bioassays.
        int position = 0;
        for (BioAssay assay : assays) {
            Integer col = this.columnAssayMap.get(assay);
            assert col != null;
            result.setByKeys(row, col, values[position]);
            position++;
        }
    }
    ExpressionDataStringMatrix.log.debug("Created a " + result.rows() + " x " + result.columns() + " matrix");
    return result;
}
Also used : ByteArrayConverter(ubic.basecode.io.ByteArrayConverter) StringMatrix(ubic.basecode.dataStructure.matrix.StringMatrix) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) DesignElementDataVector(ubic.gemma.model.expression.bioAssayData.DesignElementDataVector) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay)

Aggregations

BioAssayDimension (ubic.gemma.model.expression.bioAssayData.BioAssayDimension)59 BioAssay (ubic.gemma.model.expression.bioAssay.BioAssay)29 QuantitationType (ubic.gemma.model.common.quantitationtype.QuantitationType)20 RawExpressionDataVector (ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector)16 DesignElementDataVector (ubic.gemma.model.expression.bioAssayData.DesignElementDataVector)15 BioMaterial (ubic.gemma.model.expression.biomaterial.BioMaterial)15 CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence)15 ByteArrayConverter (ubic.basecode.io.ByteArrayConverter)11 StandardQuantitationType (ubic.gemma.model.common.quantitationtype.StandardQuantitationType)10 ArrayDesign (ubic.gemma.model.expression.arrayDesign.ArrayDesign)9 ProcessedExpressionDataVector (ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector)9 ExpressionExperiment (ubic.gemma.model.expression.experiment.ExpressionExperiment)6 HashSet (java.util.HashSet)4 Test (org.junit.Test)4 Transactional (org.springframework.transaction.annotation.Transactional)4 BioSequence (ubic.gemma.model.genome.biosequence.BioSequence)4 StopWatch (org.apache.commons.lang3.time.StopWatch)3 ExpressionDataDoubleMatrix (ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix)3 ExpressionExperimentValueObject (ubic.gemma.model.expression.experiment.ExpressionExperimentValueObject)3 DoubleArrayList (cern.colt.list.DoubleArrayList)2