Search in sources :

Example 56 with BioAssayDimension

use of ubic.gemma.model.expression.bioAssayData.BioAssayDimension in project Gemma by PavlidisLab.

the class ExpressionDataDoubleMatrix method createMatrix.

/**
 * Builds the dense double matrix for this expression data matrix from the supplied vectors.
 * <p>
 * Every cell is first set to {@link Double#NEGATIVE_INFINITY} as a "not written yet" marker. The vectors are then
 * copied in, and any cell still holding the marker afterwards is replaced with {@link Double#NaN}.
 * <p>
 * NOTE(review): a data value that is itself -Infinity would also end up as NaN after the final pass, assuming
 * setMatBioAssayValues stores the values unchanged -- confirm this is intended.
 *
 * @param vectors data vectors to copy in; the row of each is looked up from its design element
 * @param maxSize number of columns to allocate
 * @return the filled matrix; columns are named by bio material, rows by design element
 * @throws IllegalStateException if the number of values decoded from a vector differs from the number of
 *                               bioassays in that vector's bioassay dimension
 */
private DoubleMatrix<CompositeSequence, BioMaterial> createMatrix(Collection<? extends DesignElementDataVector> vectors, int maxSize) {
    int rowCount = this.rowDesignElementMapByInteger.keySet().size();
    DoubleMatrix<CompositeSequence, BioMaterial> matrix = new DenseDoubleMatrix<>(rowCount, maxSize);

    // Columns are labelled with the bio material assigned to each column index.
    for (int col = 0; col < matrix.columns(); col++) {
        matrix.addColumnName(this.getBioMaterialForColumn(col));
    }

    // Mark every cell as "unset" using -Infinity.
    for (int row = 0; row < matrix.rows(); row++) {
        for (int col = 0; col < matrix.columns(); col++) {
            matrix.set(row, col, Double.NEGATIVE_INFINITY);
        }
    }

    ByteArrayConverter converter = new ByteArrayConverter();
    Map<Integer, CompositeSequence> elementsByRow = new TreeMap<>();
    for (DesignElementDataVector vector : vectors) {
        CompositeSequence designElement = vector.getDesignElement();
        assert designElement != null : "No design element for " + vector;

        Integer rowIndex = this.rowElementMap.get(designElement);
        assert rowIndex != null;
        elementsByRow.put(rowIndex, designElement);

        byte[] rawBytes = vector.getData();
        double[] values = converter.byteArrayToDoubles(rawBytes);
        Collection<BioAssay> assays = vector.getBioAssayDimension().getBioAssays();
        if (assays.size() != values.length) {
            throw new IllegalStateException("Mismatch: " + values.length + " values in vector ( " + rawBytes.length + " bytes) for " + designElement + " got " + assays.size() + " bioassays in the bioAssayDimension");
        }

        this.setMatBioAssayValues(matrix, rowIndex, ArrayUtils.toObject(values), assays, assays.iterator());
    }

    /*
     * Note: these row names aren't that important unless we use the bare matrix.
     */
    for (int row = 0; row < matrix.rows(); row++) {
        matrix.addRowName(elementsByRow.get(row));
    }
    assert matrix.getRowNames().size() == matrix.rows();

    // Whatever is still marked "unset" becomes a missing value.
    for (int row = 0; row < matrix.rows(); row++) {
        for (int col = 0; col < matrix.columns(); col++) {
            if (matrix.get(row, col) != Double.NEGATIVE_INFINITY) {
                continue;
            }
            matrix.set(row, col, Double.NaN);
        }
    }

    ExpressionDataDoubleMatrix.log.debug("Created a " + matrix.rows() + " x " + matrix.columns() + " matrix");
    return matrix;
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) ByteArrayConverter(ubic.basecode.io.ByteArrayConverter) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) DenseDoubleMatrix(ubic.basecode.dataStructure.matrix.DenseDoubleMatrix) DesignElementDataVector(ubic.gemma.model.expression.bioAssayData.DesignElementDataVector) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay)

Example 57 with BioAssayDimension

use of ubic.gemma.model.expression.bioAssayData.BioAssayDimension in project Gemma by PavlidisLab.

the class LinkAnalysisCli method doWork.

/**
 * Entry point of the link analysis command.
 * <p>
 * Depending on the configured options this either
 * <ul>
 * <li>initializes links from old data for the taxon,</li>
 * <li>updates node degrees for the taxon,</li>
 * <li>reads expression vectors from a flat file and analyzes them, or</li>
 * <li>analyzes each selected expression experiment, largest (by bio material count) first.</li>
 * </ul>
 *
 * @param args raw command-line arguments, passed on to processCommandLine
 * @return the exception that stopped processing (bad command line, unknown array design, failure while reading
 *         the data file), or null if none was recorded
 * @throws UnsupportedOperationException if one of the selected data sets is not an ExpressionExperiment
 */
@Override
protected Exception doWork(String[] args) {
    Exception err = this.processCommandLine(args);
    if (err != null) {
        return err;
    }
    if (initializeFromOldData) {
        AbstractCLI.log.info("Initializing links from old data for " + this.taxon);
        LinkAnalysisPersister s = this.getBean(LinkAnalysisPersister.class);
        s.initializeLinksFromOldData(this.taxon);
        return null;
    } else if (updateNodeDegree) {
        // we waste some time here getting the experiments.
        this.loadTaxon();
        this.getBean(CoexpressionService.class).updateNodeDegrees(this.taxon);
        return null;
    }
    this.linkAnalysisService = this.getBean(LinkAnalysisService.class);
    if (this.dataFileName != null) {
        /*
         * Read vectors from file. Could provide as a matrix, but it's easier to provide vectors (less mess in later
         * code)
         */
        ArrayDesignService arrayDesignService = this.getBean(ArrayDesignService.class);
        ArrayDesign arrayDesign = arrayDesignService.findByShortName(this.linkAnalysisConfig.getArrayName());
        if (arrayDesign == null) {
            return new IllegalArgumentException("No such array design " + this.linkAnalysisConfig.getArrayName());
        }
        this.loadTaxon();
        arrayDesign = arrayDesignService.thawLite(arrayDesign);
        Collection<ProcessedExpressionDataVector> dataVectors = new HashSet<>();
        // Index the platform's elements by name so matrix rows can be matched to them.
        Map<String, CompositeSequence> csMap = new HashMap<>();
        for (CompositeSequence cs : arrayDesign.getCompositeSequences()) {
            csMap.put(cs.getName(), cs);
        }
        QuantitationType qtype = this.makeQuantitationType();
        SimpleExpressionDataLoaderService simpleExpressionDataLoaderService = this.getBean(SimpleExpressionDataLoaderService.class);
        ByteArrayConverter bArrayConverter = new ByteArrayConverter();
        try (InputStream data = new FileInputStream(new File(this.dataFileName))) {
            DoubleMatrix<String, String> matrix = simpleExpressionDataLoaderService.parse(data);
            BioAssayDimension bad = this.makeBioAssayDimension(arrayDesign, matrix);
            int skippedRows = 0;
            for (int i = 0; i < matrix.rows(); i++) {
                // Look the element up first: rows that don't match anything on the platform are skipped, so
                // there is no point converting their data or allocating a vector for them.
                CompositeSequence cs = csMap.get(matrix.getRowName(i));
                if (cs == null) {
                    skippedRows++;
                    continue;
                }
                ProcessedExpressionDataVector vector = ProcessedExpressionDataVector.Factory.newInstance();
                vector.setData(bArrayConverter.doubleArrayToBytes(matrix.getRow(i)));
                vector.setDesignElement(cs);
                vector.setBioAssayDimension(bad);
                vector.setQuantitationType(qtype);
                dataVectors.add(vector);
            }
            AbstractCLI.log.info("Read " + dataVectors.size() + " data vectors");
            if (skippedRows > 0) {
                // Make dropped rows visible instead of discarding them silently.
                AbstractCLI.log.warn("Skipped " + skippedRows + " rows with no matching element on " + this.linkAnalysisConfig.getArrayName());
            }
        } catch (Exception e) {
            // Failures are reported to the caller via the return value rather than thrown.
            return e;
        }
        this.linkAnalysisService.processVectors(this.taxon, dataVectors, filterConfig, linkAnalysisConfig);
    } else {
        /*
         * Do in decreasing order of size, to help capture more links earlier - reduces fragmentation.
         */
        List<BioAssaySet> sees = new ArrayList<>(expressionExperiments);
        if (expressionExperiments.size() > 1) {
            AbstractCLI.log.info("Sorting data sets by number of samples, doing large data sets first.");
            Collection<ExpressionExperimentValueObject> vos = eeService.loadValueObjects(EntityUtils.getIds(expressionExperiments), true);
            final Map<Long, ExpressionExperimentValueObject> idMap = EntityUtils.getIdMap(vos);
            Collections.sort(sees, new Comparator<BioAssaySet>() {

                @Override
                public int compare(BioAssaySet o1, BioAssaySet o2) {
                    ExpressionExperimentValueObject e1 = idMap.get(o1.getId());
                    ExpressionExperimentValueObject e2 = idMap.get(o2.getId());
                    // Report the data set whose value object is actually missing.
                    assert e1 != null : "No valueobject: " + o1;
                    assert e2 != null : "No valueobject: " + o2;
                    // Descending order: compare in reverse rather than negating the result, which is not
                    // safe for every legal compareTo return value.
                    return e2.getBioMaterialCount().compareTo(e1.getBioMaterialCount());
                }
            });
        }
        for (BioAssaySet ee : sees) {
            if (ee instanceof ExpressionExperiment) {
                this.processExperiment((ExpressionExperiment) ee);
            } else {
                throw new UnsupportedOperationException("Can't handle non-EE BioAssaySets yet");
            }
        }
        this.summarizeProcessing();
    }
    return null;
}
Also used : ByteArrayConverter(ubic.basecode.io.ByteArrayConverter) BioAssaySet(ubic.gemma.model.expression.experiment.BioAssaySet) ExpressionExperimentValueObject(ubic.gemma.model.expression.experiment.ExpressionExperimentValueObject) ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) ProcessedExpressionDataVector(ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector) SimpleExpressionDataLoaderService(ubic.gemma.core.loader.expression.simple.SimpleExpressionDataLoaderService) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment) FileInputStream(java.io.FileInputStream) BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) LinkAnalysisPersister(ubic.gemma.core.analysis.expression.coexpression.links.LinkAnalysisPersister) LinkAnalysisService(ubic.gemma.core.analysis.expression.coexpression.links.LinkAnalysisService) File(java.io.File) ArrayDesignService(ubic.gemma.persistence.service.expression.arrayDesign.ArrayDesignService)

Example 58 with BioAssayDimension

use of ubic.gemma.model.expression.bioAssayData.BioAssayDimension in project Gemma by PavlidisLab.

the class LinkAnalysisCli method makeBioAssayDimension.

/**
 * Builds a bioassay dimension with one bioassay per column of the given matrix.
 * <p>
 * For each column a new bio material and a new bioassay are created, both named after the column. The bio
 * material gets this command's taxon as its source taxon; the bioassay uses the given array design, is flagged
 * as neither an outlier nor paired-read, and is added to the dimension.
 *
 * @param arrayDesign platform assigned to every generated bioassay
 * @param matrix      matrix whose column names supply the sample names
 * @return the new dimension, named after the data file
 */
private BioAssayDimension makeBioAssayDimension(ArrayDesign arrayDesign, DoubleMatrix<String, String> matrix) {
    BioAssayDimension dimension = BioAssayDimension.Factory.newInstance();
    dimension.setName("For " + this.dataFileName);
    dimension.setDescription("Generated from flat file");

    for (int column = 0; column < matrix.columns(); column++) {
        String sampleName = matrix.getColName(column).toString();

        BioMaterial sample = BioMaterial.Factory.newInstance();
        sample.setName(sampleName);
        sample.setSourceTaxon(this.taxon);

        BioAssay bioAssay = BioAssay.Factory.newInstance();
        bioAssay.setName(sampleName);
        bioAssay.setArrayDesignUsed(arrayDesign);
        bioAssay.setSampleUsed(sample);
        bioAssay.setIsOutlier(false);
        bioAssay.setSequencePairedReads(false);

        dimension.getBioAssays().add(bioAssay);
    }
    return dimension;
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) ExpressionExperimentValueObject(ubic.gemma.model.expression.experiment.ExpressionExperimentValueObject) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay)

Example 59 with BioAssayDimension

use of ubic.gemma.model.expression.bioAssayData.BioAssayDimension in project Gemma by PavlidisLab.

the class ExpressionDataBooleanMatrix method createMatrix.

/**
 * Builds the boolean matrix for this expression data matrix from the supplied vectors.
 * <p>
 * All cells start out as {@link Boolean#FALSE}; columns are named by their index. Each vector's values are then
 * written into the row that the row element map assigns to the vector's design element.
 * <p>
 * NOTE(review): the row count is taken from {@code vectors.size()} while row indices come from rowElementMap; if
 * several vectors can map to the same design element the two may disagree -- confirm against the callers.
 *
 * @param vectors data vectors to copy in
 * @param maxSize number of columns to allocate
 * @return the filled matrix, with design elements as row names
 * @throws IllegalStateException if the number of values decoded from a vector differs from the number of
 *                               bioassays in that vector's bioassay dimension
 */
private ObjectMatrixImpl<CompositeSequence, Integer, Boolean> createMatrix(Collection<? extends DesignElementDataVector> vectors, int maxSize) {
    ObjectMatrixImpl<CompositeSequence, Integer, Boolean> matrix = new ObjectMatrixImpl<>(vectors.size(), maxSize);

    // Default every cell to false.
    for (int row = 0; row < matrix.rows(); row++) {
        for (int col = 0; col < matrix.columns(); col++) {
            matrix.set(row, col, Boolean.FALSE);
        }
    }

    // Columns are simply labelled with their own index.
    for (int col = 0; col < matrix.columns(); col++) {
        matrix.addColumnName(col);
    }

    ByteArrayConverter converter = new ByteArrayConverter();
    Map<Integer, CompositeSequence> elementsByRow = new TreeMap<>();
    for (DesignElementDataVector vector : vectors) {
        CompositeSequence designElement = vector.getDesignElement();
        Integer rowIndex = this.rowElementMap.get(designElement);
        assert rowIndex != null;
        elementsByRow.put(rowIndex, designElement);

        boolean[] values = this.getVals(converter, vector, vector.getData());
        Collection<BioAssay> assays = vector.getBioAssayDimension().getBioAssays();
        if (assays.size() != values.length) {
            throw new IllegalStateException("Expected " + values.length + " bioassays at design element " + designElement + ", got " + assays.size());
        }

        this.setMatBioAssayValues(matrix, rowIndex, ArrayUtils.toObject(values), assays, assays.iterator());
    }

    // Row names are added in row-index order.
    for (int row = 0; row < matrix.rows(); row++) {
        matrix.addRowName(elementsByRow.get(row));
    }
    assert matrix.getRowNames().size() == matrix.rows();
    return matrix;
}
Also used : ByteArrayConverter(ubic.basecode.io.ByteArrayConverter) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) ObjectMatrixImpl(ubic.basecode.dataStructure.matrix.ObjectMatrixImpl) DesignElementDataVector(ubic.gemma.model.expression.bioAssayData.DesignElementDataVector) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay)

Aggregations

BioAssayDimension (ubic.gemma.model.expression.bioAssayData.BioAssayDimension)59 BioAssay (ubic.gemma.model.expression.bioAssay.BioAssay)29 QuantitationType (ubic.gemma.model.common.quantitationtype.QuantitationType)20 RawExpressionDataVector (ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector)16 DesignElementDataVector (ubic.gemma.model.expression.bioAssayData.DesignElementDataVector)15 BioMaterial (ubic.gemma.model.expression.biomaterial.BioMaterial)15 CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence)15 ByteArrayConverter (ubic.basecode.io.ByteArrayConverter)11 StandardQuantitationType (ubic.gemma.model.common.quantitationtype.StandardQuantitationType)10 ArrayDesign (ubic.gemma.model.expression.arrayDesign.ArrayDesign)9 ProcessedExpressionDataVector (ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector)9 ExpressionExperiment (ubic.gemma.model.expression.experiment.ExpressionExperiment)6 HashSet (java.util.HashSet)4 Test (org.junit.Test)4 Transactional (org.springframework.transaction.annotation.Transactional)4 BioSequence (ubic.gemma.model.genome.biosequence.BioSequence)4 StopWatch (org.apache.commons.lang3.time.StopWatch)3 ExpressionDataDoubleMatrix (ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix)3 ExpressionExperimentValueObject (ubic.gemma.model.expression.experiment.ExpressionExperimentValueObject)3 DoubleArrayList (cern.colt.list.DoubleArrayList)2