Search in sources :

Example 26 with ProcessedExpressionDataVector

Use of ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector in project Gemma by PavlidisLab.

The class LinkAnalysisCli, method doWork.

/**
 * Runs the link analysis command line tool in one of four modes, chosen from the parsed options:
 * <ol>
 * <li>{@code initializeFromOldData}: initialize links from old data for the taxon, then stop;</li>
 * <li>{@code updateNodeDegree}: update node degrees for the taxon, then stop;</li>
 * <li>{@code dataFileName} set: read a data matrix from that file, turn its rows into processed data vectors
 * and analyze those;</li>
 * <li>otherwise: process each selected expression experiment, largest first.</li>
 * </ol>
 *
 * @param args the raw command line arguments
 * @return {@code null} when the run completed; otherwise the exception that stopped it (a command line
 *         processing error, an unknown array design, or a failure while reading the data file)
 * @throws UnsupportedOperationException if a selected data set is not an {@link ExpressionExperiment}
 */
@Override
protected Exception doWork(String[] args) {
    Exception err = this.processCommandLine(args);
    if (err != null) {
        return err;
    }

    if (initializeFromOldData) {
        AbstractCLI.log.info("Initializing links from old data for " + this.taxon);
        LinkAnalysisPersister s = this.getBean(LinkAnalysisPersister.class);
        s.initializeLinksFromOldData(this.taxon);
        return null;
    } else if (updateNodeDegree) {
        // we waste some time here getting the experiments.
        this.loadTaxon();
        this.getBean(CoexpressionService.class).updateNodeDegrees(this.taxon);
        return null;
    }

    this.linkAnalysisService = this.getBean(LinkAnalysisService.class);

    if (this.dataFileName != null) {
        /*
         * Read vectors from file. Could provide as a matrix, but it's easier to provide vectors (less mess in
         * later code)
         */
        ArrayDesignService arrayDesignService = this.getBean(ArrayDesignService.class);
        ArrayDesign arrayDesign = arrayDesignService.findByShortName(this.linkAnalysisConfig.getArrayName());
        if (arrayDesign == null) {
            return new IllegalArgumentException("No such array design " + this.linkAnalysisConfig.getArrayName());
        }
        this.loadTaxon();
        arrayDesign = arrayDesignService.thawLite(arrayDesign);

        Collection<ProcessedExpressionDataVector> dataVectors = new HashSet<>();

        // Index the platform's elements by name so matrix rows can be matched to them by row name.
        Map<String, CompositeSequence> csMap = new HashMap<>();
        for (CompositeSequence cs : arrayDesign.getCompositeSequences()) {
            csMap.put(cs.getName(), cs);
        }

        QuantitationType qtype = this.makeQuantitationType();
        SimpleExpressionDataLoaderService simpleExpressionDataLoaderService = this.getBean(SimpleExpressionDataLoaderService.class);
        ByteArrayConverter bArrayConverter = new ByteArrayConverter();

        try (InputStream data = new FileInputStream(new File(this.dataFileName))) {
            DoubleMatrix<String, String> matrix = simpleExpressionDataLoaderService.parse(data);
            BioAssayDimension bad = this.makeBioAssayDimension(arrayDesign, matrix);
            for (int i = 0; i < matrix.rows(); i++) {
                // Look the element up first: rows whose name is not on the platform are skipped, so there is
                // no point converting their data or allocating a vector for them.
                CompositeSequence cs = csMap.get(matrix.getRowName(i));
                if (cs == null) {
                    continue;
                }
                ProcessedExpressionDataVector vector = ProcessedExpressionDataVector.Factory.newInstance();
                vector.setData(bArrayConverter.doubleArrayToBytes(matrix.getRow(i)));
                vector.setDesignElement(cs);
                vector.setBioAssayDimension(bad);
                vector.setQuantitationType(qtype);
                dataVectors.add(vector);
            }
            AbstractCLI.log.info("Read " + dataVectors.size() + " data vectors");
        } catch (Exception e) {
            // Errors are reported to the caller through the return value rather than thrown.
            return e;
        }

        this.linkAnalysisService.processVectors(this.taxon, dataVectors, filterConfig, linkAnalysisConfig);
    } else {
        /*
         * Do in decreasing order of size, to help capture more links earlier - reduces fragmentation.
         */
        List<BioAssaySet> sees = new ArrayList<>(expressionExperiments);
        if (expressionExperiments.size() > 1) {
            AbstractCLI.log.info("Sorting data sets by number of samples, doing large data sets first.");
            Collection<ExpressionExperimentValueObject> vos = eeService.loadValueObjects(EntityUtils.getIds(expressionExperiments), true);
            final Map<Long, ExpressionExperimentValueObject> idMap = EntityUtils.getIdMap(vos);
            Collections.sort(sees, new Comparator<BioAssaySet>() {
                @Override
                public int compare(BioAssaySet o1, BioAssaySet o2) {
                    ExpressionExperimentValueObject e1 = idMap.get(o1.getId());
                    ExpressionExperimentValueObject e2 = idMap.get(o2.getId());
                    // Report the data set whose value object is actually missing.
                    assert e1 != null : "No valueobject: " + o1;
                    assert e2 != null : "No valueobject: " + o2;
                    // Operands swapped (rather than negating the result) to sort in descending order.
                    return e2.getBioMaterialCount().compareTo(e1.getBioMaterialCount());
                }
            });
        }

        for (BioAssaySet ee : sees) {
            if (ee instanceof ExpressionExperiment) {
                this.processExperiment((ExpressionExperiment) ee);
            } else {
                throw new UnsupportedOperationException("Can't handle non-EE BioAssaySets yet");
            }
        }
        this.summarizeProcessing();
    }
    return null;
}
Also used : ByteArrayConverter(ubic.basecode.io.ByteArrayConverter) BioAssaySet(ubic.gemma.model.expression.experiment.BioAssaySet) ExpressionExperimentValueObject(ubic.gemma.model.expression.experiment.ExpressionExperimentValueObject) ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) ProcessedExpressionDataVector(ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector) SimpleExpressionDataLoaderService(ubic.gemma.core.loader.expression.simple.SimpleExpressionDataLoaderService) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment) FileInputStream(java.io.FileInputStream) BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) LinkAnalysisPersister(ubic.gemma.core.analysis.expression.coexpression.links.LinkAnalysisPersister) LinkAnalysisService(ubic.gemma.core.analysis.expression.coexpression.links.LinkAnalysisService) File(java.io.File) ArrayDesignService(ubic.gemma.persistence.service.expression.arrayDesign.ArrayDesignService)

Aggregations

ProcessedExpressionDataVector (ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector)26 QuantitationType (ubic.gemma.model.common.quantitationtype.QuantitationType)10 BioAssayDimension (ubic.gemma.model.expression.bioAssayData.BioAssayDimension)9 Test (org.junit.Test)8 AbstractGeoServiceTest (ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest)8 GeoDomainObjectGeneratorLocal (ubic.gemma.core.loader.expression.geo.GeoDomainObjectGeneratorLocal)8 CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence)7 AlreadyExistsInSystemException (ubic.gemma.core.loader.util.AlreadyExistsInSystemException)6 BioAssay (ubic.gemma.model.expression.bioAssay.BioAssay)6 ByteArrayConverter (ubic.basecode.io.ByteArrayConverter)5 ExpressionDataDoubleMatrix (ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix)5 DesignElementDataVector (ubic.gemma.model.expression.bioAssayData.DesignElementDataVector)5 RawExpressionDataVector (ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector)5 ExpressionExperiment (ubic.gemma.model.expression.experiment.ExpressionExperiment)5 HashSet (java.util.HashSet)4 BioMaterial (ubic.gemma.model.expression.biomaterial.BioMaterial)4 File (java.io.File)3 StandardQuantitationType (ubic.gemma.model.common.quantitationtype.StandardQuantitationType)3 ArrayDesign (ubic.gemma.model.expression.arrayDesign.ArrayDesign)3 DoubleArrayList (cern.colt.list.DoubleArrayList)2