Search in sources :

Example 26 with ByteArrayConverter

use of ubic.basecode.io.ByteArrayConverter in project Gemma by PavlidisLab.

the class LinkAnalysisCli method doWork.

/**
 * Runs the link analysis according to the options parsed from the command line.
 * <p>
 * One of four things happens: links are initialized from old data, node degrees are updated, vectors read from a
 * data file are analyzed, or each selected expression experiment is processed (largest data sets first).
 *
 * @param args the command-line arguments, handed to {@code processCommandLine}
 * @return the exception reported by option parsing or raised while reading the data file; {@code null} when the
 *         method runs to the end
 * @throws UnsupportedOperationException if one of the data sets is not an {@code ExpressionExperiment}
 */
@Override
protected Exception doWork(String[] args) {
    Exception err = this.processCommandLine(args);
    if (err != null) {
        return err;
    }
    if (initializeFromOldData) {
        AbstractCLI.log.info("Initializing links from old data for " + this.taxon);
        LinkAnalysisPersister s = this.getBean(LinkAnalysisPersister.class);
        s.initializeLinksFromOldData(this.taxon);
        return null;
    } else if (updateNodeDegree) {
        // we waste some time here getting the experiments.
        this.loadTaxon();
        this.getBean(CoexpressionService.class).updateNodeDegrees(this.taxon);
        return null;
    }
    this.linkAnalysisService = this.getBean(LinkAnalysisService.class);
    if (this.dataFileName != null) {
        /*
         * Read vectors from file. Could provide as a matrix, but it's easier to provide vectors (less mess in later
         * code)
         */
        ArrayDesignService arrayDesignService = this.getBean(ArrayDesignService.class);
        ArrayDesign arrayDesign = arrayDesignService.findByShortName(this.linkAnalysisConfig.getArrayName());
        if (arrayDesign == null) {
            return new IllegalArgumentException("No such array design " + this.linkAnalysisConfig.getArrayName());
        }
        this.loadTaxon();
        arrayDesign = arrayDesignService.thawLite(arrayDesign);
        Collection<ProcessedExpressionDataVector> dataVectors = new HashSet<>();
        // Index the platform's elements by name so matrix rows can be matched to them.
        Map<String, CompositeSequence> csMap = new HashMap<>();
        for (CompositeSequence cs : arrayDesign.getCompositeSequences()) {
            csMap.put(cs.getName(), cs);
        }
        QuantitationType qtype = this.makeQuantitationType();
        SimpleExpressionDataLoaderService simpleExpressionDataLoaderService = this.getBean(SimpleExpressionDataLoaderService.class);
        ByteArrayConverter bArrayConverter = new ByteArrayConverter();
        try (InputStream data = new FileInputStream(this.dataFileName)) {
            DoubleMatrix<String, String> matrix = simpleExpressionDataLoaderService.parse(data);
            BioAssayDimension bad = this.makeBioAssayDimension(arrayDesign, matrix);
            int skipped = 0;
            for (int i = 0; i < matrix.rows(); i++) {
                // Match the row to an element first, so unmatched rows cost no conversion or allocation.
                CompositeSequence cs = csMap.get(matrix.getRowName(i));
                if (cs == null) {
                    skipped++;
                    continue;
                }
                ProcessedExpressionDataVector vector = ProcessedExpressionDataVector.Factory.newInstance();
                vector.setData(bArrayConverter.doubleArrayToBytes(matrix.getRow(i)));
                vector.setDesignElement(cs);
                vector.setBioAssayDimension(bad);
                vector.setQuantitationType(qtype);
                dataVectors.add(vector);
            }
            if (skipped > 0) {
                // Previously these rows were dropped without any trace.
                AbstractCLI.log.warn("Skipped " + skipped + " rows with no matching element on " + this.linkAnalysisConfig.getArrayName());
            }
            AbstractCLI.log.info("Read " + dataVectors.size() + " data vectors");
        } catch (Exception e) {
            // Errors are reported to the caller through the return value.
            return e;
        }
        this.linkAnalysisService.processVectors(this.taxon, dataVectors, filterConfig, linkAnalysisConfig);
    } else {
        /*
         * Do in decreasing order of size, to help capture more links earlier - reduces fragmentation.
         */
        List<BioAssaySet> sees = new ArrayList<>(expressionExperiments);
        if (expressionExperiments.size() > 1) {
            AbstractCLI.log.info("Sorting data sets by number of samples, doing large data sets first.");
            Collection<ExpressionExperimentValueObject> vos = eeService.loadValueObjects(EntityUtils.getIds(expressionExperiments), true);
            final Map<Long, ExpressionExperimentValueObject> idMap = EntityUtils.getIdMap(vos);
            Collections.sort(sees, new Comparator<BioAssaySet>() {

                @Override
                public int compare(BioAssaySet o1, BioAssaySet o2) {
                    ExpressionExperimentValueObject e1 = idMap.get(o1.getId());
                    ExpressionExperimentValueObject e2 = idMap.get(o2.getId());
                    // Report the data set whose value object is actually missing.
                    assert e1 != null : "No valueobject: " + o1;
                    assert e2 != null : "No valueobject: " + o2;
                    // Operands swapped to get descending order.
                    return e2.getBioMaterialCount().compareTo(e1.getBioMaterialCount());
                }
            });
        }
        for (BioAssaySet ee : sees) {
            if (ee instanceof ExpressionExperiment) {
                this.processExperiment((ExpressionExperiment) ee);
            } else {
                throw new UnsupportedOperationException("Can't handle non-EE BioAssaySets yet");
            }
        }
        this.summarizeProcessing();
    }
    return null;
}
Also used : ByteArrayConverter(ubic.basecode.io.ByteArrayConverter) BioAssaySet(ubic.gemma.model.expression.experiment.BioAssaySet) ExpressionExperimentValueObject(ubic.gemma.model.expression.experiment.ExpressionExperimentValueObject) ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) ProcessedExpressionDataVector(ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector) SimpleExpressionDataLoaderService(ubic.gemma.core.loader.expression.simple.SimpleExpressionDataLoaderService) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) ExpressionExperiment(ubic.gemma.model.expression.experiment.ExpressionExperiment) BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) LinkAnalysisPersister(ubic.gemma.core.analysis.expression.coexpression.links.LinkAnalysisPersister) LinkAnalysisService(ubic.gemma.core.analysis.expression.coexpression.links.LinkAnalysisService) File(java.io.File) ArrayDesignService(ubic.gemma.persistence.service.expression.arrayDesign.ArrayDesignService)

Example 27 with ByteArrayConverter

use of ubic.basecode.io.ByteArrayConverter in project Gemma by PavlidisLab.

the class ExpressionDataBooleanMatrix method createMatrix.

/**
 * Builds the boolean matrix for the supplied vectors.
 * <p>
 * The matrix has one row per vector and {@code maxSize} columns. All cells start as {@code false}, columns are
 * named by their index, and each vector's values are written into the row that {@code rowElementMap} assigns to
 * its design element.
 *
 * @param vectors the data vectors to place in the matrix
 * @param maxSize the number of columns
 * @return the populated matrix, with row names set from the design elements
 * @throws IllegalStateException if a vector's number of values differs from the number of bioassays in its
 *                               dimension
 */
private ObjectMatrixImpl<CompositeSequence, Integer, Boolean> createMatrix(Collection<? extends DesignElementDataVector> vectors, int maxSize) {
    ObjectMatrixImpl<CompositeSequence, Integer, Boolean> matrix = new ObjectMatrixImpl<>(vectors.size(), maxSize);

    // Start every cell at false.
    for (int row = 0; row < matrix.rows(); row++) {
        for (int col = 0; col < matrix.columns(); col++) {
            matrix.set(row, col, Boolean.FALSE);
        }
    }

    // Columns are named by their position.
    int column = 0;
    while (column < matrix.columns()) {
        matrix.addColumnName(column);
        column++;
    }

    // Sorted by row index; consulted once all vectors have been placed.
    Map<Integer, CompositeSequence> elementsByRow = new TreeMap<>();
    ByteArrayConverter converter = new ByteArrayConverter();
    for (DesignElementDataVector dataVector : vectors) {
        BioAssayDimension dim = dataVector.getBioAssayDimension();
        byte[] rawData = dataVector.getData();
        CompositeSequence element = dataVector.getDesignElement();

        Integer targetRow = this.rowElementMap.get(element);
        assert targetRow != null;
        elementsByRow.put(targetRow, element);

        boolean[] values = this.getVals(converter, dataVector, rawData);
        Collection<BioAssay> assays = dim.getBioAssays();
        if (values.length != assays.size()) {
            throw new IllegalStateException("Expected " + values.length + " bioassays at design element " + element + ", got " + assays.size());
        }

        this.setMatBioAssayValues(matrix, targetRow, ArrayUtils.toObject(values), assays, assays.iterator());
    }

    // Row names are added in row-index order.
    for (int row = 0; row < matrix.rows(); row++) {
        matrix.addRowName(elementsByRow.get(row));
    }

    assert matrix.getRowNames().size() == matrix.rows();
    return matrix;
}
Also used : ByteArrayConverter(ubic.basecode.io.ByteArrayConverter) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) ObjectMatrixImpl(ubic.basecode.dataStructure.matrix.ObjectMatrixImpl) DesignElementDataVector(ubic.gemma.model.expression.bioAssayData.DesignElementDataVector) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay)

Example 28 with ByteArrayConverter

use of ubic.basecode.io.ByteArrayConverter in project Gemma by PavlidisLab.

the class PersistentDummyObjectHelper method getDoubleData.

/**
 * Generates a vector of {@code NUM_BIOMATERIALS} random doubles and returns it encoded as bytes.
 * <p>
 * An offset that grows by 0.5 after every third index is added to the random values, so the data has some
 * correlation structure.
 *
 * @return the generated values as converted by {@code ByteArrayConverter.doubleArrayToBytes}
 */
private byte[] getDoubleData() {
    double[] data = new double[PersistentDummyObjectHelper.NUM_BIOMATERIALS];
    // One generator for the whole vector instead of a new Random per element.
    Random random = new Random();
    double bump = 0.0;
    for (int j = 0; j < data.length; j++) {
        data[j] = random.nextDouble() + bump;
        if (j % 3 == 0) {
            // add some correlation structure to the data.
            bump += 0.5;
        }
    }
    ByteArrayConverter bconverter = new ByteArrayConverter();
    return bconverter.doubleArrayToBytes(data);
}
Also used : ByteArrayConverter(ubic.basecode.io.ByteArrayConverter)

Example 29 with ByteArrayConverter

use of ubic.basecode.io.ByteArrayConverter in project Gemma by PavlidisLab.

the class PrincipalComponentAnalysis method getEigenvectorArrays.

/**
 * Convenience method to access the eigenvectors as arrays.
 * <p>
 * Each eigenvector is placed at position {@code componentNumber - 1}. Eigenvectors whose position is at or beyond
 * the number of components stored are skipped, and positions with no eigenvector are removed from the result.
 *
 * @return the eigenvectors as a List of Double[], in order of component number
 * @throws IllegalArgumentException if the number of components stored exceeds the number of bioassays in the
 *                                  bioassay dimension
 */
@Transient
public List<Double[]> getEigenvectorArrays() throws IllegalArgumentException {
    ByteArrayConverter bac = new ByteArrayConverter();
    int numComponentsStored = this.getNumComponentsStored();
    Collection<BioAssay> bioAssays = this.getBioAssayDimension().getBioAssays();
    if (bioAssays.size() < numComponentsStored) {
        /*
         * This is a sanity check. The number of components stored is fixed at some lower value
         */
        throw new IllegalArgumentException("EE id = " + this.getExperimentAnalyzed().getId() + ", PCA: Number of components stored (" + numComponentsStored + ") is greater than the number of bioAssays (" + bioAssays.size() + ")");
    }
    // One placeholder slot per bioassay, so eigenvectors can be set by index in any iteration order.
    List<Double[]> result = new ArrayList<>(bioAssays.size());
    for (int i = 0; i < bioAssays.size(); i++) {
        result.add(null);
    }
    for (Eigenvector ev : this.getEigenVectors()) {
        // Component numbers are 1-based.
        int index = ev.getComponentNumber() - 1;
        if (index >= numComponentsStored) {
            continue;
        }
        double[] doubleArr = bac.byteArrayToDoubles(ev.getVector());
        result.set(index, ArrayUtils.toObject(doubleArr));
    }
    // Drop the placeholder slots that were never filled.
    CollectionUtils.filter(result, new Predicate() {

        @Override
        public boolean evaluate(Object object) {
            return object != null;
        }
    });
    return result;
}
Also used : ByteArrayConverter(ubic.basecode.io.ByteArrayConverter) ArrayList(java.util.ArrayList) Predicate(org.apache.commons.collections.Predicate) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay) Transient(javax.persistence.Transient)

Example 30 with ByteArrayConverter

use of ubic.basecode.io.ByteArrayConverter in project Gemma by PavlidisLab.

the class PrincipalComponentAnalysisServiceImpl method create.

/**
 * Builds a {@code PrincipalComponentAnalysis} from the supplied decomposition and hands it to the DAO.
 *
 * @param ee                   the experiment that was analyzed
 * @param u                    matrix with one row per probe; column {@code i} supplies the loadings for
 *                             component {@code i + 1}
 * @param eigenvalues          the eigenvalues; all are stored, each with its fraction of their sum
 * @param v                    matrix whose columns are stored as the eigenvectors; all columns are stored
 * @param bad                  the bioassay dimension to associate with the analysis
 * @param numComponentsToStore upper limit on the number of components for which probe loadings are kept
 * @param numLoadingsToStore   upper limit on the number of probe loadings kept per component
 * @return the result of {@code principalComponentAnalysisDao.create}
 */
@Override
@Transactional
public PrincipalComponentAnalysis create(ExpressionExperiment ee, DoubleMatrix<CompositeSequence, Integer> u, double[] eigenvalues, DoubleMatrix<Integer, BioMaterial> v, BioAssayDimension bad, int numComponentsToStore, int numLoadingsToStore) {
    PrincipalComponentAnalysis pca = PrincipalComponentAnalysis.Factory.newInstance();
    int actualNumberOfComponentsStored = Math.min(numComponentsToStore, v.columns());
    pca.setNumComponentsStored(actualNumberOfComponentsStored);
    pca.setBioAssayDimension(bad);
    pca.setMaxNumProbesPerComponent(numLoadingsToStore);
    pca.setExperimentAnalyzed(ee);
    /*
     * deal with U. We keep only the first numComponentsToStore components for the first numLoadingsToStore genes.
     */
    // Previously the bound was one less than this, so one loading fewer than requested was stored per component.
    int loadingsPerComponent = Math.min(u.rows(), numLoadingsToStore);
    for (int i = 0; i < actualNumberOfComponentsStored; i++) {
        List<CompositeSequence> inOrder = u.sortByColumnAbsoluteValues(i, true);
        for (int j = 0; j < loadingsPerComponent; j++) {
            CompositeSequence probe = inOrder.get(j);
            // Component numbers are 1-based; j is the probe's position in the sorted list.
            ProbeLoading plr = ProbeLoading.Factory.newInstance(i + 1, u.getRowByName(probe)[i], j, probe);
            pca.getProbeLoadings().add(plr);
        }
    }
    /*
     * deal with V. note we store all of it.
     */
    ByteArrayConverter bac = new ByteArrayConverter();
    for (int i = 0; i < v.columns(); i++) {
        double[] column = v.getColumn(i);
        byte[] eigenVectorBytes = bac.doubleArrayToBytes(column);
        int componentNumber = i + 1;
        log.debug(componentNumber);
        Eigenvector evec = Eigenvector.Factory.newInstance(componentNumber, eigenVectorBytes);
        pca.getEigenVectors().add(evec);
    }
    /*
     * Deal with eigenvalues; note we store all of them.
     */
    // The total is needed first so each eigenvalue's variance fraction can be set as it is created.
    double sum = 0.0;
    for (double d : eigenvalues) {
        sum += d;
    }
    for (int i = 0; i < eigenvalues.length; i++) {
        double d = eigenvalues[i];
        Eigenvalue ev = Eigenvalue.Factory.newInstance();
        ev.setComponentNumber(i + 1);
        ev.setValue(d);
        ev.setVarianceFraction(d / sum);
        pca.getEigenValues().add(ev);
    }
    return this.principalComponentAnalysisDao.create(pca);
}
Also used : ByteArrayConverter(ubic.basecode.io.ByteArrayConverter) Eigenvector(ubic.gemma.model.analysis.expression.pca.Eigenvector) Eigenvalue(ubic.gemma.model.analysis.expression.pca.Eigenvalue) ArrayList(java.util.ArrayList) ProbeLoading(ubic.gemma.model.analysis.expression.pca.ProbeLoading) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) PrincipalComponentAnalysis(ubic.gemma.model.analysis.expression.pca.PrincipalComponentAnalysis) Transactional(org.springframework.transaction.annotation.Transactional)

Aggregations

ByteArrayConverter (ubic.basecode.io.ByteArrayConverter)32 BioAssayDimension (ubic.gemma.model.expression.bioAssayData.BioAssayDimension)11 CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence)11 BioAssay (ubic.gemma.model.expression.bioAssay.BioAssay)10 DesignElementDataVector (ubic.gemma.model.expression.bioAssayData.DesignElementDataVector)9 RawExpressionDataVector (ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector)6 ProcessedExpressionDataVector (ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector)5 ExpressionExperiment (ubic.gemma.model.expression.experiment.ExpressionExperiment)5 Test (org.junit.Test)4 QuantitationType (ubic.gemma.model.common.quantitationtype.QuantitationType)4 BioMaterial (ubic.gemma.model.expression.biomaterial.BioMaterial)4 DoubleArrayList (cern.colt.list.DoubleArrayList)3 Transactional (org.springframework.transaction.annotation.Transactional)3 AbstractGeoServiceTest (ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest)3 ArrayDesign (ubic.gemma.model.expression.arrayDesign.ArrayDesign)3 InputStream (java.io.InputStream)2 ArrayList (java.util.ArrayList)2 Collection (java.util.Collection)2 HashSet (java.util.HashSet)2 XYSeries (org.jfree.data.xy.XYSeries)2