Search in sources :

Example 41 with CompositeSequence

use of ubic.gemma.model.expression.designElement.CompositeSequence in project Gemma by PavlidisLab.

the class BaseAnalyzerConfigurationTest method configureVectors.

/**
 * Fills the fixture with processed expression data vectors read from a matrix file on the classpath.
 * <p>
 * One vector and one composite sequence are created for each of the first {@code NUM_DESIGN_ELEMENTS} rows of the
 * matrix; each vector stores the first {@code bioMaterials.size()} values of its row. The vectors are then set on
 * {@code expressionExperiment} and the composite sequences on {@code arrayDesign}.
 *
 * @param bioMaterials only its size is used, to decide how many values are copied per row
 * @param resourcePath classpath location of the matrix, or null to use "/data/stat-tests/anova-test-data.txt"
 * @throws Exception if the resource cannot be found or read
 */
void configureVectors(List<BioMaterial> bioMaterials, String resourcePath) throws Exception {
    this.vectors = new HashSet<>();
    String path = resourcePath == null ? "/data/stat-tests/anova-test-data.txt" : resourcePath;

    DoubleMatrix<String, String> dataMatrix;
    // The stream is opened here, so it is closed here; fully-qualified type avoids needing a new import.
    try (java.io.InputStream in = this.getClass().getResourceAsStream(path)) {
        if (in == null) {
            // getResourceAsStream signals a missing resource with null rather than an exception.
            throw new IllegalArgumentException("Test data resource not found on classpath: " + path);
        }
        dataMatrix = new DoubleMatrixReader().read(in);
    }

    Collection<CompositeSequence> compositeSequences = new HashSet<>();
    for (int i = 0; i < BaseAnalyzerConfigurationTest.NUM_DESIGN_ELEMENTS; i++) {
        // The probe is named after the matrix row; IDs are offset so probe and vector IDs do not coincide.
        CompositeSequence cs = CompositeSequence.Factory.newInstance();
        cs.setName(dataMatrix.getRowName(i));
        cs.setId(i + 1000L);
        cs.setArrayDesign(arrayDesign);

        ProcessedExpressionDataVector vector = ProcessedExpressionDataVector.Factory.newInstance();
        vector.setBioAssayDimension(bioAssayDimension);
        vector.setQuantitationType(quantitationType);
        vector.setDesignElement(cs);
        vector.setId(i + 10000L);

        // Copy one value per biomaterial from row i and store it in the vector's byte representation.
        double[] dvals = new double[bioMaterials.size()];
        for (int j = 0; j < dvals.length; j++) {
            dvals[j] = dataMatrix.get(i, j);
        }
        vector.setData(bac.doubleArrayToBytes(dvals));

        vectors.add(vector);
        compositeSequences.add(cs);
    }
    expressionExperiment.setProcessedExpressionDataVectors(vectors);
    arrayDesign.setCompositeSequences(compositeSequences);
}
Also used : ProcessedExpressionDataVector(ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) DoubleMatrixReader(ubic.basecode.io.reader.DoubleMatrixReader) HashSet(java.util.HashSet)

Example 42 with CompositeSequence

use of ubic.gemma.model.expression.designElement.CompositeSequence in project Gemma by PavlidisLab.

the class ExpressionDataFileServiceImpl method writeDataFile.

/**
 * Writes the data matrix of an experiment, together with gene annotations keyed by probe, to the given file.
 *
 * @param ee       experiment whose data matrix and array designs are used
 * @param filtered passed on to {@code getDataMatrix} when the matrix is fetched
 * @param f        file to write to
 * @param compress if true, file will be output in GZIP format.
 * @return the file that was passed in as {@code f}
 * @throws IOException propagated from the helper methods called here
 */
private File writeDataFile(ExpressionExperiment ee, boolean filtered, File f, boolean compress) throws IOException {
    ExpressionDataFileServiceImpl.log.info("Creating new expression data file: " + f.getName());

    // Matrix first, then the annotations for every array design the experiment uses.
    ExpressionDataDoubleMatrix dataMatrix = this.getDataMatrix(ee, filtered);
    Collection<ArrayDesign> platforms = expressionExperimentService.getArrayDesignsUsed(ee);
    Map<CompositeSequence, String[]> annotationsByProbe = this.getGeneAnnotationsAsStringsByProbe(platforms);

    this.writeMatrix(f, annotationsByProbe, dataMatrix, compress);
    return f;
}
Also used : ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) ExpressionDataDoubleMatrix(ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence)

Example 43 with CompositeSequence

use of ubic.gemma.model.expression.designElement.CompositeSequence in project Gemma by PavlidisLab.

the class ArrayDesignProbeMapperCli method processProbes.

/**
 * Runs the probe mapper for each name in {@code probeNames} on the given array design and hands every
 * resulting collection of BLAT associations to {@code printResult}. Names for which no probe is found are
 * logged as a warning and skipped.
 *
 * @param arrayDesign array design the probes are looked up on; it is thawed (lite) before use
 */
private void processProbes(ArrayDesign arrayDesign) {
    assert this.probeNames != null && this.probeNames.length > 0;

    ArrayDesign platform = arrayDesignService.thawLite(arrayDesign);
    CompositeSequenceService csService = this.getBean(CompositeSequenceService.class);

    for (String name : this.probeNames) {
        CompositeSequence found = csService.findByName(platform, name);
        if (found != null) {
            CompositeSequence probe = csService.thaw(found);
            Map<String, Collection<BlatAssociation>> mapped = this.arrayDesignProbeMapperService
                    .processCompositeSequence(this.config, taxon, null, probe);
            for (Collection<BlatAssociation> associations : mapped.values()) {
                // Individual associations are only logged at debug level; the whole collection is printed below.
                for (BlatAssociation hit : associations) {
                    if (AbstractCLI.log.isDebugEnabled()) {
                        AbstractCLI.log.debug(hit);
                    }
                }
                arrayDesignProbeMapperService.printResult(probe, associations);
            }
        } else {
            AbstractCLI.log.warn("No such probe: " + name + " on " + platform.getShortName());
        }
    }
}
Also used : CompositeSequenceService(ubic.gemma.persistence.service.expression.designElement.CompositeSequenceService) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) BlatAssociation(ubic.gemma.model.genome.sequenceAnalysis.BlatAssociation)

Example 44 with CompositeSequence

use of ubic.gemma.model.expression.designElement.CompositeSequence in project Gemma by PavlidisLab.

the class CompositeSequenceGeneMapperService method getGene2ProbeMapByOfficialSymbols.

/**
 * Finds genes by official symbol and collects, per gene, those of its composite sequences whose array design is
 * one of the given array designs. A gene without any such composite sequence gets no entry in the result.
 *
 * @param officialSymbols official symbols
 * @param arrayDesigns    to look in
 * @return map of gene to composite sequences
 */
public LinkedHashMap<Gene, Collection<CompositeSequence>> getGene2ProbeMapByOfficialSymbols(Collection<String> officialSymbols, Collection<ArrayDesign> arrayDesigns) {
    LinkedHashMap<String, Collection<Gene>> genesBySymbol = this.findGenesByOfficialSymbols(officialSymbols);
    LinkedHashMap<Gene, Collection<CompositeSequence>> probesByGene = new LinkedHashMap<>();

    for (String symbol : genesBySymbol.keySet()) {
        log.debug("official symbol: " + symbol);
        for (Gene gene : genesBySymbol.get(symbol)) {
            for (CompositeSequence candidate : geneService.getCompositeSequencesById(gene.getId())) {
                if (!arrayDesigns.contains(candidate.getArrayDesign())) {
                    continue;
                }
                // Create the gene's bucket lazily, on the first matching composite sequence.
                Collection<CompositeSequence> bucket = probesByGene.get(gene);
                if (bucket == null) {
                    bucket = new HashSet<CompositeSequence>();
                    probesByGene.put(gene, bucket);
                }
                bucket.add(candidate);
            }
        }
    }
    return probesByGene;
}
Also used : Gene(ubic.gemma.model.genome.Gene) Collection(java.util.Collection) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) LinkedHashMap(java.util.LinkedHashMap)

Example 45 with CompositeSequence

use of ubic.gemma.model.expression.designElement.CompositeSequence in project Gemma by PavlidisLab.

the class ExpressionDataFileServiceImpl method analysisResultSetToString.

/**
 * Adds one result set to a tab-delimited table that is being assembled: the QValue/PValue column headers go to
 * {@code buf}, and each result's corrected P-value and P-value are appended to the row buffer of its probe in
 * {@code probe2String}. A row buffer is created (probe name plus three annotation columns) the first time a probe
 * is seen.
 *
 * @param ears                       result set to render
 * @param geneAnnotations            annotation fields keyed by probe ID; fields 1, 2 and 4 are written
 * @param buf                        header buffer the two column names are appended to
 * @param probe2String               row buffers keyed by probe ID; updated in place
 * @param sortedFirstColumnOfResults previously sorted results, or null to have this result set's results sorted
 * @return {@code sortedFirstColumnOfResults} if it was non-null, otherwise the newly sorted list of results
 */
@Override
public List<DifferentialExpressionAnalysisResult> analysisResultSetToString(ExpressionAnalysisResultSet ears, Map<Long, String[]> geneAnnotations, StringBuilder buf, Map<Long, StringBuilder> probe2String, List<DifferentialExpressionAnalysisResult> sortedFirstColumnOfResults) {
    if (sortedFirstColumnOfResults == null) {
        // First result set: its sorted results become the reference ordering handed back to the caller.
        sortedFirstColumnOfResults = new ArrayList<>(ears.getResults());
        Collections.sort(sortedFirstColumnOfResults, DifferentialExpressionAnalysisResultComparator.Factory.newInstance());
    }

    // Column suffix "factor1_factor2": whitespace in factor names becomes "_", then the result is made R-friendly.
    StringBuilder joinedFactorNames = new StringBuilder();
    for (ExperimentalFactor factor : ears.getExperimentalFactors()) {
        joinedFactorNames.append(factor.getName().replaceAll("\\s+", "_"));
        joinedFactorNames.append("_");
    }
    StringBuilder columnSuffix = new StringBuilder(StringUtil.makeValidForR(StringUtils.removeEnd(joinedFactorNames.toString(), "_")));

    // Headers
    buf.append("\tQValue_").append(columnSuffix).append("\tPValue_").append(columnSuffix);

    // Values, cached per probe so that several result sets can contribute columns to the same row.
    for (DifferentialExpressionAnalysisResult result : ears.getResults()) {
        CompositeSequence probe = result.getProbe();
        Long probeId = probe.getId();

        StringBuilder row;
        if (!probe2String.containsKey(probeId)) {
            // First time this probe is seen: start its row with the name and the annotation columns.
            row = new StringBuilder();
            row.append(probe.getName());
            if (!geneAnnotations.containsKey(probeId)) {
                row.append("\t\t\t");
            } else {
                /*
                 * Annotation fields used:
                 *
                 * 1: gene symbols
                 * 2: gene name
                 * 4: ncbi ID
                 */
                String[] annotation = geneAnnotations.get(probeId);
                row.append("\t").append(annotation[1]);
                row.append("\t").append(annotation[2]);
                row.append("\t").append(annotation[4]);
            }
            probe2String.put(probeId, row);
        } else {
            row = probe2String.get(probeId);
        }

        Double qValue = result.getCorrectedPvalue();
        Double pValue = result.getPvalue();

        // Missing values are written as empty cells.
        String qText = "";
        if (qValue != null) {
            qText = String.format(ExpressionDataFileServiceImpl.DECIMAL_FORMAT, qValue);
        }
        String pText = "";
        if (pValue != null) {
            pText = String.format(ExpressionDataFileServiceImpl.DECIMAL_FORMAT, pValue);
        }
        row.append("\t").append(qText).append("\t").append(pText);
    }
    return sortedFirstColumnOfResults;
}
Also used : DifferentialExpressionAnalysisResult(ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisResult) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence)

Aggregations

CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence): 206; ArrayDesign (ubic.gemma.model.expression.arrayDesign.ArrayDesign): 43; BioSequence (ubic.gemma.model.genome.biosequence.BioSequence): 40; Gene (ubic.gemma.model.genome.Gene): 32; Test (org.junit.Test): 30; BioMaterial (ubic.gemma.model.expression.biomaterial.BioMaterial): 19; ExpressionDataDoubleMatrix (ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix): 18; BioAssay (ubic.gemma.model.expression.bioAssay.BioAssay): 18; DesignElementDataVector (ubic.gemma.model.expression.bioAssayData.DesignElementDataVector): 18; RawExpressionDataVector (ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector): 18; StopWatch (org.apache.commons.lang3.time.StopWatch): 17; HashSet (java.util.HashSet): 15; BioAssayDimension (ubic.gemma.model.expression.bioAssayData.BioAssayDimension): 15; CompositeSequenceValueObject (ubic.gemma.model.expression.designElement.CompositeSequenceValueObject): 15; ArrayList (java.util.ArrayList): 14; QuantitationType (ubic.gemma.model.common.quantitationtype.QuantitationType): 14; BaseSpringContextTest (ubic.gemma.core.testing.BaseSpringContextTest): 13; Taxon (ubic.gemma.model.genome.Taxon): 12; Collection (java.util.Collection): 11; ByteArrayConverter (ubic.basecode.io.ByteArrayConverter): 11