Search in sources :

Example 61 with CompositeSequence

use of ubic.gemma.model.expression.designElement.CompositeSequence in project Gemma by PavlidisLab.

the class ArrayDesignSequenceProcessingServiceImpl method assignSequencesToDesignElements.

/**
 * Attaches a sequence to every design element whose name matches a sequence name.
 * <p>
 * Sequence names are passed through {@code deMangleProbeId} before being used as lookup keys; design element
 * names are looked up as-is. Design elements without a match are left untouched and counted.
 *
 * @param designElements the design elements to receive a biological characteristic
 * @param sequences      the candidate sequences, matched by name
 */
@Override
public void assignSequencesToDesignElements(Collection<CompositeSequence> designElements, Collection<BioSequence> sequences) {
    // Index the sequences by their de-mangled name; a later sequence with the same key replaces an earlier one.
    Map<String, BioSequence> sequencesByProbeId = new HashMap<>();
    for (BioSequence candidate : sequences) {
        String probeId = this.deMangleProbeId(candidate.getName());
        sequencesByProbeId.put(probeId, candidate);
    }

    int unmatched = 0;
    for (CompositeSequence probe : designElements) {
        String probeName = probe.getName();
        if (sequencesByProbeId.containsKey(probeName)) {
            probe.setBiologicalCharacteristic(sequencesByProbeId.get(probeName));
        } else {
            ArrayDesignSequenceProcessingServiceImpl.log.debug("No sequence matches " + probeName);
            unmatched++;
        }
    }

    ArrayDesignSequenceProcessingServiceImpl.log.info(sequences.size() + " sequences processed for " + designElements.size() + " design elements");
    if (unmatched > 0) {
        ArrayDesignSequenceProcessingServiceImpl.log.warn(unmatched + " probes had no matching sequence");
    }
}
Also used : BioSequence(ubic.gemma.model.genome.biosequence.BioSequence) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence)

Example 62 with CompositeSequence

use of ubic.gemma.model.expression.designElement.CompositeSequence in project Gemma by PavlidisLab.

the class ArrayDesignSequenceProcessingServiceImpl method initializeFetchList.

/**
 * Builds the list of biosequences of a platform whose sequence should be fetched, keyed by accession.
 * <p>
 * A composite sequence is skipped when it has no biosequence at all, when its biosequence already has a
 * non-blank sequence (unless {@code force} is set), or when no accession can be obtained for it.
 *
 * @param arrayDesign the platform whose composite sequences are examined
 * @param force       if true, sequence will be replaced even if it is already there.
 * @return map of biosequence accessions to BioSequences (the existing ones)
 */
private Map<String, BioSequence> initializeFetchList(ArrayDesign arrayDesign, boolean force) {
    Map<String, BioSequence> toFetch = new HashMap<>();
    int alreadyHaveSequence = 0;
    int missingBioSequence = 0;
    boolean alreadyWarned = false;

    for (CompositeSequence probe : arrayDesign.getCompositeSequences()) {
        BioSequence bioSequence = probe.getBiologicalCharacteristic();

        if (bioSequence == null) {
            // The helper receives the count as it stood before this probe and returns the updated warned flag.
            alreadyWarned = this.warnAboutMissingSequence(missingBioSequence, alreadyWarned, probe);
            missingBioSequence++;
        } else if (!force && StringUtils.isNotBlank(bioSequence.getSequence())) {
            alreadyHaveSequence++;
        } else {
            String accession = this.getAccession(probe);
            if (accession != null) {
                toFetch.put(accession, bioSequence);
            } else if (ArrayDesignSequenceProcessingServiceImpl.log.isDebugEnabled()) {
                ArrayDesignSequenceProcessingServiceImpl.log.debug("No accession for " + probe + ": " + bioSequence);
            }
        }
    }

    this.informAboutFetchListResults(arrayDesign, toFetch, alreadyHaveSequence, missingBioSequence);
    return toFetch;
}
Also used : BioSequence(ubic.gemma.model.genome.biosequence.BioSequence) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence)

Example 63 with CompositeSequence

use of ubic.gemma.model.expression.designElement.CompositeSequence in project Gemma by PavlidisLab.

the class GeoServiceImpl method matchToExistingPlatform.

/**
 * Reconciles the probe names of a raw GEO platform with a platform already stored in Gemma, if there is one.
 * <p>
 * The GEO platform is converted only to look up its short name and elements; the converted object is not kept.
 * For a platform not yet in Gemma every probe name is mapped to itself. For an existing platform the stored
 * probes are loaded, added to the cache and handed to {@code getGemmaIDColumnNameInGEO}.
 *
 * @param geoConverter   converter used for the throw-away conversion of the GEO platform
 * @param rawGEOPlatform the platform as parsed from GEO; its probe-name map is populated here
 * @param c              cache that receives the existing platform together with its probe-to-sequence map
 * @throws IllegalStateException if the conversion yields nothing although the platform uses data from GEO
 */
private void matchToExistingPlatform(GeoConverter geoConverter, GeoPlatform rawGEOPlatform, ArrayDesignsForExperimentCache c) {
    // This is the map we have to populate.
    Map<String, String> gemmaProbeNames = rawGEOPlatform.getProbeNamesInGemma();

    // Partial conversion only; the result is discarded afterwards.
    ArrayDesign converted = (ArrayDesign) geoConverter.convert(rawGEOPlatform);
    if (converted == null) {
        if (rawGEOPlatform.useDataFromGeo()) {
            throw new IllegalStateException("Platform is missing");
        }
        // MPSS, exon arrays
        return;
    }

    // Look up by short name only: the full name can change in GEO, causing trouble.
    ArrayDesign inGemma = arrayDesignService.findByShortName(converted.getShortName());

    if (inGemma == null) {
        AbstractGeoService.log.info(rawGEOPlatform + " looks new to Gemma");
        for (CompositeSequence probe : converted.getCompositeSequences()) {
            // No mapping needed. NB the converter fills this in already, we're just being defensive here.
            String name = probe.getName();
            gemmaProbeNames.put(name, name);
        }
        return;
    }

    AbstractGeoService.log.info("Platform " + rawGEOPlatform.getGeoAccession() + " exists in Gemma, checking for correct probe names and re-matching if necessary ...");
    String geoIdColumn = this.getGEOIDColumnName(rawGEOPlatform, converted);
    if (geoIdColumn == null) {
        // no problem: this means the design has no elements, so it is actually a placeholder (e.g., MPSS)
        return;
    }

    AbstractGeoService.log.info("Loading probes ...");
    Map<CompositeSequence, BioSequence> probeToSequence = arrayDesignService.getBioSequences(inGemma);
    c.add(inGemma, probeToSequence);
    this.getGemmaIDColumnNameInGEO(rawGEOPlatform, probeToSequence, geoIdColumn);
}
Also used : BioSequence(ubic.gemma.model.genome.biosequence.BioSequence) ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence)

Example 64 with CompositeSequence

use of ubic.gemma.model.expression.designElement.CompositeSequence in project Gemma by PavlidisLab.

the class GeoServiceImpl method addElements.

/**
 * Fills in the elements of an empty platform by regenerating them from GEO.
 * <p>
 * The GEO accession is taken from the first external reference of the platform. The elements of the first
 * converted array design are re-pointed at {@code targetPlatform}, their biosequences are persisted, and the
 * platform is updated and its report regenerated.
 * <p>
 * NOTE(review): assumes the platform has at least one external reference and that the conversion yields at
 * least one array design; otherwise the respective {@code iterator().next()} call fails.
 *
 * @param targetPlatform a platform without composite sequences
 * @return the same platform instance, now holding the added elements
 * @throws IllegalArgumentException if the platform already has composite sequences
 * @throws IllegalStateException    if nothing could be generated for the platform's GEO accession
 */
@Override
public ArrayDesign addElements(ArrayDesign targetPlatform) {
    if (!targetPlatform.getCompositeSequences().isEmpty()) {
        throw new IllegalArgumentException("Only call this if you are filling in an empty platform");
    }
    String geoAccession = targetPlatform.getExternalReferences().iterator().next().getAccession();

    /*
     * Prepare and configure the generator BEFORE using it. Previously generate() was called first, so the
     * null check below could never take effect (a null field would already have thrown) and the
     * platforms-only setting was applied too late to influence this call.
     */
    if (this.geoDomainObjectGenerator == null) {
        this.geoDomainObjectGenerator = new GeoDomainObjectGenerator();
    } else {
        this.geoDomainObjectGenerator.initialize();
    }
    this.geoDomainObjectGenerator.setProcessPlatformsOnly(true);

    Collection<? extends GeoData> platforms = this.geoDomainObjectGenerator.generate(geoAccession);
    if (platforms.isEmpty()) {
        throw new IllegalStateException("No GEO platform data could be generated for " + geoAccession);
    }

    /*
     * We do this to get a fresh instantiation of GeoConverter (prototype scope)
     */
    GeoConverter geoConverter = (GeoConverter) this.beanFactory.getBean("geoConverter");
    geoConverter.setForceConvertElements(true);

    Collection<Object> arrayDesigns = geoConverter.convert(platforms);
    Collection<CompositeSequence> els = ((ArrayDesign) arrayDesigns.iterator().next()).getCompositeSequences();
    for (CompositeSequence cs : els) {
        cs.setArrayDesign(targetPlatform);
        // Replace the biosequence with whatever the persister helper returns for it.
        cs.setBiologicalCharacteristic((BioSequence) persisterHelper.persist(cs.getBiologicalCharacteristic()));
    }

    AbstractGeoService.log.info("Adding " + els.size() + " elements to " + targetPlatform);
    targetPlatform.getCompositeSequences().addAll(els);
    arrayDesignService.update(targetPlatform);
    this.arrayDesignReportService.generateArrayDesignReport(targetPlatform.getId());
    return targetPlatform;
}
Also used : GeoDomainObjectGenerator(ubic.gemma.core.loader.expression.geo.GeoDomainObjectGenerator) ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) GeoConverter(ubic.gemma.core.loader.expression.geo.GeoConverter)

Example 65 with CompositeSequence

use of ubic.gemma.model.expression.designElement.CompositeSequence in project Gemma by PavlidisLab.

the class SimpleExpressionDataLoaderServiceImpl method getSubMatrixForArrayDesign.

/**
 * Extracts the rows of a data matrix whose names match elements of the given platform.
 * <p>
 * Row names already present in {@code usedDesignElements} are skipped, and every row taken here is added to
 * that collection, so a row is assigned to at most one platform across successive calls.
 *
 * @param matrix             the full data matrix, with row names to be matched against element names
 * @param usedDesignElements row names already claimed; updated in place with the rows selected here
 * @param design             the platform whose composite sequence names select the rows
 * @return a matrix of the selected rows with the original column names, or {@code null} if no row matched
 *         this platform
 * @throws IllegalArgumentException if {@code usedDesignElements} is still empty after matching
 */
@Override
public DoubleMatrix<String, String> getSubMatrixForArrayDesign(DoubleMatrix<String, String> matrix, Collection<Object> usedDesignElements, ArrayDesign design) {
    Collection<Object> platformElementNames = new HashSet<>();
    for (CompositeSequence probe : design.getCompositeSequences()) {
        platformElementNames.add(probe.getName());
    }

    List<String> selectedRowNames = new ArrayList<>();
    List<double[]> selectedRows = new ArrayList<>();
    for (String rowName : matrix.getRowNames()) {
        /*
         * A design element may be used only once; if two array designs match a given row name, the row simply
         * goes to whichever design is processed first.
         */
        boolean belongsToPlatform = platformElementNames.contains(rowName);
        if (belongsToPlatform && !usedDesignElements.contains(rowName)) {
            int rowIndex = matrix.getRowIndexByName(rowName);
            selectedRows.add(matrix.getRow(rowIndex));
            usedDesignElements.add(rowName);
            selectedRowNames.add(rowName);
        }
    }

    if (usedDesignElements.isEmpty()) {
        throw new IllegalArgumentException("No design elements matched?");
    }

    SimpleExpressionDataLoaderServiceImpl.log.info("Found " + selectedRows.size() + " data rows for " + design);
    if (selectedRows.isEmpty()) {
        SimpleExpressionDataLoaderServiceImpl.log.warn("A platform was entered ( " + design + " ) for which there are no matching rows in the data");
        return null;
    }

    // Only the outer array is allocated; toArray fills it with the row arrays collected above.
    double[][] subMatrixData = selectedRows.toArray(new double[selectedRows.size()][]);
    DoubleMatrix<String, String> result = DoubleMatrixFactory.fastrow(subMatrixData);
    result.setRowNames(selectedRowNames);
    result.setColumnNames(new ArrayList<>(matrix.getColNames()));
    return result;
}
Also used : CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence)

Aggregations

CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence)206 ArrayDesign (ubic.gemma.model.expression.arrayDesign.ArrayDesign)43 BioSequence (ubic.gemma.model.genome.biosequence.BioSequence)40 Gene (ubic.gemma.model.genome.Gene)32 Test (org.junit.Test)30 BioMaterial (ubic.gemma.model.expression.biomaterial.BioMaterial)19 ExpressionDataDoubleMatrix (ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix)18 BioAssay (ubic.gemma.model.expression.bioAssay.BioAssay)18 DesignElementDataVector (ubic.gemma.model.expression.bioAssayData.DesignElementDataVector)18 RawExpressionDataVector (ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector)18 StopWatch (org.apache.commons.lang3.time.StopWatch)17 HashSet (java.util.HashSet)15 BioAssayDimension (ubic.gemma.model.expression.bioAssayData.BioAssayDimension)15 CompositeSequenceValueObject (ubic.gemma.model.expression.designElement.CompositeSequenceValueObject)15 ArrayList (java.util.ArrayList)14 QuantitationType (ubic.gemma.model.common.quantitationtype.QuantitationType)14 BaseSpringContextTest (ubic.gemma.core.testing.BaseSpringContextTest)13 Taxon (ubic.gemma.model.genome.Taxon)12 Collection (java.util.Collection)11 ByteArrayConverter (ubic.basecode.io.ByteArrayConverter)11