Search in sources :

Example 76 with BioAssay

use of ubic.gemma.model.expression.bioAssay.BioAssay in project Gemma by PavlidisLab.

the class SampleCoexpressionMatrixServiceImpl method getMatrix.

/**
 * Builds the correlation matrix for the given expression data, with rows labelled by bioassay.
 * The data matrix is transposed so that its rows are keyed by biomaterial; a copy of the raw values is then
 * re-labelled so each row carries the first bioassay returned for that row's biomaterial.
 *
 * @param matrix expression data
 * @return the result of {@code MatrixStats.correlationMatrix} applied to the bioassay-labelled matrix
 */
private static DoubleMatrix<BioAssay, BioAssay> getMatrix(ExpressionDataDoubleMatrix matrix) {
    DoubleMatrix<BioMaterial, CompositeSequence> byMaterial = matrix.getMatrix().transpose();

    // Same raw values and column names, but rows will be keyed by bioassay instead of biomaterial.
    DoubleMatrix<BioAssay, CompositeSequence> byAssay = new DenseDoubleMatrix<>(byMaterial.getRawMatrix());
    byAssay.setColumnNames(byMaterial.getColNames());

    int row = 0;
    while (row < byAssay.rows()) {
        BioMaterial material = byMaterial.getRowName(row);
        // Only the first bioassay yielded by the iterator is used as the row label.
        BioAssay firstAssay = material.getBioAssaysUsedIn().iterator().next();
        byAssay.setRowName(firstAssay, row);
        row++;
    }

    return MatrixStats.correlationMatrix(byAssay);
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) DenseDoubleMatrix(ubic.basecode.dataStructure.matrix.DenseDoubleMatrix) CompositeSequence(ubic.gemma.model.expression.designElement.CompositeSequence) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay)

Example 77 with BioAssay

use of ubic.gemma.model.expression.bioAssay.BioAssay in project Gemma by PavlidisLab.

the class BatchInfoParser method getBatchInformationFromFiles.

/**
 * Now we can parse the files to get the batch information.
 * We allow ourselves to add dates to _some_ of the bioassays. It turns out to be common for there to be a single
 * corrupted date in CEL files, for example. However, downstream code has to be careful, and the batch factor could
 * be a problem too.
 * <p>
 * A file counts as "missing a date" when reading it fails, when the extractor yields no date, or when the
 * extracted date lies in the future.
 *
 * @param bioAssays2Files map of each bioassay to the file its date should be extracted from
 * @return map of biomaterials to dates. Biomaterials which did not have associated dates are not included in the
 * map.
 * @throws IllegalStateException if the number of files without a date equals the number of entries in
 *                               {@code bioAssays2Files}
 */
private Map<BioMaterial, Date> getBatchInformationFromFiles(Map<BioAssay, File> bioAssays2Files) {
    Map<BioMaterial, Date> result = new HashMap<>();
    Collection<File> missingDate = new HashSet<>();
    for (Map.Entry<BioAssay, File> entry : bioAssays2Files.entrySet()) {
        BioAssay ba = entry.getKey();
        File f = entry.getValue();
        ArrayDesign arrayDesignUsed = ba.getArrayDesignUsed();
        try (InputStream is = FileTools.getInputStreamFromPlainOrCompressedFile(f.getAbsolutePath())) {
            // Must run before the extraction below; presumably this selects the scanDateExtractor to use
            // for this platform/file - confirm against locateExtractor.
            this.locateExtractor(arrayDesignUsed, ba, f);
            Date d = scanDateExtractor.extract(is);
            if (d == null) {
                // Honour the documented contract: a biomaterial without a date is left out of the result
                // rather than being mapped to null.
                missingDate.add(f);
                continue;
            }
            // Sanity check. A date slightly in the future could in principle
            // be okay, but let's assume we're not getting data the same day it was generated!
            if (d.after(new Date())) {
                throw new RuntimeException("Date was in the future for: " + ba + " from " + f.getName());
            }
            BioMaterial bm = ba.getSampleUsed();
            result.put(bm, d);
        } catch (RuntimeException | IOException e) {
            // Best effort: a failure on one file must not prevent the other files from being processed.
            BatchInfoParser.log.warn("Failure while parsing: " + f + ": " + e.getMessage());
            missingDate.add(f);
        }
    }
    if (missingDate.size() == bioAssays2Files.size()) {
        throw new IllegalStateException("Dates were not found for any of the files.");
    }
    if (!missingDate.isEmpty()) {
        // Report the count here; the individual files are listed below.
        BatchInfoParser.log.warn("Dates were not obtained for " + missingDate.size() + " files: ");
        for (File f : missingDate) {
            BatchInfoParser.log.info("Missing date for: " + f.getName());
        }
    }
    return result;
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) InputStream(java.io.InputStream) IOException(java.io.IOException) File(java.io.File) LocalFile(ubic.gemma.model.common.description.LocalFile) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay)

Example 78 with BioAssay

use of ubic.gemma.model.expression.bioAssay.BioAssay in project Gemma by PavlidisLab.

the class BatchInfoParser method matchBioAssaysToRawDataFiles.

/**
 * Pairs bioassays with raw data files using the accession at the start of each file name. GEO names things
 * consistently (??) so this should work, but it is not ideal.
 * <p>
 * Files whose names do not start with "GSM", or that contain one of the skipped extensions, are ignored, as are
 * files whose accession is not a key of {@code assayAccessions}. When several files match one bioassay, a file
 * whose name contains ".CEL" is never displaced; otherwise the file seen last is kept.
 *
 * @param files           candidate files
 * @param assayAccessions bioassays keyed by accession (GSMNNNN)
 * @return map of bioassay to the file selected for it
 */
// Better readability
@SuppressWarnings("StatementWithEmptyBody")
private Map<BioAssay, File> matchBioAssaysToRawDataFiles(Collection<LocalFile> files, Map<String, BioAssay> assayAccessions) {
    Pattern accessionPattern = Pattern.compile("(GSM[0-9]+).+");
    Map<BioAssay, File> selected = new HashMap<>();

    for (LocalFile localFile : files) {
        File candidate = localFile.asFile();
        String fileName = candidate.getName();

        // We only support the newer style of storing these.
        if (!fileName.startsWith("GSM")) {
            continue;
        }

        String upperName = fileName.toUpperCase();
        boolean skippedType = upperName.contains(".CHP")
                || upperName.contains(".DAT")
                || upperName.contains(".EXP")
                || upperName.contains(".RPT")
                || upperName.contains(".TIF");
        if (skippedType) {
            continue;
        }

        // Keep just the GSMNNNNNN part. FIXME: only works with GEO
        Matcher accessionMatcher = accessionPattern.matcher(fileName);
        if (!accessionMatcher.matches()) {
            continue;
        }
        String accession = accessionMatcher.group(1);
        assert accession.matches("GSM[0-9]+");

        BioAssay assay = assayAccessions.get(accession);
        if (assay == null) {
            // Warn? Throw exception?
            continue;
        }

        // Stored values are never null (the file was dereferenced above), so a non-null lookup means the
        // bioassay already has a file.
        File existing = selected.get(assay);
        if (existing != null) {
            /*
             * Don't clobber a valid file. For affymetrix, CEL is what we want. Other cases harder to predict, but
             * .txt files can be either good or bad. (We could do this check earlier)
             */
            if (existing.getName().toUpperCase().contains(".CEL")) {
                BatchInfoParser.log.debug("Retaining CEL file, ignoring " + candidate.getName());
                continue;
            } else if (upperName.contains(".CEL")) {
                // The old file is displaced by this CEL file; there is no need to warn.
            } else {
                BatchInfoParser.log.warn("Multiple files matching " + assay + ": " + existing + "; using new file: " + candidate);
            }
        }

        selected.put(assay, candidate);
    }

    return selected;
}
Also used : Pattern(java.util.regex.Pattern) LocalFile(ubic.gemma.model.common.description.LocalFile) Matcher(java.util.regex.Matcher) File(java.io.File) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay)

Example 79 with BioAssay

use of ubic.gemma.model.expression.bioAssay.BioAssay in project Gemma by PavlidisLab.

the class SVDServiceHelperImpl method getFactorsForAnalysis.

/**
 * Fills the supplied maps with the date and factor information of each bioassay's sample.
 *
 * @param bioAssays            bioassays to read from
 * @param bioMaterialDates     receives biomaterial id to processing date; the date is stored even when null
 * @param bioMaterialFactorMap passed to {@code populateBMFMap} together with each sample
 */
private void getFactorsForAnalysis(Collection<BioAssay> bioAssays, Map<Long, Date> bioMaterialDates, Map<ExperimentalFactor, Map<Long, Double>> bioMaterialFactorMap) {
    for (BioAssay assay : bioAssays) {
        // The processing date can be null; it is recorded regardless.
        final Date runDate = assay.getProcessingDate();
        final BioMaterial sample = assay.getSampleUsed();
        final Long sampleId = sample.getId();

        bioMaterialDates.put(sampleId, runDate);
        SVDServiceHelperImpl.populateBMFMap(bioMaterialFactorMap, sample);
    }
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay)

Example 80 with BioAssay

use of ubic.gemma.model.expression.bioAssay.BioAssay in project Gemma by PavlidisLab.

the class SVDServiceHelperImpl method svdFactorAnalysis.

/**
 * Compares the components of a stored principal component analysis against the dates and experimental
 * factors of its samples.
 * <p>
 * Any date/factor results already held by the value object are cleared before the components are analyzed.
 * At most {@code MAX_EIGEN_GENES_TO_TEST} components are analyzed.
 *
 * @param pca the analysis to examine
 * @return the populated value object; the value object without comparisons when neither date nor factor
 * information is available; or {@code null} when the value object could not be built from {@code pca}
 */
@Override
public SVDValueObject svdFactorAnalysis(PrincipalComponentAnalysis pca) {
    BioAssayDimension bad = pca.getBioAssayDimension();
    List<BioAssay> bioAssays = bad.getBioAssays();
    SVDValueObject svo;
    try {
        svo = new SVDValueObject(pca);
    } catch (Exception e) {
        // Pass the exception itself to the logger: the message alone may be null and loses the stack trace.
        SVDServiceHelperImpl.log.error(e.getLocalizedMessage(), e);
        return null;
    }
    Map<Long, Date> bioMaterialDates = new HashMap<>();
    Map<ExperimentalFactor, Map<Long, Double>> bioMaterialFactorMap = new HashMap<>();
    this.prepareForFactorComparisons(svo, bioAssays, bioMaterialDates, bioMaterialFactorMap);
    if (bioMaterialDates.isEmpty() && bioMaterialFactorMap.isEmpty()) {
        SVDServiceHelperImpl.log.warn("No factor or date information to compare to the eigenGenes");
        return svo;
    }
    Long[] svdBioMaterials = svo.getBioMaterialIds();
    // Start from a clean slate so the value object holds only the results computed below.
    svo.getDateCorrelations().clear();
    svo.getFactorCorrelations().clear();
    svo.getDates().clear();
    svo.getFactors().clear();
    for (int componentNumber = 0; componentNumber < Math.min(svo.getvMatrix().columns(), SVDServiceHelperImpl.MAX_EIGEN_GENES_TO_TEST); componentNumber++) {
        this.analyzeComponent(svo, componentNumber, svo.getvMatrix(), bioMaterialDates, bioMaterialFactorMap, svdBioMaterials);
    }
    return svo;
}
Also used : ExperimentalFactor(ubic.gemma.model.expression.experiment.ExperimentalFactor) BioAssayDimension(ubic.gemma.model.expression.bioAssayData.BioAssayDimension) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay)

Aggregations

BioAssay (ubic.gemma.model.expression.bioAssay.BioAssay)144 BioMaterial (ubic.gemma.model.expression.biomaterial.BioMaterial)67 Test (org.junit.Test)29 BioAssayDimension (ubic.gemma.model.expression.bioAssayData.BioAssayDimension)29 ArrayDesign (ubic.gemma.model.expression.arrayDesign.ArrayDesign)24 ExpressionExperiment (ubic.gemma.model.expression.experiment.ExpressionExperiment)20 CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence)18 RawExpressionDataVector (ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector)16 AbstractGeoServiceTest (ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest)15 BaseSpringContextTest (ubic.gemma.core.testing.BaseSpringContextTest)14 ExperimentalFactor (ubic.gemma.model.expression.experiment.ExperimentalFactor)14 InputStream (java.io.InputStream)11 ByteArrayConverter (ubic.basecode.io.ByteArrayConverter)10 HashSet (java.util.HashSet)9 AlreadyExistsInSystemException (ubic.gemma.core.loader.util.AlreadyExistsInSystemException)8 DesignElementDataVector (ubic.gemma.model.expression.bioAssayData.DesignElementDataVector)8 FactorValue (ubic.gemma.model.expression.experiment.FactorValue)8 RequestMapping (org.springframework.web.bind.annotation.RequestMapping)7 ModelAndView (org.springframework.web.servlet.ModelAndView)7 ExpressionDataDoubleMatrix (ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix)7