Search in sources :

Example 91 with ArrayDesign

use of ubic.gemma.model.expression.arrayDesign.ArrayDesign in project Gemma by PavlidisLab.

the class ExpressionDataFileServiceImpl method writeDiffExArchiveFile.

/**
 * Writes a zip archive holding the results of a differential expression analysis.
 * <p>
 * The archive is written to the file returned by {@code getOutputFile} for the analysis' archive file name. It
 * contains one entry named {@code analysis.results.txt} with the top-level (ANOVA-style) results, followed by one
 * entry per result set with the contrast information. Result sets involving more than one experimental factor
 * (interactions) are skipped. Entry contents are encoded as UTF-8 so the archive does not depend on the default
 * charset of the JVM that produced it.
 *
 * @param experimentAnalyzed the experiment whose array designs are used to look up gene annotations
 * @param analysis           the analysis to export; it is thawed before its result sets are iterated
 * @param config             configuration handed to the result-to-text conversion
 * @throws IOException if the archive cannot be created or written
 */
@Override
public void writeDiffExArchiveFile(BioAssaySet experimentAnalyzed, DifferentialExpressionAnalysis analysis, DifferentialExpressionAnalysisConfig config) throws IOException {
    Collection<ArrayDesign> arrayDesigns = this.expressionExperimentService.getArrayDesignsUsed(experimentAnalyzed);
    Map<Long, String[]> geneAnnotations = this.getGeneAnnotationsAsStrings(arrayDesigns);
    String filename = this.getDiffExArchiveFileName(analysis);
    File f = this.getOutputFile(filename);
    ExpressionDataFileServiceImpl.log.info("Creating differential expression analysis archive file: " + f.getName());
    try (ZipOutputStream zipOut = new ZipOutputStream(new FileOutputStream(f))) {
        // top-level analysis results - ANOVA-style
        zipOut.putNextEntry(new ZipEntry("analysis.results.txt"));
        String analysisData = this.convertDiffExpressionAnalysisData(analysis, geneAnnotations, config);
        // Explicit charset: the no-argument getBytes() uses the platform default encoding.
        zipOut.write(analysisData.getBytes(java.nio.charset.StandardCharsets.UTF_8));
        zipOut.closeEntry();
        differentialExpressionAnalysisService.thaw(analysis);
        // Add a file for each result set with contrasts information.
        int i = 0;
        for (ExpressionAnalysisResultSet resultSet : analysis.getResultSets()) {
            if (resultSet.getExperimentalFactors().size() > 1) {
                // Skip interactions.
                // TODO: the reason interactions are omitted is not documented here.
                ExpressionDataFileServiceImpl.log.info("Result file for interaction is omitted");
                continue;
            }
            String resultSetData = this.convertDiffExpressionResultSetData(resultSet, geneAnnotations, config);
            String entryName;
            if (resultSet.getId() == null) {
                // -nodb option on analysis: there is no persistent ID, so number the entries instead
                // ("NofM" to make it clearer this is not an ID).
                entryName = "resultset_" + ++i + "of" + analysis.getResultSets().size() + ".data.txt";
            } else {
                entryName = "resultset_ID" + resultSet.getId() + ".data.txt";
            }
            zipOut.putNextEntry(new ZipEntry(entryName));
            zipOut.write(resultSetData.getBytes(java.nio.charset.StandardCharsets.UTF_8));
            zipOut.closeEntry();
        }
    }
}
Also used : ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) ZipOutputStream(java.util.zip.ZipOutputStream) ZipEntry(java.util.zip.ZipEntry) ExpressionAnalysisResultSet(ubic.gemma.model.analysis.expression.diff.ExpressionAnalysisResultSet)

Example 92 with ArrayDesign

use of ubic.gemma.model.expression.arrayDesign.ArrayDesign in project Gemma by PavlidisLab.

the class BatchInfoParser method getBatchInformationFromFiles.

/**
 * Now we can parse the file to get the batch information.
 * We allow ourselves to add dates to _some_ of the bioassays. It turns out to be common for there to be a single
 * corrupted date in CEL files, for example. However, downstream code has to be careful, and the batch factor could
 * be a problem too.
 * <p>
 * A file counts as "missing a date" when it cannot be read, when the extractor fails, when the extractor returns
 * no date, or when the extracted date lies in the future.
 *
 * @param bioAssays2Files BA 2 files
 * @return map of biomaterials to dates. Biomaterials which did not have associated dates are not included in the
 * map.
 * @throws IllegalStateException if no date could be obtained for any of the files
 */
private Map<BioMaterial, Date> getBatchInformationFromFiles(Map<BioAssay, File> bioAssays2Files) {
    Map<BioMaterial, Date> result = new HashMap<>();
    Collection<File> missingDate = new HashSet<>();
    for (Map.Entry<BioAssay, File> entry : bioAssays2Files.entrySet()) {
        BioAssay ba = entry.getKey();
        File f = entry.getValue();
        ArrayDesign arrayDesignUsed = ba.getArrayDesignUsed();
        try (InputStream is = FileTools.getInputStreamFromPlainOrCompressedFile(f.getAbsolutePath())) {
            this.locateExtractor(arrayDesignUsed, ba, f);
            Date d = scanDateExtractor.extract(is);
            if (d == null) {
                // Honor the documented contract: never put a null date in the result.
                BatchInfoParser.log.warn("No date found in: " + f);
                missingDate.add(f);
                continue;
            }
            // Sanity check: a scan date in the future is treated like a parse failure (handled by the catch
            // below); we assume we're not getting data the same day it was generated.
            if (d.after(new Date())) {
                throw new RuntimeException("Date was in the future for: " + ba + " from " + f.getName());
            }
            BioMaterial bm = ba.getSampleUsed();
            result.put(bm, d);
        } catch (RuntimeException | IOException e) {
            // Best effort: one bad file must not prevent dates being collected from the others.
            BatchInfoParser.log.warn("Failure while parsing: " + f + ": " + e.getMessage());
            missingDate.add(f);
        }
    }
    if (missingDate.size() == bioAssays2Files.size()) {
        throw new IllegalStateException("Dates were not found for any of the files.");
    }
    if (!missingDate.isEmpty()) {
        BatchInfoParser.log.warn("Dates were not obtained for " + missingDate.size() + " files: ");
        for (File f : missingDate) {
            BatchInfoParser.log.info("Missing date for: " + f.getName());
        }
    }
    return result;
}
Also used : BioMaterial(ubic.gemma.model.expression.biomaterial.BioMaterial) ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) InputStream(java.io.InputStream) IOException(java.io.IOException) File(java.io.File) LocalFile(ubic.gemma.model.common.description.LocalFile) BioAssay(ubic.gemma.model.expression.bioAssay.BioAssay)

Example 93 with ArrayDesign

use of ubic.gemma.model.expression.arrayDesign.ArrayDesign in project Gemma by PavlidisLab.

the class ArrayDesignReportServiceImpl method getLastEvent.

/**
 * Formats the date of the last audit event of a given type recorded for an array design.
 * <p>
 * "Last" means the final matching entry in the list returned by the audit event service (presumably the most
 * recent one; the ordering is decided by that service).
 *
 * @param id        identifier of the array design to load
 * @param eventType only events whose type is an instance of this class are considered
 * @return an empty string if no array design is loaded for the id, {@code "[None]"} if it has no matching event,
 * otherwise the event date formatted as {@code yyyy.MMM.dd hh:mm aa}
 */
private String getLastEvent(Long id, Class<? extends AuditEventType> eventType) {
    ArrayDesign ad = arrayDesignService.load(id);
    if (ad == null) {
        return "";
    }
    // Remember only the latest match instead of collecting every matching event.
    AuditEvent latest = null;
    for (AuditEvent candidate : auditEventService.getEvents(ad)) {
        if (candidate == null) {
            // legacy of ordered-list which could end up with gaps; should not be needed any more
            continue;
        }
        if (candidate.getEventType() == null) {
            continue;
        }
        if (eventType.isAssignableFrom(candidate.getEventType().getClass())) {
            latest = candidate;
        }
    }
    if (latest == null) {
        return "[None]";
    }
    return DateFormatUtils.format(latest.getDate(), "yyyy.MMM.dd hh:mm aa");
}
Also used : ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) AuditEvent(ubic.gemma.model.common.auditAndSecurity.AuditEvent)

Example 94 with ArrayDesign

use of ubic.gemma.model.expression.arrayDesign.ArrayDesign in project Gemma by PavlidisLab.

the class ArrayDesignReportServiceImpl method generateArrayDesignReport.

/**
 * Computes summary counts for an array design, stores them on the given value object and serializes that value
 * object to the report file for the array design.
 * <p>
 * Does nothing if no array design is loaded for the value object's id. If an existing report file cannot be
 * replaced, or writing fails, the problem is logged and the method returns without throwing.
 *
 * @param adVo value object to fill in and serialize; its id selects the array design and names the report file
 */
@Override
public void generateArrayDesignReport(ArrayDesignValueObject adVo) {
    ArrayDesign ad = arrayDesignService.load(adVo.getId());
    if (ad == null)
        return;
    // obtain time information (for timestamp)
    String timestamp = DateFormatUtils.format(new Date(), "yyyy.MM.dd HH:mm");
    long numProbes = arrayDesignService.getCompositeSequenceCount(ad);
    long numCsBioSequences = arrayDesignService.numCompositeSequenceWithBioSequences(ad);
    long numCsBlatResults = arrayDesignService.numCompositeSequenceWithBlatResults(ad);
    long numCsGenes = arrayDesignService.numCompositeSequenceWithGenes(ad);
    long numGenes = arrayDesignService.numGenes(ad);
    adVo.setDesignElementCount((int) numProbes);
    adVo.setNumProbeSequences(Long.toString(numCsBioSequences));
    adVo.setNumProbeAlignments(Long.toString(numCsBlatResults));
    adVo.setNumProbesToGenes(Long.toString(numCsGenes));
    adVo.setNumGenes(Long.toString(numGenes));
    adVo.setDateCached(timestamp);
    // check the directory exists.
    String reportDir = ArrayDesignReportServiceImpl.HOME_DIR + File.separatorChar + ArrayDesignReportServiceImpl.ARRAY_DESIGN_REPORT_DIR;
    File reportDirF = new File(reportDir);
    if (!reportDirF.exists()) {
        EntityUtils.mkdirs(reportDirF);
    }
    String reportFileName = reportDir + File.separatorChar + ArrayDesignReportServiceImpl.ARRAY_DESIGN_REPORT_FILE_NAME_PREFIX + "." + adVo.getId();
    File f = new File(reportFileName);
    if (f.exists()) {
        // Remove the stale report first; if that is not possible keep it rather than risk a partial overwrite.
        if (!f.canWrite() || !f.delete()) {
            ArrayDesignReportServiceImpl.log.error("Report exists but cannot overwrite, leaving the old one in place: " + reportFileName);
            return;
        }
    }
    try (FileOutputStream fos = new FileOutputStream(reportFileName);
        ObjectOutputStream oos = new ObjectOutputStream(fos)) {
        oos.writeObject(adVo);
    } catch (Throwable e) {
        // Report generation is best effort, but keep the cause so the failure can be diagnosed.
        ArrayDesignReportServiceImpl.log.error("Cannot write to file: " + reportFileName, e);
        return;
    }
    ArrayDesignReportServiceImpl.log.info("Generated report for " + ad);
}
Also used : ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign)

Example 95 with ArrayDesign

use of ubic.gemma.model.expression.arrayDesign.ArrayDesign in project Gemma by PavlidisLab.

the class ArrayDesignReportServiceImpl method fillEventInformation.

/**
 * Fill in event information: for each value object, sets the dates of the last gene mapping, sequence update,
 * sequence analysis and repeat analysis events of the corresponding array design, where such an event exists.
 * <p>
 * Value objects without an id, or for which no array design or event is found, are left untouched for the
 * affected fields.
 *
 * @param adVos value objects to update in place; {@code null} or empty is a no-op
 */
@Override
public void fillEventInformation(Collection<ArrayDesignValueObject> adVos) {
    if (adVos == null || adVos.isEmpty())
        return;
    StopWatch watch = new StopWatch();
    watch.start();
    Collection<Long> ids = new ArrayList<>();
    for (ArrayDesignValueObject adVo : adVos) {
        Long id = adVo.getId();
        if (id != null) {
            ids.add(id);
        }
    }
    if (ids.isEmpty())
        return;
    Collection<Class<? extends AuditEventType>> typesToGet = Arrays.asList(eventTypes);
    Collection<ArrayDesign> arrayDesigns = arrayDesignService.load(ids);
    Map<Long, ArrayDesign> idMap = EntityUtils.getIdMap(arrayDesigns);
    Map<Class<? extends AuditEventType>, Map<Auditable, AuditEvent>> events = auditEventService.getLastEvents(arrayDesigns, typesToGet);
    // Any of these may be null if the service returned no entry for that event type.
    Map<Auditable, AuditEvent> geneMappingEvents = events.get(ArrayDesignGeneMappingEvent.class);
    Map<Auditable, AuditEvent> sequenceUpdateEvents = events.get(ArrayDesignSequenceUpdateEvent.class);
    Map<Auditable, AuditEvent> sequenceAnalysisEvents = events.get(ArrayDesignSequenceAnalysisEvent.class);
    Map<Auditable, AuditEvent> repeatAnalysisEvents = events.get(ArrayDesignRepeatAnalysisEvent.class);
    for (ArrayDesignValueObject adVo : adVos) {
        ArrayDesign ad = idMap.get(adVo.getId());
        AuditEvent geneMapping = lastEventFor(geneMappingEvents, ad);
        if (geneMapping != null) {
            adVo.setLastGeneMapping(geneMapping.getDate());
        }
        AuditEvent sequenceUpdate = lastEventFor(sequenceUpdateEvents, ad);
        if (sequenceUpdate != null) {
            adVo.setLastSequenceUpdate(sequenceUpdate.getDate());
        }
        AuditEvent sequenceAnalysis = lastEventFor(sequenceAnalysisEvents, ad);
        if (sequenceAnalysis != null) {
            adVo.setLastSequenceAnalysis(sequenceAnalysis.getDate());
        }
        AuditEvent repeatAnalysis = lastEventFor(repeatAnalysisEvents, ad);
        if (repeatAnalysis != null) {
            adVo.setLastRepeatMask(repeatAnalysis.getDate());
        }
    }
    watch.stop();
    if (watch.getTime() > 1000)
        ArrayDesignReportServiceImpl.log.info("Added event information in " + watch.getTime() + "ms");
}

/**
 * Null-safe lookup of the event recorded for an array design in one per-type event map.
 *
 * @return the event, or {@code null} if the map is absent, the array design is absent, or no event is recorded
 */
private static AuditEvent lastEventFor(Map<Auditable, AuditEvent> eventsByAuditable, ArrayDesign ad) {
    if (eventsByAuditable == null || ad == null) {
        return null;
    }
    return eventsByAuditable.get(ad);
}
Also used : Auditable(ubic.gemma.model.common.Auditable) ArrayDesign(ubic.gemma.model.expression.arrayDesign.ArrayDesign) ArrayDesignValueObject(ubic.gemma.model.expression.arrayDesign.ArrayDesignValueObject) StopWatch(org.apache.commons.lang3.time.StopWatch) AuditEvent(ubic.gemma.model.common.auditAndSecurity.AuditEvent)

Aggregations

ArrayDesign (ubic.gemma.model.expression.arrayDesign.ArrayDesign)186 CompositeSequence (ubic.gemma.model.expression.designElement.CompositeSequence)43 Test (org.junit.Test)32 ExpressionExperiment (ubic.gemma.model.expression.experiment.ExpressionExperiment)26 InputStream (java.io.InputStream)25 BioAssay (ubic.gemma.model.expression.bioAssay.BioAssay)24 BioSequence (ubic.gemma.model.genome.biosequence.BioSequence)24 Taxon (ubic.gemma.model.genome.Taxon)23 BaseSpringContextTest (ubic.gemma.core.testing.BaseSpringContextTest)19 HashSet (java.util.HashSet)16 RawExpressionDataVector (ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector)16 Collection (java.util.Collection)14 AbstractGeoServiceTest (ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest)13 StopWatch (org.apache.commons.lang3.time.StopWatch)12 Before (org.junit.Before)12 BioMaterial (ubic.gemma.model.expression.biomaterial.BioMaterial)12 BioAssayDimension (ubic.gemma.model.expression.bioAssayData.BioAssayDimension)9 GZIPInputStream (java.util.zip.GZIPInputStream)8 SimpleExpressionExperimentMetaData (ubic.gemma.core.loader.expression.simple.model.SimpleExpressionExperimentMetaData)8 File (java.io.File)7