Search in sources :

Example 1 with StopWatch

Use of htsjdk.samtools.util.StopWatch in the project pgs-calc by lukfor.

From the class ApplyScoreCommand, method call():

/**
 * Applies the configured risk scores to every input VCF file and merges the
 * per-file results into the final outputs.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>optionally disables animated/ANSI progress output ({@code noAnsi});</li>
 *   <li>prints the effective input parameters;</li>
 *   <li>creates a {@code temp} folder next to {@code out} and builds one
 *       {@link ApplyScoreTask} per VCF file writing into that folder;</li>
 *   <li>runs all tasks through {@code TaskService};</li>
 *   <li>merges scores into {@code out}, effects into
 *       {@code outputEffectsFilename} (only if set) and the report into
 *       {@code reportJson};</li>
 *   <li>creates the HTML report if {@code reportHtml} is set, merging
 *       {@code meta} into the report first when provided.</li>
 * </ol>
 *
 * <p>{@code cleanUp()} is invoked on every exit path once the temp folder has
 * been created, including when an exception is thrown.
 *
 * @return {@code 0} on success; {@code 1} if no VCF file was provided or any
 *         task was reported as failed by {@code isFailed}
 * @throws Exception if any of the underlying steps throws
 */
public Integer call() throws Exception {
    if (noAnsi) {
        TaskService.setAnimated(false);
        TaskService.setAnsiColors(false);
    }
    // Nothing has been created yet at this point, so no clean-up is needed.
    if (vcfs == null || vcfs.isEmpty()) {
        System.out.println();
        System.out.println("Please provide at least one VCF file.");
        System.out.println();
        return 1;
    }
    System.out.println();
    System.out.println("Input:");
    System.out.println("  ref: " + ref);
    System.out.println("  out: " + out);
    System.out.println("  genotypes: " + genotypeFormat);
    System.out.println("  minR2: " + minR2);
    if (chunk != null) {
        System.out.println("  Chunk: " + chunk.getStart() + " - " + chunk.getEnd());
    }
    System.out.println("  vcfs (" + vcfs.size() + "):");
    for (String vcf : vcfs) {
        System.out.println("   - " + vcf);
    }
    System.out.println();
    StopWatch watch = new StopWatch();
    watch.start();
    // NOTE(review): File.getParent() returns null when 'out' has no directory
    // component; how FileUtil.path treats a null segment is not visible here - confirm.
    String outParent = new File(out).getParent();
    String tempFolder = FileUtil.path(outParent, "temp");
    File tempFolderFile = new File(tempFolder);
    tempFolderFile.mkdirs();
    try {
        List<ApplyScoreTask> tasks = new Vector<ApplyScoreTask>();
        for (String vcf : vcfs) {
            ApplyScoreTask task = new ApplyScoreTask();
            // Parsed per task on purpose: every task receives its own array instance.
            String[] refs = parseRef(ref);
            task.setRiskScoreFilenames(refs);
            for (String file : refs) {
                // A sibling "<file>.format" file switches that score to the mapping-file format.
                String autoFormat = file + ".format";
                if (new File(autoFormat).exists()) {
                    task.setRiskScoreFormat(file, RiskScoreFormat.MAPPING_FILE);
                }
            }
            if (chunk != null) {
                task.setChunk(chunk);
            }
            // The current list size doubles as a unique, zero-based task index.
            String taskPrefix = FileUtil.path(tempFolder, "task_" + tasks.size());
            if (dbsnp != null) {
                task.setDbSnp(dbsnp);
            }
            task.setVcfFilename(vcf);
            task.setMinR2(minR2);
            task.setGenotypeFormat(genotypeFormat);
            task.setOutputVariantFilename(outputVariantFilename);
            if (outputEffectsFilename != null) {
                // Per-task effects go to the temp folder; they are merged further below.
                task.setOutputEffectsFilename(taskPrefix + ".effects.txt");
            }
            task.setIncludeVariantFilename(includeVariantFilename);
            task.setIncludeSamplesFilename(includeSamplesFilename);
            task.setOutput(taskPrefix + ".scores.txt");
            tasks.add(task);
        }
        TaskService.setThreads(threads);
        List<Task> results = TaskService.monitor(App.STYLE_LONG_TASK).run(tasks);
        if (isFailed(results)) {
            return 1;
        }
        System.out.println();
        MergeScoreTask mergeScore = new MergeScoreTask();
        mergeScore.setInputs(tasks);
        mergeScore.setOutput(out);
        if (isFailed(TaskService.monitor(App.STYLE_SHORT_TASK).run(mergeScore))) {
            return 1;
        }
        if (outputEffectsFilename != null) {
            MergeEffectsTask mergeEffectsTask = new MergeEffectsTask();
            mergeEffectsTask.setInputs(tasks);
            mergeEffectsTask.setOutput(outputEffectsFilename);
            if (isFailed(TaskService.monitor(App.STYLE_SHORT_TASK).run(mergeEffectsTask))) {
                return 1;
            }
        }
        MergeReportTask mergeReport = new MergeReportTask();
        mergeReport.setInputs(tasks);
        mergeReport.setOutput(reportJson);
        if (isFailed(TaskService.monitor(App.STYLE_SHORT_TASK).run(mergeReport))) {
            return 1;
        }
        ReportFile report = mergeReport.getResult();
        if (reportHtml != null) {
            if (meta != null) {
                MetaFile metaFile = MetaFile.load(meta);
                report.mergeWithMeta(metaFile);
            }
            OutputFile data = new OutputFile(out);
            CreateHtmlReportTask htmlReportTask = new CreateHtmlReportTask();
            htmlReportTask.setReport(report);
            htmlReportTask.setData(data);
            htmlReportTask.setOutput(reportHtml);
            if (isFailed(TaskService.monitor(App.STYLE_SHORT_TASK).run(htmlReportTask))) {
                return 1;
            }
        }
        System.out.println();
        System.out.println("Execution Time: " + formatTime(watch.getElapsedTimeSecs()));
        System.out.println();
        watch.stop();
        return 0;
    } finally {
        // Single clean-up point for success, task failure and exceptions alike.
        // cleanUp() is defined elsewhere; presumably it removes the temp folder - confirm.
        cleanUp();
    }
}
Also used : OutputFile(genepi.riskscore.io.OutputFile) MergeEffectsTask(genepi.riskscore.tasks.MergeEffectsTask) MergeReportTask(genepi.riskscore.tasks.MergeReportTask) Task(lukfor.progress.tasks.Task) ApplyScoreTask(genepi.riskscore.tasks.ApplyScoreTask) CreateHtmlReportTask(genepi.riskscore.tasks.CreateHtmlReportTask) MergeScoreTask(genepi.riskscore.tasks.MergeScoreTask) MergeScoreTask(genepi.riskscore.tasks.MergeScoreTask) ApplyScoreTask(genepi.riskscore.tasks.ApplyScoreTask) MergeReportTask(genepi.riskscore.tasks.MergeReportTask) StopWatch(htsjdk.samtools.util.StopWatch) MergeEffectsTask(genepi.riskscore.tasks.MergeEffectsTask) ReportFile(genepi.riskscore.io.ReportFile) MetaFile(genepi.riskscore.io.MetaFile) PGSCatalogIDFile(genepi.riskscore.io.PGSCatalogIDFile) ReportFile(genepi.riskscore.io.ReportFile) MetaFile(genepi.riskscore.io.MetaFile) OutputFile(genepi.riskscore.io.OutputFile) File(java.io.File) Vector(java.util.Vector) CreateHtmlReportTask(genepi.riskscore.tasks.CreateHtmlReportTask)

Example 2 with StopWatch

Use of htsjdk.samtools.util.StopWatch in the project jvarkit by lindenb.

From the class MakeMiniBam, method doWork():

/**
 * Extracts the reads overlapping the configured intervals from each input BAM
 * and stores one small BAM plus its index per input in the output archive.
 *
 * <p>For every input file the reads matching the (extended) query intervals and
 * passing {@code samRecordFilter} are written to a temporary BAM in
 * {@code tmpDir}; the temporary BAM and its index are then copied into the
 * archive under a name built from {@code filePrefix}, the sample name and an
 * optional build/interval suffix. When {@code userComment} is not blank a
 * {@code README.md} entry listing the inputs and the date is added as well.
 *
 * <p>The input reader is closed and the temporary files are deleted on every
 * exit path of an iteration, including early returns and exceptions.
 *
 * @param args the input BAM paths, expanded through {@code IOUtils.unrollPaths}
 * @return {@code 0} on success, {@code -1} on any error (errors are logged, not rethrown)
 */
@Override
public int doWork(final List<String> args) {
    ArchiveFactory archive = null;
    int id_generator = 0;
    final Set<String> outputFileNames = new HashSet<>();
    try {
        // A blank prefix or the literal "now" means: use today's date as prefix.
        if (StringUtils.isBlank(this.filePrefix) || this.filePrefix.equals("now")) {
            final SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyyMMdd");
            this.filePrefix = simpleDateFormat.format(new Date()) + ".";
        }
        if (!this.filePrefix.endsWith(".")) {
            this.filePrefix += ".";
        }
        IOUtil.assertDirectoryIsWritable(tmpDir);
        final List<Path> bamFiles = IOUtils.unrollPaths(args);
        if (bamFiles.isEmpty()) {
            LOG.error("no bam file defined");
            return -1;
        }
        final List<Locatable> locatables = this.intervalListProvider.enableBreakEndInterval(!disable_sv_bnd).enableSinglePoint().stream().collect(Collectors.toList());
        if (locatables.isEmpty()) {
            LOG.error("no position defined");
            return -1;
        }
        final SAMFileWriterFactory swf = new SAMFileWriterFactory();
        swf.setCompressionLevel(9);
        swf.setCreateIndex(true);
        final SamReaderFactory srf = super.createSamReaderFactory();
        if (this.referencePath != null) {
            srf.referenceSequence(this.referencePath);
        }
        archive = ArchiveFactory.open(this.outputFile);
        // The BAMs are written with compression level 9 already, so the archive stores them as-is.
        archive.setCompressionLevel(Deflater.NO_COMPRESSION);
        for (final Path bamFile : bamFiles) {
            LOG.info(bamFile.toString());
            final StopWatch stopWatch = new StopWatch();
            stopWatch.start();
            // try-with-resources: the reader is released on success, early return and exception.
            try (SamReader sr = srf.open(bamFile)) {
                if (!sr.hasIndex()) {
                    LOG.error("file " + bamFile + " is not indexed.");
                    return -1;
                }
                final SAMFileHeader header = sr.getFileHeader();
                if (!header.getSortOrder().equals(SortOrder.coordinate)) {
                    LOG.error("file " + bamFile + " is not sorted on coordinate.");
                    return -1;
                }
                final SAMSequenceDictionary dict = SequenceDictionaryUtils.extractRequired(header);
                final Optional<String> dictLabel = SequenceDictionaryUtils.getBuildName(dict);
                // Suffix = optional ".<build>" followed, for a single interval only, by ".<contig>_<start>[_<end>]".
                final String labelSuffix = (dictLabel.isPresent() ? "." + dictLabel.get() : "") + (locatables.size() == 1 ? "." + locatables.get(0).getContig() + "_" + locatables.get(0).getStart() + (locatables.get(0).getStart() == locatables.get(0).getEnd() ? "" : "_" + locatables.get(0).getEnd()) : "");
                final ContigNameConverter ctgConvert = ContigNameConverter.fromOneDictionary(dict);
                final QueryInterval[] array = locatables.stream().flatMap(loc -> {
                    // Intervals longer than bound_edge are reduced to their two end points.
                    if (this.bound_edge < 1 || loc.getLengthOnReference() <= this.bound_edge) {
                        return Collections.singletonList(loc).stream();
                    }
                    return Arrays.asList((Locatable) new SimpleInterval(loc.getContig(), loc.getStart(), loc.getStart()), (Locatable) new SimpleInterval(loc.getContig(), loc.getEnd(), loc.getEnd())).stream();
                }).map(LOC -> {
                    final String contig = ctgConvert.apply(LOC.getContig());
                    if (StringUtils.isBlank(contig)) {
                        LOG.warn("Cannot find " + LOC.getContig() + " in " + bamFile);
                        return null;
                    }
                    final SAMSequenceRecord ssr = dict.getSequence(contig);
                    if (LOC.getStart() > ssr.getSequenceLength()) {
                        LOG.warn("pos " + LOC + " is greater than chromosome size " + ssr.getSequenceLength() + " in " + bamFile);
                        return null;
                    }
                    // Extend by 'extend' bases on both sides, clamped to the contig bounds.
                    return new QueryInterval(ssr.getSequenceIndex(), Math.max(1, LOC.getStart() - extend), Math.min(ssr.getSequenceLength(), LOC.getEnd() + extend));
                }).filter(Q -> Q != null).collect(HtsCollectors.optimizedQueryIntervals());
                final Path tmpBam = Files.createTempFile(this.tmpDir, "tmp.", ".bam");
                Path tmpBai = null;
                try {
                    final SAMFileHeader header2 = header.clone();
                    final SAMProgramRecord prg = header2.createProgramRecord();
                    prg.setProgramName(this.getProgramName());
                    prg.setProgramVersion(this.getGitHash());
                    prg.setCommandLine(this.getProgramCommandLine());
                    JVarkitVersion.getInstance().addMetaData(this, header2);
                    header2.addComment("MiniBam : Bam was " + bamFile);
                    try (SAMFileWriter sfw = swf.makeBAMWriter(header2, true, tmpBam)) {
                        // With no usable interval an empty (header-only) BAM is still produced.
                        if (array.length > 0) {
                            try (CloseableIterator<SAMRecord> ssr = sr.query(array, false)) {
                                while (ssr.hasNext()) {
                                    final SAMRecord rec = ssr.next();
                                    if (this.samRecordFilter.filterOut(rec)) {
                                        continue;
                                    }
                                    rec.setAttribute(SAMTag.PG.name(), prg.getId());
                                    sfw.addAlignment(rec);
                                }
                            }
                        }
                    }
                    tmpBai = SamFiles.findIndex(tmpBam);
                    // findIndex returns null when no index is found; guard before Files.exists,
                    // which would otherwise fail with a NullPointerException.
                    if (tmpBai == null || !Files.exists(tmpBai)) {
                        LOG.error("Cannot find tmp bai Index for " + bamFile + " " + tmpBam);
                        return -1;
                    }
                    final String sampleName1 = header.getReadGroups().stream().map(RG -> RG.getSample()).filter(S -> !StringUtils.isBlank(S)).findFirst().orElse(null);
                    final String sampleName;
                    if (!StringUtils.isBlank(sampleName1)) {
                        sampleName = sampleName1;
                    } else if (this.allow_no_sample) {
                        sampleName = IOUtils.getFilenameWithoutSuffix(bamFile, 1);
                        LOG.warn("No Read group in " + bamFile + " using filename : " + sampleName);
                    } else {
                        throw new IllegalArgumentException("No Sample found in " + bamFile + ". Use --no-samples option ?");
                    }
                    String filename = this.filePrefix + sampleName + labelSuffix;
                    // Disambiguate entries that would collide (e.g. same sample in several BAMs).
                    while (outputFileNames.contains(filename)) {
                        filename = this.filePrefix + sampleName + "." + (id_generator++) + labelSuffix;
                    }
                    outputFileNames.add(filename);
                    archive.copyTo(tmpBam, filename + FileExtensions.BAM);
                    archive.copyTo(tmpBai, filename + IOUtils.getFileSuffix(tmpBai));
                    stopWatch.stop();
                    LOG.info("Added " + StringUtils.niceFileSize(Files.size(tmpBam)) + "(bam) and " + StringUtils.niceFileSize(Files.size(tmpBai)) + " (Bai). " + StringUtils.niceDuration(stopWatch.getElapsedTime()));
                } finally {
                    // Remove the temporary files whatever happened above.
                    Files.deleteIfExists(tmpBam);
                    if (tmpBai != null) {
                        Files.deleteIfExists(tmpBai);
                    }
                }
            }
        }
        if (!StringUtils.isBlank(this.userComment)) {
            try (final PrintWriter pw = archive.openWriter(this.filePrefix + (this.filePrefix.endsWith(".") ? "" : ".") + "README.md")) {
                pw.append(this.userComment);
                pw.println();
                pw.println("## BAMS");
                pw.println();
                for (final Path bamFile : bamFiles) {
                    pw.println("  * " + bamFile);
                }
                pw.println();
                pw.println("## Date");
                pw.println();
                pw.println(new SimpleDateFormat("yyyyMMdd").format(new Date()));
                pw.flush();
            }
        }
        archive.close();
        // Nulled so the finally block below does not close the archive a second time.
        archive = null;
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(archive);
    }
}
Also used : Arrays(java.util.Arrays) Program(com.github.lindenb.jvarkit.util.jcommander.Program) IOUtil(htsjdk.samtools.util.IOUtil) Date(java.util.Date) SamFiles(htsjdk.samtools.SamFiles) IntervalListProvider(com.github.lindenb.jvarkit.samtools.util.IntervalListProvider) SAMFileHeader(htsjdk.samtools.SAMFileHeader) SortOrder(htsjdk.samtools.SAMFileHeader.SortOrder) DistanceParser(com.github.lindenb.jvarkit.util.bio.DistanceParser) Path(java.nio.file.Path) CloserUtil(htsjdk.samtools.util.CloserUtil) PrintWriter(java.io.PrintWriter) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) SAMFileWriter(htsjdk.samtools.SAMFileWriter) Deflater(java.util.zip.Deflater) Collectors(java.util.stream.Collectors) SAMRecord(htsjdk.samtools.SAMRecord) List(java.util.List) SAMProgramRecord(htsjdk.samtools.SAMProgramRecord) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) FileExtensions(htsjdk.samtools.util.FileExtensions) Optional(java.util.Optional) SamReaderFactory(htsjdk.samtools.SamReaderFactory) CloseableIterator(htsjdk.samtools.util.CloseableIterator) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) Parameter(com.beust.jcommander.Parameter) SimpleDateFormat(java.text.SimpleDateFormat) HtsCollectors(com.github.lindenb.jvarkit.stream.HtsCollectors) HashSet(java.util.HashSet) SAMTag(htsjdk.samtools.SAMTag) StopWatch(htsjdk.samtools.util.StopWatch) NoSplitter(com.github.lindenb.jvarkit.util.jcommander.NoSplitter) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) Locatable(htsjdk.samtools.util.Locatable) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) Files(java.nio.file.Files) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) JVarkitVersion(com.github.lindenb.jvarkit.util.JVarkitVersion) 
SamReader(htsjdk.samtools.SamReader) SamRecordFilter(htsjdk.samtools.filter.SamRecordFilter) QueryInterval(htsjdk.samtools.QueryInterval) SamRecordFilterFactory(com.github.lindenb.jvarkit.util.bio.samfilter.SamRecordFilterFactory) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) Collections(java.util.Collections) ArchiveFactory(com.github.lindenb.jvarkit.io.ArchiveFactory) QueryInterval(htsjdk.samtools.QueryInterval) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) SAMProgramRecord(htsjdk.samtools.SAMProgramRecord) SamReader(htsjdk.samtools.SamReader) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) HashSet(java.util.HashSet) PrintWriter(java.io.PrintWriter) Path(java.nio.file.Path) ArchiveFactory(com.github.lindenb.jvarkit.io.ArchiveFactory) SamReaderFactory(htsjdk.samtools.SamReaderFactory) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) Date(java.util.Date) StopWatch(htsjdk.samtools.util.StopWatch) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) SimpleDateFormat(java.text.SimpleDateFormat) Locatable(htsjdk.samtools.util.Locatable)

Aggregations

StopWatch (htsjdk.samtools.util.StopWatch)2 Parameter (com.beust.jcommander.Parameter)1 ArchiveFactory (com.github.lindenb.jvarkit.io.ArchiveFactory)1 IOUtils (com.github.lindenb.jvarkit.io.IOUtils)1 StringUtils (com.github.lindenb.jvarkit.lang.StringUtils)1 IntervalListProvider (com.github.lindenb.jvarkit.samtools.util.IntervalListProvider)1 SimpleInterval (com.github.lindenb.jvarkit.samtools.util.SimpleInterval)1 HtsCollectors (com.github.lindenb.jvarkit.stream.HtsCollectors)1 JVarkitVersion (com.github.lindenb.jvarkit.util.JVarkitVersion)1 DistanceParser (com.github.lindenb.jvarkit.util.bio.DistanceParser)1 SequenceDictionaryUtils (com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils)1 ContigNameConverter (com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter)1 SamRecordFilterFactory (com.github.lindenb.jvarkit.util.bio.samfilter.SamRecordFilterFactory)1 Launcher (com.github.lindenb.jvarkit.util.jcommander.Launcher)1 NoSplitter (com.github.lindenb.jvarkit.util.jcommander.NoSplitter)1 Program (com.github.lindenb.jvarkit.util.jcommander.Program)1 Logger (com.github.lindenb.jvarkit.util.log.Logger)1 MetaFile (genepi.riskscore.io.MetaFile)1 OutputFile (genepi.riskscore.io.OutputFile)1 PGSCatalogIDFile (genepi.riskscore.io.PGSCatalogIDFile)1