Search in sources :

Example 21 with ProgressLogger

use of org.broadinstitute.hellbender.utils.runtime.ProgressLogger in project gatk by broadinstitute.

the class DownsampleSam method doWork.

/**
 * Copies a random subset of the primary records in INPUT to OUTPUT.
 *
 * <p>Secondary and supplementary records are skipped and are not counted. Each remaining
 * record is kept when a random draw is below PROBABILITY. For records flagged as paired, the
 * decision is remembered under the read name so the next record with the same name receives
 * the same decision.
 *
 * @return always {@code null}
 */
@Override
protected Object doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);
    // A null RANDOM_SEED yields an unseeded generator; otherwise the seed is used as given.
    final Random r = RANDOM_SEED == null ? new Random() : new Random(RANDOM_SEED);
    final SamReader in = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT);
    long total = 0;
    long kept = 0;
    try (final SAMFileWriter out = createSAMWriter(OUTPUT, REFERENCE_SEQUENCE, in.getFileHeader(), true)) {
        // Pending keep/discard decisions keyed by read name, waiting for the mate to show up.
        final Map<String, Boolean> decisions = new HashMap<>();
        final ProgressLogger progress = new ProgressLogger(logger, (int) 1e7, "Read");
        for (final SAMRecord rec : in) {
            if (rec.isSecondaryOrSupplementary()) {
                continue;
            }
            ++total;
            final String key = rec.getReadName();
            // remove() both looks up and clears the entry, so the map only holds reads whose
            // mate has not been seen yet.
            final Boolean previous = decisions.remove(key);
            final boolean keeper;
            if (previous == null) {
                // Random.nextDouble() is in [0.0, 1.0): a strict '<' keeps nothing when
                // PROBABILITY is 0 and still keeps everything when PROBABILITY is 1.
                keeper = r.nextDouble() < PROBABILITY;
                if (rec.getReadPairedFlag()) {
                    decisions.put(key, keeper);
                }
            } else {
                keeper = previous;
            }
            if (keeper) {
                out.addAlignment(rec);
                ++kept;
            }
            progress.record(rec);
        }
    } finally {
        CloserUtil.close(in);
    }
    logger.info("Finished! Kept " + kept + " out of " + total + " reads.");
    return null;
}
Also used : Random(java.util.Random) HashMap(java.util.HashMap) ProgressLogger(org.broadinstitute.hellbender.utils.runtime.ProgressLogger)

Example 22 with ProgressLogger

use of org.broadinstitute.hellbender.utils.runtime.ProgressLogger in project gatk by broadinstitute.

the class AddOrReplaceReadGroups method doWork.

/**
 * Rewrites INPUT to OUTPUT with a single read group built from the readGroup* settings,
 * tagging every record with that read group's id.
 *
 * @return always {@code null}
 */
@Override
protected Object doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);
    final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT);

    // Build the replacement read group: mandatory fields first, optional ones only when set.
    final SAMReadGroupRecord readGroup = new SAMReadGroupRecord(readGroupId);
    readGroup.setLibrary(readGroupLibrary);
    readGroup.setPlatform(readGroupPlatform);
    readGroup.setSample(readGroupSampleName);
    readGroup.setPlatformUnit(readGroupPlatformUnit);
    if (readGroupSequencingCenter != null) {
        readGroup.setSequencingCenter(readGroupSequencingCenter);
    }
    if (readGroupDescription != null) {
        readGroup.setDescription(readGroupDescription);
    }
    if (readGroupRunDate != null) {
        readGroup.setRunDate(readGroupRunDate);
    }
    if (readGroupPredictedInsertSize != null) {
        readGroup.setPredictedMedianInsertSize(readGroupPredictedInsertSize);
    }
    if (readGroupProgramGroup != null) {
        readGroup.setProgramGroup(readGroupProgramGroup);
    }
    if (readGroupPlatformModel != null) {
        readGroup.setPlatformModel(readGroupPlatformModel);
    }
    final String summary = String.format("Created read group ID=%s PL=%s LB=%s SM=%s%n",
            readGroup.getId(), readGroup.getPlatform(), readGroup.getLibrary(), readGroup.getSample());
    logger.info(summary);

    // Derive the output header from a copy of the input header, carrying only the new read group.
    final SAMFileHeader sourceHeader = reader.getFileHeader();
    final SAMFileHeader targetHeader = ReadUtils.cloneSAMFileHeader(sourceHeader);
    targetHeader.setReadGroups(Arrays.asList(readGroup));
    if (SORT_ORDER != null) {
        targetHeader.setSortOrder(SORT_ORDER);
    }

    // The writer is told the records are presorted only when the sort order is unchanged.
    final boolean presorted = targetHeader.getSortOrder() == sourceHeader.getSortOrder();
    try (final SAMFileWriter writer = createSAMWriter(OUTPUT, REFERENCE_SEQUENCE, targetHeader, presorted)) {
        final ProgressLogger tracker = new ProgressLogger(logger);
        for (final SAMRecord record : reader) {
            record.setAttribute(SAMTag.RG.name(), readGroupId);
            writer.addAlignment(record);
            tracker.record(record);
        }
    } finally {
        // Release the input reader whether or not writing succeeded.
        CloserUtil.close(reader);
    }
    return null;
}
Also used : ProgressLogger(org.broadinstitute.hellbender.utils.runtime.ProgressLogger)

Example 23 with ProgressLogger

use of org.broadinstitute.hellbender.utils.runtime.ProgressLogger in project gatk by broadinstitute.

the class FastqToSam method doPaired.

/**
 * Reads two fastq inputs in lock step and writes each pair of records to the SAM writer,
 * flagging the record from the first input as first-of-pair and the other as second-of-pair.
 *
 * <p>The writer is closed once both inputs stop yielding pairs; if either input still has
 * records left at that point a {@link UserException} is thrown.
 *
 * @param freader1 source of the first read of each pair
 * @param freader2 source of the second read of each pair
 * @param writer destination for the generated records; closed by this method
 * @return the number of pairs processed
 */
protected int doPaired(final FastqReader freader1, final FastqReader freader2, final SAMFileWriter writer) {
    final ProgressLogger tracker = new ProgressLogger(LOG);
    int readCount = 0;
    while (freader1.hasNext() && freader2.hasNext()) {
        final FastqRecord firstFastq = freader1.next();
        final FastqRecord secondFastq = freader2.next();

        // Both mates share one base name derived from their individual read names.
        final String firstName = getReadName(firstFastq.getReadName(), true);
        final String secondName = getReadName(secondFastq.getReadName(), true);
        final String pairName = getBaseName(firstName, secondName, freader1, freader2);

        final SAMRecord firstSam = createSamRecord(writer.getFileHeader(), pairName, firstFastq, true);
        firstSam.setFirstOfPairFlag(true);
        firstSam.setSecondOfPairFlag(false);
        writer.addAlignment(firstSam);
        tracker.record(firstSam);

        final SAMRecord secondSam = createSamRecord(writer.getFileHeader(), pairName, secondFastq, true);
        secondSam.setFirstOfPairFlag(false);
        secondSam.setSecondOfPairFlag(true);
        writer.addAlignment(secondSam);
        tracker.record(secondSam);

        readCount++;
    }
    writer.close();

    // Leftover records on either side mean the two inputs did not have the same length.
    final boolean unevenInputs = freader1.hasNext() || freader2.hasNext();
    if (unevenInputs) {
        throw new UserException("Input paired fastq files must be the same length");
    }
    return readCount;
}
Also used : FastqRecord(htsjdk.samtools.fastq.FastqRecord) ProgressLogger(org.broadinstitute.hellbender.utils.runtime.ProgressLogger) UserException(org.broadinstitute.hellbender.exceptions.UserException)

Example 24 with ProgressLogger

use of org.broadinstitute.hellbender.utils.runtime.ProgressLogger in project gatk by broadinstitute.

the class FilterReads method filterReads.

/**
 * Writes every record produced by the given filtering iterator to OUTPUT and logs how many
 * records were written.
 *
 * <p>The output header is read from INPUT; its sort order is replaced by SORT_ORDER when that
 * is set. The iterator is always closed before this method returns, including when writing
 * fails.
 *
 * @param filteringIterator source of the records to write; closed by this method
 */
private void filterReads(final FilteringSamIterator filteringIterator) {
    // get OUTPUT header from INPUT and overwrite it if necessary
    final SAMFileHeader fileHeader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).getFileHeader(INPUT);
    final SAMFileHeader.SortOrder inputSortOrder = fileHeader.getSortOrder();
    if (SORT_ORDER != null) {
        fileHeader.setSortOrder(SORT_ORDER);
    }
    // Records count as presorted only when the requested order matches the input's order.
    final boolean presorted = inputSortOrder.equals(fileHeader.getSortOrder());
    logger.info("Filtering [presorted=" + presorted + "] " + INPUT.getName() + " -> output=" + OUTPUT.getName() + " [sortorder=" + fileHeader.getSortOrder().name() + "]");
    final ProgressLogger progress = new ProgressLogger(logger, (int) 1e6, "Written");
    // create OUTPUT file
    try (final SAMFileWriter outputWriter = createSAMWriter(OUTPUT, REFERENCE_SEQUENCE, fileHeader, presorted)) {
        while (filteringIterator.hasNext()) {
            final SAMRecord rec = filteringIterator.next();
            outputWriter.addAlignment(rec);
            progress.record(rec);
        }
    } finally {
        // Close the iterator even when creating the writer or writing a record throws, so the
        // underlying input is not leaked on the failure path.
        filteringIterator.close();
    }
    logger.info(new DecimalFormat("#,###").format(progress.getCount()) + " SAMRecords written to " + OUTPUT.getName());
}
Also used : DecimalFormat(java.text.DecimalFormat) ProgressLogger(org.broadinstitute.hellbender.utils.runtime.ProgressLogger)

Example 25 with ProgressLogger

use of org.broadinstitute.hellbender.utils.runtime.ProgressLogger in project gatk by broadinstitute.

the class MergeSamFiles method doWork.

/**
 * Combines multiple SAM/BAM files into one.
 *
 * <p>All INPUT files are opened up front and their headers merged. The merged records are then
 * written to OUTPUT; the writer is told the records are presorted when every input already has
 * the requested SORT_ORDER, when SORT_ORDER is unsorted, or when ASSUME_SORTED is set.
 *
 * @return always {@code null}
 */
@Override
protected Object doWork() {
    boolean matchedSortOrders = true;
    // Open the files for reading and writing
    final List<SamReader> readers = new ArrayList<>();
    final List<SAMFileHeader> headers = new ArrayList<>();
    {
        // Used to try and reduce redundant SDs in memory
        SAMSequenceDictionary dict = null;
        for (final File inFile : INPUT) {
            IOUtil.assertFileIsReadable(inFile);
            final SamReader in = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(inFile);
            readers.add(in);
            headers.add(in.getFileHeader());
            // replace the duplicate copies with a single dictionary to reduce the memory footprint.
            if (dict == null) {
                dict = in.getFileHeader().getSequenceDictionary();
            } else if (dict.equals(in.getFileHeader().getSequenceDictionary())) {
                in.getFileHeader().setSequenceDictionary(dict);
            }
            // Stays true only while every input header's sort order equals SORT_ORDER.
            matchedSortOrders = matchedSortOrders && in.getFileHeader().getSortOrder() == SORT_ORDER;
        }
    }
    // If all the input sort orders match the output sort order then just merge them and
    // write on the fly, otherwise setup to merge and sort before writing out the final file
    IOUtil.assertFileIsWritable(OUTPUT);
    final boolean presorted;
    final SAMFileHeader.SortOrder headerMergerSortOrder;
    final boolean mergingSamRecordIteratorAssumeSorted;
    if (matchedSortOrders || SORT_ORDER == SAMFileHeader.SortOrder.unsorted || ASSUME_SORTED) {
        logger.info("Input files are in same order as output so sorting to temp directory is not needed.");
        headerMergerSortOrder = SORT_ORDER;
        mergingSamRecordIteratorAssumeSorted = ASSUME_SORTED;
        presorted = true;
    } else {
        logger.info("Sorting input files using temp directory " + TMP_DIR);
        headerMergerSortOrder = SAMFileHeader.SortOrder.unsorted;
        mergingSamRecordIteratorAssumeSorted = false;
        presorted = false;
    }
    final SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(headerMergerSortOrder, headers, MERGE_SEQUENCE_DICTIONARIES);
    final MergingSamRecordIterator iterator = new MergingSamRecordIterator(headerMerger, readers, mergingSamRecordIteratorAssumeSorted);
    final SAMFileHeader header = headerMerger.getMergedHeader();
    for (final String comment : COMMENT) {
        header.addComment(comment);
    }
    header.setSortOrder(SORT_ORDER);
    // NOTE(review): USE_THREADING is not applied in this method. The writer is obtained from
    // createSAMWriter, which is given no async-IO setting here. TODO: confirm whether async IO
    // should be wired through createSAMWriter.
    try (final SAMFileWriter out = createSAMWriter(OUTPUT, REFERENCE_SEQUENCE, header, presorted)) {
        // Lastly loop through and write out the records
        final ProgressLogger progress = new ProgressLogger(logger, PROGRESS_INTERVAL);
        while (iterator.hasNext()) {
            final SAMRecord record = iterator.next();
            out.addAlignment(record);
            progress.record(record);
        }
        logger.info("Finished reading inputs.");
    } finally {
        // Release every input reader even when creating the writer or writing a record fails.
        CloserUtil.close(readers);
    }
    return null;
}
Also used : SamFileHeaderMerger(htsjdk.samtools.SamFileHeaderMerger) MergingSamRecordIterator(htsjdk.samtools.MergingSamRecordIterator) SAMFileWriter(htsjdk.samtools.SAMFileWriter) ArrayList(java.util.ArrayList) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) ProgressLogger(org.broadinstitute.hellbender.utils.runtime.ProgressLogger) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) File(java.io.File)

Aggregations

ProgressLogger (org.broadinstitute.hellbender.utils.runtime.ProgressLogger): 31; SamReader (htsjdk.samtools.SamReader): 13; SAMRecord (htsjdk.samtools.SAMRecord): 12; UserException (org.broadinstitute.hellbender.exceptions.UserException): 11; VariantContext (htsjdk.variant.variantcontext.VariantContext): 7; File (java.io.File): 7; ArrayList (java.util.ArrayList): 7; SAMFileWriter (htsjdk.samtools.SAMFileWriter): 6; VariantContextWriter (htsjdk.variant.variantcontext.writer.VariantContextWriter): 6; VariantContextWriterBuilder (htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder): 6; SAMFileHeader (htsjdk.samtools.SAMFileHeader): 5; VCFFileReader (htsjdk.variant.vcf.VCFFileReader): 5; VCFHeader (htsjdk.variant.vcf.VCFHeader): 5; HashMap (java.util.HashMap): 5; SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary): 4; MetricsFile (htsjdk.samtools.metrics.MetricsFile): 4; ReferenceSequenceFileWalker (htsjdk.samtools.reference.ReferenceSequenceFileWalker): 4; BAMRecordCodec (htsjdk.samtools.BAMRecordCodec): 3; SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord): 3; SAMRecordQueryNameComparator (htsjdk.samtools.SAMRecordQueryNameComparator): 3