Search in sources :

Example 6 with ProgressLogger

use of org.broadinstitute.hellbender.utils.runtime.ProgressLogger in project gatk by broadinstitute.

the class FastqToSam method doUnpaired.

/** Creates a simple SAM file from a single fastq file. */
protected int doUnpaired(final FastqReader freader, final SAMFileWriter writer) {
    int readCount = 0;
    final ProgressLogger progress = new ProgressLogger(LOG);
    for (; freader.hasNext(); readCount++) {
        final FastqRecord frec = freader.next();
        final SAMRecord srec = createSamRecord(writer.getFileHeader(), getReadName(frec.getReadName(), false), frec, false);
        srec.setReadPairedFlag(false);
        writer.addAlignment(srec);
        progress.record(srec);
    }
    writer.close();
    return readCount;
}
Also used : FastqRecord(htsjdk.samtools.fastq.FastqRecord) ProgressLogger(org.broadinstitute.hellbender.utils.runtime.ProgressLogger)

Example 7 with ProgressLogger

use of org.broadinstitute.hellbender.utils.runtime.ProgressLogger in project gatk by broadinstitute.

the class FixMateInformation method doWork.

@Override
protected Object doWork() {
    // Open up the input
    boolean allQueryNameSorted = true;
    final List<SamReader> readers = new ArrayList<>();
    for (final File f : INPUT) {
        IOUtil.assertFileIsReadable(f);
        final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(f);
        readers.add(reader);
        if (reader.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.queryname)
            allQueryNameSorted = false;
    }
    // or into a temporary file that will overwrite the INPUT file eventually
    if (OUTPUT != null)
        OUTPUT = OUTPUT.getAbsoluteFile();
    final boolean differentOutputSpecified = OUTPUT != null;
    if (differentOutputSpecified) {
        IOUtil.assertFileIsWritable(OUTPUT);
    } else if (INPUT.size() != 1) {
        throw new UserException("Must specify either an explicit OUTPUT file or a single INPUT file to be overridden.");
    } else {
        final File soleInput = INPUT.get(0).getAbsoluteFile();
        final File dir = soleInput.getParentFile().getAbsoluteFile();
        try {
            IOUtil.assertFileIsWritable(soleInput);
            IOUtil.assertDirectoryIsWritable(dir);
            OUTPUT = File.createTempFile(soleInput.getName() + ".being_fixed.", BamFileIoUtils.BAM_FILE_EXTENSION, dir);
        } catch (final IOException ioe) {
            throw new RuntimeIOException("Could not create tmp file in " + dir.getAbsolutePath());
        }
    }
    // Get the input records merged and sorted by query name as needed
    final PeekableIterator<SAMRecord> iterator;
    final SAMFileHeader header;
    {
        // Deal with merging if necessary
        final Iterator<SAMRecord> tmp;
        if (INPUT.size() > 1) {
            final List<SAMFileHeader> headers = new ArrayList<>(readers.size());
            for (final SamReader reader : readers) {
                headers.add(reader.getFileHeader());
            }
            final SAMFileHeader.SortOrder sortOrder = (allQueryNameSorted ? SAMFileHeader.SortOrder.queryname : SAMFileHeader.SortOrder.unsorted);
            final SamFileHeaderMerger merger = new SamFileHeaderMerger(sortOrder, headers, false);
            tmp = new MergingSamRecordIterator(merger, readers, false);
            header = merger.getMergedHeader();
        } else {
            tmp = readers.get(0).iterator();
            header = readers.get(0).getFileHeader();
        }
        // And now deal with re-sorting if necessary
        if (ASSUME_SORTED || allQueryNameSorted) {
            iterator = new SamPairUtil.SetMateInfoIterator(new PeekableIterator<>(tmp), ADD_MATE_CIGAR);
        } else {
            logger.info("Sorting input into queryname order.");
            final SortingCollection<SAMRecord> sorter = SortingCollection.newInstance(SAMRecord.class, new BAMRecordCodec(header), new SAMRecordQueryNameComparator(), MAX_RECORDS_IN_RAM, TMP_DIR);
            while (tmp.hasNext()) {
                sorter.add(tmp.next());
            }
            iterator = new SamPairUtil.SetMateInfoIterator(new PeekableIterator<SAMRecord>(sorter.iterator()) {

                @Override
                public void close() {
                    super.close();
                    sorter.cleanup();
                }
            }, ADD_MATE_CIGAR);
            logger.info("Sorting by queryname complete.");
        }
        // Deal with the various sorting complications
        final SAMFileHeader.SortOrder outputSortOrder = SORT_ORDER == null ? readers.get(0).getFileHeader().getSortOrder() : SORT_ORDER;
        logger.info("Output will be sorted by " + outputSortOrder);
        header.setSortOrder(outputSortOrder);
    }
    if (CREATE_INDEX && header.getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
        throw new UserException("Can't CREATE_INDEX unless sort order is coordinate");
    }
    try (final SAMFileWriter out = createSAMWriter(OUTPUT, REFERENCE_SEQUENCE, header, header.getSortOrder() == SAMFileHeader.SortOrder.queryname)) {
        logger.info("Traversing query name sorted records and fixing up mate pair information.");
        final ProgressLogger progress = new ProgressLogger(logger);
        while (iterator.hasNext()) {
            final SAMRecord record = iterator.next();
            out.addAlignment(record);
            progress.record(record);
        }
        iterator.close();
        if (header.getSortOrder() == SAMFileHeader.SortOrder.queryname) {
            logger.info("Closing output file.");
        } else {
            logger.info("Finished processing reads; re-sorting output file.");
        }
    }
    // TODO throw appropriate exceptions instead of writing to log.error and returning
    if (!differentOutputSpecified) {
        logger.info("Replacing input file with fixed file.");
        final File soleInput = INPUT.get(0).getAbsoluteFile();
        final File old = new File(soleInput.getParentFile(), soleInput.getName() + ".old");
        if (!old.exists() && soleInput.renameTo(old)) {
            if (OUTPUT.renameTo(soleInput)) {
                if (!old.delete()) {
                    logger.warn("Could not delete old file: " + old.getAbsolutePath());
                    return null;
                }
                if (CREATE_INDEX) {
                    final File newIndex = new File(OUTPUT.getParent(), OUTPUT.getName().substring(0, OUTPUT.getName().length() - 4) + ".bai");
                    final File oldIndex = new File(soleInput.getParent(), soleInput.getName().substring(0, soleInput.getName().length() - 4) + ".bai");
                    if (!newIndex.renameTo(oldIndex)) {
                        logger.warn("Could not overwrite index file: " + oldIndex.getAbsolutePath());
                    }
                }
            } else {
                logger.error("Could not move new file to " + soleInput.getAbsolutePath());
                logger.error("Input file preserved as: " + old.getAbsolutePath());
                logger.error("New file preserved as: " + OUTPUT.getAbsolutePath());
                return null;
            }
        } else {
            logger.error("Could not move input file out of the way: " + soleInput.getAbsolutePath());
            if (!OUTPUT.delete()) {
                logger.error("Could not delete temporary file: " + OUTPUT.getAbsolutePath());
            }
            return null;
        }
    }
    CloserUtil.close(readers);
    return null;
}
Also used : MergingSamRecordIterator(htsjdk.samtools.MergingSamRecordIterator) SortingCollection(htsjdk.samtools.util.SortingCollection) ArrayList(java.util.ArrayList) ProgressLogger(org.broadinstitute.hellbender.utils.runtime.ProgressLogger) SamReader(htsjdk.samtools.SamReader) MergingSamRecordIterator(htsjdk.samtools.MergingSamRecordIterator) PeekableIterator(htsjdk.samtools.util.PeekableIterator) Iterator(java.util.Iterator) ArrayList(java.util.ArrayList) List(java.util.List) UserException(org.broadinstitute.hellbender.exceptions.UserException) SamFileHeaderMerger(htsjdk.samtools.SamFileHeaderMerger) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) SAMFileWriter(htsjdk.samtools.SAMFileWriter) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) IOException(java.io.IOException) BAMRecordCodec(htsjdk.samtools.BAMRecordCodec) SAMRecord(htsjdk.samtools.SAMRecord) SAMRecordQueryNameComparator(htsjdk.samtools.SAMRecordQueryNameComparator) SAMFileHeader(htsjdk.samtools.SAMFileHeader) File(java.io.File)

Example 8 with ProgressLogger

use of org.broadinstitute.hellbender.utils.runtime.ProgressLogger in project gatk by broadinstitute.

the class MakeSitesOnlyVcf method doWork.

@Override
protected Object doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);
    final VCFFileReader reader = new VCFFileReader(INPUT, false);
    final VCFHeader inputVcfHeader = new VCFHeader(reader.getFileHeader().getMetaDataInInputOrder());
    final SAMSequenceDictionary sequenceDictionary = inputVcfHeader.getSequenceDictionary();
    if (CREATE_INDEX && sequenceDictionary == null) {
        throw new UserException("A sequence dictionary must be available (either through the input file or by setting it explicitly) when creating indexed output.");
    }
    final ProgressLogger progress = new ProgressLogger(logger, 10000);
    // Setup the site-only file writer
    final VariantContextWriterBuilder builder = new VariantContextWriterBuilder().setOutputFile(OUTPUT).setReferenceDictionary(sequenceDictionary);
    if (CREATE_INDEX)
        builder.setOption(Options.INDEX_ON_THE_FLY);
    else
        builder.unsetOption(Options.INDEX_ON_THE_FLY);
    try (final VariantContextWriter writer = builder.build()) {
        final VCFHeader header = new VCFHeader(inputVcfHeader.getMetaDataInInputOrder(), SAMPLE);
        writer.writeHeader(header);
        // Go through the input, strip the records and write them to the output
        final CloseableIterator<VariantContext> iterator = reader.iterator();
        while (iterator.hasNext()) {
            final VariantContext full = iterator.next();
            final VariantContext site = subsetToSamplesWithOriginalAnnotations(full, SAMPLE);
            writer.add(site);
            progress.record(site.getContig(), site.getStart());
        }
        CloserUtil.close(iterator);
        CloserUtil.close(reader);
    }
    return null;
}
Also used : VariantContextWriterBuilder(htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VariantContext(htsjdk.variant.variantcontext.VariantContext) ProgressLogger(org.broadinstitute.hellbender.utils.runtime.ProgressLogger) UserException(org.broadinstitute.hellbender.exceptions.UserException) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary)

Example 9 with ProgressLogger

use of org.broadinstitute.hellbender.utils.runtime.ProgressLogger in project gatk by broadinstitute.

the class MergeVcfs method doWork.

@Override
protected Object doWork() {
    final ProgressLogger progress = new ProgressLogger(logger, 10000);
    final List<String> sampleList = new ArrayList<>();
    final Collection<CloseableIterator<VariantContext>> iteratorCollection = new ArrayList<>(INPUT.size());
    final Collection<VCFHeader> headers = new HashSet<>(INPUT.size());
    VariantContextComparator variantContextComparator = null;
    SAMSequenceDictionary sequenceDictionary = null;
    if (SEQUENCE_DICTIONARY != null) {
        sequenceDictionary = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(SEQUENCE_DICTIONARY).getFileHeader().getSequenceDictionary();
    }
    for (final File file : INPUT) {
        IOUtil.assertFileIsReadable(file);
        final VCFFileReader fileReader = new VCFFileReader(file, false);
        final VCFHeader fileHeader = fileReader.getFileHeader();
        if (variantContextComparator == null) {
            variantContextComparator = fileHeader.getVCFRecordComparator();
        } else {
            if (!variantContextComparator.isCompatible(fileHeader.getContigLines())) {
                throw new IllegalArgumentException("The contig entries in input file " + file.getAbsolutePath() + " are not compatible with the others.");
            }
        }
        if (sequenceDictionary == null)
            sequenceDictionary = fileHeader.getSequenceDictionary();
        if (sampleList.isEmpty()) {
            sampleList.addAll(fileHeader.getSampleNamesInOrder());
        } else {
            if (!sampleList.equals(fileHeader.getSampleNamesInOrder())) {
                throw new IllegalArgumentException("Input file " + file.getAbsolutePath() + " has sample entries that don't match the other files.");
            }
        }
        headers.add(fileHeader);
        iteratorCollection.add(fileReader.iterator());
    }
    if (CREATE_INDEX && sequenceDictionary == null) {
        throw new UserException("A sequence dictionary must be available (either through the input file or by setting it explicitly) when creating indexed output.");
    }
    final VariantContextWriterBuilder builder = new VariantContextWriterBuilder().setOutputFile(OUTPUT).setReferenceDictionary(sequenceDictionary).clearOptions();
    if (CREATE_INDEX) {
        builder.setOption(Options.INDEX_ON_THE_FLY);
    }
    try (final VariantContextWriter writer = builder.build()) {
        writer.writeHeader(new VCFHeader(VCFUtils.smartMergeHeaders(headers, false), sampleList));
        final MergingIterator<VariantContext> mergingIterator = new MergingIterator<>(variantContextComparator, iteratorCollection);
        while (mergingIterator.hasNext()) {
            final VariantContext context = mergingIterator.next();
            writer.add(context);
            progress.record(context.getContig(), context.getStart());
        }
        CloserUtil.close(mergingIterator);
    }
    return null;
}
Also used : CloseableIterator(htsjdk.samtools.util.CloseableIterator) ArrayList(java.util.ArrayList) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VariantContext(htsjdk.variant.variantcontext.VariantContext) ProgressLogger(org.broadinstitute.hellbender.utils.runtime.ProgressLogger) VariantContextComparator(htsjdk.variant.variantcontext.VariantContextComparator) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) MergingIterator(htsjdk.samtools.util.MergingIterator) VariantContextWriterBuilder(htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder) UserException(org.broadinstitute.hellbender.exceptions.UserException) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) File(java.io.File) HashSet(java.util.HashSet)

Example 10 with ProgressLogger

use of org.broadinstitute.hellbender.utils.runtime.ProgressLogger in project gatk by broadinstitute.

the class SortVcf method sortInputs.

/**
     * Merge the inputs and sort them by adding each input's content to a single SortingCollection.
     * <p/>
     * NB: It would be better to have a merging iterator as in MergeSamFiles, as this would perform better for pre-sorted inputs.
     * Here, we are assuming inputs are unsorted, and so adding their VariantContexts iteratively is fine for now.
     * MergeVcfs exists for simple merging of presorted inputs.
     *
     * @param readers      - a list of VCFFileReaders, one for each input VCF
     * @param outputHeader - The merged header whose information we intend to use in the final output file
     */
private SortingCollection<VariantContext> sortInputs(final List<VCFFileReader> readers, final VCFHeader outputHeader) {
    final ProgressLogger readProgress = new ProgressLogger(logger, 25000, "read", "records");
    // NB: The default MAX_RECORDS_IN_RAM may not be appropriate here. VariantContexts are smaller than SamRecords
    // We would have to play around empirically to find an appropriate value. We are not performing this optimization at this time.
    final SortingCollection<VariantContext> sorter = SortingCollection.newInstance(VariantContext.class, new VCFRecordCodec(outputHeader), outputHeader.getVCFRecordComparator(), MAX_RECORDS_IN_RAM, TMP_DIR);
    int readerCount = 1;
    for (final VCFFileReader reader : readers) {
        logger.info("Reading entries from input file " + readerCount);
        for (final VariantContext variantContext : reader) {
            sorter.add(variantContext);
            readProgress.record(variantContext.getContig(), variantContext.getStart());
        }
        reader.close();
        readerCount++;
    }
    return sorter;
}
Also used : VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VariantContext(htsjdk.variant.variantcontext.VariantContext) ProgressLogger(org.broadinstitute.hellbender.utils.runtime.ProgressLogger) VCFRecordCodec(htsjdk.variant.vcf.VCFRecordCodec)

Aggregations

ProgressLogger (org.broadinstitute.hellbender.utils.runtime.ProgressLogger)31 SamReader (htsjdk.samtools.SamReader)13 SAMRecord (htsjdk.samtools.SAMRecord)12 UserException (org.broadinstitute.hellbender.exceptions.UserException)11 VariantContext (htsjdk.variant.variantcontext.VariantContext)7 File (java.io.File)7 ArrayList (java.util.ArrayList)7 SAMFileWriter (htsjdk.samtools.SAMFileWriter)6 VariantContextWriter (htsjdk.variant.variantcontext.writer.VariantContextWriter)6 VariantContextWriterBuilder (htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder)6 SAMFileHeader (htsjdk.samtools.SAMFileHeader)5 VCFFileReader (htsjdk.variant.vcf.VCFFileReader)5 VCFHeader (htsjdk.variant.vcf.VCFHeader)5 HashMap (java.util.HashMap)5 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)4 MetricsFile (htsjdk.samtools.metrics.MetricsFile)4 ReferenceSequenceFileWalker (htsjdk.samtools.reference.ReferenceSequenceFileWalker)4 BAMRecordCodec (htsjdk.samtools.BAMRecordCodec)3 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)3 SAMRecordQueryNameComparator (htsjdk.samtools.SAMRecordQueryNameComparator)3