Search in sources :

Example 1 with FastqWriter

use of htsjdk.samtools.fastq.FastqWriter in project gatk by broadinstitute.

the class SamToFastq method doWork.

@Override
protected Object doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT);
    final Map<String, SAMRecord> firstSeenMates = new HashMap<>();
    final FastqWriterFactory factory = new FastqWriterFactory();
    factory.setCreateMd5(CREATE_MD5_FILE);
    final Map<SAMReadGroupRecord, FastqWriters> writers = generateWriters(reader.getFileHeader().getReadGroups(), factory);
    final ProgressLogger progress = new ProgressLogger(logger);
    for (final SAMRecord currentRecord : reader) {
        if (currentRecord.isSecondaryOrSupplementary() && !INCLUDE_NON_PRIMARY_ALIGNMENTS)
            continue;
        // Skip non-PF reads as necessary
        if (currentRecord.getReadFailsVendorQualityCheckFlag() && !INCLUDE_NON_PF_READS)
            continue;
        final FastqWriters fq = writers.get(currentRecord.getReadGroup());
        if (currentRecord.getReadPairedFlag()) {
            final String currentReadName = currentRecord.getReadName();
            final SAMRecord firstRecord = firstSeenMates.remove(currentReadName);
            if (firstRecord == null) {
                firstSeenMates.put(currentReadName, currentRecord);
            } else {
                assertPairedMates(firstRecord, currentRecord);
                final SAMRecord read1 = currentRecord.getFirstOfPairFlag() ? currentRecord : firstRecord;
                final SAMRecord read2 = currentRecord.getFirstOfPairFlag() ? firstRecord : currentRecord;
                writeRecord(read1, 1, fq.getFirstOfPair(), READ1_TRIM, READ1_MAX_BASES_TO_WRITE);
                final FastqWriter secondOfPairWriter = fq.getSecondOfPair();
                if (secondOfPairWriter == null) {
                    throw new UserException("Input contains paired reads but no SECOND_END_FASTQ specified.");
                }
                writeRecord(read2, 2, secondOfPairWriter, READ2_TRIM, READ2_MAX_BASES_TO_WRITE);
            }
        } else {
            writeRecord(currentRecord, null, fq.getUnpaired(), READ1_TRIM, READ1_MAX_BASES_TO_WRITE);
        }
        progress.record(currentRecord);
    }
    CloserUtil.close(reader);
    // Close all the fastq writers being careful to close each one only once!
    for (final FastqWriters writerMapping : new HashSet<>(writers.values())) {
        writerMapping.closeAll();
    }
    if (!firstSeenMates.isEmpty()) {
        SAMUtils.processValidationError(new SAMValidationError(SAMValidationError.Type.MATE_NOT_FOUND, "Found " + firstSeenMates.size() + " unpaired mates", null), VALIDATION_STRINGENCY);
    }
    return null;
}
Also used : HashMap(java.util.HashMap) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) ProgressLogger(org.broadinstitute.hellbender.utils.runtime.ProgressLogger) SamReader(htsjdk.samtools.SamReader) SAMValidationError(htsjdk.samtools.SAMValidationError) SAMRecord(htsjdk.samtools.SAMRecord) FastqWriterFactory(htsjdk.samtools.fastq.FastqWriterFactory) FastqWriter(htsjdk.samtools.fastq.FastqWriter) UserException(org.broadinstitute.hellbender.exceptions.UserException) HashSet(java.util.HashSet)

Example 2 with FastqWriter

use of htsjdk.samtools.fastq.FastqWriter in project gatk by broadinstitute.

the class SamToFastq method generateWriters.

/**
     * Generates the writers for the given read groups or, if we are not emitting per-read-group, just returns the single set of writers.
     */
private Map<SAMReadGroupRecord, FastqWriters> generateWriters(final List<SAMReadGroupRecord> samReadGroupRecords, final FastqWriterFactory factory) {
    final Map<SAMReadGroupRecord, FastqWriters> writerMap = new HashMap<>();
    final FastqWriters fastqWriters;
    if (!OUTPUT_PER_RG) {
        IOUtil.assertFileIsWritable(FASTQ);
        final FastqWriter firstOfPairWriter = factory.newWriter(FASTQ);
        final FastqWriter secondOfPairWriter;
        if (INTERLEAVE) {
            secondOfPairWriter = firstOfPairWriter;
        } else if (SECOND_END_FASTQ != null) {
            IOUtil.assertFileIsWritable(SECOND_END_FASTQ);
            secondOfPairWriter = factory.newWriter(SECOND_END_FASTQ);
        } else {
            secondOfPairWriter = null;
        }
        /** Prepare the writer that will accept unpaired reads.  If we're emitting a single fastq - and assuming single-ended reads -
             * then this is simply that one fastq writer.  Otherwise, if we're doing paired-end, we emit to a third new writer, since
             * the other two fastqs are accepting only paired end reads. */
        final FastqWriter unpairedWriter = UNPAIRED_FASTQ == null ? firstOfPairWriter : factory.newWriter(UNPAIRED_FASTQ);
        fastqWriters = new FastqWriters(firstOfPairWriter, secondOfPairWriter, unpairedWriter);
        // For all read groups we may find in the bam, register this single set of writers for them.
        writerMap.put(null, fastqWriters);
        for (final SAMReadGroupRecord rg : samReadGroupRecords) {
            writerMap.put(rg, fastqWriters);
        }
    } else {
        // When we're creating a fastq-group per readgroup, by convention we do not emit a special fastq for unpaired reads.
        for (final SAMReadGroupRecord rg : samReadGroupRecords) {
            final FastqWriter firstOfPairWriter = factory.newWriter(makeReadGroupFile(rg, "_1"));
            // Create this writer on-the-fly; if we find no second-of-pair reads, don't bother making a writer (or delegating,
            // if we're interleaving).
            final Lazy<FastqWriter> lazySecondOfPairWriter = new Lazy<>(() -> INTERLEAVE ? firstOfPairWriter : factory.newWriter(makeReadGroupFile(rg, "_2")));
            writerMap.put(rg, new FastqWriters(firstOfPairWriter, lazySecondOfPairWriter, firstOfPairWriter));
        }
    }
    return writerMap;
}
Also used : HashMap(java.util.HashMap) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) FastqWriter(htsjdk.samtools.fastq.FastqWriter)

Aggregations

SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)2 FastqWriter (htsjdk.samtools.fastq.FastqWriter)2 HashMap (java.util.HashMap)2 SAMRecord (htsjdk.samtools.SAMRecord)1 SAMValidationError (htsjdk.samtools.SAMValidationError)1 SamReader (htsjdk.samtools.SamReader)1 FastqWriterFactory (htsjdk.samtools.fastq.FastqWriterFactory)1 HashSet (java.util.HashSet)1 UserException (org.broadinstitute.hellbender.exceptions.UserException)1 ProgressLogger (org.broadinstitute.hellbender.utils.runtime.ProgressLogger)1