Search in sources :

Example 1 with SAMValidationError

Use of htsjdk.samtools.SAMValidationError in the GATK project by the Broad Institute.

The method doWork of the class SamToFastq.

/**
 * Reads every record from {@code INPUT} and hands it to the FASTQ writers registered for its read group.
 *
 * <p>Secondary/supplementary records are skipped unless {@code INCLUDE_NON_PRIMARY_ALIGNMENTS} is set, and
 * records that fail the vendor quality check are skipped unless {@code INCLUDE_NON_PF_READS} is set.
 * A paired record is held in memory, keyed by read name, until a second record with the same name is seen;
 * the two are then checked with {@code assertPairedMates} and written as read 1 and read 2. Unpaired
 * records are written straight to the unpaired writer.
 *
 * <p>The reader and all FASTQ writers are closed in a {@code finally} block so they are released even when
 * processing fails part-way through. Any mates still waiting for their partner once the input is exhausted
 * are reported as a {@code MATE_NOT_FOUND} validation error, handled according to
 * {@code VALIDATION_STRINGENCY}.
 *
 * @return always {@code null}
 * @throws UserException if a pair is completed but no second-of-pair writer is available for its read group
 */
@Override
protected Object doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    // Paired records whose mate has not been encountered yet, keyed by read name.
    final Map<String, SAMRecord> firstSeenMates = new HashMap<>();
    final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT);
    // Assigned inside the try so the reader is still closed if writer creation fails.
    Map<SAMReadGroupRecord, FastqWriters> writers = null;
    try {
        final FastqWriterFactory factory = new FastqWriterFactory();
        factory.setCreateMd5(CREATE_MD5_FILE);
        writers = generateWriters(reader.getFileHeader().getReadGroups(), factory);
        final ProgressLogger progress = new ProgressLogger(logger);
        for (final SAMRecord currentRecord : reader) {
            if (currentRecord.isSecondaryOrSupplementary() && !INCLUDE_NON_PRIMARY_ALIGNMENTS) {
                continue;
            }
            // Skip non-PF reads as necessary
            if (currentRecord.getReadFailsVendorQualityCheckFlag() && !INCLUDE_NON_PF_READS) {
                continue;
            }
            final FastqWriters fq = writers.get(currentRecord.getReadGroup());
            if (currentRecord.getReadPairedFlag()) {
                final String currentReadName = currentRecord.getReadName();
                // remove() doubles as the lookup: a non-null result means the mate was already seen.
                final SAMRecord firstRecord = firstSeenMates.remove(currentReadName);
                if (firstRecord == null) {
                    firstSeenMates.put(currentReadName, currentRecord);
                } else {
                    assertPairedMates(firstRecord, currentRecord);
                    // Order the pair by the first-of-pair flag rather than by order of appearance.
                    final SAMRecord read1 = currentRecord.getFirstOfPairFlag() ? currentRecord : firstRecord;
                    final SAMRecord read2 = currentRecord.getFirstOfPairFlag() ? firstRecord : currentRecord;
                    writeRecord(read1, 1, fq.getFirstOfPair(), READ1_TRIM, READ1_MAX_BASES_TO_WRITE);
                    final FastqWriter secondOfPairWriter = fq.getSecondOfPair();
                    if (secondOfPairWriter == null) {
                        throw new UserException("Input contains paired reads but no SECOND_END_FASTQ specified.");
                    }
                    writeRecord(read2, 2, secondOfPairWriter, READ2_TRIM, READ2_MAX_BASES_TO_WRITE);
                }
            } else {
                writeRecord(currentRecord, null, fq.getUnpaired(), READ1_TRIM, READ1_MAX_BASES_TO_WRITE);
            }
            progress.record(currentRecord);
        }
    } finally {
        CloserUtil.close(reader);
        if (writers != null) {
            // Close all the fastq writers being careful to close each one only once!
            for (final FastqWriters writerMapping : new HashSet<>(writers.values())) {
                writerMapping.closeAll();
            }
        }
    }
    if (!firstSeenMates.isEmpty()) {
        SAMUtils.processValidationError(new SAMValidationError(SAMValidationError.Type.MATE_NOT_FOUND, "Found " + firstSeenMates.size() + " unpaired mates", null), VALIDATION_STRINGENCY);
    }
    return null;
}
Also used : HashMap(java.util.HashMap) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) ProgressLogger(org.broadinstitute.hellbender.utils.runtime.ProgressLogger) SamReader(htsjdk.samtools.SamReader) SAMValidationError(htsjdk.samtools.SAMValidationError) SAMRecord(htsjdk.samtools.SAMRecord) FastqWriterFactory(htsjdk.samtools.fastq.FastqWriterFactory) FastqWriter(htsjdk.samtools.fastq.FastqWriter) UserException(org.broadinstitute.hellbender.exceptions.UserException) HashSet(java.util.HashSet)

Aggregations

SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)1 SAMRecord (htsjdk.samtools.SAMRecord)1 SAMValidationError (htsjdk.samtools.SAMValidationError)1 SamReader (htsjdk.samtools.SamReader)1 FastqWriter (htsjdk.samtools.fastq.FastqWriter)1 FastqWriterFactory (htsjdk.samtools.fastq.FastqWriterFactory)1 HashMap (java.util.HashMap)1 HashSet (java.util.HashSet)1 UserException (org.broadinstitute.hellbender.exceptions.UserException)1 ProgressLogger (org.broadinstitute.hellbender.utils.runtime.ProgressLogger)1