use of htsjdk.samtools.fastq.FastqWriter in project gatk by broadinstitute.
the class SamToFastq method doWork.
@Override
protected Object doWork() {
IOUtil.assertFileIsReadable(INPUT);
final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT);
final Map<String, SAMRecord> firstSeenMates = new HashMap<>();
final FastqWriterFactory factory = new FastqWriterFactory();
factory.setCreateMd5(CREATE_MD5_FILE);
final Map<SAMReadGroupRecord, FastqWriters> writers = generateWriters(reader.getFileHeader().getReadGroups(), factory);
final ProgressLogger progress = new ProgressLogger(logger);
for (final SAMRecord currentRecord : reader) {
if (currentRecord.isSecondaryOrSupplementary() && !INCLUDE_NON_PRIMARY_ALIGNMENTS)
continue;
// Skip non-PF reads as necessary
if (currentRecord.getReadFailsVendorQualityCheckFlag() && !INCLUDE_NON_PF_READS)
continue;
final FastqWriters fq = writers.get(currentRecord.getReadGroup());
if (currentRecord.getReadPairedFlag()) {
final String currentReadName = currentRecord.getReadName();
final SAMRecord firstRecord = firstSeenMates.remove(currentReadName);
if (firstRecord == null) {
firstSeenMates.put(currentReadName, currentRecord);
} else {
assertPairedMates(firstRecord, currentRecord);
final SAMRecord read1 = currentRecord.getFirstOfPairFlag() ? currentRecord : firstRecord;
final SAMRecord read2 = currentRecord.getFirstOfPairFlag() ? firstRecord : currentRecord;
writeRecord(read1, 1, fq.getFirstOfPair(), READ1_TRIM, READ1_MAX_BASES_TO_WRITE);
final FastqWriter secondOfPairWriter = fq.getSecondOfPair();
if (secondOfPairWriter == null) {
throw new UserException("Input contains paired reads but no SECOND_END_FASTQ specified.");
}
writeRecord(read2, 2, secondOfPairWriter, READ2_TRIM, READ2_MAX_BASES_TO_WRITE);
}
} else {
writeRecord(currentRecord, null, fq.getUnpaired(), READ1_TRIM, READ1_MAX_BASES_TO_WRITE);
}
progress.record(currentRecord);
}
CloserUtil.close(reader);
// Close all the fastq writers being careful to close each one only once!
for (final FastqWriters writerMapping : new HashSet<>(writers.values())) {
writerMapping.closeAll();
}
if (!firstSeenMates.isEmpty()) {
SAMUtils.processValidationError(new SAMValidationError(SAMValidationError.Type.MATE_NOT_FOUND, "Found " + firstSeenMates.size() + " unpaired mates", null), VALIDATION_STRINGENCY);
}
return null;
}
use of htsjdk.samtools.fastq.FastqWriter in project gatk by broadinstitute.
the class SamToFastq method generateWriters.
/**
* Generates the writers for the given read groups or, if we are not emitting per-read-group, just returns the single set of writers.
*/
private Map<SAMReadGroupRecord, FastqWriters> generateWriters(final List<SAMReadGroupRecord> samReadGroupRecords, final FastqWriterFactory factory) {
final Map<SAMReadGroupRecord, FastqWriters> writerMap = new HashMap<>();
final FastqWriters fastqWriters;
if (!OUTPUT_PER_RG) {
IOUtil.assertFileIsWritable(FASTQ);
final FastqWriter firstOfPairWriter = factory.newWriter(FASTQ);
final FastqWriter secondOfPairWriter;
if (INTERLEAVE) {
secondOfPairWriter = firstOfPairWriter;
} else if (SECOND_END_FASTQ != null) {
IOUtil.assertFileIsWritable(SECOND_END_FASTQ);
secondOfPairWriter = factory.newWriter(SECOND_END_FASTQ);
} else {
secondOfPairWriter = null;
}
/** Prepare the writer that will accept unpaired reads. If we're emitting a single fastq - and assuming single-ended reads -
* then this is simply that one fastq writer. Otherwise, if we're doing paired-end, we emit to a third new writer, since
* the other two fastqs are accepting only paired end reads. */
final FastqWriter unpairedWriter = UNPAIRED_FASTQ == null ? firstOfPairWriter : factory.newWriter(UNPAIRED_FASTQ);
fastqWriters = new FastqWriters(firstOfPairWriter, secondOfPairWriter, unpairedWriter);
// For all read groups we may find in the bam, register this single set of writers for them.
writerMap.put(null, fastqWriters);
for (final SAMReadGroupRecord rg : samReadGroupRecords) {
writerMap.put(rg, fastqWriters);
}
} else {
// When we're creating a fastq-group per readgroup, by convention we do not emit a special fastq for unpaired reads.
for (final SAMReadGroupRecord rg : samReadGroupRecords) {
final FastqWriter firstOfPairWriter = factory.newWriter(makeReadGroupFile(rg, "_1"));
// Create this writer on-the-fly; if we find no second-of-pair reads, don't bother making a writer (or delegating,
// if we're interleaving).
final Lazy<FastqWriter> lazySecondOfPairWriter = new Lazy<>(() -> INTERLEAVE ? firstOfPairWriter : factory.newWriter(makeReadGroupFile(rg, "_2")));
writerMap.put(rg, new FastqWriters(firstOfPairWriter, lazySecondOfPairWriter, firstOfPairWriter));
}
}
return writerMap;
}
Aggregations