use of htsjdk.samtools.SAMRecord in project gatk by broadinstitute.
the class SATagBuilder method getArtificialReadsBasedOnSATag.
/**
* Returns a list of artificial GATKReads corresponding to the reads described by the SA tag in read.
* Fills as much information as can be inferred from the original read onto the artificial read copies
* This function is only used for testing (e.g. testGetArtificialReadsBasedOnSATag, testEmptyNMTagSupport)
*
* @param header the header to be used in constructing artificial reads
*/
public List<GATKRead> getArtificialReadsBasedOnSATag(SAMFileHeader header) {
List<GATKRead> output = new ArrayList<>(supplementaryReads.size());
GATKRead readCopy = read.copy();
readCopy.setAttribute("SA", getTag());
SAMRecord record = readCopy.convertToSAMRecord(header);
List<SAMRecord> readRecords = SAMUtils.getOtherCanonicalAlignments(record);
for (SAMRecord artificialRead : readRecords) {
output.add(new SAMRecordToGATKReadAdapter(artificialRead));
}
return output;
}
use of htsjdk.samtools.SAMRecord in project gatk by broadinstitute.
the class CollectRrbsMetrics method doWork.
@Override
protected Object doWork() {
if (!METRICS_FILE_PREFIX.endsWith(".")) {
METRICS_FILE_PREFIX = METRICS_FILE_PREFIX + ".";
}
final File SUMMARY_OUT = new File(METRICS_FILE_PREFIX + SUMMARY_FILE_EXTENSION);
final File DETAILS_OUT = new File(METRICS_FILE_PREFIX + DETAIL_FILE_EXTENSION);
final File PLOTS_OUT = new File(METRICS_FILE_PREFIX + PDF_FILE_EXTENSION);
assertIoFiles(SUMMARY_OUT, DETAILS_OUT, PLOTS_OUT);
final SamReader samReader = SamReaderFactory.makeDefault().open(INPUT);
if (!ASSUME_SORTED && samReader.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
throw new UserException("The input file " + INPUT.getAbsolutePath() + " does not appear to be coordinate sorted");
}
final ReferenceSequenceFileWalker refWalker = new ReferenceSequenceFileWalker(REFERENCE_SEQUENCE);
final ProgressLogger progressLogger = new ProgressLogger(logger);
final RrbsMetricsCollector metricsCollector = new RrbsMetricsCollector(METRIC_ACCUMULATION_LEVEL, samReader.getFileHeader().getReadGroups(), C_QUALITY_THRESHOLD, NEXT_BASE_QUALITY_THRESHOLD, MINIMUM_READ_LENGTH, MAX_MISMATCH_RATE);
for (final SAMRecord samRecord : samReader) {
progressLogger.record(samRecord);
if (!samRecord.getReadUnmappedFlag() && !isSequenceFiltered(samRecord.getReferenceName())) {
final ReferenceSequence referenceSequence = refWalker.get(samRecord.getReferenceIndex());
metricsCollector.acceptRecord(samRecord, referenceSequence);
}
}
metricsCollector.finish();
final MetricsFile<RrbsMetrics, Long> rrbsMetrics = getMetricsFile();
metricsCollector.addAllLevelsToFile(rrbsMetrics);
// Using RrbsMetrics as a way to get both of the metrics objects through the MultiLevelCollector. Once
// we get it out split it apart to the two separate MetricsFiles and write them to file
final MetricsFile<RrbsSummaryMetrics, ?> summaryFile = getMetricsFile();
final MetricsFile<RrbsCpgDetailMetrics, ?> detailsFile = getMetricsFile();
for (final RrbsMetrics rrbsMetric : rrbsMetrics.getMetrics()) {
summaryFile.addMetric(rrbsMetric.getSummaryMetrics());
for (final RrbsCpgDetailMetrics detailMetric : rrbsMetric.getDetailMetrics()) {
detailsFile.addMetric(detailMetric);
}
}
summaryFile.write(SUMMARY_OUT);
detailsFile.write(DETAILS_OUT);
if (PRODUCE_PLOT) {
final RScriptExecutor executor = new RScriptExecutor();
executor.addScript(new Resource(R_SCRIPT, CollectRrbsMetrics.class));
executor.addArgs(DETAILS_OUT.getAbsolutePath(), SUMMARY_OUT.getAbsolutePath(), PLOTS_OUT.getAbsolutePath());
executor.exec();
}
CloserUtil.close(samReader);
return null;
}
use of htsjdk.samtools.SAMRecord in project gatk by broadinstitute.
the class CollectTargetedMetrics method doWork.
/**
* Asserts that files are readable and writable and then fires off an
* HsMetricsCalculator instance to do the real work.
*/
@Override
protected Object doWork() {
for (final File targetInterval : TARGET_INTERVALS) IOUtil.assertFileIsReadable(targetInterval);
IOUtil.assertFileIsReadable(INPUT);
IOUtil.assertFileIsWritable(OUTPUT);
if (PER_TARGET_COVERAGE != null)
IOUtil.assertFileIsWritable(PER_TARGET_COVERAGE);
final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT);
final IntervalList targetIntervals = IntervalList.fromFiles(TARGET_INTERVALS);
// Validate that the targets and baits have the same references as the reads file
SequenceUtil.assertSequenceDictionariesEqual(reader.getFileHeader().getSequenceDictionary(), targetIntervals.getHeader().getSequenceDictionary());
SequenceUtil.assertSequenceDictionariesEqual(reader.getFileHeader().getSequenceDictionary(), getProbeIntervals().getHeader().getSequenceDictionary());
ReferenceSequenceFile ref = null;
if (REFERENCE_SEQUENCE != null) {
IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE);
ref = ReferenceSequenceFileFactory.getReferenceSequenceFile(REFERENCE_SEQUENCE);
SequenceUtil.assertSequenceDictionariesEqual(reader.getFileHeader().getSequenceDictionary(), ref.getSequenceDictionary(), INPUT, REFERENCE_SEQUENCE);
}
final COLLECTOR collector = makeCollector(METRIC_ACCUMULATION_LEVEL, reader.getFileHeader().getReadGroups(), ref, PER_TARGET_COVERAGE, targetIntervals, getProbeIntervals(), getProbeSetName());
final ProgressLogger progress = new ProgressLogger(logger);
for (final SAMRecord record : reader) {
collector.acceptRecord(record, null);
progress.record(record);
}
// Write the output file
final MetricsFile<METRIC, Integer> metrics = getMetricsFile();
collector.finish();
collector.addAllLevelsToFile(metrics);
metrics.write(OUTPUT);
CloserUtil.close(reader);
return null;
}
use of htsjdk.samtools.SAMRecord in project gatk by broadinstitute.
the class SamFormatConverter method doWork.
@Override
protected Object doWork() {
IOUtil.assertFileIsReadable(INPUT);
IOUtil.assertFileIsWritable(OUTPUT);
final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT);
try (final SAMFileWriter writer = createSAMWriter(OUTPUT, REFERENCE_SEQUENCE, reader.getFileHeader(), true)) {
final ProgressLogger progress = new ProgressLogger(logger);
for (final SAMRecord rec : reader) {
writer.addAlignment(rec);
progress.record(rec);
}
}
CloserUtil.close(reader);
return null;
}
use of htsjdk.samtools.SAMRecord in project gatk by broadinstitute.
the class SamToFastq method doWork.
@Override
protected Object doWork() {
IOUtil.assertFileIsReadable(INPUT);
final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT);
final Map<String, SAMRecord> firstSeenMates = new HashMap<>();
final FastqWriterFactory factory = new FastqWriterFactory();
factory.setCreateMd5(CREATE_MD5_FILE);
final Map<SAMReadGroupRecord, FastqWriters> writers = generateWriters(reader.getFileHeader().getReadGroups(), factory);
final ProgressLogger progress = new ProgressLogger(logger);
for (final SAMRecord currentRecord : reader) {
if (currentRecord.isSecondaryOrSupplementary() && !INCLUDE_NON_PRIMARY_ALIGNMENTS)
continue;
// Skip non-PF reads as necessary
if (currentRecord.getReadFailsVendorQualityCheckFlag() && !INCLUDE_NON_PF_READS)
continue;
final FastqWriters fq = writers.get(currentRecord.getReadGroup());
if (currentRecord.getReadPairedFlag()) {
final String currentReadName = currentRecord.getReadName();
final SAMRecord firstRecord = firstSeenMates.remove(currentReadName);
if (firstRecord == null) {
firstSeenMates.put(currentReadName, currentRecord);
} else {
assertPairedMates(firstRecord, currentRecord);
final SAMRecord read1 = currentRecord.getFirstOfPairFlag() ? currentRecord : firstRecord;
final SAMRecord read2 = currentRecord.getFirstOfPairFlag() ? firstRecord : currentRecord;
writeRecord(read1, 1, fq.getFirstOfPair(), READ1_TRIM, READ1_MAX_BASES_TO_WRITE);
final FastqWriter secondOfPairWriter = fq.getSecondOfPair();
if (secondOfPairWriter == null) {
throw new UserException("Input contains paired reads but no SECOND_END_FASTQ specified.");
}
writeRecord(read2, 2, secondOfPairWriter, READ2_TRIM, READ2_MAX_BASES_TO_WRITE);
}
} else {
writeRecord(currentRecord, null, fq.getUnpaired(), READ1_TRIM, READ1_MAX_BASES_TO_WRITE);
}
progress.record(currentRecord);
}
CloserUtil.close(reader);
// Close all the fastq writers being careful to close each one only once!
for (final FastqWriters writerMapping : new HashSet<>(writers.values())) {
writerMapping.closeAll();
}
if (!firstSeenMates.isEmpty()) {
SAMUtils.processValidationError(new SAMValidationError(SAMValidationError.Type.MATE_NOT_FOUND, "Found " + firstSeenMates.size() + " unpaired mates", null), VALIDATION_STRINGENCY);
}
return null;
}
Aggregations