use of htsjdk.samtools.SAMRecord in project gatk by broadinstitute.
the class MergeSamFiles method doWork.
/** Combines multiple SAM/BAM files into one. */
@Override
protected Object doWork() {
boolean matchedSortOrders = true;
// Open the files for reading and writing
final List<SamReader> readers = new ArrayList<>();
final List<SAMFileHeader> headers = new ArrayList<>();
{
// Used to try and reduce redundant SDs in memory
SAMSequenceDictionary dict = null;
for (final File inFile : INPUT) {
IOUtil.assertFileIsReadable(inFile);
final SamReader in = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(inFile);
readers.add(in);
headers.add(in.getFileHeader());
// replace the duplicate copies with a single dictionary to reduce the memory footprint.
if (dict == null) {
dict = in.getFileHeader().getSequenceDictionary();
} else if (dict.equals(in.getFileHeader().getSequenceDictionary())) {
in.getFileHeader().setSequenceDictionary(dict);
}
matchedSortOrders = matchedSortOrders && in.getFileHeader().getSortOrder() == SORT_ORDER;
}
}
// If all the input sort orders match the output sort order then just merge them and
// write on the fly, otherwise setup to merge and sort before writing out the final file
IOUtil.assertFileIsWritable(OUTPUT);
final boolean presorted;
final SAMFileHeader.SortOrder headerMergerSortOrder;
final boolean mergingSamRecordIteratorAssumeSorted;
if (matchedSortOrders || SORT_ORDER == SAMFileHeader.SortOrder.unsorted || ASSUME_SORTED) {
logger.info("Input files are in same order as output so sorting to temp directory is not needed.");
headerMergerSortOrder = SORT_ORDER;
mergingSamRecordIteratorAssumeSorted = ASSUME_SORTED;
presorted = true;
} else {
logger.info("Sorting input files using temp directory " + TMP_DIR);
headerMergerSortOrder = SAMFileHeader.SortOrder.unsorted;
mergingSamRecordIteratorAssumeSorted = false;
presorted = false;
}
final SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(headerMergerSortOrder, headers, MERGE_SEQUENCE_DICTIONARIES);
final MergingSamRecordIterator iterator = new MergingSamRecordIterator(headerMerger, readers, mergingSamRecordIteratorAssumeSorted);
final SAMFileHeader header = headerMerger.getMergedHeader();
for (final String comment : COMMENT) {
header.addComment(comment);
}
header.setSortOrder(SORT_ORDER);
final SAMFileWriterFactory samFileWriterFactory = new SAMFileWriterFactory();
if (USE_THREADING) {
samFileWriterFactory.setUseAsyncIo(true);
}
try (final SAMFileWriter out = createSAMWriter(OUTPUT, REFERENCE_SEQUENCE, header, presorted)) {
// Lastly loop through and write out the records
final ProgressLogger progress = new ProgressLogger(logger, PROGRESS_INTERVAL);
while (iterator.hasNext()) {
final SAMRecord record = iterator.next();
out.addAlignment(record);
progress.record(record);
}
logger.info("Finished reading inputs.");
CloserUtil.close(readers);
}
return null;
}
use of htsjdk.samtools.SAMRecord in project gatk by broadinstitute.
the class ReorderSam method writeReads.
/**
* Helper function that writes reads from iterator it into writer out, updating each SAMRecord along the way
* according to the newOrder mapping from dictionary index -> index. Name is used for printing only.
*/
private void writeReads(final SAMFileWriter out, final SAMRecordIterator it, final Map<Integer, Integer> newOrder, final String name) {
long counter = 0;
logger.info(" Processing " + name);
while (it.hasNext()) {
counter++;
final SAMRecord read = it.next();
final int oldRefIndex = read.getReferenceIndex();
final int oldMateIndex = read.getMateReferenceIndex();
final int newRefIndex = newOrderIndex(read, oldRefIndex, newOrder);
read.setHeaderStrict(out.getFileHeader());
read.setReferenceIndex(newRefIndex);
final int newMateIndex = newOrderIndex(read, oldMateIndex, newOrder);
if (oldMateIndex != -1 && newMateIndex == -1) {
// becoming unmapped
read.setMateAlignmentStart(0);
read.setMateUnmappedFlag(true);
// Set the Mate Cigar String to null
read.setAttribute(SAMTag.MC.name(), null);
}
read.setMateReferenceIndex(newMateIndex);
out.addAlignment(read);
}
it.close();
logger.info("Wrote " + counter + " reads");
}
use of htsjdk.samtools.SAMRecord in project gatk by broadinstitute.
the class RevertSam method fetchByReadName.
/**
* Generates a list by consuming from the iterator in order starting with the first available
* read and continuing while subsequent reads share the same read name. If there are no reads
* remaining returns an empty list.
*/
private List<SAMRecord> fetchByReadName(final PeekableIterator<SAMRecord> iterator) {
final List<SAMRecord> out = new LinkedList<>();
if (iterator.hasNext()) {
final SAMRecord first = iterator.next();
out.add(first);
while (iterator.hasNext() && iterator.peek().getReadName().equals(first.getReadName())) {
out.add(iterator.next());
}
}
return out;
}
use of htsjdk.samtools.SAMRecord in project gatk by broadinstitute.
the class HaplotypeBAMWriter method writeHaplotype.
/**
* Write out a representation of this haplotype as a read
*
* @param haplotype a haplotype to write out, must not be null
* @param paddedRefLoc the reference location, must not be null
* @param isAmongBestHaplotypes true if among the best haplotypes, false if it was just one possible haplotype
*/
private void writeHaplotype(final Haplotype haplotype, final Locatable paddedRefLoc, final boolean isAmongBestHaplotypes) {
Utils.nonNull(haplotype, "haplotype cannot be null");
Utils.nonNull(paddedRefLoc, "paddedRefLoc cannot be null");
final SAMRecord record = new SAMRecord(output.getBAMOutputHeader());
record.setReadBases(haplotype.getBases());
record.setAlignmentStart(paddedRefLoc.getStart() + haplotype.getAlignmentStartHapwrtRef());
// Use a base quality value "!" for it's display value (quality value is not meaningful)
record.setBaseQualities(Utils.dupBytes((byte) '!', haplotype.getBases().length));
record.setCigar(AlignmentUtils.consolidateCigar(haplotype.getCigar()));
record.setMappingQuality(isAmongBestHaplotypes ? bestHaplotypeMQ : otherMQ);
record.setReadName(output.getHaplotypeSampleTag() + uniqueNameCounter++);
record.setAttribute(output.getHaplotypeSampleTag(), haplotype.hashCode());
record.setReadUnmappedFlag(false);
record.setReferenceIndex(output.getBAMOutputHeader().getSequenceIndex(paddedRefLoc.getContig()));
record.setAttribute(SAMTag.RG.toString(), output.getHaplotypeReadGroupID());
record.setFlags(SAMFlag.READ_REVERSE_STRAND.intValue());
output.add(new SAMRecordToGATKReadAdapter(record));
}
use of htsjdk.samtools.SAMRecord in project gatk by broadinstitute.
the class FilterReadsIntegrationTest method getReadCounts.
private int getReadCounts(final String resultFileName, final String referenceFileName) {
final File path = new File(resultFileName);
IOUtil.assertFileIsReadable(path);
final File refFile = null == referenceFileName ? null : new File(TEST_DATA_DIR, referenceFileName);
final SamReader in = SamReaderFactory.makeDefault().referenceSequence(refFile).open(path);
int count = 0;
for (@SuppressWarnings("unused") final SAMRecord rec : in) {
count++;
}
CloserUtil.close(in);
return count;
}
Aggregations