use of htsjdk.samtools.SAMRecordIterator in project gatk by broadinstitute.
the class ReorderSam method doWork.
@Override
protected Object doWork() {
IOUtil.assertFileIsReadable(INPUT);
IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE);
IOUtil.assertFileIsWritable(OUTPUT);
final SamReader in = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT);
ReferenceSequenceFile reference = ReferenceSequenceFileFactory.getReferenceSequenceFile(REFERENCE_SEQUENCE);
SAMSequenceDictionary refDict = reference.getSequenceDictionary();
if (refDict == null) {
CloserUtil.close(in);
throw new UserException("No reference sequence dictionary found. Aborting. " + "You can create a sequence dictionary for the reference fasta using CreateSequenceDictionary.jar.");
}
printDictionary("SAM/BAM file", in.getFileHeader().getSequenceDictionary());
printDictionary("Reference", refDict);
Map<Integer, Integer> newOrder = buildSequenceDictionaryMap(refDict, in.getFileHeader().getSequenceDictionary());
// has to be after we create the newOrder
SAMFileHeader outHeader = ReadUtils.cloneSAMFileHeader(in.getFileHeader());
outHeader.setSequenceDictionary(refDict);
logger.info("Writing reads...");
if (in.hasIndex()) {
try (final SAMFileWriter out = createSAMWriter(OUTPUT, REFERENCE_SEQUENCE, outHeader, true)) {
// write the reads in contig order
for (final SAMSequenceRecord contig : refDict.getSequences()) {
final SAMRecordIterator it = in.query(contig.getSequenceName(), 0, 0, false);
writeReads(out, it, newOrder, contig.getSequenceName());
}
// don't forget the unmapped reads
writeReads(out, in.queryUnmapped(), newOrder, "unmapped");
}
} else {
try (final SAMFileWriter out = createSAMWriter(OUTPUT, REFERENCE_SEQUENCE, outHeader, false)) {
writeReads(out, in.iterator(), newOrder, "All reads");
}
}
// cleanup
CloserUtil.close(in);
return null;
}
use of htsjdk.samtools.SAMRecordIterator in project hmftools by hartwigmedical.
the class Analysis method queryNameSortedBAM.
private static File queryNameSortedBAM(final SamReader reader, final QueryInterval[] intervals, final String name) throws IOException {
final SAMFileHeader header = reader.getFileHeader().clone();
header.setSortOrder(SAMFileHeader.SortOrder.queryname);
final File file = File.createTempFile(name, ".bam");
final SAMFileWriter writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(header, false, file);
final SAMRecordIterator iterator = reader.queryOverlapping(intervals);
while (iterator.hasNext()) {
writer.addAlignment(iterator.next());
}
iterator.close();
writer.close();
return file;
}
use of htsjdk.samtools.SAMRecordIterator in project hmftools by hartwigmedical.
the class Analysis method collectEvidence.
private SampleStats collectEvidence(final HMFVariantContext ctx, final SamReader reader, final Pair<Location, Location> breakpoints) {
final Location bp1 = breakpoints.getLeft();
final Location bp2 = breakpoints.getRight();
final SampleStats result = new SampleStats();
final Pair<Integer, Integer> ctxOrientation = Pair.of(ctx.OrientationBP1, ctx.OrientationBP2);
final List<SAMRecord> currentReads = Lists.newArrayList();
final SAMRecordIterator iterator = reader.iterator();
final boolean srOnly = ctx.isShortVariant() || ctx.isInsert();
// then go through alignments of a read pair-wise
while (iterator.hasNext() || !currentReads.isEmpty()) {
final SAMRecord record = iterator.hasNext() ? iterator.next() : null;
if (record != null) {
if (currentReads.isEmpty() || record.getReadName().equals(currentReads.get(0).getReadName())) {
currentReads.add(record);
continue;
}
}
currentReads.sort(comparator);
final PairedReads pairs = pairs(currentReads);
currentReads.clear();
if (record != null) {
currentReads.add(record);
}
boolean pr_support = false;
boolean bp1_sr_support = false;
boolean bp2_sr_support = false;
boolean bp1_pr_normal = false;
boolean bp1_sr_normal = false;
boolean bp2_pr_normal = false;
boolean bp2_sr_normal = false;
for (final Pair<SAMRecord, SAMRecord> pair : pairs) {
final boolean proper = stream(pair).anyMatch(SAMRecord::getProperPairFlag);
final boolean secondary = stream(pair).anyMatch(SAMRecord::isSecondaryOrSupplementary);
final boolean correctOrientation = orientation(pair).equals(ctxOrientation);
final boolean correctChromosome = Location.fromSAMRecord(pair.getLeft()).sameChromosomeAs(ctx.MantaBP1) && Location.fromSAMRecord(pair.getRight()).sameChromosomeAs(ctx.MantaBP2);
final int MAX_INTRA_PAIR_LENGTH = 400;
final boolean intraPairLength = (ctx.OrientationBP1 > 0 ? bp1.Position - pair.getLeft().getAlignmentEnd() : pair.getLeft().getAlignmentStart() - bp1.Position) + (ctx.OrientationBP2 > 0 ? breakpoints.getRight().Position - pair.getRight().getAlignmentEnd() : pair.getRight().getAlignmentStart() - bp2.Position) < MAX_INTRA_PAIR_LENGTH;
boolean isPairEvidence = correctOrientation && correctChromosome && intraPairLength;
if (isPairEvidence) {
final int left_outer = Location.fromSAMRecord(pair.getLeft(), ctx.OrientationBP1 > 0).compareTo(bp1);
final int right_outer = Location.fromSAMRecord(pair.getRight(), ctx.OrientationBP2 > 0).compareTo(bp2);
if (ctx.OrientationBP1 > 0) {
isPairEvidence &= left_outer < 0;
} else {
isPairEvidence &= left_outer > 0;
}
if (ctx.OrientationBP2 > 0) {
isPairEvidence &= right_outer < 0;
} else {
isPairEvidence &= right_outer > 0;
}
}
if (isPairEvidence) {
bp1_sr_support |= exactlyClipsBreakpoint(pair.getLeft(), bp1, ctx.OrientationBP1);
bp2_sr_support |= exactlyClipsBreakpoint(pair.getRight(), bp2, ctx.OrientationBP2);
if (!srOnly) {
pr_support = true;
result.PR_Evidence.add(pair);
}
}
if (proper || secondary) {
final boolean clips_bp1 = exactlyClipsBreakpoint(ctx.OrientationBP1 > 0 ? pair.getRight() : pair.getLeft(), bp1, ctx.OrientationBP1);
final boolean clips_bp2 = exactlyClipsBreakpoint(ctx.OrientationBP2 > 0 ? pair.getRight() : pair.getLeft(), bp2, ctx.OrientationBP2);
final boolean span_bp1 = span(pair, bp1);
final boolean span_bp2 = span(pair, bp2);
final boolean overlap_bp1 = overlap(pair, bp1);
final boolean overlap_bp2 = overlap(pair, bp2);
boolean addToSR = false;
if (span_bp1) {
if (clips_bp1) {
bp1_sr_support = addToSR = true;
} else if (overlap_bp1) {
bp1_pr_normal = bp1_sr_normal = addToSR = true;
} else {
bp1_pr_normal = true;
}
}
if (span_bp2) {
if (clips_bp2) {
bp2_sr_support = addToSR = true;
} else if (overlap_bp2) {
bp2_pr_normal = bp2_sr_normal = addToSR = true;
} else {
bp2_pr_normal = true;
}
}
if (addToSR) {
result.SR_Evidence.add(pair);
}
}
}
// next pair in reads
// increment read counts
final boolean sr_support = bp1_sr_support || bp2_sr_support;
if (sr_support && pr_support) {
result.BP1_Stats.PR_SR_Support++;
} else if (bp1_sr_support) {
result.BP1_Stats.SR_Only_Support++;
} else if (pr_support) {
result.BP1_Stats.PR_Only_Support++;
}
if (bp1_pr_normal && bp1_sr_normal) {
result.BP1_Stats.PR_SR_Normal++;
} else if (bp1_pr_normal && !srOnly) {
result.BP1_Stats.PR_Only_Normal++;
}
if (sr_support && pr_support) {
result.BP2_Stats.PR_SR_Support++;
} else if (bp2_sr_support) {
result.BP2_Stats.SR_Only_Support++;
} else if (pr_support) {
result.BP2_Stats.PR_Only_Support++;
}
if (bp2_pr_normal && bp2_sr_normal) {
result.BP2_Stats.PR_SR_Normal++;
} else if (bp2_pr_normal && !srOnly) {
result.BP2_Stats.PR_Only_Normal++;
}
}
// next read collection
iterator.close();
return result;
}
use of htsjdk.samtools.SAMRecordIterator in project hmftools by hartwigmedical.
the class BreakPointInspectorApplication method writeToSlice.
private static void writeToSlice(final String path, final SamReader reader, final QueryInterval[] intervals) {
final File outputBAM = new File(path);
final SAMFileWriter writer = new SAMFileWriterFactory().makeBAMWriter(reader.getFileHeader(), true, outputBAM);
final SAMRecordIterator iterator = reader.queryOverlapping(intervals);
while (iterator.hasNext()) {
writer.addAlignment(iterator.next());
}
iterator.close();
writer.close();
}
use of htsjdk.samtools.SAMRecordIterator in project polyGembler by c-zhou.
the class SamFileExtract method run.
@Override
public void run() {
// TODO Auto-generated method stub
final SamReaderFactory factory = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS).validationStringency(ValidationStringency.SILENT);
final SamReader inputSam = factory.open(new File(bam_in));
final SAMFileHeader header = inputSam.getFileHeader();
final SAMSequenceDictionary seqdic = header.getSequenceDictionary();
final SAMFileHeader header_out = new SAMFileHeader();
final SAMSequenceDictionary seqdic_out = new SAMSequenceDictionary();
SAMRecordIterator iter = inputSam.iterator();
File bed_file = new File(bed_in);
final Set<String> extract = new HashSet<String>();
try (BufferedReader br = new BufferedReader(new FileReader(bed_file))) {
String line;
while ((line = br.readLine()) != null) extract.add(line.split("\\s+")[0]);
} catch (IOException e) {
e.printStackTrace();
System.exit(1);
}
header_out.setAttribute("VN", header.getAttribute("VN"));
header_out.setAttribute("SO", header.getAttribute("SO"));
List<SAMSequenceRecord> seqs = seqdic.getSequences();
for (SAMSequenceRecord seq : seqs) if (extract.contains(seq.getSequenceName()))
seqdic_out.addSequence(seq);
header_out.setSequenceDictionary(seqdic_out);
for (SAMReadGroupRecord rg : header.getReadGroups()) header_out.addReadGroup(rg);
for (SAMProgramRecord pg : header.getProgramRecords()) header_out.addProgramRecord(pg);
final SAMFileWriter outputSam = new SAMFileWriterFactory().makeSAMOrBAMWriter(header_out, true, new File(bam_out));
while (iter.hasNext()) {
SAMRecord rec = iter.next();
if (extract.contains(rec.getReferenceName()))
outputSam.addAlignment(rec);
}
iter.close();
try {
inputSam.close();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
outputSam.close();
System.err.println(bam_in + " return true");
}
Aggregations