Use of htsjdk.samtools.SamReader in project gatk by broadinstitute.
The class SamComparisonTest, method testHelper.
private void testHelper(final String f1, final String f2,
                        final int expectedMatch, final int expectedDiffer, final int expectedUnmappedBoth,
                        final int expectedUnmappedLeft, final int expectedUnmappedRight,
                        final int expectedMissingLeft, final int expectedMissingRight,
                        final boolean areEqual) throws IOException {
    SamReaderFactory factory = SamReaderFactory.makeDefault();
    File sam1 = new File(TEST_FILES_DIR, f1);
    File sam2 = new File(TEST_FILES_DIR, f2);
    try (final SamReader reader1 = factory.open(sam1);
         final SamReader reader2 = factory.open(sam2)) {
        final SamComparison comparison = new SamComparison(reader1, reader2);
        Assert.assertEquals(areEqual, comparison.areEqual());
        Assert.assertEquals(expectedMatch, comparison.getMappingsMatch());
        Assert.assertEquals(expectedDiffer, comparison.getMappingsDiffer());
        Assert.assertEquals(expectedUnmappedBoth, comparison.getUnmappedBoth());
        Assert.assertEquals(expectedUnmappedLeft, comparison.getUnmappedLeft());
        Assert.assertEquals(expectedUnmappedRight, comparison.getUnmappedRight());
        Assert.assertEquals(expectedMissingLeft, comparison.getMissingLeft());
        Assert.assertEquals(expectedMissingRight, comparison.getMissingRight());
    }
    // now reverse the comparison: the left/right expectations swap with the reader order
    try (final SamReader reader1 = factory.open(sam1);
         final SamReader reader2 = factory.open(sam2)) {
        final SamComparison comparison = new SamComparison(reader2, reader1);
        Assert.assertEquals(areEqual, comparison.areEqual());
        Assert.assertEquals(expectedMatch, comparison.getMappingsMatch());
        Assert.assertEquals(expectedDiffer, comparison.getMappingsDiffer());
        Assert.assertEquals(expectedUnmappedBoth, comparison.getUnmappedBoth());
        Assert.assertEquals(expectedUnmappedRight, comparison.getUnmappedLeft());
        Assert.assertEquals(expectedUnmappedLeft, comparison.getUnmappedRight());
        Assert.assertEquals(expectedMissingRight, comparison.getMissingLeft());
        Assert.assertEquals(expectedMissingLeft, comparison.getMissingRight());
    }
}
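All of these usage examples share the same core idiom: build a SamReaderFactory, open the file, iterate the records, and let try-with-resources close the reader. A minimal self-contained sketch of that idiom, with a hypothetical input path:

import java.io.File;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;

public final class SamReaderBasics {
    public static void main(final String[] args) throws Exception {
        // makeDefault() applies the default validation stringency and decoding options;
        // open() accepts SAM, BAM and CRAM inputs.
        final File input = new File("example.bam"); // hypothetical path
        try (final SamReader reader = SamReaderFactory.makeDefault().open(input)) {
            // SamReader is Iterable<SAMRecord>, so a for-each walks every record
            for (final SAMRecord record : reader) {
                System.out.println(record.getReadName() + "\t" + record.getContig() + ":" + record.getStart());
            }
        }
    }
}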
Use of htsjdk.samtools.SamReader in project gatk by broadinstitute.
The class ReadUtilsUnitTest, method testCreateSAMWriter.
@Test(dataProvider = "createSAMWriter")
public void testCreateSAMWriter(final File bamFile, final boolean preSorted, final boolean createIndex,
        final boolean createMD5, final boolean expectIndex) throws Exception {
    final File outputFile = createTempFile("samWriterTest", ".bam");
    try (final SamReader samReader = SamReaderFactory.makeDefault().open(bamFile)) {
        final SAMFileHeader header = samReader.getFileHeader();
        if (expectIndex) {
            // ensure test condition: an index can only be expected for coordinate-sorted input
            Assert.assertEquals(expectIndex, header.getSortOrder() == SAMFileHeader.SortOrder.coordinate);
        }
        try (final SAMFileWriter samWriter = ReadUtils.createCommonSAMWriter(outputFile, null,
                samReader.getFileHeader(), preSorted, createIndex, createMD5)) {
            final Iterator<SAMRecord> samRecIt = samReader.iterator();
            while (samRecIt.hasNext()) {
                samWriter.addAlignment(samRecIt.next());
            }
        }
    }
    final File md5File = new File(outputFile.getAbsolutePath() + ".md5");
    if (md5File.exists()) {
        md5File.deleteOnExit();
    }
    Assert.assertEquals(expectIndex, null != SamFiles.findIndex(outputFile));
    Assert.assertEquals(createMD5, md5File.exists());
}
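ReadUtils.createCommonSAMWriter is a GATK convenience method; assuming it delegates to htsjdk's SAMFileWriterFactory, a plain-htsjdk sketch of the same copy-with-index-and-md5 flow looks like this (paths hypothetical; on-the-fly index creation only works for a coordinate-sorted header, which is exactly the precondition the test asserts above):

import java.io.File;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMFileWriter;
import htsjdk.samtools.SAMFileWriterFactory;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;

public final class BamCopyExample {
    public static void copyBam(final File in, final File out) throws Exception {
        try (final SamReader reader = SamReaderFactory.makeDefault().open(in)) {
            final SAMFileHeader header = reader.getFileHeader();
            final SAMFileWriterFactory factory = new SAMFileWriterFactory()
                    // index creation requires coordinate-sorted input
                    .setCreateIndex(header.getSortOrder() == SAMFileHeader.SortOrder.coordinate)
                    .setCreateMd5File(true); // writes <out>.md5 alongside the BAM
            // presorted = true: records are added in an order matching the header's sort order
            try (final SAMFileWriter writer = factory.makeBAMWriter(header, true, out)) {
                for (final SAMRecord record : reader) {
                    writer.addAlignment(record);
                }
            }
        }
    }
}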
Use of htsjdk.samtools.SamReader in project gatk-protected by broadinstitute.
The class GetHetCoverageIntegrationTest, method initHeaders.
@BeforeClass
public void initHeaders() throws IOException {
    try (final SamReader normalBamReader = SamReaderFactory.makeDefault().open(NORMAL_BAM_FILE);
         final SamReader tumorBamReader = SamReaderFactory.makeDefault().open(TUMOR_BAM_FILE)) {
        normalHeader = normalBamReader.getFileHeader();
        tumorHeader = tumorBamReader.getFileHeader();
        normalHetPulldownExpected = new Pulldown(normalHeader);
        normalHetPulldownExpected.add(new AllelicCount(new SimpleInterval("1", 11522, 11522), 7, 4));
        normalHetPulldownExpected.add(new AllelicCount(new SimpleInterval("1", 12098, 12098), 8, 6));
        normalHetPulldownExpected.add(new AllelicCount(new SimpleInterval("1", 14630, 14630), 9, 8));
        normalHetPulldownExpected.add(new AllelicCount(new SimpleInterval("2", 14689, 14689), 6, 9));
        normalHetPulldownExpected.add(new AllelicCount(new SimpleInterval("2", 14982, 14982), 6, 5));
        tumorHetPulldownExpected = new Pulldown(tumorHeader);
        tumorHetPulldownExpected.add(new AllelicCount(new SimpleInterval("1", 11522, 11522), 7, 4));
        tumorHetPulldownExpected.add(new AllelicCount(new SimpleInterval("1", 12098, 12098), 8, 6));
        tumorHetPulldownExpected.add(new AllelicCount(new SimpleInterval("1", 14630, 14630), 9, 8));
        tumorHetPulldownExpected.add(new AllelicCount(new SimpleInterval("2", 14689, 14689), 6, 9));
        tumorHetPulldownExpected.add(new AllelicCount(new SimpleInterval("2", 14982, 14982), 6, 5));
    }
}
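Beyond the expected pulldowns, this fixture only extracts SAMFileHeader objects from the readers. For reference, a minimal sketch of other metadata commonly read off a header obtained this way (path hypothetical):

import java.io.File;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMReadGroupRecord;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;

public final class HeaderMetadataExample {
    public static void main(final String[] args) throws Exception {
        try (final SamReader reader = SamReaderFactory.makeDefault().open(new File("normal.bam"))) {
            final SAMFileHeader header = reader.getFileHeader();
            System.out.println("sort order: " + header.getSortOrder());
            System.out.println("contigs:    " + header.getSequenceDictionary().size());
            for (final SAMReadGroupRecord rg : header.getReadGroups()) {
                System.out.println("read group: " + rg.getId() + " sample=" + rg.getSample());
            }
        }
    }
}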
Use of htsjdk.samtools.SamReader in project hmftools by hartwigmedical.
The class Analysis, method processStructuralVariant.
StructuralVariantResult processStructuralVariant(final HMFVariantContext ctx) throws IOException {
    final QueryInterval[] intervals = QueryInterval.optimizeIntervals(new QueryInterval[] {
            new QueryInterval(ctx.MantaBP1.ReferenceIndex,
                    Math.max(0, ctx.MantaBP1.Position + ctx.Uncertainty1.Start - range),
                    ctx.MantaBP1.Position + ctx.Uncertainty1.End + range),
            new QueryInterval(ctx.MantaBP2.ReferenceIndex,
                    Math.max(0, ctx.MantaBP2.Position + ctx.Uncertainty2.Start - range),
                    ctx.MantaBP2.Position + ctx.Uncertainty2.End + range) });
    final File TEMP_REF_BAM = queryNameSortedBAM(refReader, intervals, "ref");
    final File TEMP_TUMOR_BAM = queryNameSortedBAM(tumorReader, intervals, "tumor");
    final SamReader SORTED_REF_READER = SamReaderFactory.makeDefault().open(TEMP_REF_BAM);
    final SamReader SORTED_TUMOR_READER = SamReaderFactory.makeDefault().open(TEMP_TUMOR_BAM);
    final BreakpointResult breakpoints = determineBreakpoints(ctx, SORTED_TUMOR_READER);
    final StructuralVariantResult result = new StructuralVariantResult();
    result.Breakpoints = breakpoints.Breakpoints;
    result.QueryIntervals = intervals;
    if (breakpoints.Error != BreakpointError.NONE) {
        result.Filters = Filter.getErrorFilter();
    } else {
        result.TumorStats = collectEvidence(ctx, SORTED_TUMOR_READER, result.Breakpoints);
        result.RefStats = collectEvidence(ctx, SORTED_REF_READER, result.Breakpoints);
        result.AlleleFrequency = AlleleFrequency.calculate(result.TumorStats);
        // load sample clipping
        SORTED_TUMOR_READER.forEach(r -> Clipping.getClips(r).forEach(c -> result.TumorStats.Sample_Clipping.add(c)));
        SORTED_REF_READER.forEach(r -> Clipping.getClips(r).forEach(c -> result.RefStats.Sample_Clipping.add(c)));
        result.Filters = Filter.getFilters(ctx, result.TumorStats, result.RefStats, result.Breakpoints, contamination);
        // adjust for homology
        final Location bp1 = result.Breakpoints.getLeft().add(ctx.OrientationBP1 > 0 ? 0 : -1);
        final Location bp2;
        if (!ctx.isInsert() && ctx.InsertSequence.isEmpty()) {
            bp2 = result.Breakpoints.getRight()
                    .add(-ctx.OrientationBP2 * ctx.HomologySequence.length())
                    .add(ctx.OrientationBP2 > 0 ? 0 : -1);
        } else {
            bp2 = result.Breakpoints.getRight().add(ctx.OrientationBP2 > 0 ? 0 : -1);
        }
        result.Breakpoints = Pair.of(bp1, bp2);
    }
    result.FilterString = result.Filters.isEmpty() ? "PASS" : String.join(";", result.Filters);
    // clean up
    SORTED_REF_READER.close();
    SORTED_TUMOR_READER.close();
    if (!TEMP_REF_BAM.delete()) {
        LOGGER.error("couldn't delete {}", TEMP_REF_BAM);
    }
    if (!TEMP_TUMOR_BAM.delete()) {
        LOGGER.error("couldn't delete {}", TEMP_TUMOR_BAM);
    }
    return result;
}
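processStructuralVariant builds QueryInterval windows around both breakpoints and hands them to the project-internal queryNameSortedBAM helper. A minimal sketch of the underlying htsjdk idiom those intervals feed into (hypothetical contig and coordinates; SamReader.query requires a coordinate-sorted, indexed BAM, and the intervals must first be optimized, i.e. sorted and merged):

import java.io.File;
import htsjdk.samtools.QueryInterval;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMRecordIterator;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;

public final class IntervalQueryExample {
    public static void main(final String[] args) throws Exception {
        try (final SamReader reader = SamReaderFactory.makeDefault().open(new File("indexed.bam"))) {
            final int refIndex = reader.getFileHeader().getSequenceDictionary().getSequenceIndex("1");
            final QueryInterval[] intervals = QueryInterval.optimizeIntervals(new QueryInterval[] {
                    new QueryInterval(refIndex, 11000, 12500),    // hypothetical window 1
                    new QueryInterval(refIndex, 14000, 15000) }); // hypothetical window 2
            // contained = false: return every read overlapping the intervals
            try (final SAMRecordIterator it = reader.query(intervals, false)) {
                while (it.hasNext()) {
                    final SAMRecord record = it.next();
                    System.out.println(record.getReadName());
                }
            }
        }
    }
}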
Use of htsjdk.samtools.SamReader in project hmftools by hartwigmedical.
The class BreakPointInspectorApplication, method main.
public static void main(final String... args) throws IOException {
    final AnalysisBuilder analysisBuilder = new AnalysisBuilder();
    final Options options = createOptions();
    try {
        final CommandLine cmd = createCommandLine(options, args);
        final String refPath = cmd.getOptionValue(REF_PATH);
        final String refSlicePath = cmd.getOptionValue(REF_SLICE);
        final String tumorPath = cmd.getOptionValue(TUMOR_PATH);
        final String tumorSlicePath = cmd.getOptionValue(TUMOR_SLICE);
        final String vcfPath = cmd.getOptionValue(VCF);
        if (cmd.hasOption(PROXIMITY)) {
            analysisBuilder.setRange(Integer.parseInt(cmd.getOptionValue(PROXIMITY, "500")));
        }
        if (cmd.hasOption(CONTAMINATION)) {
            analysisBuilder.setContaminationFraction(Float.parseFloat(cmd.getOptionValue(CONTAMINATION, "0")));
        }
        if (refPath == null || tumorPath == null || vcfPath == null) {
            printHelpAndExit(options);
            return;
        }
        final File tumorBAM = new File(tumorPath);
        final SamReader tumorReader = SamReaderFactory.makeDefault().open(tumorBAM);
        final File refBAM = new File(refPath);
        final SamReader refReader = SamReaderFactory.makeDefault().open(refBAM);
        final File vcfFile = new File(vcfPath);
        final VCFFileReader vcfReader = new VCFFileReader(vcfFile, false);
        final List<String> samples = vcfReader.getFileHeader().getGenotypeSamples();
        if (samples.size() != 2) {
            System.err.println("could not determine tumor and sample from VCF");
            System.exit(1);
            return;
        }
        TSVOutput.PrintHeaders();
        final Analysis analysis = analysisBuilder.setRefReader(refReader).setTumorReader(tumorReader).createAnalysis();
        final List<QueryInterval> combinedQueryIntervals = Lists.newArrayList();
        final Map<String, VariantContext> variantMap = new HashMap<>();
        final List<VariantContext> variants = Lists.newArrayList();
        for (VariantContext variant : vcfReader) {
            variantMap.put(variant.getID(), variant);
            final VariantContext mateVariant = variant;
            if (variant.hasAttribute("MATEID")) {
                variant = variantMap.get(variant.getAttributeAsString("MATEID", ""));
                if (variant == null) {
                    continue;
                }
            }
            final String location = variant.getContig() + ":" + Integer.toString(variant.getStart());
            final Location location1 = Location.parseLocationString(location,
                    tumorReader.getFileHeader().getSequenceDictionary());
            final Range uncertainty1 = extractCIPOS(variant);
            final List<Integer> CIEND = variant.getAttributeAsIntList("CIEND", 0);
            Range uncertainty2 = CIEND.size() == 2 ? new Range(CIEND.get(0), CIEND.get(1)) : null;
            final boolean IMPRECISE = variant.hasAttribute("IMPRECISE");
            HMFVariantType svType;
            final Location location2;
            switch (variant.getStructuralVariantType()) {
                case INS:
                    svType = HMFVariantType.INS;
                    location2 = location1.set(variant.getAttributeAsInt("END", 0));
                    break;
                case INV:
                    if (variant.hasAttribute("INV3")) {
                        svType = HMFVariantType.INV3;
                    } else if (variant.hasAttribute("INV5")) {
                        svType = HMFVariantType.INV5;
                    } else {
                        System.err.println(variant.getID() + " : expected either INV3 or INV5 flag");
                        continue;
                    }
                    location2 = location1.add(Math.abs(variant.getAttributeAsInt("SVLEN", 0)));
                    break;
                case DEL:
                    svType = HMFVariantType.DEL;
                    location2 = location1.add(Math.abs(variant.getAttributeAsInt("SVLEN", 0)));
                    break;
                case DUP:
                    svType = HMFVariantType.DUP;
                    location2 = location1.add(Math.abs(variant.getAttributeAsInt("SVLEN", 0)));
                    break;
                case BND:
                    // process the breakend string
                    final String call = variant.getAlternateAllele(0).getDisplayString();
                    final String[] leftSplit = call.split("\\]");
                    final String[] rightSplit = call.split("\\[");
                    if (leftSplit.length >= 2) {
                        location2 = Location.parseLocationString(leftSplit[1],
                                tumorReader.getFileHeader().getSequenceDictionary());
                        if (leftSplit[0].length() > 0) {
                            svType = HMFVariantType.INV3;
                            uncertainty2 = Range.invert(uncertainty1);
                        } else {
                            svType = HMFVariantType.DUP;
                            uncertainty2 = uncertainty1;
                        }
                    } else if (rightSplit.length >= 2) {
                        location2 = Location.parseLocationString(rightSplit[1],
                                tumorReader.getFileHeader().getSequenceDictionary());
                        if (rightSplit[0].length() > 0) {
                            svType = HMFVariantType.DEL;
                            uncertainty2 = uncertainty1;
                        } else {
                            svType = HMFVariantType.INV5;
                            uncertainty2 = Range.invert(uncertainty1);
                        }
                    } else {
                        System.err.println(variant.getID() + " : could not parse breakpoint");
                        continue;
                    }
                    if (IMPRECISE) {
                        uncertainty2 = extractCIPOS(mateVariant);
                    }
                    break;
                default:
                    System.err.println(variant.getID() + " : UNEXPECTED SVTYPE=" + variant.getStructuralVariantType());
                    continue;
            }
            final HMFVariantContext ctx = new HMFVariantContext(variant.getID(), location1, location2, svType, IMPRECISE);
            ctx.Filter.addAll(variant.getFilters().stream().filter(s -> !s.startsWith("BPI")).collect(Collectors.toSet()));
            ctx.Uncertainty1 = uncertainty1;
            ctx.Uncertainty2 = ObjectUtils.firstNonNull(uncertainty2,
                    fixup(uncertainty1, IMPRECISE, svType == HMFVariantType.INV3 || svType == HMFVariantType.INV5));
            ctx.HomologySequence = variant.getAttributeAsString("HOMSEQ", "");
            if (variant.hasAttribute("LEFT_SVINSSEQ") && variant.hasAttribute("RIGHT_SVINSSEQ")) {
                ctx.InsertSequence = variant.getAttributeAsString("LEFT_SVINSSEQ", "") + "..."
                        + variant.getAttributeAsString("RIGHT_SVINSSEQ", "");
            } else {
                ctx.InsertSequence = variant.getAttributeAsString("SVINSSEQ", "");
            }
            ctx.BND = variant.getStructuralVariantType() == StructuralVariantType.BND;
            switch (ctx.Type) {
                case INS:
                case DEL:
                    ctx.OrientationBP1 = 1;
                    ctx.OrientationBP2 = -1;
                    break;
                case INV3:
                    ctx.OrientationBP1 = 1;
                    ctx.OrientationBP2 = 1;
                    break;
                case INV5:
                    ctx.OrientationBP1 = -1;
                    ctx.OrientationBP2 = -1;
                    break;
                case DUP:
                    ctx.OrientationBP1 = -1;
                    ctx.OrientationBP2 = 1;
                    break;
            }
            final StructuralVariantResult result = analysis.processStructuralVariant(ctx);
            combinedQueryIntervals.addAll(asList(result.QueryIntervals));
            TSVOutput.print(variant, ctx, result);
            final BiConsumer<VariantContext, Boolean> vcfUpdater = (v, swap) -> {
                final Set<String> filters = v.getCommonInfo().getFiltersMaybeNull();
                if (filters != null) {
                    filters.clear();
                }
                // we will map BreakpointError to a flag
                if (result.Filters.contains(Filter.Filters.BreakpointError.toString())) {
                    v.getCommonInfo().putAttribute("BPI_AMBIGUOUS", true, true);
                } else {
                    v.getCommonInfo().addFilters(result.Filters);
                }
                if (result.Filters.isEmpty()) {
                    final List<Double> af = asList(result.AlleleFrequency.getLeft(), result.AlleleFrequency.getRight());
                    v.getCommonInfo().putAttribute(AlleleFrequency.VCF_INFO_TAG, swap ? Lists.reverse(af) : af, true);
                }
                if (result.Breakpoints.getLeft() != null) {
                    v.getCommonInfo().putAttribute(swap ? "BPI_END" : "BPI_START", result.Breakpoints.getLeft().Position, true);
                }
                if (result.Breakpoints.getRight() != null) {
                    v.getCommonInfo().putAttribute(swap ? "BPI_START" : "BPI_END", result.Breakpoints.getRight().Position, true);
                }
                // remove CIPOS / CIEND when we have an insert sequence
                if (!v.hasAttribute("IMPRECISE") && v.hasAttribute("SVINSSEQ")) {
                    v.getCommonInfo().removeAttribute("CIPOS");
                    v.getCommonInfo().removeAttribute("CIEND");
                }
                variants.add(v);
            };
            vcfUpdater.accept(variant, false);
            if (mateVariant != variant) {
                vcfUpdater.accept(mateVariant, true);
            }
        }
        // TODO: update START, END with BPI values and save Manta values in new attributes
        final String vcfOutputPath = cmd.getOptionValue(VCF_OUT);
        if (vcfOutputPath != null) {
            final VCFHeader header = vcfReader.getFileHeader();
            header.addMetaDataLine(new VCFInfoHeaderLine("BPI_START", 1, VCFHeaderLineType.Integer,
                    "BPI adjusted breakend location"));
            header.addMetaDataLine(new VCFInfoHeaderLine("BPI_END", 1, VCFHeaderLineType.Integer,
                    "BPI adjusted breakend location"));
            header.addMetaDataLine(new VCFInfoHeaderLine("BPI_AMBIGUOUS", 0, VCFHeaderLineType.Flag,
                    "BPI could not determine the breakpoints, inspect manually"));
            header.addMetaDataLine(new VCFHeaderLine("bpiVersion",
                    BreakPointInspectorApplication.class.getPackage().getImplementationVersion()));
            Filter.UpdateVCFHeader(header);
            AlleleFrequency.UpdateVCFHeader(header);
            // setup VCF
            final VariantContextWriter writer = new VariantContextWriterBuilder()
                    .setReferenceDictionary(header.getSequenceDictionary())
                    .setOutputFile(vcfOutputPath)
                    .build();
            writer.writeHeader(header);
            // write variants
            variants.sort(new VariantContextComparator(header.getSequenceDictionary()));
            variants.forEach(writer::add);
            writer.close();
        }
        final QueryInterval[] optimizedIntervals = QueryInterval.optimizeIntervals(
                combinedQueryIntervals.toArray(new QueryInterval[combinedQueryIntervals.size()]));
        if (tumorSlicePath != null) {
            writeToSlice(tumorSlicePath, tumorReader, optimizedIntervals);
        }
        if (refSlicePath != null) {
            writeToSlice(refSlicePath, refReader, optimizedIntervals);
        }
        refReader.close();
        tumorReader.close();
    } catch (ParseException e) {
        printHelpAndExit(options);
        System.exit(1);
    }
}
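The BND case in the loop above classifies a breakend by which bracket character appears in the ALT allele and whether local sequence precedes it, following the four breakend forms of the VCF spec (t[p[, t]p], ]p]t, [p[t). A standalone sketch of just that classification, using a hypothetical helper that mirrors the splits in the code:

public final class BreakendClassifyExample {
    enum JoinType { INV3, INV5, DEL_LIKE, DUP_LIKE }

    static JoinType classify(final String alt) {
        final String[] leftSplit = alt.split("\\]");   // matches t]p] and ]p]t
        final String[] rightSplit = alt.split("\\[");  // matches t[p[ and [p[t
        if (leftSplit.length >= 2) {
            // t]p] : local sequence before the bracket -> +/+ join (INV3)
            // ]p]t : local sequence after the bracket  -> -/+ join (DUP-like)
            return leftSplit[0].isEmpty() ? JoinType.DUP_LIKE : JoinType.INV3;
        }
        if (rightSplit.length >= 2) {
            // t[p[ : +/- join (DEL-like);  [p[t : -/- join (INV5)
            return rightSplit[0].isEmpty() ? JoinType.INV5 : JoinType.DEL_LIKE;
        }
        throw new IllegalArgumentException("not a breakend ALT: " + alt);
    }

    public static void main(final String[] args) {
        System.out.println(classify("A[2:321682["));   // DEL_LIKE
        System.out.println(classify("]13:123456]T"));  // DUP_LIKE
    }
}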