Use of htsjdk.variant.variantcontext.writer.VariantContextWriter in the project gatk-protected by broadinstitute.
From the class HaplotypeCallerSpark, method writeVariants.
/**
 * Writes the given variants to the configured output location.
 *
 * Currently this is horribly slow and memory-hungry on a full-size input, because it
 * collects every variant onto the driver before writing.
 *
 * This will be replaced by a parallel writer similar to what's done with {@link org.broadinstitute.hellbender.engine.spark.datasources.ReadsSparkSink}
 */
private void writeVariants(JavaRDD<VariantContext> variants) {
    // Pull all variants onto the driver -- this is the scalability bottleneck noted above.
    final List<VariantContext> allVariants = variants.collect();
    final SAMSequenceDictionary refDict = getReferenceSequenceDictionary();
    // Sort into reference coordinate order so the writer receives coordinate-sorted records.
    final List<VariantContext> inRefOrder = allVariants.stream()
            .sorted((a, b) -> IntervalUtils.compareLocatables(a, b, refDict))
            .collect(Collectors.toList());
    final HaplotypeCallerEngine hcEngine = new HaplotypeCallerEngine(hcArgs, getHeaderForReads(), new ReferenceMultiSourceAdapter(getReference(), getAuthHolder()));
    try (final VariantContextWriter writer = hcEngine.makeVCFWriter(output, getBestAvailableSequenceDictionary())) {
        hcEngine.writeHeader(writer, getHeaderForReads().getSequenceDictionary(), Collections.emptySet());
        for (final VariantContext vc : inRefOrder) {
            writer.add(vc);
        }
    }
}
Use of htsjdk.variant.variantcontext.writer.VariantContextWriter in the project gatk by broadinstitute.
From the class GVCFWriterUnitTest, method testAgainstExampleGVCF.
// Regression test: replays a small set of hom-ref blocks plus one SNP through GVCFWriter
// and compares the emitted GVCF text against a checked-in golden file.
@Test
public void testAgainstExampleGVCF() throws IOException {
// GQ band boundaries used to merge consecutive hom-ref sites into reference blocks.
final List<Integer> gqPartitions = Arrays.asList(1, 10, 20, 30, 40, 50, 60);
// Golden file the generated output must match byte-for-byte (modulo text comparison).
final File comparisonFile = new File("src/test/resources/org/broadinstitute/hellbender/utils/variant/writers/small.g.vcf");
final File outputFile = createTempFile("generated", ".g.vcf");
// NOTE(review): this builder appears unused below — presumably leftover scaffolding; confirm before removing.
final VariantContextBuilder chr = new VariantContextBuilder().chr(CHR1);
final Allele REF_C = Allele.create("C", true);
final Allele REF_G = Allele.create("G", true);
final Allele C = Allele.create("C", false);
// First hom-ref block (starts at 1:14663); its genotype builder is reused below with varying DP.
final GenotypeBuilder block1GenotypeBuilder = new GenotypeBuilder(SAMPLE_NAME, Arrays.asList(REF_C, REF_C)).DP(35).GQ(57).PL(new int[] { 0, 57, 855 });
final VariantContextBuilder block1 = new VariantContextBuilder(null, "1", 14663, 14663, Arrays.asList(REF_C, GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE)).genotypes(block1GenotypeBuilder.make());
// Second hom-ref block (1:14667-14672), emitted in the loop below with identical genotype data.
final VariantContextBuilder block2 = new VariantContextBuilder(null, "1", 14667, 14667, Arrays.asList(REF_G, GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE)).genotypes(new GenotypeBuilder(SAMPLE_NAME, Arrays.asList(REF_G, REF_G)).DP(34).GQ(60).PL(new int[] { 0, 60, 900 }).make());
// A het SNP at 1:14673 carrying the full set of annotations expected in the golden file;
// SNPs pass through the GVCFWriter unmerged, splitting the surrounding reference blocks.
final VariantContext snp = new VariantContextBuilder(null, "1", 14673, 14673, Arrays.asList(REF_G, C, GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE)).log10PError(541.77 / -10).attribute(GATKVCFConstants.BASE_QUAL_RANK_SUM_KEY, "-1.814").attribute(GATKVCFConstants.CLIPPING_RANK_SUM_KEY, "2.599").attribute(VCFConstants.DEPTH_KEY, 30).attribute(GATKVCFConstants.MLE_ALLELE_COUNT_KEY, new int[] { 1, 0 }).attribute(GATKVCFConstants.MLE_ALLELE_FREQUENCY_KEY, new double[] { 0.500, 0.00 }).attribute(VCFConstants.RMS_MAPPING_QUALITY_KEY, 26.87).attribute(VCFConstants.MAPPING_QUALITY_ZERO_KEY, 0).attribute(GATKVCFConstants.MAP_QUAL_RANK_SUM_KEY, 0.245).attribute(GATKVCFConstants.READ_POS_RANK_SUM_KEY, 0.294).genotypes(new GenotypeBuilder(SAMPLE_NAME, Arrays.asList(REF_G, C)).AD(new int[] { 7, 23, 0 }).DP(30).GQ(99).PL(new int[] { 570, 0, 212, 591, 281, 872 }).attribute(GATKVCFConstants.STRAND_BIAS_BY_SAMPLE_KEY, new int[] { 4, 3, 23, 0 }).make()).make();
// Third hom-ref block (starts at 1:14674); DP and PL are varied per-site below.
final GenotypeBuilder block3genotypeBuilder = new GenotypeBuilder(SAMPLE_NAME, Arrays.asList(REF_G, REF_G)).DP(29).GQ(54).PL(new int[] { 0, 54, 810 });
final VariantContextBuilder block3 = new VariantContextBuilder(null, "1", 14674, 14674, Arrays.asList(REF_G, GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE)).genotypes(block3genotypeBuilder.make());
// The GVCFWriter wraps (and on close, closes) the inner VCF writer.
try (VariantContextWriter writer = GATKVariantContextUtils.createVCFWriter(outputFile, null, false);
GVCFWriter gvcfWriter = new GVCFWriter(writer, gqPartitions, HomoSapiensConstants.DEFAULT_PLOIDY)) {
gvcfWriter.writeHeader(getMinimalVCFHeader());
// add with different DP's so that the DP and MIN_DP are correct
gvcfWriter.add(block1.genotypes(block1GenotypeBuilder.DP(35).make()).make());
gvcfWriter.add(block1.start(14664).stop(14664).genotypes(block1GenotypeBuilder.DP(35).make()).make());
// DP dips to 33 here so the merged block's MIN_DP differs from its (median) DP.
gvcfWriter.add(block1.start(14665).stop(14665).genotypes(block1GenotypeBuilder.DP(33).make()).make());
gvcfWriter.add(block1.start(14666).stop(14666).genotypes(block1GenotypeBuilder.DP(35).make()).make());
// Sites 14667-14672: identical hom-ref genotypes that should merge into one reference block.
for (int i = 14667; i < 14673; i++) {
gvcfWriter.add(block2.start(i).stop(i).make());
}
gvcfWriter.add(snp);
//vary both DP and PL so that MIN_DP and MIN_PL are both correct
gvcfWriter.add(block3.start(14674).stop(14674).genotypes(block3genotypeBuilder.DP(28).PL(new int[] { 1, 54, 980 }).make()).make());
gvcfWriter.add(block3.start(14675).stop(14675).genotypes(block3genotypeBuilder.DP(29).PL(new int[] { 0, 56, 900 }).make()).make());
gvcfWriter.add(block3.start(14676).stop(14676).genotypes(block3genotypeBuilder.DP(29).PL(new int[] { 0, 59, 810 }).make()).make());
}
// Compare the generated GVCF text against the golden file.
IntegrationTestSpec.assertEqualTextFiles(outputFile, comparisonFile);
}
Use of htsjdk.variant.variantcontext.writer.VariantContextWriter in the project gatk by broadinstitute.
From the class GATKVariantContextUtilsUnitTest, method testCreateVariantContextWriterNoOptions.
// Verifies that createVCFWriter with index/MD5 creation disabled produces the VCF itself
// and nothing else: no .idx index file and no .md5 sidecar file.
@Test
public void testCreateVariantContextWriterNoOptions() {
final File tmpDir = createTempDir("createVCFTest");
final File outFile = new File(tmpDir.getAbsolutePath(), "createVCFTest" + ".vcf");
final VariantContextWriter vcw = GATKVariantContextUtils.createVCFWriter(outFile, null, false);
vcw.close();
final File outFileIndex = new File(outFile.getAbsolutePath() + ".idx");
final File outFileMD5 = new File(outFile.getAbsolutePath() + ".md5");
// Fixed assertion message: was the double negative "No output file was not created".
Assert.assertTrue(outFile.exists(), "The output file was not created");
Assert.assertFalse(outFileIndex.exists(), "The createIndex argument was not honored");
Assert.assertFalse(outFileMD5.exists(), "The createMD5 argument was not honored");
}
Use of htsjdk.variant.variantcontext.writer.VariantContextWriter in the project gatk by broadinstitute.
From the class HaplotypeCallerEngine, method makeVCFWriter.
/**
 * Create a VCF or GVCF writer as appropriate, given our arguments
 *
 * @param outputVCF location to which the vcf should be written
 * @param readsDictionary sequence dictionary for the reads
 * @return a VCF or GVCF writer as appropriate, ready to use
 * @throws CommandLineException.BadArgumentValue if the configured GQ bands are malformed
 */
public VariantContextWriter makeVCFWriter(final String outputVCF, final SAMSequenceDictionary readsDictionary) {
Utils.nonNull(outputVCF);
Utils.nonNull(readsDictionary);
VariantContextWriter writer = GATKVariantContextUtils.createVCFWriter(new File(outputVCF), readsDictionary, false);
if (hcArgs.emitReferenceConfidence == ReferenceConfidenceMode.GVCF) {
try {
// In GVCF mode, wrap the plain VCF writer so hom-ref sites are banded by GQ.
writer = new GVCFWriter(writer, hcArgs.GVCFGQBands, hcArgs.genotypeArgs.samplePloidy);
} catch (IllegalArgumentException e) {
// Close the already-opened underlying writer before propagating the error,
// otherwise its file handle (and a possibly partially-written file) leaks.
writer.close();
throw new CommandLineException.BadArgumentValue("GQBands", "are malformed: " + e.getMessage());
}
}
return writer;
}
Use of htsjdk.variant.variantcontext.writer.VariantContextWriter in the project gatk by broadinstitute.
From the class GATKToolUnitTest, method testCreateVCFWriterLenientFalse.
// Verifies that a non-lenient VCF writer rejects malformed records: writing a variant with a
// bad attribute must raise IllegalStateException (declared via expectedExceptions).
@Test(dataProvider = "createVCFWriterLenientData", expectedExceptions = IllegalStateException.class)
public void testCreateVCFWriterLenientFalse(final File inputFile, final String outputExtension, // unused
final String indexExtension, final boolean createIndex, final boolean createMD5) throws IOException {
final TestGATKToolWithVariants strictTool = new TestGATKToolWithVariants();
// lenient == false: the writer must validate records instead of silently accepting them.
final File strictOutput = setupVCFWriter(inputFile, outputExtension, strictTool, createIndex, createMD5, false);
try (VariantContextWriter strictWriter = strictTool.createVCFWriter(strictOutput)) {
// Expected to throw IllegalStateException because of the bad attribute.
writeHeaderAndBadVariant(strictWriter);
}
}
Aggregations