Search in sources :

Example 1 with VariantContextWriter

use of htsjdk.variant.variantcontext.writer.VariantContextWriter in project gatk-protected by broadinstitute.

the class HaplotypeCallerSpark method writeVariants.

/**
     * WriteVariants, this is currently going to be horribly slow and explosive on a full size file since it performs a collect.
     *
     * This will be replaced by a parallel writer similar to what's done with {@link org.broadinstitute.hellbender.engine.spark.datasources.ReadsSparkSink}
     */
private void writeVariants(JavaRDD<VariantContext> variants) {
    final List<VariantContext> collectedVariants = variants.collect();
    final SAMSequenceDictionary referenceDictionary = getReferenceSequenceDictionary();
    final List<VariantContext> sortedVariants = collectedVariants.stream().sorted((o1, o2) -> IntervalUtils.compareLocatables(o1, o2, referenceDictionary)).collect(Collectors.toList());
    final HaplotypeCallerEngine hcEngine = new HaplotypeCallerEngine(hcArgs, getHeaderForReads(), new ReferenceMultiSourceAdapter(getReference(), getAuthHolder()));
    try (final VariantContextWriter writer = hcEngine.makeVCFWriter(output, getBestAvailableSequenceDictionary())) {
        hcEngine.writeHeader(writer, getHeaderForReads().getSequenceDictionary(), Collections.emptySet());
        sortedVariants.forEach(writer::add);
    }
}
Also used : CommandLineProgramProperties(org.broadinstitute.barclay.argparser.CommandLineProgramProperties) SparkProgramGroup(org.broadinstitute.hellbender.cmdline.programgroups.SparkProgramGroup) ReferenceMultiSource(org.broadinstitute.hellbender.engine.datasources.ReferenceMultiSource) Argument(org.broadinstitute.barclay.argparser.Argument) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) Advanced(org.broadinstitute.barclay.argparser.Advanced) org.broadinstitute.hellbender.cmdline(org.broadinstitute.hellbender.cmdline) ArgumentCollection(org.broadinstitute.barclay.argparser.ArgumentCollection) GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) SAMFileHeader(htsjdk.samtools.SAMFileHeader) GATKException(org.broadinstitute.hellbender.exceptions.GATKException) Function(java.util.function.Function) ReferenceSequenceFile(htsjdk.samtools.reference.ReferenceSequenceFile) IntervalUtils(org.broadinstitute.hellbender.utils.IntervalUtils) ReferenceBases(org.broadinstitute.hellbender.utils.reference.ReferenceBases) HaplotypeCallerArgumentCollection(org.broadinstitute.hellbender.tools.walkers.haplotypecaller.HaplotypeCallerArgumentCollection) SparkReadShard(org.broadinstitute.hellbender.engine.spark.SparkReadShard) StreamSupport(java.util.stream.StreamSupport) JavaRDD(org.apache.spark.api.java.JavaRDD) FlatMapFunction(org.apache.spark.api.java.function.FlatMapFunction) org.broadinstitute.barclay.argparser(org.broadinstitute.barclay.argparser) Broadcast(org.apache.spark.broadcast.Broadcast) OverlapDetector(htsjdk.samtools.util.OverlapDetector) Iterator(java.util.Iterator) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) Collection(java.util.Collection) GATKSparkTool(org.broadinstitute.hellbender.engine.spark.GATKSparkTool) IOException(java.io.IOException) ReadFilter(org.broadinstitute.hellbender.engine.filters.ReadFilter) Tuple2(scala.Tuple2) JavaPairRDD(org.apache.spark.api.java.JavaPairRDD) HaplotypeCaller(org.broadinstitute.hellbender.tools.walkers.haplotypecaller.HaplotypeCaller) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Collectors(java.util.stream.Collectors) HaplotypeCallerEngine(org.broadinstitute.hellbender.tools.walkers.haplotypecaller.HaplotypeCallerEngine) Serializable(java.io.Serializable) org.broadinstitute.hellbender.engine(org.broadinstitute.hellbender.engine) List(java.util.List) Stream(java.util.stream.Stream) UserException(org.broadinstitute.hellbender.exceptions.UserException) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VariantContext(htsjdk.variant.variantcontext.VariantContext) Utils(org.broadinstitute.hellbender.utils.Utils) ReferenceSequence(htsjdk.samtools.reference.ReferenceSequence) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Collections(java.util.Collections) HaplotypeCallerEngine(org.broadinstitute.hellbender.tools.walkers.haplotypecaller.HaplotypeCallerEngine) VariantContext(htsjdk.variant.variantcontext.VariantContext) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary)

Example 2 with VariantContextWriter

use of htsjdk.variant.variantcontext.writer.VariantContextWriter in project gatk by broadinstitute.

the class GVCFWriterUnitTest method testAgainstExampleGVCF.

@Test
public void testAgainstExampleGVCF() throws IOException {
    final List<Integer> gqPartitions = Arrays.asList(1, 10, 20, 30, 40, 50, 60);
    final File comparisonFile = new File("src/test/resources/org/broadinstitute/hellbender/utils/variant/writers/small.g.vcf");
    final File outputFile = createTempFile("generated", ".g.vcf");
    final VariantContextBuilder chr = new VariantContextBuilder().chr(CHR1);
    final Allele REF_C = Allele.create("C", true);
    final Allele REF_G = Allele.create("G", true);
    final Allele C = Allele.create("C", false);
    final GenotypeBuilder block1GenotypeBuilder = new GenotypeBuilder(SAMPLE_NAME, Arrays.asList(REF_C, REF_C)).DP(35).GQ(57).PL(new int[] { 0, 57, 855 });
    final VariantContextBuilder block1 = new VariantContextBuilder(null, "1", 14663, 14663, Arrays.asList(REF_C, GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE)).genotypes(block1GenotypeBuilder.make());
    final VariantContextBuilder block2 = new VariantContextBuilder(null, "1", 14667, 14667, Arrays.asList(REF_G, GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE)).genotypes(new GenotypeBuilder(SAMPLE_NAME, Arrays.asList(REF_G, REF_G)).DP(34).GQ(60).PL(new int[] { 0, 60, 900 }).make());
    final VariantContext snp = new VariantContextBuilder(null, "1", 14673, 14673, Arrays.asList(REF_G, C, GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE)).log10PError(541.77 / -10).attribute(GATKVCFConstants.BASE_QUAL_RANK_SUM_KEY, "-1.814").attribute(GATKVCFConstants.CLIPPING_RANK_SUM_KEY, "2.599").attribute(VCFConstants.DEPTH_KEY, 30).attribute(GATKVCFConstants.MLE_ALLELE_COUNT_KEY, new int[] { 1, 0 }).attribute(GATKVCFConstants.MLE_ALLELE_FREQUENCY_KEY, new double[] { 0.500, 0.00 }).attribute(VCFConstants.RMS_MAPPING_QUALITY_KEY, 26.87).attribute(VCFConstants.MAPPING_QUALITY_ZERO_KEY, 0).attribute(GATKVCFConstants.MAP_QUAL_RANK_SUM_KEY, 0.245).attribute(GATKVCFConstants.READ_POS_RANK_SUM_KEY, 0.294).genotypes(new GenotypeBuilder(SAMPLE_NAME, Arrays.asList(REF_G, C)).AD(new int[] { 7, 23, 0 }).DP(30).GQ(99).PL(new int[] { 570, 0, 212, 591, 281, 872 }).attribute(GATKVCFConstants.STRAND_BIAS_BY_SAMPLE_KEY, new int[] { 4, 3, 23, 0 }).make()).make();
    final GenotypeBuilder block3genotypeBuilder = new GenotypeBuilder(SAMPLE_NAME, Arrays.asList(REF_G, REF_G)).DP(29).GQ(54).PL(new int[] { 0, 54, 810 });
    final VariantContextBuilder block3 = new VariantContextBuilder(null, "1", 14674, 14674, Arrays.asList(REF_G, GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE)).genotypes(block3genotypeBuilder.make());
    try (VariantContextWriter writer = GATKVariantContextUtils.createVCFWriter(outputFile, null, false);
        GVCFWriter gvcfWriter = new GVCFWriter(writer, gqPartitions, HomoSapiensConstants.DEFAULT_PLOIDY)) {
        gvcfWriter.writeHeader(getMinimalVCFHeader());
        // add with different DP's so that the DP and MIN_DP are correct
        gvcfWriter.add(block1.genotypes(block1GenotypeBuilder.DP(35).make()).make());
        gvcfWriter.add(block1.start(14664).stop(14664).genotypes(block1GenotypeBuilder.DP(35).make()).make());
        gvcfWriter.add(block1.start(14665).stop(14665).genotypes(block1GenotypeBuilder.DP(33).make()).make());
        gvcfWriter.add(block1.start(14666).stop(14666).genotypes(block1GenotypeBuilder.DP(35).make()).make());
        for (int i = 14667; i < 14673; i++) {
            gvcfWriter.add(block2.start(i).stop(i).make());
        }
        gvcfWriter.add(snp);
        //vary both DP and PL so that MIN_DP and MIN_PL are both correct
        gvcfWriter.add(block3.start(14674).stop(14674).genotypes(block3genotypeBuilder.DP(28).PL(new int[] { 1, 54, 980 }).make()).make());
        gvcfWriter.add(block3.start(14675).stop(14675).genotypes(block3genotypeBuilder.DP(29).PL(new int[] { 0, 56, 900 }).make()).make());
        gvcfWriter.add(block3.start(14676).stop(14676).genotypes(block3genotypeBuilder.DP(29).PL(new int[] { 0, 59, 810 }).make()).make());
    }
    IntegrationTestSpec.assertEqualTextFiles(outputFile, comparisonFile);
}
Also used : VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) File(java.io.File) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 3 with VariantContextWriter

use of htsjdk.variant.variantcontext.writer.VariantContextWriter in project gatk by broadinstitute.

the class GATKVariantContextUtilsUnitTest method testCreateVariantContextWriterNoOptions.

@Test
public void testCreateVariantContextWriterNoOptions() {
    final File tmpDir = createTempDir("createVCFTest");
    final File outFile = new File(tmpDir.getAbsolutePath(), "createVCFTest" + ".vcf");
    final VariantContextWriter vcw = GATKVariantContextUtils.createVCFWriter(outFile, null, false);
    vcw.close();
    final File outFileIndex = new File(outFile.getAbsolutePath() + ".idx");
    final File outFileMD5 = new File(outFile.getAbsolutePath() + ".md5");
    Assert.assertTrue(outFile.exists(), "No output file was not created");
    Assert.assertFalse(outFileIndex.exists(), "The createIndex argument was not honored");
    Assert.assertFalse(outFileMD5.exists(), "The createMD5 argument was not honored");
}
Also used : VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) File(java.io.File) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 4 with VariantContextWriter

use of htsjdk.variant.variantcontext.writer.VariantContextWriter in project gatk by broadinstitute.

the class HaplotypeCallerEngine method makeVCFWriter.

/**
     * Create a VCF or GVCF writer as appropriate, given our arguments
     *
     * @param outputVCF location to which the vcf should be written
     * @param readsDictionary sequence dictionary for the reads
     * @return a VCF or GVCF writer as appropriate, ready to use
     */
public VariantContextWriter makeVCFWriter(final String outputVCF, final SAMSequenceDictionary readsDictionary) {
    Utils.nonNull(outputVCF);
    Utils.nonNull(readsDictionary);
    VariantContextWriter writer = GATKVariantContextUtils.createVCFWriter(new File(outputVCF), readsDictionary, false);
    if (hcArgs.emitReferenceConfidence == ReferenceConfidenceMode.GVCF) {
        try {
            writer = new GVCFWriter(writer, hcArgs.GVCFGQBands, hcArgs.genotypeArgs.samplePloidy);
        } catch (IllegalArgumentException e) {
            throw new CommandLineException.BadArgumentValue("GQBands", "are malformed: " + e.getMessage());
        }
    }
    return writer;
}
Also used : GVCFWriter(org.broadinstitute.hellbender.utils.variant.writers.GVCFWriter) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) ReferenceSequenceFile(htsjdk.samtools.reference.ReferenceSequenceFile) File(java.io.File) CommandLineException(org.broadinstitute.barclay.argparser.CommandLineException)

Example 5 with VariantContextWriter

use of htsjdk.variant.variantcontext.writer.VariantContextWriter in project gatk by broadinstitute.

the class GATKToolUnitTest method testCreateVCFWriterLenientFalse.

@Test(dataProvider = "createVCFWriterLenientData", expectedExceptions = IllegalStateException.class)
public void testCreateVCFWriterLenientFalse(final File inputFile, final String outputExtension, // unused
final String indexExtension, final boolean createIndex, final boolean createMD5) throws IOException {
    // verify lenient==false is honored by writing a bad attribute
    final TestGATKToolWithVariants tool = new TestGATKToolWithVariants();
    final File outputFile = setupVCFWriter(inputFile, outputExtension, tool, createIndex, createMD5, false);
    try (VariantContextWriter writer = tool.createVCFWriter(outputFile)) {
        // throws due to bad attribute
        writeHeaderAndBadVariant(writer);
    }
}
Also used : VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) File(java.io.File) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Aggregations

VariantContextWriter (htsjdk.variant.variantcontext.writer.VariantContextWriter)132 File (java.io.File)67 VCFHeader (htsjdk.variant.vcf.VCFHeader)65 VariantContext (htsjdk.variant.variantcontext.VariantContext)60 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)53 ArrayList (java.util.ArrayList)41 IOException (java.io.IOException)38 VariantContextBuilder (htsjdk.variant.variantcontext.VariantContextBuilder)35 VCFHeaderLine (htsjdk.variant.vcf.VCFHeaderLine)34 HashSet (java.util.HashSet)32 List (java.util.List)29 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)28 Allele (htsjdk.variant.variantcontext.Allele)28 Collectors (java.util.stream.Collectors)27 VcfIterator (com.github.lindenb.jvarkit.util.vcf.VcfIterator)26 VCFFileReader (htsjdk.variant.vcf.VCFFileReader)26 Parameter (com.beust.jcommander.Parameter)24 Logger (com.github.lindenb.jvarkit.util.log.Logger)24 Launcher (com.github.lindenb.jvarkit.util.jcommander.Launcher)23 Program (com.github.lindenb.jvarkit.util.jcommander.Program)23