Search in sources :

Example 16 with VariantContextWriter

use of htsjdk.variant.variantcontext.writer.VariantContextWriter in project gatk-protected by broadinstitute.

the class CreateSomaticPanelOfNormals method doWork.

/**
 * Merges the records of all input VCFs in coordinate order, hands every group of
 * records that share a start position to {@code processVariantsAtSamePosition}, and
 * writes the result to {@code outputVcf}.
 *
 * <p>The sequence dictionary and the record comparator are taken from the header of the
 * first input VCF; every input must have contig lines compatible with that comparator.
 *
 * @return the string {@code "SUCCESS"} when all inputs have been processed
 * @throws IllegalArgumentException if no input VCF was supplied or an input has incompatible contigs
 *         (presumably what {@code Utils.validateArg} throws -- confirm against Utils)
 */
public Object doWork() {
    final List<File> inputVcfs = new ArrayList<>(vcfs);
    // Fail with a clear message instead of an IndexOutOfBoundsException from get(0) below.
    Utils.validateArg(!inputVcfs.isEmpty(), () -> "At least one input VCF is required.");

    // Only the header of the first VCF is needed here, so release its reader right away.
    final VCFHeader headerOfFirstVcf;
    try (final VCFFileReader firstReader = new VCFFileReader(inputVcfs.get(0), false)) {
        headerOfFirstVcf = firstReader.getFileHeader();
    }
    final SAMSequenceDictionary sequenceDictionary = headerOfFirstVcf.getSequenceDictionary();
    final VariantContextComparator comparator = headerOfFirstVcf.getVCFRecordComparator();

    final List<VCFFileReader> readers = new ArrayList<>(inputVcfs.size());
    final Collection<VCFHeader> headers = new HashSet<>(inputVcfs.size());
    try {
        // First pass: open every reader and validate its header before any record iterator is opened.
        for (final File vcf : inputVcfs) {
            final VCFFileReader reader = new VCFFileReader(vcf, false);
            readers.add(reader);
            final VCFHeader header = reader.getFileHeader();
            Utils.validateArg(comparator.isCompatible(header.getContigLines()), () -> vcf.getAbsolutePath() + " has incompatible contigs.");
            headers.add(header);
        }

        try (final VariantContextWriter writer = GATKVariantContextUtils.createVCFWriter(outputVcf, sequenceDictionary, false, Options.INDEX_ON_THE_FLY)) {
            writer.writeHeader(new VCFHeader(VCFUtils.smartMergeHeaders(headers, false)));

            // Second pass: open the record iterators only once everything else is known to be valid.
            final Collection<CloseableIterator<VariantContext>> iterators = new ArrayList<>(readers.size());
            for (final VCFFileReader reader : readers) {
                iterators.add(reader.iterator());
            }

            try (final MergingIterator<VariantContext> mergingIterator = new MergingIterator<>(comparator, iterators)) {
                // Placeholder interval that no real record overlaps, so the first record starts a new group.
                SimpleInterval currentPosition = new SimpleInterval("FAKE", 1, 1);
                final List<VariantContext> variantsAtThisPosition = new ArrayList<>(20);
                while (mergingIterator.hasNext()) {
                    final VariantContext vc = mergingIterator.next();
                    if (!currentPosition.overlaps(vc)) {
                        // A new position begins: emit the group collected so far (empty on the first record).
                        processVariantsAtSamePosition(variantsAtThisPosition, writer);
                        variantsAtThisPosition.clear();
                        currentPosition = new SimpleInterval(vc.getContig(), vc.getStart(), vc.getStart());
                    }
                    variantsAtThisPosition.add(vc);
                }
                // The loop only flushes when a new position starts, so the group collected for
                // the last position must be emitted explicitly or it would be silently dropped.
                processVariantsAtSamePosition(variantsAtThisPosition, writer);
            }
        }
    } finally {
        // Readers are closed last, after the iterators obtained from them.
        readers.forEach(VCFFileReader::close);
    }
    return "SUCCESS";
}
Also used : CloseableIterator(htsjdk.samtools.util.CloseableIterator) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VariantContext(htsjdk.variant.variantcontext.VariantContext) VariantContextComparator(htsjdk.variant.variantcontext.VariantContextComparator) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) MergingIterator(htsjdk.samtools.util.MergingIterator) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) File(java.io.File)

Example 17 with VariantContextWriter

use of htsjdk.variant.variantcontext.writer.VariantContextWriter in project gatk-protected by broadinstitute.

the class EvaluateCopyNumberTriStateCalls method openVCFWriter.

/**
 * Builds a VCF writer for {@code outputFile} with all writer options cleared and writes
 * the evaluation header (FORMAT, INFO and FILTER definitions) for the given samples.
 *
 * @param outputFile destination file of the VCF output
 * @param samples    sample names to declare in the header
 * @return the writer, with its header already written; the caller is responsible for closing it
 */
private VariantContextWriter openVCFWriter(final File outputFile, final Set<String> samples) {
    // The header is assembled before the writer is built so that a failure while
    // assembling it cannot leave an open writer behind.
    final VCFHeader header = new VCFHeader(Collections.emptySet(), samples);
    CopyNumberTriStateAllele.addHeaderLinesTo(header);
    EvaluationClass.addHeaderLinesTo(header);
    // Format annotations.
    header.addMetaDataLine(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.Character, "Called genotype"));
    // CALL_QUALITY_KEY is declared exactly once. A second FORMAT line with the same ID and a
    // different description used to be added further down, duplicating the definition.
    // NOTE(review): the removed line may have been meant for a different key -- confirm.
    header.addMetaDataLine(new VCFFormatHeaderLine(VariantEvaluationContext.CALL_QUALITY_KEY, 1, VCFHeaderLineType.Float, "Quality of the call"));
    header.addMetaDataLine(new VCFFormatHeaderLine(VariantEvaluationContext.CALLED_SEGMENTS_COUNT_KEY, 1, VCFHeaderLineType.Integer, "Number of called segments that overlap with the truth"));
    header.addMetaDataLine(new VCFFormatHeaderLine(VariantEvaluationContext.CALLED_ALLELE_COUNTS_KEY, VCFHeaderLineCount.G, VCFHeaderLineType.Integer, "Called allele count for mixed calls"));
    header.addMetaDataLine(new VCFFormatHeaderLine(VariantEvaluationContext.TRUTH_COPY_FRACTION_KEY, 1, VCFHeaderLineType.Float, "Truth copy fraction estimated"));
    header.addMetaDataLine(new VCFFormatHeaderLine(VariantEvaluationContext.TRUTH_QUALITY_KEY, 1, VCFHeaderLineType.Float, "Truth call quality"));
    header.addMetaDataLine(new VCFFormatHeaderLine(VariantEvaluationContext.EVALUATION_CLASS_KEY, 1, VCFHeaderLineType.Character, "The evaluation class for the call or lack of call. It the values of the header key '" + EvaluationClass.VCF_HEADER_KEY + "'"));
    header.addMetaDataLine(new VCFFormatHeaderLine(VariantEvaluationContext.TRUTH_GENOTYPE_KEY, 1, VCFHeaderLineType.Character, "The truth genotype"));
    header.addMetaDataLine(new VCFFormatHeaderLine(VariantEvaluationContext.CALLED_TARGET_COUNT_KEY, 1, VCFHeaderLineType.Integer, "Number of targets covered by called segments"));
    header.addMetaDataLine(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_QUALITY_KEY, 1, VCFHeaderLineType.Integer, "The quality of the call (the maximum if there are more than one segment"));
    header.addMetaDataLine(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_FILTER_KEY, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Character, "Genotype filters"));
    // Info annotations.
    header.addMetaDataLine(new VCFInfoHeaderLine(VariantEvaluationContext.TRUTH_ALLELE_FREQUENCY_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "The frequency of the alternative alleles in the truth callset"));
    header.addMetaDataLine(new VCFInfoHeaderLine(VariantEvaluationContext.TRUTH_ALLELE_NUMBER_KEY, 1, VCFHeaderLineType.Integer, "Total number of called alleles in the truth callset"));
    header.addMetaDataLine(new VCFInfoHeaderLine(VariantEvaluationContext.CALLS_ALLELE_FREQUENCY_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "The frequency of the alternative alleles in the actual callset"));
    header.addMetaDataLine(new VCFInfoHeaderLine(VariantEvaluationContext.CALLS_ALLELE_NUMBER_KEY, 1, VCFHeaderLineType.Integer, "Total number of called alleles in the actual callset"));
    header.addMetaDataLine(new VCFInfoHeaderLine(VariantEvaluationContext.TRUTH_TARGET_COUNT_KEY, 1, VCFHeaderLineType.Integer, "Number of targets overlapped by this variant"));
    header.addMetaDataLine(new VCFInfoHeaderLine(VCFConstants.END_KEY, 1, VCFHeaderLineType.Integer, "Stop position for the variant"));
    // Filter annotations: each filter is declared under both its enum name and its acronym.
    for (final EvaluationFilter filter : EvaluationFilter.values()) {
        header.addMetaDataLine(new VCFFilterHeaderLine(filter.name(), filter.description));
        header.addMetaDataLine(new VCFFilterHeaderLine(filter.acronym, filter.description));
    }
    header.addMetaDataLine(new VCFFilterHeaderLine(EvaluationFilter.PASS, "Indicates that it passes all filters"));

    final VariantContextWriterBuilder builder = new VariantContextWriterBuilder();
    builder.setOutputFile(outputFile);
    builder.clearOptions();
    final VariantContextWriter result = builder.build();
    try {
        result.writeHeader(header);
    } catch (final RuntimeException ex) {
        // Do not leak the open writer when the header cannot be written.
        result.close();
        throw ex;
    }
    return result;
}
Also used : VariantContextWriterBuilder(htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter)

Example 18 with VariantContextWriter

use of htsjdk.variant.variantcontext.writer.VariantContextWriter in project gatk by broadinstitute.

the class GATKToolUnitTest method testCreateVCFWriterWithOptions.

/**
 * Creates a VCF writer through the tool, closes it, and checks that the output file exists
 * and that the index and MD5 companion files exist exactly when they were requested.
 */
@Test(dataProvider = "createVCFWriterData")
public void testCreateVCFWriterWithOptions(final File inputFile, final String outputExtension, final String indexExtension, final boolean createIndex, final boolean createMD5) throws IOException {
    // create a writer and make sure the requested index/md5 params are honored
    final TestGATKToolWithVariants tool = new TestGATKToolWithVariants();
    final File outputFile = setupVCFWriter(inputFile, outputExtension, tool, createIndex, createMD5, false);
    final VariantContextWriter writer = tool.createVCFWriter(outputFile);
    // nothing is written; the writer is closed before the files on disk are inspected
    writer.close();
    final File outFileIndex = new File(outputFile.getAbsolutePath() + indexExtension);
    final File outFileMD5 = new File(outputFile.getAbsolutePath() + ".md5");
    Assert.assertTrue(outputFile.exists(), "Output file was not created");
    Assert.assertEquals(outFileIndex.exists(), createIndex, "The createIndex argument was not honored");
    Assert.assertEquals(outFileMD5.exists(), createMD5, "The createMD5 argument was not honored");
}
Also used : VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) File(java.io.File) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 19 with VariantContextWriter

use of htsjdk.variant.variantcontext.writer.VariantContextWriter in project gatk by broadinstitute.

the class GATKToolUnitTest method testCreateVCFWriterLenientTrue.

/**
 * Creates a VCF writer with the lenient flag passed to {@code setupVCFWriter} set to
 * {@code true}, writes a header plus a variant with a bad attribute through it, and then
 * checks that the output, index and MD5 files exist as requested.
 */
@Test(dataProvider = "createVCFWriterLenientData")
public void testCreateVCFWriterLenientTrue(final File inputFile, final String outputExtension, final String indexExtension, final boolean createIndex, final boolean createMD5) throws IOException {
    final TestGATKToolWithVariants tool = new TestGATKToolWithVariants();
    // verify lenient==true is honored by writing a bad attribute
    final File outputFile = setupVCFWriter(inputFile, outputExtension, tool, createIndex, createMD5, true);
    try (VariantContextWriter writer = tool.createVCFWriter(outputFile)) {
        // writing the bad attribute is expected to succeed with lenient set
        writeHeaderAndBadVariant(writer);
    }
    final File outFileIndex = new File(outputFile.getAbsolutePath() + indexExtension);
    final File outFileMD5 = new File(outputFile.getAbsolutePath() + ".md5");
    Assert.assertTrue(outputFile.exists(), "Output file was not created");
    Assert.assertEquals(outFileIndex.exists(), createIndex, "The createIndex argument was not honored");
    Assert.assertEquals(outFileMD5.exists(), createMD5, "The createMD5 argument was not honored");
}
Also used : VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) File(java.io.File) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 20 with VariantContextWriter

use of htsjdk.variant.variantcontext.writer.VariantContextWriter in project gatk by broadinstitute.

the class GATKToolUnitTest method testCreateVCFWriterWithNoSequenceDictionary.

/**
 * Creates a VCF writer through a tool that has no sequence dictionary, closes it, and checks
 * that the output file exists, that no index file exists regardless of {@code createIndex},
 * and that the MD5 file exists exactly when it was requested.
 */
@Test(dataProvider = "createVCFWriterData")
public void testCreateVCFWriterWithNoSequenceDictionary(final File inputFile, final String outputExtension, final String indexExtension, final boolean createIndex, final boolean createMD5) throws IOException {
    // verify that a null sequence dictionary still results in a file, but with no index
    final TestGATKVariantToolWithNoSequenceDictionary tool = new TestGATKVariantToolWithNoSequenceDictionary();
    final File outputFile = setupVCFWriter(inputFile, outputExtension, tool, createIndex, createMD5, false);
    final VariantContextWriter writer = tool.createVCFWriter(outputFile);
    // nothing is written; the writer is closed before the files on disk are inspected
    writer.close();
    final File outFileIndex = new File(outputFile.getAbsolutePath() + indexExtension);
    final File outFileMD5 = new File(outputFile.getAbsolutePath() + ".md5");
    Assert.assertTrue(outputFile.exists(), "Output file was not created");
    // always false with no seq dictionary
    Assert.assertFalse(outFileIndex.exists(), "An index file should not have been created");
    Assert.assertEquals(outFileMD5.exists(), createMD5, "The createMD5 argument was not honored");
}
Also used : VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) File(java.io.File) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Aggregations

VariantContextWriter (htsjdk.variant.variantcontext.writer.VariantContextWriter)31 File (java.io.File)19 VariantContext (htsjdk.variant.variantcontext.VariantContext)13 VariantContextWriterBuilder (htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder)12 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)12 Test (org.testng.annotations.Test)12 VCFHeader (htsjdk.variant.vcf.VCFHeader)9 VCFFileReader (htsjdk.variant.vcf.VCFFileReader)8 UserException (org.broadinstitute.hellbender.exceptions.UserException)8 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)7 Options (htsjdk.variant.variantcontext.writer.Options)6 ProgressLogger (org.broadinstitute.hellbender.utils.runtime.ProgressLogger)6 IOException (java.io.IOException)5 IndexedFastaSequenceFile (htsjdk.samtools.reference.IndexedFastaSequenceFile)4 ReferenceSequenceFile (htsjdk.samtools.reference.ReferenceSequenceFile)4 VariantContextComparator (htsjdk.variant.variantcontext.VariantContextComparator)4 CloseableIterator (htsjdk.samtools.util.CloseableIterator)3 MergingIterator (htsjdk.samtools.util.MergingIterator)3 Function (java.util.function.Function)3 Collectors (java.util.stream.Collectors)3