Search in sources :

Example 21 with VariantContextWriter

use of htsjdk.variant.variantcontext.writer.VariantContextWriter in project gatk by broadinstitute.

the class GATKToolUnitTest method testCreateVCFWriterDefaults.

/**
 * Verifies the default behavior of {@code createVCFWriter}: after the writer is closed, the
 * output file and its index must exist, and no md5 file must exist.
 *
 * @param inputFile       unused by this test (supplied by the shared data provider)
 * @param outputExtension extension appended to the output file name
 * @param indexExtension  extension appended to the output file path to locate the index
 * @param createIndex     unused by this test (supplied by the shared data provider)
 * @param createMD5       unused by this test (supplied by the shared data provider)
 */
@Test(dataProvider = "createVCFWriterData")
public void testCreateVCFWriterDefaults(
        final File inputFile,
        final String outputExtension,
        final String indexExtension,
        final boolean createIndex,
        final boolean createMD5) throws IOException {
    // create a writer and make sure the default index/md5 params are honored
    final TestGATKToolWithVariants tool = createTestVariantTool(null);
    final File tmpDir = createTempDir("createVCFTest");
    final File outputFile = new File(tmpDir.getAbsolutePath(), "createVCFTest" + outputExtension);

    // Closing the writer immediately is enough: only the presence/absence of files is checked below.
    final VariantContextWriter writer = tool.createVCFWriter(outputFile);
    writer.close();

    final File outFileIndex = new File(outputFile.getAbsolutePath() + indexExtension);
    final File outFileMD5 = new File(outputFile.getAbsolutePath() + ".md5");

    Assert.assertTrue(outputFile.exists(), "The output file was not created");
    Assert.assertTrue(outFileIndex.exists(), "The index file was not created");
    Assert.assertFalse(outFileMD5.exists(), "An md5 file was created and should not have been");
}
Also used : VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) File(java.io.File) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) BaseTest(org.broadinstitute.hellbender.utils.test.BaseTest) Test(org.testng.annotations.Test)

Example 22 with VariantContextWriter

use of htsjdk.variant.variantcontext.writer.VariantContextWriter in project gatk by broadinstitute.

the class SVVCFWriter method writeVariants.

/**
 * Writes a VCF header followed by every variant in {@code variantsArrayList} to {@code fileName}.
 *
 * @param pipelineOptions             not used by this method
 * @param fileName                    destination passed to {@code BucketUtils.createFile}
 * @param variantsArrayList           variants to write, in iteration order
 * @param referenceSequenceDictionary dictionary used to build both the writer and the header
 * @throws GATKException if an {@link IOException} occurs while creating or closing the output
 */
private static void writeVariants(final PipelineOptions pipelineOptions, final String fileName, final List<VariantContext> variantsArrayList, final SAMSequenceDictionary referenceSequenceDictionary) {
    // Both resources are managed by try-with-resources so that the writer is closed even when
    // writing the header or a variant throws; the writer is closed first, then the stream.
    try (final OutputStream outputStream = new BufferedOutputStream(BucketUtils.createFile(fileName));
         final VariantContextWriter vcfWriter = getVariantContextWriter(outputStream, referenceSequenceDictionary)) {
        vcfWriter.writeHeader(getVcfHeader(referenceSequenceDictionary));
        variantsArrayList.forEach(vcfWriter::add);
    } catch (final IOException e) {
        throw new GATKException("Could not create output file", e);
    }
}
Also used : OutputStream(java.io.OutputStream) BufferedOutputStream(java.io.BufferedOutputStream) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) IOException(java.io.IOException) GATKException(org.broadinstitute.hellbender.exceptions.GATKException)

Example 23 with VariantContextWriter

use of htsjdk.variant.variantcontext.writer.VariantContextWriter in project gatk by broadinstitute.

the class GatherVcfs method gatherConventionally.

/**
 * Code for gathering multiple VCFs that works regardless of input format and output format, but can be slow.
 *
 * The header of the first input file is written to the output; every variant of every input is then
 * appended in the order given. Before a file's variants are copied, its first variant is compared
 * against the last variant written so far and must sort strictly after it.
 *
 * @param sequenceDictionary  reference dictionary set on the output writer
 * @param createIndex         whether {@code Options.INDEX_ON_THE_FLY} is enabled on the writer
 * @param inputFiles          input VCFs, in the order they are to be concatenated
 * @param outputFile          destination file
 * @param cloudPrefetchBuffer passed through to {@code getReaderFromVCFUri}
 * @throws IllegalStateException               if the first variant of a file does not sort after the
 *                                             last variant of the preceding files
 * @throws UserException.CouldNotReadInputFile if an {@link IOException} occurs while reading an input
 */
private static void gatherConventionally(final SAMSequenceDictionary sequenceDictionary, final boolean createIndex, final List<Path> inputFiles, final File outputFile, final int cloudPrefetchBuffer) {
    final EnumSet<Options> options = EnumSet.copyOf(VariantContextWriterBuilder.DEFAULT_OPTIONS);
    if (createIndex) {
        options.add(Options.INDEX_ON_THE_FLY);
    } else {
        options.remove(Options.INDEX_ON_THE_FLY);
    }
    try (final VariantContextWriter out = new VariantContextWriterBuilder().setOutputFile(outputFile).setReferenceDictionary(sequenceDictionary).setOptions(options).build()) {
        final ProgressLogger progress = new ProgressLogger(log, 10000);
        VariantContext lastContext = null;
        Path lastFile = null;
        VCFHeader firstHeader = null;
        VariantContextComparator comparator = null;
        for (final Path f : inputFiles) {
            // Declared outside the try so the finally block can release them on every exit path.
            FeatureReader<VariantContext> variantReader = null;
            PeekableIterator<VariantContext> variantIterator = null;
            try {
                // Concatenate explicitly: a trailing argument without a placeholder in the format
                // string would be dropped by placeholder-based loggers.
                log.debug("Gathering from file: " + f.toUri().toString());
                variantReader = getReaderFromVCFUri(f, cloudPrefetchBuffer);
                variantIterator = new PeekableIterator<>(variantReader.iterator());
                final VCFHeader header = (VCFHeader) variantReader.getHeader();
                if (firstHeader == null) {
                    // The first file's header defines both the output header and the contig ordering.
                    firstHeader = header;
                    out.writeHeader(firstHeader);
                    comparator = new VariantContextComparator(firstHeader.getContigLines());
                }
                if (lastContext != null && variantIterator.hasNext()) {
                    // Peek (do not consume) to check ordering across the file boundary.
                    final VariantContext vc = variantIterator.peek();
                    if (comparator.compare(vc, lastContext) <= 0) {
                        throw new IllegalStateException("First variant in file " + f.toUri().toString() + " is at " + vc.getSource() + " but last variant in earlier file " + lastFile.toUri().toString() + " is at " + lastContext.getSource());
                    }
                }
                while (variantIterator.hasNext()) {
                    lastContext = variantIterator.next();
                    out.add(lastContext);
                    progress.record(lastContext.getContig(), lastContext.getStart());
                }
                lastFile = f;
            } catch (IOException e) {
                throw new UserException.CouldNotReadInputFile(f, e.getMessage(), e);
            } finally {
                // Previously these were closed only on the success path, leaking the reader when
                // an ordering violation or read error was raised.
                CloserUtil.close(variantIterator);
                CloserUtil.close(variantReader);
            }
        }
    }
}
Also used : Path(java.nio.file.Path) Options(htsjdk.variant.variantcontext.writer.Options) VariantContext(htsjdk.variant.variantcontext.VariantContext) ProgressLogger(org.broadinstitute.hellbender.utils.runtime.ProgressLogger) VariantContextComparator(htsjdk.variant.variantcontext.VariantContextComparator) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) VariantContextWriterBuilder(htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) UserException(org.broadinstitute.hellbender.exceptions.UserException) VCFHeader(htsjdk.variant.vcf.VCFHeader)

Example 24 with VariantContextWriter

use of htsjdk.variant.variantcontext.writer.VariantContextWriter in project gatk by broadinstitute.

the class LiftOverVcf method doWork.

/**
 * Lifts the variants of {@code INPUT} over to the coordinates of {@code REFERENCE_SEQUENCE}
 * using the chain file {@code CHAIN}.
 *
 * Variants that cannot be lifted over, or whose reference allele no longer matches the target
 * reference, are written (with the corresponding filter) to {@code REJECT}. All other variants
 * are collected in a sorting collection and then written in sorted order to {@code OUTPUT}.
 *
 * @return always {@code null}
 */
@Override
protected Object doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE);
    IOUtil.assertFileIsReadable(CHAIN);
    IOUtil.assertFileIsWritable(OUTPUT);
    IOUtil.assertFileIsWritable(REJECT);
    ////////////////////////////////////////////////////////////////////////
    // Setup the inputs
    ////////////////////////////////////////////////////////////////////////
    final LiftOver liftOver = new LiftOver(CHAIN);
    final VCFFileReader in = new VCFFileReader(INPUT, false);
    logger.info("Loading up the target reference genome.");
    final ReferenceSequenceFileWalker walker = new ReferenceSequenceFileWalker(REFERENCE_SEQUENCE);
    // Capture the dictionary while the walker is still open so that nothing below touches the
    // walker after it has been closed.
    final htsjdk.samtools.SAMSequenceDictionary targetDictionary = walker.getSequenceDictionary();
    final Map<String, byte[]> refSeqs = new HashMap<>();
    for (final SAMSequenceRecord rec : targetDictionary.getSequences()) {
        refSeqs.put(rec.getSequenceName(), walker.get(rec.getSequenceIndex()).getBases());
    }
    CloserUtil.close(walker);
    ////////////////////////////////////////////////////////////////////////
    // Setup the outputs
    ////////////////////////////////////////////////////////////////////////
    final VCFHeader inHeader = in.getFileHeader();
    final VCFHeader outHeader = new VCFHeader(inHeader);
    outHeader.setSequenceDictionary(targetDictionary);
    final VariantContextWriter out = new VariantContextWriterBuilder().setOption(Options.INDEX_ON_THE_FLY).setOutputFile(OUTPUT).setReferenceDictionary(targetDictionary).build();
    out.writeHeader(outHeader);
    final VariantContextWriter rejects = new VariantContextWriterBuilder().setOutputFile(REJECT).unsetOption(Options.INDEX_ON_THE_FLY).build();
    final VCFHeader rejectHeader = new VCFHeader(inHeader);
    for (final VCFFilterHeaderLine line : FILTERS) {
        rejectHeader.addMetaDataLine(line);
    }
    rejects.writeHeader(rejectHeader);
    ////////////////////////////////////////////////////////////////////////
    // Read the input VCF, lift the records over and write to the sorting
    // collection.
    ////////////////////////////////////////////////////////////////////////
    long failedLiftover = 0, failedAlleleCheck = 0, total = 0;
    logger.info("Lifting variants over and sorting.");
    final SortingCollection<VariantContext> sorter = SortingCollection.newInstance(VariantContext.class, new VCFRecordCodec(outHeader), outHeader.getVCFRecordComparator(), MAX_RECORDS_IN_RAM, TMP_DIR);
    ProgressLogger progress = new ProgressLogger(logger, 1000000, "read");
    for (final VariantContext ctx : in) {
        ++total;
        final Interval source = new Interval(ctx.getContig(), ctx.getStart(), ctx.getEnd(), false, ctx.getContig() + ":" + ctx.getStart() + "-" + ctx.getEnd());
        final Interval target = liftOver.liftOver(source, 1.0);
        if (target == null) {
            rejects.add(new VariantContextBuilder(ctx).filter(FILTER_CANNOT_LIFTOVER).make());
            failedLiftover++;
        } else {
            // Fix the alleles if we went from positive to negative strand
            final List<Allele> alleles = new ArrayList<>();
            for (final Allele oldAllele : ctx.getAlleles()) {
                if (target.isPositiveStrand() || oldAllele.isSymbolic()) {
                    alleles.add(oldAllele);
                } else {
                    alleles.add(Allele.create(SequenceUtil.reverseComplement(oldAllele.getBaseString()), oldAllele.isReference()));
                }
            }
            // Build the new variant context
            final VariantContextBuilder builder = new VariantContextBuilder(ctx.getSource(), target.getContig(), target.getStart(), target.getEnd(), alleles);
            builder.id(ctx.getID());
            builder.attributes(ctx.getAttributes());
            builder.genotypes(ctx.getGenotypes());
            builder.filters(ctx.getFilters());
            builder.log10PError(ctx.getLog10PError());
            // Check that the reference allele still agrees with the reference sequence
            boolean mismatchesReference = false;
            for (final Allele allele : builder.getAlleles()) {
                if (allele.isReference()) {
                    // NOTE(review): refSeqs.get returns null if the lifted contig is absent from the
                    // target reference, which would fail below — confirm chain/reference always agree.
                    final byte[] ref = refSeqs.get(target.getContig());
                    final String refString = StringUtil.bytesToString(ref, target.getStart() - 1, target.length());
                    if (!refString.equalsIgnoreCase(allele.getBaseString())) {
                        mismatchesReference = true;
                    }
                    // Only the first reference allele is checked.
                    break;
                }
            }
            if (mismatchesReference) {
                rejects.add(new VariantContextBuilder(ctx).filter(FILTER_MISMATCHING_REF_ALLELE).make());
                failedAlleleCheck++;
            } else {
                sorter.add(builder.make());
            }
        }
        progress.record(ctx.getContig(), ctx.getStart());
    }
    final NumberFormat pfmt = new DecimalFormat("0.0000%");
    // Guard against an empty input, where the ratio would otherwise be NaN.
    final double failedFraction = total == 0 ? 0.0 : (failedLiftover + failedAlleleCheck) / (double) total;
    final String pct = pfmt.format(failedFraction);
    // Each message is built by concatenation: extra arguments without a placeholder in the
    // format string would be dropped by placeholder-based loggers, losing the counts.
    logger.info("Processed " + total + " variants.");
    logger.info(failedLiftover + " variants failed to liftover.");
    logger.info(failedAlleleCheck + " variants lifted over but had mismatching reference alleles after lift over.");
    logger.info(pct + " of variants were not successfully lifted over and written to the output.");
    rejects.close();
    in.close();
    ////////////////////////////////////////////////////////////////////////
    // Write the sorted outputs to the final output file
    ////////////////////////////////////////////////////////////////////////
    sorter.doneAdding();
    progress = new ProgressLogger(logger, 1000000, "written");
    logger.info("Writing out sorted records to final VCF.");
    for (final VariantContext ctx : sorter) {
        out.add(ctx);
        progress.record(ctx.getContig(), ctx.getStart());
    }
    out.close();
    sorter.cleanup();
    return null;
}
Also used : LiftOver(htsjdk.samtools.liftover.LiftOver) HashMap(java.util.HashMap) DecimalFormat(java.text.DecimalFormat) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) ProgressLogger(org.broadinstitute.hellbender.utils.runtime.ProgressLogger) VCFRecordCodec(htsjdk.variant.vcf.VCFRecordCodec) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) ReferenceSequenceFileWalker(htsjdk.samtools.reference.ReferenceSequenceFileWalker) VCFHeader(htsjdk.variant.vcf.VCFHeader) Allele(htsjdk.variant.variantcontext.Allele) VariantContextWriterBuilder(htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) NumberFormat(java.text.NumberFormat)

Example 25 with VariantContextWriter

use of htsjdk.variant.variantcontext.writer.VariantContextWriter in project gatk by broadinstitute.

the class SplitVcfs method doWork.

/**
 * Splits the variants of {@code INPUT} into two outputs: indels go to {@code INDEL_OUTPUT} and
 * SNPs go to {@code SNP_OUTPUT}. Both outputs receive the input file's header.
 *
 * Records that are neither SNP nor indel cause an {@link IllegalStateException} when
 * {@code STRICT} is set; otherwise they are counted and skipped.
 *
 * @return always {@code null}
 * @throws UserException         if indexing is requested but no sequence dictionary is available
 * @throws IllegalStateException if {@code STRICT} is set and a record is neither SNP nor indel
 */
@Override
protected Object doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    final ProgressLogger progress = new ProgressLogger(logger, 10000);
    final VCFFileReader fileReader = new VCFFileReader(INPUT);
    try {
        final VCFHeader fileHeader = fileReader.getFileHeader();
        // An explicitly supplied dictionary takes precedence over the one in the input header.
        final SAMSequenceDictionary sequenceDictionary = SEQUENCE_DICTIONARY != null ? SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).getFileHeader(SEQUENCE_DICTIONARY).getSequenceDictionary() : fileHeader.getSequenceDictionary();
        if (CREATE_INDEX && sequenceDictionary == null) {
            throw new UserException("A sequence dictionary must be available (either through the input file or by setting it explicitly) when creating indexed output.");
        }
        final VariantContextWriterBuilder builder = new VariantContextWriterBuilder().setReferenceDictionary(sequenceDictionary).clearOptions();
        if (CREATE_INDEX) {
            builder.setOption(Options.INDEX_ON_THE_FLY);
        }
        try (final VariantContextWriter snpWriter = builder.setOutputFile(SNP_OUTPUT).build();
             final VariantContextWriter indelWriter = builder.setOutputFile(INDEL_OUTPUT).build()) {
            snpWriter.writeHeader(fileHeader);
            indelWriter.writeHeader(fileHeader);
            int incorrectVariantCount = 0;
            final CloseableIterator<VariantContext> iterator = fileReader.iterator();
            try {
                while (iterator.hasNext()) {
                    final VariantContext context = iterator.next();
                    if (context.isIndel()) {
                        indelWriter.add(context);
                    } else if (context.isSNP()) {
                        snpWriter.add(context);
                    } else if (STRICT) {
                        throw new IllegalStateException("Found a record with type " + context.getType().name());
                    } else {
                        incorrectVariantCount++;
                    }
                    progress.record(context.getContig(), context.getStart());
                }
                if (incorrectVariantCount > 0) {
                    logger.debug("Found " + incorrectVariantCount + " records that didn't match SNP or INDEL");
                }
            } finally {
                // Released on every exit path, including the STRICT failure above.
                CloserUtil.close(iterator);
            }
        }
    } finally {
        // Previously the reader was closed only on the success path.
        CloserUtil.close(fileReader);
    }
    return null;
}
Also used : VariantContextWriterBuilder(htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VariantContext(htsjdk.variant.variantcontext.VariantContext) ProgressLogger(org.broadinstitute.hellbender.utils.runtime.ProgressLogger) UserException(org.broadinstitute.hellbender.exceptions.UserException) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary)

Aggregations

VariantContextWriter (htsjdk.variant.variantcontext.writer.VariantContextWriter)31 File (java.io.File)19 VariantContext (htsjdk.variant.variantcontext.VariantContext)13 VariantContextWriterBuilder (htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder)12 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)12 Test (org.testng.annotations.Test)12 VCFHeader (htsjdk.variant.vcf.VCFHeader)9 VCFFileReader (htsjdk.variant.vcf.VCFFileReader)8 UserException (org.broadinstitute.hellbender.exceptions.UserException)8 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)7 Options (htsjdk.variant.variantcontext.writer.Options)6 ProgressLogger (org.broadinstitute.hellbender.utils.runtime.ProgressLogger)6 IOException (java.io.IOException)5 IndexedFastaSequenceFile (htsjdk.samtools.reference.IndexedFastaSequenceFile)4 ReferenceSequenceFile (htsjdk.samtools.reference.ReferenceSequenceFile)4 VariantContextComparator (htsjdk.variant.variantcontext.VariantContextComparator)4 CloseableIterator (htsjdk.samtools.util.CloseableIterator)3 MergingIterator (htsjdk.samtools.util.MergingIterator)3 Function (java.util.function.Function)3 Collectors (java.util.stream.Collectors)3