Search in sources :

Example 96 with VCFHeader

use of htsjdk.variant.vcf.VCFHeader in project gatk by broadinstitute.

In class RemoveNearbyIndels, the method onTraversalStart.

@Override
public void onTraversalStart() {
    // Open the output writer, then emit a header cloned from the input VCF's
    // metadata and samples, augmented with the standard tool-provenance lines.
    vcfWriter = createVCFWriter(new File(outputVcf));
    final VCFHeader sourceHeader = getHeaderForVariants();
    final VCFHeader outputHeader = new VCFHeader(sourceHeader.getMetaDataInSortedOrder(), sourceHeader.getGenotypeSamples());
    getDefaultToolVCFHeaderLines().forEach(outputHeader::addMetaDataLine);
    vcfWriter.writeHeader(outputHeader);
}
Also used : VCFHeader(htsjdk.variant.vcf.VCFHeader) File(java.io.File)

Example 97 with VCFHeader

use of htsjdk.variant.vcf.VCFHeader in project gatk by broadinstitute.

In class AnnotateVcfWithExpectedAlleleFraction, the method onTraversalStart.

@Override
public void onTraversalStart() {
    // Build the output header: the input header's lines, plus the expected-allele-fraction
    // INFO line this tool annotates, plus the standard tool-provenance lines.
    final VCFHeader inputHeader = getHeaderForVariants();
    final Set<VCFHeaderLine> headerLines = new HashSet<>(inputHeader.getMetaDataInSortedOrder());
    headerLines.add(new VCFInfoHeaderLine(EXPECTED_ALLELE_FRACTION_NAME, 1, VCFHeaderLineType.Float, "expected allele fraction in pooled bam"));
    // BUG FIX: the default tool header lines must be added BEFORE constructing the
    // VCFHeader — the constructor copies the line set, so lines added to headerLines
    // after construction were silently dropped from the written header.
    headerLines.addAll(getDefaultToolVCFHeaderLines());
    final VCFHeader vcfHeader = new VCFHeader(headerLines, inputHeader.getGenotypeSamples());
    vcfWriter = createVCFWriter(outputVcf);
    vcfWriter.writeHeader(vcfHeader);
    // Cache each sample's expected mixing fraction, ordered to match the header's
    // sample order so per-sample lookups during traversal are a plain array index.
    final List<MixingFraction> mixingFractionsList = MixingFraction.readMixingFractions(inputMixingFractions);
    final Map<String, Double> mixingFractionsMap = mixingFractionsList.stream().collect(Collectors.toMap(MixingFraction::getSample, MixingFraction::getMixingFraction));
    mixingFractionsInSampleOrder = inputHeader.getSampleNamesInOrder().stream().mapToDouble(mixingFractionsMap::get).toArray();
}
Also used : VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) VCFHeader(htsjdk.variant.vcf.VCFHeader) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) HashSet(java.util.HashSet)

Example 98 with VCFHeader

use of htsjdk.variant.vcf.VCFHeader in project gatk by broadinstitute.

In class HaplotypeCallerEngine, the method writeHeader.

/**
 * Writes an appropriate VCF header, given our arguments, to the provided writer.
 *
 * @param vcfWriter writer to which the header should be written
 * @param sequenceDictionary dictionary to attach to the emitted header
 * @param defaultToolHeaderLines standard tool-provenance lines to include
 */
public void writeHeader(final VariantContextWriter vcfWriter, final SAMSequenceDictionary sequenceDictionary, final Set<VCFHeaderLine> defaultToolHeaderLines) {
    Utils.nonNull(vcfWriter);
    // Accumulate every header line the output VCF needs, starting from the tool defaults.
    final Set<VCFHeaderLine> headerInfo = new HashSet<>(defaultToolHeaderLines);
    headerInfo.addAll(genotypingEngine.getAppropriateVCFInfoHeaders());
    // all annotation fields from VariantAnnotatorEngine
    headerInfo.addAll(annotationEngine.getVCFAnnotationDescriptions());
    // all callers need to add these standard annotation header lines
    headerInfo.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.DOWNSAMPLED_KEY));
    headerInfo.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.MLE_ALLELE_COUNT_KEY));
    headerInfo.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.MLE_ALLELE_FREQUENCY_KEY));
    // all callers need to add these standard FORMAT field header lines
    VCFStandardHeaderLines.addStandardFormatLines(headerInfo, true, VCFConstants.GENOTYPE_KEY, VCFConstants.GENOTYPE_QUALITY_KEY, VCFConstants.DEPTH_KEY, VCFConstants.GENOTYPE_PL_KEY);
    // Physical-phasing FORMAT lines are only emitted when phasing is enabled.
    if (!hcArgs.doNotRunPhysicalPhasing) {
        headerInfo.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.HAPLOTYPE_CALLER_PHASING_ID_KEY));
        headerInfo.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.HAPLOTYPE_CALLER_PHASING_GT_KEY));
    }
    // FILTER fields are added unconditionally as it's not always 100% certain the circumstances
    // where the filters are used.  For example, in emitting all sites the lowQual field is used
    headerInfo.add(GATKVCFHeaderLines.getFilterLine(GATKVCFConstants.LOW_QUAL_FILTER_NAME));
    if (emitReferenceConfidence()) {
        headerInfo.addAll(referenceConfidenceModel.getVCFHeaderLines());
    }
    // Assemble the final header, stamp it with the sequence dictionary, and write it.
    final VCFHeader vcfHeader = new VCFHeader(headerInfo, sampleSet);
    vcfHeader.setSequenceDictionary(sequenceDictionary);
    vcfWriter.writeHeader(vcfHeader);
}
Also used : VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) VCFHeader(htsjdk.variant.vcf.VCFHeader)

Example 99 with VCFHeader

use of htsjdk.variant.vcf.VCFHeader in project gatk by broadinstitute.

In class VariantsSparkSinkUnitTest, the method assertSingleShardedWritingWorks.

private void assertSingleShardedWritingWorks(String vcf, String outputPath) throws IOException {
    // Round-trip the VCF through the Spark sink and check the variants survive intact.
    final JavaSparkContext ctx = SparkContextFactory.getTestSparkContext();
    final VariantsSparkSource variantsSparkSource = new VariantsSparkSource(ctx);
    JavaRDD<VariantContext> variantRdd = variantsSparkSource.getParallelVariantContexts(vcf, null);
    // Make sure the write path is exercised with more than one input partition.
    if (variantRdd.getNumPartitions() == 1) {
        variantRdd = variantRdd.repartition(3);
    }
    final VCFHeader header = getHeader(vcf);
    VariantsSparkSink.writeVariants(ctx, outputPath, variantRdd, header);
    // Read the written output back and compare against the original input variants.
    final List<VariantContext> writtenVariants = variantsSparkSource.getParallelVariantContexts(outputPath, null).collect();
    VariantContextTestUtils.assertEqualVariants(readVariants(vcf), writtenVariants);
}
Also used : VariantContext(htsjdk.variant.variantcontext.VariantContext) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) VCFHeader(htsjdk.variant.vcf.VCFHeader)

Example 100 with VCFHeader

use of htsjdk.variant.vcf.VCFHeader in project gatk by broadinstitute.

In class GatherVcfs, the method gatherConventionally.

/** Code for gathering multiple VCFs that works regardless of input format and output format, but can be slow. */
private static void gatherConventionally(final SAMSequenceDictionary sequenceDictionary, final boolean createIndex, final List<Path> inputFiles, final File outputFile, final int cloudPrefetchBuffer) {
    final EnumSet<Options> options = EnumSet.copyOf(VariantContextWriterBuilder.DEFAULT_OPTIONS);
    if (createIndex) {
        options.add(Options.INDEX_ON_THE_FLY);
    } else {
        options.remove(Options.INDEX_ON_THE_FLY);
    }
    try (final VariantContextWriter out = new VariantContextWriterBuilder().setOutputFile(outputFile).setReferenceDictionary(sequenceDictionary).setOptions(options).build()) {
        final ProgressLogger progress = new ProgressLogger(log, 10000);
        VariantContext lastContext = null;
        Path lastFile = null;
        VCFHeader firstHeader = null;
        VariantContextComparator comparator = null;
        for (final Path f : inputFiles) {
            // FIX: manage the reader and iterator with try-with-resources. The original
            // CloserUtil.close calls ran only on the success path, so any exception
            // thrown mid-file leaked the open reader/iterator.
            try (final FeatureReader<VariantContext> variantReader = getReaderFromVCFUri(f, cloudPrefetchBuffer);
                 final PeekableIterator<VariantContext> variantIterator = new PeekableIterator<>(variantReader.iterator())) {
                // FIX: concatenate the path into the message. Passing it as a trailing
                // argument with no placeholder in the format string dropped it from the log.
                log.debug("Gathering from file: " + f.toUri());
                final VCFHeader header = (VCFHeader) variantReader.getHeader();
                if (firstHeader == null) {
                    // The first input's header is written verbatim and its contig lines
                    // define the sort order enforced across all subsequent files.
                    firstHeader = header;
                    out.writeHeader(firstHeader);
                    comparator = new VariantContextComparator(firstHeader.getContigLines());
                }
                // Verify global ordering across the file boundary before copying records.
                if (lastContext != null && variantIterator.hasNext()) {
                    final VariantContext vc = variantIterator.peek();
                    if (comparator.compare(vc, lastContext) <= 0) {
                        throw new IllegalStateException("First variant in file " + f.toUri().toString() + " is at " + vc.getSource() + " but last variant in earlier file " + lastFile.toUri().toString() + " is at " + lastContext.getSource());
                    }
                }
                while (variantIterator.hasNext()) {
                    lastContext = variantIterator.next();
                    out.add(lastContext);
                    progress.record(lastContext.getContig(), lastContext.getStart());
                }
                lastFile = f;
            } catch (IOException e) {
                throw new UserException.CouldNotReadInputFile(f, e.getMessage(), e);
            }
        }
    }
}
Also used : Path(java.nio.file.Path) Options(htsjdk.variant.variantcontext.writer.Options) VariantContext(htsjdk.variant.variantcontext.VariantContext) ProgressLogger(org.broadinstitute.hellbender.utils.runtime.ProgressLogger) VariantContextComparator(htsjdk.variant.variantcontext.VariantContextComparator) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) VariantContextWriterBuilder(htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) UserException(org.broadinstitute.hellbender.exceptions.UserException) VCFHeader(htsjdk.variant.vcf.VCFHeader)

Aggregations

VCFHeader (htsjdk.variant.vcf.VCFHeader)182 VariantContext (htsjdk.variant.variantcontext.VariantContext)113 File (java.io.File)93 ArrayList (java.util.ArrayList)79 VariantContextWriter (htsjdk.variant.variantcontext.writer.VariantContextWriter)73 VCFHeaderLine (htsjdk.variant.vcf.VCFHeaderLine)64 VariantContextBuilder (htsjdk.variant.variantcontext.VariantContextBuilder)63 HashSet (java.util.HashSet)60 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)58 IOException (java.io.IOException)55 VCFInfoHeaderLine (htsjdk.variant.vcf.VCFInfoHeaderLine)52 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)49 Genotype (htsjdk.variant.variantcontext.Genotype)48 Allele (htsjdk.variant.variantcontext.Allele)47 VCFFileReader (htsjdk.variant.vcf.VCFFileReader)47 List (java.util.List)44 Set (java.util.Set)38 VcfIterator (com.github.lindenb.jvarkit.util.vcf.VcfIterator)36 CloserUtil (htsjdk.samtools.util.CloserUtil)35 Collectors (java.util.stream.Collectors)34