Use of htsjdk.variant.vcf.VCFHeader in project gatk by broadinstitute:
class RemoveNearbyIndels, method onTraversalStart.
@Override
public void onTraversalStart() {
    // Copy the input header (metadata + sample names), then append this tool's
    // standard header lines before opening the output writer.
    final VCFHeader sourceHeader = getHeaderForVariants();
    final VCFHeader outputHeader =
            new VCFHeader(sourceHeader.getMetaDataInSortedOrder(), sourceHeader.getGenotypeSamples());
    for (final VCFHeaderLine defaultLine : getDefaultToolVCFHeaderLines()) {
        outputHeader.addMetaDataLine(defaultLine);
    }
    vcfWriter = createVCFWriter(new File(outputVcf));
    vcfWriter.writeHeader(outputHeader);
}
Use of htsjdk.variant.vcf.VCFHeader in project gatk by broadinstitute:
class AnnotateVcfWithExpectedAlleleFraction, method onTraversalStart.
@Override
public void onTraversalStart() {
    final VCFHeader inputHeader = getHeaderForVariants();
    final Set<VCFHeaderLine> headerLines = new HashSet<>(inputHeader.getMetaDataInSortedOrder());
    headerLines.add(new VCFInfoHeaderLine(EXPECTED_ALLELE_FRACTION_NAME, 1, VCFHeaderLineType.Float, "expected allele fraction in pooled bam"));
    // BUG FIX: the default tool header lines must be added to the set BEFORE the
    // VCFHeader is constructed from it; the original code added them afterwards,
    // so they never appeared in the written header.
    headerLines.addAll(getDefaultToolVCFHeaderLines());
    final VCFHeader vcfHeader = new VCFHeader(headerLines, inputHeader.getGenotypeSamples());
    vcfWriter = createVCFWriter(outputVcf);
    vcfWriter.writeHeader(vcfHeader);
    // Map each sample to its mixing fraction, then materialize the fractions in
    // the same order as the samples appear in the input header so that per-sample
    // arrays downstream line up with genotype columns.
    final List<MixingFraction> mixingFractionsList = MixingFraction.readMixingFractions(inputMixingFractions);
    final Map<String, Double> mixingFractionsMap = mixingFractionsList.stream()
            .collect(Collectors.toMap(MixingFraction::getSample, MixingFraction::getMixingFraction));
    mixingFractionsInSampleOrder = inputHeader.getSampleNamesInOrder().stream()
            .mapToDouble(mixingFractionsMap::get)
            .toArray();
}
Use of htsjdk.variant.vcf.VCFHeader in project gatk by broadinstitute:
class HaplotypeCallerEngine, method writeHeader.
/**
 * Writes an appropriate VCF header, given our arguments, to the provided writer.
 *
 * @param vcfWriter writer to which the header should be written; must not be null
 * @param sequenceDictionary sequence dictionary to attach to the emitted header
 * @param defaultToolHeaderLines standard header lines contributed by the enclosing tool
 */
public void writeHeader(final VariantContextWriter vcfWriter, final SAMSequenceDictionary sequenceDictionary, final Set<VCFHeaderLine> defaultToolHeaderLines) {
    Utils.nonNull(vcfWriter);

    // Seed the header-line set with the tool defaults, then layer on everything
    // the genotyping and annotation engines require.
    final Set<VCFHeaderLine> headerInfo = new HashSet<>(defaultToolHeaderLines);
    headerInfo.addAll(genotypingEngine.getAppropriateVCFInfoHeaders());
    // all annotation fields from VariantAnnotatorEngine
    headerInfo.addAll(annotationEngine.getVCFAnnotationDescriptions());

    // standard INFO annotation header lines required by every caller
    headerInfo.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.DOWNSAMPLED_KEY));
    headerInfo.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.MLE_ALLELE_COUNT_KEY));
    headerInfo.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.MLE_ALLELE_FREQUENCY_KEY));

    // standard FORMAT field header lines required by every caller
    VCFStandardHeaderLines.addStandardFormatLines(headerInfo, true,
            VCFConstants.GENOTYPE_KEY,
            VCFConstants.GENOTYPE_QUALITY_KEY,
            VCFConstants.DEPTH_KEY,
            VCFConstants.GENOTYPE_PL_KEY);

    // physical-phasing FORMAT fields, unless explicitly disabled
    if (!hcArgs.doNotRunPhysicalPhasing) {
        headerInfo.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.HAPLOTYPE_CALLER_PHASING_ID_KEY));
        headerInfo.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.HAPLOTYPE_CALLER_PHASING_GT_KEY));
    }

    // FILTER fields are added unconditionally as it's not always 100% certain the circumstances
    // where the filters are used. For example, in emitting all sites the lowQual field is used
    headerInfo.add(GATKVCFHeaderLines.getFilterLine(GATKVCFConstants.LOW_QUAL_FILTER_NAME));

    if (emitReferenceConfidence()) {
        headerInfo.addAll(referenceConfidenceModel.getVCFHeaderLines());
    }

    final VCFHeader vcfHeader = new VCFHeader(headerInfo, sampleSet);
    vcfHeader.setSequenceDictionary(sequenceDictionary);
    vcfWriter.writeHeader(vcfHeader);
}
Use of htsjdk.variant.vcf.VCFHeader in project gatk by broadinstitute:
class VariantsSparkSinkUnitTest, method assertSingleShardedWritingWorks.
// Round-trip check: write a multi-partition RDD of variants to a single sharded
// output, read it back, and verify the variants survive unchanged.
private void assertSingleShardedWritingWorks(String vcf, String outputPath) throws IOException {
    final JavaSparkContext ctx = SparkContextFactory.getTestSparkContext();
    final VariantsSparkSource variantsSparkSource = new VariantsSparkSource(ctx);

    JavaRDD<VariantContext> variants = variantsSparkSource.getParallelVariantContexts(vcf, null);
    // Ensure the write path is exercised with more than one partition.
    if (variants.getNumPartitions() == 1) {
        variants = variants.repartition(3);
    }

    final VCFHeader header = getHeader(vcf);
    VariantsSparkSink.writeVariants(ctx, outputPath, variants, header);

    final JavaRDD<VariantContext> roundTripped = variantsSparkSource.getParallelVariantContexts(outputPath, null);
    final List<VariantContext> writtenVariants = roundTripped.collect();
    VariantContextTestUtils.assertEqualVariants(readVariants(vcf), writtenVariants);
}
Use of htsjdk.variant.vcf.VCFHeader in project gatk by broadinstitute:
class GatherVcfs, method gatherConventionally.
/** Code for gathering multiple VCFs that works regardless of input format and output format, but can be slow. */
private static void gatherConventionally(final SAMSequenceDictionary sequenceDictionary, final boolean createIndex, final List<Path> inputFiles, final File outputFile, final int cloudPrefetchBuffer) {
    final EnumSet<Options> options = EnumSet.copyOf(VariantContextWriterBuilder.DEFAULT_OPTIONS);
    if (createIndex) {
        options.add(Options.INDEX_ON_THE_FLY);
    } else {
        options.remove(Options.INDEX_ON_THE_FLY);
    }
    try (final VariantContextWriter out = new VariantContextWriterBuilder()
            .setOutputFile(outputFile)
            .setReferenceDictionary(sequenceDictionary)
            .setOptions(options)
            .build()) {
        final ProgressLogger progress = new ProgressLogger(log, 10000);
        VariantContext lastContext = null;
        Path lastFile = null;
        VCFHeader firstHeader = null;
        VariantContextComparator comparator = null;
        for (final Path f : inputFiles) {
            log.debug("Gathering from file: ", f.toUri().toString());
            // BUG FIX: the original code closed the reader and iterator with
            // CloserUtil only on the success path, leaking them whenever an
            // exception was thrown mid-file. try-with-resources closes them
            // on every exit path (FeatureReader is Closeable and
            // PeekableIterator implements CloseableIterator).
            try (final FeatureReader<VariantContext> variantReader = getReaderFromVCFUri(f, cloudPrefetchBuffer);
                 final PeekableIterator<VariantContext> variantIterator = new PeekableIterator<>(variantReader.iterator())) {
                final VCFHeader header = (VCFHeader) variantReader.getHeader();
                // The first input's header is written verbatim and also supplies
                // the contig ordering used to validate cross-file sort order.
                if (firstHeader == null) {
                    firstHeader = header;
                    out.writeHeader(firstHeader);
                    comparator = new VariantContextComparator(firstHeader.getContigLines());
                }
                // Inputs must be globally sorted: the first variant of this file
                // must come strictly after the last variant of the previous file.
                if (lastContext != null && variantIterator.hasNext()) {
                    final VariantContext vc = variantIterator.peek();
                    if (comparator.compare(vc, lastContext) <= 0) {
                        throw new IllegalStateException("First variant in file " + f.toUri().toString() + " is at " + vc.getSource() + " but last variant in earlier file " + lastFile.toUri().toString() + " is at " + lastContext.getSource());
                    }
                }
                while (variantIterator.hasNext()) {
                    lastContext = variantIterator.next();
                    out.add(lastContext);
                    progress.record(lastContext.getContig(), lastContext.getStart());
                }
                lastFile = f;
            } catch (IOException e) {
                throw new UserException.CouldNotReadInputFile(f, e.getMessage(), e);
            }
        }
    }
}
Aggregations