Use of htsjdk.variant.variantcontext.writer.VariantContextWriter in project gatk by broadinstitute.
Class RenameSampleInVcf, method doWork():
@Override
protected Object doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);
    try (final VCFFileReader in = new VCFFileReader(INPUT)) {
        final VCFHeader header = in.getFileHeader();
        Utils.validateArg(header.getGenotypeSamples().size() == 1, "Input VCF must be single-sample.");
        Utils.validateArg(OLD_SAMPLE_NAME == null || OLD_SAMPLE_NAME.equals(header.getGenotypeSamples().get(0)),
                () -> "Input VCF did not contain expected sample. Contained: " + header.getGenotypeSamples().get(0));

        // Start from the builder's default options and toggle on-the-fly indexing according to CREATE_INDEX.
        final EnumSet<Options> options = EnumSet.copyOf(VariantContextWriterBuilder.DEFAULT_OPTIONS);
        if (CREATE_INDEX) {
            options.add(Options.INDEX_ON_THE_FLY);
        } else {
            options.remove(Options.INDEX_ON_THE_FLY);
        }

        // Reuse the input metadata, replacing the single sample name with NEW_SAMPLE_NAME.
        final VCFHeader outHeader = new VCFHeader(header.getMetaDataInInputOrder(), CollectionUtil.makeList(NEW_SAMPLE_NAME));
        try (final VariantContextWriter out = new VariantContextWriterBuilder()
                .setOutputFile(OUTPUT)
                .setReferenceDictionary(outHeader.getSequenceDictionary())
                .setOptions(options)
                .build()) {
            out.writeHeader(outHeader);
            for (final VariantContext ctx : in) {
                out.add(ctx);
            }
        }
    }
    return null;
}
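This example and the ones that follow share the same core pattern: build a VariantContextWriter with a VariantContextWriterBuilder, write a VCFHeader, add VariantContext records, and close the writer. The sketch below is a minimal, self-contained version of that pattern, assuming an input VCF whose header carries a sequence dictionary (required here for on-the-fly indexing); the VcfCopyExample class name and the input.vcf/output.vcf paths are placeholders, not part of the GATK source.

import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.writer.Options;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder;
import htsjdk.variant.vcf.VCFFileReader;
import htsjdk.variant.vcf.VCFHeader;

import java.io.File;
import java.util.EnumSet;

public final class VcfCopyExample {

    // Copies every record from input.vcf to output.vcf, indexing the output on the fly.
    public static void main(final String[] args) {
        final File input = new File("input.vcf");   // placeholder path
        final File output = new File("output.vcf"); // placeholder path

        try (final VCFFileReader reader = new VCFFileReader(input, false)) {
            final VCFHeader header = reader.getFileHeader();
            final SAMSequenceDictionary dict = header.getSequenceDictionary();

            // Same option handling as above: start from the defaults and request on-the-fly indexing.
            final EnumSet<Options> options = EnumSet.copyOf(VariantContextWriterBuilder.DEFAULT_OPTIONS);
            options.add(Options.INDEX_ON_THE_FLY);

            try (final VariantContextWriter writer = new VariantContextWriterBuilder()
                    .setOutputFile(output)
                    .setReferenceDictionary(dict)
                    .setOptions(options)
                    .build()) {
                writer.writeHeader(header);
                for (final VariantContext ctx : reader) {
                    writer.add(ctx);
                }
            }
        }
    }
}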
Use of htsjdk.variant.variantcontext.writer.VariantContextWriter in project gatk by broadinstitute.
Class SortVcf, method writeSortedOutput():
private void writeSortedOutput(final VCFHeader outputHeader, final SortingCollection<VariantContext> sortedOutput) {
    final ProgressLogger writeProgress = new ProgressLogger(logger, 25000, "wrote", "records");
    final EnumSet<Options> options = CREATE_INDEX ? EnumSet.of(Options.INDEX_ON_THE_FLY) : EnumSet.noneOf(Options.class);
    final VariantContextWriter out = new VariantContextWriterBuilder()
            .setReferenceDictionary(outputHeader.getSequenceDictionary())
            .setOptions(options)
            .setOutputFile(OUTPUT)
            .build();
    out.writeHeader(outputHeader);
    for (final VariantContext variantContext : sortedOutput) {
        out.add(variantContext);
        writeProgress.record(variantContext.getContig(), variantContext.getStart());
    }
    out.close();
}
Use of htsjdk.variant.variantcontext.writer.VariantContextWriter in project gatk by broadinstitute.
Class CreateSomaticPanelOfNormals, method doWork():
public Object doWork() {
    final List<File> inputVcfs = new ArrayList<>(vcfs);
    final Collection<CloseableIterator<VariantContext>> iterators = new ArrayList<>(inputVcfs.size());
    final Collection<VCFHeader> headers = new HashSet<>(inputVcfs.size());
    final VCFHeader headerOfFirstVcf = new VCFFileReader(inputVcfs.get(0), false).getFileHeader();
    final SAMSequenceDictionary sequenceDictionary = headerOfFirstVcf.getSequenceDictionary();
    final VariantContextComparator comparator = headerOfFirstVcf.getVCFRecordComparator();

    // Open every input VCF and verify that its contigs are compatible with the first header's comparator.
    for (final File vcf : inputVcfs) {
        final VCFFileReader reader = new VCFFileReader(vcf, false);
        iterators.add(reader.iterator());
        final VCFHeader header = reader.getFileHeader();
        Utils.validateArg(comparator.isCompatible(header.getContigLines()), () -> vcf.getAbsolutePath() + " has incompatible contigs.");
        headers.add(header);
    }

    final VariantContextWriter writer = GATKVariantContextUtils.createVCFWriter(outputVcf, sequenceDictionary, false, Options.INDEX_ON_THE_FLY);
    writer.writeHeader(new VCFHeader(VCFUtils.smartMergeHeaders(headers, false)));

    // Walk the position-sorted merge of all inputs, buffering variants that share a start position.
    final MergingIterator<VariantContext> mergingIterator = new MergingIterator<>(comparator, iterators);
    SimpleInterval currentPosition = new SimpleInterval("FAKE", 1, 1);
    final List<VariantContext> variantsAtThisPosition = new ArrayList<>(20);
    while (mergingIterator.hasNext()) {
        final VariantContext vc = mergingIterator.next();
        if (!currentPosition.overlaps(vc)) {
            processVariantsAtSamePosition(variantsAtThisPosition, writer);
            variantsAtThisPosition.clear();
            currentPosition = new SimpleInterval(vc.getContig(), vc.getStart(), vc.getStart());
        }
        variantsAtThisPosition.add(vc);
    }
    // Flush the variants buffered at the final position before closing.
    processVariantsAtSamePosition(variantsAtThisPosition, writer);
    mergingIterator.close();
    writer.close();
    return "SUCCESS";
}
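One detail worth noting: the loop above opens a VCFFileReader per input, but only the merging iterator and the writer are closed at the end, so the readers themselves are never released. Below is a small, self-contained sketch of one way to keep track of the readers so they can be closed once merging is finished; the VcfReaderTracking class and its openAll/iteratorsOf/closeAll helpers are illustrative names, not part of the GATK source.

import htsjdk.samtools.util.CloseableIterator;
import htsjdk.samtools.util.CloserUtil;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.vcf.VCFFileReader;

import java.io.File;
import java.util.ArrayList;
import java.util.List;

final class VcfReaderTracking {

    // Opens one reader per file and remembers it so it can be closed after merging.
    static List<VCFFileReader> openAll(final List<File> vcfs) {
        final List<VCFFileReader> readers = new ArrayList<>(vcfs.size());
        for (final File vcf : vcfs) {
            readers.add(new VCFFileReader(vcf, false));
        }
        return readers;
    }

    // Collects one iterator per reader, e.g. to feed a MergingIterator as in the code above.
    static List<CloseableIterator<VariantContext>> iteratorsOf(final List<VCFFileReader> readers) {
        final List<CloseableIterator<VariantContext>> iterators = new ArrayList<>(readers.size());
        for (final VCFFileReader reader : readers) {
            iterators.add(reader.iterator());
        }
        return iterators;
    }

    // CloserUtil.close ignores nulls and checked exceptions, which is convenient in cleanup paths.
    static void closeAll(final List<VCFFileReader> readers) {
        readers.forEach(CloserUtil::close);
    }
}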
Use of htsjdk.variant.variantcontext.writer.VariantContextWriter in project gatk by broadinstitute.
Class GATKVariantContextUtilsUnitTest, method testCreateVariantContextWriterNoReference():
@Test(expectedExceptions = IllegalArgumentException.class)
public void testCreateVariantContextWriterNoReference() {
    // should throw due to lack of reference
    final File outFile = createTempFile("testVCFWriter", ".vcf");
    final VariantContextWriter vcw = GATKVariantContextUtils.createVCFWriter(outFile, null, true, Options.INDEX_ON_THE_FLY);
    vcw.close();
}
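For contrast, here is a hedged sketch of the passing case, assuming the same GATKVariantContextUtils.createVCFWriter overload used above: with a non-null sequence dictionary, on-the-fly indexing is permitted. The test class name, the chr1 contig, and the single record written are illustrative and not taken from the GATK test suite.

import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.SAMSequenceRecord;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextBuilder;
import htsjdk.variant.variantcontext.writer.Options;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.vcf.VCFHeader;
import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils;
import org.testng.annotations.Test;

import java.io.File;
import java.util.Arrays;
import java.util.Collections;

public final class CreateVariantContextWriterSketch {

    @Test
    public void testCreateVariantContextWriterWithReference() {
        // A single arbitrary contig is enough to satisfy the reference-dictionary requirement for indexing.
        final SAMSequenceDictionary dict =
                new SAMSequenceDictionary(Collections.singletonList(new SAMSequenceRecord("chr1", 1_000_000)));
        final File outFile = new File("testVCFWriterWithReference.vcf"); // placeholder; a real test would use a temp file

        final VariantContextWriter vcw =
                GATKVariantContextUtils.createVCFWriter(outFile, dict, true, Options.INDEX_ON_THE_FLY);
        final VCFHeader header = new VCFHeader();
        header.setSequenceDictionary(dict);
        vcw.writeHeader(header);

        // Write one minimal record so the on-the-fly index has something to index.
        final VariantContext vc = new VariantContextBuilder("sketch", "chr1", 100, 100,
                Arrays.asList(Allele.create("A", true), Allele.create("C", false))).make();
        vcw.add(vc);
        vcw.close();
    }
}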
Use of htsjdk.variant.variantcontext.writer.VariantContextWriter in project gatk-protected by broadinstitute.
Class HaplotypeCallerSpark, method writeVariants():
/**
 * Writes the variants. This is currently going to be horribly slow and explosive on a full-size file since it performs a collect.
 *
 * This will be replaced by a parallel writer similar to what's done with {@link org.broadinstitute.hellbender.engine.spark.datasources.ReadsSparkSink}.
 */
private void writeVariants(JavaRDD<VariantContext> variants) {
    final List<VariantContext> collectedVariants = variants.collect();
    final SAMSequenceDictionary referenceDictionary = getReferenceSequenceDictionary();
    final List<VariantContext> sortedVariants = collectedVariants.stream()
            .sorted((o1, o2) -> IntervalUtils.compareLocatables(o1, o2, referenceDictionary))
            .collect(Collectors.toList());
    final HaplotypeCallerEngine hcEngine = new HaplotypeCallerEngine(hcArgs, getHeaderForReads(), new ReferenceMultiSourceAdapter(getReference(), getAuthHolder()));
    try (final VariantContextWriter writer = hcEngine.makeVCFWriter(output, getBestAvailableSequenceDictionary())) {
        hcEngine.writeHeader(writer, getHeaderForReads().getSequenceDictionary(), Collections.emptySet());
        sortedVariants.forEach(writer::add);
    }
}
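The sort step above is just a dictionary-ordered comparison of Locatables. A minimal sketch of that comparator on its own, reusing the IntervalUtils.compareLocatables call from the code above, is shown here; the VariantSortingSketch class name is illustrative.

import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.variant.variantcontext.VariantContext;
import org.broadinstitute.hellbender.utils.IntervalUtils;

import java.util.Comparator;
import java.util.List;
import java.util.stream.Collectors;

final class VariantSortingSketch {

    // Sorts variants into the contig order defined by the given dictionary, as in writeVariants above.
    static List<VariantContext> sortByDictionary(final List<VariantContext> variants, final SAMSequenceDictionary dict) {
        final Comparator<VariantContext> byDictionary = (o1, o2) -> IntervalUtils.compareLocatables(o1, o2, dict);
        return variants.stream().sorted(byDictionary).collect(Collectors.toList());
    }
}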