Use of htsjdk.variant.variantcontext.writer.VariantContextWriter in project jvarkit by lindenb.
Class VcfSetSequenceDictionary, method doVcfToVcf.
/**
 * Streams every variant from the input iterator into a writer obtained from
 * {@code this.component.open(delegate)}, reporting progress as it goes.
 *
 * @param inputName name of the input; not read by this implementation
 * @param in        source of the VCF header and of the variants
 * @param delegate  writer handed to the component to obtain the effective output writer
 * @return always 0
 */
@Override
protected int doVcfToVcf(final String inputName, final VcfIterator in, final VariantContextWriter delegate) {
    final VariantContextWriter sink = this.component.open(delegate);
    // The input header is forwarded as-is to the output.
    sink.writeHeader(in.getHeader());

    final SAMSequenceDictionaryProgress tracker =
            new SAMSequenceDictionaryProgress(in.getHeader()).logger(LOG);

    // Each record is passed through the progress tracker before being written.
    for (; in.hasNext(); ) {
        sink.add(tracker.watch(in.next()));
    }

    tracker.finish();
    return 0;
}
Use of htsjdk.variant.variantcontext.writer.VariantContextWriter in project jvarkit by lindenb.
Class SortVcfOnRef2, method sortvcf.
/**
 * Reads a VCF from {@code in}, sorts its records and writes them to {@code outputFile}.
 *
 * <p>The sequence dictionary comes from {@code this.refdict} when that field is set,
 * otherwise from the header of the input. All data lines are loaded into a
 * {@link SortingCollection} (ordered by {@code VariantComparator}), then decoded
 * with the codec that parsed the header and written out in sorted order.</p>
 *
 * @param in reader positioned at the start of the VCF (header first)
 * @return {@code RETURN_OK} on success; {@code -1} when no usable sequence dictionary
 *         is available or when an exception is raised while sorting/writing
 * @throws IOException declared by the signature; the header parsing step runs
 *         outside the try/catch below
 */
protected int sortvcf(BufferedReader in) throws IOException {
    if (this.refdict != null) {
        LOG.info("load dict from " + this.refdict);
        this.dict = SAMSequenceDictionaryExtractor.extractDictionary(this.refdict);
        if (this.dict == null) {
            LOG.error("cannot find sam sequence dictionary from " + refdict);
            // A reference dictionary was explicitly requested: do not silently fall
            // back to the dictionary of the input header, fail instead.
            return -1;
        }
    }
    final VCFUtils.CodecAndHeader cah = VCFUtils.parseHeader(in);
    // Work on a copy of the parsed header so that the dictionary can be replaced.
    final VCFHeader h2 = new VCFHeader(cah.header);
    if (this.dict != null) {
        h2.setSequenceDictionary(this.dict);
    } else {
        this.dict = h2.getSequenceDictionary();
        if (this.dict == null) {
            LOG.error("No internal sequence dictionay found in input");
            return -1;
        }
    }
    addMetaData(h2);
    if (this.dict.isEmpty()) {
        LOG.warn("SEQUENCE DICTIONARY IS EMPTY/NULL");
    }
    CloseableIterator<ChromPosLine> iter = null;
    SortingCollection<ChromPosLine> array = null;
    VariantContextWriter w = null;
    try {
        array = SortingCollection.newInstance(
                ChromPosLine.class,
                new VariantCodec(),
                new VariantComparator(),
                this.writingSortingCollection.getMaxRecordsInRam(),
                this.writingSortingCollection.getTmpPaths());
        array.setDestructiveIteration(true);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(this.dict);
        // Everything after the header is a data line: keep the raw text and sort on it.
        String line;
        while ((line = in.readLine()) != null) {
            final ChromPosLine cpl = new ChromPosLine(line);
            progress.watch(cpl.tid, cpl.pos);
            array.add(cpl);
        }
        array.doneAdding();
        progress.finish();
        // The output is only opened once all input lines have been read.
        w = super.openVariantContextWriter(outputFile);
        w.writeHeader(h2);
        iter = array.iterator();
        while (iter.hasNext()) {
            w.add(cah.codec.decode(iter.next().line));
            // NOTE(review): a writer error stops the output early but the method
            // still returns RETURN_OK -- presumably to tolerate a closed downstream
            // pipe; confirm this is intended.
            if (w.checkError()) {
                break;
            }
        }
        return RETURN_OK;
    } catch (Exception e) {
        LOG.error(e);
        return -1;
    } finally {
        CloserUtil.close(w);
        CloserUtil.close(iter);
        if (array != null) {
            array.cleanup();
        }
    }
}
Use of htsjdk.variant.variantcontext.writer.VariantContextWriter in project gatk-protected by broadinstitute.
Class HaplotypeCallerEngine, method makeVCFWriter.
/**
 * Opens the output writer for the caller: a plain VCF writer, wrapped in a
 * {@link GVCFWriter} when reference-confidence mode is GVCF.
 *
 * @param outputVCF path of the file the variants are written to; must not be null
 * @param readsDictionary sequence dictionary of the reads; must not be null
 * @return the plain VCF writer, or a GVCF writer wrapping it when
 *         {@code hcArgs.emitReferenceConfidence} is {@code GVCF}
 * @throws CommandLineException.BadArgumentValue if constructing the GVCF writer
 *         rejects its arguments with an {@link IllegalArgumentException}
 */
public VariantContextWriter makeVCFWriter(final String outputVCF, final SAMSequenceDictionary readsDictionary) {
    Utils.nonNull(outputVCF);
    Utils.nonNull(readsDictionary);

    final VariantContextWriter plainWriter =
            GATKVariantContextUtils.createVCFWriter(new File(outputVCF), readsDictionary, false);

    // Any mode other than GVCF uses the plain writer directly.
    if (hcArgs.emitReferenceConfidence != ReferenceConfidenceMode.GVCF) {
        return plainWriter;
    }

    try {
        return new GVCFWriter(plainWriter, hcArgs.GVCFGQBands, hcArgs.genotypeArgs.samplePloidy);
    } catch (final IllegalArgumentException badBands) {
        // Reported to the user as a problem with the GQBands argument.
        throw new CommandLineException.BadArgumentValue("GQBands", "are malformed: " + badBands.getMessage());
    }
}
Use of htsjdk.variant.variantcontext.writer.VariantContextWriter in project gatk-protected by broadinstitute.
Class CreateSomaticPanelOfNormals, method doWork.
/**
 * Merges the input VCFs in coordinate order and writes the panel-of-normals VCF.
 *
 * <p>Variants from all inputs are iterated through a single {@link MergingIterator};
 * consecutive variants that overlap the current start position are grouped and each
 * group is handed to {@code processVariantsAtSamePosition} together with the writer.</p>
 *
 * @return the string {@code "SUCCESS"}
 */
public Object doWork() {
    final List<File> inputVcfs = new ArrayList<>(vcfs);
    final Collection<CloseableIterator<VariantContext>> iterators = new ArrayList<>(inputVcfs.size());
    final Collection<VCFHeader> headers = new HashSet<>(inputVcfs.size());

    // The first input is opened only to read its header: close that reader right away
    // instead of leaking it (the file is opened again in the loop below).
    final VCFHeader headerOfFirstVcf;
    try (final VCFFileReader firstReader = new VCFFileReader(inputVcfs.get(0), false)) {
        headerOfFirstVcf = firstReader.getFileHeader();
    }
    final SAMSequenceDictionary sequenceDictionary = headerOfFirstVcf.getSequenceDictionary();
    final VariantContextComparator comparator = headerOfFirstVcf.getVCFRecordComparator();

    for (final File vcf : inputVcfs) {
        final VCFFileReader reader = new VCFFileReader(vcf, false);
        iterators.add(reader.iterator());
        final VCFHeader header = reader.getFileHeader();
        // Every input must use contigs compatible with the comparator built from the first header.
        Utils.validateArg(comparator.isCompatible(header.getContigLines()), () -> vcf.getAbsolutePath() + " has incompatible contigs.");
        headers.add(header);
    }

    // try-with-resources closes the merging iterator, then the writer, even on failure.
    try (final VariantContextWriter writer = GATKVariantContextUtils.createVCFWriter(outputVcf, sequenceDictionary, false, Options.INDEX_ON_THE_FLY)) {
        writer.writeHeader(new VCFHeader(VCFUtils.smartMergeHeaders(headers, false)));
        try (final MergingIterator<VariantContext> mergingIterator = new MergingIterator<>(comparator, iterators)) {
            // Placeholder interval that no real variant overlaps, so the first variant starts a new group.
            SimpleInterval currentPosition = new SimpleInterval("FAKE", 1, 1);
            final List<VariantContext> variantsAtThisPosition = new ArrayList<>(20);
            while (mergingIterator.hasNext()) {
                final VariantContext vc = mergingIterator.next();
                if (!currentPosition.overlaps(vc)) {
                    // A new position begins: emit the group collected so far (empty on the first pass).
                    processVariantsAtSamePosition(variantsAtThisPosition, writer);
                    variantsAtThisPosition.clear();
                    currentPosition = new SimpleInterval(vc.getContig(), vc.getStart(), vc.getStart());
                }
                variantsAtThisPosition.add(vc);
            }
            // Emit the group still pending when the input runs out; without this
            // the variants at the last position never reach the writer.
            processVariantsAtSamePosition(variantsAtThisPosition, writer);
        }
    }
    return "SUCCESS";
}
Use of htsjdk.variant.variantcontext.writer.VariantContextWriter in project gatk-protected by broadinstitute.
Class EvaluateCopyNumberTriStateCalls, method openVCFWriter.
/**
 * Opens a VCF writer on the given file and writes the evaluation header to it.
 *
 * <p>The header declares the given samples, the lines contributed by
 * {@code CopyNumberTriStateAllele} and {@code EvaluationClass}, the FORMAT and INFO
 * annotations listed below, and one FILTER line per {@code EvaluationFilter} name
 * and acronym plus the PASS filter.</p>
 *
 * @param outputFile destination file of the writer
 * @param samples sample names declared in the header
 * @return the writer, with its header already written
 */
private VariantContextWriter openVCFWriter(final File outputFile, final Set<String> samples) {
    final VariantContextWriterBuilder builder = new VariantContextWriterBuilder();
    builder.setOutputFile(outputFile);
    // Drop whatever options the builder enables by default.
    builder.clearOptions();
    final VariantContextWriter result = builder.build();
    final VCFHeader header = new VCFHeader(Collections.emptySet(), samples);
    CopyNumberTriStateAllele.addHeaderLinesTo(header);
    EvaluationClass.addHeaderLinesTo(header);
    // Format annotations.
    header.addMetaDataLine(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.Character, "Called genotype"));
    // CALL_QUALITY_KEY is declared exactly once: a FORMAT ID must not appear twice in the header.
    header.addMetaDataLine(new VCFFormatHeaderLine(VariantEvaluationContext.CALL_QUALITY_KEY, 1, VCFHeaderLineType.Float, "Quality of the call"));
    header.addMetaDataLine(new VCFFormatHeaderLine(VariantEvaluationContext.CALLED_SEGMENTS_COUNT_KEY, 1, VCFHeaderLineType.Integer, "Number of called segments that overlap with the truth"));
    header.addMetaDataLine(new VCFFormatHeaderLine(VariantEvaluationContext.CALLED_ALLELE_COUNTS_KEY, VCFHeaderLineCount.G, VCFHeaderLineType.Integer, "Called allele count for mixed calls"));
    header.addMetaDataLine(new VCFFormatHeaderLine(VariantEvaluationContext.TRUTH_COPY_FRACTION_KEY, 1, VCFHeaderLineType.Float, "Truth copy fraction estimated"));
    header.addMetaDataLine(new VCFFormatHeaderLine(VariantEvaluationContext.TRUTH_QUALITY_KEY, 1, VCFHeaderLineType.Float, "Truth call quality"));
    header.addMetaDataLine(new VCFFormatHeaderLine(VariantEvaluationContext.EVALUATION_CLASS_KEY, 1, VCFHeaderLineType.Character, "The evaluation class for the call or lack of call. It the values of the header key '" + EvaluationClass.VCF_HEADER_KEY + "'"));
    header.addMetaDataLine(new VCFFormatHeaderLine(VariantEvaluationContext.TRUTH_GENOTYPE_KEY, 1, VCFHeaderLineType.Character, "The truth genotype"));
    header.addMetaDataLine(new VCFFormatHeaderLine(VariantEvaluationContext.CALLED_TARGET_COUNT_KEY, 1, VCFHeaderLineType.Integer, "Number of targets covered by called segments"));
    header.addMetaDataLine(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_QUALITY_KEY, 1, VCFHeaderLineType.Integer, "The quality of the call (the maximum if there are more than one segment"));
    header.addMetaDataLine(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_FILTER_KEY, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Character, "Genotype filters"));
    // Info annotations.
    header.addMetaDataLine(new VCFInfoHeaderLine(VariantEvaluationContext.TRUTH_ALLELE_FREQUENCY_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "The frequency of the alternative alleles in the truth callset"));
    header.addMetaDataLine(new VCFInfoHeaderLine(VariantEvaluationContext.TRUTH_ALLELE_NUMBER_KEY, 1, VCFHeaderLineType.Integer, "Total number of called alleles in the truth callset"));
    header.addMetaDataLine(new VCFInfoHeaderLine(VariantEvaluationContext.CALLS_ALLELE_FREQUENCY_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "The frequency of the alternative alleles in the actual callset"));
    header.addMetaDataLine(new VCFInfoHeaderLine(VariantEvaluationContext.CALLS_ALLELE_NUMBER_KEY, 1, VCFHeaderLineType.Integer, "Total number of called alleles in the actual callset"));
    header.addMetaDataLine(new VCFInfoHeaderLine(VariantEvaluationContext.TRUTH_TARGET_COUNT_KEY, 1, VCFHeaderLineType.Integer, "Number of targets overlapped by this variant"));
    header.addMetaDataLine(new VCFInfoHeaderLine(VCFConstants.END_KEY, 1, VCFHeaderLineType.Integer, "Stop position for the variant"));
    // Filter annotations: each filter is declared under both its enum name and its acronym.
    for (final EvaluationFilter filter : EvaluationFilter.values()) {
        header.addMetaDataLine(new VCFFilterHeaderLine(filter.name(), filter.description));
        header.addMetaDataLine(new VCFFilterHeaderLine(filter.acronym, filter.description));
    }
    header.addMetaDataLine(new VCFFilterHeaderLine(EvaluationFilter.PASS, "Indicates that it passes all filters"));
    result.writeHeader(header);
    return result;
}
Aggregations