use of htsjdk.variant.vcf.VCFInfoHeaderLine in project jvarkit by lindenb.
the class VcfMultiToOneInfo method doVcfToVcf.
/**
 * Splits the values of the INFO attribute named by {@code this.infoTag}: a variant carrying
 * several values for that attribute is written once per value, each copy holding a single value.
 * Variants with zero or one value are written unchanged.
 *
 * @param inputName name of the input (not used by this method)
 * @param in source of the header and of the variants
 * @param out destination writer; receives the header, then the variants
 * @return {@code RETURN_OK} on success, {@code -1} if the INFO field is not declared in the
 *         header, has an unsupported count type, or is of type Flag
 */
@Override
protected int doVcfToVcf(final String inputName, final VcfIterator in, final VariantContextWriter out) {
    final VCFHeader srcHeader = in.getHeader();
    final VCFInfoHeaderLine srcInfo = srcHeader.getInfoHeaderLine(this.infoTag);
    if (srcInfo == null) {
        LOG.error("Cannot find INFO FIELD '" + this.infoTag + "'");
        return -1;
    }

    // Only INTEGER and UNBOUNDED count types are accepted.
    switch (srcInfo.getCountType()) {
        case INTEGER:
        case UNBOUNDED:
            break;
        default:
            LOG.error("CountType is not supported '" + srcInfo.getCountType() + "'");
            return -1;
    }

    // Flag attributes are rejected.
    switch (srcInfo.getType()) {
        case Flag:
            LOG.error("Type is not supported '" + srcInfo.getType() + "'");
            return -1;
        default:
            break;
    }

    final VCFHeader destHeader = new VCFHeader(srcHeader);
    super.addMetaData(destHeader);
    final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(srcHeader);
    out.writeHeader(destHeader);

    final String tagId = srcInfo.getID();
    while (in.hasNext()) {
        final VariantContext variant = progress.watch(in.next());
        final List<Object> values = variant.getAttributeAsList(tagId);
        if (values.size() > 1) {
            // One output record per value of the attribute.
            for (final Object value : values) {
                out.add(new VariantContextBuilder(variant).attribute(tagId, value).make());
            }
        } else {
            out.add(variant);
        }
    }

    progress.finish();
    LOG.info("done");
    return RETURN_OK;
}
use of htsjdk.variant.vcf.VCFInfoHeaderLine in project jvarkit by lindenb.
the class VcfRegulomeDB method doVcfToVcf.
/**
 * Annotates each variant with the RegData records found within {@code this.extend} bases of it.
 * Matching records are stored under the INFO attribute {@code this.infoTag}, each formatted as
 * {@code Position|Distance|Rank}. Variants without any accepted record are written unchanged.
 *
 * @param inputName name of the input (not used by this method)
 * @param in source of the header and of the variants
 * @param out destination writer; receives the header, then the variants
 * @return always {@code 0}
 */
@Override
protected int doVcfToVcf(String inputName, VcfIterator in, VariantContextWriter out) {
    final VCFHeader header = in.getHeader();
    final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header.getSequenceDictionary());

    // Record the tool's command line and version, and declare the new INFO attribute.
    final String toolName = getClass().getSimpleName();
    header.addMetaDataLine(new VCFHeaderLine(toolName + "CmdLine", String.valueOf(getProgramCommandLine())));
    header.addMetaDataLine(new VCFHeaderLine(toolName + "Version", String.valueOf(getVersion())));
    header.addMetaDataLine(new VCFInfoHeaderLine(this.infoTag, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Format: Position|Distance|Rank"));
    out.writeHeader(header);

    while (in.hasNext()) {
        final VariantContext variant = in.next();
        progress.watch(variant.getContig(), variant.getStart());

        // Query window around the variant, clamped so that it never starts below 0.
        final int windowStart = Math.max(0, variant.getStart() - this.extend);
        final int windowEnd = variant.getEnd() + this.extend;

        final List<String> annotations = new ArrayList<String>();
        final Iterator<RegData> hits = this.regDataTabixFileReader.iterator(variant.getContig(), windowStart, windowEnd);
        while (hits.hasNext()) {
            final RegData hit = hits.next();
            // Without a regex every record is kept; otherwise the rank must match it entirely.
            if (this.acceptRegex == null || this.acceptRegex.matcher(hit.rank).matches()) {
                // NOTE(review): "getStart() - 1" presumably converts the variant start to the
                // coordinate system used by chromSart -- confirm.
                final StringBuilder formatted = new StringBuilder();
                formatted.append(hit.chromSart);
                formatted.append("|");
                formatted.append(Math.abs(hit.chromSart - (variant.getStart() - 1)));
                formatted.append("|");
                formatted.append(hit.rank);
                annotations.add(formatted.toString());
            }
        }

        if (annotations.isEmpty()) {
            out.add(variant);
        } else {
            final VariantContextBuilder builder = new VariantContextBuilder(variant);
            builder.attribute(this.infoTag, annotations.toArray());
            out.add(builder.make());
        }
    }

    progress.finish();
    return 0;
}
use of htsjdk.variant.vcf.VCFInfoHeaderLine in project gatk-protected by broadinstitute.
the class AnnotateVcfWithExpectedAlleleFraction method onTraversalStart.
/**
 * Prepares the tool before traversal: writes the output VCF header and loads the per-sample
 * mixing fractions.
 *
 * <p>The output header contains the input header's metadata lines, an INFO line declaring
 * {@code EXPECTED_ALLELE_FRACTION_NAME} and the default tool header lines. The mixing fractions
 * read from {@code inputMixingFractions} are stored in {@code mixingFractionsInSampleOrder},
 * ordered like the sample names of the input header.
 *
 * @throws IllegalArgumentException if a sample of the input header has no mixing fraction
 */
@Override
public void onTraversalStart() {
    final VCFHeader inputHeader = getHeaderForVariants();
    final Set<VCFHeaderLine> headerLines = new HashSet<>(inputHeader.getMetaDataInSortedOrder());
    headerLines.add(new VCFInfoHeaderLine(EXPECTED_ALLELE_FRACTION_NAME, 1, VCFHeaderLineType.Float, "expected allele fraction in pooled bam"));
    // The tool lines must be in the set before the header is built: lines added to the set
    // after the VCFHeader has been constructed do not end up in the written header.
    headerLines.addAll(getDefaultToolVCFHeaderLines());
    final VCFHeader vcfHeader = new VCFHeader(headerLines, inputHeader.getGenotypeSamples());
    vcfWriter = createVCFWriter(outputVcf);
    vcfWriter.writeHeader(vcfHeader);

    final List<MixingFraction> mixingFractionsList = MixingFraction.readMixingFractions(inputMixingFractions);
    final Map<String, Double> mixingfractionsMap = mixingFractionsList.stream().collect(Collectors.toMap(MixingFraction::getSample, MixingFraction::getMixingFraction));
    mixingFractionsInSampleOrder = inputHeader.getSampleNamesInOrder().stream()
            .mapToDouble(sample -> {
                final Double fraction = mixingfractionsMap.get(sample);
                if (fraction == null) {
                    // Fail with the offending sample name instead of an unexplained
                    // NullPointerException while unboxing.
                    throw new IllegalArgumentException("No mixing fraction found for sample '" + sample + "'");
                }
                return fraction;
            })
            .toArray();
}
use of htsjdk.variant.vcf.VCFInfoHeaderLine in project gatk by broadinstitute.
the class AnnotateVcfWithExpectedAlleleFraction method onTraversalStart.
/**
 * Prepares the tool before traversal: writes the output VCF header and loads the per-sample
 * mixing fractions.
 *
 * <p>The output header contains the input header's metadata lines, an INFO line declaring
 * {@code EXPECTED_ALLELE_FRACTION_NAME} and the default tool header lines. The mixing fractions
 * read from {@code inputMixingFractions} are stored in {@code mixingFractionsInSampleOrder},
 * ordered like the sample names of the input header.
 *
 * @throws IllegalArgumentException if a sample of the input header has no mixing fraction
 */
@Override
public void onTraversalStart() {
    final VCFHeader inputHeader = getHeaderForVariants();
    final Set<VCFHeaderLine> headerLines = new HashSet<>(inputHeader.getMetaDataInSortedOrder());
    headerLines.add(new VCFInfoHeaderLine(EXPECTED_ALLELE_FRACTION_NAME, 1, VCFHeaderLineType.Float, "expected allele fraction in pooled bam"));
    // The tool lines must be in the set before the header is built: lines added to the set
    // after the VCFHeader has been constructed do not end up in the written header.
    headerLines.addAll(getDefaultToolVCFHeaderLines());
    final VCFHeader vcfHeader = new VCFHeader(headerLines, inputHeader.getGenotypeSamples());
    vcfWriter = createVCFWriter(outputVcf);
    vcfWriter.writeHeader(vcfHeader);

    final List<MixingFraction> mixingFractionsList = MixingFraction.readMixingFractions(inputMixingFractions);
    final Map<String, Double> mixingfractionsMap = mixingFractionsList.stream().collect(Collectors.toMap(MixingFraction::getSample, MixingFraction::getMixingFraction));
    mixingFractionsInSampleOrder = inputHeader.getSampleNamesInOrder().stream()
            .mapToDouble(sample -> {
                final Double fraction = mixingfractionsMap.get(sample);
                if (fraction == null) {
                    // Fail with the offending sample name instead of an unexplained
                    // NullPointerException while unboxing.
                    throw new IllegalArgumentException("No mixing fraction found for sample '" + sample + "'");
                }
                return fraction;
            })
            .toArray();
}
use of htsjdk.variant.vcf.VCFInfoHeaderLine in project jvarkit by lindenb.
the class VcfMultiToOne method doWork.
/**
 * Merges the given VCF inputs (or standard input when no argument is given) into one VCF
 * whose only sample is {@code DEFAULT_VCF_SAMPLE_NAME}. At each step the smallest pending
 * variant among the inputs, according to the chosen comparator, is consumed and written once
 * per retained genotype. Each written record carries the originating sample name (when there
 * is one) and the originating input name as INFO attributes.
 *
 * @param arguments paths of the VCF inputs, expanded through {@code IOUtils.unrollFiles}
 * @return {@code 0} on success, {@code -1} when no VCF could be resolved from the arguments,
 *         when the sequence dictionaries of the inputs disagree, or when an exception is caught
 */
@Override
public int doWork(final List<String> arguments) {
    final Set<String> args = IOUtils.unrollFiles(arguments);
    final List<VcfIterator> inputs = new ArrayList<>(args.size() + 1);
    // Kept parallel to 'inputs': inputFiles.get(i) is the label of inputs.get(i).
    final List<String> inputFiles = new ArrayList<>(args.size() + 1);
    VariantContextWriter out = null;
    try {
        if (!args.isEmpty()) {
            for (final String vcfFile : args) {
                inputs.add(VCFUtils.createVcfIterator(vcfFile));
                inputFiles.add(VCFUtils.escapeInfoField(vcfFile));
            }
        } else if (arguments.isEmpty()) {
            inputs.add(VCFUtils.createVcfIteratorStdin());
            inputFiles.add("stdin");
        } else {
            LOG.error("No vcf provided");
            return -1;
        }

        // Collect metadata and sample names, checking the dictionaries against each other.
        SAMSequenceDictionary dict = null;
        final Set<String> sampleNames = new HashSet<String>();
        final Set<VCFHeaderLine> metaData = new HashSet<VCFHeaderLine>();
        for (final VcfIterator input : inputs) {
            final VCFHeader header = input.getHeader();
            final SAMSequenceDictionary headerDict = header.getSequenceDictionary();
            if (dict == null) {
                dict = headerDict;
            } else if (headerDict == null) {
                LOG.error("No Dictionary in vcf");
                return -1;
            } else if (!SequenceUtil.areSequenceDictionariesEqual(dict, headerDict)) {
                LOG.error("Not the same dictionary between vcfs");
                return -1;
            }
            metaData.addAll(header.getMetaDataInInputOrder());
            sampleNames.addAll(header.getSampleNamesInOrder());
        }

        // Without a dictionary, fall back to the comparator that does not need one.
        final Comparator<VariantContext> comparator;
        if (dict == null) {
            comparator = VCFUtils.createChromPosRefComparator();
        } else {
            comparator = VCFUtils.createTidPosRefComparator(dict);
        }

        metaData.add(new VCFInfoHeaderLine(DEFAULT_SAMPLE_TAGID, 1, VCFHeaderLineType.String, "Sample Name from multi-sample vcf"));
        metaData.add(new VCFInfoHeaderLine(DEFAULT_SAMPLE_FILETAGID, 1, VCFHeaderLineType.String, "Origin of sample"));
        for (final String sampleName : sampleNames) {
            metaData.add(new VCFHeaderLine(SAMPLE_HEADER_DECLARATION, sampleName));
        }

        final VCFHeader mergedHeader = new VCFHeader(metaData, Collections.singleton(DEFAULT_VCF_SAMPLE_NAME));
        recalculator.setHeader(mergedHeader);
        out = super.openVariantContextWriter(this.outputFile);
        out.writeHeader(mergedHeader);

        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(dict);
        while (!out.checkError()) {
            // Find the input whose pending variant is the smallest, dropping exhausted inputs.
            VariantContext smallest = null;
            int bestIndex = -1;
            int cursor = 0;
            while (cursor < inputs.size()) {
                final VcfIterator candidate = inputs.get(cursor);
                if (candidate.hasNext()) {
                    final VariantContext head = candidate.peek();
                    if (smallest == null || comparator.compare(smallest, head) > 0) {
                        smallest = head;
                        bestIndex = cursor;
                    }
                    ++cursor;
                } else {
                    // Removal shifts later entries down, so 'cursor' is not advanced here.
                    CloserUtil.close(candidate);
                    inputs.remove(cursor);
                    inputFiles.remove(cursor);
                }
            }
            if (smallest == null) {
                break;
            }

            final VariantContext variant = progress.watch(inputs.get(bestIndex).next());
            final String origin = inputFiles.get(bestIndex);
            final int sampleCount = variant.getNSamples();

            if (sampleCount == 0) {
                // No genotype at all: write one record with a missing genotype, unless
                // no-calls are discarded.
                if (!this.discard_no_call) {
                    final VariantContextBuilder builder = new VariantContextBuilder(variant);
                    builder.attribute(DEFAULT_SAMPLE_FILETAGID, origin);
                    builder.genotypes(GenotypeBuilder.createMissing(DEFAULT_VCF_SAMPLE_NAME, 2));
                    out.add(this.recalculator.apply(builder.make()));
                }
                continue;
            }

            for (int sampleIndex = 0; sampleIndex < sampleCount; ++sampleIndex) {
                final Genotype genotype = variant.getGenotype(sampleIndex);
                final boolean skipped = (!genotype.isCalled() && this.discard_no_call)
                        || (!genotype.isAvailable() && this.discard_non_available)
                        || (genotype.isHomRef() && this.discard_hom_ref);
                if (skipped) {
                    continue;
                }
                // Rename the genotype to the single output sample; the original sample name
                // is kept in the INFO column.
                final GenotypeBuilder renamed = new GenotypeBuilder(genotype);
                renamed.name(DEFAULT_VCF_SAMPLE_NAME);
                final VariantContextBuilder builder = new VariantContextBuilder(variant);
                builder.attribute(DEFAULT_SAMPLE_TAGID, genotype.getSampleName());
                builder.attribute(DEFAULT_SAMPLE_FILETAGID, origin);
                builder.genotypes(renamed.make());
                out.add(this.recalculator.apply(builder.make()));
            }
        }
        progress.finish();
        LOG.debug("done");
        return 0;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(inputs);
        CloserUtil.close(out);
    }
}
Aggregations