Search in sources :

Example 26 with VCFInfoHeaderLine

use of htsjdk.variant.vcf.VCFInfoHeaderLine in project jvarkit by lindenb.

the class VcfMultiToOneInfo method doVcfToVcf.

/**
 * Splits every variant carrying several values for the configured INFO tag
 * into one output variant per value; variants with zero or one value are
 * written unchanged.
 *
 * @param inputName name of the input (not used by this method)
 * @param in        source of the VCF header and variants
 * @param out       writer receiving the header and the resulting variants
 * @return {@code RETURN_OK} on success, {@code -1} when the INFO tag is not
 *         declared in the header or its count type / value type is not supported
 */
@Override
protected int doVcfToVcf(final String inputName, final VcfIterator in, final VariantContextWriter out) {
    final VCFHeader srcHeader = in.getHeader();
    final VCFInfoHeaderLine srcInfo = srcHeader.getInfoHeaderLine(this.infoTag);
    if (srcInfo == null) {
        LOG.error("Cannot find INFO FIELD '" + this.infoTag + "'");
        return -1;
    }
    // Only fixed-integer and unbounded counts are accepted.
    switch(srcInfo.getCountType()) {
        case INTEGER:
        case UNBOUNDED:
            break;
        default:
            LOG.error("CountType is not supported '" + srcInfo.getCountType() + "'");
            return -1;
    }
    // Flag attributes are rejected; every other type is accepted.
    switch(srcInfo.getType()) {
        case Flag:
            LOG.error("Type is not supported '" + srcInfo.getType() + "'");
            return -1;
        default:
            break;
    }
    final VCFHeader destHeader = new VCFHeader(srcHeader);
    super.addMetaData(destHeader);
    final SAMSequenceDictionaryProgress progess = new SAMSequenceDictionaryProgress(srcHeader);
    out.writeHeader(destHeader);
    final String tagId = srcInfo.getID();
    while (in.hasNext()) {
        final VariantContext variant = progess.watch(in.next());
        final List<Object> values = variant.getAttributeAsList(tagId);
        if (values.size() > 1) {
            // Emit one copy of the variant per value of the attribute.
            for (final Object value : values) {
                out.add(new VariantContextBuilder(variant).attribute(tagId, value).make());
            }
        } else {
            out.add(variant);
        }
    }
    progess.finish();
    LOG.info("done");
    return RETURN_OK;
}
Also used : SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) VariantContext(htsjdk.variant.variantcontext.VariantContext) VCFHeader(htsjdk.variant.vcf.VCFHeader) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine)

Example 27 with VCFInfoHeaderLine

use of htsjdk.variant.vcf.VCFInfoHeaderLine in project jvarkit by lindenb.

the class VcfRegulomeDB method doVcfToVcf.

/**
 * Annotates each variant with the records returned by the tabix reader for
 * the window {@code [start - extend, end + extend]} around the variant.
 * Each retained record is encoded as {@code Position|Distance|Rank} and
 * stored under the configured INFO tag; variants with no retained record
 * are written unchanged.
 *
 * @param inputName name of the input (not used by this method)
 * @param in        source of the VCF header and variants
 * @param out       writer receiving the header and the resulting variants
 * @return always {@code 0}
 */
@Override
protected int doVcfToVcf(String inputName, VcfIterator in, VariantContextWriter out) {
    final VCFHeader header = in.getHeader();
    final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header.getSequenceDictionary());
    final String toolName = getClass().getSimpleName();
    header.addMetaDataLine(new VCFHeaderLine(toolName + "CmdLine", String.valueOf(getProgramCommandLine())));
    header.addMetaDataLine(new VCFHeaderLine(toolName + "Version", String.valueOf(getVersion())));
    header.addMetaDataLine(new VCFInfoHeaderLine(this.infoTag, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Format: Position|Distance|Rank"));
    out.writeHeader(header);
    while (in.hasNext()) {
        final VariantContext variant = in.next();
        progress.watch(variant.getContig(), variant.getStart());
        // Query window, clamped so the lower bound never goes below zero.
        final int windowStart = Math.max(0, variant.getStart() - this.extend);
        final int windowEnd = variant.getEnd() + this.extend;
        final List<String> annotations = new ArrayList<String>();
        final Iterator<RegData> hits = this.regDataTabixFileReader.iterator(variant.getContig(), windowStart, windowEnd);
        while (hits.hasNext()) {
            final RegData hit = hits.next();
            // When a rank filter is configured, skip records whose rank does not match it.
            if (this.acceptRegex != null && !this.acceptRegex.matcher(hit.rank).matches()) {
                continue;
            }
            annotations.add(String.valueOf(hit.chromSart) + "|" + Math.abs(hit.chromSart - (variant.getStart() - 1)) + "|" + hit.rank);
        }
        if (!annotations.isEmpty()) {
            out.add(new VariantContextBuilder(variant).attribute(this.infoTag, annotations.toArray()).make());
        } else {
            out.add(variant);
        }
    }
    progress.finish();
    return 0;
}
Also used : VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) VCFHeader(htsjdk.variant.vcf.VCFHeader) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine)

Example 28 with VCFInfoHeaderLine

use of htsjdk.variant.vcf.VCFInfoHeaderLine in project gatk-protected by broadinstitute.

the class AnnotateVcfWithExpectedAlleleFraction method onTraversalStart.

/**
 * Prepares the output VCF and the per-sample mixing fractions before traversal.
 *
 * <p>The output header is built from the input header's metadata lines, the
 * expected-allele-fraction INFO declaration and the default tool header lines,
 * and is written to a writer opened on {@code outputVcf}. The mixing fractions
 * read from {@code inputMixingFractions} are then stored in the order of the
 * input header's sample names.
 */
@Override
public void onTraversalStart() {
    final VCFHeader inputHeader = getHeaderForVariants();
    final Set<VCFHeaderLine> headerLines = new HashSet<>(inputHeader.getMetaDataInSortedOrder());
    headerLines.add(new VCFInfoHeaderLine(EXPECTED_ALLELE_FRACTION_NAME, 1, VCFHeaderLineType.Float, "expected allele fraction in pooled bam"));
    // The default tool lines must be in the set BEFORE the header is constructed:
    // the header takes its lines from the set at construction time, so lines added
    // to the set afterwards never reach the written header.
    headerLines.addAll(getDefaultToolVCFHeaderLines());
    final VCFHeader vcfHeader = new VCFHeader(headerLines, inputHeader.getGenotypeSamples());
    vcfWriter = createVCFWriter(outputVcf);
    vcfWriter.writeHeader(vcfHeader);
    final List<MixingFraction> mixingFractionsList = MixingFraction.readMixingFractions(inputMixingFractions);
    // Collectors.toMap rejects duplicate keys, so each sample may appear only once in the input.
    final Map<String, Double> mixingfractionsMap = mixingFractionsList.stream().collect(Collectors.toMap(MixingFraction::getSample, MixingFraction::getMixingFraction));
    // NOTE(review): a header sample with no entry in the map yields null here and fails
    // on unboxing with a NullPointerException; confirm whether a clearer error is wanted.
    mixingFractionsInSampleOrder = inputHeader.getSampleNamesInOrder().stream().mapToDouble(mixingfractionsMap::get).toArray();
}
Also used : VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) VCFHeader(htsjdk.variant.vcf.VCFHeader) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) HashSet(java.util.HashSet)

Example 29 with VCFInfoHeaderLine

use of htsjdk.variant.vcf.VCFInfoHeaderLine in project gatk by broadinstitute.

the class AnnotateVcfWithExpectedAlleleFraction method onTraversalStart.

/**
 * Prepares the output VCF and the per-sample mixing fractions before traversal.
 *
 * <p>The output header is built from the input header's metadata lines, the
 * expected-allele-fraction INFO declaration and the default tool header lines,
 * and is written to a writer opened on {@code outputVcf}. The mixing fractions
 * read from {@code inputMixingFractions} are then stored in the order of the
 * input header's sample names.
 */
@Override
public void onTraversalStart() {
    final VCFHeader inputHeader = getHeaderForVariants();
    final Set<VCFHeaderLine> headerLines = new HashSet<>(inputHeader.getMetaDataInSortedOrder());
    headerLines.add(new VCFInfoHeaderLine(EXPECTED_ALLELE_FRACTION_NAME, 1, VCFHeaderLineType.Float, "expected allele fraction in pooled bam"));
    // The default tool lines must be in the set BEFORE the header is constructed:
    // the header takes its lines from the set at construction time, so lines added
    // to the set afterwards never reach the written header.
    headerLines.addAll(getDefaultToolVCFHeaderLines());
    final VCFHeader vcfHeader = new VCFHeader(headerLines, inputHeader.getGenotypeSamples());
    vcfWriter = createVCFWriter(outputVcf);
    vcfWriter.writeHeader(vcfHeader);
    final List<MixingFraction> mixingFractionsList = MixingFraction.readMixingFractions(inputMixingFractions);
    // Collectors.toMap rejects duplicate keys, so each sample may appear only once in the input.
    final Map<String, Double> mixingfractionsMap = mixingFractionsList.stream().collect(Collectors.toMap(MixingFraction::getSample, MixingFraction::getMixingFraction));
    // NOTE(review): a header sample with no entry in the map yields null here and fails
    // on unboxing with a NullPointerException; confirm whether a clearer error is wanted.
    mixingFractionsInSampleOrder = inputHeader.getSampleNamesInOrder().stream().mapToDouble(mixingfractionsMap::get).toArray();
}
Also used : VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) VCFHeader(htsjdk.variant.vcf.VCFHeader) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) HashSet(java.util.HashSet)

Example 30 with VCFInfoHeaderLine

use of htsjdk.variant.vcf.VCFInfoHeaderLine in project jvarkit by lindenb.

the class VcfMultiToOne method doWork.

/**
 * Merges one or more VCF inputs into a single-sample VCF.
 *
 * <p>Inputs are read from the unrolled {@code arguments}, or from stdin when
 * no argument is given. Their headers are merged, then variants are consumed
 * in comparator order across all inputs. Every retained genotype of a variant
 * is written as its own record under the sample {@code DEFAULT_VCF_SAMPLE_NAME},
 * tagged with the original sample name and the originating input.
 *
 * @param arguments command-line arguments naming the VCF inputs
 * @return {@code 0} on success, {@code -1} on any error
 */
@Override
public int doWork(final List<String> arguments) {
    VariantContextWriter out = null;
    final Set<String> args = IOUtils.unrollFiles(arguments);
    final List<VcfIterator> inputs = new ArrayList<>(args.size() + 1);
    final List<String> inputFiles = new ArrayList<>(args.size() + 1);
    try {
        if (!args.isEmpty()) {
            for (final String vcfFile : args) {
                inputs.add(VCFUtils.createVcfIterator(vcfFile));
                inputFiles.add(VCFUtils.escapeInfoField(vcfFile));
            }
        } else if (arguments.isEmpty()) {
            inputs.add(VCFUtils.createVcfIteratorStdin());
            inputFiles.add("stdin");
        } else {
            // Arguments were given but unrolled to nothing.
            LOG.error("No vcf provided");
            return -1;
        }

        /* merge the headers: dictionary, metadata lines and sample names */
        SAMSequenceDictionary dict = null;
        final Set<String> sampleNames = new HashSet<String>();
        final Set<VCFHeaderLine> metaData = new HashSet<VCFHeaderLine>();
        for (final VcfIterator in : inputs) {
            final VCFHeader header = in.getHeader();
            final SAMSequenceDictionary headerDict = header.getSequenceDictionary();
            if (dict == null) {
                dict = headerDict;
            } else if (headerDict == null) {
                LOG.error("No Dictionary in vcf");
                return -1;
            } else if (!SequenceUtil.areSequenceDictionariesEqual(dict, headerDict)) {
                LOG.error("Not the same dictionary between vcfs");
                return -1;
            }
            metaData.addAll(header.getMetaDataInInputOrder());
            sampleNames.addAll(header.getSampleNamesInOrder());
        }

        /* order by dictionary index when a dictionary is available */
        final Comparator<VariantContext> comparator;
        if (dict == null) {
            comparator = VCFUtils.createChromPosRefComparator();
        } else {
            comparator = VCFUtils.createTidPosRefComparator(dict);
        }

        metaData.add(new VCFInfoHeaderLine(DEFAULT_SAMPLE_TAGID, 1, VCFHeaderLineType.String, "Sample Name from multi-sample vcf"));
        metaData.add(new VCFInfoHeaderLine(DEFAULT_SAMPLE_FILETAGID, 1, VCFHeaderLineType.String, "Origin of sample"));
        for (final String sampleName : sampleNames) {
            metaData.add(new VCFHeaderLine(SAMPLE_HEADER_DECLARATION, sampleName));
        }
        final VCFHeader h2 = new VCFHeader(metaData, Collections.singleton(DEFAULT_VCF_SAMPLE_NAME));
        recalculator.setHeader(h2);
        out = super.openVariantContextWriter(this.outputFile);
        out.writeHeader(h2);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(dict);

        while (!out.checkError()) {
            /* find the input whose next variant sorts first; drop exhausted inputs */
            VariantContext smallest = null;
            int bestIndex = -1;
            int cursor = 0;
            while (cursor < inputs.size()) {
                final VcfIterator candidate = inputs.get(cursor);
                if (candidate.hasNext()) {
                    final VariantContext head = candidate.peek();
                    if (smallest == null || comparator.compare(smallest, head) > 0) {
                        smallest = head;
                        bestIndex = cursor;
                    }
                    cursor++;
                } else {
                    // Removal shifts later entries down, so the cursor stays put.
                    CloserUtil.close(candidate);
                    inputs.remove(cursor);
                    inputFiles.remove(cursor);
                }
            }
            if (smallest == null) {
                break;
            }

            final VariantContext ctx = progress.watch(inputs.get(bestIndex).next());
            final String origin = inputFiles.get(bestIndex);
            final int sampleCount = ctx.getNSamples();

            if (sampleCount == 0) {
                /* site-only variant: emit it with a missing genotype unless no-calls are discarded */
                if (!this.discard_no_call) {
                    final VariantContextBuilder siteBuilder = new VariantContextBuilder(ctx);
                    siteBuilder.attribute(DEFAULT_SAMPLE_FILETAGID, origin);
                    siteBuilder.genotypes(GenotypeBuilder.createMissing(DEFAULT_VCF_SAMPLE_NAME, 2));
                    out.add(this.recalculator.apply(siteBuilder.make()));
                }
                continue;
            }

            for (int g = 0; g < sampleCount; ++g) {
                final Genotype genotype = ctx.getGenotype(g);
                final boolean discarded = (this.discard_no_call && !genotype.isCalled())
                        || (this.discard_non_available && !genotype.isAvailable())
                        || (this.discard_hom_ref && genotype.isHomRef());
                if (discarded) {
                    continue;
                }
                final GenotypeBuilder renamed = new GenotypeBuilder(genotype);
                renamed.name(DEFAULT_VCF_SAMPLE_NAME);
                final VariantContextBuilder recordBuilder = new VariantContextBuilder(ctx);
                recordBuilder.attribute(DEFAULT_SAMPLE_TAGID, genotype.getSampleName());
                recordBuilder.attribute(DEFAULT_SAMPLE_FILETAGID, origin);
                recordBuilder.genotypes(renamed.make());
                out.add(this.recalculator.apply(recordBuilder.make()));
            }
        }
        progress.finish();
        LOG.debug("done");
        return 0;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(inputs);
        CloserUtil.close(out);
    }
}
Also used : VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) Genotype(htsjdk.variant.variantcontext.Genotype) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) HashSet(java.util.HashSet)

Aggregations

VCFInfoHeaderLine (htsjdk.variant.vcf.VCFInfoHeaderLine)55 VCFHeader (htsjdk.variant.vcf.VCFHeader)49 VariantContext (htsjdk.variant.variantcontext.VariantContext)37 VariantContextBuilder (htsjdk.variant.variantcontext.VariantContextBuilder)37 ArrayList (java.util.ArrayList)34 HashSet (java.util.HashSet)32 VCFHeaderLine (htsjdk.variant.vcf.VCFHeaderLine)31 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)25 VariantContextWriter (htsjdk.variant.variantcontext.writer.VariantContextWriter)25 Allele (htsjdk.variant.variantcontext.Allele)22 IOException (java.io.IOException)20 File (java.io.File)19 Genotype (htsjdk.variant.variantcontext.Genotype)17 GenotypeBuilder (htsjdk.variant.variantcontext.GenotypeBuilder)17 Set (java.util.Set)17 HashMap (java.util.HashMap)16 List (java.util.List)16 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)14 VCFFilterHeaderLine (htsjdk.variant.vcf.VCFFilterHeaderLine)14 VCFFormatHeaderLine (htsjdk.variant.vcf.VCFFormatHeaderLine)14