Search in sources :

Example 41 with VariantContextWriter

use of htsjdk.variant.variantcontext.writer.VariantContextWriter in project jvarkit by lindenb.

the class VcfBurdenFisherH method doVcfToVcf.

@Override
protected int doVcfToVcf(final String inputName, final VcfIterator r, final VariantContextWriter delegate) {
    final VariantContextWriter w = this.component.open(delegate);
    w.writeHeader(r.getHeader());
    final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(r.getHeader()).logger(LOG);
    while (r.hasNext()) {
        w.add(progress.watch(r.next()));
    }
    progress.finish();
    w.close();
    return 0;
}
Also used : SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) DelegateVariantContextWriter(com.github.lindenb.jvarkit.util.vcf.DelegateVariantContextWriter) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter)

Example 42 with VariantContextWriter

use of htsjdk.variant.variantcontext.writer.VariantContextWriter in project jvarkit by lindenb.

the class EvsToVcf method doWork.

@Override
public int doWork(List<String> args) {
    VariantContextWriter out = null;
    try {
        if (!args.isEmpty()) {
            LOG.error("Illegal number of arguments");
            return -1;
        }
        JAXBContext jc = JAXBContext.newInstance(SnpData.class);
        Unmarshaller unmarshaller = jc.createUnmarshaller();
        out = VCFUtils.createVariantContextWriterToStdout();
        SAMSequenceDictionary dict = new SAMSequenceDictionary();
        _fillDict(dict, "1", 249250621);
        _fillDict(dict, "2", 243199373);
        _fillDict(dict, "3", 198022430);
        _fillDict(dict, "4", 191154276);
        _fillDict(dict, "5", 180915260);
        _fillDict(dict, "6", 171115067);
        _fillDict(dict, "7", 159138663);
        _fillDict(dict, "8", 146364022);
        _fillDict(dict, "9", 141213431);
        _fillDict(dict, "10", 135534747);
        _fillDict(dict, "11", 135006516);
        _fillDict(dict, "12", 133851895);
        _fillDict(dict, "13", 115169878);
        _fillDict(dict, "14", 107349540);
        _fillDict(dict, "15", 102531392);
        _fillDict(dict, "16", 90354753);
        _fillDict(dict, "17", 81195210);
        _fillDict(dict, "18", 78077248);
        _fillDict(dict, "19", 59128983);
        _fillDict(dict, "20", 63025520);
        _fillDict(dict, "21", 48129895);
        _fillDict(dict, "22", 51304566);
        _fillDict(dict, "X", 155270560);
        _fillDict(dict, "Y", 59373566);
        _fillDict(dict, "MT", 16569);
        VCFHeader header = new VCFHeader();
        header.setSequenceDictionary(dict);
        header.addMetaDataLine(new VCFInfoHeaderLine("CONS", VCFHeaderLineCount.INTEGER, VCFHeaderLineType.Float, "conservationScore"));
        header.addMetaDataLine(new VCFInfoHeaderLine("GERP", VCFHeaderLineCount.INTEGER, VCFHeaderLineType.Float, "conservationScoreGERP"));
        header.addMetaDataLine(new VCFInfoHeaderLine("uaMAF", VCFHeaderLineCount.INTEGER, VCFHeaderLineType.Float, "conservationScoreGERP"));
        header.addMetaDataLine(new VCFInfoHeaderLine("aaMAF", VCFHeaderLineCount.INTEGER, VCFHeaderLineType.Float, "conservationScoreGERP"));
        header.addMetaDataLine(new VCFInfoHeaderLine("totalMAF", VCFHeaderLineCount.INTEGER, VCFHeaderLineType.Float, "conservationScoreGERP"));
        header.addMetaDataLine(new VCFInfoHeaderLine("DP", VCFHeaderLineCount.INTEGER, VCFHeaderLineType.Integer, "conservationScoreGERP"));
        header.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "CmdLine", String.valueOf(getProgramCommandLine())));
        header.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "Version", String.valueOf(getVersion())));
        header.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "HtsJdkVersion", HtsjdkVersion.getVersion()));
        header.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "HtsJdkHome", HtsjdkVersion.getHome()));
        out.writeHeader(header);
        Pattern comma = Pattern.compile("[,]");
        XMLInputFactory xif = XMLInputFactory.newFactory();
        xif.setProperty(XMLInputFactory.IS_NAMESPACE_AWARE, false);
        XMLEventReader xmlr = xif.createXMLEventReader(System.in);
        while (xmlr.hasNext() && !System.out.checkError()) {
            XMLEvent evt = xmlr.peek();
            if (!evt.isStartElement() || !evt.asStartElement().getName().getLocalPart().equals("snpList")) {
                xmlr.nextEvent();
                continue;
            }
            SnpData snpData = unmarshaller.unmarshal(xmlr, SnpData.class).getValue();
            VariantContextBuilder vcb = new VariantContextBuilder();
            Set<Allele> alleles = new HashSet<Allele>();
            alleles.add(Allele.create(snpData.getRefAllele(), true));
            for (String s : comma.split(snpData.getAltAlleles())) {
                if (isEmpty(s))
                    continue;
                alleles.add(Allele.create(s, false));
            }
            vcb.chr(snpData.getChromosome());
            vcb.start(snpData.getChrPosition());
            vcb.stop(snpData.getChrPosition() + snpData.getRefAllele().length() - 1);
            if (!isEmpty(snpData.getRsIds()) && !snpData.getRsIds().equals("none")) {
                vcb.id(snpData.getRsIds());
            }
            vcb.alleles(alleles);
            Float d = parseDouble(snpData.getConservationScore());
            if (d != null) {
                vcb.attribute("CONS", d);
            }
            d = parseDouble(snpData.getConservationScoreGERP());
            if (d != null) {
                vcb.attribute("GERP", d);
            }
            vcb.attribute("uaMAF", (float) snpData.getUaMAF());
            vcb.attribute("aaMAF", (float) snpData.getAaMAF());
            vcb.attribute("totalMAF", (float) snpData.getTotalMAF());
            vcb.attribute("DP", snpData.getAvgSampleReadDepth());
            out.add(vcb.make());
        }
        xmlr.close();
        out.close();
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(out);
    }
}
Also used : Pattern(java.util.regex.Pattern) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) JAXBContext(javax.xml.bind.JAXBContext) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) Allele(htsjdk.variant.variantcontext.Allele) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) XMLEvent(javax.xml.stream.events.XMLEvent) XMLEventReader(javax.xml.stream.XMLEventReader) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) SnpData(edu.washington.gs.evs.SnpData) Unmarshaller(javax.xml.bind.Unmarshaller) VCFHeader(htsjdk.variant.vcf.VCFHeader) XMLInputFactory(javax.xml.stream.XMLInputFactory) HashSet(java.util.HashSet)

Example 43 with VariantContextWriter

use of htsjdk.variant.variantcontext.writer.VariantContextWriter in project jvarkit by lindenb.

the class VCFMerge method workUsingPeekIterator.

private int workUsingPeekIterator() {
    VariantContextWriter out = null;
    final List<PeekVCF> input = new ArrayList<PeekVCF>();
    try {
        final Set<String> genotypeSampleNames = new TreeSet<String>();
        final Set<VCFHeaderLine> metaData = new HashSet<VCFHeaderLine>();
        // get all VCF, check same dict
        for (final String arg : this.userVcfFiles) {
            LOG.info("Opening " + arg);
            final PeekVCF p = new PeekVCF(arg);
            input.add(p);
            genotypeSampleNames.addAll(p.header.getSampleNamesInOrder());
            metaData.addAll(p.header.getMetaDataInInputOrder());
            if (this.global_dictionary == null) {
                this.global_dictionary = p.header.getSequenceDictionary();
            } else if (!SequenceUtil.areSequenceDictionariesEqual(this.global_dictionary, p.header.getSequenceDictionary())) {
                throw new JvarkitException.DictionariesAreNotTheSame(this.global_dictionary, p.header.getSequenceDictionary());
            }
        }
        if (this.global_dictionary == null) {
            throw new IllegalStateException("No Dict");
        }
        // super.addMetaData(metaData);
        if (this.doNotMergeRowLines) {
            metaData.add(NO_MERGE_INFO_HEADER);
        }
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(this.global_dictionary);
        out = super.openVariantContextWriter(this.outputFile);
        final VCFHeader headerOut = new VCFHeader(metaData, genotypeSampleNames);
        out.writeHeader(headerOut);
        final List<VariantContext> row = new ArrayList<VariantContext>(input.size());
        // find smallest ordered variant
        long nCountForGC = 0L;
        for (; ; ) {
            row.clear();
            for (final PeekVCF peekVcf : input) {
                final List<VariantContext> peekVcfList = peekVcf.peek();
                if (peekVcf.peek().isEmpty())
                    continue;
                final VariantContext ctx = peekVcfList.get(0);
                if (row.isEmpty()) {
                    row.addAll(peekVcfList);
                    continue;
                }
                final int cmp = this.compareChromPosRef.compare(ctx, row.get(0));
                if (cmp == 0) {
                    row.addAll(peekVcfList);
                } else if (cmp < 0) {
                    row.clear();
                    row.addAll(peekVcfList);
                }
            }
            if (row.isEmpty())
                break;
            for (final VariantContext merged : buildContextFromVariantContext(headerOut, row)) {
                out.add(progress.watch(merged));
            }
            // consumme peeked variants
            for (final PeekVCF peekVcf : input) {
                peekVcf.reset(row.get(0));
            }
            if (nCountForGC++ % 100000 == 0)
                System.gc();
        }
        for (final PeekVCF peekVcf : input) {
            peekVcf.close();
        }
        input.clear();
        CloserUtil.close(out);
        out = null;
        progress.finish();
        LOG.info("Done peek sorting");
        return RETURN_OK;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(out);
        for (final PeekVCF p : input) {
            p.close();
        }
    }
}
Also used : VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) IOException(java.io.IOException) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) TreeSet(java.util.TreeSet) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) HashSet(java.util.HashSet)

Example 44 with VariantContextWriter

use of htsjdk.variant.variantcontext.writer.VariantContextWriter in project jvarkit by lindenb.

the class VcfRebase method doVcfToVcf.

@Override
protected int doVcfToVcf(final String inputName, final VcfIterator iter, final VariantContextWriter delegate) {
    final VariantContextWriter out = this.component.open(delegate);
    final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(iter.getHeader()).logger(LOG);
    out.writeHeader(iter.getHeader());
    while (iter.hasNext()) {
        out.add(progress.watch(iter.next()));
    }
    out.close();
    progress.finish();
    return 0;
}
Also used : SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) DelegateVariantContextWriter(com.github.lindenb.jvarkit.util.vcf.DelegateVariantContextWriter) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) PostponedVariantContextWriter(com.github.lindenb.jvarkit.util.vcf.PostponedVariantContextWriter)

Example 45 with VariantContextWriter

use of htsjdk.variant.variantcontext.writer.VariantContextWriter in project jvarkit by lindenb.

the class VCFStripAnnotations method doVcfToVcf.

@Override
protected int doVcfToVcf(final String inputName, final VcfIterator iter, final VariantContextWriter delegate) {
    final VariantContextWriter out = this.component.open(delegate);
    final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(iter.getHeader()).logger(LOG);
    out.writeHeader(iter.getHeader());
    while (iter.hasNext()) {
        out.add(progress.watch(iter.next()));
    }
    out.close();
    progress.finish();
    return 0;
}
Also used : SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) DelegateVariantContextWriter(com.github.lindenb.jvarkit.util.vcf.DelegateVariantContextWriter) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter)

Aggregations

VariantContextWriter (htsjdk.variant.variantcontext.writer.VariantContextWriter)132 File (java.io.File)67 VCFHeader (htsjdk.variant.vcf.VCFHeader)65 VariantContext (htsjdk.variant.variantcontext.VariantContext)60 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)53 ArrayList (java.util.ArrayList)41 IOException (java.io.IOException)38 VariantContextBuilder (htsjdk.variant.variantcontext.VariantContextBuilder)35 VCFHeaderLine (htsjdk.variant.vcf.VCFHeaderLine)34 HashSet (java.util.HashSet)32 List (java.util.List)29 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)28 Allele (htsjdk.variant.variantcontext.Allele)28 Collectors (java.util.stream.Collectors)27 VcfIterator (com.github.lindenb.jvarkit.util.vcf.VcfIterator)26 VCFFileReader (htsjdk.variant.vcf.VCFFileReader)26 Parameter (com.beust.jcommander.Parameter)24 Logger (com.github.lindenb.jvarkit.util.log.Logger)24 Launcher (com.github.lindenb.jvarkit.util.jcommander.Launcher)23 Program (com.github.lindenb.jvarkit.util.jcommander.Program)23