Search in sources :

Example 51 with SAMSequenceDictionaryProgress

use of com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress in project jvarkit by lindenb.

the class VcfCalledWithAnotherMethod method doVcfToVcf.

protected int doVcfToVcf(final String inputName, final VcfIterator in, final VariantContextWriter out) {
    final List<ExternalVcf> externalVcfs = new ArrayList<>();
    try {
        final VCFHeader header = in.getHeader();
        this.dictionary = header.getSequenceDictionary();
        /**
         * open primitive input
         */
        if (this.dictionary == null) {
            LOG.error("no dictionary in input");
            return -1;
        }
        final Set<File> samtoolsFiles = new HashSet<>();
        this.externalVcfStrs.stream().filter(S -> S.endsWith(".list")).map(S -> Paths.get(S)).forEach(P -> {
            try {
                samtoolsFiles.addAll(Files.readAllLines(P).stream().filter(L -> !(L.startsWith("#") || L.trim().isEmpty())).map(S -> new File(S)).collect(Collectors.toSet()));
            } catch (final Exception err) {
                throw new RuntimeIOException(err);
            }
        });
        samtoolsFiles.addAll(this.externalVcfStrs.stream().filter(S -> !S.endsWith(".list")).map(S -> new File(S)).collect(Collectors.toSet()));
        externalVcfs.addAll(samtoolsFiles.stream().map(F -> new ExternalVcf(F)).collect(Collectors.toList()));
        /**
         * check for uniq keys
         */
        final Set<String> uniqKeys = new HashSet<>();
        for (final ExternalVcf extvcf : externalVcfs) {
            int n = 0;
            for (; ; ) {
                final String newkey = extvcf.key + (n == 0 ? "" : "_" + n);
                if (!uniqKeys.contains(newkey)) {
                    extvcf.key = newkey;
                    uniqKeys.add(newkey);
                    break;
                }
                ++n;
            }
        }
        final VCFHeader h2 = new VCFHeader(header);
        for (final ExternalVcf extvcf : externalVcfs) {
            h2.addMetaDataLine(new VCFHeaderLine("otherVcf:" + extvcf.key, extvcf.vcfFile.getPath()));
        }
        final VCFFilterHeaderLine variantNotFoundElsewhereFILTER = (filterNameVariantNotFoundElseWhere.isEmpty() ? null : new VCFFilterHeaderLine(filterNameVariantNotFoundElseWhere, "Variant Was not found in other VCFs: " + externalVcfs.stream().map(S -> S.vcfFile.getPath()).collect(Collectors.joining(", "))));
        if (variantNotFoundElsewhereFILTER != null) {
            h2.addMetaDataLine(variantNotFoundElsewhereFILTER);
        }
        final VCFInfoHeaderLine variantFoundElseWhereKeys = new VCFInfoHeaderLine(this.infoFoundKey, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Variant was found in the VCFs designed by those keys");
        h2.addMetaDataLine(variantFoundElseWhereKeys);
        final VCFInfoHeaderLine variantFoundElseWhereCount = new VCFInfoHeaderLine(this.infoFoundKey, 1, VCFHeaderLineType.Integer, "Number of times the Variant was found  in the VCFs");
        h2.addMetaDataLine(variantFoundElseWhereCount);
        final VCFFormatHeaderLine genotypeCountSame = new VCFFormatHeaderLine(this.formatCountSame, 1, VCFHeaderLineType.Integer, "Number of times the Genotype was found the same in other VCFs");
        h2.addMetaDataLine(genotypeCountSame);
        final VCFFormatHeaderLine genotypeCountDiscordant = new VCFFormatHeaderLine(this.formatCountDiscordant, 1, VCFHeaderLineType.Integer, "Number of times the Genotype was found dicordant in other VCFs");
        h2.addMetaDataLine(genotypeCountDiscordant);
        super.addMetaData(h2);
        final VariantContextWriter w = super.openVariantContextWriter(outputFile);
        w.writeHeader(h2);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(this.dictionary);
        while (in.hasNext()) {
            final VariantContext ctx = progress.watch(in.next());
            final List<GenotypeCount> genotypeCounts = ctx.getGenotypes().stream().map(G -> new GenotypeCount(G)).collect(Collectors.toList());
            final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
            final Set<String> variantFoundInOtherVcfs = new HashSet<>();
            for (final ExternalVcf extvcf : externalVcfs) {
                final List<VariantContext> otherVariants = extvcf.get(ctx);
                if (otherVariants.stream().filter(CTX2 -> {
                    if (ctx.getAlternateAlleles().isEmpty())
                        return true;
                    for (final Allele a : ctx.getAlternateAlleles()) {
                        if (CTX2.hasAllele(a))
                            return true;
                    }
                    return false;
                }).findAny().isPresent()) {
                    variantFoundInOtherVcfs.add(extvcf.key);
                }
                for (final GenotypeCount gt : genotypeCounts) {
                    for (final VariantContext otherVariant : otherVariants) {
                        final Genotype otherGt = otherVariant.getGenotype(gt.gt.getSampleName());
                        if (otherGt == null)
                            continue;
                        if (gt.gt.sameGenotype(otherGt) || (this.noCallSameAsHomRef && ((gt.gt.isNoCall() && otherGt.isHomRef()) || (gt.gt.isHomRef() && otherGt.isNoCall())))) {
                            gt.countSame++;
                        } else {
                            gt.countDiscordant++;
                        }
                    }
                }
            }
            vcb.genotypes(genotypeCounts.stream().map(G -> {
                final GenotypeBuilder gb = new GenotypeBuilder(G.gt);
                gb.attribute(genotypeCountSame.getID(), G.countSame);
                gb.attribute(genotypeCountDiscordant.getID(), G.countDiscordant);
                return gb.make();
            }).collect(Collectors.toList()));
            vcb.attribute(variantFoundElseWhereCount.getID(), variantFoundInOtherVcfs.size());
            if (variantFoundInOtherVcfs.isEmpty()) {
                if (variantNotFoundElsewhereFILTER != null) {
                    vcb.filter(variantNotFoundElsewhereFILTER.getID());
                }
            } else {
                if (variantNotFoundElsewhereFILTER != null && !ctx.isFiltered()) {
                    vcb.passFilters();
                }
                vcb.attribute(variantFoundElseWhereKeys.getID(), new ArrayList<>(variantFoundInOtherVcfs));
            }
            w.add(vcb.make());
        }
        progress.finish();
        while (!externalVcfs.isEmpty()) externalVcfs.remove(0).close();
        return RETURN_OK;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        while (!externalVcfs.isEmpty()) externalVcfs.remove(0).close();
    }
}
Also used : Genotype(htsjdk.variant.variantcontext.Genotype) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) CloseableIterator(htsjdk.samtools.util.CloseableIterator) Allele(htsjdk.variant.variantcontext.Allele) VCFUtils(com.github.lindenb.jvarkit.util.vcf.VCFUtils) SequenceUtil(htsjdk.samtools.util.SequenceUtil) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VCFHeader(htsjdk.variant.vcf.VCFHeader) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) Function(java.util.function.Function) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) VCFHeaderLineType(htsjdk.variant.vcf.VCFHeaderLineType) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) Files(java.nio.file.Files) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) Collectors(java.util.stream.Collectors) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) File(java.io.File) List(java.util.List) Paths(java.nio.file.Paths) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) VariantContext(htsjdk.variant.variantcontext.VariantContext) VCFFormatHeaderLine(htsjdk.variant.vcf.VCFFormatHeaderLine) PeekIterator(htsjdk.samtools.util.PeekIterator) VCFHeaderLineCount(htsjdk.variant.vcf.VCFHeaderLineCount) Comparator(java.util.Comparator) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) HashSet(java.util.HashSet) VCFFormatHeaderLine(htsjdk.variant.vcf.VCFFormatHeaderLine) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) Genotype(htsjdk.variant.variantcontext.Genotype) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) Allele(htsjdk.variant.variantcontext.Allele) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) File(java.io.File)

Example 52 with SAMSequenceDictionaryProgress

use of com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress in project jvarkit by lindenb.

the class VcfCreateDictionary method doWork.

@Override
public int doWork(final List<String> args) {
    VcfIterator in = null;
    Writer out = null;
    if (this.outputFile != null && !outputFile.getName().endsWith(".dict")) {
        LOG.error("output file should end with .dict :" + this.outputFile);
        return -1;
    }
    try {
        final LinkedHashMap<String, Integer> buildNewDictionary = new LinkedHashMap<String, Integer>();
        int optind = 0;
        do {
            in = super.openVcfIterator(args.isEmpty() ? null : args.get(optind));
            final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(in.getHeader()).logger(LOG);
            while (in.hasNext()) {
                final VariantContext ctx = progress.watch(in.next());
                Integer length = buildNewDictionary.get(ctx.getContig());
                if (length == null)
                    length = 0;
                if (ctx.getEnd() > length) {
                    length = ctx.getEnd();
                    buildNewDictionary.put(ctx.getContig(), length);
                }
                final Object END = ctx.getAttribute("END");
                if (END != null) {
                    try {
                        final int end = Integer.parseInt(END.toString());
                        if (end > length) {
                            length = end;
                            buildNewDictionary.put(ctx.getContig(), length);
                        }
                    } catch (Throwable err) {
                    // not an integer
                    }
                }
            }
            progress.finish();
            in.close();
            in = null;
            ++optind;
        } while (optind < args.size());
        final List<SAMSequenceRecord> list = new ArrayList<SAMSequenceRecord>(buildNewDictionary.size());
        for (final String k : buildNewDictionary.keySet()) {
            list.add(new SAMSequenceRecord(k, buildNewDictionary.get(k)));
        }
        final SAMFileHeader sfh = new SAMFileHeader();
        sfh.setSequenceDictionary(new SAMSequenceDictionary(list));
        final SAMTextHeaderCodec codec = new SAMTextHeaderCodec();
        codec.setValidationStringency(htsjdk.samtools.ValidationStringency.SILENT);
        out = super.openFileOrStdoutAsPrintWriter(this.outputFile);
        codec.encode(out, sfh);
        out.flush();
        out.close();
        out = null;
        return 0;
    } catch (final Exception err2) {
        LOG.error(err2);
        return -1;
    } finally {
        CloserUtil.close(out);
        CloserUtil.close(in);
    }
}
Also used : SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) LinkedHashMap(java.util.LinkedHashMap) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) SAMTextHeaderCodec(htsjdk.samtools.SAMTextHeaderCodec) SAMFileHeader(htsjdk.samtools.SAMFileHeader) Writer(java.io.Writer)

Example 53 with SAMSequenceDictionaryProgress

use of com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress in project jvarkit by lindenb.

the class VcfCutSamples method doVcfToVcf.

@Override
protected int doVcfToVcf(String inputName, VcfIterator in, VariantContextWriter out) {
    VCFHeader header = in.getHeader();
    final Set<String> samples1 = new HashSet<String>(header.getSampleNamesInOrder());
    for (String my : this.getUserSamples()) {
        if (!samples1.contains(my)) {
            String msg = "user sample " + my + " is not present in VCF Header : " + samples1;
            if (this.missing_sample_is_error) {
                throw new RuntimeException(msg);
            } else {
                LOG.warning(msg);
            }
        }
    }
    final List<String> samples2 = new ArrayList<String>();
    for (final String sample : header.getSampleNamesInOrder()) {
        if (this.getUserSamples().contains(sample)) {
            if (!invert) {
                samples2.add(sample);
            }
        } else {
            if (invert) {
                samples2.add(sample);
            }
        }
    }
    final VCFHeader header2 = new VCFHeader(header.getMetaDataInInputOrder(), samples2);
    header2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "CmdLine", String.valueOf(getProgramCommandLine())));
    header2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "Version", String.valueOf(getVersion())));
    header2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "HtsJdkVersion", HtsjdkVersion.getVersion()));
    header2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "HtsJdkHome", HtsjdkVersion.getHome()));
    this.recalculator.setHeader(header2);
    out.writeHeader(header2);
    final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header);
    while (in.hasNext()) {
        final VariantContext ctx = progress.watch(in.next());
        final VariantContextBuilder vb = new VariantContextBuilder(ctx);
        final List<Genotype> genotypes = new ArrayList<Genotype>();
        final Set<Allele> alleles = new HashSet<Allele>();
        boolean only_no_call = true;
        for (final String sample : samples2) {
            final Genotype g = ctx.getGenotype(sample);
            if (g.isNoCall() || !g.isCalled())
                continue;
            alleles.addAll(g.getAlleles());
            genotypes.add(g);
            if (g.isCalled())
                only_no_call = false;
        }
        if (removeCtxIfNoCall && only_no_call)
            continue;
        alleles.add(ctx.getReference());
        vb.alleles(alleles);
        vb.genotypes(genotypes);
        out.add(this.recalculator.apply(vb.make()));
    }
    progress.finish();
    return 0;
}
Also used : VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) Genotype(htsjdk.variant.variantcontext.Genotype) Allele(htsjdk.variant.variantcontext.Allele) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) VCFHeader(htsjdk.variant.vcf.VCFHeader) HashSet(java.util.HashSet)

Example 54 with SAMSequenceDictionaryProgress

use of com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress in project jvarkit by lindenb.

the class VcfFilterXPath method doVcfToVcf.

@Override
protected int doVcfToVcf(String inputName, VcfIterator in, VariantContextWriter out) {
    try {
        // TODO in jdk8 replace with http://docs.oracle.com/javase/8/docs/api/java/util/Base64.html
        VCFHeader header = in.getHeader();
        VCFInfoHeaderLine infoHeader = header.getInfoHeaderLine(this.infoTag);
        if (infoHeader == null) {
            LOG.warning("No INFO header line for " + this.infoTag + " in " + inputName);
        } else if (!(infoHeader.getCountType() == VCFHeaderLineCount.INTEGER && infoHeader.getCount() == 1 && infoHeader.getType() == VCFHeaderLineType.String)) {
            LOG.warning("Bad definition of INFO header line for " + this.infoTag + " in " + inputName + " expected one 'string' got " + infoHeader);
            infoHeader = null;
        }
        VCFHeader h2 = new VCFHeader(header);
        h2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "CmdLine", String.valueOf(getProgramCommandLine())));
        h2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "Version", String.valueOf(getVersion())));
        h2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "HtsJdkVersion", HtsjdkVersion.getVersion()));
        h2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "HtsJdkHome", HtsjdkVersion.getHome()));
        SAMSequenceDictionaryProgress progess = new SAMSequenceDictionaryProgress(header.getSequenceDictionary());
        out.writeHeader(h2);
        while (in.hasNext()) {
            VariantContext ctx = progess.watch(in.next());
            if (// no tag in header
            infoHeader == null) {
                out.add(ctx);
                continue;
            }
            Object o = ctx.getAttribute(this.infoTag);
            if (o == null) {
                out.add(ctx);
                continue;
            }
            StringBuilder base64 = new StringBuilder(o.toString());
            while (base64.length() % 4 != 0) base64.append('=');
            ByteArrayInputStream xmlBytes = new ByteArrayInputStream(Base64.getDecoder().decode(base64.toString()));
            InputSource inputSource = new InputSource(xmlBytes);
            xpathVariableMap.put(new QName("chrom"), ctx.getContig());
            xpathVariableMap.put(new QName("start"), ctx.getStart());
            xpathVariableMap.put(new QName("end"), ctx.getEnd());
            xpathVariableMap.put(new QName("id"), ctx.hasID() ? ctx.getID() : ".");
            boolean accept = (Boolean) xpathExpr.evaluate(inputSource, XPathConstants.BOOLEAN);
            if (accept) {
                out.add(ctx);
            }
            if (out.checkError())
                break;
        }
        progess.finish();
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        xpathVariableMap.clear();
    }
}
Also used : VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) InputSource(org.xml.sax.InputSource) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) QName(javax.xml.namespace.QName) VariantContext(htsjdk.variant.variantcontext.VariantContext) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) ByteArrayInputStream(java.io.ByteArrayInputStream) VCFHeader(htsjdk.variant.vcf.VCFHeader)

Example 55 with SAMSequenceDictionaryProgress

use of com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress in project jvarkit by lindenb.

the class VcfHead method doVcfToVcf.

@Override
protected int doVcfToVcf(final String inputName, final VcfIterator in, final VariantContextWriter delegate) {
    final CtxWriterFactory.CtxWriter out = this.component.open(delegate);
    out.writeHeader(in.getHeader());
    final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(in.getHeader()).logger(LOG);
    while (in.hasNext() && !out.done) {
        out.add(progress.watch(in.next()));
    }
    progress.finish();
    out.close();
    return 0;
}
Also used : SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)

Aggregations

SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)146 ArrayList (java.util.ArrayList)64 VariantContext (htsjdk.variant.variantcontext.VariantContext)59 VCFHeader (htsjdk.variant.vcf.VCFHeader)57 SAMRecord (htsjdk.samtools.SAMRecord)54 VariantContextWriter (htsjdk.variant.variantcontext.writer.VariantContextWriter)54 SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)48 IOException (java.io.IOException)48 File (java.io.File)47 SamReader (htsjdk.samtools.SamReader)40 SAMFileHeader (htsjdk.samtools.SAMFileHeader)38 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)37 HashSet (java.util.HashSet)34 VariantContextBuilder (htsjdk.variant.variantcontext.VariantContextBuilder)32 VcfIterator (com.github.lindenb.jvarkit.util.vcf.VcfIterator)30 List (java.util.List)30 VCFHeaderLine (htsjdk.variant.vcf.VCFHeaderLine)29 HashMap (java.util.HashMap)28 Parameter (com.beust.jcommander.Parameter)27 Launcher (com.github.lindenb.jvarkit.util.jcommander.Launcher)27