Search in sources :

Example 11 with Sample

use of com.github.lindenb.jvarkit.pedigree.Sample in project jvarkit by lindenb.

the class VcfBurdenFisherH method doVcfToVcf.

@Override
protected int doVcfToVcf(final String inputName, final VCFIterator r, final VariantContextWriter w) {
    final PrintWriter report;
    if (this.bedExportPath == null) {
        report = new PrintWriter(new NullOuputStream());
    } else {
        report = new PrintWriter(IOUtil.openFileForBufferedUtf8Writing(this.bedExportPath));
    }
    final VCFHeader header = r.getHeader();
    final VCFHeader h2 = new VCFHeader(header);
    final Pedigree pedigree;
    try {
        pedigree = new PedigreeParser().parse(this.pedigreeFile);
    } catch (final IOException error) {
        throw new RuntimeIOException(error);
    }
    final Set<Sample> individualSet = pedigree.getSamplesInVcfHeader(header).filter(S -> S.isStatusSet()).collect(Collectors.toSet());
    if (individualSet.isEmpty())
        throw new IllegalArgumentException("No overlapping samples between header and pedigree.");
    final VCFInfoHeaderLine fisherAlleleInfoHeader = new VCFInfoHeaderLine(this.burdenHFisherTag, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Fisher Exact Test Case/Control.");
    final VCFFilterHeaderLine variantFilterHeader;
    if (StringUtils.isBlank(this.softFilterName)) {
        variantFilterHeader = null;
    } else {
        variantFilterHeader = new VCFFilterHeaderLine(this.softFilterName, "Fisher case:control vs miss|have ALT not between " + this.min_fisher + " and " + this.max_fisher);
        h2.addMetaDataLine(variantFilterHeader);
    }
    final VCFFilterHeaderLine filterCtrlgtCaseRatio;
    if (StringUtils.isBlank(this.filterCtrlgtCaseRatioStr)) {
        filterCtrlgtCaseRatio = null;
    } else {
        filterCtrlgtCaseRatio = new VCFFilterHeaderLine(this.filterCtrlgtCaseRatioStr, "The number of CONTROLS carrying the ALT allele is creater than the number of CASES carrying the ALT allele.");
        h2.addMetaDataLine(filterCtrlgtCaseRatio);
    }
    final VCFInfoHeaderLine fisherDetailInfoHeader = new VCFInfoHeaderLine(this.burdenHFisherTag + "Detail", VCFHeaderLineCount.A, VCFHeaderLineType.String, "Fisher Exact Test Case/Control");
    h2.addMetaDataLine(fisherAlleleInfoHeader);
    h2.addMetaDataLine(fisherDetailInfoHeader);
    JVarkitVersion.getInstance().addMetaData(this, h2);
    w.writeHeader(h2);
    while (r.hasNext()) {
        final VariantContext ctx = r.next();
        final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
        if (this.overwrite_qual)
            vcb.log10PError(VariantContext.NO_LOG10_PERROR);
        vcb.rmAttribute(fisherAlleleInfoHeader.getID());
        vcb.rmAttribute(fisherDetailInfoHeader.getID());
        final Set<String> oldFilters = new HashSet<>(ctx.getFilters());
        if (variantFilterHeader != null)
            oldFilters.remove(variantFilterHeader.getID());
        if (filterCtrlgtCaseRatio != null)
            oldFilters.remove(filterCtrlgtCaseRatio.getID());
        if (this.ignoreFiltered && ctx.isFiltered()) {
            w.add(vcb.make());
            continue;
        }
        boolean set_filter_in_range = true;
        boolean set_filter_case_ctrl_ratio = true;
        boolean found_one_alt = false;
        final List<String> infoData = new ArrayList<>(ctx.getAlleles().size());
        final List<Double> fisherValues = new ArrayList<>(ctx.getAlleles().size());
        for (final Allele observed_alt : ctx.getAlternateAlleles()) {
            if (observed_alt.isNoCall()) {
                infoData.add(String.join("|", "ALLELE", String.valueOf(observed_alt.getDisplayString()), "FISHER", "1.0"));
                fisherValues.add(1.0);
                continue;
            }
            /* count for fisher allele */
            int count_case_have_alt = 0;
            int count_case_miss_alt = 0;
            int count_ctrl_have_alt = 0;
            int count_ctrl_miss_alt = 0;
            /* loop over persons in this pop */
            for (final Sample p : individualSet) {
                /* get genotype for this individual */
                final Genotype genotype = ctx.getGenotype(p.getId());
                /* individual is not in vcf header */
                if (genotype == null || !genotype.isCalled() || (this.ignore_filtered_genotype && genotype.isFiltered())) {
                    if (genotype == null)
                        LOG.warn("Genotype is null for sample " + p.getId() + " not is pedigree!");
                    // no information , we consider that sample was called AND HOM REF
                    if (p.isAffected()) {
                        count_case_miss_alt++;
                    } else {
                        count_ctrl_miss_alt++;
                    }
                    continue;
                }
                /* loop over alleles */
                final boolean genotype_contains_allele = genotype.getAlleles().stream().anyMatch(A -> A.equals(observed_alt));
                /* fisher */
                if (genotype_contains_allele) {
                    if (p.isAffected()) {
                        count_case_have_alt++;
                        ;
                    } else {
                        count_ctrl_have_alt++;
                    }
                } else {
                    if (p.isAffected()) {
                        count_case_miss_alt++;
                    } else {
                        count_ctrl_miss_alt++;
                    }
                }
            }
            /* end of loop over persons */
            /* fisher test for alleles */
            final FisherExactTest fisherAlt = FisherExactTest.compute(count_case_have_alt, count_case_miss_alt, count_ctrl_have_alt, count_ctrl_miss_alt);
            fisherValues.add(fisherAlt.getAsDouble());
            infoData.add(String.join("|", "ALLELE", String.valueOf(observed_alt.getDisplayString()), "FISHER", String.valueOf(fisherAlt.getAsDouble()), "CASE_HAVE_ALT", String.valueOf(count_case_have_alt), "CASE_MISS_ALT", String.valueOf(count_case_miss_alt), "CTRL_HAVE_ALT", String.valueOf(count_ctrl_have_alt), "CTRL_MISS_ALT", String.valueOf(count_ctrl_miss_alt)));
            found_one_alt = true;
            final boolean is_in_range = this.min_fisher <= fisherAlt.getAsDouble() && fisherAlt.getAsDouble() <= this.max_fisher;
            final int total_ctrls = count_ctrl_have_alt + count_ctrl_miss_alt;
            final int total_cases = count_case_have_alt + count_case_miss_alt;
            // check ratio case/control
            if (total_ctrls > 0 && total_cases > 0 && (count_case_have_alt / (double) total_cases) >= (count_ctrl_have_alt / (double) total_ctrls)) {
                set_filter_case_ctrl_ratio = false;
            }
            if (is_in_range) {
                set_filter_in_range = false;
            }
            if (this.bedExportPath != null && is_in_range) {
                report.print(ctx.getContig());
                report.print('\t');
                report.print(ctx.getStart() - 1);
                report.print('\t');
                report.print(ctx.getEnd());
                report.print('\t');
                report.print(ctx.getReference().getDisplayString());
                report.print('\t');
                report.print(observed_alt.getDisplayString());
                report.print('\t');
                report.print(fisherAlt.getAsDouble());
                report.print('\t');
                report.print(count_case_have_alt);
                report.print('\t');
                report.print(count_case_miss_alt);
                report.print('\t');
                report.print(count_ctrl_have_alt);
                report.print('\t');
                report.print(count_ctrl_miss_alt);
                report.println();
            }
        }
        // better than nothing, otherwise i'll mess with the filters
        if (!found_one_alt) {
            w.add(vcb.make());
            continue;
        }
        // soft filter, skip variant
        if ((set_filter_in_range && variantFilterHeader == null) || (set_filter_case_ctrl_ratio && filterCtrlgtCaseRatio == null))
            // skip variant
            continue;
        vcb.attribute(fisherAlleleInfoHeader.getID(), fisherValues);
        vcb.attribute(fisherDetailInfoHeader.getID(), infoData);
        if (this.overwrite_qual) {
            final OptionalDouble minV = fisherValues.stream().mapToDouble(V -> V.doubleValue()).min();
            if (minV.isPresent())
                vcb.log10PError(Math.max(1.0E-100, /* arbitrary */
                minV.getAsDouble()) / -10);
        }
        if (set_filter_in_range && variantFilterHeader != null) {
            vcb.filter(variantFilterHeader.getID());
            // only set this one if the filter is set above
            if (set_filter_case_ctrl_ratio && filterCtrlgtCaseRatio != null) {
                vcb.filter(filterCtrlgtCaseRatio.getID());
            }
        }
        w.add(vcb.make());
    }
    w.close();
    report.flush();
    report.close();
    return 0;
}
Also used : Genotype(htsjdk.variant.variantcontext.Genotype) Allele(htsjdk.variant.variantcontext.Allele) VCFIterator(htsjdk.variant.vcf.VCFIterator) Program(com.github.lindenb.jvarkit.util.jcommander.Program) IOUtil(htsjdk.samtools.util.IOUtil) NullOuputStream(com.github.lindenb.jvarkit.io.NullOuputStream) Parameter(com.beust.jcommander.Parameter) VCFHeader(htsjdk.variant.vcf.VCFHeader) OptionalDouble(java.util.OptionalDouble) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) FisherExactTest(com.github.lindenb.jvarkit.math.stats.FisherExactTest) PedigreeParser(com.github.lindenb.jvarkit.pedigree.PedigreeParser) Path(java.nio.file.Path) PrintWriter(java.io.PrintWriter) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) VCFHeaderLineType(htsjdk.variant.vcf.VCFHeaderLineType) Pedigree(com.github.lindenb.jvarkit.pedigree.Pedigree) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) IOException(java.io.IOException) JVarkitVersion(com.github.lindenb.jvarkit.util.JVarkitVersion) Collectors(java.util.stream.Collectors) List(java.util.List) FractionConverter(com.github.lindenb.jvarkit.jcommander.converter.FractionConverter) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) VariantContext(htsjdk.variant.variantcontext.VariantContext) OnePassVcfLauncher(com.github.lindenb.jvarkit.jcommander.OnePassVcfLauncher) VCFHeaderLineCount(htsjdk.variant.vcf.VCFHeaderLineCount) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) Sample(com.github.lindenb.jvarkit.pedigree.Sample) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) PedigreeParser(com.github.lindenb.jvarkit.pedigree.PedigreeParser) NullOuputStream(com.github.lindenb.jvarkit.io.NullOuputStream) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) VCFHeader(htsjdk.variant.vcf.VCFHeader) PrintWriter(java.io.PrintWriter) HashSet(java.util.HashSet) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) Sample(com.github.lindenb.jvarkit.pedigree.Sample) Genotype(htsjdk.variant.variantcontext.Genotype) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) IOException(java.io.IOException) OptionalDouble(java.util.OptionalDouble) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) FisherExactTest(com.github.lindenb.jvarkit.math.stats.FisherExactTest) OptionalDouble(java.util.OptionalDouble) Allele(htsjdk.variant.variantcontext.Allele) Pedigree(com.github.lindenb.jvarkit.pedigree.Pedigree) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder)

Example 12 with Sample

use of com.github.lindenb.jvarkit.pedigree.Sample in project jvarkit by lindenb.

the class VcfBurdenFisherV method doVcfToVcf.

@Override
protected int doVcfToVcf(final String inputName, final VCFIterator in, final VariantContextWriter out) {
    final boolean HAS_VARIANT = true;
    final boolean NO_VARIANT = false;
    Path tmVcfOut = null;
    VariantContextWriter tmpw = null;
    try {
        final VCFHeader header = in.getHeader();
        if (tableOut == null && header.getMetaDataLine(VCF_HEADER_FISHER_VALUE) != null) {
            throw new JvarkitException.UserError("VCF Header " + VCF_HEADER_FISHER_VALUE + " already specified in input");
        }
        final VCFHeader header2 = new VCFHeader(header);
        final Set<Sample> persons = new PedigreeParser().parse(this.pedigreeFile).getSamplesInVcfHeader(header).filter(S -> S.isStatusSet()).collect(Collectors.toSet());
        if (persons.isEmpty()) {
            LOG.warn("No sample in pedigree + vcf header");
            return -1;
        }
        final Map<Sample, Boolean> indi2supervariant = new HashMap<>(persons.size());
        for (final Sample person : persons) {
            indi2supervariant.put(person, NO_VARIANT);
        }
        if (this.tableOut != null) {
            tmVcfOut = null;
        } else if (this.outputFile == null) {
            tmVcfOut = Files.createTempFile("tmp.", FileExtensions.BCF);
        } else {
            tmVcfOut = Files.createTempFile(this.outputFile.getParent(), "tmp.", FileExtensions.BCF);
        }
        if (tmVcfOut != null) {
            final VariantContextWriterBuilder vcwb = new VariantContextWriterBuilder();
            vcwb.setCreateMD5(false);
            vcwb.setReferenceDictionary(SequenceDictionaryUtils.extractRequired(header));
            vcwb.clearOptions();
            vcwb.setOutputPath(tmVcfOut);
            tmpw = vcwb.build();
            tmpw.writeHeader(header);
        } else {
            tmpw = null;
        }
        long count_variants = 0L;
        while (in.hasNext()) {
            final VariantContext ctx = in.next();
            if (tmpw != null) {
                tmpw.add(ctx);
            } else {
                out.add(ctx);
            }
            if (ctx.isFiltered() && !this.acceptFiltered)
                continue;
            final int n_alts = ctx.getAlternateAlleles().size();
            if (n_alts == 0) {
                LOG.warn("ignoring variant without ALT allele.");
                continue;
            }
            count_variants++;
            if (n_alts > 1) {
                LOG.warn("variant with more than one ALT. " + ctx.getContig() + ":" + ctx.getStart());
            }
            // loop over person in this pedigree
            for (final Sample person : indi2supervariant.keySet()) {
                if (indi2supervariant.get(person) == HAS_VARIANT)
                    continue;
                final Genotype g = ctx.getGenotype(person.getId());
                if (g == null) {
                    // not in vcf header
                    continue;
                }
                if (g.isFiltered()) {
                    LOG.warn("ignoring filtered genotype");
                    // not filter.
                    continue;
                }
                if (g.getAlleles().stream().anyMatch(A -> A.isCalled() && A.isNonReference())) {
                    indi2supervariant.put(person, HAS_VARIANT);
                }
            // end of allele
            }
        // en dof for[person]
        }
        // end of
        int count_case_sv0 = 0;
        int count_ctrl_sv0 = 0;
        int count_case_sv1 = 0;
        int count_ctrl_sv1 = 0;
        for (final Sample person : indi2supervariant.keySet()) {
            final boolean hasVariant = indi2supervariant.get(person);
            if (!hasVariant) {
                if (person.isAffected())
                    count_case_sv0++;
                else
                    count_ctrl_sv0++;
            } else // AT_LEAST_ONE_VARIANT
            {
                if (person.isAffected())
                    count_case_sv1++;
                else
                    count_ctrl_sv1++;
            }
        }
        // end of person
        final FisherExactTest fisher = FisherExactTest.compute(count_case_sv0, count_case_sv1, count_ctrl_sv0, count_ctrl_sv1);
        if (this.tableOut == null) {
            header2.addMetaDataLine(new VCFHeaderLine(VCF_HEADER_FISHER_VALUE, String.valueOf(fisher.getAsDouble())));
            header2.addMetaDataLine(new VCFHeaderLine(VCF_HEADER_FISHER_VALUE + ".count", String.join("|", "CASE_SV0=" + count_case_sv0, "CASE_SV1=" + count_case_sv1, "CTRL_SV0=" + count_ctrl_sv0, "CTRL_SV1=" + count_ctrl_sv1)));
            try (VCFIterator in2 = new VCFIteratorBuilder().open(tmVcfOut)) {
                out.writeHeader(header2);
                while (in2.hasNext()) {
                    out.add(in2.next());
                }
            }
            Files.deleteIfExists(tmVcfOut);
            tmpw = null;
        } else {
            /* save xml report */
            try (final OutputStream os = super.openPathOrStdoutAsPrintStream(this.tableOut)) {
                final XMLOutputFactory xof = XMLOutputFactory.newFactory();
                final XMLStreamWriter w = xof.createXMLStreamWriter(os, "UTF-8");
                w.writeStartDocument("UTF-8", "1.0");
                w.writeStartElement("html");
                w.writeStartElement("head");
                w.writeStartElement("title");
                w.writeCharacters(getClass().getSimpleName() + ":" + inputName);
                w.writeEndElement();
                w.writeEmptyElement("meta");
                w.writeAttribute("name", "vcf");
                w.writeAttribute("content", String.valueOf(inputName));
                w.writeEmptyElement("meta");
                w.writeAttribute("name", "version");
                w.writeAttribute("content", JVarkitVersion.getInstance().getLabel());
                // hread
                w.writeEndElement();
                w.writeStartElement("body");
                w.writeStartElement("table");
                w.writeStartElement("caption");
                w.writeCharacters("Fisher: ");
                w.writeStartElement("span");
                w.writeAttribute("id", "fisher");
                w.writeCharacters(String.valueOf(fisher.getAsDouble()));
                // span
                w.writeEndElement();
                w.writeCharacters(" Variant(s): ");
                w.writeStartElement("span");
                w.writeAttribute("id", "variants");
                w.writeCharacters(String.valueOf(count_variants));
                // span
                w.writeEndElement();
                // caption
                w.writeEndElement();
                w.writeStartElement("tr");
                w.writeEmptyElement("th");
                w.writeStartElement("th");
                w.writeCharacters("With Rare");
                // th
                w.writeEndElement();
                w.writeStartElement("th");
                w.writeCharacters("No Rare");
                // th
                w.writeEndElement();
                // tr
                w.writeEndElement();
                w.writeStartElement("tr");
                w.writeStartElement("th");
                w.writeCharacters("Case");
                // th
                w.writeEndElement();
                w.writeStartElement("td");
                w.writeAttribute("id", "case1");
                w.writeCharacters(String.valueOf(count_case_sv1));
                // td
                w.writeEndElement();
                w.writeStartElement("td");
                w.writeAttribute("id", "case0");
                w.writeCharacters(String.valueOf(count_case_sv0));
                // td
                w.writeEndElement();
                // tr
                w.writeEndElement();
                w.writeStartElement("tr");
                w.writeStartElement("th");
                w.writeCharacters("Controls");
                // th
                w.writeEndElement();
                w.writeStartElement("td");
                w.writeAttribute("id", "ctrl1");
                w.writeCharacters(String.valueOf(count_ctrl_sv1));
                // td
                w.writeEndElement();
                w.writeStartElement("td");
                w.writeAttribute("id", "ctrl0");
                w.writeCharacters(String.valueOf(count_ctrl_sv0));
                // td
                w.writeEndElement();
                // tr
                w.writeEndElement();
                // table
                w.writeEndElement();
                // body
                w.writeEndElement();
                // html
                w.writeEndElement();
                w.writeEndDocument();
                w.close();
                os.flush();
            }
        }
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    } finally {
        if (tmVcfOut != null)
            try {
                Files.deleteIfExists(tmVcfOut);
            } catch (IOException err) {
            }
    }
}
Also used : WritingVariantsDelegate(com.github.lindenb.jvarkit.variant.variantcontext.writer.WritingVariantsDelegate) Genotype(htsjdk.variant.variantcontext.Genotype) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) Program(com.github.lindenb.jvarkit.util.jcommander.Program) VCFIterator(htsjdk.variant.vcf.VCFIterator) Parameter(com.beust.jcommander.Parameter) VCFHeader(htsjdk.variant.vcf.VCFHeader) HashMap(java.util.HashMap) VariantContextWriterBuilder(htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder) ParametersDelegate(com.beust.jcommander.ParametersDelegate) FisherExactTest(com.github.lindenb.jvarkit.math.stats.FisherExactTest) PedigreeParser(com.github.lindenb.jvarkit.pedigree.PedigreeParser) Map(java.util.Map) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) XMLStreamWriter(javax.xml.stream.XMLStreamWriter) Path(java.nio.file.Path) OutputStream(java.io.OutputStream) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) Files(java.nio.file.Files) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) VCFIteratorBuilder(htsjdk.variant.vcf.VCFIteratorBuilder) IOException(java.io.IOException) JVarkitVersion(com.github.lindenb.jvarkit.util.JVarkitVersion) Collectors(java.util.stream.Collectors) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) XMLOutputFactory(javax.xml.stream.XMLOutputFactory) List(java.util.List) FileExtensions(htsjdk.samtools.util.FileExtensions) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VariantContext(htsjdk.variant.variantcontext.VariantContext) Sample(com.github.lindenb.jvarkit.pedigree.Sample) VCFIteratorBuilder(htsjdk.variant.vcf.VCFIteratorBuilder) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) XMLOutputFactory(javax.xml.stream.XMLOutputFactory) HashMap(java.util.HashMap) OutputStream(java.io.OutputStream) VariantContext(htsjdk.variant.variantcontext.VariantContext) PedigreeParser(com.github.lindenb.jvarkit.pedigree.PedigreeParser) XMLStreamWriter(javax.xml.stream.XMLStreamWriter) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) VCFIterator(htsjdk.variant.vcf.VCFIterator) Path(java.nio.file.Path) Sample(com.github.lindenb.jvarkit.pedigree.Sample) Genotype(htsjdk.variant.variantcontext.Genotype) IOException(java.io.IOException) FisherExactTest(com.github.lindenb.jvarkit.math.stats.FisherExactTest) VariantContextWriterBuilder(htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder)

Example 13 with Sample

use of com.github.lindenb.jvarkit.pedigree.Sample in project jvarkit by lindenb.

the class VcfBurdenMAF method doVcfToVcf.

@Override
protected int doVcfToVcf(final String inputName, final VCFIterator in, final VariantContextWriter out) {
    final int CASE_POP = 0;
    final VCFHeader header0 = in.getHeader();
    final String maf_label = "(" + this.min_AF + "<= maf <= " + this.max_AF + ")";
    final VCFInfoHeaderLine mafCasInfoHeader = (StringUtils.isBlank(this.prefix) ? null : new VCFInfoHeaderLine(this.prefix + "AF_Cases", VCFHeaderLineCount.A, VCFHeaderLineType.Float, "MAF Cases"));
    final VCFInfoHeaderLine mafControlsInfoHeader = (StringUtils.isBlank(this.prefix) ? null : new VCFInfoHeaderLine(this.prefix + "AF_Controls", VCFHeaderLineCount.A, VCFHeaderLineType.Float, "MAF Controls"));
    final VCFInfoHeaderLine acCasInfoHeader = (StringUtils.isBlank(this.prefix) ? null : new VCFInfoHeaderLine(this.prefix + "AC_Cases", VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "AC Cases"));
    final VCFInfoHeaderLine acControlsInfoHeader = (StringUtils.isBlank(this.prefix) ? null : new VCFInfoHeaderLine(this.prefix + "AC_Controls", VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "AC Controls"));
    final VCFFilterHeaderLine filterCasHeader = (StringUtils.isBlank(this.prefix) ? null : new VCFFilterHeaderLine(mafCasInfoHeader.getID(), "MAF of case failed: " + maf_label));
    final VCFFilterHeaderLine filterControlsHeader = (StringUtils.isBlank(this.prefix) ? null : new VCFFilterHeaderLine(mafControlsInfoHeader.getID(), "MAF of controls failed: " + maf_label));
    final VCFFilterHeaderLine filterCaseOrControlsHeader = (StringUtils.isBlank(this.prefix) ? null : new VCFFilterHeaderLine(this.prefix + "MAFCaseOrControls", "MAF of cases OR MAF of controls failed: " + maf_label));
    final Set<Sample> persons;
    {
        try {
            persons = new PedigreeParser().parse(this.pedigreeFile).getSamplesInVcfHeader(header0).filter(S -> S.isStatusSet()).collect(Collectors.toSet());
        } catch (final IOException err) {
            LOG.error(err);
            return -1;
        }
    }
    final DoublePredicate isInAfRange = AF -> this.min_AF <= AF && AF <= this.max_AF;
    final Set<Sample> caseSamples = persons.stream().filter(I -> I.isAffected()).collect(Collectors.toSet());
    final Set<Sample> controlSamples = persons.stream().filter(I -> I.isUnaffected()).collect(Collectors.toSet());
    if (caseSamples.isEmpty()) {
        LOG.warn("NO case in " + this.pedigreeFile);
    }
    if (controlSamples.isEmpty()) {
        LOG.warn("NO control in " + this.pedigreeFile);
    }
    final VCFHeader h2 = new VCFHeader(header0);
    if (!StringUtils.isBlank(this.prefix)) {
        h2.addMetaDataLine(mafCasInfoHeader);
        h2.addMetaDataLine(mafControlsInfoHeader);
        h2.addMetaDataLine(acCasInfoHeader);
        h2.addMetaDataLine(acControlsInfoHeader);
        h2.addMetaDataLine(filterCasHeader);
        h2.addMetaDataLine(filterControlsHeader);
        h2.addMetaDataLine(filterCaseOrControlsHeader);
    }
    JVarkitVersion.getInstance().addMetaData(this, h2);
    out.writeHeader(h2);
    while (in.hasNext()) {
        final VariantContext ctx = in.next();
        if (this.ignoreFiltered && ctx.isFiltered()) {
            if (!StringUtils.isBlank(this.prefix))
                out.add(ctx);
            continue;
        }
        if (!ctx.hasGenotypes()) {
            if (!StringUtils.isBlank(this.prefix))
                out.add(ctx);
            continue;
        }
        final List<Double> mafCasList = new ArrayList<>();
        final List<Double> mafCtrlList = new ArrayList<>();
        final List<Integer> acCasList = new ArrayList<>();
        final List<Integer> acCtrlList = new ArrayList<>();
        boolean set_max_maf_cas = true;
        boolean set_max_maf_control = true;
        boolean seen_data = false;
        for (final Allele observed_alt : ctx.getAlternateAlleles()) {
            /* loop over two populations : 0 = case, 1=controls */
            for (int pop = 0; pop < 2; ++pop) {
                final MafCalculator mafCalculator = new MafCalculator(observed_alt, ctx.getContig());
                mafCalculator.setNoCallIsHomRef(this.noCallAreHomRef);
                /* loop over persons in this pop */
                for (final Sample p : (pop == CASE_POP ? caseSamples : controlSamples)) {
                    /* get genotype for this individual */
                    final Genotype genotype = ctx.getGenotype(p.getId());
                    if (this.ignore_filtered_genotype && genotype.isFiltered())
                        continue;
                    mafCalculator.add(genotype, p.isMale());
                /* if(pop==CASE_POP && genotype.isCalled()) LOG.info("DEBUGMAF: "+p+" "+genotype); */
                }
                /* at least one genotype found */
                if (!mafCalculator.isEmpty()) {
                    seen_data = true;
                    /* get MAF */
                    final double maf = mafCalculator.getMaf();
                    final double ac = mafCalculator.getCountAlt();
                    if (pop == CASE_POP) {
                        /* add INFO attribute */
                        mafCasList.add(maf);
                        acCasList.add((int) ac);
                        /* remove FILTER if needed */
                        if (isInAfRange.test(maf))
                            set_max_maf_cas = false;
                    } else {
                        /* add INFO attribute */
                        mafCtrlList.add(maf);
                        acCtrlList.add((int) ac);
                        /* remove FILTER if needed */
                        if (isInAfRange.test(maf))
                            set_max_maf_control = false;
                    }
                } else {
                    if (pop == CASE_POP) {
                        mafCasList.add(0.0);
                        acCasList.add(0);
                        set_max_maf_cas = false;
                    } else {
                        mafCtrlList.add(0.0);
                        acCtrlList.add(0);
                        set_max_maf_control = false;
                    }
                }
            }
        /* end of loop over pop */
        }
        if (StringUtils.isBlank(this.prefix)) {
            if (!seen_data || set_max_maf_cas || set_max_maf_control)
                continue;
            out.add(ctx);
        } else {
            final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
            vcb.attribute(mafCasInfoHeader.getID(), mafCasList);
            vcb.attribute(mafControlsInfoHeader.getID(), mafCtrlList);
            vcb.attribute(acCasInfoHeader.getID(), acCasList);
            vcb.attribute(acControlsInfoHeader.getID(), acCtrlList);
            if (seen_data) {
                if (set_max_maf_cas)
                    vcb.filter(filterCasHeader.getID());
                if (set_max_maf_control)
                    vcb.filter(filterControlsHeader.getID());
                if (set_max_maf_cas || set_max_maf_control) {
                    vcb.filter(filterCaseOrControlsHeader.getID());
                }
            }
            out.add(vcb.make());
        }
    }
    return 0;
}
Also used : Genotype(htsjdk.variant.variantcontext.Genotype) Allele(htsjdk.variant.variantcontext.Allele) Program(com.github.lindenb.jvarkit.util.jcommander.Program) VCFIterator(htsjdk.variant.vcf.VCFIterator) Parameter(com.beust.jcommander.Parameter) VCFHeader(htsjdk.variant.vcf.VCFHeader) ArrayList(java.util.ArrayList) PedigreeParser(com.github.lindenb.jvarkit.pedigree.PedigreeParser) Path(java.nio.file.Path) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) VCFHeaderLineType(htsjdk.variant.vcf.VCFHeaderLineType) Logger(com.github.lindenb.jvarkit.util.log.Logger) ProgressFactory(com.github.lindenb.jvarkit.util.log.ProgressFactory) Set(java.util.Set) IOException(java.io.IOException) JVarkitVersion(com.github.lindenb.jvarkit.util.JVarkitVersion) Collectors(java.util.stream.Collectors) List(java.util.List) FractionConverter(com.github.lindenb.jvarkit.jcommander.converter.FractionConverter) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) VariantContext(htsjdk.variant.variantcontext.VariantContext) OnePassVcfLauncher(com.github.lindenb.jvarkit.jcommander.OnePassVcfLauncher) DoublePredicate(java.util.function.DoublePredicate) VCFHeaderLineCount(htsjdk.variant.vcf.VCFHeaderLineCount) Sample(com.github.lindenb.jvarkit.pedigree.Sample) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) DoublePredicate(java.util.function.DoublePredicate) Sample(com.github.lindenb.jvarkit.pedigree.Sample) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) Genotype(htsjdk.variant.variantcontext.Genotype) IOException(java.io.IOException) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) PedigreeParser(com.github.lindenb.jvarkit.pedigree.PedigreeParser) Allele(htsjdk.variant.variantcontext.Allele) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) VCFHeader(htsjdk.variant.vcf.VCFHeader)

Example 14 with Sample

use of com.github.lindenb.jvarkit.pedigree.Sample in project jvarkit by lindenb.

the class IndexCovToVcf method doWork.

@Override
public int doWork(final List<String> args) {
    final IndexCovUtils indexCovUtils = new IndexCovUtils(this.treshold);
    final CharSplitter tab = CharSplitter.TAB;
    BufferedReader r = null;
    VariantContextWriter vcw = null;
    try {
        final SAMSequenceDictionary dict;
        if (this.refFile == null) {
            dict = null;
        } else {
            dict = SequenceDictionaryUtils.extractRequired(this.refFile);
        }
        r = super.openBufferedReader(oneFileOrNull(args));
        String line = r.readLine();
        if (line == null) {
            LOG.error("Cannot read first line of input");
            return -1;
        }
        String[] tokens = tab.split(line);
        if (tokens.length < 4 || !tokens[0].equals("#chrom") || !tokens[1].equals("start") || !tokens[2].equals("end")) {
            LOG.error("bad first line " + line);
            return -1;
        }
        final Set<VCFHeaderLine> metaData = new HashSet<>();
        VCFStandardHeaderLines.addStandardFormatLines(metaData, true, VCFConstants.GENOTYPE_KEY, VCFConstants.GENOTYPE_QUALITY_KEY);
        VCFStandardHeaderLines.addStandardInfoLines(metaData, true, VCFConstants.END_KEY);
        /**
         * raw value in indexcov
         */
        final VCFFormatHeaderLine foldHeader = new VCFFormatHeaderLine("F", 1, VCFHeaderLineType.Float, "Relative number of copy: 0.5 deletion 1 normal 2.0 duplication");
        metaData.add(foldHeader);
        final VCFFilterHeaderLine filterAllDel = new VCFFilterHeaderLine("ALL_DEL", "number of samples greater than 1 and all are deletions");
        metaData.add(filterAllDel);
        final VCFFilterHeaderLine filterAllDup = new VCFFilterHeaderLine("ALL_DUP", "number of samples  greater than  1 and all are duplication");
        metaData.add(filterAllDup);
        final VCFFilterHeaderLine filterNoSV = new VCFFilterHeaderLine("NO_SV", "There is no DUP or DEL in this variant");
        metaData.add(filterNoSV);
        final VCFFilterHeaderLine filterHomDel = new VCFFilterHeaderLine("HOM_DEL", "There is one Homozygous deletion.");
        metaData.add(filterHomDel);
        final VCFFilterHeaderLine filterHomDup = new VCFFilterHeaderLine("HOM_DUP", "There is one Homozygous duplication.");
        metaData.add(filterHomDup);
        final VCFInfoHeaderLine infoNumDup = new VCFInfoHeaderLine("NDUP", 1, VCFHeaderLineType.Integer, "Number of samples being duplicated");
        metaData.add(infoNumDup);
        final VCFInfoHeaderLine infoNumDel = new VCFInfoHeaderLine("NDEL", 1, VCFHeaderLineType.Integer, "Number of samples being deleted");
        metaData.add(infoNumDel);
        final VCFInfoHeaderLine infoSingleton = new VCFInfoHeaderLine("SINGLETON", 1, VCFHeaderLineType.Flag, "Singleton candidate");
        metaData.add(infoSingleton);
        final VCFInfoHeaderLine infoAllAffected = new VCFInfoHeaderLine("ALL_CASES", 1, VCFHeaderLineType.Flag, "All cases are affected");
        metaData.add(infoAllAffected);
        final List<String> samples = Arrays.asList(tokens).subList(3, tokens.length);
        final Pedigree pedigree;
        final int count_cases_in_pedigree;
        if (this.pedFile == null) {
            pedigree = PedigreeParser.empty();
            count_cases_in_pedigree = 0;
        } else {
            pedigree = new PedigreeParser().parse(this.pedFile);
            final Set<String> set = new HashSet<>(samples);
            count_cases_in_pedigree = (int) pedigree.getAffectedSamples().stream().filter(S -> set.contains(S.getId())).count();
        }
        final VCFHeader vcfHeader = new VCFHeader(metaData, samples);
        JVarkitVersion.getInstance().addMetaData(this, vcfHeader);
        if (dict != null) {
            vcfHeader.setSequenceDictionary(dict);
        }
        vcw = this.writingDelegate.dictionary(dict).open(outputFile);
        vcw.writeHeader(vcfHeader);
        // final List<Allele> NO_CALL_NO_CALL = Arrays.asList(Allele.NO_CALL,Allele.NO_CALL);
        final Allele DUP_ALLELE = Allele.create("<DUP>", false);
        final Allele DEL_ALLELE = Allele.create("<DEL>", false);
        final Allele REF_ALLELE = Allele.create("N", true);
        while ((line = r.readLine()) != null) {
            if (StringUtil.isBlank(line))
                continue;
            tokens = tab.split(line);
            if (tokens.length != 3 + samples.size()) {
                r.close();
                vcw.close();
                throw new JvarkitException.TokenErrors("expected " + (samples.size() + 3) + "columns.", tokens);
            }
            final Set<Allele> alleles = new HashSet<>();
            alleles.add(REF_ALLELE);
            final VariantContextBuilder vcb = new VariantContextBuilder();
            vcb.chr(tokens[0]);
            vcb.start(Integer.parseInt(tokens[1]));
            final int chromEnd = Integer.parseInt(tokens[2]);
            vcb.stop(chromEnd);
            vcb.attribute(VCFConstants.END_KEY, chromEnd);
            if (dict != null) {
                final SAMSequenceRecord ssr = dict.getSequence(tokens[0]);
                if (ssr == null) {
                    LOG.error(JvarkitException.ContigNotFoundInDictionary.getMessage(tokens[0], dict));
                    return -1;
                }
                if (chromEnd > ssr.getSequenceLength()) {
                    LOG.warn("WARNING sequence length in " + line + " is greater than in dictionary ");
                }
            }
            int count_dup = 0;
            int count_del = 0;
            final Map<String, Float> sample2fold = new HashMap<>(samples.size());
            for (int i = 3; i < tokens.length; i++) {
                final String sampleName = samples.get(i - 3);
                final float f = Float.parseFloat(tokens[i]);
                if (f < 0 || Float.isNaN(f) || !Float.isFinite(f)) {
                    LOG.error("Bad fold " + f + " for sample " + sampleName + " in " + line);
                }
                sample2fold.put(sampleName, f);
            }
            final List<Genotype> genotypes = new ArrayList<>(samples.size());
            int count_sv_cases = 0;
            int count_sv_controls = 0;
            int count_ref_cases = 0;
            int count_ref_controls = 0;
            boolean got_sv = false;
            for (final String sampleName : sample2fold.keySet()) {
                final float normDepth = sample2fold.get(sampleName);
                final IndexCovUtils.SvType type = indexCovUtils.getType(normDepth);
                final GenotypeBuilder gb;
                switch(type) {
                    case REF:
                        {
                            gb = new GenotypeBuilder(sampleName, Arrays.asList(REF_ALLELE, REF_ALLELE));
                            break;
                        }
                    case HET_DEL:
                        {
                            gb = new GenotypeBuilder(sampleName, Arrays.asList(REF_ALLELE, DEL_ALLELE));
                            alleles.add(DEL_ALLELE);
                            count_del++;
                            break;
                        }
                    case HOM_DEL:
                        {
                            gb = new GenotypeBuilder(sampleName, Arrays.asList(DEL_ALLELE, DEL_ALLELE));
                            alleles.add(DEL_ALLELE);
                            count_del++;
                            vcb.filter(filterHomDel.getID());
                            break;
                        }
                    case HET_DUP:
                        {
                            gb = new GenotypeBuilder(sampleName, Arrays.asList(REF_ALLELE, DUP_ALLELE));
                            alleles.add(DUP_ALLELE);
                            count_dup++;
                            break;
                        }
                    case HOM_DUP:
                        {
                            gb = new GenotypeBuilder(sampleName, Arrays.asList(DUP_ALLELE, DUP_ALLELE));
                            alleles.add(DUP_ALLELE);
                            vcb.filter(filterHomDup.getID());
                            count_dup++;
                            break;
                        }
                    default:
                        {
                            gb = new GenotypeBuilder(sampleName, Arrays.asList(Allele.NO_CALL, Allele.NO_CALL));
                            break;
                        }
                }
                if (type.isVariant())
                    got_sv = true;
                gb.attribute(foldHeader.getID(), normDepth);
                gb.GQ(type.getGenotypeQuality(normDepth));
                final Sample sn = pedigree.getSampleById(sampleName);
                if (sn != null) {
                    if (type.isVariant()) {
                        if (sn.isAffected()) {
                            count_sv_cases++;
                        } else if (sn.isUnaffected()) {
                            count_sv_controls++;
                        }
                    } else {
                        if (sn.isAffected()) {
                            count_ref_cases++;
                        } else if (sn.isUnaffected()) {
                            count_ref_controls++;
                        }
                    }
                }
                genotypes.add(gb.make());
            }
            vcb.alleles(alleles);
            if (!pedigree.isEmpty() && count_sv_cases == 1 && count_ref_cases > 0 && count_sv_controls == 0 && count_ref_controls > 0) {
                vcb.attribute(infoSingleton.getID(), Boolean.TRUE);
            } else if (!pedigree.isEmpty() && count_sv_cases > 0 && count_sv_cases == count_cases_in_pedigree && count_ref_cases == 0 && count_sv_controls == 0 && count_ref_controls > 0) {
                vcb.attribute(infoAllAffected.getID(), Boolean.TRUE);
            }
            vcb.genotypes(genotypes);
            if (count_dup == samples.size() && samples.size() != 1) {
                vcb.filter(filterAllDup.getID());
            }
            if (count_del == samples.size() && samples.size() != 1) {
                vcb.filter(filterAllDel.getID());
            }
            if (!got_sv) {
                vcb.filter(filterNoSV.getID());
            }
            vcb.attribute(infoNumDel.getID(), count_del);
            vcb.attribute(infoNumDup.getID(), count_dup);
            vcw.add(vcb.make());
        }
        vcw.close();
        vcw = null;
        r.close();
        r = null;
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(r);
        CloserUtil.close(vcw);
    }
}
Also used : WritingVariantsDelegate(com.github.lindenb.jvarkit.variant.variantcontext.writer.WritingVariantsDelegate) Genotype(htsjdk.variant.variantcontext.Genotype) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) Allele(htsjdk.variant.variantcontext.Allele) Arrays(java.util.Arrays) CharSplitter(com.github.lindenb.jvarkit.lang.CharSplitter) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) VCFStandardHeaderLines(htsjdk.variant.vcf.VCFStandardHeaderLines) VCFHeader(htsjdk.variant.vcf.VCFHeader) HashMap(java.util.HashMap) ParametersDelegate(com.beust.jcommander.ParametersDelegate) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) StringUtil(htsjdk.samtools.util.StringUtil) PedigreeParser(com.github.lindenb.jvarkit.pedigree.PedigreeParser) Map(java.util.Map) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) Path(java.nio.file.Path) CloserUtil(htsjdk.samtools.util.CloserUtil) VCFConstants(htsjdk.variant.vcf.VCFConstants) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) VCFHeaderLineType(htsjdk.variant.vcf.VCFHeaderLineType) Pedigree(com.github.lindenb.jvarkit.pedigree.Pedigree) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) Logger(com.github.lindenb.jvarkit.util.log.Logger) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) Set(java.util.Set) JVarkitVersion(com.github.lindenb.jvarkit.util.JVarkitVersion) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) List(java.util.List) FractionConverter(com.github.lindenb.jvarkit.jcommander.converter.FractionConverter) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) VCFFormatHeaderLine(htsjdk.variant.vcf.VCFFormatHeaderLine) BufferedReader(java.io.BufferedReader) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) Sample(com.github.lindenb.jvarkit.pedigree.Sample) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) PedigreeParser(com.github.lindenb.jvarkit.pedigree.PedigreeParser) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) VCFHeader(htsjdk.variant.vcf.VCFHeader) HashSet(java.util.HashSet) VCFFormatHeaderLine(htsjdk.variant.vcf.VCFFormatHeaderLine) CharSplitter(com.github.lindenb.jvarkit.lang.CharSplitter) Sample(com.github.lindenb.jvarkit.pedigree.Sample) Genotype(htsjdk.variant.variantcontext.Genotype) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) Allele(htsjdk.variant.variantcontext.Allele) Pedigree(com.github.lindenb.jvarkit.pedigree.Pedigree) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) BufferedReader(java.io.BufferedReader)

Example 15 with Sample

use of com.github.lindenb.jvarkit.pedigree.Sample in project jvarkit by lindenb.

the class VCFComposite method doVcfToVcf.

protected int doVcfToVcf(final String inputName, final VCFIterator iterin, final VariantContextWriter out) {
    final VCFHeader header = iterin.getHeader();
    if (!header.hasGenotypingData()) {
        LOG.error("No genotypes in " + inputName);
        return -1;
    }
    final GeneExtractorFactory geneExtractorFactory = new GeneExtractorFactory(header);
    final List<GeneExtractorFactory.GeneExtractor> extractors = geneExtractorFactory.parse(this.extractorsNames);
    if (extractors.isEmpty()) {
        LOG.error("no gene extractor found/defined.");
        return -1;
    }
    final Pedigree pedigree;
    try {
        final Set<String> sampleNames = new HashSet<>(header.getSampleNamesInOrder());
        final PedigreeParser pedParser = new PedigreeParser();
        pedigree = pedParser.parse(this.pedigreeFile);
        if (pedigree == null || pedigree.isEmpty()) {
            LOG.error("pedigree missing/empty");
            return -1;
        }
        this.affectedSamples.addAll(pedigree.getAffectedSamples());
        this.affectedSamples.removeIf(S -> !sampleNames.contains(S.getId()));
        if (this.affectedSamples.isEmpty()) {
            LOG.error("No Affected sample in pedigree. " + this.pedigreeFile + "/" + inputName);
            return -1;
        }
        this.unaffectedSamples.addAll(pedigree.getUnaffectedSamples());
        this.unaffectedSamples.removeIf(S -> !sampleNames.contains(S.getId()));
        if (pedigree.getUnaffectedSamples().isEmpty()) {
            LOG.error("No Unaffected sample in " + this.pedigreeFile + "/" + inputName);
            return -1;
        }
    } catch (final IOException err) {
        throw new RuntimeIOException(err);
    }
    header.addMetaDataLine(new VCFInfoHeaderLine(INFO_TAG, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Variant of VCFComposite"));
    if (!StringUtils.isBlank(this.filterNonCompositeTag)) {
        header.addMetaDataLine(new VCFFilterHeaderLine(this.filterNonCompositeTag, "Not a Variant fir VCFComposite"));
    }
    final SAMSequenceDictionary dict = header.getSequenceDictionary();
    final Comparator<String> contigCmp;
    if (dict == null || dict.isEmpty()) {
        contigCmp = (A, B) -> A.compareTo(B);
    } else {
        contigCmp = new ContigDictComparator(dict);
    }
    final Comparator<VariantContext> ctxComparator = (V1, V2) -> {
        int i = contigCmp.compare(V1.getContig(), V2.getContig());
        if (i != 0)
            return i;
        i = Integer.compare(V1.getStart(), V2.getStart());
        if (i != 0)
            return i;
        return V1.getReference().compareTo(V2.getReference());
    };
    final Comparator<VariantLine> variantLineComparator = (V1, V2) -> {
        final int i = ctxComparator.compare(V1.ctx, V2.ctx);
        if (i != 0)
            return i;
        return Long.compare(V1.id, V2.id);
    };
    long ID_GENERATOR = 0L;
    this.vcfDecoder = VCFUtils.createDefaultVCFCodec();
    this.vcfDecoder.setVCFHeader(header, VCFHeaderVersion.VCF4_2);
    this.vcfEncoder = new VCFEncoder(header, false, true);
    SortingCollection<GeneAndVariant> sorting = null;
    SortingCollection<VariantLine> outputSorter = null;
    try {
        LOG.info("reading variants and genes");
        /* Gene and variant sorter */
        sorting = SortingCollection.newInstance(GeneAndVariant.class, new GeneAndVariantCodec(), GeneAndVariant::compareGeneThenIndex, this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpPaths());
        sorting.setDestructiveIteration(true);
        /* Variant sorter */
        outputSorter = SortingCollection.newInstance(VariantLine.class, new VariantLineCodec(), variantLineComparator, this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpPaths());
        outputSorter.setDestructiveIteration(true);
        /* read input */
        while (iterin.hasNext()) {
            final VariantContext ctx = iterin.next();
            final VariantLine variantLine = new VariantLine(++ID_GENERATOR, ctx);
            if (!this.variantJexl.test(ctx)) {
                outputSorter.add(variantLine);
                continue;
            }
            if (!acceptVariant(ctx)) {
                outputSorter.add(variantLine);
                continue;
            }
            final Set<GeneIdentifier> geneKeys = new HashSet<>();
            extractors.stream().map(EX -> EX.apply(ctx)).flatMap(H -> H.keySet().stream()).forEach(KG -> {
                geneKeys.add(new GeneIdentifier(KG.getKey(), KG.getGene(), KG.getMethod().replace('/', '_')));
            });
            if (geneKeys.isEmpty()) {
                outputSorter.add(variantLine);
                continue;
            }
            for (final GeneIdentifier gk : geneKeys) {
                final GeneAndVariant gav = new GeneAndVariant(gk, variantLine);
                gav.gene.contig = ctx.getContig();
                sorting.add(gav);
            }
        }
        sorting.doneAdding();
        LOG.info("compile per gene");
        this.reportGeneWriter = (this.geneReportPath == null ? new PrintWriter(new NullOuputStream()) : IOUtils.openPathForPrintWriter(this.geneReportPath));
        this.reportGeneWriter.print("#CHROM");
        this.reportGeneWriter.print('\t');
        this.reportGeneWriter.print("bed.start");
        this.reportGeneWriter.print('\t');
        this.reportGeneWriter.print("bed.end");
        this.reportGeneWriter.print('\t');
        this.reportGeneWriter.print("gene.key");
        this.reportGeneWriter.print('\t');
        this.reportGeneWriter.print("gene.label");
        this.reportGeneWriter.print('\t');
        this.reportGeneWriter.print("gene.source");
        this.reportGeneWriter.print('\t');
        this.reportGeneWriter.print("count.variants");
        this.reportGeneWriter.print('\t');
        this.reportGeneWriter.print("affected.counts");
        this.reportGeneWriter.print('\t');
        this.reportGeneWriter.print("affected.total");
        this.reportGeneWriter.print('\t');
        this.reportGeneWriter.print("affected.samples");
        this.reportGeneWriter.println();
        this.reportWriter = (this.reportPath == null ? new PrintWriter(new NullOuputStream()) : IOUtils.openPathForPrintWriter(this.reportPath));
        this.reportWriter.print("#CHROM");
        this.reportWriter.print('\t');
        this.reportWriter.print("bed.start");
        this.reportWriter.print('\t');
        this.reportWriter.print("bed.end");
        this.reportWriter.print('\t');
        this.reportWriter.print("gene.index");
        this.reportWriter.print('\t');
        this.reportWriter.print("gene.key");
        this.reportWriter.print('\t');
        this.reportWriter.print("gene.label");
        this.reportWriter.print('\t');
        this.reportWriter.print("gene.source");
        for (int side = 0; side < 2; ++side) {
            this.reportWriter.print('\t');
            final String prefix = "variant" + (side + 1) + ".";
            this.reportWriter.print(prefix + "start");
            this.reportWriter.print('\t');
            this.reportWriter.print(prefix + "end");
            this.reportWriter.print('\t');
            this.reportWriter.print(prefix + "ref");
            this.reportWriter.print('\t');
            this.reportWriter.print(prefix + "alt");
            this.reportWriter.print('\t');
            this.reportWriter.print(prefix + "info");
            for (final Sample sn : this.affectedSamples) {
                this.reportWriter.print('\t');
                this.reportWriter.print(prefix + "gt[" + sn.getId() + "].affected");
            }
            for (final Sample sn : this.unaffectedSamples) {
                this.reportWriter.print('\t');
                this.reportWriter.print(prefix + "gt[" + sn.getId() + "].unaffected");
            }
        }
        this.reportWriter.println();
        // compile data
        CloseableIterator<GeneAndVariant> iter2 = sorting.iterator();
        EqualRangeIterator<GeneAndVariant> eqiter = new EqualRangeIterator<>(iter2, (A, B) -> A.gene.compareTo(B.gene));
        while (eqiter.hasNext()) {
            final List<GeneAndVariant> variants = eqiter.next();
            scan(variants.get(0).gene, variants.stream().map(L -> L.variant).collect(Collectors.toList()));
            for (final GeneAndVariant ga : variants) outputSorter.add(ga.variant);
        }
        eqiter.close();
        iter2.close();
        sorting.cleanup();
        // 
        this.reportWriter.flush();
        this.reportWriter.close();
        this.reportGeneWriter.flush();
        this.reportGeneWriter.close();
        LOG.info("write variants");
        CloseableIterator<VariantLine> iter1 = outputSorter.iterator();
        EqualRangeIterator<VariantLine> eqiter1 = new EqualRangeIterator<>(iter1, variantLineComparator);
        out.writeHeader(header);
        while (eqiter1.hasNext()) {
            final List<VariantLine> array = eqiter1.next();
            final VariantContext firstCtx = array.get(0).ctx;
            final Set<String> set = getAnnotationsForVariant(firstCtx);
            final VariantContext outCtx;
            final VariantContextBuilder vcb = new VariantContextBuilder(firstCtx);
            for (int y = 1; y < array.size(); ++y) {
                set.addAll(getAnnotationsForVariant(array.get(y).ctx));
            }
            if (set.isEmpty()) {
                if (StringUtils.isBlank(this.filterNonCompositeTag)) {
                    // ignore
                    continue;
                } else {
                    vcb.filter(this.filterNonCompositeTag);
                }
            } else {
                if (!firstCtx.isFiltered()) {
                    vcb.passFilters();
                }
                vcb.attribute(INFO_TAG, new ArrayList<>(set));
            }
            outCtx = vcb.make();
            out.add(outCtx);
        }
        outputSorter.cleanup();
        eqiter1.close();
        iter1.close();
        return 0;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    }
}
Also used : WritingVariantsDelegate(com.github.lindenb.jvarkit.variant.variantcontext.writer.WritingVariantsDelegate) JexlVariantPredicate(com.github.lindenb.jvarkit.util.vcf.JexlVariantPredicate) Program(com.github.lindenb.jvarkit.util.jcommander.Program) GeneExtractorFactory(com.github.lindenb.jvarkit.util.vcf.predictions.GeneExtractorFactory) VCFHeader(htsjdk.variant.vcf.VCFHeader) VCFEncoder(htsjdk.variant.vcf.VCFEncoder) VCFHeaderVersion(htsjdk.variant.vcf.VCFHeaderVersion) DataOutputStream(java.io.DataOutputStream) StringUtil(htsjdk.samtools.util.StringUtil) AbstractDataCodec(com.github.lindenb.jvarkit.util.picard.AbstractDataCodec) Path(java.nio.file.Path) PrintWriter(java.io.PrintWriter) AbstractVCFCodec(htsjdk.variant.vcf.AbstractVCFCodec) Predicate(java.util.function.Predicate) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) EOFException(java.io.EOFException) Collectors(java.util.stream.Collectors) List(java.util.List) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) VariantContext(htsjdk.variant.variantcontext.VariantContext) VCFHeaderLineCount(htsjdk.variant.vcf.VCFHeaderLineCount) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) Genotype(htsjdk.variant.variantcontext.Genotype) DataInputStream(java.io.DataInputStream) CloseableIterator(htsjdk.samtools.util.CloseableIterator) VCFUtils(com.github.lindenb.jvarkit.util.vcf.VCFUtils) VCFIterator(htsjdk.variant.vcf.VCFIterator) Parameter(com.beust.jcommander.Parameter) NullOuputStream(com.github.lindenb.jvarkit.io.NullOuputStream) AnnPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParser) ParametersDelegate(com.beust.jcommander.ParametersDelegate) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) BiPredicate(java.util.function.BiPredicate) ContigDictComparator(com.github.lindenb.jvarkit.util.samtools.ContigDictComparator) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) PedigreeParser(com.github.lindenb.jvarkit.pedigree.PedigreeParser) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) VepPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParser) SortingCollection(htsjdk.samtools.util.SortingCollection) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) VCFHeaderLineType(htsjdk.variant.vcf.VCFHeaderLineType) JexlGenotypePredicate(com.github.lindenb.jvarkit.util.vcf.JexlGenotypePredicate) Pedigree(com.github.lindenb.jvarkit.pedigree.Pedigree) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) IOException(java.io.IOException) EqualRangeIterator(com.github.lindenb.jvarkit.util.iterator.EqualRangeIterator) Comparator(java.util.Comparator) Sample(com.github.lindenb.jvarkit.pedigree.Sample) VariantContext(htsjdk.variant.variantcontext.VariantContext) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) EqualRangeIterator(com.github.lindenb.jvarkit.util.iterator.EqualRangeIterator) ContigDictComparator(com.github.lindenb.jvarkit.util.samtools.ContigDictComparator) PedigreeParser(com.github.lindenb.jvarkit.pedigree.PedigreeParser) NullOuputStream(com.github.lindenb.jvarkit.io.NullOuputStream) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) VCFHeader(htsjdk.variant.vcf.VCFHeader) HashSet(java.util.HashSet) PrintWriter(java.io.PrintWriter) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) Sample(com.github.lindenb.jvarkit.pedigree.Sample) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) IOException(java.io.IOException) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) EOFException(java.io.EOFException) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) IOException(java.io.IOException) VCFEncoder(htsjdk.variant.vcf.VCFEncoder) GeneExtractorFactory(com.github.lindenb.jvarkit.util.vcf.predictions.GeneExtractorFactory) Pedigree(com.github.lindenb.jvarkit.pedigree.Pedigree) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder)

Aggregations

Sample (com.github.lindenb.jvarkit.pedigree.Sample)17 PedigreeParser (com.github.lindenb.jvarkit.pedigree.PedigreeParser)14 Parameter (com.beust.jcommander.Parameter)13 Pedigree (com.github.lindenb.jvarkit.pedigree.Pedigree)13 Program (com.github.lindenb.jvarkit.util.jcommander.Program)13 Logger (com.github.lindenb.jvarkit.util.log.Logger)13 Genotype (htsjdk.variant.variantcontext.Genotype)13 VariantContext (htsjdk.variant.variantcontext.VariantContext)13 Path (java.nio.file.Path)13 List (java.util.List)13 VCFHeader (htsjdk.variant.vcf.VCFHeader)12 ArrayList (java.util.ArrayList)12 Collectors (java.util.stream.Collectors)12 Set (java.util.Set)11 StringUtils (com.github.lindenb.jvarkit.lang.StringUtils)10 Launcher (com.github.lindenb.jvarkit.util.jcommander.Launcher)10 IOException (java.io.IOException)10 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)9 PrintWriter (java.io.PrintWriter)9 IOUtils (com.github.lindenb.jvarkit.io.IOUtils)8