Search in sources :

Example 6 with Pedigree

use of com.github.lindenb.jvarkit.pedigree.Pedigree in project jvarkit by lindenb.

the class VCFFamilies method doVcfToVcf.

@Override
public int doVcfToVcf(final String inputName, VCFIterator r, final VariantContextWriter w) {
    final VCFHeader header = r.getHeader();
    final Pedigree pedigree;
    try {
        pedigree = new PedigreeParser().parse(this.pedigreeFile);
    } catch (final Throwable err) {
        throw new RuntimeIOException(err);
    }
    if (pedigree == null || pedigree.isEmpty()) {
        throw new RuntimeIOException("Pedigree null/empty");
    }
    final VCFHeader h2 = new VCFHeader(header);
    final Map<String, FamilyInfo> famidToFamilyInfo = new HashMap<>();
    pedigree.getSamplesInVcfHeader(header).forEach(P -> {
        final Family pedFamily = P.getFamily();
        FamilyInfo finfo = famidToFamilyInfo.get(pedFamily.getId());
        if (finfo == null) {
            finfo = new FamilyInfo(pedFamily);
            famidToFamilyInfo.put(pedFamily.getId(), finfo);
        }
        finfo.samples.add(P.getId());
    });
    famidToFamilyInfo.values().stream().flatMap(F -> F.getMetaDataLines().stream()).forEach(H -> h2.addMetaDataLine(H));
    JVarkitVersion.getInstance().addMetaData(this, h2);
    w.writeHeader(h2);
    while (r.hasNext()) {
        final VariantContext ctx = r.next();
        final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
        final List<Allele> alts = ctx.getAlternateAlleles();
        famidToFamilyInfo.values().forEach(F -> F.visit(vcb, ctx, alts));
        w.add(vcb.make());
    }
    w.close();
    return 0;
}
Also used : WritingVariantsDelegate(com.github.lindenb.jvarkit.variant.variantcontext.writer.WritingVariantsDelegate) Genotype(htsjdk.variant.variantcontext.Genotype) Allele(htsjdk.variant.variantcontext.Allele) Arrays(java.util.Arrays) VCFIterator(htsjdk.variant.vcf.VCFIterator) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) VCFHeader(htsjdk.variant.vcf.VCFHeader) HashMap(java.util.HashMap) ParametersDelegate(com.beust.jcommander.ParametersDelegate) HashSet(java.util.HashSet) BiPredicate(java.util.function.BiPredicate) StringUtil(htsjdk.samtools.util.StringUtil) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) PedigreeParser(com.github.lindenb.jvarkit.pedigree.PedigreeParser) Map(java.util.Map) Path(java.nio.file.Path) VCFConstants(htsjdk.variant.vcf.VCFConstants) VCFHeaderLineType(htsjdk.variant.vcf.VCFHeaderLineType) JexlGenotypePredicate(com.github.lindenb.jvarkit.util.vcf.JexlGenotypePredicate) Pedigree(com.github.lindenb.jvarkit.pedigree.Pedigree) Family(com.github.lindenb.jvarkit.pedigree.Family) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) JVarkitVersion(com.github.lindenb.jvarkit.util.JVarkitVersion) List(java.util.List) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) VariantContext(htsjdk.variant.variantcontext.VariantContext) OnePassVcfLauncher(com.github.lindenb.jvarkit.jcommander.OnePassVcfLauncher) VCFHeaderLineCount(htsjdk.variant.vcf.VCFHeaderLineCount) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) HashMap(java.util.HashMap) VariantContext(htsjdk.variant.variantcontext.VariantContext) PedigreeParser(com.github.lindenb.jvarkit.pedigree.PedigreeParser) Allele(htsjdk.variant.variantcontext.Allele) Pedigree(com.github.lindenb.jvarkit.pedigree.Pedigree) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) Family(com.github.lindenb.jvarkit.pedigree.Family) VCFHeader(htsjdk.variant.vcf.VCFHeader)

Example 7 with Pedigree

use of com.github.lindenb.jvarkit.pedigree.Pedigree in project jvarkit by lindenb.

the class VCFTrios method doVcfToVcf.

@Override
public int doVcfToVcf(final String inputName, VCFIterator r, final VariantContextWriter w) {
    long count_incompats = 0L;
    final Set<String> sampleNotFoundInVcf = new HashSet<>();
    Pedigree pedigree = null;
    final List<TrioTriple> trios = new ArrayList<>();
    try {
        final DeNovoDetector detector = new DeNovoDetector();
        detector.setConvertingNoCallToHomRef(this.nocall_to_homref);
        final VCFHeader header = r.getHeader();
        final PedigreeParser pedParser = new PedigreeParser();
        pedigree = pedParser.parse(this.pedigreeFile);
        final VCFHeader h2 = new VCFHeader(header);
        final Set<VCFHeaderLine> meta = new HashSet<>();
        meta.add(new VCFInfoHeaderLine(this.attributeName, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Samples with mendelian incompatibilities." + (this.pedigreeFile == null ? "" : " Pedigree File was : " + this.pedigreeFile)));
        meta.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_FILTER_KEY, true));
        if (!StringUtil.isBlank(this.filterAnyIncompat)) {
            meta.add(new VCFFilterHeaderLine(this.filterAnyIncompat, "Variant contains at least one mendelian incompatibilities"));
        }
        if (!StringUtil.isBlank(this.filterNoIncompat)) {
            meta.add(new VCFFilterHeaderLine(this.filterNoIncompat, "Variant does not contain any mendelian incompatibilities"));
        }
        meta.stream().forEach(H -> h2.addMetaDataLine(H));
        JVarkitVersion.getInstance().addMetaData(this, h2);
        for (final Trio pedTrio : pedigree.getTrios()) {
            final TrioTriple trio = new TrioTriple();
            final Sample child = pedTrio.getChild();
            trio.child_id = header.getSampleNameToOffset().getOrDefault(child.getId(), -1);
            if (trio.child_id < 0)
                continue;
            if (pedTrio.hasFather()) {
                final Sample parent = pedTrio.getFather();
                trio.father_id = header.getSampleNameToOffset().getOrDefault(parent.getId(), -1);
            }
            if (pedTrio.hasMother()) {
                final Sample parent = pedTrio.getMother();
                trio.mother_id = header.getSampleNameToOffset().getOrDefault(parent.getId(), -1);
            }
            if (trio.father_id == -1 && trio.mother_id == -1) {
                continue;
            }
            trios.add(trio);
        }
        LOG.info("trios(s) in pedigree: " + trios.size());
        final ProgressFactory.Watcher<VariantContext> progress = ProgressFactory.newInstance().dictionary(header).logger(LOG).build();
        w.writeHeader(h2);
        while (r.hasNext()) {
            final VariantContext ctx = progress.apply(r.next());
            final Set<String> incompatibilities = new HashSet<String>();
            for (final TrioTriple trio : trios) {
                final Genotype gChild = ctx.getGenotype(trio.child_id);
                if (gChild == null)
                    throw new IllegalStateException();
                final Genotype gFather = trio.father_id < 0 ? null : ctx.getGenotype(trio.father_id);
                final Genotype gMother = trio.mother_id < 0 ? null : ctx.getGenotype(trio.mother_id);
                final DeNovoDetector.DeNovoMutation mut = detector.test(ctx, gFather, gMother, gChild);
                if (mut != null) {
                    incompatibilities.add(gChild.getSampleName());
                }
            }
            final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
            if (!incompatibilities.isEmpty()) {
                // set filter for samples that are not a mendelian violation
                if (!StringUtil.isBlank(this.genotypeFilterNameNoIncompat)) {
                    vcb.genotypes(ctx.getGenotypes().stream().map(G -> incompatibilities.contains(G.getSampleName()) ? G : new GenotypeBuilder(G).filters(this.genotypeFilterNameNoIncompat).make()).collect(Collectors.toList()));
                }
                ++count_incompats;
                // set INFO attribute
                vcb.attribute(attributeName, incompatibilities.toArray());
                // set FILTER
                if (!StringUtil.isBlank(this.filterAnyIncompat)) {
                    vcb.filter(this.filterAnyIncompat);
                } else if (!ctx.isFiltered()) {
                    vcb.passFilters();
                }
            } else // No denovo
            {
                // dicard variant
                if (this.discard_variants_without_mendelian_incompat) {
                    continue;
                }
                // set filters
                if (!StringUtil.isBlank(this.filterNoIncompat)) {
                    vcb.filter(this.filterNoIncompat);
                } else if (!ctx.isFiltered()) {
                    vcb.passFilters();
                }
            }
            w.add(vcb.make());
        }
        progress.close();
        LOG.info("incompatibilitie(s) N=" + count_incompats);
        if (!sampleNotFoundInVcf.isEmpty()) {
            LOG.info("SAMPLE(S) not found: " + String.join(" / ", sampleNotFoundInVcf));
        }
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    }
}
Also used : VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) ProgressFactory(com.github.lindenb.jvarkit.util.log.ProgressFactory) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) PedigreeParser(com.github.lindenb.jvarkit.pedigree.PedigreeParser) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) VCFHeader(htsjdk.variant.vcf.VCFHeader) HashSet(java.util.HashSet) Trio(com.github.lindenb.jvarkit.pedigree.Trio) Sample(com.github.lindenb.jvarkit.pedigree.Sample) Genotype(htsjdk.variant.variantcontext.Genotype) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) Pedigree(com.github.lindenb.jvarkit.pedigree.Pedigree) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder)

Example 8 with Pedigree

use of com.github.lindenb.jvarkit.pedigree.Pedigree in project jvarkit by lindenb.

the class SwingVcfView method doWork.

@Override
public int doWork(final List<String> args) {
    try {
        final List<Path> all_vcf_paths = new ArrayList<>(IOUtils.unrollPaths(args));
        if (all_vcf_paths.isEmpty()) {
            final JFileChooser jfc = new JFileChooser();
            jfc.setMultiSelectionEnabled(true);
            jfc.setFileFilter(new FileFilter() {

                @Override
                public String getDescription() {
                    return "Indexed Variant File";
                }

                @Override
                public boolean accept(final File f) {
                    if (f.isDirectory())
                        return true;
                    if (!f.canRead())
                        return false;
                    if (FileExtensions.VCF_LIST.stream().noneMatch(X -> f.getName().endsWith(X)))
                        return false;
                    return true;
                }
            });
            if (jfc.showOpenDialog(null) != JFileChooser.APPROVE_OPTION)
                return -1;
            final File[] sel = jfc.getSelectedFiles();
            if (sel == null || sel.length == 0)
                return -1;
            Arrays.asList(sel).stream().map(F -> F.toPath()).forEach(P -> all_vcf_paths.add(P));
        }
        for (final Path path : all_vcf_paths) {
            IOUtil.assertFileIsReadable(path);
        }
        /**
         * create VCF if needed
         */
        for (final Path path : all_vcf_paths) {
            try {
                ActionIndexVcf.indexVcf(path, false);
            } catch (IOException err) {
                ThrowablePane.show(null, err);
                return -1;
            }
        }
        final Pedigree ped;
        if (this.pedigreePath != null) {
            ped = new PedigreeParser().parse(this.pedigreePath);
        } else {
            ped = null;
        }
        JFrame.setDefaultLookAndFeelDecorated(true);
        final Dimension screen = Toolkit.getDefaultToolkit().getScreenSize();
        SwingUtilities.invokeAndWait(() -> {
            int dx = 0;
            for (final Path vcfPath : all_vcf_paths) {
                final XFrame frame = new XFrame(vcfPath, defaultRegion, this.limit_number_variant, ped, this.gffPath);
                frame.setBounds(50 + dx, 50 + dx, screen.width - 100, screen.height - 100);
                frame.setVisible(true);
                dx += 2;
            }
        });
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    }
}
Also used : Path(java.nio.file.Path) Arrays(java.util.Arrays) Program(com.github.lindenb.jvarkit.util.jcommander.Program) LineIterator(htsjdk.tribble.readers.LineIterator) VCFHeader(htsjdk.variant.vcf.VCFHeader) IntervalParserFactory(com.github.lindenb.jvarkit.samtools.util.IntervalParserFactory) Panel(java.awt.Panel) StringUtil(htsjdk.samtools.util.StringUtil) Map(java.util.Map) Prediction(com.github.lindenb.jvarkit.util.vcf.predictions.SnpEffLofNmdParser.Prediction) SynchronousLineReader(htsjdk.tribble.readers.SynchronousLineReader) JFileChooser(javax.swing.JFileChooser) JFrame(javax.swing.JFrame) SwingVariantsTableModel(com.github.lindenb.jvarkit.variant.swing.SwingVariantsTableModel) Path(java.nio.file.Path) PrintWriter(java.io.PrintWriter) SimpleInterval(com.github.lindenb.jvarkit.samtools.util.SimpleInterval) UrlSupplier(com.github.lindenb.jvarkit.net.UrlSupplier) SnpEffLofNmdParser(com.github.lindenb.jvarkit.util.vcf.predictions.SnpEffLofNmdParser) SwingGff3TableModel(com.github.lindenb.jvarkit.gff3.SwingGff3TableModel) WindowAdapter(java.awt.event.WindowAdapter) SwingPedigreeTableModel(com.github.lindenb.jvarkit.variant.swing.SwingPedigreeTableModel) Component(java.awt.Component) SwingVCFGenotypesTableModel(com.github.lindenb.jvarkit.variant.swing.SwingVCFGenotypesTableModel) Stream(java.util.stream.Stream) AbstractAction(javax.swing.AbstractAction) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) EmptyBorder(javax.swing.border.EmptyBorder) JPanel(javax.swing.JPanel) ListSelectionModel(javax.swing.ListSelectionModel) Genotype(htsjdk.variant.variantcontext.Genotype) CloseableIterator(htsjdk.samtools.util.CloseableIterator) JSplitPane(javax.swing.JSplitPane) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) Parameter(com.beust.jcommander.Parameter) Action(javax.swing.Action) ArrayList(java.util.ArrayList) PedigreeParser(com.github.lindenb.jvarkit.pedigree.PedigreeParser) JTabbedPane(javax.swing.JTabbedPane) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) VCFReaderFactory(com.github.lindenb.jvarkit.variant.vcf.VCFReaderFactory) TabixReader(htsjdk.tribble.readers.TabixReader) SwingVCFInfoHeaderLineTableModel(com.github.lindenb.jvarkit.variant.swing.SwingVCFInfoHeaderLineTableModel) FlowLayout(java.awt.FlowLayout) Counter(com.github.lindenb.jvarkit.util.Counter) JButton(javax.swing.JButton) Pedigree(com.github.lindenb.jvarkit.pedigree.Pedigree) IOException(java.io.IOException) FileFilter(javax.swing.filechooser.FileFilter) MouseEvent(java.awt.event.MouseEvent) File(java.io.File) JScrollPane(javax.swing.JScrollPane) DefaultListModel(javax.swing.DefaultListModel) Gff3Codec(htsjdk.tribble.gff.Gff3Codec) SwingVCFInfoTableModel(com.github.lindenb.jvarkit.variant.swing.SwingVCFInfoTableModel) BufferedReader(java.io.BufferedReader) AbstractGenericTable(com.github.lindenb.jvarkit.util.swing.AbstractGenericTable) SwingAnnPredictionTableModel(com.github.lindenb.jvarkit.variant.swing.SwingAnnPredictionTableModel) IOUtil(htsjdk.samtools.util.IOUtil) ThrowablePane(com.github.lindenb.jvarkit.util.swing.ThrowablePane) VariantContextUtils(htsjdk.variant.variantcontext.VariantContextUtils) MouseAdapter(java.awt.event.MouseAdapter) JexlVCMatchExp(htsjdk.variant.variantcontext.VariantContextUtils.JexlVCMatchExp) URI(java.net.URI) BorderLayout(java.awt.BorderLayout) TableModel(javax.swing.table.TableModel) JMenuBar(javax.swing.JMenuBar) Logger(com.github.lindenb.jvarkit.util.log.Logger) GenotypeType(htsjdk.variant.variantcontext.GenotypeType) JMenu(javax.swing.JMenu) BorderFactory(javax.swing.BorderFactory) Collectors(java.util.stream.Collectors) WindowEvent(java.awt.event.WindowEvent) LineIteratorImpl(htsjdk.tribble.readers.LineIteratorImpl) Dimension(java.awt.Dimension) List(java.util.List) FileExtensions(htsjdk.samtools.util.FileExtensions) JSeparator(javax.swing.JSeparator) JCheckBox(javax.swing.JCheckBox) Optional(java.util.Optional) JTable(javax.swing.JTable) VariantContext(htsjdk.variant.variantcontext.VariantContext) Pattern(java.util.regex.Pattern) Toolkit(java.awt.Toolkit) JTextField(javax.swing.JTextField) HashMap(java.util.HashMap) SwingUtilities(javax.swing.SwingUtilities) SwingVepPredictionTableModel(com.github.lindenb.jvarkit.variant.swing.SwingVepPredictionTableModel) JMenuItem(javax.swing.JMenuItem) TabixIteratorLineReader(htsjdk.tribble.readers.TabixIteratorLineReader) ActionIndexVcf(com.github.lindenb.jvarkit.variant.swing.ActionIndexVcf) SwingSequenceDictionaryTableModel(com.github.lindenb.jvarkit.samtools.reference.SwingSequenceDictionaryTableModel) SwingTrioTableModel(com.github.lindenb.jvarkit.variant.swing.SwingTrioTableModel) SwingBcsqPredictionTableModel(com.github.lindenb.jvarkit.variant.swing.SwingBcsqPredictionTableModel) JComponent(javax.swing.JComponent) Gff3Feature(htsjdk.tribble.gff.Gff3Feature) Desktop(java.awt.Desktop) SwingVCFFormatHeaderLineTableModel(com.github.lindenb.jvarkit.variant.swing.SwingVCFFormatHeaderLineTableModel) JPopupMenu(javax.swing.JPopupMenu) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) JList(javax.swing.JList) VCFReader(htsjdk.variant.vcf.VCFReader) SwingAllelesTableModel(com.github.lindenb.jvarkit.variant.swing.SwingAllelesTableModel) JOptionPane(javax.swing.JOptionPane) ActionEvent(java.awt.event.ActionEvent) SmooveGenesParser(com.github.lindenb.jvarkit.util.vcf.predictions.SmooveGenesParser) JLabel(javax.swing.JLabel) SwingVCFFilterHeaderLineTableModel(com.github.lindenb.jvarkit.variant.swing.SwingVCFFilterHeaderLineTableModel) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) Collections(java.util.Collections) ArrayList(java.util.ArrayList) IOException(java.io.IOException) Dimension(java.awt.Dimension) PedigreeParser(com.github.lindenb.jvarkit.pedigree.PedigreeParser) JFileChooser(javax.swing.JFileChooser) Pedigree(com.github.lindenb.jvarkit.pedigree.Pedigree) FileFilter(javax.swing.filechooser.FileFilter) File(java.io.File)

Example 9 with Pedigree

use of com.github.lindenb.jvarkit.pedigree.Pedigree in project jvarkit by lindenb.

the class VcfBurdenFisherH method doVcfToVcf.

@Override
protected int doVcfToVcf(final String inputName, final VCFIterator r, final VariantContextWriter w) {
    final PrintWriter report;
    if (this.bedExportPath == null) {
        report = new PrintWriter(new NullOuputStream());
    } else {
        report = new PrintWriter(IOUtil.openFileForBufferedUtf8Writing(this.bedExportPath));
    }
    final VCFHeader header = r.getHeader();
    final VCFHeader h2 = new VCFHeader(header);
    final Pedigree pedigree;
    try {
        pedigree = new PedigreeParser().parse(this.pedigreeFile);
    } catch (final IOException error) {
        throw new RuntimeIOException(error);
    }
    final Set<Sample> individualSet = pedigree.getSamplesInVcfHeader(header).filter(S -> S.isStatusSet()).collect(Collectors.toSet());
    if (individualSet.isEmpty())
        throw new IllegalArgumentException("No overlapping samples between header and pedigree.");
    final VCFInfoHeaderLine fisherAlleleInfoHeader = new VCFInfoHeaderLine(this.burdenHFisherTag, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Fisher Exact Test Case/Control.");
    final VCFFilterHeaderLine variantFilterHeader;
    if (StringUtils.isBlank(this.softFilterName)) {
        variantFilterHeader = null;
    } else {
        variantFilterHeader = new VCFFilterHeaderLine(this.softFilterName, "Fisher case:control vs miss|have ALT not between " + this.min_fisher + " and " + this.max_fisher);
        h2.addMetaDataLine(variantFilterHeader);
    }
    final VCFFilterHeaderLine filterCtrlgtCaseRatio;
    if (StringUtils.isBlank(this.filterCtrlgtCaseRatioStr)) {
        filterCtrlgtCaseRatio = null;
    } else {
        filterCtrlgtCaseRatio = new VCFFilterHeaderLine(this.filterCtrlgtCaseRatioStr, "The number of CONTROLS carrying the ALT allele is creater than the number of CASES carrying the ALT allele.");
        h2.addMetaDataLine(filterCtrlgtCaseRatio);
    }
    final VCFInfoHeaderLine fisherDetailInfoHeader = new VCFInfoHeaderLine(this.burdenHFisherTag + "Detail", VCFHeaderLineCount.A, VCFHeaderLineType.String, "Fisher Exact Test Case/Control");
    h2.addMetaDataLine(fisherAlleleInfoHeader);
    h2.addMetaDataLine(fisherDetailInfoHeader);
    JVarkitVersion.getInstance().addMetaData(this, h2);
    w.writeHeader(h2);
    while (r.hasNext()) {
        final VariantContext ctx = r.next();
        final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
        if (this.overwrite_qual)
            vcb.log10PError(VariantContext.NO_LOG10_PERROR);
        vcb.rmAttribute(fisherAlleleInfoHeader.getID());
        vcb.rmAttribute(fisherDetailInfoHeader.getID());
        final Set<String> oldFilters = new HashSet<>(ctx.getFilters());
        if (variantFilterHeader != null)
            oldFilters.remove(variantFilterHeader.getID());
        if (filterCtrlgtCaseRatio != null)
            oldFilters.remove(filterCtrlgtCaseRatio.getID());
        if (this.ignoreFiltered && ctx.isFiltered()) {
            w.add(vcb.make());
            continue;
        }
        boolean set_filter_in_range = true;
        boolean set_filter_case_ctrl_ratio = true;
        boolean found_one_alt = false;
        final List<String> infoData = new ArrayList<>(ctx.getAlleles().size());
        final List<Double> fisherValues = new ArrayList<>(ctx.getAlleles().size());
        for (final Allele observed_alt : ctx.getAlternateAlleles()) {
            if (observed_alt.isNoCall()) {
                infoData.add(String.join("|", "ALLELE", String.valueOf(observed_alt.getDisplayString()), "FISHER", "1.0"));
                fisherValues.add(1.0);
                continue;
            }
            /* count for fisher allele */
            int count_case_have_alt = 0;
            int count_case_miss_alt = 0;
            int count_ctrl_have_alt = 0;
            int count_ctrl_miss_alt = 0;
            /* loop over persons in this pop */
            for (final Sample p : individualSet) {
                /* get genotype for this individual */
                final Genotype genotype = ctx.getGenotype(p.getId());
                /* individual is not in vcf header */
                if (genotype == null || !genotype.isCalled() || (this.ignore_filtered_genotype && genotype.isFiltered())) {
                    if (genotype == null)
                        LOG.warn("Genotype is null for sample " + p.getId() + " not is pedigree!");
                    // no information , we consider that sample was called AND HOM REF
                    if (p.isAffected()) {
                        count_case_miss_alt++;
                    } else {
                        count_ctrl_miss_alt++;
                    }
                    continue;
                }
                /* loop over alleles */
                final boolean genotype_contains_allele = genotype.getAlleles().stream().anyMatch(A -> A.equals(observed_alt));
                /* fisher */
                if (genotype_contains_allele) {
                    if (p.isAffected()) {
                        count_case_have_alt++;
                        ;
                    } else {
                        count_ctrl_have_alt++;
                    }
                } else {
                    if (p.isAffected()) {
                        count_case_miss_alt++;
                    } else {
                        count_ctrl_miss_alt++;
                    }
                }
            }
            /* end of loop over persons */
            /* fisher test for alleles */
            final FisherExactTest fisherAlt = FisherExactTest.compute(count_case_have_alt, count_case_miss_alt, count_ctrl_have_alt, count_ctrl_miss_alt);
            fisherValues.add(fisherAlt.getAsDouble());
            infoData.add(String.join("|", "ALLELE", String.valueOf(observed_alt.getDisplayString()), "FISHER", String.valueOf(fisherAlt.getAsDouble()), "CASE_HAVE_ALT", String.valueOf(count_case_have_alt), "CASE_MISS_ALT", String.valueOf(count_case_miss_alt), "CTRL_HAVE_ALT", String.valueOf(count_ctrl_have_alt), "CTRL_MISS_ALT", String.valueOf(count_ctrl_miss_alt)));
            found_one_alt = true;
            final boolean is_in_range = this.min_fisher <= fisherAlt.getAsDouble() && fisherAlt.getAsDouble() <= this.max_fisher;
            final int total_ctrls = count_ctrl_have_alt + count_ctrl_miss_alt;
            final int total_cases = count_case_have_alt + count_case_miss_alt;
            // check ratio case/control
            if (total_ctrls > 0 && total_cases > 0 && (count_case_have_alt / (double) total_cases) >= (count_ctrl_have_alt / (double) total_ctrls)) {
                set_filter_case_ctrl_ratio = false;
            }
            if (is_in_range) {
                set_filter_in_range = false;
            }
            if (this.bedExportPath != null && is_in_range) {
                report.print(ctx.getContig());
                report.print('\t');
                report.print(ctx.getStart() - 1);
                report.print('\t');
                report.print(ctx.getEnd());
                report.print('\t');
                report.print(ctx.getReference().getDisplayString());
                report.print('\t');
                report.print(observed_alt.getDisplayString());
                report.print('\t');
                report.print(fisherAlt.getAsDouble());
                report.print('\t');
                report.print(count_case_have_alt);
                report.print('\t');
                report.print(count_case_miss_alt);
                report.print('\t');
                report.print(count_ctrl_have_alt);
                report.print('\t');
                report.print(count_ctrl_miss_alt);
                report.println();
            }
        }
        // better than nothing, otherwise i'll mess with the filters
        if (!found_one_alt) {
            w.add(vcb.make());
            continue;
        }
        // soft filter, skip variant
        if ((set_filter_in_range && variantFilterHeader == null) || (set_filter_case_ctrl_ratio && filterCtrlgtCaseRatio == null))
            // skip variant
            continue;
        vcb.attribute(fisherAlleleInfoHeader.getID(), fisherValues);
        vcb.attribute(fisherDetailInfoHeader.getID(), infoData);
        if (this.overwrite_qual) {
            final OptionalDouble minV = fisherValues.stream().mapToDouble(V -> V.doubleValue()).min();
            if (minV.isPresent())
                vcb.log10PError(Math.max(1.0E-100, /* arbitrary */
                minV.getAsDouble()) / -10);
        }
        if (set_filter_in_range && variantFilterHeader != null) {
            vcb.filter(variantFilterHeader.getID());
            // only set this one if the filter is set above
            if (set_filter_case_ctrl_ratio && filterCtrlgtCaseRatio != null) {
                vcb.filter(filterCtrlgtCaseRatio.getID());
            }
        }
        w.add(vcb.make());
    }
    w.close();
    report.flush();
    report.close();
    return 0;
}
Also used : Genotype(htsjdk.variant.variantcontext.Genotype) Allele(htsjdk.variant.variantcontext.Allele) VCFIterator(htsjdk.variant.vcf.VCFIterator) Program(com.github.lindenb.jvarkit.util.jcommander.Program) IOUtil(htsjdk.samtools.util.IOUtil) NullOuputStream(com.github.lindenb.jvarkit.io.NullOuputStream) Parameter(com.beust.jcommander.Parameter) VCFHeader(htsjdk.variant.vcf.VCFHeader) OptionalDouble(java.util.OptionalDouble) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) FisherExactTest(com.github.lindenb.jvarkit.math.stats.FisherExactTest) PedigreeParser(com.github.lindenb.jvarkit.pedigree.PedigreeParser) Path(java.nio.file.Path) PrintWriter(java.io.PrintWriter) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) VCFHeaderLineType(htsjdk.variant.vcf.VCFHeaderLineType) Pedigree(com.github.lindenb.jvarkit.pedigree.Pedigree) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) IOException(java.io.IOException) JVarkitVersion(com.github.lindenb.jvarkit.util.JVarkitVersion) Collectors(java.util.stream.Collectors) List(java.util.List) FractionConverter(com.github.lindenb.jvarkit.jcommander.converter.FractionConverter) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) VariantContext(htsjdk.variant.variantcontext.VariantContext) OnePassVcfLauncher(com.github.lindenb.jvarkit.jcommander.OnePassVcfLauncher) VCFHeaderLineCount(htsjdk.variant.vcf.VCFHeaderLineCount) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) Sample(com.github.lindenb.jvarkit.pedigree.Sample) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) PedigreeParser(com.github.lindenb.jvarkit.pedigree.PedigreeParser) NullOuputStream(com.github.lindenb.jvarkit.io.NullOuputStream) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) VCFHeader(htsjdk.variant.vcf.VCFHeader) PrintWriter(java.io.PrintWriter) HashSet(java.util.HashSet) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) Sample(com.github.lindenb.jvarkit.pedigree.Sample) Genotype(htsjdk.variant.variantcontext.Genotype) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) IOException(java.io.IOException) OptionalDouble(java.util.OptionalDouble) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) FisherExactTest(com.github.lindenb.jvarkit.math.stats.FisherExactTest) OptionalDouble(java.util.OptionalDouble) Allele(htsjdk.variant.variantcontext.Allele) Pedigree(com.github.lindenb.jvarkit.pedigree.Pedigree) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder)

Example 10 with Pedigree

use of com.github.lindenb.jvarkit.pedigree.Pedigree in project jvarkit by lindenb.

the class VcfStretchOfGt method doWork.

@Override
public int doWork(final List<String> args) {
    VCFIterator iter = null;
    PrintWriter w = null;
    try {
        iter = super.openVCFIterator(oneFileOrNull(args));
        final VCFHeader header = iter.getHeader();
        if (!header.hasGenotypingData()) {
            LOG.error("No genotype in input");
            return -1;
        }
        final Set<String> vcf_samples = new HashSet<>(header.getSampleNamesInOrder());
        final List<SampleSet> all_sample_set = new ArrayList<>();
        if (!StringUtils.isBlank(this.affectedStr)) {
            final Set<String> affected = Arrays.stream(this.affectedStr.split("[ ,;]+")).filter(S -> !StringUtils.isBlank(S)).filter(S -> vcf_samples.contains(S)).collect(Collectors.toSet());
            if (affected.isEmpty()) {
                LOG.error("No affected sample in string " + this.affectedStr);
                return -1;
            }
            /* at least 2 because singleton sample are created later */
            if (affected.size() > 1) {
                all_sample_set.add(new SampleSet(vcf_samples, affected));
            }
        }
        if (this.pedigreePath != null) {
            final Pedigree pedigree = new PedigreeParser().parse(this.pedigreePath);
            final Set<String> affected = pedigree.getAffectedSamples().stream().map(P -> P.getId()).filter(ID -> header.getSampleNameToOffset().containsKey(ID)).collect(Collectors.toSet());
            if (affected.isEmpty()) {
                LOG.error("No affected sample in pedigree " + this.pedigreePath);
                return -1;
            }
            /* at least 2 because singleton sample are created later */
            if (affected.size() > 1) {
                all_sample_set.add(new SampleSet(vcf_samples, affected));
            }
        }
        /* singletons */
        for (final String sn : vcf_samples) {
            final SampleSet sampleSet = new SampleSet(vcf_samples, Collections.singleton(sn));
            all_sample_set.add(sampleSet);
        }
        w = super.openPathOrStdoutAsPrintWriter(this.outputFile);
        w.print("#chrom");
        w.print('\t');
        w.print("start0");
        w.print('\t');
        w.print("end0");
        w.print('\t');
        w.print("length");
        w.print('\t');
        w.print("samples");
        w.print('\t');
        w.print("count.affected.variants");
        w.print('\t');
        w.print("average.affected.depth");
        w.print('\t');
        w.print("count.other.variants");
        w.println();
        final ProgressFactory.Watcher<VariantContext> progress = ProgressFactory.newInstance().dictionary(header).logger(LOG).build();
        while (iter.hasNext()) {
            final VariantContext ctx = progress.apply(iter.next());
            for (final SampleSet snSet : all_sample_set) snSet.visit(w, ctx);
        }
        for (final SampleSet snSet : all_sample_set) snSet.dump(w);
        w.flush();
        w.close();
        w = null;
        iter.close();
        iter = null;
        progress.close();
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(iter);
        CloserUtil.close(w);
    }
}
Also used : PrintWriter(java.io.PrintWriter) Genotype(htsjdk.variant.variantcontext.Genotype) Arrays(java.util.Arrays) Pedigree(com.github.lindenb.jvarkit.pedigree.Pedigree) Program(com.github.lindenb.jvarkit.util.jcommander.Program) VCFIterator(htsjdk.variant.vcf.VCFIterator) Parameter(com.beust.jcommander.Parameter) Predicate(java.util.function.Predicate) VCFHeader(htsjdk.variant.vcf.VCFHeader) Logger(com.github.lindenb.jvarkit.util.log.Logger) ProgressFactory(com.github.lindenb.jvarkit.util.log.ProgressFactory) Set(java.util.Set) Collectors(java.util.stream.Collectors) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) List(java.util.List) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) PedigreeParser(com.github.lindenb.jvarkit.pedigree.PedigreeParser) CoordMath(htsjdk.samtools.util.CoordMath) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) VariantContext(htsjdk.variant.variantcontext.VariantContext) Path(java.nio.file.Path) Collections(java.util.Collections) CloserUtil(htsjdk.samtools.util.CloserUtil) ProgressFactory(com.github.lindenb.jvarkit.util.log.ProgressFactory) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) PedigreeParser(com.github.lindenb.jvarkit.pedigree.PedigreeParser) Pedigree(com.github.lindenb.jvarkit.pedigree.Pedigree) VCFHeader(htsjdk.variant.vcf.VCFHeader) VCFIterator(htsjdk.variant.vcf.VCFIterator) PrintWriter(java.io.PrintWriter) HashSet(java.util.HashSet)

Aggregations

Pedigree (com.github.lindenb.jvarkit.pedigree.Pedigree)14 PedigreeParser (com.github.lindenb.jvarkit.pedigree.PedigreeParser)14 Parameter (com.beust.jcommander.Parameter)13 Program (com.github.lindenb.jvarkit.util.jcommander.Program)13 Logger (com.github.lindenb.jvarkit.util.log.Logger)13 VCFHeader (htsjdk.variant.vcf.VCFHeader)13 Path (java.nio.file.Path)13 Genotype (htsjdk.variant.variantcontext.Genotype)12 VariantContext (htsjdk.variant.variantcontext.VariantContext)12 List (java.util.List)12 ArrayList (java.util.ArrayList)11 Launcher (com.github.lindenb.jvarkit.util.jcommander.Launcher)10 Collectors (java.util.stream.Collectors)10 StringUtils (com.github.lindenb.jvarkit.lang.StringUtils)9 Sample (com.github.lindenb.jvarkit.pedigree.Sample)9 PrintWriter (java.io.PrintWriter)9 Set (java.util.Set)9 VariantContextBuilder (htsjdk.variant.variantcontext.VariantContextBuilder)8 HashSet (java.util.HashSet)8 SequenceDictionaryUtils (com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils)7