Search in sources :

Example 51 with VcfIterator

use of com.github.lindenb.jvarkit.util.vcf.VcfIterator in project jvarkit by lindenb.

the class Biostar130456 method doWork.

/**
 * Splits the input VCF into one VCF file per sample.
 *
 * <p>The name of each output file is built from {@code filepattern} by
 * substituting {@code SAMPLE_TAG} with the sample name; every generated file
 * name is also printed to the main output stream. For each variant, the
 * genotype of each sample is written to that sample's file unless it is
 * discarded by the {@code remove_uncalled} / {@code remove_homref} options.
 *
 * @param args command line arguments; at most one input VCF is expected
 *             (resolved via {@code oneFileOrNull})
 * @return {@code RETURN_OK} on success, {@code -1} on any error
 */
@Override
public int doWork(final List<String> args) {
    if (this.filepattern == null || !filepattern.contains(SAMPLE_TAG)) {
        LOG.error("File pattern is missing " + SAMPLE_TAG);
        return -1;
    }
    PrintStream out = null;
    VcfIterator in = null;
    // Declared outside the try block so the finally clause can release every
    // writer that is still open when an exception interrupts the processing.
    final Map<String, VariantContextWriter> sample2writer = new HashMap<String, VariantContextWriter>();
    final String inputName = oneFileOrNull(args);
    try {
        out = openFileOrStdoutAsPrintStream(outputFile);
        in = super.openVcfIterator(inputName);
        final VCFHeader header = in.getHeader();
        this.recalculator.setHeader(header);
        final Set<String> samples = new HashSet<String>(header.getSampleNamesInOrder());
        if (samples.isEmpty()) {
            LOG.error("VCF doesn't contain any sample");
            return -1;
        }
        LOG.info("N sample:" + samples.size());
        for (final String sample : samples) {
            final VCFHeader h2 = new VCFHeader(header.getMetaDataInInputOrder(), Collections.singleton(sample));
            super.addMetaData(h2);
            // Literal substitution: the guard above uses a literal contains(), and a
            // regex-based replaceAll() would misinterpret '$' or '\' in a sample name.
            final String sampleFile = filepattern.replace(SAMPLE_TAG, sample);
            out.println(sampleFile);
            final File fout = new File(sampleFile);
            if (fout.getParentFile() != null) {
                fout.getParentFile().mkdirs();
            }
            final VariantContextWriter w = VCFUtils.createVariantContextWriter(fout);
            // Register the writer before using it so it is closed even if writeHeader fails.
            sample2writer.put(sample, w);
            w.writeHeader(h2);
        }
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header).logger(LOG);
        while (in.hasNext()) {
            final VariantContext ctx = progress.watch(in.next());
            for (final String sample : samples) {
                final Genotype g = ctx.getGenotype(sample);
                if (g == null) {
                    continue;
                }
                if (remove_uncalled && (!g.isAvailable() || !g.isCalled() || g.isNoCall())) {
                    continue;
                }
                if (remove_homref && g.isHomRef()) {
                    continue;
                }
                final VariantContextWriter w = sample2writer.get(sample);
                // Rebuild the variant with this sample's genotype only.
                final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
                final GenotypeBuilder gb = new GenotypeBuilder(g);
                vcb.genotypes(Collections.singletonList(gb.make()));
                final VariantContext ctx2 = this.recalculator.apply(vcb.make());
                w.add(ctx2);
            }
        }
        for (final String sample : samples) {
            LOG.info("Closing for sample " + sample);
            // remove() so that the finally clause does not close this writer a second time.
            final VariantContextWriter w = sample2writer.remove(sample);
            w.close();
        }
        progress.finish();
        out.flush();
        return RETURN_OK;
    } catch (final Exception e) {
        LOG.error(e);
        return -1;
    } finally {
        // Only writers that were not closed on the normal path remain in the map.
        for (final VariantContextWriter w : sample2writer.values()) {
            CloserUtil.close(w);
        }
        CloserUtil.close(out);
        CloserUtil.close(in);
    }
}
Also used : PrintStream(java.io.PrintStream) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) HashMap(java.util.HashMap) VariantContext(htsjdk.variant.variantcontext.VariantContext) Genotype(htsjdk.variant.variantcontext.Genotype) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) File(java.io.File) HashSet(java.util.HashSet)

Example 52 with VcfIterator

use of com.github.lindenb.jvarkit.util.vcf.VcfIterator in project jvarkit by lindenb.

the class CaseControlJfx method doWork.

/**
 * Reads a VCF and displays a scatter chart comparing, for every alternate
 * allele, the value computed for the affected persons of the pedigree
 * (x axis, "Cases") with the value for the controls (y axis, "Controls").
 *
 * <p>The control value is computed from the unaffected persons of the
 * pedigree, unless {@code controlTag} is set: in that case it is read from
 * that INFO attribute of the variant. Points are grouped into series by the
 * partition selected with {@code partitionType}. When {@code outputFile} is
 * set, the chart is saved to that file as soon as the stage is shown and the
 * application exits.
 *
 * @param primaryStage the JavaFX stage receiving the chart
 * @param args command line arguments: zero (read stdin) or one VCF path
 * @return {@code 0} on success, {@code -1} on error
 */
@Override
public int doWork(final Stage primaryStage, final List<String> args) {
    final VariantPartition partition;
    Pedigree pedigree = null;
    VcfIterator in = null;
    try {
        switch(this.partitionType) {
            case variantType:
                partition = new VariantTypePartition();
                break;
            case chromosome:
                partition = new ChromosomePartition();
                break;
            case autosomes:
                partition = new SexualContigPartition();
                break;
            case qual:
                partition = new QualPartition();
                break;
            case vqslod:
                partition = new VQSLODPartition();
                break;
            case typeFilter:
                partition = new TypeAndFilterPartiton();
                break;
            case distance:
                partition = new DisanceToDiagonalPartiton();
                break;
            case n_alts:
                partition = new NAltsPartition();
                break;
            default:
                throw new IllegalStateException(this.partitionType.name());
        }
        if (args.isEmpty()) {
            in = VCFUtils.createVcfIteratorStdin();
            primaryStage.setTitle(CaseControlJfx.class.getSimpleName());
        } else if (args.size() == 1) {
            in = VCFUtils.createVcfIterator(args.get(0));
            primaryStage.setTitle(args.get(0));
        } else {
            LOG.error("Illegal Number of arguments: " + args);
            return -1;
        }
        // The pedigree comes from the dedicated file when given, else from the VCF header.
        if (this.pedigreeFile != null) {
            pedigree = Pedigree.newParser().parse(this.pedigreeFile);
        } else {
            pedigree = Pedigree.newParser().parse(in.getHeader());
        }
        if (this.controlTag != null) {
            final VCFInfoHeaderLine infoHeaderLine = in.getHeader().getInfoHeaderLine(this.controlTag);
            if (infoHeaderLine == null) {
                LOG.error("No such attribute in the VCF header: " + this.controlTag);
                return -1;
            }
        }
        int count = 0;
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(in.getHeader());
        // A negative limit_to_N_variants means "no limit".
        while (in.hasNext() && (this.limit_to_N_variants < 0 || count < this.limit_to_N_variants)) {
            final VariantContext ctx = progress.watch(in.next());
            if (this.ignore_ctx_filtered && ctx.isFiltered())
                continue;
            ++count;
            final List<Allele> alternates = ctx.getAlternateAlleles();
            for (int alt_idx = 0; alt_idx < alternates.size(); ++alt_idx) {
                final Allele alt = alternates.get(alt_idx);
                // mafs[0]: cases, mafs[1]: controls; null means "no value available".
                final Double[] mafs = { null, null };
                for (int i = 0; i < 2; ++i) {
                    if (i == 1 && this.controlTag != null) {
                        // Controls: take the value stored in the INFO attribute for this allele.
                        if (ctx.hasAttribute(this.controlTag)) {
                            try {
                                final List<Double> dvals = ctx.getAttributeAsDoubleList(this.controlTag, Double.NaN);
                                if (alt_idx < dvals.size() && dvals.get(alt_idx) != null) {
                                    final double d = dvals.get(alt_idx);
                                    if (!Double.isNaN(d) && d >= 0 && d <= 1.0)
                                        mafs[1] = d;
                                }
                            } catch (NumberFormatException ignored) {
                                // Deliberately best-effort: an unparsable attribute leaves
                                // mafs[1] null, so this allele is skipped below.
                            }
                        }
                    } else {
                        final MafCalculator mafCalculator = new MafCalculator(alt, ctx.getContig());
                        mafCalculator.setNoCallIsHomRef(no_call_is_homref);
                        for (Pedigree.Person person : (i == 0 ? pedigree.getAffected() : pedigree.getUnaffected())) {
                            if (selectSamples.equals(SelectSamples.males) && !person.isMale())
                                continue;
                            if (selectSamples.equals(SelectSamples.females) && !person.isFemale())
                                continue;
                            final Genotype genotype = ctx.getGenotype(person.getId());
                            if (genotype == null)
                                continue;
                            if (ignore_gt_filtered && genotype.isFiltered())
                                continue;
                            mafCalculator.add(genotype, person.isMale());
                        }
                        if (!mafCalculator.isEmpty()) {
                            mafs[i] = mafCalculator.getMaf();
                        }
                    }
                }
                // A point needs both coordinates.
                if (mafs[0] == null || mafs[1] == null)
                    continue;
                final XYChart.Data<Number, Number> data = new XYChart.Data<Number, Number>(mafs[0], mafs[1]);
                if (this.add_tooltip && this.outputFile == null) {
                    // The position is kept as extra value; it is the tooltip text prefix below.
                    data.setExtraValue(ctx.getContig() + ":" + ctx.getStart());
                }
                partition.add(ctx, pedigree, data);
            }
        }
        progress.finish();
        in.close();
        // Prevents the finally clause from closing the iterator a second time.
        in = null;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(in);
    }
    final NumberAxis xAxis = new NumberAxis(0.0, 1.0, 0.1);
    xAxis.setLabel("Cases");
    final NumberAxis yAxis = new NumberAxis(0.0, 1.0, 0.1);
    yAxis.setLabel("Controls" + (this.controlTag == null ? "" : "[" + this.controlTag + "]"));
    final ScatterChart<Number, Number> chart = new ScatterChart<>(xAxis, yAxis);
    for (final XYChart.Series<Number, Number> series : partition.getSeries()) {
        chart.getData().add(series);
    }
    // Default chart title: the input file name without directory and VCF suffix.
    String title = "Case/Control";
    if (!args.isEmpty()) {
        title = args.get(0);
        int slash = title.lastIndexOf("/");
        if (slash != -1)
            title = title.substring(slash + 1);
        if (title.endsWith(".vcf.gz"))
            title = title.substring(0, title.length() - 7);
        if (title.endsWith(".vcf"))
            title = title.substring(0, title.length() - 4);
    }
    if (userTitle != null)
        title = userTitle;
    chart.setTitle(title);
    chart.setAnimated(false);
    chart.setLegendSide(this.legendSide);
    final VBox root = new VBox();
    MenuBar menuBar = new MenuBar();
    Menu menu = new Menu("File");
    MenuItem item = new MenuItem("Save image as...");
    item.setOnAction(AE -> {
        doMenuSave(chart);
    });
    menu.getItems().add(item);
    menu.getItems().add(new SeparatorMenuItem());
    item = new MenuItem("Quit");
    item.setOnAction(AE -> {
        Platform.exit();
    });
    menu.getItems().add(item);
    menuBar.getMenus().add(menu);
    root.getChildren().add(menuBar);
    final BorderPane contentPane = new BorderPane();
    contentPane.setCenter(chart);
    root.getChildren().add(contentPane);
    // Square window sized from the smallest screen dimension, at least 50 wide.
    Rectangle2D screen = Screen.getPrimary().getVisualBounds();
    double minw = Math.max(Math.min(screen.getWidth(), screen.getHeight()) - 50, 50);
    chart.setPrefSize(minw, minw);
    final Scene scene = new Scene(root, minw, minw);
    primaryStage.setScene(scene);
    if (this.outputFile != null) {
        // Non-interactive mode: save the chart once the stage is shown, then quit.
        primaryStage.setOnShown(WE -> {
            LOG.info("saving as " + this.outputFile);
            try {
                saveImageAs(chart, this.outputFile);
            } catch (IOException err) {
                LOG.error(err);
                System.exit(-1);
            }
            Platform.exit();
        });
    }
    primaryStage.show();
    if (this.outputFile == null) {
        // http://stackoverflow.com/questions/14117867
        for (final XYChart.Series<Number, Number> series : partition.getSeries()) {
            for (XYChart.Data<Number, Number> d : series.getData()) {
                if (dataOpacity >= 0 && dataOpacity < 1.0) {
                    // Apply the configured opacity; it was previously validated but
                    // then replaced by a hard-coded 0.3.
                    d.getNode().setStyle(d.getNode().getStyle() + "-fx-opacity:" + dataOpacity + ";");
                }
                if (this.add_tooltip) {
                    final Tooltip tooltip = new Tooltip();
                    tooltip.setText(String.format("%s (%f / %f)", String.valueOf(d.getExtraValue()), d.getXValue().doubleValue(), d.getYValue().doubleValue()));
                    Tooltip.install(d.getNode(), tooltip);
                }
            }
        }
    }
    return 0;
}
Also used : BorderPane(javafx.scene.layout.BorderPane) NumberAxis(javafx.scene.chart.NumberAxis) ScatterChart(javafx.scene.chart.ScatterChart) VariantContext(htsjdk.variant.variantcontext.VariantContext) MenuBar(javafx.scene.control.MenuBar) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) Menu(javafx.scene.control.Menu) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) Tooltip(javafx.scene.control.Tooltip) Rectangle2D(javafx.geometry.Rectangle2D) Genotype(htsjdk.variant.variantcontext.Genotype) MenuItem(javafx.scene.control.MenuItem) SeparatorMenuItem(javafx.scene.control.SeparatorMenuItem) IOException(java.io.IOException) SeparatorMenuItem(javafx.scene.control.SeparatorMenuItem) Scene(javafx.scene.Scene) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) IOException(java.io.IOException) Allele(htsjdk.variant.variantcontext.Allele) Pedigree(com.github.lindenb.jvarkit.util.Pedigree) XYChart(javafx.scene.chart.XYChart) VBox(javafx.scene.layout.VBox)

Example 53 with VcfIterator

use of com.github.lindenb.jvarkit.util.vcf.VcfIterator in project jvarkit by lindenb.

the class VcfBurdenFilterExac method doVcfToVcf.

/**
 * Copies every variant of the input to the writer obtained from
 * {@code this.component.open(delegate)}, after wrapping the input in an
 * iterator that asserts the variants are sorted according to the sequence
 * dictionary of the VCF header.
 *
 * @param inputName name of the input (not used by this implementation)
 * @param vcfIterator the source of variants
 * @param delegate the writer wrapped by the component's writer
 * @return {@code 0} when all variants have been copied
 */
@Override
protected int doVcfToVcf(final String inputName, final VcfIterator vcfIterator, final VariantContextWriter delegate) {
    final VcfIterator in = VCFUtils.createAssertSortedVcfIterator(vcfIterator, VCFUtils.createTidPosComparator(vcfIterator.getHeader().getSequenceDictionary()));
    VariantContextWriter out = null;
    try {
        out = this.component.open(delegate);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(in.getHeader()).logger(LOG);
        out.writeHeader(in.getHeader());
        while (in.hasNext()) {
            out.add(progress.watch(in.next()));
        }
        out.close();
        // Closed normally: keeps the finally clause from closing it a second time.
        out = null;
        progress.finish();
        return 0;
    } finally {
        // Release the writer and the iterator even if copying failed half-way.
        CloserUtil.close(out);
        CloserUtil.close(in);
    }
}
Also used : EqualRangeVcfIterator(com.github.lindenb.jvarkit.tools.vcfcmp.EqualRangeVcfIterator) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) DelegateVariantContextWriter(com.github.lindenb.jvarkit.util.vcf.DelegateVariantContextWriter) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter)

Example 54 with VcfIterator

use of com.github.lindenb.jvarkit.util.vcf.VcfIterator in project jvarkit by lindenb.

the class VcfBurdenGoEnrichment method doWork.

/**
 * Accumulates, for each GO term, the counts of REF/ALT genotypes among the
 * affected and unaffected persons, then prints one line per term with the
 * value returned by {@code Node.fisher()}.
 *
 * <p>Steps:
 * <ol>
 *   <li>parse the GO tree from {@code readingGo.goUri} and build one
 *       {@code Node} per term, linked to the nodes of its relations;</li>
 *   <li>read the tab-delimited gene file (gene, GO term) to associate each
 *       gene with nodes;</li>
 *   <li>scan the VCF: the genes of each variant are extracted from its VEP
 *       and ANN predictions and the genotype counts are passed to the
 *       matching nodes;</li>
 *   <li>write the table, sorted on {@code Node.fisher()}, to the output.</li>
 * </ol>
 *
 * @param args command line arguments; at most one input VCF is expected
 *             (resolved via {@code oneFileOrNull})
 * @return {@code 0} on success, {@code -1} on error
 */
@Override
public int doWork(final List<String> args) {
    if (StringUtil.isBlank(this.readingGo.goUri)) {
        LOG.error("Undefined GOs uri.");
        return -1;
    }
    if (this.geneFile == null || !this.geneFile.exists()) {
        LOG.error("Undefined gene file option.");
        return -1;
    }
    try {
        final GoTree gotree = this.readingGo.createParser().setIgnoreDbXRef(true).parse(this.readingGo.goUri);
        List<GoTree.Term> terms = new ArrayList<>(gotree.getTerms());
        final Map<GoTree.Term, Node> term2node = new HashMap<>();
        // build the node TREE: a term becomes a node once all the terms it is
        // related to already have a node, so several passes may be required.
        while (!terms.isEmpty()) {
            final int remainingBeforePass = terms.size();
            int i = 0;
            while (i < terms.size()) {
                final GoTree.Term t = terms.get(i);
                if (!t.hasRelations()) {
                    term2node.put(t, new Node(t));
                    terms.remove(i);
                } else if (t.getRelations().stream().allMatch(L -> term2node.containsKey(L.getTo()))) {
                    final Node n = new Node(t);
                    n.parents.addAll(t.getRelations().stream().map(L -> term2node.get(L.getTo())).collect(Collectors.toSet()));
                    term2node.put(t, n);
                    terms.remove(i);
                } else {
                    i++;
                }
            }
            // A pass that resolves nothing would repeat forever: the remaining terms
            // point to a term that is absent from the tree or that is part of a cycle.
            if (terms.size() == remainingBeforePass) {
                LOG.error("Cannot build the GO tree: " + remainingBeforePass + " term(s) have a relation to a missing term or belong to a cycle.");
                return -1;
            }
        }
        terms = null;
        final Set<String> unknownAcn = new HashSet<>();
        final Map<String, Set<Node>> gene2node = new HashMap<>();
        // try-with-resources: the reader is closed on every exit path.
        try (final BufferedReader r = IOUtils.openFileForBufferedReading(this.geneFile)) {
            String line;
            while ((line = r.readLine()) != null) {
                if (line.isEmpty() || line.startsWith("#"))
                    continue;
                final int tab = line.indexOf('\t');
                if (tab == -1) {
                    LOG.error("tab missing in " + line + " of " + this.geneFile);
                    return -1;
                }
                final String gene = line.substring(0, tab).trim();
                if (StringUtil.isBlank(gene)) {
                    LOG.error("Emtpy gene in " + line);
                    return -1;
                }
                // using getTermByName because synonyms were found in GOA
                final String termAcn = line.substring(tab + 1).trim();
                // already reported as unknown: skip silently
                if (unknownAcn.contains(termAcn))
                    continue;
                final GoTree.Term term = gotree.getTermByName(termAcn);
                if (term == null) {
                    unknownAcn.add(termAcn);
                    LOG.warning("Don't know this GO term in " + line + " of " + this.geneFile + ". Could be obsolete, synonym, go specific division. Skipping.");
                    continue;
                }
                final Node node = term2node.get(term);
                if (node == null) {
                    LOG.error("Don't know this node in " + line + " of " + this.geneFile);
                    return -1;
                }
                final Set<Node> nodes = gene2node.computeIfAbsent(gene, K -> new HashSet<>());
                node.numGenes++;
                nodes.add(node);
            }
        }
        // clean up
        unknownAcn.clear();
        final VcfIterator iter = openVcfIterator(oneFileOrNull(args));
        try {
            final VCFHeader header = iter.getHeader();
            final VepPredictionParser vepParser = new VepPredictionParserFactory(header).get();
            final AnnPredictionParser annParser = new AnnPredictionParserFactory(header).get();
            final Set<Pedigree.Person> persons;
            if (this.pedFile != null) {
                final Pedigree pedigree = Pedigree.newParser().parse(this.pedFile);
                persons = new Pedigree.CaseControlExtractor().extract(header, pedigree);
            } else {
                persons = new Pedigree.CaseControlExtractor().extract(header);
            }
            final Set<Pedigree.Person> affected = persons.stream().filter(P -> P.isAffected()).collect(Collectors.toSet());
            final Set<Pedigree.Person> unaffected = persons.stream().filter(P -> P.isUnaffected()).collect(Collectors.toSet());
            if (affected.isEmpty()) {
                LOG.error("No Affected individual");
                return -1;
            }
            if (unaffected.isEmpty()) {
                LOG.error("No unaffected individual");
                return -1;
            }
            // VEP columns whose value is looked up as a gene identifier
            final List<String> lookColumns = Arrays.asList("CCDS", "Feature", "ENSP", "Gene", "HGNC", "HGNC_ID", "SYMBOL", "RefSeq");
            final Predicate<Genotype> isWildGenotype = G -> {
                if (G == null)
                    return false;
                return G.isHomRef();
            };
            final Predicate<Genotype> isAltGenotype = G -> {
                if (G == null)
                    return false;
                return G.isCalled() && !G.isHomRef();
            };
            final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header).logger(LOG);
            while (iter.hasNext()) {
                final VariantContext ctx = progress.watch(iter.next());
                if (!this.variantFilter.test(ctx))
                    continue;
                // collect every gene identifier found in the VEP and ANN predictions
                final Set<String> genes = new HashSet<>();
                for (final String predStr : ctx.getAttributeAsList(vepParser.getTag()).stream().map(O -> String.class.cast(O)).collect(Collectors.toList())) {
                    final VepPredictionParser.VepPrediction pred = vepParser.parseOnePrediction(ctx, predStr);
                    for (final String col : lookColumns) {
                        final String token = pred.getByCol(col);
                        if (!StringUtil.isBlank(token)) {
                            genes.add(token);
                        }
                    }
                }
                for (final String predStr : ctx.getAttributeAsList(annParser.getTag()).stream().map(O -> String.class.cast(O)).collect(Collectors.toList())) {
                    final AnnPredictionParser.AnnPrediction pred = annParser.parseOnePrediction(predStr);
                    final String token = pred.getGeneName();
                    if (!StringUtil.isBlank(token)) {
                        genes.add(token);
                    }
                }
                if (genes.isEmpty())
                    continue;
                final Set<Node> nodes = genes.stream().filter(G -> gene2node.containsKey(G)).flatMap(G -> gene2node.get(G).stream()).collect(Collectors.toSet());
                if (nodes.isEmpty())
                    continue;
                final long unaffected_alt = unaffected.stream().map(P -> ctx.getGenotype(P.getId())).filter(G -> this.genotypeFilter.test(ctx, G)).filter(isAltGenotype).count();
                final long affected_alt = affected.stream().map(P -> ctx.getGenotype(P.getId())).filter(G -> this.genotypeFilter.test(ctx, G)).filter(isAltGenotype).count();
                /* no informative */
                if (unaffected_alt + affected_alt == 0L) {
                    continue;
                }
                final long affected_ref = affected.stream().map(P -> ctx.getGenotype(P.getId())).filter(G -> this.genotypeFilter.test(ctx, G)).filter(isWildGenotype).count();
                final long unaffected_ref = unaffected.stream().map(P -> ctx.getGenotype(P.getId())).filter(G -> this.genotypeFilter.test(ctx, G)).filter(isWildGenotype).count();
                // reset the flags first, then hand the counts of this variant to each node
                nodes.stream().forEach(N -> N.resetVisitedFlag());
                nodes.stream().forEach(N -> N.visit(unaffected_ref, unaffected_alt, affected_ref, affected_alt));
            }
            progress.finish();
        } finally {
            // also reached on the early returns above and on exceptions
            iter.close();
        }
        LOG.info("Calculating Fisher and dumping.. please wait");
        try (final PrintWriter pw = super.openFileOrStdoutAsPrintWriter(this.outputFile)) {
            pw.println("#go_term\tfisher\tname\tgo_term_depth\tcount_genes_in_this_node" + "\tunaffected_ref_gt" + "\tunaffected_alt_gt" + "\taffected_ref_gt" + "\taffected_alt_gt");
            term2node.values().stream().filter(N -> this.show_never_seeen_term || N.sum() > 0L).sorted((n1, n2) -> Double.compare(n1.fisher(), n2.fisher())).forEach(N -> {
                pw.print(N.goTerm.getAcn());
                pw.print('\t');
                pw.print(N.fisher());
                pw.print("\t");
                pw.print(N.goTerm.getName().replaceAll("[ \',\\-]+", "_"));
                pw.print("\t");
                pw.print(N.goTerm.getMinDepth());
                pw.print('\t');
                pw.print(N.numGenes);
                pw.print('\t');
                pw.print(N.unaffected_ref);
                pw.print('\t');
                pw.print(N.unaffected_alt);
                pw.print('\t');
                pw.print(N.affected_ref);
                pw.print('\t');
                pw.print(N.affected_alt);
                pw.println();
            });
            pw.flush();
        }
        return 0;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    }
}
Also used : Genotype(htsjdk.variant.variantcontext.Genotype) Arrays(java.util.Arrays) JexlVariantPredicate(com.github.lindenb.jvarkit.util.vcf.JexlVariantPredicate) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) VCFHeader(htsjdk.variant.vcf.VCFHeader) AnnPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParser) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) HashMap(java.util.HashMap) ParametersDelegate(com.beust.jcommander.ParametersDelegate) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) BiPredicate(java.util.function.BiPredicate) StringUtil(htsjdk.samtools.util.StringUtil) FisherExactTest(com.github.lindenb.jvarkit.math.stats.FisherExactTest) Pedigree(com.github.lindenb.jvarkit.util.Pedigree) Map(java.util.Map) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) VepPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParser) VepPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParserFactory) PrintWriter(java.io.PrintWriter) JexlGenotypePredicate(com.github.lindenb.jvarkit.util.vcf.JexlGenotypePredicate) GoTree(com.github.lindenb.jvarkit.util.go.GoTree) Predicate(java.util.function.Predicate) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) Collectors(java.util.stream.Collectors) File(java.io.File) List(java.util.List) AnnPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParserFactory) VariantContext(htsjdk.variant.variantcontext.VariantContext) BufferedReader(java.io.BufferedReader) AnnPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParser) HashSet(java.util.HashSet) Set(java.util.Set) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) 
VariantContext(htsjdk.variant.variantcontext.VariantContext) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) AnnPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParserFactory) VCFHeader(htsjdk.variant.vcf.VCFHeader) HashSet(java.util.HashSet) PrintWriter(java.io.PrintWriter) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) Genotype(htsjdk.variant.variantcontext.Genotype) GoTree(com.github.lindenb.jvarkit.util.go.GoTree) VepPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParser) Pedigree(com.github.lindenb.jvarkit.util.Pedigree) BufferedReader(java.io.BufferedReader) VepPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParserFactory)

Example 55 with VcfIterator

use of com.github.lindenb.jvarkit.util.vcf.VcfIterator in project jvarkit by lindenb.

the class VcfBurdenRscriptV method doWork.

/**
 * Reads one or more VCFs from a single line stream and writes an R script
 * describing them to the configured output.
 *
 * <p>For every VCF found in the input the script contains: a {@code vcf.title}
 * variable, a {@code population} data frame (family, name, status), a flat
 * {@code genotypes} vector (one value per sample and per retained variant) and
 * a {@code variants} data frame (chrom, chromStart, chromEnd, refAllele,
 * altAllele, maf). When at least one variant was retained, size assertions and
 * an optional call to the user-defined R function are appended.
 *
 * @param args command line arguments; the input is the file/URI returned by
 *             {@code oneFileOrNull(args)}, or standard input when that is {@code null}
 * @return {@code RETURN_OK} on success, {@code -1} if any exception was raised
 *         or if the output writer reported an I/O error
 */
@Override
public int doWork(final List<String> args) {
    /*
     * mail matile April 11: requires calling R for SKAT: needs
     * matrix of samples and genotypes
     */
    PrintWriter pw = null;
    VcfIterator in = null;
    LineIterator lr = null;
    // Seed for the fallback title; incremented for each VCF read from the stream.
    long vcf_id = System.currentTimeMillis();
    try {
        final String inputName = oneFileOrNull(args);
        lr = (inputName == null ? IOUtils.openStreamForLineIterator(stdin()) : IOUtils.openURIForLineIterator(inputName));
        pw = super.openFileOrStdoutAsPrintWriter(this.outputFile);
        pw.println("# This is the header of the generated R script. ");
        pw.println("# generated by " + getProgramName());
        pw.println("# version " + getGitHash());
        if (this.userDefinedFunName != null && !this.userDefinedFunName.trim().isEmpty()) {
            pw.println("# user defined function " + userDefinedFunName + " should have been defined BEFORE this header. ");
            pw.println("#  something like 'cat user_function.R this_file.R |  R --no-save > result.txt'");
        }
        if (!lr.hasNext()) {
            LOG.warn("No input found. Couldn't read any VCF header file");
        }
        // Each pass consumes one VCF (header + variants) from the shared line iterator.
        while (lr.hasNext()) {
            vcf_id++;
            in = VCFUtils.createVcfIteratorFromLineIterator(lr, true);
            final VCFHeader header = in.getHeader();
            // TreeSet: every per-sample column below is emitted in the same sorted order.
            final Set<Pedigree.Person> samples = new TreeSet<>(super.getCasesControlsInPedigree(header));
            final List<Variant> variants = new ArrayList<>();
            // 'first' drives the comma separator; it is reset before each emitted vector.
            boolean first;

            pw.println("# BEGIN  VCF ##########################################");
            pw.println("# Title ");
            final VCFHeaderLine vcfTitle = (StringUtil.isBlank(this.titleHeaderStr) ? null : header.getOtherHeaderLine(this.titleHeaderStr.trim()));
            if (vcfTitle == null) {
                LOG.warn("No title was found");
                pw.println("# [WARNING] No title was found. ");
                pw.println("vcf.title <- \"vcf" + String.format("%04d", vcf_id) + "\"");
            } else {
                // NOTE(review): the header value is written verbatim between double quotes;
                // a value containing a double quote or a backslash would presumably yield
                // an invalid R string literal - confirm whether escaping is required.
                pw.println("vcf.title <- \"" + vcfTitle.getValue() + "\"");
            }

            /* population data frame: family / name / status */
            pw.println("# samples ( 0: unaffected 1:affected)");
            pw.print("population <- data.frame(family=c(");
            first = true;
            for (final Pedigree.Person person : samples) {
                if (!first)
                    pw.print(",");
                pw.print("\"" + person.getFamily().getId() + "\"");
                first = false;
            }
            pw.print("),name=c(");
            first = true;
            for (final Pedigree.Person person : samples) {
                if (!first)
                    pw.print(",");
                pw.print("\"" + person.getId() + "\"");
                first = false;
            }
            pw.print("),status=c(");
            first = true;
            for (final Pedigree.Person person : samples) {
                if (!first)
                    pw.print(",");
                pw.print(person.isUnaffected() ? 0 : 1);
                first = false;
            }
            pw.println("))");

            /* genotypes: one flat vector, samples varying fastest */
            pw.println();
            pw.println("# genotypes as a list. Should be a multiple of length(samples).");
            pw.println("# 0 is homref (0/0), 1 is het (0/1), 2 is homvar (1/1)");
            pw.println("# if the variant contains another ALT allele: (0/2) and (2/2) are considered 0 (homref)");
            pw.print("genotypes <- c(");
            first = true;
            final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header).logger(LOG);
            while (in.hasNext()) {
                final VariantContext ctx = progress.watch(in.next());
                if (ctx.isFiltered() && !this.acceptFiltered)
                    continue;
                final int n_alts = ctx.getAlternateAlleles().size();
                if (n_alts == 0) {
                    LOG.warn("ignoring variant without ALT allele.");
                    continue;
                }
                if (n_alts > 1) {
                    LOG.warn("variant with more than one ALT. Using getAltAlleleWithHighestAlleleCount.");
                }
                final Allele observed_alt = ctx.getAltAlleleWithHighestAlleleCount();
                final MafCalculator mafCalculator = new MafCalculator(observed_alt, ctx.getContig());
                for (final Pedigree.Person person : samples) {
                    final Genotype genotype = ctx.getGenotype(person.getId());
                    if (genotype == null) {
                        // No manual cleanup here: the finally block closes pw/in/lr, and
                        // closing first could replace this diagnostic with an I/O exception.
                        throw new IllegalStateException("Cannot get genotype for " + person.getId());
                    }
                    mafCalculator.add(genotype, person.isMale());
                    if (!first)
                        pw.print(",");
                    if (genotype.isHomRef()) {
                        pw.print('0');
                    } else if (genotype.isHomVar() && genotype.getAlleles().contains(observed_alt)) {
                        pw.print('2');
                    } else if (genotype.isHet() && genotype.getAlleles().contains(observed_alt) && genotype.getAlleles().contains(ctx.getReference())) {
                        pw.print('1');
                    } else if (genotype.isHet() && !genotype.getAlleles().contains(observed_alt) && genotype.getAlleles().contains(ctx.getReference())) {
                        /* we treat 0/2 as hom-ref */
                        LOG.warn("Treating " + genotype + " as hom-ref (0) alt=" + observed_alt);
                        pw.print('0');
                    } else if (genotype.isHomVar() && !genotype.getAlleles().contains(observed_alt)) {
                        /* we treat 2/2 as hom-ref */
                        LOG.warn("Treating " + genotype + " as hom-ref (0) alt=" + observed_alt);
                        pw.print('0');
                    } else {
                        // anything else (e.g. no-call) is coded 0 or -9 depending on the option
                        pw.print(this.nocalliszero ? "0" : "-9");
                    }
                    first = false;
                }
                // Remember the variant so the 'variants' data frame can be written afterwards.
                final Variant variant = new Variant();
                variant.contig = ctx.getContig();
                variant.start = ctx.getStart();
                variant.end = ctx.getEnd();
                variant.ref = ctx.getReference();
                variant.alt = observed_alt;
                if (!mafCalculator.isEmpty()) {
                    variant.maf = mafCalculator.getMaf();
                } else {
                    variant.maf = null;
                }
                variants.add(variant);
            }
            // end reading vcf
            progress.finish();
            in.close();
            // Drop the reference so the finally block does not close this iterator again.
            in = null;
            pw.println(")");

            /* variants data frame */
            pw.println("# variants. CONTIG/START/END/REF/ALT/MAF");
            pw.print("variants <- data.frame(chrom=c(");
            first = true;
            for (final Variant v : variants) {
                if (!first)
                    pw.print(",");
                pw.print("\"" + v.contig + "\"");
                first = false;
            }
            pw.print("),chromStart=c(");
            first = true;
            for (final Variant v : variants) {
                if (!first)
                    pw.print(",");
                pw.print(v.start);
                first = false;
            }
            pw.print("),chromEnd=c(");
            first = true;
            for (final Variant v : variants) {
                if (!first)
                    pw.print(",");
                pw.print(v.end);
                first = false;
            }
            pw.print("),refAllele=c(");
            first = true;
            for (final Variant v : variants) {
                if (!first)
                    pw.print(",");
                pw.print("\"" + v.ref.getDisplayString() + "\"");
                first = false;
            }
            pw.print("),altAllele=c(");
            first = true;
            for (final Variant v : variants) {
                if (!first)
                    pw.print(",");
                pw.print("\"" + v.alt.getDisplayString() + "\"");
                first = false;
            }
            pw.print("),maf=c(");
            first = true;
            for (final Variant v : variants) {
                if (!first)
                    pw.print(",");
                pw.print(v.maf == null ? "NA" : String.valueOf(v.maf));
                first = false;
            }
            pw.println("))");

            if (!variants.isEmpty()) {
                pw.println("# assert sizes");
                pw.println("stopifnot( length(genotypes) %% NROW(population) == 0 )");
                pw.println("stopifnot(NROW(variants) * NROW(population) == length(genotypes) )");
                if (this.userDefinedFunName == null || this.userDefinedFunName.trim().isEmpty()) {
                    pw.println("## WARNING not user-defined R function was defined");
                } else {
                    pw.println("# consumme data with user-defined R function ");
                    pw.println(this.userDefinedFunName + "()");
                }
            } else {
                LOG.warn("No Variant found");
            }
            pw.println("# END VCF ##########################################");
        }
        pw.flush();
        // PrintWriter swallows I/O errors; checkError() is the only way to detect them.
        if (pw.checkError()) {
            LOG.error(this.getProgramName() + " : pw.checkError(): I/O error ###### ");
            return -1;
        }
        pw.close();
        pw = null;
        LOG.info("done");
        return RETURN_OK;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(pw);
        CloserUtil.close(in);
        CloserUtil.close(lr);
    }
}
Also used : VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) Genotype(htsjdk.variant.variantcontext.Genotype) LineIterator(htsjdk.tribble.readers.LineIterator) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) Allele(htsjdk.variant.variantcontext.Allele) Pedigree(com.github.lindenb.jvarkit.util.Pedigree) TreeSet(java.util.TreeSet) VCFHeader(htsjdk.variant.vcf.VCFHeader) PrintWriter(java.io.PrintWriter)

Aggregations

VcfIterator (com.github.lindenb.jvarkit.util.vcf.VcfIterator)55 VariantContext (htsjdk.variant.variantcontext.VariantContext)39 VCFHeader (htsjdk.variant.vcf.VCFHeader)35 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)30 ArrayList (java.util.ArrayList)28 VariantContextWriter (htsjdk.variant.variantcontext.writer.VariantContextWriter)26 IOException (java.io.IOException)24 File (java.io.File)22 HashSet (java.util.HashSet)19 List (java.util.List)19 Genotype (htsjdk.variant.variantcontext.Genotype)18 Parameter (com.beust.jcommander.Parameter)17 Launcher (com.github.lindenb.jvarkit.util.jcommander.Launcher)17 Program (com.github.lindenb.jvarkit.util.jcommander.Program)17 Logger (com.github.lindenb.jvarkit.util.log.Logger)17 Set (java.util.Set)17 Allele (htsjdk.variant.variantcontext.Allele)16 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)15 VariantContextBuilder (htsjdk.variant.variantcontext.VariantContextBuilder)15 JvarkitException (com.github.lindenb.jvarkit.lang.JvarkitException)14