Use of com.github.lindenb.jvarkit.util.vcf.VcfIterator in project jvarkit by lindenb.
Class VcfSkatSlidingWindow, method doWork:
@Override
public int doWork(final List<String> args) {
    if (this.nJobs < 1) {
        this.nJobs = Math.max(1, Runtime.getRuntime().availableProcessors());
        LOG.info("setting njobs to " + this.nJobs);
    }
    VcfIterator r = null;
    try {
        final VCFHeader header;
        final SAMSequenceDictionary dict;
        final File vcfFile = new File(oneAndOnlyOneFile(args));
        try (final VCFFileReader vr = new VCFFileReader(vcfFile, true)) {
            header = vr.getFileHeader();
            dict = header.getSequenceDictionary();
        }
        if (dict == null || dict.isEmpty()) {
            throw new JvarkitException.VcfDictionaryMissing(vcfFile);
        }
        if (!this.limit_contigs.isEmpty()) {
            if (this.limit_contigs.stream().anyMatch(C -> dict.getSequence(C) == null)) {
                LOG.error("user contig missing in vcf dictionary.");
                return -1;
            }
        }
        final Pedigree pedigree;
        if (this.pedigreeFile != null) {
            pedigree = new Pedigree.Parser().parse(this.pedigreeFile);
        } else {
            pedigree = new Pedigree.Parser().parse(header);
        }
        // keep only affected/unaffected persons that are also samples of the VCF
        final Set<Pedigree.Person> samples = new HashSet<>(pedigree.getPersons());
        samples.removeIf(I -> !(I.isAffected() || I.isUnaffected()) || !header.getSampleNamesInOrder().contains(I.getId()));
        this.writer = super.openFileOrStdoutAsPrintWriter(this.outputFile);
        final Consumer<SkatCallerResult> writeResult = (R) -> {
            synchronized (this.writer) {
                this.writer.println(R.toString());
            }
        };
        for (final SAMSequenceRecord ssr : dict.getSequences()) {
            if (!this.limit_contigs.isEmpty() && !this.limit_contigs.contains(ssr.getSequenceName())) {
                LOG.warning("skipping contig " + ssr.getSequenceName());
                continue;
            }
            LOG.info("contig " + ssr.getSequenceName());
            // split the contig into nJobs slices, one SkatWorker per slice
            final ExecutorService executorService = new ThreadPoolExecutor(this.nJobs, this.nJobs, 0L, TimeUnit.MILLISECONDS, new LinkedBlockingDeque<>(this.nJobs));
            final List<Future<Integer>> results = new ArrayList<>(this.nJobs);
            for (int i = 0; i < this.nJobs; i++) {
                final int windowLen = Math.max(1, ssr.getSequenceLength() / this.nJobs);
                final SkatWorker caller = new SkatWorker(vcfFile, new Interval(ssr.getSequenceName(), i * windowLen, Math.min(ssr.getSequenceLength(), (i + 1) * windowLen)), samples, this.skat.build(), writeResult);
                results.add(executorService.submit(caller));
            }
            executorService.shutdown();
            executorService.awaitTermination(365, TimeUnit.DAYS);
            for (final Future<Integer> fl : results) {
                try {
                    if (fl.get() != 0) {
                        LOG.error("An error occurred");
                        return -1;
                    }
                } catch (final Exception err) {
                    LOG.error(err);
                    return -1;
                }
            }
        }
        this.writer.flush();
        this.writer.close();
        this.writer = null;
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(r);
        CloserUtil.close(this.writer);
    }
}
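A note on the slicing above: windowLen is computed by integer division, so when the contig length is not a multiple of nJobs the last sequenceLength % nJobs bases fall outside every interval. A minimal sketch of a partition that always covers the full contig, using a hypothetical makeIntervals helper (not part of jvarkit) over htsjdk's 1-based Interval:

import java.util.ArrayList;
import java.util.List;

import htsjdk.samtools.util.Interval;

// Hypothetical helper: split [1, length] into up to nJobs contiguous 1-based
// intervals whose union is the whole contig, spreading the remainder over the
// first windows instead of dropping it.
static List<Interval> makeIntervals(final String contig, final int length, final int nJobs) {
    final List<Interval> intervals = new ArrayList<>(nJobs);
    final int base = length / nJobs;
    final int remainder = length % nJobs;
    int start = 1;
    for (int i = 0; i < nJobs && start <= length; i++) {
        final int len = base + (i < remainder ? 1 : 0); // first 'remainder' windows get one extra base
        final int end = Math.min(length, start + len - 1);
        intervals.add(new Interval(contig, start, end));
        start = end + 1;
    }
    return intervals;
}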
Use of com.github.lindenb.jvarkit.util.vcf.VcfIterator in project jvarkit by lindenb.
Class VcfToHilbert, method doWork:
@Override
public int doWork(final List<String> args) {
    if (this.imgOut == null) {
        LOG.error("output image file not defined");
        return -1;
    }
    if (this.imageWidth < 1) {
        LOG.error("Bad image size:" + this.imageWidth);
        return -1;
    }
    VcfIterator iter = null;
    try {
        iter = this.openVcfIterator(oneFileOrNull(args));
        final VCFHeader header = iter.getHeader();
        this.dict = header.getSequenceDictionary();
        if (this.dict == null) {
            throw new JvarkitException.FastaDictionaryMissing("no dict in input");
        }
        final List<String> samples = header.getSampleNamesInOrder();
        if (samples.isEmpty()) {
            throw new JvarkitException.SampleMissing("no.sample.in.vcf");
        }
        LOG.info("N-Samples:" + samples.size());
        double marginWidth = (this.imageWidth - 2) * 0.05;
        this.sampleWidth = ((this.imageWidth - 2) - marginWidth) / samples.size();
        LOG.info("sample Width:" + sampleWidth);
        BufferedImage img = new BufferedImage(this.imageWidth, this.imageWidth, BufferedImage.TYPE_INT_RGB);
        this.g = (Graphics2D) img.getGraphics();
        this.g.setColor(Color.WHITE);
        this.g.fillRect(0, 0, imageWidth, imageWidth);
        g.setColor(Color.BLACK);
        final Hershey hershey = new Hershey();
        EvalCurve evalCurve = new EvalCurve();
        evalCurve.run();
        this.genomicSizePerCurveUnit = ((double) dict.getReferenceLength() / (double) (evalCurve.count));
        if (this.genomicSizePerCurveUnit < 1)
            this.genomicSizePerCurveUnit = 1;
        LOG.info("genomicSizePerCurveUnit:" + genomicSizePerCurveUnit);
        for (int x = 0; x < samples.size(); ++x) {
            String samplex = samples.get(x);
            double labelHeight = marginWidth;
            if (labelHeight > 50)
                labelHeight = 50;
            g.setColor(Color.BLACK);
            hershey.paint(g, samplex, marginWidth + x * sampleWidth, marginWidth - labelHeight, sampleWidth * 0.9, labelHeight * 0.9);
            AffineTransform old = g.getTransform();
            AffineTransform tr = AffineTransform.getTranslateInstance(marginWidth, marginWidth + x * sampleWidth);
            tr.rotate(Math.PI / 2);
            g.setTransform(tr);
            hershey.paint(g, samplex, 0.0, 0.0, sampleWidth * 0.9, labelHeight * 0.9);
            // g.drawString(this.tabixFile.getFile().getName(),0,0);
            g.setTransform(old);
            double tx = marginWidth + x * sampleWidth;
            for (int y = 0; y < samples.size(); ++y) {
                double ty = marginWidth + y * sampleWidth;
                g.translate(tx, ty);
                g.setColor(Color.BLUE);
                g.draw(new Rectangle2D.Double(0, 0, sampleWidth, sampleWidth));
                // paint each chromosome
                paintReference();
                g.translate(-tx, -ty);
            }
        }
        LOG.info("genomicSizePerCurveUnit:" + (long) genomicSizePerCurveUnit * evalCurve.count + " " + dict.getReferenceLength() + " count=" + evalCurve.count);
        LOG.info("Scanning variants");
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(this.dict);
        while (iter.hasNext()) {
            final VariantContext var = progress.watch(iter.next());
            for (int x = 0; x < samples.size(); ++x) {
                final String samplex = samples.get(x);
                final Genotype gx = var.getGenotype(samplex);
                if (!gx.isCalled())
                    continue;
                double tx = marginWidth + x * sampleWidth;
                for (int y = 0; y < samples.size(); ++y) {
                    final String sampley = samples.get(y);
                    final Genotype gy = var.getGenotype(sampley);
                    if (!gy.isCalled())
                        continue;
                    if (gx.isHomRef() && gy.isHomRef())
                        continue;
                    double ty = marginWidth + y * sampleWidth;
                    g.translate(tx, ty);
                    final PaintVariant paint = new PaintVariant(var, x, y);
                    paint.run();
                    g.translate(-tx, -ty);
                }
            }
        }
        progress.finish();
        this.g.dispose();
        // save file
        LOG.info("saving " + imgOut);
        if (imgOut != null) {
            ImageIO.write(img, imgOut.getName().toLowerCase().endsWith(".png") ? "PNG" : "JPG", imgOut);
        } else {
            ImageIO.write(img, "PNG", stdout());
        }
        return 0;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(iter);
    }
}
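VcfToHilbert projects genomic positions onto a space-filling Hilbert curve: EvalCurve counts the curve units and genomicSizePerCurveUnit converts a base position into a distance along the curve. The project's own curve walking lives in EvalCurve/PaintVariant; for reference only, here is a sketch of the textbook distance-to-point conversion for a Hilbert curve (the standard d2xy algorithm, not jvarkit's code), where side is the curve's side length and must be a power of two:

// Convert a distance d along a Hilbert curve of side 'side' (a power of two)
// into (x, y) grid coordinates. Classic iterative algorithm.
static int[] d2xy(final long side, long d) {
    long x = 0, y = 0;
    for (long s = 1; s < side; s *= 2) {
        final long rx = 1 & (d / 2);
        final long ry = 1 & (d ^ rx);
        if (ry == 0) {
            if (rx == 1) { // reflect the quadrant
                x = s - 1 - x;
                y = s - 1 - y;
            }
            final long t = x; // swap x and y
            x = y;
            y = t;
        }
        x += s * rx;
        y += s * ry;
        d /= 4;
    }
    return new int[] { (int) x, (int) y };
}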
Use of com.github.lindenb.jvarkit.util.vcf.VcfIterator in project jvarkit by lindenb.
Class VcfMultiToOne, method doWork:
@Override
public int doWork(final List<String> arguments) {
    VariantContextWriter out = null;
    Set<String> args = IOUtils.unrollFiles(arguments);
    List<VcfIterator> inputs = new ArrayList<>(args.size() + 1);
    List<String> inputFiles = new ArrayList<>(args.size() + 1);
    try {
        if (args.isEmpty() && arguments.isEmpty()) {
            inputs.add(VCFUtils.createVcfIteratorStdin());
            inputFiles.add("stdin");
        } else if (args.isEmpty()) {
            LOG.error("No vcf provided");
            return -1;
        } else {
            for (final String vcfFile : args) {
                inputs.add(VCFUtils.createVcfIterator(vcfFile));
                inputFiles.add(VCFUtils.escapeInfoField(vcfFile));
            }
        }
        SAMSequenceDictionary dict = null;
        final Set<String> sampleNames = new HashSet<String>();
        final Set<VCFHeaderLine> metaData = new HashSet<VCFHeaderLine>();
        for (final VcfIterator in : inputs) {
            final VCFHeader header = in.getHeader();
            if (dict == null) {
                dict = header.getSequenceDictionary();
            } else if (header.getSequenceDictionary() == null) {
                LOG.error("No Dictionary in vcf");
                return -1;
            } else if (!SequenceUtil.areSequenceDictionariesEqual(dict, header.getSequenceDictionary())) {
                LOG.error("Not the same dictionary between vcfs");
                return -1;
            }
            metaData.addAll(in.getHeader().getMetaDataInInputOrder());
            sampleNames.addAll(in.getHeader().getSampleNamesInOrder());
        }
        final Comparator<VariantContext> comparator = (dict == null ? VCFUtils.createChromPosRefComparator() : VCFUtils.createTidPosRefComparator(dict));
        // addMetaData(metaData);
        metaData.add(new VCFInfoHeaderLine(DEFAULT_SAMPLE_TAGID, 1, VCFHeaderLineType.String, "Sample Name from multi-sample vcf"));
        metaData.add(new VCFInfoHeaderLine(DEFAULT_SAMPLE_FILETAGID, 1, VCFHeaderLineType.String, "Origin of sample"));
        for (final String sample : sampleNames) {
            metaData.add(new VCFHeaderLine(SAMPLE_HEADER_DECLARATION, sample));
        }
        final VCFHeader h2 = new VCFHeader(metaData, Collections.singleton(DEFAULT_VCF_SAMPLE_NAME));
        recalculator.setHeader(h2);
        out = super.openVariantContextWriter(this.outputFile);
        out.writeHeader(h2);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(dict);
        for (;;) {
            if (out.checkError())
                break;
            /* get 'smallest' variant */
            VariantContext smallest = null;
            int idx = 0;
            int best_idx = -1;
            while (idx < inputs.size()) {
                final VcfIterator in = inputs.get(idx);
                if (!in.hasNext()) {
                    CloserUtil.close(in);
                    inputs.remove(idx);
                    inputFiles.remove(idx);
                } else {
                    final VariantContext ctx = in.peek();
                    if (smallest == null || comparator.compare(smallest, ctx) > 0) {
                        smallest = ctx;
                        best_idx = idx;
                    }
                    ++idx;
                }
            }
            if (smallest == null)
                break;
            final VariantContext ctx = progress.watch(inputs.get(best_idx).next());
            if (ctx.getNSamples() == 0) {
                if (!this.discard_no_call) {
                    final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
                    vcb.attribute(DEFAULT_SAMPLE_FILETAGID, inputFiles.get(best_idx));
                    vcb.genotypes(GenotypeBuilder.createMissing(DEFAULT_VCF_SAMPLE_NAME, 2));
                    out.add(this.recalculator.apply(vcb.make()));
                }
                continue;
            }
            for (int i = 0; i < ctx.getNSamples(); ++i) {
                final Genotype g = ctx.getGenotype(i);
                final String sample = g.getSampleName();
                if (!g.isCalled() && this.discard_no_call)
                    continue;
                if (!g.isAvailable() && this.discard_non_available)
                    continue;
                if (g.isHomRef() && this.discard_hom_ref)
                    continue;
                final GenotypeBuilder gb = new GenotypeBuilder(g);
                gb.name(DEFAULT_VCF_SAMPLE_NAME);
                final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
                vcb.attribute(DEFAULT_SAMPLE_TAGID, sample);
                vcb.attribute(DEFAULT_SAMPLE_FILETAGID, inputFiles.get(best_idx));
                vcb.genotypes(gb.make());
                out.add(this.recalculator.apply(vcb.make()));
            }
        }
        progress.finish();
        LOG.debug("done");
        return 0;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(inputs);
        CloserUtil.close(out);
    }
}
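The merge loop above re-scans every open iterator to find the smallest pending variant, which is O(k) work per record for k inputs. A minimal sketch of the same k-way merge with a priority queue (O(log k) per record), using htsjdk's PeekableIterator rather than jvarkit's VcfIterator and omitting the per-file bookkeeping (best_idx / inputFiles) that the tool itself needs:

import java.util.Comparator;
import java.util.List;
import java.util.PriorityQueue;
import java.util.function.Consumer;

import htsjdk.samtools.util.PeekableIterator;
import htsjdk.variant.variantcontext.VariantContext;

// Merge sorted variant iterators; 'comparator' plays the role of the
// chrom/pos/ref comparator built in the method above.
static void kWayMerge(final List<PeekableIterator<VariantContext>> inputs,
                      final Comparator<VariantContext> comparator,
                      final Consumer<VariantContext> sink) {
    // order the iterators by their next pending record
    final PriorityQueue<PeekableIterator<VariantContext>> queue =
            new PriorityQueue<>((a, b) -> comparator.compare(a.peek(), b.peek()));
    for (final PeekableIterator<VariantContext> it : inputs) {
        if (it.hasNext()) queue.add(it);
    }
    while (!queue.isEmpty()) {
        final PeekableIterator<VariantContext> top = queue.poll();
        sink.accept(top.next());            // emit the smallest pending variant
        if (top.hasNext()) queue.add(top);  // re-insert with its new head
    }
}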
Use of com.github.lindenb.jvarkit.util.vcf.VcfIterator in project jvarkit by lindenb.
Class VcfBurden, method doWork2:
private int doWork2(List<String> args) {
    ZipOutputStream zout = null;
    FileOutputStream fout = null;
    VcfIterator in = null;
    try {
        if (args.isEmpty()) {
            LOG.info("reading from stdin.");
            in = VCFUtils.createVcfIteratorStdin();
        } else if (args.size() == 1) {
            String filename = args.get(0);
            LOG.info("reading from " + filename);
            in = VCFUtils.createVcfIterator(filename);
        } else {
            LOG.error("Illegal number of arguments.");
            return -1;
        }
        if (outputFile == null) {
            LOG.error("undefined output");
            return -1;
        } else if (!outputFile.getName().endsWith(".zip")) {
            LOG.error("output " + outputFile + " should end with .zip");
            return -1;
        } else {
            fout = new FileOutputStream(outputFile);
            zout = new ZipOutputStream(fout);
        }
        final List<String> samples = in.getHeader().getSampleNamesInOrder();
        final VCFHeader header = in.getHeader();
        String prev_chrom = null;
        final VepPredictionParser vepPredParser = new VepPredictionParserFactory().header(header).get();
        final Map<GeneTranscript, List<VariantAndCsq>> gene2variants = new HashMap<>();
        final SequenceOntologyTree soTree = SequenceOntologyTree.getInstance();
        final Set<SequenceOntologyTree.Term> acn = new HashSet<>();
        /* mail from Solena: these SO terms replace the current ones (VEP HIGH + MODERATE);
           no need to re-run the existing analyses, but they will serve for future burden analyses */
        String[] acn_list = new String[] {
            "SO:0001893", "SO:0001574", "SO:0001575", "SO:0001587",
            "SO:0001589", "SO:0001578", "SO:0002012", "SO:0001889",
            "SO:0001821", "SO:0001822", "SO:0001583", "SO:0001818"
            /* "SO:0001589", "SO:0001587", "SO:0001582", "SO:0001583",
               "SO:0001575", "SO:0001578", "SO:0001574", "SO:0001889",
               "SO:0001821", "SO:0001822", "SO:0001893" */
        };
        if (this.highdamage) {
            acn_list = new String[] { "SO:0001893", "SO:0001574", "SO:0001575", "SO:0001587", "SO:0001589", "SO:0001578", "SO:0002012", "SO:0001889" };
        }
        for (final String acns : acn_list) {
            final SequenceOntologyTree.Term tacn = soTree.getTermByAcn(acns);
            if (tacn == null) {
                in.close();
                throw new NullPointerException("tacn == null for " + acns);
            }
            acn.addAll(tacn.getAllDescendants());
        }
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(in.getHeader());
        for (;;) {
            VariantContext ctx1 = null;
            if (in.hasNext()) {
                ctx1 = progress.watch(in.next());
                if (ctx1.getAlternateAlleles().size() != 1) {
                    // info("count(ALT)!=1 in "+ctx1.getChr()+":"+ctx1.getStart());
                    continue;
                }
                if (ctx1.getAlternateAlleles().get(0).equals(Allele.SPAN_DEL)) {
                    continue;
                }
            }
            // end of input or new chromosome: dump the genes accumulated so far
            if (ctx1 == null || !ctx1.getContig().equals(prev_chrom)) {
                LOG.info("DUMP to zip n=" + gene2variants.size());
                final Set<String> geneNames = new HashSet<>();
                for (GeneTranscript gene_transcript : gene2variants.keySet()) {
                    geneNames.add(gene_transcript.geneName);
                    final Set<VariantAndCsq> uniq = new TreeSet<>(this.variantAndCsqComparator);
                    uniq.addAll(gene2variants.get(gene_transcript));
                    dumpVariants(zout, prev_chrom, gene_transcript.geneName + "_" + gene_transcript.transcriptName, samples, new ArrayList<VariantAndCsq>(uniq));
                    LOG.info("dumped " + gene_transcript.geneName);
                }
                LOG.info("loop over geneName");
                for (final String geneName : geneNames) {
                    final SortedSet<VariantAndCsq> lis_nm = new TreeSet<>(this.variantAndCsqComparator);
                    final SortedSet<VariantAndCsq> lis_all = new TreeSet<>(this.variantAndCsqComparator);
                    final SortedSet<VariantAndCsq> lis_refseq = new TreeSet<>(this.variantAndCsqComparator);
                    final SortedSet<VariantAndCsq> lis_enst = new TreeSet<>(this.variantAndCsqComparator);
                    LOG.info("loop over gene2variants");
                    for (final GeneTranscript gene_transcript : gene2variants.keySet()) {
                        if (!geneName.equals(gene_transcript.geneName))
                            continue;
                        lis_all.addAll(gene2variants.get(gene_transcript));
                        if (gene_transcript.transcriptName.startsWith("NM_")) {
                            lis_nm.addAll(gene2variants.get(gene_transcript));
                        }
                        if (!gene_transcript.transcriptName.startsWith("ENST")) {
                            lis_refseq.addAll(gene2variants.get(gene_transcript));
                        }
                        if (gene_transcript.transcriptName.startsWith("ENST")) {
                            lis_enst.addAll(gene2variants.get(gene_transcript));
                        }
                    }
                    LOG.info("dump_ALL_TRANSCRIPTS");
                    dumpVariants(zout, prev_chrom, geneName + "_ALL_TRANSCRIPTS", samples, new ArrayList<VariantAndCsq>(lis_all));
                    LOG.info("dump_ALL_NM");
                    dumpVariants(zout, prev_chrom, geneName + "_ALL_NM", samples, new ArrayList<VariantAndCsq>(lis_nm));
                    LOG.info("dump_ALL_REFSEQ");
                    dumpVariants(zout, prev_chrom, geneName + "_ALL_REFSEQ", samples, new ArrayList<VariantAndCsq>(lis_refseq));
                    LOG.info("dump_ALL_ENST");
                    dumpVariants(zout, prev_chrom, geneName + "_ALL_ENST", samples, new ArrayList<VariantAndCsq>(lis_enst));
                }
                if (ctx1 == null)
                    break;
                LOG.info("gene2variants");
                gene2variants.clear();
                LOG.info("System.gc();");
                System.gc();
                prev_chrom = ctx1.getContig();
                LOG.info("prev_chrom=" + prev_chrom);
            }
            final Set<GeneTranscript> seen_names = new HashSet<>();
            for (final VepPredictionParser.VepPrediction pred : vepPredParser.getPredictions(ctx1)) {
                String geneName = pred.getSymbol();
                if (geneName == null || geneName.trim().isEmpty())
                    continue;
                if (this._gene2seen != null) {
                    if (!this._gene2seen.containsKey(geneName))
                        continue;
                }
                final String transcriptName = pred.getFeature();
                if (transcriptName == null || transcriptName.trim().isEmpty()) {
                    LOG.info("No transcript in " + ctx1);
                    continue;
                }
                final GeneTranscript geneTranscript = new GeneTranscript(geneName, transcriptName);
                if (seen_names.contains(geneTranscript))
                    continue;
                boolean ok = false;
                for (SequenceOntologyTree.Term so : pred.getSOTerms()) {
                    if (acn.contains(so)) {
                        ok = true;
                    }
                }
                if (!printSOTerms && !ok)
                    continue;
                List<VariantAndCsq> L = gene2variants.get(geneTranscript);
                if (L == null) {
                    L = new ArrayList<>();
                    gene2variants.put(geneTranscript, L);
                }
                Float vqslod = null;
                if (this.printVQSLOD && ctx1.hasAttribute("VQSLOD")) {
                    vqslod = (float) ctx1.getAttributeAsDouble("VQSLOD", -9999999.0);
                }
                L.add(new VariantAndCsq(ctx1, pred.getSOTerms(), pred.getPositionInCDS(), vqslod));
                seen_names.add(geneTranscript);
                if (this._gene2seen != null) {
                    this._gene2seen.put(geneTranscript.geneName, Boolean.TRUE);
                }
            }
        }
        if (this._gene2seen != null) {
            final List<VariantAndCsq> emptylist = Collections.emptyList();
            for (final String gene : this._gene2seen.keySet()) {
                if (this._gene2seen.get(gene).equals(Boolean.TRUE))
                    continue;
                LOG.warning("Gene not found : " + gene);
                dumpVariants(zout, "UNDEFINED", gene + "_000000000000000.txt", samples, emptylist);
            }
        }
        progress.finish();
        zout.finish();
        fout.flush();
        zout.flush();
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(in);
        CloserUtil.close(zout);
        CloserUtil.close(fout);
    }
}
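The consequence filter above works because each accession in acn_list is expanded with getAllDescendants(): a VEP term more specific than the listed SO accession still matches the 'acn' set. A sketch of that transitive expansion, using a hypothetical minimal Term type (jvarkit's SequenceOntologyTree.Term provides the real getAllDescendants()):

import java.util.ArrayDeque;
import java.util.Deque;
import java.util.HashSet;
import java.util.Set;

// Hypothetical minimal ontology-term type for illustration only.
interface Term {
    Set<Term> getChildren();
}

// Collect each root term plus all of its transitive descendants.
static Set<Term> descendantClosure(final Set<Term> roots) {
    final Set<Term> seen = new HashSet<>();
    final Deque<Term> stack = new ArrayDeque<>(roots);
    while (!stack.isEmpty()) {
        final Term t = stack.pop();
        if (seen.add(t)) {              // first visit: schedule the children
            stack.addAll(t.getChildren());
        }
    }
    return seen;
}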
Use of com.github.lindenb.jvarkit.util.vcf.VcfIterator in project jvarkit by lindenb.
Class FindAVariation, method scan:
private void scan(final BufferedReader in) throws IOException {
    String line;
    while ((line = in.readLine()) != null) {
        if (line.isEmpty() || line.startsWith("#"))
            continue;
        final File f = new File(line);
        if (!f.isFile())
            continue;
        if (!f.canRead())
            continue;
        if (!VCFUtils.isVcfFile(f))
            continue;
        VcfIterator iter = null;
        if (VCFUtils.isTribbleVcfFile(f)) {
            VCFFileReader r = null;
            try {
                r = new VCFFileReader(f, true);
                final VCFHeader header = r.getFileHeader();
                for (final Mutation m : convertFromVcfHeader(f, header)) {
                    final CloseableIterator<VariantContext> iter2 = r.query(m.chrom, m.pos, m.pos);
                    while (iter2.hasNext()) {
                        final VariantContext ctx = iter2.next();
                        if (this.onlySnp) {
                            if (ctx.getStart() != m.pos || ctx.getEnd() != m.pos)
                                continue;
                        }
                        report(f, header, ctx, m);
                    }
                    CloserUtil.close(iter2);
                }
            } catch (final htsjdk.tribble.TribbleException.InvalidHeader err) {
                LOG.warn(f + "\t" + err.getMessage());
            } catch (final Exception err) {
                LOG.severe("cannot read " + f, err);
            } finally {
                CloserUtil.close(r);
            }
        } else if (VCFUtils.isTabixVcfFile(f)) {
            TabixVcfFileReader r = null;
            try {
                r = new TabixVcfFileReader(f.getPath());
                final VCFHeader header = r.getHeader();
                for (final Mutation m : convertFromVcfHeader(f, header)) {
                    final Iterator<VariantContext> iter2 = r.iterator(m.chrom, m.pos, m.pos);
                    while (iter2.hasNext()) {
                        final VariantContext ctx = iter2.next();
                        if (this.onlySnp) {
                            if (ctx.getStart() != m.pos || ctx.getEnd() != m.pos)
                                continue;
                        }
                        report(f, header, ctx, m);
                    }
                    CloserUtil.close(iter2);
                }
            } catch (final htsjdk.tribble.TribbleException.InvalidHeader err) {
                LOG.warn(f + "\t" + err.getMessage());
            } catch (final Exception err) {
                LOG.severe("cannot read " + f, err);
            } finally {
                CloserUtil.close(r);
            }
        } else if (!this.indexedOnly) {
            // no index available: stream the whole file with a VcfIterator
            try {
                iter = VCFUtils.createVcfIteratorFromFile(f);
                final VCFHeader header = iter.getHeader();
                final Set<Mutation> mutlist = convertFromVcfHeader(f, iter.getHeader());
                while (iter.hasNext()) {
                    final VariantContext ctx = iter.next();
                    final Mutation m = new Mutation(ctx.getContig(), ctx.getStart());
                    for (final Mutation m2 : mutlist) {
                        if (m.equals(m2)) {
                            if (this.onlySnp) {
                                if (ctx.getStart() != m2.pos || ctx.getEnd() != m2.pos)
                                    continue;
                            }
                            report(f, header, ctx, m2);
                            break;
                        }
                    }
                }
            } catch (final htsjdk.tribble.TribbleException.InvalidHeader err) {
                LOG.warn(f + "\t" + err.getMessage());
            } catch (final Exception err) {
                LOG.severe("Error in " + f, err);
            } finally {
                CloserUtil.close(iter);
            }
        }
    }
}
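Both indexed branches above follow the same pattern: open a reader, query one position, exhaust the iterator, close both. A minimal self-contained sketch of that pattern with htsjdk's VCFFileReader and try-with-resources instead of the manual CloserUtil calls (the contig and position arguments are illustrative placeholders):

import java.io.File;

import htsjdk.samtools.util.CloseableIterator;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.vcf.VCFFileReader;

// Query a single 1-based position in an indexed VCF and print the hits.
static void queryOnePosition(final File vcf, final String contig, final int pos) {
    try (final VCFFileReader reader = new VCFFileReader(vcf, true); // true = require an index
         final CloseableIterator<VariantContext> it = reader.query(contig, pos, pos)) {
        while (it.hasNext()) {
            final VariantContext ctx = it.next();
            System.out.println(ctx.getContig() + ":" + ctx.getStart());
        }
    }
}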