Search in sources :

Example 41 with VcfIterator

use of com.github.lindenb.jvarkit.util.vcf.VcfIterator in project jvarkit by lindenb.

the class ForkVcf method doWork.

@Override
public int doWork(List<String> args) {
    if (this.outputFile == null || !this.outputFile.getName().contains(REPLACE_GROUPID)) {
        LOG.error("Output file pattern undefined or doesn't contain " + REPLACE_GROUPID + " : " + this.outputFile);
        return -1;
    }
    if (!(this.outputFile.getName().endsWith(".vcf") || this.outputFile.getName().endsWith(".vcf.gz"))) {
        LOG.error("output file must end with '.vcf' or '.vcf.gz'");
        return -1;
    }
    if (this.number_of_files <= 0) {
        LOG.error("Bad value for number of files:" + this.number_of_files);
        return -1;
    }
    BufferedReader r = null;
    VcfIterator in = null;
    PrintWriter manifestWriter = null;
    final List<SplitGroup> groups = new ArrayList<>();
    VCFBuffer vcfBuffer = null;
    try {
        in = openVcfIterator(oneFileOrNull(args));
        manifestWriter = (this.manifestFile == null ? new PrintWriter(new NullOuputStream()) : IOUtils.openFileForPrintWriter(this.manifestFile));
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(in.getHeader());
        if (!this.split_by_chunk) {
            while (groups.size() < this.number_of_files) {
                final SplitGroup sg = new SplitGroup(groups.size() + 1);
                sg.open(in.getHeader());
                manifestWriter.println(sg.getFile().getPath());
                groups.add(sg);
            }
            int idx = 0;
            while (in.hasNext()) {
                final VariantContext ctx = progress.watch(in.next());
                groups.get(idx % this.number_of_files)._writer.add(ctx);
                ++idx;
            }
            in.close();
        } else {
            long count_variants = 0;
            vcfBuffer = new VCFBuffer(this.maxRecordsInRam, this.tmpDir);
            vcfBuffer.writeHeader(in.getHeader());
            while (in.hasNext()) {
                final VariantContext ctx = progress.watch(in.next());
                vcfBuffer.add(ctx);
                ++count_variants;
            }
            in.close();
            final long variant_per_file = Math.max(1L, (long) Math.ceil(count_variants / (double) this.number_of_files));
            LOG.info("done buffering. n=" + count_variants + " now forking " + variant_per_file + " variants for " + this.number_of_files + " files.");
            VcfIterator iter2 = vcfBuffer.iterator();
            long count_ctx = 0L;
            while (iter2.hasNext()) {
                if (groups.isEmpty() || count_ctx >= variant_per_file) {
                    if (!groups.isEmpty())
                        groups.get(groups.size() - 1).close();
                    final SplitGroup last = new SplitGroup(groups.size() + 1);
                    last.open(in.getHeader());
                    manifestWriter.println(last.getFile().getPath());
                    groups.add(last);
                    count_ctx = 0;
                }
                final VariantContext ctx = iter2.next();
                groups.get(groups.size() - 1)._writer.add(ctx);
                count_ctx++;
            }
            iter2.close();
            vcfBuffer.close();
            vcfBuffer.dispose();
            vcfBuffer = null;
            // save remaining empty VCFs
            while (groups.size() < this.number_of_files) {
                LOG.info("creating empty vcf");
                final SplitGroup sg = new SplitGroup(groups.size() + 1);
                sg.open(in.getHeader());
                manifestWriter.println(sg.getFile().getPath());
                sg.close();
                groups.add(sg);
            }
        }
        progress.finish();
        for (final SplitGroup g : groups) {
            g.close();
        }
        manifestWriter.flush();
        manifestWriter.close();
        manifestWriter = null;
        return RETURN_OK;
    } catch (final Exception err) {
        LOG.error(err);
        for (final SplitGroup g : groups) {
            CloserUtil.close(g);
            if (in != null)
                g.getFile().delete();
        }
        return -1;
    } finally {
        if (vcfBuffer != null)
            vcfBuffer.dispose();
        CloserUtil.close(r);
        CloserUtil.close(in);
        IOUtils.flush(manifestWriter);
        CloserUtil.close(manifestWriter);
    }
}
Also used : SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) IOException(java.io.IOException) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) BufferedReader(java.io.BufferedReader) VCFBuffer(com.github.lindenb.jvarkit.util.vcf.VCFBuffer) NullOuputStream(com.github.lindenb.jvarkit.io.NullOuputStream) PrintWriter(java.io.PrintWriter)

Example 42 with VcfIterator

use of com.github.lindenb.jvarkit.util.vcf.VcfIterator in project jvarkit by lindenb.

the class VcfTreePack method doWork.

@Override
public int doWork(List<String> args) {
    setDimension(this.dimensionStr);
    VcfIterator in = null;
    try {
        parseConfigFile();
        if (super.nodeFactoryChain.next == null) {
            LOG.error("no path defined");
            return -1;
        }
        if (args.isEmpty()) {
            LOG.info("Reading stdin");
            in = VCFUtils.createVcfIteratorFromStream(stdin());
            scan(in);
            CloserUtil.close(in);
        } else {
            for (final String f : args) {
                in = VCFUtils.createVcfIterator(f);
                scan(in);
                CloserUtil.close(in);
            }
        }
        this.layout();
        this.svg(this.outputFile);
        return RETURN_OK;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(in);
    }
}
Also used : VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) IOException(java.io.IOException) ScriptException(javax.script.ScriptException)

Example 43 with VcfIterator

use of com.github.lindenb.jvarkit.util.vcf.VcfIterator in project jvarkit by lindenb.

the class VcfToRdf method scanVCF.

private void scanVCF(final File filein) throws IOException {
    VcfIterator in = null;
    URI source = null;
    try {
        if (filein != null)
            source = filein.toURI();
        in = (filein == null ? VCFUtils.createVcfIteratorStdin() : VCFUtils.createVcfIteratorFromFile(filein));
        final VCFHeader header = in.getHeader();
        final VepPredictionParser vepPredictionParser = new VepPredictionParserFactory(header).get();
        writeHeader(header, source);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header);
        while (in.hasNext()) {
            if (this.w.checkError()) {
                LOG.warn("I/O interruption");
                break;
            }
            final VariantContext ctx = progress.watch(in.next());
            /* Variant */
            final URI variant = URI.create("urn:variant/" + ctx.getContig() + ":" + ctx.getStart() + ":" + ctx.getReference().getBaseString());
            emit(variant, "rdf:type", "vcf:Variant", "vcf:chrom", URI.create("urn:chrom/" + ctx.getContig()), "vcf:position", ctx.getStart(), "vcf:ref", ctx.getReference().getBaseString(), "vcf:id", (ctx.hasID() ? ctx.getID() : null), "vcf:qual", (ctx.hasLog10PError() ? ctx.getPhredScaledQual() : null));
            if (this.printAlleles) {
                for (final Allele alt : ctx.getAlternateAlleles()) {
                    emit(variant, "vcf:alt", alt.getBaseString());
                }
            }
            if (this.printFilters) {
                for (final String f : ctx.getFilters()) {
                    emit(variant, "vcf:filter", URI.create("urn:filter/" + f));
                }
            }
            if (this.printVep) {
                for (final VepPrediction prediction : vepPredictionParser.getPredictions(ctx)) {
                /* 
					final List<Object> L=new ArrayList<>();
					L.add("rdf:type");L.add("vep:Prediction");
					L.add("vcf:variant"); L.add(variant);
					L.add("vcf:allele");L.add(prediction.getAllele().getBaseString());
					for(final SequenceOntologyTree.Term term:prediction.getSOTerms())
						{
						L.add("vcf:so");
						L.add(URI.create(term.getUri()));
						}
					if(prediction.getEnsemblTranscript()!=null)
						{
						final  URI transcriptid=URI.create("http://www.ensembl.org/id/"+prediction.getEnsemblTranscript());
						L.add("vep:transcript");
						L.add(transcriptid);

						
						if(prediction.getEnsemblGene()!=null)
							{
							emit(transcriptid,
								"uniprot:transcribedFrom",//used  in uniprot dump
								URI.create("http://www.ensembl.org/id/"+prediction.getEnsemblGene())
								);
							}
						
						if(prediction.getEnsemblProtein()!=null)
							{
							emit(
								transcriptid,
								"uniprot:translatedTo",//used  in uniprot dump
								URI.create("http://www.ensembl.org/id/"+prediction.getEnsemblProtein())
								);
							}
						}
					
					
					
					emit(
						URI.create("urn:vep/"+(++id_generator)),
						L.toArray()
						);
					*/
                }
            }
            if (this.printGenotypes) {
                for (final String sample : ctx.getSampleNames()) {
                    final Genotype g = ctx.getGenotype(sample);
                    final List<Object> L = new ArrayList<>();
                    L.add("vcf:sample");
                    L.add(URI.create("urn:sample/" + sample));
                    L.add("vcf:variant");
                    L.add(variant);
                    L.add("rdf:type");
                    L.add("vcf:Genotype");
                    if (g.hasDP()) {
                        L.add("vcf:dp");
                        L.add(g.getDP());
                    }
                    if (g.hasGQ()) {
                        L.add("vcf:gq");
                        L.add(g.getGQ());
                    }
                    if (g.isCalled()) {
                        if (g.isHet()) {
                            if (g.isHetNonRef()) {
                                L.add("rdf:type");
                                L.add("vcf:HetNonRefGenotype");
                            } else {
                                L.add("rdf:type");
                                L.add("vcf:HetGenotype");
                            }
                        } else if (g.isHom()) {
                            if (g.isHomRef()) {
                                L.add("rdf:type");
                                L.add("vcf:HomRefGenotype");
                            } else {
                                L.add("rdf:type");
                                L.add("vcf:HomVarGenotype");
                            }
                        }
                        for (final Allele a : g.getAlleles()) {
                            L.add("vcf:allele");
                            L.add(a.getBaseString());
                        }
                    }
                    emit(URI.create("urn:gt/" + ctx.getContig() + ":" + ctx.getStart() + ":" + ctx.getReference().getBaseString() + ":" + sample), L.toArray());
                }
            }
        }
        in.close();
        in = null;
        progress.finish();
    } catch (final Exception e) {
        throw new IOException(e);
    } finally {
        CloserUtil.close(in);
    }
}
Also used : VepPrediction(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParser.VepPrediction) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) Genotype(htsjdk.variant.variantcontext.Genotype) IOException(java.io.IOException) URI(java.net.URI) IOException(java.io.IOException) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) Allele(htsjdk.variant.variantcontext.Allele) VepPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParser) VCFHeader(htsjdk.variant.vcf.VCFHeader) VepPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParserFactory)

Example 44 with VcfIterator

use of com.github.lindenb.jvarkit.util.vcf.VcfIterator in project jvarkit by lindenb.

the class VcfOptimizePedForSkat method doWork.

@Override
public int doWork(final List<String> args) {
    VcfIterator r = null;
    try {
        this.random = new Random(this.seed == -1 ? System.currentTimeMillis() : seed);
        if (this.nSamplesRemove < 1) {
            LOG.error("bad number of samples to remove.");
            return -1;
        }
        final SkatFactory.SkatExecutor executor = this.skatInstance.build();
        r = super.openVcfIterator(oneFileOrNull(args));
        final List<VariantContext> variants = new ArrayList<>();
        while (r.hasNext()) {
            final VariantContext ctx = r.next();
            if (!executor.getUpstreamVariantFilter().test(ctx))
                continue;
            variants.add(ctx);
        }
        LOG.info("number of variants : " + variants.size());
        if (variants.stream().map(V -> V.getContig()).collect(Collectors.toSet()).size() != 1) {
            LOG.error("multiple contig/chromosome in input.");
            return -1;
        }
        final VCFHeader header = r.getHeader();
        final Set<String> sampleNames = new HashSet<>(header.getSampleNamesInOrder());
        if (this.pedigreeFile != null) {
            this.pedigree = new Pedigree.Parser().parse(this.pedigreeFile);
        } else {
            this.pedigree = new Pedigree.Parser().parse(header);
        }
        r.close();
        r = null;
        final List<Pedigree.Person> samples = this.pedigree.getPersons().stream().filter(P -> (P.isAffected() || P.isUnaffected())).filter(S -> sampleNames.contains(S.getId())).collect(Collectors.toList());
        if (samples.isEmpty()) {
            LOG.error("Not enough Samples in pedigree/vcf");
            return -1;
        }
        if (!samples.stream().anyMatch(P -> P.isAffected())) {
            LOG.error("No affected Samples in pedigree/vcf");
            return -1;
        }
        if (!samples.stream().anyMatch(P -> P.isUnaffected())) {
            LOG.error("No unaffected Samples in pedigree/vcf");
            return -1;
        }
        long nIter = 0L;
        while (max_iterations == -1L || nIter < max_iterations) {
            exec(nIter, header, variants, samples, executor);
            ++nIter;
        }
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(r);
    }
}
Also used : Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) VCFHeader(htsjdk.variant.vcf.VCFHeader) Random(java.util.Random) TreeSet(java.util.TreeSet) ParametersDelegate(com.beust.jcommander.ParametersDelegate) VcfBurdenFisherH(com.github.lindenb.jvarkit.tools.burden.VcfBurdenFisherH) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) StringUtil(htsjdk.samtools.util.StringUtil) Pedigree(com.github.lindenb.jvarkit.util.Pedigree) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) CloserUtil(htsjdk.samtools.util.CloserUtil) Logger(com.github.lindenb.jvarkit.util.log.Logger) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) Set(java.util.Set) Collectors(java.util.stream.Collectors) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) File(java.io.File) VcfBurdenMAF(com.github.lindenb.jvarkit.tools.burden.VcfBurdenMAF) List(java.util.List) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VariantContext(htsjdk.variant.variantcontext.VariantContext) Collections(java.util.Collections) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) Random(java.util.Random) VCFHeader(htsjdk.variant.vcf.VCFHeader) HashSet(java.util.HashSet)

Example 45 with VcfIterator

use of com.github.lindenb.jvarkit.util.vcf.VcfIterator in project jvarkit by lindenb.

the class VCFFilterJS method doVcfToVcf.

@Override
protected int doVcfToVcf(final String inputName, final VcfIterator r, final VariantContextWriter w) {
    try {
        final VCFHeader header = r.getHeader();
        final VcfTools vcfTools = new VcfTools(header);
        final VCFHeader h2 = new VCFHeader(header);
        addMetaData(h2);
        final Pedigree pedigree;
        if (pedigreeFile != null) {
            pedigree = Pedigree.newParser().parse(this.pedigreeFile);
        } else // try to read from VCF header
        {
            Pedigree p = null;
            try {
                p = Pedigree.newParser().parse(header);
            } catch (final Exception err) {
                LOG.warn("cannot decode pedigree from vcf header");
                p = Pedigree.createEmptyPedigree();
            }
            pedigree = p;
        }
        final VCFFilterHeaderLine filterHeaderLine = (filteredTag.trim().isEmpty() ? null : new VCFFilterHeaderLine(this.filteredTag.trim(), "Filtered with " + getProgramName()));
        if (filterHeaderLine != null)
            h2.addMetaDataLine(filterHeaderLine);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header).logger(LOG);
        final Bindings bindings = this.compiledScript.getEngine().createBindings();
        bindings.put("header", header);
        bindings.put("tools", vcfTools);
        bindings.put("pedigree", pedigree);
        bindings.put("individuals", Collections.unmodifiableList(pedigree.getPersons().stream().filter(P -> (P.isAffected() || P.isUnaffected())).filter(P -> P.hasUniqId()).filter(P -> header.getSampleNamesInOrder().contains(P.getId())).collect(Collectors.toList())));
        for (final String jsonkv : this.jsonFiles) {
            int eq = jsonkv.indexOf("=");
            if (eq <= 0)
                throw new JvarkitException.UserError("Bad format for json . expected key=/path/to/file.json but got '" + jsonkv + "'");
            final String key = jsonkv.substring(0, eq);
            final FileReader jsonFile = new FileReader(jsonkv.substring(eq + 1));
            JsonParser jsonParser = new JsonParser();
            final JsonElement root = jsonParser.parse(jsonFile);
            jsonFile.close();
            bindings.put(key, root);
        }
        w.writeHeader(h2);
        while (r.hasNext() && !w.checkError()) {
            final VariantContext variation = progress.watch(r.next());
            bindings.put("variant", variation);
            final Object result = compiledScript.eval(bindings);
            // result is an array of a collection of variants
            if (result != null && (result.getClass().isArray() || (result instanceof Collection))) {
                final Collection<?> col;
                if (result.getClass().isArray()) {
                    final Object[] array = (Object[]) result;
                    col = Arrays.asList(array);
                } else {
                    col = (Collection<?>) result;
                }
                // write all of variants
                for (final Object item : col) {
                    if (item == null)
                        throw new JvarkitException.UserError("item in array is null");
                    if (!(item instanceof VariantContext))
                        throw new JvarkitException.UserError("item in array is not a VariantContext " + item.getClass());
                    w.add(VariantContext.class.cast(item));
                }
            } else // result is a VariantContext
            if (result != null && (result instanceof VariantContext)) {
                w.add(VariantContext.class.cast(result));
            } else {
                boolean accept = true;
                if (result == null) {
                    accept = false;
                } else if (result instanceof Boolean) {
                    if (Boolean.FALSE.equals(result))
                        accept = false;
                } else if (result instanceof Number) {
                    if (((Number) result).intValue() != 1)
                        accept = false;
                } else {
                    LOG.warn("Script returned something that is not a boolean or a number:" + result.getClass());
                    accept = false;
                }
                if (!accept) {
                    if (filterHeaderLine != null) {
                        final VariantContextBuilder vcb = new VariantContextBuilder(variation);
                        vcb.filter(filterHeaderLine.getID());
                        w.add(vcb.make());
                    }
                    continue;
                }
                // set PASS filter if needed
                if (filterHeaderLine != null && !variation.isFiltered()) {
                    w.add(new VariantContextBuilder(variation).passFilters().make());
                    continue;
                }
                w.add(variation);
            }
        }
        return RETURN_OK;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
    }
}
Also used : Arrays(java.util.Arrays) Bindings(javax.script.Bindings) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) VCFHeader(htsjdk.variant.vcf.VCFHeader) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) Term(com.github.lindenb.semontology.Term) JsonParser(com.google.gson.JsonParser) ArrayList(java.util.ArrayList) JsonElement(com.google.gson.JsonElement) Pedigree(com.github.lindenb.jvarkit.util.Pedigree) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) Collection(java.util.Collection) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) Logger(com.github.lindenb.jvarkit.util.log.Logger) Collectors(java.util.stream.Collectors) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) File(java.io.File) List(java.util.List) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) CompiledScript(javax.script.CompiledScript) VcfTools(com.github.lindenb.jvarkit.util.vcf.VcfTools) VariantContext(htsjdk.variant.variantcontext.VariantContext) FileReader(java.io.FileReader) Collections(java.util.Collections) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) VariantContext(htsjdk.variant.variantcontext.VariantContext) Bindings(javax.script.Bindings) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) VcfTools(com.github.lindenb.jvarkit.util.vcf.VcfTools) Pedigree(com.github.lindenb.jvarkit.util.Pedigree) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) JsonElement(com.google.gson.JsonElement) Collection(java.util.Collection) FileReader(java.io.FileReader) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) VCFHeader(htsjdk.variant.vcf.VCFHeader) JsonParser(com.google.gson.JsonParser)

Aggregations

VcfIterator (com.github.lindenb.jvarkit.util.vcf.VcfIterator)55 VariantContext (htsjdk.variant.variantcontext.VariantContext)39 VCFHeader (htsjdk.variant.vcf.VCFHeader)35 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)30 ArrayList (java.util.ArrayList)28 VariantContextWriter (htsjdk.variant.variantcontext.writer.VariantContextWriter)26 IOException (java.io.IOException)24 File (java.io.File)22 HashSet (java.util.HashSet)19 List (java.util.List)19 Genotype (htsjdk.variant.variantcontext.Genotype)18 Parameter (com.beust.jcommander.Parameter)17 Launcher (com.github.lindenb.jvarkit.util.jcommander.Launcher)17 Program (com.github.lindenb.jvarkit.util.jcommander.Program)17 Logger (com.github.lindenb.jvarkit.util.log.Logger)17 Set (java.util.Set)17 Allele (htsjdk.variant.variantcontext.Allele)16 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)15 VariantContextBuilder (htsjdk.variant.variantcontext.VariantContextBuilder)15 JvarkitException (com.github.lindenb.jvarkit.lang.JvarkitException)14