Search in sources :

Example 1 with VcfIterator

use of com.github.lindenb.jvarkit.util.vcf.VcfIterator in project jvarkit by lindenb.

the class Biostar175929 method doWork.

/**
 * Reads the input VCF and hands the variants to {@code recursive(...)} one
 * contig at a time.
 *
 * Consecutive variants sharing the same contig are buffered; when the contig
 * changes, or the input is exhausted, the buffered variants are processed
 * together with the matching {@link GenomicSequence} and the buffer is cleared.
 *
 * @param args command line arguments; forwarded to {@code oneFileOrNull}
 * @return {@code RETURN_OK} on success, {@code -1} if the fasta reference is
 *         missing or any exception was raised
 */
@Override
public int doWork(List<String> args) {
    if (this.faidx == null) {
        LOG.error("fasta reference was not defined.");
        return -1;
    }
    IndexedFastaSequenceFile fasta = null;
    VcfIterator vcfIn = null;
    try {
        fasta = new IndexedFastaSequenceFile(this.faidx);
        vcfIn = super.openVcfIterator(oneFileOrNull(args));
        this.pw = openFileOrStdoutAsPrintWriter(this.outputFile);
        // variants of the contig currently being accumulated
        final List<VariantContext> buffer = new ArrayList<>();
        boolean finished = false;
        while (!finished) {
            // null marks the end of the input
            final VariantContext current = vcfIn.hasNext() ? vcfIn.next() : null;
            final boolean endOfInput = (current == null);
            final boolean contigChanged = !endOfInput
                    && !buffer.isEmpty()
                    && !current.getContig().equals(buffer.get(0).getContig());
            if ((endOfInput || contigChanged) && !buffer.isEmpty()) {
                final String contig = buffer.get(0).getContig();
                LOG.info("chrom:" + contig + " N=" + buffer.size());
                final GenomicSequence genomic = new GenomicSequence(fasta, contig);
                final StringBuilder title = new StringBuilder();
                final StringBuilder sequence = new StringBuilder();
                recursive(genomic, buffer, 0, title, sequence);
                buffer.clear();
            }
            if (endOfInput) {
                finished = true;
            } else {
                buffer.add(current);
            }
        }
        vcfIn.close();
        vcfIn = null;
        this.pw.flush();
        this.pw.close();
        return RETURN_OK;
    } catch (Exception e) {
        LOG.error(e);
        return -1;
    } finally {
        CloserUtil.close(fasta);
        CloserUtil.close(vcfIn);
        CloserUtil.close(pw);
    }
}
Also used : VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile)

Example 2 with VcfIterator

use of com.github.lindenb.jvarkit.util.vcf.VcfIterator in project jvarkit by lindenb.

the class KnimeVariantHelper method processVcfMulti.

/**
 * Processes a VCF file: every input variant is passed to {@code fun} and each
 * variant returned by {@code fun} is written to a new VCF file.
 *
 * The output extension follows {@code forceSuffix} when it forces tabix or
 * tribble, otherwise it mirrors the input ({@code .vcf.gz} if the input ends
 * with {@code .gz}, else {@code .vcf}). While running, the presence of a file
 * named {@code STOP} in the working directory is checked at most every ten
 * seconds; when found, processing is aborted. On abort or on error the output
 * file and its index are deleted.
 *
 * @param vcfIn input file name or URL; must not be {@code null}
 * @param fun function mapping one input variant to the list of variants to
 *        write; a {@code null} result writes nothing for that variant
 * @return the output file name
 * @throws IllegalArgumentException if {@code vcfIn} is {@code null}
 * @throws IOException if a STOP file already exists, or wrapping any exception
 *         raised while processing (including a missing sequence dictionary)
 */
public String processVcfMulti(final String vcfIn, final Function<VariantContext, List<VariantContext>> fun) throws IOException {
    this.lastVariantCount = 0;
    if (vcfIn == null) {
        final String msg = "Vcf Input URI/FIle is null.";
        LOG.error(msg);
        throw new IllegalArgumentException(msg);
    }
    File outVcfFile = null;
    File outVcfIndexFile = null;
    final File STOP_FILE = new File(this.workfingDirectory, "STOP");
    if (STOP_FILE.exists()) {
        final String msg = "There is a stop file in " + STOP_FILE;
        LOG.error(msg);
        throw new IOException(msg);
    }
    boolean fail_flag = false;
    VcfIterator iter = null;
    VariantContextWriter variantContextWriter = null;
    try {
        IOUtil.assertDirectoryIsReadable(this.workfingDirectory);
        IOUtil.assertDirectoryIsWritable(this.workfingDirectory);
        if (!IOUtil.isUrl(vcfIn)) {
            IOUtil.assertFileIsReadable(new File(vcfIn));
        }
        final String extension;
        if (this.forceSuffix.equals(ForceSuffix.ForceTabix)) {
            extension = ".vcf.gz";
        } else if (this.forceSuffix.equals(ForceSuffix.ForceTribble)) {
            extension = ".vcf";
        } else if (vcfIn.endsWith(".gz")) {
            extension = ".vcf.gz";
        } else {
            extension = ".vcf";
        }
        final String filename = this.createOutputFile(vcfIn, extension);
        // Compressed output is indexed with tabix, plain VCF with tribble.
        // The index path is only used to clean up after a failure, so it must
        // match the index that actually sits next to the output file.
        final String indexFilename;
        if (extension.endsWith(".gz")) {
            indexFilename = filename + TabixUtils.STANDARD_INDEX_EXTENSION;
        } else {
            indexFilename = filename + Tribble.STANDARD_INDEX_EXTENSION;
        }
        outVcfFile = new File(filename);
        outVcfIndexFile = new File(indexFilename);
        LOG.info("opening " + vcfIn);
        iter = VCFUtils.createVcfIterator(vcfIn);
        super.init(iter.getHeader());
        final VCFHeader vcfHeader2;
        if (this.getExtraVcfHeaderLines().isEmpty()) {
            vcfHeader2 = iter.getHeader();
        } else {
            vcfHeader2 = new VCFHeader(iter.getHeader());
            for (final VCFHeaderLine extra : this.getExtraVcfHeaderLines()) {
                vcfHeader2.addMetaDataLine(extra);
            }
            // clear the extra header lines now that they have been added to the header.
            this.getExtraVcfHeaderLines().clear();
        }
        final SAMSequenceDictionary dict = this.getHeader().getSequenceDictionary();
        if (dict == null) {
            final String msg = "There is no dictionary (##contig lines) in " + vcfIn + " but they are required.";
            LOG.error(msg);
            throw new IllegalArgumentException(msg);
        }
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(dict);
        progress.setLogPrefix(this.filePrefix);
        LOG.info("writing " + outVcfFile + ". Emergency stop file is " + STOP_FILE);
        variantContextWriter = this.variantContextWriterBuilder.setOutputFile(outVcfFile).setReferenceDictionary(dict).build();
        long lastTick = System.currentTimeMillis();
        variantContextWriter.writeHeader(vcfHeader2);
        while (iter.hasNext()) {
            final VariantContext ctx = progress.watch(iter.next());
            final List<VariantContext> array = fun.apply(ctx);
            if (array != null) {
                for (final VariantContext ctx2 : array) {
                    variantContextWriter.add(ctx2);
                    this.lastVariantCount++;
                }
            }
            // check the STOP file, but not more often than every 10 seconds
            final long now = System.currentTimeMillis();
            if ((now - lastTick) > 10 * 1000) {
                lastTick = now;
                if (STOP_FILE.exists()) {
                    LOG.warn("STOP FILE detected " + STOP_FILE + " Aborting.");
                    // NOTE(review): the method still returns the output path below,
                    // although the finally block deletes that file; confirm callers
                    // expect this.
                    fail_flag = true;
                    break;
                }
            }
        }
        progress.finish();
        iter.close();
        iter = null;
        variantContextWriter.close();
        variantContextWriter = null;
        return outVcfFile.getPath();
    } catch (final Exception err) {
        fail_flag = true;
        LOG.error(err);
        throw new IOException(err);
    } finally {
        // close before deleting so that no handle is left open on the files
        CloserUtil.close(iter);
        CloserUtil.close(variantContextWriter);
        if (fail_flag) {
            if (outVcfFile != null && outVcfFile.exists()) {
                LOG.warn("deleting " + outVcfFile);
                outVcfFile.delete();
            }
            if (outVcfIndexFile != null && outVcfIndexFile.exists()) {
                LOG.warn("deleting " + outVcfIndexFile);
                outVcfIndexFile.delete();
            }
        }
    }
}
Also used : VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) VariantContext(htsjdk.variant.variantcontext.VariantContext) IOException(java.io.IOException) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) IOException(java.io.IOException) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) File(java.io.File)

Example 3 with VcfIterator

use of com.github.lindenb.jvarkit.util.vcf.VcfIterator in project jvarkit by lindenb.

the class Launcher method doVcfToVcf.

/**
 * Opens the input VCF and the output writer, then delegates to
 * {@code doVcfToVcf(String, VcfIterator, VariantContextWriter)}.
 *
 * @param inputNameOrNull input name passed to {@code openVcfIterator};
 *        when {@code null} the delegate is given the label {@code "<STDIN>"}
 * @param outorNull output file passed to {@code openVariantContextWriter}
 * @return the value returned by the delegate, or {@code -1} if an exception
 *         was raised
 */
protected int doVcfToVcf(final String inputNameOrNull, final File outorNull) {
    VcfIterator reader = null;
    VariantContextWriter writer = null;
    try {
        reader = openVcfIterator(inputNameOrNull);
        writer = openVariantContextWriter(outorNull);
        final String label;
        if (inputNameOrNull == null) {
            label = "<STDIN>";
        } else {
            label = inputNameOrNull;
        }
        final int status = doVcfToVcf(label, reader, writer);
        // close explicitly on success, writer first, then null the references
        // so the finally block has nothing left to close
        writer.close();
        writer = null;
        reader.close();
        reader = null;
        return status;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(reader);
        CloserUtil.close(writer);
    }
}
Also used : VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) ParameterException(com.beust.jcommander.ParameterException) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) FileNotFoundException(java.io.FileNotFoundException) IOException(java.io.IOException)

Example 4 with VcfIterator

use of com.github.lindenb.jvarkit.util.vcf.VcfIterator in project jvarkit by lindenb.

the class VcfBiomart method doVcfToVcf.

/**
 * Annotates each variant with the result of a Biomart query.
 *
 * For every variant the contig, start and end are set on the query filters,
 * the query document is serialized and sent with a GET request to
 * {@code serviceUrl}. Each non-blank response line other than
 * {@code [success]} is split on tabs and turned into one pipe-separated value
 * of the INFO attribute {@code TAG}; duplicate values are dropped and the
 * order of first appearance is kept.
 *
 * @param inputName name of the input (not used by this implementation)
 * @param iter source of the header and of the variants
 * @param out writer receiving the header and the annotated variants
 * @return {@code 0} on success
 * @throws RuntimeIOException wrapping any exception raised, including a
 *         response status other than 200
 */
@Override
protected int doVcfToVcf(final String inputName, final VcfIterator iter, final VariantContextWriter out) {
    final Pattern tab = Pattern.compile("[\t]");
    try {
        final TransformerFactory factory = TransformerFactory.newInstance();
        final Transformer transformer = factory.newTransformer();
        // transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
        final VCFHeader header = iter.getHeader();
        final StringBuilder desc = new StringBuilder("Biomart query. Format: ");
        desc.append(this.attributes.stream().map(S -> this.printLabels ? S + "|" + S : S).collect(Collectors.joining("|")));
        header.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "CmdLine", String.valueOf(getProgramCommandLine())));
        header.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "Version", String.valueOf(getVersion())));
        header.addMetaDataLine(new VCFInfoHeaderLine(this.TAG, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, desc.toString()));
        out.writeHeader(header);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header).logger(LOG);
        while (iter.hasNext()) {
            final VariantContext ctx = progress.watch(iter.next());
            final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
            // drop any previous annotation before adding the new one
            vcb.rmAttribute(this.TAG);
            this.filterColumnContig.set(ctx.getContig());
            this.filterColumnStart.set(String.valueOf(ctx.getStart()));
            this.filterColumnEnd.set(String.valueOf(ctx.getEnd()));
            final StringWriter domToStr = new StringWriter();
            transformer.transform(new DOMSource(this.domQuery), new StreamResult(domToStr));
            final URIBuilder builder = new URIBuilder(this.serviceUrl);
            builder.addParameter("query", domToStr.toString());
            // System.err.println("\nwget -O - 'http://grch37.ensembl.org/biomart/martservice?query="+escapedQuery+"'\n");
            // escapedQuery = URLEncoder.encode(escapedQuery,"UTF-8");
            final HttpGet httpGet = new HttpGet(builder.build());
            final Set<String> infoAtts;
            CloseableHttpResponse httpResponse = null;
            InputStream response = null;
            BufferedReader br = null;
            try {
                httpResponse = this.httpClient.execute(httpGet);
                final int responseCode = httpResponse.getStatusLine().getStatusCode();
                if (responseCode != 200) {
                    throw new RuntimeIOException("Response code was not 200. Detected response was " + responseCode);
                }
                response = httpResponse.getEntity().getContent();
                if (this.teeResponse) {
                    // presumably echoes the raw response to stderr for debugging
                    response = new TeeInputStream(response, stderr(), false);
                }
                br = new BufferedReader(new InputStreamReader(response));
                infoAtts = br.lines().filter(L -> !StringUtil.isBlank(L)).filter(L -> !L.equals("[success]")).map(L -> tab.split(L)).map(T -> {
                    final StringBuilder sb = new StringBuilder();
                    for (int i = 0; i < this.attributes.size(); i++) {
                        if (i > 0)
                            sb.append("|");
                        if (this.printLabels)
                            sb.append(escapeInfo(this.attributes.get(i))).append("|");
                        // a response line may have fewer columns than requested attributes
                        sb.append(i < T.length ? escapeInfo(T[i]) : "");
                    }
                    return sb.toString();
                }).collect(Collectors.toCollection(LinkedHashSet::new));
            } finally {
                // always release the connection, including on a non-200 status
                // or when reading or parsing the response fails
                CloserUtil.close(br);
                CloserUtil.close(response);
                CloserUtil.close(httpResponse);
            }
            if (!infoAtts.isEmpty()) {
                vcb.attribute(this.TAG, new ArrayList<>(infoAtts));
            }
            out.add(vcb.make());
        }
        progress.finish();
        return 0;
    } catch (final Exception err) {
        LOG.error(err);
        throw new RuntimeIOException(err);
    }
}
Also used : VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) Transformer(javax.xml.transform.Transformer) DOMSource(javax.xml.transform.dom.DOMSource) VCFUtils(com.github.lindenb.jvarkit.util.vcf.VCFUtils) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) VCFHeader(htsjdk.variant.vcf.VCFHeader) StreamResult(javax.xml.transform.stream.StreamResult) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) Attr(org.w3c.dom.Attr) ArrayList(java.util.ArrayList) StringUtil(htsjdk.samtools.util.StringUtil) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) Document(org.w3c.dom.Document) CloseableHttpResponse(org.apache.http.client.methods.CloseableHttpResponse) Node(org.w3c.dom.Node) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) LinkedHashSet(java.util.LinkedHashSet) CloserUtil(htsjdk.samtools.util.CloserUtil) TeeInputStream(com.github.lindenb.jvarkit.io.TeeInputStream) VCFHeaderLineType(htsjdk.variant.vcf.VCFHeaderLineType) CloseableHttpClient(org.apache.http.impl.client.CloseableHttpClient) URIBuilder(org.apache.http.client.utils.URIBuilder) StringWriter(java.io.StringWriter) Logger(com.github.lindenb.jvarkit.util.log.Logger) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) Set(java.util.Set) InputStreamReader(java.io.InputStreamReader) Collectors(java.util.stream.Collectors) File(java.io.File) List(java.util.List) Element(org.w3c.dom.Element) HttpGet(org.apache.http.client.methods.HttpGet) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) DocumentBuilder(javax.xml.parsers.DocumentBuilder) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) VariantContext(htsjdk.variant.variantcontext.VariantContext) BufferedReader(java.io.BufferedReader) Pattern(java.util.regex.Pattern) DocumentBuilderFactory(javax.xml.parsers.DocumentBuilderFactory) TransformerFactory(javax.xml.transform.TransformerFactory) 
VCFHeaderLineCount(htsjdk.variant.vcf.VCFHeaderLineCount) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) HttpClients(org.apache.http.impl.client.HttpClients) InputStream(java.io.InputStream) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) DOMSource(javax.xml.transform.dom.DOMSource) Transformer(javax.xml.transform.Transformer) HttpGet(org.apache.http.client.methods.HttpGet) VariantContext(htsjdk.variant.variantcontext.VariantContext) StringWriter(java.io.StringWriter) CloseableHttpResponse(org.apache.http.client.methods.CloseableHttpResponse) VCFHeader(htsjdk.variant.vcf.VCFHeader) Pattern(java.util.regex.Pattern) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) TransformerFactory(javax.xml.transform.TransformerFactory) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) StreamResult(javax.xml.transform.stream.StreamResult) InputStreamReader(java.io.InputStreamReader) TeeInputStream(com.github.lindenb.jvarkit.io.TeeInputStream) InputStream(java.io.InputStream) TeeInputStream(com.github.lindenb.jvarkit.io.TeeInputStream) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) URIBuilder(org.apache.http.client.utils.URIBuilder) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) BufferedReader(java.io.BufferedReader)

Example 5 with VcfIterator

use of com.github.lindenb.jvarkit.util.vcf.VcfIterator in project jvarkit by lindenb.

the class VcfEnsemblReg method annotate.

/**
 * Copies the variants of {@code inf} to {@code outf}, adding an INFO
 * attribute named after {@code track.id} with the value found in the
 * bigWig/bigBed file at {@code track.url}.
 *
 * The contig is prefixed with {@code chr} when needed before the lookup.
 * Variants on contigs other than chr0-chr22, chrX and chrY, and variants with
 * no matching item, are written unchanged. For a bigWig track the first value
 * returned is stored as a Float; for a bigBed track the distinct
 * {@code name|color} pairs are joined with commas.
 *
 * @param track the track to read; its id, labels, url and parent are used
 * @param inf input VCF file
 * @param outf output VCF file
 * @throws IOException if reading or closing one of the resources fails
 */
private void annotate(Track track, File inf, File outf) throws IOException {
    final boolean contained = false;
    LOG.info("Processing " + track.id + " (" + track.shortLabel + ") " + track.url);
    // Resources are released in finally blocks so that an exception in the
    // middle of the file does not leak the iterator, the stream or the writer.
    final VcfIterator in = VCFUtils.createVcfIteratorFromFile(inf);
    try {
        final VCFHeader header = in.getHeader();
        final SeekableStream sstream = SeekableStreamFactory.getInstance().getStreamFor(track.url);
        try {
            final BBFileReader bigFile = new BBFileReader(track.url.toString(), new SeekableStreamAdaptor(sstream));
            final VariantContextWriter w1 = VCFUtils.createVariantContextWriter(outf);
            try {
                final VCFInfoHeaderLine info;
                if (bigFile.isBigWigFile()) {
                    info = new VCFInfoHeaderLine(track.id, 1, VCFHeaderLineType.Float, String.valueOf(track.longLabel) + " " + track.url);
                } else {
                    info = new VCFInfoHeaderLine(track.id, 1, VCFHeaderLineType.String, String.valueOf(track.longLabel) + " " + track.url);
                }
                header.addMetaDataLine(info);
                w1.writeHeader(in.getHeader());
                while (in.hasNext()) {
                    final VariantContext ctx = in.next();
                    String chrom = ctx.getContig();
                    if (!chrom.startsWith("chr"))
                        chrom = "chr" + chrom;
                    // chr2[0-2]: the former chr2[12] skipped chr20, so variants
                    // on chromosome 20 were never annotated.
                    if (!chrom.matches("(chrX|chrY|chr[0-9]|chr1[0-9]|chr2[0-2])")) {
                        w1.add(ctx);
                    } else if (bigFile.isBigWigFile()) {
                        final BigWigIterator iter = bigFile.getBigWigIterator(chrom, ctx.getStart() - 1, chrom, ctx.getStart(), contained);
                        Float wigValue = null;
                        // keep the first value returned for this position
                        while (iter != null && iter.hasNext() && wigValue == null) {
                            final WigItem item = iter.next();
                            wigValue = item.getWigValue();
                        }
                        if (wigValue == null) {
                            w1.add(ctx);
                            continue;
                        }
                        final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
                        vcb.attribute(track.id, wigValue);
                        w1.add(vcb.make());
                    } else {
                        final BigBedIterator iter = bigFile.getBigBedIterator(chrom, ctx.getStart() - 1, chrom, ctx.getStart(), contained);
                        final Set<String> bedValues = new HashSet<String>();
                        while (iter != null && iter.hasNext()) {
                            final BedFeature item = iter.next();
                            final String[] rest = item.getRestOfFields();
                            // exactly six extra fields are expected; report and skip anything else
                            if (rest == null || rest.length != 6) {
                                System.err.println(track.id + " " + Arrays.toString(item.getRestOfFields()));
                                continue;
                            }
                            // the last field is translated by a helper chosen from the parent track
                            String color = null;
                            if (track.parent != null) {
                                if (track.parent.startsWith("Segway_17SegmentationSummaries")) {
                                    color = segway_17SegmentationSummaries(rest[5]);
                                } else if (track.parent.startsWith("ProjectedSegments")) {
                                    color = projectedSegments(rest[5]);
                                } else if (track.parent.startsWith("RegBuildOverview")) {
                                    color = regBuildOverview(rest[5]);
                                } else if (track.parent.startsWith("Segway_17CellSegments")) {
                                    color = segway_17CellSegments(rest[5]);
                                } else {
                                    System.err.println("Unknown parent:" + track.parent);
                                }
                            }
                            if (color == null)
                                continue;
                            bedValues.add(rest[0] + "|" + color);
                        }
                        if (bedValues.isEmpty()) {
                            w1.add(ctx);
                            continue;
                        }
                        final StringBuilder sb = new StringBuilder();
                        for (final String s : bedValues) {
                            if (sb.length() != 0)
                                sb.append(",");
                            sb.append(s);
                        }
                        final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
                        vcb.attribute(track.id, sb.toString());
                        w1.add(vcb.make());
                    }
                }
            } finally {
                w1.close();
            }
        } finally {
            sstream.close();
        }
    } finally {
        in.close();
    }
}
Also used : HashSet(java.util.HashSet) Set(java.util.Set) SeekableStream(htsjdk.samtools.seekablestream.SeekableStream) VariantContext(htsjdk.variant.variantcontext.VariantContext) BigBedIterator(org.broad.igv.bbfile.BigBedIterator) BedFeature(org.broad.igv.bbfile.BedFeature) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) WigItem(org.broad.igv.bbfile.WigItem) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) SeekableStreamAdaptor(com.github.lindenb.jvarkit.util.igv.SeekableStreamAdaptor) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) BBFileReader(org.broad.igv.bbfile.BBFileReader) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) BigWigIterator(org.broad.igv.bbfile.BigWigIterator)

Aggregations

VcfIterator (com.github.lindenb.jvarkit.util.vcf.VcfIterator)55 VariantContext (htsjdk.variant.variantcontext.VariantContext)39 VCFHeader (htsjdk.variant.vcf.VCFHeader)35 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)30 ArrayList (java.util.ArrayList)28 VariantContextWriter (htsjdk.variant.variantcontext.writer.VariantContextWriter)26 IOException (java.io.IOException)24 File (java.io.File)22 HashSet (java.util.HashSet)19 List (java.util.List)19 Genotype (htsjdk.variant.variantcontext.Genotype)18 Parameter (com.beust.jcommander.Parameter)17 Launcher (com.github.lindenb.jvarkit.util.jcommander.Launcher)17 Program (com.github.lindenb.jvarkit.util.jcommander.Program)17 Logger (com.github.lindenb.jvarkit.util.log.Logger)17 Set (java.util.Set)17 Allele (htsjdk.variant.variantcontext.Allele)16 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)15 VariantContextBuilder (htsjdk.variant.variantcontext.VariantContextBuilder)15 JvarkitException (com.github.lindenb.jvarkit.lang.JvarkitException)14