Search in sources :

Example 6 with CharSplitter

use of com.github.lindenb.jvarkit.lang.CharSplitter in project jvarkit by lindenb.

the class KnownGene method loadUriAsIntervalTreeMap.

/**
 * Loads a knownGene file/URI as an IntervalTreeMap. Intervals in the IntervalTreeMap are
 * *1-based* (interval.start = kg.txStart + 1).
 *
 * <p>Each non-empty line is split on tabs and turned into a {@code KnownGene}. Genes for which
 * the map already holds a list under the computed interval are appended to that list;
 * otherwise a new list is registered for the interval.
 *
 * @param uri the file path or URI to read, opened with {@code IOUtils.openURIForBufferedReading}
 * @param filterOrNull optional filter; when non-null, only genes accepted by it are kept
 * @return the populated interval map
 * @throws IOException if opening, reading or closing the source fails
 */
public static IntervalTreeMap<List<KnownGene>> loadUriAsIntervalTreeMap(final String uri, final Predicate<KnownGene> filterOrNull) throws IOException {
    final IntervalTreeMap<List<KnownGene>> treeMap = new IntervalTreeMap<>();
    final CharSplitter tab = CharSplitter.TAB;
    // try-with-resources closes the reader on every exit path, including exceptions.
    try (BufferedReader in = IOUtils.openURIForBufferedReading(uri)) {
        String line;
        while ((line = in.readLine()) != null) {
            if (line.isEmpty()) {
                continue;
            }
            final KnownGene gene = new KnownGene(tab.split(line));
            if (filterOrNull != null && !filterOrNull.test(gene)) {
                continue;
            }
            // txStart is shifted by one so that the interval start is 1-based.
            final Interval interval = new Interval(gene.getContig(), gene.getTxStart() + 1, gene.getTxEnd(), gene.isNegativeStrand(), gene.getName());
            List<KnownGene> genes = treeMap.get(interval);
            if (genes == null) {
                genes = new ArrayList<>(2);
                treeMap.put(interval, genes);
            }
            genes.add(gene);
        }
    }
    return treeMap;
}
Also used : CharSplitter(com.github.lindenb.jvarkit.lang.CharSplitter) BufferedReader(java.io.BufferedReader) ArrayList(java.util.ArrayList) List(java.util.List) IntervalTreeMap(htsjdk.samtools.util.IntervalTreeMap) Interval(htsjdk.samtools.util.Interval)

Example 7 with CharSplitter

use of com.github.lindenb.jvarkit.lang.CharSplitter in project jvarkit by lindenb.

the class PedigreeParser method parse.

/**
 * Parses a tab-delimited pedigree from the given reader.
 *
 * <p>Blank lines and lines starting with {@code #} are skipped. Every other line must have at
 * least four columns (family, individual, father, mother); the optional fifth and sixth
 * columns are passed on as sex and status, defaulting to the empty string when absent.
 * The reader is not closed by this method.
 *
 * @param br the source to read lines from
 * @return the pedigree, after {@code validate()} has been called on it
 * @throws IOException if reading from {@code br} fails
 * @throws IllegalArgumentException if a data line has fewer than four columns
 */
public Pedigree parse(final BufferedReader br) throws IOException {
    final PedigreeImpl pedigree = new PedigreeImpl();
    final CharSplitter splitter = CharSplitter.TAB;
    for (String row = br.readLine(); row != null; row = br.readLine()) {
        // Skip blank lines and comment lines.
        if (StringUtils.isBlank(row) || row.startsWith("#")) {
            continue;
        }
        final String[] columns = splitter.split(row);
        if (columns.length < 4) {
            throw new IllegalArgumentException("not enough tokens for pedigree in " + row);
        }
        // Sex and status are optional trailing columns.
        final String sex;
        if (columns.length > 4) {
            sex = columns[4];
        } else {
            sex = "";
        }
        final String status;
        if (columns.length > 5) {
            status = columns[5];
        } else {
            status = "";
        }
        build(pedigree, columns[0], columns[1], columns[2], columns[3], sex, status);
    }
    pedigree.validate();
    return pedigree;
}
Also used : CharSplitter(com.github.lindenb.jvarkit.lang.CharSplitter)

Example 8 with CharSplitter

use of com.github.lindenb.jvarkit.lang.CharSplitter in project jvarkit by lindenb.

the class VcfBiomart method doVcfToVcf.

/**
 * Annotates every variant of the input with the result of a Biomart query.
 *
 * <p>For each variant the contig/start/end filter columns are set, {@code this.domQuery} is
 * serialized to a string and sent as the {@code query} parameter of a GET request to
 * {@code this.serviceUrl}. Each non-blank response line other than {@code [success]} is split
 * on tabs and re-joined with {@code |} (optionally prefixed by the attribute label); the
 * distinct resulting strings are stored, in encounter order, under the INFO key
 * {@code this.TAG}.
 *
 * <p>The HTTP response, its stream and the reader are released for every variant, including
 * when the status code is not 200 or reading fails.
 *
 * @param inputName name of the input (not used by this method)
 * @param iter the variants to annotate
 * @param out the writer receiving the header and the annotated variants
 * @return 0 on success
 * @throws RuntimeIOException wrapping any exception raised while querying or writing
 */
@Override
protected int doVcfToVcf(final String inputName, final VCFIterator iter, final VariantContextWriter out) {
    final CharSplitter tab = CharSplitter.TAB;
    try {
        final TransformerFactory factory = TransformerFactory.newInstance();
        final Transformer transformer = factory.newTransformer();
        final VCFHeader header = iter.getHeader();
        final StringBuilder desc = new StringBuilder("Biomart query. Format: ");
        desc.append(this.attributes.stream().map(S -> this.printLabels ? S + "|" + S : S).collect(Collectors.joining("|")));
        header.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "CmdLine", String.valueOf(getProgramCommandLine())));
        header.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "Version", String.valueOf(getVersion())));
        header.addMetaDataLine(new VCFInfoHeaderLine(this.TAG, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, desc.toString()));
        out.writeHeader(header);
        while (iter.hasNext()) {
            final VariantContext ctx = iter.next();
            final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
            // Drop any pre-existing annotation before (possibly) adding the fresh one.
            vcb.rmAttribute(this.TAG);
            // NOTE(review): presumably these setters update nodes of this.domQuery - confirm.
            this.filterColumnContig.set(ctx.getContig());
            this.filterColumnStart.set(String.valueOf(ctx.getStart()));
            this.filterColumnEnd.set(String.valueOf(ctx.getEnd()));
            final StringWriter domToStr = new StringWriter();
            transformer.transform(new DOMSource(this.domQuery), new StreamResult(domToStr));
            final URIBuilder builder = new URIBuilder(this.serviceUrl);
            builder.addParameter("query", domToStr.toString());
            final HttpGet httpGet = new HttpGet(builder.build());

            CloseableHttpResponse httpResponse = null;
            InputStream response = null;
            BufferedReader br = null;
            final Set<String> infoAtts;
            try {
                httpResponse = httpClient.execute(httpGet);
                final int responseCode = httpResponse.getStatusLine().getStatusCode();
                if (responseCode != 200) {
                    throw new RuntimeIOException("Response code was not 200. Detected response was " + responseCode);
                }
                response = httpResponse.getEntity().getContent();
                if (this.teeResponse) {
                    // Echo the raw response to stderr while it is being parsed.
                    response = new TeeInputStream(response, stderr(), false);
                }
                br = new BufferedReader(new InputStreamReader(response));
                infoAtts = br.lines().filter(line -> !StringUtil.isBlank(line)).filter(line -> !line.equals("[success]")).map(line -> tab.split(line)).map(tokens -> {
                    final StringBuilder sb = new StringBuilder();
                    for (int i = 0; i < this.attributes.size(); i++) {
                        if (i > 0) {
                            sb.append("|");
                        }
                        if (this.printLabels) {
                            sb.append(escapeInfo(this.attributes.get(i))).append("|");
                        }
                        // Rows shorter than the attribute list yield empty trailing fields.
                        sb.append(i < tokens.length ? escapeInfo(tokens[i]) : "");
                    }
                    return sb.toString();
                }).collect(Collectors.toCollection(LinkedHashSet::new));
            } finally {
                // Best-effort release on every path so a failed request cannot leak the connection.
                CloserUtil.close(br);
                CloserUtil.close(response);
                CloserUtil.close(httpResponse);
            }
            if (!infoAtts.isEmpty()) {
                vcb.attribute(this.TAG, new ArrayList<>(infoAtts));
            }
            out.add(vcb.make());
        }
        return 0;
    } catch (final Exception err) {
        LOG.error(err);
        throw new RuntimeIOException(err);
    }
}
Also used : VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) Transformer(javax.xml.transform.Transformer) DOMSource(javax.xml.transform.dom.DOMSource) VCFUtils(com.github.lindenb.jvarkit.util.vcf.VCFUtils) CharSplitter(com.github.lindenb.jvarkit.lang.CharSplitter) Program(com.github.lindenb.jvarkit.util.jcommander.Program) VCFIterator(htsjdk.variant.vcf.VCFIterator) Parameter(com.beust.jcommander.Parameter) VCFHeader(htsjdk.variant.vcf.VCFHeader) StreamResult(javax.xml.transform.stream.StreamResult) Attr(org.w3c.dom.Attr) ArrayList(java.util.ArrayList) StringUtil(htsjdk.samtools.util.StringUtil) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) Document(org.w3c.dom.Document) CloseableHttpResponse(org.apache.http.client.methods.CloseableHttpResponse) Node(org.w3c.dom.Node) LinkedHashSet(java.util.LinkedHashSet) CloserUtil(htsjdk.samtools.util.CloserUtil) TeeInputStream(com.github.lindenb.jvarkit.io.TeeInputStream) VCFHeaderLineType(htsjdk.variant.vcf.VCFHeaderLineType) CloseableHttpClient(org.apache.http.impl.client.CloseableHttpClient) URIBuilder(org.apache.http.client.utils.URIBuilder) StringWriter(java.io.StringWriter) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) InputStreamReader(java.io.InputStreamReader) Collectors(java.util.stream.Collectors) File(java.io.File) List(java.util.List) Element(org.w3c.dom.Element) HttpGet(org.apache.http.client.methods.HttpGet) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) DocumentBuilder(javax.xml.parsers.DocumentBuilder) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) VariantContext(htsjdk.variant.variantcontext.VariantContext) OnePassVcfLauncher(com.github.lindenb.jvarkit.jcommander.OnePassVcfLauncher) BufferedReader(java.io.BufferedReader) DocumentBuilderFactory(javax.xml.parsers.DocumentBuilderFactory) TransformerFactory(javax.xml.transform.TransformerFactory) VCFHeaderLineCount(htsjdk.variant.vcf.VCFHeaderLineCount) 
VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) HttpClients(org.apache.http.impl.client.HttpClients) InputStream(java.io.InputStream) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) DOMSource(javax.xml.transform.dom.DOMSource) Transformer(javax.xml.transform.Transformer) HttpGet(org.apache.http.client.methods.HttpGet) VariantContext(htsjdk.variant.variantcontext.VariantContext) StringWriter(java.io.StringWriter) CloseableHttpResponse(org.apache.http.client.methods.CloseableHttpResponse) VCFHeader(htsjdk.variant.vcf.VCFHeader) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) TransformerFactory(javax.xml.transform.TransformerFactory) CharSplitter(com.github.lindenb.jvarkit.lang.CharSplitter) StreamResult(javax.xml.transform.stream.StreamResult) InputStreamReader(java.io.InputStreamReader) TeeInputStream(com.github.lindenb.jvarkit.io.TeeInputStream) InputStream(java.io.InputStream) TeeInputStream(com.github.lindenb.jvarkit.io.TeeInputStream) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) URIBuilder(org.apache.http.client.utils.URIBuilder) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) BufferedReader(java.io.BufferedReader)

Example 9 with CharSplitter

use of com.github.lindenb.jvarkit.lang.CharSplitter in project jvarkit by lindenb.

the class VcfGnomadOld method doWork.

/**
 * Prepares the list of gnomAD manifest entries, then delegates to
 * {@code doVcfToVcf(args, this.outputFile)}.
 *
 * <p>When no manifest file is given, a default manifest is built for contigs 1-22, X and Y
 * and for every {@code OmeType} (only {@code genome} when {@code useGenomeOnly} is set); this
 * is only available for gnomAD version {@code v2_1}. Otherwise the manifest file is read as
 * tab-delimited lines {@code omeType, contig, uri}; blank lines and lines starting with
 * {@code #} are ignored.
 *
 * @param args the command-line arguments forwarded to {@code doVcfToVcf}
 * @return the value returned by {@code doVcfToVcf}, or -1 if the buffer size is below 10, the
 *         default manifest cannot be built for the selected version, or any error occurs
 */
@Override
public int doWork(final List<String> args) {
    try {
        if (this.gnomadBufferSize < 10) {
            LOG.error("buffer size is too small " + this.gnomadBufferSize);
            return -1;
        }
        if (this.manifestFile == null) {
            LOG.info("Building default manifest file...");
            for (final OmeType ot : OmeType.values()) {
                if (this.useGenomeOnly && !ot.equals(OmeType.genome)) {
                    continue;
                }
                // 1..22 are the autosomes, 23 maps to X and 24 to Y.
                for (int i = 1; i <= 24; ++i) {
                    final ManifestEntry entry = new ManifestEntry();
                    entry.omeType = ot;
                    switch (i) {
                        case 23:
                            entry.contig = "X";
                            break;
                        case 24:
                            entry.contig = "Y";
                            break;
                        default:
                            entry.contig = String.valueOf(i);
                            break;
                    }
                    switch (gnomadVersion) {
                        case v2_1:
                            if (ot == OmeType.genome) {
                                // no "chrY" for this version for genome
                                if (i == 24) {
                                    // Release the discarded entry like the other discard paths do.
                                    entry.close();
                                    continue;
                                }
                                entry.uri = "https://storage.googleapis.com/gnomad-public/release/2.1/vcf/genomes/gnomad.genomes.r2.1.sites.chr" + entry.contig + ".vcf.bgz";
                            } else {
                                entry.uri = "https://storage.googleapis.com/gnomad-public/release/2.1/vcf/exomes/gnomad.exomes.r2.1.sites.chr" + entry.contig + ".vcf.bgz";
                            }
                            break;
                        default:
                            {
                                entry.close();
                                LOG.error("Building default manifest is not available for version: " + this.gnomadVersion);
                                return -1;
                            }
                    }
                    this.manifestEntries.add(entry);
                }
            }
            LOG.info("Building default manifest file... Done");
        } else {
            final CharSplitter tab = CharSplitter.TAB;
            // The stream returned by Files.lines holds an open file and must be closed.
            // The type is fully qualified so that no additional import is required.
            try (java.util.stream.Stream<String> lines = Files.lines(this.manifestFile.toPath())) {
                lines.forEach(line -> {
                    if (line.startsWith("#") || StringUtil.isBlank(line)) {
                        return;
                    }
                    final String[] tokens = tab.split(line);
                    if (tokens.length < 3) {
                        throw new JvarkitException.TokenErrors("Expected 3 words", tokens);
                    }
                    final ManifestEntry entry = new ManifestEntry();
                    entry.omeType = OmeType.valueOf(tokens[0]);
                    if (this.useGenomeOnly && !entry.omeType.equals(OmeType.genome)) {
                        entry.close();
                        return;
                    }
                    entry.contig = tokens[1].trim();
                    entry.uri = tokens[2].trim();
                    this.manifestEntries.add(entry);
                });
            } catch (final IOException err) {
                LOG.error(err);
                return -1;
            }
        }
        return doVcfToVcf(args, this.outputFile);
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    }
}
Also used : JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) CharSplitter(com.github.lindenb.jvarkit.lang.CharSplitter) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) IOException(java.io.IOException) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) IOException(java.io.IOException) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException)

Example 10 with CharSplitter

use of com.github.lindenb.jvarkit.lang.CharSplitter in project jvarkit by lindenb.

the class VcfGnomadPext method findOverlapping.

/**
 * Finds the buffered tabix records matching a variant.
 *
 * <p>A window of records is kept in {@code this.buffer} so that consecutive variants do not
 * each trigger a tabix query. The buffer is refilled when there is no previous window, when
 * the previous window is on another contig, or when it does not enclose the variant.
 *
 * @param tabix the tabix reader to query when the buffer must be refilled
 * @param ctx the variant to look up
 * @return the buffered entries with the same contig, start, end and reference as {@code ctx}
 *         and whose alt is one of the alleles of {@code ctx}; an empty list when the contig of
 *         {@code ctx} converts to a blank name
 * @throws IOException if reading from the tabix file fails
 */
final List<PextEntry> findOverlapping(final TabixReader tabix, final VariantContext ctx) throws IOException {
    final String normContig = this.ensemblCtgConvert.apply(ctx.getContig());
    if (StringUtil.isBlank(normContig)) {
        return Collections.emptyList();
    }

    // Entries buffered for another contig are of no use any more.
    final boolean bufferOnOtherContig = !buffer.isEmpty() && !buffer.get(0).contig.equals(normContig);
    if (bufferOnOtherContig) {
        this.buffer.clear();
    }

    final boolean windowCoversVariant = this.lastInterval != null
            && this.lastInterval.getContig().equals(normContig)
            && CoordMath.encloses(lastInterval.getStart(), lastInterval.getEnd(), ctx.getStart(), ctx.getEnd());
    if (!windowCoversVariant) {
        final CharSplitter splitter = CharSplitter.TAB;
        this.buffer.clear();
        // New window: 10 bases before the variant up to gnomadBufferSize bases after it.
        final int windowStart = Math.max(0, ctx.getStart() - 10);
        final int windowEnd = ctx.getEnd() + VcfGnomadPext.this.gnomadBufferSize;
        this.lastInterval = new Interval(normContig, windowStart, windowEnd);
        final TabixReader.Iterator rows = tabix.query(this.lastInterval.getContig(), this.lastInterval.getStart(), this.lastInterval.getEnd());
        String row;
        while ((row = rows.next()) != null) {
            this.buffer.add(new PextEntry(splitter.split(row)));
        }
    }

    return this.buffer.stream()
            .filter(entry -> entry.getStart() == ctx.getStart())
            .filter(entry -> entry.getEnd() == ctx.getEnd())
            .filter(entry -> entry.getContig().equals(normContig))
            .filter(entry -> entry.ref.equals(ctx.getReference()))
            .filter(entry -> ctx.getAlleles().contains(entry.alt))
            .collect(Collectors.toList());
}
Also used : JsonObject(com.google.gson.JsonObject) Allele(htsjdk.variant.variantcontext.Allele) Arrays(java.util.Arrays) CharSplitter(com.github.lindenb.jvarkit.lang.CharSplitter) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) Program(com.github.lindenb.jvarkit.util.jcommander.Program) VCFIterator(htsjdk.variant.vcf.VCFIterator) Parameter(com.beust.jcommander.Parameter) VCFHeader(htsjdk.variant.vcf.VCFHeader) JsonParser(com.google.gson.JsonParser) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) JsonElement(com.google.gson.JsonElement) Interval(htsjdk.samtools.util.Interval) DistanceParser(com.github.lindenb.jvarkit.util.bio.DistanceParser) StringUtil(htsjdk.samtools.util.StringUtil) Map(java.util.Map) TabixReader(htsjdk.tribble.readers.TabixReader) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) Locatable(htsjdk.samtools.util.Locatable) VCFHeaderLineType(htsjdk.variant.vcf.VCFHeaderLineType) Logger(com.github.lindenb.jvarkit.util.log.Logger) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) Set(java.util.Set) IOException(java.io.IOException) JVarkitVersion(com.github.lindenb.jvarkit.util.JVarkitVersion) Collectors(java.util.stream.Collectors) List(java.util.List) JsonArray(com.google.gson.JsonArray) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) OrderChecker(com.github.lindenb.jvarkit.dict.OrderChecker) CoordMath(htsjdk.samtools.util.CoordMath) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) VariantContext(htsjdk.variant.variantcontext.VariantContext) OnePassVcfLauncher(com.github.lindenb.jvarkit.jcommander.OnePassVcfLauncher) VCFHeaderLineCount(htsjdk.variant.vcf.VCFHeaderLineCount) Collections(java.util.Collections) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) CharSplitter(com.github.lindenb.jvarkit.lang.CharSplitter) 
TabixReader(htsjdk.tribble.readers.TabixReader) Interval(htsjdk.samtools.util.Interval)

Aggregations

CharSplitter (com.github.lindenb.jvarkit.lang.CharSplitter)28 BufferedReader (java.io.BufferedReader)19 IOException (java.io.IOException)12 List (java.util.List)11 Parameter (com.beust.jcommander.Parameter)10 JvarkitException (com.github.lindenb.jvarkit.lang.JvarkitException)10 Program (com.github.lindenb.jvarkit.util.jcommander.Program)10 Logger (com.github.lindenb.jvarkit.util.log.Logger)10 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)10 Set (java.util.Set)10 Path (java.nio.file.Path)9 ArrayList (java.util.ArrayList)9 HashSet (java.util.HashSet)9 SequenceDictionaryUtils (com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils)8 Launcher (com.github.lindenb.jvarkit.util.jcommander.Launcher)7 CloserUtil (htsjdk.samtools.util.CloserUtil)7 VariantContextBuilder (htsjdk.variant.variantcontext.VariantContextBuilder)7 VCFHeader (htsjdk.variant.vcf.VCFHeader)7 VCFInfoHeaderLine (htsjdk.variant.vcf.VCFInfoHeaderLine)7 Map (java.util.Map)7